+#
############################################################
#
# perltidy - a perl script indenter and formatter
#
-# Copyright (c) 2000-2003 by Steve Hancock
+# Copyright (c) 2000-2017 by Steve Hancock
# Distributed under the GPL license agreement; see file COPYING
#
# This program is free software; you can redistribute it and/or modify
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
-# For brief instructions instructions, try 'perltidy -h'.
+# For brief instructions, try 'perltidy -h'.
# For more complete documentation, try 'man perltidy'
# or visit http://perltidy.sourceforge.net
#
#
# perltidy Tidy.pm
#
-# Code Contributions:
+# Code Contributions: See ChangeLog.html for a complete history.
# Michael Cartmell supplied code for adaptation to VMS and helped with
# v-strings.
# Hugh S. Myers supplied sub streamhandle and the supporting code to
# create a Perl::Tidy module which can operate on strings, arrays, etc.
# Yves Orton supplied coding to help detect Windows versions.
# Axel Rose supplied a patch for MacPerl.
+# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
+# Dan Tyrell contributed a patch for binary I/O.
+# Ueli Hugenschmidt contributed a patch for -fpsc
+# Sam Kington supplied a patch to identify the initial indentation of
+# entabbed code.
+# jonathan swartz supplied patches for:
+# * .../ pattern, which looks upwards from directory
+# * --notidy, to be used in directories where we want to avoid
+# accidentally tidying
+# * prefilter and postfilter
+# * iterations option
+#
# Many others have supplied key ideas, suggestions, and bug reports;
# see the CHANGES file.
#
############################################################
package Perl::Tidy;
-use 5.004; # need IO::File from 5.004 or later
-BEGIN { $^W = 1; } # turn on warnings
+# Actually should use a version later than about 5.8.5 to use
+# wide characters.
+use 5.004; # need IO::File from 5.004 or later
+use warnings;
use strict;
use Exporter;
use Carp;
@ISA
@EXPORT
$missing_file_spec
+ $fh_stderr
+ $rOpts_character_encoding
};
@ISA = qw( Exporter );
@EXPORT = qw( &perltidy );
+use Cwd;
+use Encode ();
use IO::File;
use File::Basename;
+use File::Copy;
+use File::Temp qw(tempfile);
BEGIN {
- ( $VERSION = q($Id: Tidy.pm,v 1.46 2003/10/21 14:09:29 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+ ( $VERSION = q($Id: Tidy.pm,v 1.74 2017/05/21 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
}
sub streamhandle {
# skipped and we can just let it crash if there is no
# getline.
if ( $mode =~ /[rR]/ ) {
- if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+
+ # RT#97159; part 1 of 2: updated to use 'can'
+ ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+ if ( $ref->can('getline') ) {
$New = sub { $filename };
}
else {
# Accept an object with a print method for writing.
# See note above about IO::File
if ( $mode =~ /[wW]/ ) {
- if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+
+ # RT#97159; part 2 of 2: updated to use 'can'
+ ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+ if ( $ref->can('print') ) {
$New = sub { $filename };
}
else {
}
}
$fh = $New->( $filename, $mode )
- or warn "Couldn't open file:$filename in mode:$mode : $!\n";
+ or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
+
return $fh, ( $ref or $filename );
}
my $test_file = $path . $name;
my ( $test_name, $test_path ) = fileparse($test_file);
return $test_file if ( $test_name eq $name );
- return undef if ( $^O eq 'VMS' );
+ return undef if ( $^O eq 'VMS' );
# this should work at least for Windows and Unix:
$test_file = $path . '/' . $name;
return undef;
}
-sub make_temporary_filename {
-
- # Make a temporary filename.
- #
- # The POSIX tmpnam() function tends to be unreliable for non-unix
- # systems (at least for the win32 systems that I've tested), so use
- # a pre-defined name. A slight disadvantage of this is that two
- # perltidy runs in the same working directory may conflict.
- # However, the chance of that is small and managable by the user.
- # An alternative would be to check for the file's existance and use,
- # say .TMP0, .TMP1, etc, but that scheme has its own problems. So,
- # keep it simple.
- my $name = "perltidy.TMP";
- if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
- return $name;
- }
- eval "use POSIX qw(tmpnam)";
- if ($@) { return $name }
- use IO::File;
-
- # just make a couple of tries before giving up and using the default
- for ( 0 .. 1 ) {
- my $tmpname = tmpnam();
- my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
- if ($fh) {
- $fh->close();
- return ($tmpname);
- last;
- }
- }
- return ($name);
-}
-
# Here is a map of the flow of data from the input source to the output
# line sink:
#
# messages. It writes a .LOG file, which may be saved with a
# '-log' or a '-g' flag.
-{
+sub perltidy {
- # variables needed by interrupt handler:
- my $tokenizer;
- my $input_file;
+ my %defaults = (
+ argv => undef,
+ destination => undef,
+ formatter => undef,
+ logfile => undef,
+ errorfile => undef,
+ perltidyrc => undef,
+ source => undef,
+ stderr => undef,
+ dump_options => undef,
+ dump_options_type => undef,
+ dump_getopt_flags => undef,
+ dump_options_category => undef,
+ dump_options_range => undef,
+ dump_abbreviations => undef,
+ prefilter => undef,
+ postfilter => undef,
+ );
+
+ # don't overwrite callers ARGV
+ local @ARGV = @ARGV;
+ local *STDERR = *STDERR;
+
+ my %input_hash = @_;
+
+ if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
+ local $" = ')(';
+ my @good_keys = sort keys %defaults;
+ @bad_keys = sort @bad_keys;
+ confess <<EOM;
+------------------------------------------------------------------------
+Unknown perltidy parameter : (@bad_keys)
+perltidy only understands : (@good_keys)
+------------------------------------------------------------------------
- # this routine may be called to give a status report if interrupted. If a
- # parameter is given, it will call exit with that parameter. This is no
- # longer used because it works under Unix but not under Windows.
- sub interrupt_handler {
+EOM
+ }
- my $exit_flag = shift;
- print STDERR "perltidy interrupted";
- if ($tokenizer) {
- my $input_line_number =
- Perl::Tidy::Tokenizer::get_input_line_number();
- print STDERR " at line $input_line_number";
+ my $get_hash_ref = sub {
+ my ($key) = @_;
+ my $hash_ref = $input_hash{$key};
+ if ( defined($hash_ref) ) {
+ unless ( ref($hash_ref) eq 'HASH' ) {
+ my $what = ref($hash_ref);
+ my $but_is =
+ $what ? "but is ref to $what" : "but is not a reference";
+ croak <<EOM;
+------------------------------------------------------------------------
+error in call to perltidy:
+-$key must be reference to HASH $but_is
+------------------------------------------------------------------------
+EOM
+ }
}
- if ($input_file) {
+ return $hash_ref;
+ };
- if ( ref $input_file ) { print STDERR " of reference to:" }
- else { print STDERR " of file:" }
- print STDERR " $input_file";
+ %input_hash = ( %defaults, %input_hash );
+ my $argv = $input_hash{'argv'};
+ my $destination_stream = $input_hash{'destination'};
+ my $errorfile_stream = $input_hash{'errorfile'};
+ my $logfile_stream = $input_hash{'logfile'};
+ my $perltidyrc_stream = $input_hash{'perltidyrc'};
+ my $source_stream = $input_hash{'source'};
+ my $stderr_stream = $input_hash{'stderr'};
+ my $user_formatter = $input_hash{'formatter'};
+ my $prefilter = $input_hash{'prefilter'};
+ my $postfilter = $input_hash{'postfilter'};
+
+ if ($stderr_stream) {
+ ( $fh_stderr, my $stderr_file ) =
+ Perl::Tidy::streamhandle( $stderr_stream, 'w' );
+ if ( !$fh_stderr ) {
+ croak <<EOM;
+------------------------------------------------------------------------
+Unable to redirect STDERR to $stderr_stream
+Please check value of -stderr in call to perltidy
+------------------------------------------------------------------------
+EOM
}
- print STDERR "\n";
- exit $exit_flag if defined($exit_flag);
+ }
+ else {
+ $fh_stderr = *STDERR;
}
- sub perltidy {
+ sub Warn ($) { $fh_stderr->print( $_[0] ); }
- my %defaults = (
- argv => undef,
- destination => undef,
- formatter => undef,
- logfile => undef,
- errorfile => undef,
- perltidyrc => undef,
- source => undef,
- stderr => undef,
- );
+ sub Exit ($) {
+ if ( $_[0] ) { goto ERROR_EXIT }
+ else { goto NORMAL_EXIT }
+ }
- # don't overwrite callers ARGV
- local @ARGV = @ARGV;
+ sub Die ($) { Warn $_[0]; Exit(1); }
- my %input_hash = @_;
- if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
- local $" = ')(';
- my @good_keys = sort keys %defaults;
- @bad_keys = sort @bad_keys;
- confess <<EOM;
+ # extract various dump parameters
+ my $dump_options_type = $input_hash{'dump_options_type'};
+ my $dump_options = $get_hash_ref->('dump_options');
+ my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
+ my $dump_options_category = $get_hash_ref->('dump_options_category');
+ my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
+ my $dump_options_range = $get_hash_ref->('dump_options_range');
+
+ # validate dump_options_type
+ if ( defined($dump_options) ) {
+ unless ( defined($dump_options_type) ) {
+ $dump_options_type = 'perltidyrc';
+ }
+ unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
+ croak <<EOM;
------------------------------------------------------------------------
-Unknown perltidy parameter : (@bad_keys)
-perltidy only understands : (@good_keys)
+Please check value of -dump_options_type in call to perltidy;
+saw: '$dump_options_type'
+expecting: 'perltidyrc' or 'full'
------------------------------------------------------------------------
-
EOM
- }
- %input_hash = ( %defaults, %input_hash );
- my $argv = $input_hash{'argv'};
- my $destination_stream = $input_hash{'destination'};
- my $errorfile_stream = $input_hash{'errorfile'};
- my $logfile_stream = $input_hash{'logfile'};
- my $perltidyrc_stream = $input_hash{'perltidyrc'};
- my $source_stream = $input_hash{'source'};
- my $stderr_stream = $input_hash{'stderr'};
- my $user_formatter = $input_hash{'formatter'};
+ }
+ }
+ else {
+ $dump_options_type = "";
+ }
- if ($user_formatter) {
+ if ($user_formatter) {
- # if the user defines a formatter, there is no output stream,
- # but we need a null stream to keep coding simple
- $destination_stream = Perl::Tidy::DevNull->new();
- }
+ # if the user defines a formatter, there is no output stream,
+ # but we need a null stream to keep coding simple
+ $destination_stream = Perl::Tidy::DevNull->new();
+ }
- # see if ARGV is overridden
- if ( defined($argv) ) {
+ # see if ARGV is overridden
+ if ( defined($argv) ) {
- my $rargv = ref $argv;
- if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
+ my $rargv = ref $argv;
+ if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
- # ref to ARRAY
- if ($rargv) {
- if ( $rargv eq 'ARRAY' ) {
- @ARGV = @$argv;
- }
- else {
- croak <<EOM;
+ # ref to ARRAY
+ if ($rargv) {
+ if ( $rargv eq 'ARRAY' ) {
+ @ARGV = @$argv;
+ }
+ else {
+ croak <<EOM;
------------------------------------------------------------------------
Please check value of -argv in call to perltidy;
it must be a string or ref to ARRAY but is: $rargv
------------------------------------------------------------------------
EOM
- }
}
+ }
- # string
- else {
- my ( $rargv, $msg ) = parse_args($argv);
- if ($msg) {
- die <<EOM;
+ # string
+ else {
+ my ( $rargv, $msg ) = parse_args($argv);
+ if ($msg) {
+ Die <<EOM;
Error parsing this string passed to to perltidy with 'argv':
$msg
EOM
- }
- @ARGV = @{$rargv};
}
+ @ARGV = @{$rargv};
}
+ }
- # redirect STDERR if requested
- if ($stderr_stream) {
- my ( $fh_stderr, $stderr_file ) =
- Perl::Tidy::streamhandle( $stderr_stream, 'w' );
- if ($fh_stderr) { *STDERR = $fh_stderr }
- else {
- croak <<EOM;
-------------------------------------------------------------------------
-Unable to redirect STDERR to $stderr_stream
-Please check value of -stderr in call to perltidy
-------------------------------------------------------------------------
-EOM
- }
- }
+ my $rpending_complaint;
+ $$rpending_complaint = "";
+ my $rpending_logfile_message;
+ $$rpending_logfile_message = "";
- my $rpending_complaint;
- $$rpending_complaint = "";
- my $rpending_logfile_message;
- $$rpending_logfile_message = "";
+ my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
+
+ # VMS file names are restricted to a 40.40 format, so we append _tdy
+ # instead of .tdy, etc. (but see also sub check_vms_filename)
+ my $dot;
+ my $dot_pattern;
+ if ( $^O eq 'VMS' ) {
+ $dot = '_';
+ $dot_pattern = '_';
+ }
+ else {
+ $dot = '.';
+ $dot_pattern = '\.'; # must escape for use in regex
+ }
- my ( $is_Windows, $Windows_type ) =
- look_for_Windows($rpending_complaint);
+ #---------------------------------------------------------------
+ # get command line options
+ #---------------------------------------------------------------
+ my ( $rOpts, $config_file, $rraw_options, $roption_string,
+ $rexpansion, $roption_category, $roption_range )
+ = process_command_line(
+ $perltidyrc_stream, $is_Windows, $Windows_type,
+ $rpending_complaint, $dump_options_type,
+ );
- # VMS file names are restricted to a 40.40 format, so we append _tdy
- # instead of .tdy, etc. (but see also sub check_vms_filename)
- my $dot;
- my $dot_pattern;
- if ( $^O eq 'VMS' ) {
- $dot = '_';
- $dot_pattern = '_';
- }
- else {
- $dot = '.';
- $dot_pattern = '\.'; # must escape for use in regex
- }
+ my $saw_extrude = ( grep m/^-extrude$/, @$rraw_options ) ? 1 : 0;
+ my $saw_pbp =
+ ( grep m/^-(pbp|perl-best-practices)$/, @$rraw_options ) ? 1 : 0;
- # handle command line options
- my ( $rOpts, $config_file, $rraw_options, $saw_extrude ) =
- process_command_line(
- $perltidyrc_stream, $is_Windows,
- $Windows_type, $rpending_complaint
- );
+ #---------------------------------------------------------------
+ # Handle requests to dump information
+ #---------------------------------------------------------------
- if ($user_formatter) {
- $rOpts->{'format'} = 'user';
- }
+ # return or exit immediately after all dumps
+ my $quit_now = 0;
- # there must be one entry here for every possible format
- my %default_file_extension = (
- tidy => 'tdy',
- html => 'html',
- user => '',
- );
+ # Getopt parameters and their flags
+ if ( defined($dump_getopt_flags) ) {
+ $quit_now = 1;
+ foreach my $op ( @{$roption_string} ) {
+ my $opt = $op;
+ my $flag = "";
- # be sure we have a valid output format
- unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
- my $formats = join ' ',
- sort map { "'" . $_ . "'" } keys %default_file_extension;
- my $fmt = $rOpts->{'format'};
- die "-format='$fmt' but must be one of: $formats\n";
+ # Examples:
+ # some-option=s
+ # some-option=i
+ # some-option:i
+ # some-option!
+ if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
+ $opt = $1;
+ $flag = $2;
+ }
+ $dump_getopt_flags->{$opt} = $flag;
}
+ }
+
+ if ( defined($dump_options_category) ) {
+ $quit_now = 1;
+ %{$dump_options_category} = %{$roption_category};
+ }
- my $output_extension =
- make_extension( $rOpts->{'output-file-extension'},
- $default_file_extension{ $rOpts->{'format'} }, $dot );
+ if ( defined($dump_options_range) ) {
+ $quit_now = 1;
+ %{$dump_options_range} = %{$roption_range};
+ }
- my $backup_extension =
- make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
+ if ( defined($dump_abbreviations) ) {
+ $quit_now = 1;
+ %{$dump_abbreviations} = %{$rexpansion};
+ }
- my $html_toc_extension =
- make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
+ if ( defined($dump_options) ) {
+ $quit_now = 1;
+ %{$dump_options} = %{$rOpts};
+ }
- my $html_src_extension =
- make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
+ Exit 0 if ($quit_now);
- # check for -b option;
- my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
- && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
- && @ARGV > 0; # silently ignore if standard input;
- # this allows -b to be in a .perltidyrc file
- # without error messages when running from an editor
+ # make printable string of options for this run as possible diagnostic
+ my $readable_options = readable_options( $rOpts, $roption_string );
- # turn off -b with warnings in case of conflicts with other options
- if ($in_place_modify) {
- if ( $rOpts->{'standard-output'} ) {
- warn "Ignoring -b; you may not use -b and -st together\n";
- $in_place_modify = 0;
- }
- if ($destination_stream) {
- warn
-"Ignoring -b; you may not specify a destination array and -b together\n";
- $in_place_modify = 0;
- }
- if ($source_stream) {
- warn
-"Ignoring -b; you may not specify a source array and -b together\n";
- $in_place_modify = 0;
- }
- if ( $rOpts->{'outfile'} ) {
- warn "Ignoring -b; you may not use -b and -o together\n";
- $in_place_modify = 0;
- }
- if ( defined( $rOpts->{'output-path'} ) ) {
- warn "Ignoring -b; you may not use -b and -opath together\n";
- $in_place_modify = 0;
- }
- }
+ # dump from command line
+ if ( $rOpts->{'dump-options'} ) {
+ print STDOUT $readable_options;
+ Exit 0;
+ }
- Perl::Tidy::Formatter::check_options($rOpts);
- if ( $rOpts->{'format'} eq 'html' ) {
- Perl::Tidy::HtmlWriter->check_options($rOpts);
- }
+ #---------------------------------------------------------------
+ # check parameters and their interactions
+ #---------------------------------------------------------------
+ my $tabsize =
+ check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
- # make the pattern of file extensions that we shouldn't touch
- my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
- if ($output_extension) {
- $_ = quotemeta($output_extension);
- $forbidden_file_extensions .= "|$_";
- }
- if ( $in_place_modify && $backup_extension ) {
- $_ = quotemeta($backup_extension);
- $forbidden_file_extensions .= "|$_";
- }
- $forbidden_file_extensions .= ')$';
+ if ($user_formatter) {
+ $rOpts->{'format'} = 'user';
+ }
- # Create a diagnostics object if requested;
- # This is only useful for code development
- my $diagnostics_object = undef;
- if ( $rOpts->{'DIAGNOSTICS'} ) {
- $diagnostics_object = Perl::Tidy::Diagnostics->new();
- }
+ # there must be one entry here for every possible format
+ my %default_file_extension = (
+ tidy => 'tdy',
+ html => 'html',
+ user => '',
+ );
- # no filenames should be given if input is from an array
- if ($source_stream) {
- if ( @ARGV > 0 ) {
- die
-"You may not specify any filenames when a source array is given\n";
- }
+ $rOpts_character_encoding = $rOpts->{'character-encoding'};
- # we'll stuff the source array into ARGV
- unshift( @ARGV, $source_stream );
+ # be sure we have a valid output format
+ unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
+ my $formats = join ' ',
+ sort map { "'" . $_ . "'" } keys %default_file_extension;
+ my $fmt = $rOpts->{'format'};
+ Die "-format='$fmt' but must be one of: $formats\n";
+ }
- # No special treatment for source stream which is a filename.
- # This will enable checks for binary files and other bad stuff.
- $source_stream = undef unless ref($source_stream);
- }
+ my $output_extension = make_extension( $rOpts->{'output-file-extension'},
+ $default_file_extension{ $rOpts->{'format'} }, $dot );
- # use stdin by default if no source array and no args
- else {
- unshift( @ARGV, '-' ) unless @ARGV;
+ # If the backup extension contains a / character then the backup should
+ # be deleted when the -b option is used. On older versions of
+ # perltidy this will generate an error message due to an illegal
+ # file name.
+ #
+ # A backup file will still be generated but will be deleted
+ # at the end. If -bext='/' then this extension will be
+ # the default 'bak'. Otherwise it will be whatever characters
+ # remains after all '/' characters are removed. For example:
+ # -bext extension slashes
+ # '/' bak 1
+ # '/delete' delete 1
+ # 'delete/' delete 1
+ # '/dev/null' devnull 2 (Currently not allowed)
+ my $bext = $rOpts->{'backup-file-extension'};
+ my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
+
+ # At present only one forward slash is allowed. In the future multiple
+ # slashes may be allowed to allow for other options
+ if ( $delete_backup > 1 ) {
+ Die "-bext=$bext contains more than one '/'\n";
+ }
+
+ my $backup_extension =
+ make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
+
+ my $html_toc_extension =
+ make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
+
+ my $html_src_extension =
+ make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
+
+ # check for -b option;
+ # silently ignore unless beautify mode
+ my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
+ && $rOpts->{'format'} eq 'tidy';
+
+ # Turn off -b with warnings in case of conflicts with other options.
+ # NOTE: Do this silently, without warnings, if there is a source or
+ # destination stream, or standard output is used. This is because the -b
+ # flag may have been in a .perltidyrc file and warnings break
+ # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014.
+ if ($in_place_modify) {
+ if ( $rOpts->{'standard-output'} ) {
+## my $msg = "Ignoring -b; you may not use -b and -st together";
+## $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
+## Warn "$msg\n";
+ $in_place_modify = 0;
+ }
+ if ($destination_stream) {
+ ##Warn "Ignoring -b; you may not specify a destination stream and -b together\n";
+ $in_place_modify = 0;
+ }
+ if ( ref($source_stream) ) {
+ ##Warn "Ignoring -b; you may not specify a source array and -b together\n";
+ $in_place_modify = 0;
+ }
+ if ( $rOpts->{'outfile'} ) {
+ ##Warn "Ignoring -b; you may not use -b and -o together\n";
+ $in_place_modify = 0;
+ }
+ if ( defined( $rOpts->{'output-path'} ) ) {
+ ##Warn "Ignoring -b; you may not use -b and -opath together\n";
+ $in_place_modify = 0;
+ }
+ }
+
+ Perl::Tidy::Formatter::check_options($rOpts);
+ if ( $rOpts->{'format'} eq 'html' ) {
+ Perl::Tidy::HtmlWriter->check_options($rOpts);
+ }
+
+ # make the pattern of file extensions that we shouldn't touch
+ my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
+ if ($output_extension) {
+ my $ext = quotemeta($output_extension);
+ $forbidden_file_extensions .= "|$ext";
+ }
+ if ( $in_place_modify && $backup_extension ) {
+ my $ext = quotemeta($backup_extension);
+ $forbidden_file_extensions .= "|$ext";
+ }
+ $forbidden_file_extensions .= ')$';
+
+ # Create a diagnostics object if requested;
+ # This is only useful for code development
+ my $diagnostics_object = undef;
+ if ( $rOpts->{'DIAGNOSTICS'} ) {
+ $diagnostics_object = Perl::Tidy::Diagnostics->new();
+ }
+
+ # no filenames should be given if input is from an array
+ if ($source_stream) {
+ if ( @ARGV > 0 ) {
+ Die
+"You may not specify any filenames when a source array is given\n";
}
- # loop to process all files in argument list
- my $number_of_files = @ARGV;
- my $formatter = undef;
- $tokenizer = undef;
- while ( $input_file = shift @ARGV ) {
- my $fileroot;
- my $input_file_permissions;
+ # we'll stuff the source array into ARGV
+ unshift( @ARGV, $source_stream );
- #---------------------------------------------------------------
- # determine the input file name
- #---------------------------------------------------------------
- if ($source_stream) {
- $fileroot = "perltidy";
- }
- elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
- $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
- $in_place_modify = 0;
+ # No special treatment for source stream which is a filename.
+ # This will enable checks for binary files and other bad stuff.
+ $source_stream = undef unless ref($source_stream);
+ }
+
+ # use stdin by default if no source array and no args
+ else {
+ unshift( @ARGV, '-' ) unless @ARGV;
+ }
+
+ #---------------------------------------------------------------
+ # Ready to go...
+ # main loop to process all files in argument list
+ #---------------------------------------------------------------
+ my $number_of_files = @ARGV;
+ my $formatter = undef;
+ my $tokenizer = undef;
+ while ( my $input_file = shift @ARGV ) {
+ my $fileroot;
+ my $input_file_permissions;
+
+ #---------------------------------------------------------------
+ # prepare this input stream
+ #---------------------------------------------------------------
+ if ($source_stream) {
+ $fileroot = "perltidy";
+
+ # If the source is from an array or string, then .LOG output
+ # is only possible if a logfile stream is specified. This prevents
+ # unexpected perltidy.LOG files.
+ if ( !defined($logfile_stream) ) {
+ $logfile_stream = Perl::Tidy::DevNull->new();
}
- else {
- $fileroot = $input_file;
- unless ( -e $input_file ) {
-
- # file doesn't exist - check for a file glob
- if ( $input_file =~ /([\?\*\[\{])/ ) {
-
- # Windows shell may not remove quotes, so do it
- my $input_file = $input_file;
- if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
- if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
- my $pattern = fileglob_to_re($input_file);
- eval "/$pattern/";
- if ( !$@ && opendir( DIR, './' ) ) {
- my @files =
- grep { /$pattern/ && !-d $_ } readdir(DIR);
- closedir(DIR);
- if (@files) {
- unshift @ARGV, @files;
- next;
- }
+ }
+ elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
+ $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
+ $in_place_modify = 0;
+ }
+ else {
+ $fileroot = $input_file;
+ unless ( -e $input_file ) {
+
+ # file doesn't exist - check for a file glob
+ if ( $input_file =~ /([\?\*\[\{])/ ) {
+
+ # Windows shell may not remove quotes, so do it
+ my $input_file = $input_file;
+ if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
+ if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
+ my $pattern = fileglob_to_re($input_file);
+ ##eval "/$pattern/";
+ if ( !$@ && opendir( DIR, './' ) ) {
+ my @files =
+ grep { /$pattern/ && !-d $_ } readdir(DIR);
+ closedir(DIR);
+ if (@files) {
+ unshift @ARGV, @files;
+ next;
}
}
- print "skipping file: '$input_file': no matches found\n";
- next;
}
+ Warn "skipping file: '$input_file': no matches found\n";
+ next;
+ }
- unless ( -f $input_file ) {
- print "skipping file: $input_file: not a regular file\n";
- next;
- }
+ unless ( -f $input_file ) {
+ Warn "skipping file: $input_file: not a regular file\n";
+ next;
+ }
- unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
- print
-"skipping file: $input_file: Non-text (override with -f)\n";
- next;
- }
+ # As a safety precaution, skip zero length files.
+ # If for example a source file got clobbered somehow,
+ # the old .tdy or .bak files might still exist so we
+ # shouldn't overwrite them with zero length files.
+ unless ( -s $input_file ) {
+ Warn "skipping file: $input_file: Zero size\n";
+ next;
+ }
+
+ unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
+ Warn
+ "skipping file: $input_file: Non-text (override with -f)\n";
+ next;
+ }
- # we should have a valid filename now
- $fileroot = $input_file;
- $input_file_permissions = ( stat $input_file )[2] & 07777;
+ # we should have a valid filename now
+ $fileroot = $input_file;
+ $input_file_permissions = ( stat $input_file )[2] & 07777;
- if ( $^O eq 'VMS' ) {
- ( $fileroot, $dot ) = check_vms_filename($fileroot);
- }
+ if ( $^O eq 'VMS' ) {
+ ( $fileroot, $dot ) = check_vms_filename($fileroot);
+ }
- # add option to change path here
- if ( defined( $rOpts->{'output-path'} ) ) {
+ # add option to change path here
+ if ( defined( $rOpts->{'output-path'} ) ) {
- my ( $base, $old_path ) = fileparse($fileroot);
- my $new_path = $rOpts->{'output-path'};
- unless ( -d $new_path ) {
- unless ( mkdir $new_path, 0777 ) {
- die "unable to create directory $new_path: $!\n";
- }
+ my ( $base, $old_path ) = fileparse($fileroot);
+ my $new_path = $rOpts->{'output-path'};
+ unless ( -d $new_path ) {
+ unless ( mkdir $new_path, 0777 ) {
+ Die "unable to create directory $new_path: $!\n";
}
- my $path = $new_path;
- $fileroot = catfile( $path, $base );
- unless ($fileroot) {
- die <<EOM;
+ }
+ my $path = $new_path;
+ $fileroot = catfile( $path, $base );
+ unless ($fileroot) {
+ Die <<EOM;
------------------------------------------------------------------------
Problem combining $new_path and $base to make a filename; check -opath
------------------------------------------------------------------------
EOM
- }
}
}
+ }
- # Skip files with same extension as the output files because
- # this can lead to a messy situation with files like
- # script.tdy.tdy.tdy ... or worse problems ... when you
- # rerun perltidy over and over with wildcard input.
- if (
- !$source_stream
- && ( $input_file =~ /$forbidden_file_extensions/o
- || $input_file eq 'DIAGNOSTICS' )
- )
+ # Skip files with same extension as the output files because
+ # this can lead to a messy situation with files like
+ # script.tdy.tdy.tdy ... or worse problems ... when you
+ # rerun perltidy over and over with wildcard input.
+ if (
+ !$source_stream
+ && ( $input_file =~ /$forbidden_file_extensions/o
+ || $input_file eq 'DIAGNOSTICS' )
+ )
+ {
+ Warn "skipping file: $input_file: wrong extension\n";
+ next;
+ }
+
+ # the 'source_object' supplies a method to read the input file
+ my $source_object =
+ Perl::Tidy::LineSource->new( $input_file, $rOpts,
+ $rpending_logfile_message );
+ next unless ($source_object);
+
+ # Prefilters and postfilters: The prefilter is a code reference
+ # that will be applied to the source before tidying, and the
+ # postfilter is a code reference to the result before outputting.
+ if (
+ $prefilter
+ || ( $rOpts_character_encoding
+ && $rOpts_character_encoding eq 'utf8' )
+ )
+ {
+ my $buf = '';
+ while ( my $line = $source_object->get_line() ) {
+ $buf .= $line;
+ }
+
+ $buf = $prefilter->($buf) if $prefilter;
+
+ if ( $rOpts_character_encoding
+ && $rOpts_character_encoding eq 'utf8'
+ && !utf8::is_utf8($buf) )
{
- print "skipping file: $input_file: wrong extension\n";
- next;
+ eval {
+ $buf = Encode::decode( 'UTF-8', $buf,
+ Encode::FB_CROAK | Encode::LEAVE_SRC );
+ };
+ if ($@) {
+ Warn
+"skipping file: $input_file: Unable to decode source as UTF-8\n";
+ next;
+ }
}
- # the 'source_object' supplies a method to read the input file
- my $source_object =
- Perl::Tidy::LineSource->new( $input_file, $rOpts,
+ $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
$rpending_logfile_message );
- next unless ($source_object);
+ }
- # register this file name with the Diagnostics package
- $diagnostics_object->set_input_file($input_file)
- if $diagnostics_object;
+ # register this file name with the Diagnostics package
+ $diagnostics_object->set_input_file($input_file)
+ if $diagnostics_object;
- #---------------------------------------------------------------
- # determine the output file name
- #---------------------------------------------------------------
- my $output_file = undef;
- my $actual_output_extension;
+ #---------------------------------------------------------------
+ # prepare the output stream
+ #---------------------------------------------------------------
+ my $output_file = undef;
+ my $actual_output_extension;
- if ( $rOpts->{'outfile'} ) {
+ if ( $rOpts->{'outfile'} ) {
- if ( $number_of_files <= 1 ) {
+ if ( $number_of_files <= 1 ) {
- if ( $rOpts->{'standard-output'} ) {
- die "You may not use -o and -st together\n";
- }
- elsif ($destination_stream) {
- die
+ if ( $rOpts->{'standard-output'} ) {
+ my $msg = "You may not use -o and -st together";
+ $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
+ Die "$msg\n";
+ }
+ elsif ($destination_stream) {
+ Die
"You may not specify a destination array and -o together\n";
- }
- elsif ( defined( $rOpts->{'output-path'} ) ) {
- die "You may not specify -o and -opath together\n";
- }
- elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
- die "You may not specify -o and -oext together\n";
- }
- $output_file = $rOpts->{outfile};
-
- # make sure user gives a file name after -o
- if ( $output_file =~ /^-/ ) {
- die "You must specify a valid filename after -o\n";
- }
-
- # do not overwrite input file with -o
- if ( defined($input_file_permissions)
- && ( $output_file eq $input_file ) )
- {
- die
- "Use 'perltidy -b $input_file' to modify in-place\n";
- }
}
- else {
- die "You may not use -o with more than one input file\n";
+ elsif ( defined( $rOpts->{'output-path'} ) ) {
+ Die "You may not specify -o and -opath together\n";
}
- }
- elsif ( $rOpts->{'standard-output'} ) {
- if ($destination_stream) {
- die
-"You may not specify a destination array and -st together\n";
+ elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
+ Die "You may not specify -o and -oext together\n";
}
- $output_file = '-';
+ $output_file = $rOpts->{outfile};
- if ( $number_of_files <= 1 ) {
+ # make sure user gives a file name after -o
+ if ( $output_file =~ /^-/ ) {
+ Die "You must specify a valid filename after -o\n";
}
- else {
- die "You may not use -st with more than one input file\n";
+
+ # do not overwrite input file with -o
+ if ( defined($input_file_permissions)
+ && ( $output_file eq $input_file ) )
+ {
+ Die "Use 'perltidy -b $input_file' to modify in-place\n";
}
}
- elsif ($destination_stream) {
- $output_file = $destination_stream;
+ else {
+ Die "You may not use -o with more than one input file\n";
+ }
+ }
+ elsif ( $rOpts->{'standard-output'} ) {
+ if ($destination_stream) {
+ my $msg =
+ "You may not specify a destination array and -st together\n";
+ $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
+ Die "$msg\n";
+ }
+ $output_file = '-';
+
+ if ( $number_of_files <= 1 ) {
}
- elsif ($source_stream) { # source but no destination goes to stdout
- $output_file = '-';
+ else {
+ Die "You may not use -st with more than one input file\n";
}
- elsif ( $input_file eq '-' ) {
- $output_file = '-';
+ }
+ elsif ($destination_stream) {
+ $output_file = $destination_stream;
+ }
+ elsif ($source_stream) { # source but no destination goes to stdout
+ $output_file = '-';
+ }
+ elsif ( $input_file eq '-' ) {
+ $output_file = '-';
+ }
+ else {
+ if ($in_place_modify) {
+ $output_file = IO::File->new_tmpfile()
+ or Die "cannot open temp file for -b option: $!\n";
}
else {
- if ($in_place_modify) {
- $output_file = IO::File->new_tmpfile()
- or die "cannot open temp file for -b option: $!\n";
- }
- else {
- $actual_output_extension = $output_extension;
- $output_file = $fileroot . $output_extension;
- }
+ $actual_output_extension = $output_extension;
+ $output_file = $fileroot . $output_extension;
}
+ }
- # the 'sink_object' knows how to write the output file
- my $tee_file = $fileroot . $dot . "TEE";
+ # the 'sink_object' knows how to write the output file
+ my $tee_file = $fileroot . $dot . "TEE";
- my $line_separator = $rOpts->{'output-line-ending'};
- if ( $rOpts->{'preserve-line-endings'} ) {
- $line_separator = find_input_line_ending($input_file);
- }
- $line_separator = "\n" unless defined($line_separator);
+ my $line_separator = $rOpts->{'output-line-ending'};
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ $line_separator = find_input_line_ending($input_file);
+ }
+
+ # Eventually all I/O may be done with binmode, but for now it is
+ # only done when a user requests a particular line separator
+ # through the -ple or -ole flags
+ my $binmode = defined($line_separator)
+ || defined($rOpts_character_encoding);
+ $line_separator = "\n" unless defined($line_separator);
- my $sink_object =
+ my ( $sink_object, $postfilter_buffer );
+ if ($postfilter) {
+ $sink_object =
+ Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message, $binmode );
+ }
+ else {
+ $sink_object =
Perl::Tidy::LineSink->new( $output_file, $tee_file,
- $line_separator, $rOpts, $rpending_logfile_message );
+ $line_separator, $rOpts, $rpending_logfile_message, $binmode );
+ }
- #---------------------------------------------------------------
- # initialize the error logger
- #---------------------------------------------------------------
- my $warning_file = $fileroot . $dot . "ERR";
- if ($errorfile_stream) { $warning_file = $errorfile_stream }
- my $log_file = $fileroot . $dot . "LOG";
- if ($logfile_stream) { $log_file = $logfile_stream }
-
- my $logger_object =
- Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
- $saw_extrude );
- write_logfile_header(
- $rOpts, $logger_object, $config_file,
- $rraw_options, $Windows_type
- );
- if ($$rpending_logfile_message) {
- $logger_object->write_logfile_entry($$rpending_logfile_message);
+ #---------------------------------------------------------------
+ # initialize the error logger for this file
+ #---------------------------------------------------------------
+ my $warning_file = $fileroot . $dot . "ERR";
+ if ($errorfile_stream) { $warning_file = $errorfile_stream }
+ my $log_file = $fileroot . $dot . "LOG";
+ if ($logfile_stream) { $log_file = $logfile_stream }
+
+ my $logger_object =
+ Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
+ $fh_stderr, $saw_extrude );
+ write_logfile_header(
+ $rOpts, $logger_object, $config_file,
+ $rraw_options, $Windows_type, $readable_options,
+ );
+ if ($$rpending_logfile_message) {
+ $logger_object->write_logfile_entry($$rpending_logfile_message);
+ }
+ if ($$rpending_complaint) {
+ $logger_object->complain($$rpending_complaint);
+ }
+
+ #---------------------------------------------------------------
+ # initialize the debug object, if any
+ #---------------------------------------------------------------
+ my $debugger_object = undef;
+ if ( $rOpts->{DEBUG} ) {
+ $debugger_object =
+ Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
+ }
+
+ #---------------------------------------------------------------
+ # loop over iterations for one source stream
+ #---------------------------------------------------------------
+
+ # We will do a convergence test if 3 or more iterations are allowed.
+ # It would be pointless for fewer because we have to make at least
+ # two passes before we can see if we are converged, and the test
+ # would just slow things down.
+ my $max_iterations = $rOpts->{'iterations'};
+ my $convergence_log_message;
+ my %saw_md5;
+ my $do_convergence_test = $max_iterations > 2;
+ if ($do_convergence_test) {
+ eval "use Digest::MD5 qw(md5_hex)";
+ $do_convergence_test = !$@;
+
+ # Trying to avoid problems with ancient versions of perl because
+ # I don't know in which version number utf8::encode was introduced.
+ eval { my $string = "perltidy"; utf8::encode($string) };
+ $do_convergence_test = $do_convergence_test && !$@;
+ }
+
+ # save objects to allow redirecting output during iterations
+ my $sink_object_final = $sink_object;
+ my $debugger_object_final = $debugger_object;
+ my $logger_object_final = $logger_object;
+
+ for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
+
+ # send output stream to temp buffers until last iteration
+ my $sink_buffer;
+ if ( $iter < $max_iterations ) {
+ $sink_object =
+ Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message,
+ $binmode );
}
- if ($$rpending_complaint) {
- $logger_object->complain($$rpending_complaint);
+ else {
+ $sink_object = $sink_object_final;
}
- #---------------------------------------------------------------
- # initialize the debug object, if any
- #---------------------------------------------------------------
- my $debugger_object = undef;
- if ( $rOpts->{DEBUG} ) {
- $debugger_object =
- Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
+ # Save logger, debugger output only on pass 1 because:
+ # (1) line number references must be to the starting
+ # source, not an intermediate result, and
+ # (2) we need to know if there are errors so we can stop the
+ # iterations early if necessary.
+ if ( $iter > 1 ) {
+ $debugger_object = undef;
+ $logger_object = undef;
}
- #---------------------------------------------------------------
- # create a formatter for this file : html writer or pretty printer
- #---------------------------------------------------------------
+ #------------------------------------------------------------
+ # create a formatter for this file : html writer or
+ # pretty printer
+ #------------------------------------------------------------
# we have to delete any old formatter because, for safety,
# the formatter will check to see that there is only one.
);
}
else {
- die "I don't know how to do -format=$rOpts->{'format'}\n";
+ Die "I don't know how to do -format=$rOpts->{'format'}\n";
}
unless ($formatter) {
- die "Unable to continue with $rOpts->{'format'} formatting\n";
+ Die "Unable to continue with $rOpts->{'format'} formatting\n";
}
#---------------------------------------------------------------
#---------------------------------------------------------------
$tokenizer = undef; # must destroy old tokenizer
$tokenizer = Perl::Tidy::Tokenizer->new(
- source_object => $source_object,
- logger_object => $logger_object,
- debugger_object => $debugger_object,
- diagnostics_object => $diagnostics_object,
+ source_object => $source_object,
+ logger_object => $logger_object,
+ debugger_object => $debugger_object,
+ diagnostics_object => $diagnostics_object,
+ tabsize => $tabsize,
+
starting_level => $rOpts->{'starting-indentation-level'},
- tabs => $rOpts->{'tabs'},
indent_columns => $rOpts->{'indent-columns'},
look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
look_for_autoloader => $rOpts->{'look-for-autoloader'},
look_for_selfloader => $rOpts->{'look-for-selfloader'},
trim_qw => $rOpts->{'trim-qw'},
+ extended_syntax => $rOpts->{'extended-syntax'},
+
+ continuation_indentation =>
+ $rOpts->{'continuation-indentation'},
+ outdent_labels => $rOpts->{'outdent-labels'},
);
#---------------------------------------------------------------
#---------------------------------------------------------------
$source_object->close_input_file();
- # get file names to use for syntax check
- my $ifname = $source_object->get_input_file_copy_name();
- my $ofname = $sink_object->get_output_file_copy();
+ # line source for next iteration (if any) comes from the current
+ # temporary output buffer
+ if ( $iter < $max_iterations ) {
- #---------------------------------------------------------------
- # handle the -b option (backup and modify in-place)
- #---------------------------------------------------------------
- if ($in_place_modify) {
- unless ( -f $input_file ) {
+ $sink_object->close_output_file();
+ $source_object =
+ Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
+ $rpending_logfile_message );
- # oh, oh, no real file to backup ..
- # shouldn't happen because of numerous preliminary checks
- die print
-"problem with -b backing up input file '$input_file': not a file\n";
+ # stop iterations if errors or converged
+ my $stop_now = $logger_object->{_warning_count};
+ if ($stop_now) {
+ $convergence_log_message = <<EOM;
+Stopping iterations because of errors.
+EOM
}
- my $backup_name = $input_file . $backup_extension;
- if ( -f $backup_name ) {
- unlink($backup_name)
- or die
-"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
+ elsif ($do_convergence_test) {
+
+ # Patch for [rt.cpan.org #88020]
+ # Use utf8::encode since md5_hex() only operates on bytes.
+ my $digest = md5_hex( utf8::encode($sink_buffer) );
+ if ( !$saw_md5{$digest} ) {
+ $saw_md5{$digest} = $iter;
+ }
+ else {
+
+ # Deja vu, stop iterating
+ $stop_now = 1;
+ my $iterm = $iter - 1;
+ if ( $saw_md5{$digest} != $iterm ) {
+
+ # Blinking (oscillating) between two stable
+ # end states. This has happened in the past
+ # but at present there are no known instances.
+ $convergence_log_message = <<EOM;
+Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
+EOM
+ $diagnostics_object->write_diagnostics(
+ $convergence_log_message)
+ if $diagnostics_object;
+ }
+ else {
+ $convergence_log_message = <<EOM;
+Converged. Output for iteration $iter same as for iter $iterm.
+EOM
+ $diagnostics_object->write_diagnostics(
+ $convergence_log_message)
+ if $diagnostics_object && $iterm > 2;
+ }
+ }
+ } ## end if ($do_convergence_test)
+
+ if ($stop_now) {
+
+ # we are stopping the iterations early;
+ # copy the output stream to its final destination
+ $sink_object = $sink_object_final;
+ while ( my $line = $source_object->get_line() ) {
+ $sink_object->write_line($line);
+ }
+ $source_object->close_input_file();
+ last;
}
+ } ## end if ( $iter < $max_iterations)
+ } # end loop over iterations for one source file
+
+ # restore objects which have been temporarily undefined
+ # for second and higher iterations
+ $debugger_object = $debugger_object_final;
+ $logger_object = $logger_object_final;
+
+ $logger_object->write_logfile_entry($convergence_log_message)
+ if $convergence_log_message;
+
+ #---------------------------------------------------------------
+ # Perform any postfilter operation
+ #---------------------------------------------------------------
+ if ($postfilter) {
+ $sink_object->close_output_file();
+ $sink_object =
+ Perl::Tidy::LineSink->new( $output_file, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message, $binmode );
+ my $buf = $postfilter->($postfilter_buffer);
+ $source_object =
+ Perl::Tidy::LineSource->new( \$buf, $rOpts,
+ $rpending_logfile_message );
+ while ( my $line = $source_object->get_line() ) {
+ $sink_object->write_line($line);
+ }
+ $source_object->close_input_file();
+ }
+
+ # Save names of the input and output files for syntax check
+ my $ifname = $input_file;
+ my $ofname = $output_file;
+
+ #---------------------------------------------------------------
+ # handle the -b option (backup and modify in-place)
+ #---------------------------------------------------------------
+ if ($in_place_modify) {
+ unless ( -f $input_file ) {
+
+ # oh, oh, no real file to backup ..
+ # shouldn't happen because of numerous preliminary checks
+ Die
+"problem with -b backing up input file '$input_file': not a file\n";
+ }
+ my $backup_name = $input_file . $backup_extension;
+ if ( -f $backup_name ) {
+ unlink($backup_name)
+ or Die
+"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
+ }
+
+ # backup the input file
+ # we use copy for symlinks, move for regular files
+ if ( -l $input_file ) {
+ File::Copy::copy( $input_file, $backup_name )
+ or Die "File::Copy failed trying to backup source: $!";
+ }
+ else {
rename( $input_file, $backup_name )
- or die
+ or Die
"problem renaming $input_file to $backup_name for -b option: $!\n";
- $ifname = $backup_name;
+ }
+ $ifname = $backup_name;
+
+ # copy the output to the original input file
+ # NOTE: it would be nice to just close $output_file and use
+ # File::Copy::copy here, but in this case $output_file is the
+ # handle of an open nameless temporary file so we would lose
+ # everything if we closed it.
+ seek( $output_file, 0, 0 )
+ or Die "unable to rewind a temporary file for -b option: $!\n";
+ my $fout = IO::File->new("> $input_file")
+ or Die
+"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
+ if ($binmode) {
+ if ( $rOpts->{'character-encoding'}
+ && $rOpts->{'character-encoding'} eq 'utf8' )
+ {
+ binmode $fout, ":encoding(UTF-8)";
+ }
+ else { binmode $fout }
+ }
+ my $line;
+ while ( $line = $output_file->getline() ) {
+ $fout->print($line);
+ }
+ $fout->close();
+ $output_file = $input_file;
+ $ofname = $input_file;
+ }
+
+ #---------------------------------------------------------------
+ # clean up and report errors
+ #---------------------------------------------------------------
+ $sink_object->close_output_file() if $sink_object;
+ $debugger_object->close_debug_file() if $debugger_object;
- seek( $output_file, 0, 0 )
- or die "unable to rewind tmp file for -b option: $!\n";
+ # set output file permissions
+ if ( $output_file && -f $output_file && !-l $output_file ) {
+ if ($input_file_permissions) {
- my $fout = IO::File->new("> $input_file")
- or die
-"problem opening $input_file for write for -b option; check directory permissions: $!\n";
- my $line;
- while ( $line = $output_file->getline() ) {
- $fout->print($line);
+ # give output script same permissions as input script, but
+ # make it user-writable or else we can't run perltidy again.
+ # Thus we retain whatever executable flags were set.
+ if ( $rOpts->{'format'} eq 'tidy' ) {
+ chmod( $input_file_permissions | 0600, $output_file );
}
- $fout->close();
- $output_file = $input_file;
- $ofname = $input_file;
+
+ # else use default permissions for html and any other format
}
+ }
- #---------------------------------------------------------------
- # clean up and report errors
- #---------------------------------------------------------------
- $sink_object->close_output_file() if $sink_object;
- $debugger_object->close_debug_file() if $debugger_object;
+ #---------------------------------------------------------------
+ # Do syntax check if requested and possible
+ #---------------------------------------------------------------
+ my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
+ if ( $logger_object
+ && $rOpts->{'check-syntax'}
+ && $ifname
+ && $ofname )
+ {
+ $infile_syntax_ok =
+ check_syntax( $ifname, $ofname, $logger_object, $rOpts );
+ }
+
+ #---------------------------------------------------------------
+ # remove the original file for in-place modify as follows:
+ # $delete_backup=0 never
+ # $delete_backup=1 only if no errors
+ # $delete_backup>1 always : NOT ALLOWED, too risky, see above
+ #---------------------------------------------------------------
+ if ( $in_place_modify
+ && $delete_backup
+ && -f $ifname
+ && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
+ {
- my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
- if ($output_file) {
+ # As an added safety precaution, do not delete the source file
+ # if its size has dropped from positive to zero, since this
+ # could indicate a disaster of some kind, including a hardware
+ # failure. Actually, this could happen if you had a file of
+ # all comments (or pod) and deleted everything with -dac (-dap)
+ # for some reason.
+ if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
+ Warn(
+"output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
+ );
+ }
+ else {
+ unlink($ifname)
+ or Die
+"unable to remove previous '$ifname' for -b option; check permissions: $!\n";
+ }
+ }
- if ($input_file_permissions) {
+ $logger_object->finish( $infile_syntax_ok, $formatter )
+ if $logger_object;
+ } # end of main loop to process all files
- # give output script same permissions as input script, but
- # make it user-writable or else we can't run perltidy again.
- # Thus we retain whatever executable flags were set.
- if ( $rOpts->{'format'} eq 'tidy' ) {
- chmod( $input_file_permissions | 0600, $output_file );
- }
+ NORMAL_EXIT:
+ return 0;
- # else use default permissions for html and any other format
+ ERROR_EXIT:
+ return 1;
+} # end of main program perltidy
+sub get_stream_as_named_file {
+
+ # Return the name of a file containing a stream of data, creating
+ # a temporary file if necessary.
+ # Given:
+ # $stream - the name of a file or stream
+ # Returns:
+ # $fname = name of file if possible, or undef
+ # $if_tmpfile = true if temp file, undef if not temp file
+ #
+ # This routine is needed for passing actual files to Perl for
+ # a syntax check.
+ my ($stream) = @_;
+ my $is_tmpfile;
+ my $fname;
+ if ($stream) {
+ if ( ref($stream) ) {
+ my ( $fh_stream, $fh_name ) =
+ Perl::Tidy::streamhandle( $stream, 'r' );
+ if ($fh_stream) {
+ my ( $fout, $tmpnam ) = File::Temp::tempfile();
+ if ($fout) {
+ $fname = $tmpnam;
+ $is_tmpfile = 1;
+ binmode $fout;
+ while ( my $line = $fh_stream->getline() ) {
+ $fout->print($line);
+ }
+ $fout->close();
}
- if ( $logger_object && $rOpts->{'check-syntax'} ) {
- $infile_syntax_ok =
- check_syntax( $ifname, $ofname, $logger_object, $rOpts );
- }
+ $fh_stream->close();
}
-
- $logger_object->finish( $infile_syntax_ok, $formatter )
- if $logger_object;
- } # end of loop to process all files
- } # end of main program
+ }
+ elsif ( $stream ne '-' && -f $stream ) {
+ $fname = $stream;
+ }
+ }
+ return ( $fname, $is_tmpfile );
}
sub fileglob_to_re {
}
sub write_logfile_header {
- my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
- @_;
+ my (
+ $rOpts, $logger_object, $config_file,
+ $rraw_options, $Windows_type, $readable_options
+ ) = @_;
$logger_object->write_logfile_entry(
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
);
$logger_object->write_logfile_entry(
"------------------------------------\n");
- foreach ( keys %{$rOpts} ) {
- $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
- }
+ $logger_object->write_logfile_entry($readable_options);
+
$logger_object->write_logfile_entry(
"------------------------------------\n");
}
"To find error messages search for 'WARNING' with your editor\n");
}
-sub process_command_line {
-
- my ( $perltidyrc_stream, $is_Windows, $Windows_type, $rpending_complaint ) =
- @_;
-
- use Getopt::Long;
+sub generate_options {
######################################################################
+ # Generate and return references to:
+ # @option_string - the list of options to be passed to Getopt::Long
+ # @defaults - the list of default options
+ # %expansion - a hash showing how all abbreviations are expanded
+ # %category - a hash giving the general category of each option
+ # %option_range - a hash giving the valid ranges of certain options
+
# Note: a few options are not documented in the man page and usage
# message. This is because these are experimental or debug options and
# may or may not be retained in future versions.
# fll --> fuzzy-line-length # a trivial parameter which gets
# turned off for the extrude option
# which is mainly for debugging
- # chk --> check-multiline-quotes # check for old bug; to be deleted
# scl --> short-concatenation-item-length # helps break at '.'
# recombine # for debugging line breaks
+ # valign # for debugging vertical alignment
# I --> DIAGNOSTICS # for debugging
######################################################################
# Define the option string passed to GetOptions.
#---------------------------------------------------------------
- my @option_string = ();
- my %expansion = ();
- my $rexpansion = \%expansion;
+ my @option_string = ();
+ my %expansion = ();
+ my %option_category = ();
+ my %option_range = ();
+ my $rexpansion = \%expansion;
+
+ # names of categories in manual
+ # leading integers will allow sorting
+ my @category_name = (
+ '0. I/O control',
+ '1. Basic formatting options',
+ '2. Code indentation control',
+ '3. Whitespace control',
+ '4. Comment controls',
+ '5. Linebreak controls',
+ '6. Controlling list formatting',
+ '7. Retaining or ignoring existing line breaks',
+ '8. Blank line control',
+ '9. Other controls',
+ '10. HTML options',
+ '11. pod2html options',
+ '12. Controlling HTML properties',
+ '13. Debugging',
+ );
# These options are parsed directly by perltidy:
# help h
no-profile
npro
recombine!
+ valign!
+ notidy
);
+ my $category = 13; # Debugging
+ foreach (@option_string) {
+ my $opt = $_; # must avoid changing the actual flag
+ $opt =~ s/!$//;
+ $option_category{$opt} = $category_name[$category];
+ }
+
+ $category = 11; # HTML
+ $option_category{html} = $category_name[$category];
+
# routine to install and check options
my $add_option = sub {
my ( $long_name, $short_name, $flag ) = @_;
push @option_string, $long_name . $flag;
+ $option_category{$long_name} = $category_name[$category];
if ($short_name) {
if ( $expansion{$short_name} ) {
my $existing_name = $expansion{$short_name}[0];
- die
+ Die
"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
}
$expansion{$short_name} = [$long_name];
my $nolong_name = 'no' . $long_name;
if ( $expansion{$nshort_name} ) {
my $existing_name = $expansion{$nshort_name}[0];
- die
+ Die
"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
}
$expansion{$nshort_name} = [$nolong_name];
# Install long option names which have a simple abbreviation.
# Options with code '!' get standard negation ('no' for long names,
- # 'n' for abbreviations)
- $add_option->( 'DEBUG', 'D', '!' );
- $add_option->( 'DIAGNOSTICS', 'I', '!' );
- $add_option->( 'add-newlines', 'anl', '!' );
+ # 'n' for abbreviations). Categories follow the manual.
+
+ ###########################
+ $category = 0; # I/O_Control
+ ###########################
+ $add_option->( 'backup-and-modify-in-place', 'b', '!' );
+ $add_option->( 'backup-file-extension', 'bext', '=s' );
+ $add_option->( 'force-read-binary', 'f', '!' );
+ $add_option->( 'format', 'fmt', '=s' );
+ $add_option->( 'iterations', 'it', '=i' );
+ $add_option->( 'logfile', 'log', '!' );
+ $add_option->( 'logfile-gap', 'g', ':i' );
+ $add_option->( 'outfile', 'o', '=s' );
+ $add_option->( 'output-file-extension', 'oext', '=s' );
+ $add_option->( 'output-path', 'opath', '=s' );
+ $add_option->( 'profile', 'pro', '=s' );
+ $add_option->( 'quiet', 'q', '!' );
+ $add_option->( 'standard-error-output', 'se', '!' );
+ $add_option->( 'standard-output', 'st', '!' );
+ $add_option->( 'warning-output', 'w', '!' );
+ $add_option->( 'character-encoding', 'enc', '=s' );
+
+ # options which are both toggle switches and values moved here
+ # to hide from tidyview (which does not show category 0 flags):
+ # -ole moved here from category 1
+ # -sil moved here from category 2
+ $add_option->( 'output-line-ending', 'ole', '=s' );
+ $add_option->( 'starting-indentation-level', 'sil', '=i' );
+
+ ########################################
+ $category = 1; # Basic formatting options
+ ########################################
+ $add_option->( 'check-syntax', 'syn', '!' );
+ $add_option->( 'entab-leading-whitespace', 'et', '=i' );
+ $add_option->( 'indent-columns', 'i', '=i' );
+ $add_option->( 'maximum-line-length', 'l', '=i' );
+ $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
+ $add_option->( 'whitespace-cycle', 'wc', '=i' );
+ $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
+ $add_option->( 'preserve-line-endings', 'ple', '!' );
+ $add_option->( 'tabs', 't', '!' );
+ $add_option->( 'default-tabsize', 'dt', '=i' );
+ $add_option->( 'extended-syntax', 'xs', '!' );
+
+ ########################################
+ $category = 2; # Code indentation control
+ ########################################
+ $add_option->( 'continuation-indentation', 'ci', '=i' );
+ $add_option->( 'line-up-parentheses', 'lp', '!' );
+ $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
+ $add_option->( 'outdent-keywords', 'okw', '!' );
+ $add_option->( 'outdent-labels', 'ola', '!' );
+ $add_option->( 'outdent-long-quotes', 'olq', '!' );
+ $add_option->( 'indent-closing-brace', 'icb', '!' );
+ $add_option->( 'closing-token-indentation', 'cti', '=i' );
+ $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
+ $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
+ $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
+ $add_option->( 'brace-left-and-indent', 'bli', '!' );
+ $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
+
+ ########################################
+ $category = 3; # Whitespace control
+ ########################################
$add_option->( 'add-semicolons', 'asc', '!' );
$add_option->( 'add-whitespace', 'aws', '!' );
- $add_option->( 'backup-and-modify-in-place', 'b', '!' );
- $add_option->( 'backup-file-extension', 'bext', '=s' );
- $add_option->( 'blanks-before-blocks', 'bbb', '!' );
- $add_option->( 'blanks-before-comments', 'bbc', '!' );
- $add_option->( 'blanks-before-subs', 'bbs', '!' );
$add_option->( 'block-brace-tightness', 'bbt', '=i' );
- $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
- $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
- $add_option->( 'brace-left-and-indent', 'bli', '!' );
- $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
$add_option->( 'brace-tightness', 'bt', '=i' );
- $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
- $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
- $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
- $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
- $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
- $add_option->( 'break-at-old-trinary-breakpoints', 'bot', '!' );
- $add_option->( 'check-multiline-quotes', 'chk', '!' );
- $add_option->( 'check-syntax', 'syn', '!' );
- $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
- $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
- $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
- $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
- $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
- $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
- $add_option->( 'closing-side-comments', 'csc', '!' );
- $add_option->( 'closing-token-indentation', 'cti', '=i' );
- $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
- $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
- $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
- $add_option->( 'continuation-indentation', 'ci', '=i' );
- $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
- $add_option->( 'cuddled-else', 'ce', '!' );
- $add_option->( 'delete-block-comments', 'dbc', '!' );
- $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
- $add_option->( 'delete-old-newlines', 'dnl', '!' );
$add_option->( 'delete-old-whitespace', 'dws', '!' );
- $add_option->( 'delete-pod', 'dp', '!' );
$add_option->( 'delete-semicolons', 'dsm', '!' );
- $add_option->( 'delete-side-comments', 'dsc', '!' );
- $add_option->( 'dump-defaults', 'ddf', '!' );
- $add_option->( 'dump-long-names', 'dln', '!' );
- $add_option->( 'dump-options', 'dop', '!' );
- $add_option->( 'dump-profile', 'dpro', '!' );
- $add_option->( 'dump-short-names', 'dsn', '!' );
- $add_option->( 'dump-token-types', 'dtt', '!' );
- $add_option->( 'dump-want-left-space', 'dwls', '!' );
- $add_option->( 'dump-want-right-space', 'dwrs', '!' );
- $add_option->( 'entab-leading-whitespace', 'et', '=i' );
- $add_option->( 'force-read-binary', 'f', '!' );
- $add_option->( 'format', 'fmt', '=s' );
- $add_option->( 'fuzzy-line-length', 'fll', '!' );
- $add_option->( 'hanging-side-comments', 'hsc', '!' );
- $add_option->( 'help', 'h', '' );
- $add_option->( 'ignore-old-line-breaks', 'iob', '!' );
- $add_option->( 'indent-block-comments', 'ibc', '!' );
- $add_option->( 'indent-closing-brace', 'icb', '!' );
- $add_option->( 'indent-columns', 'i', '=i' );
- $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
- $add_option->( 'line-up-parentheses', 'lp', '!' );
- $add_option->( 'logfile', 'log', '!' );
- $add_option->( 'logfile-gap', 'g', ':i' );
- $add_option->( 'long-block-line-count', 'lbl', '=i' );
- $add_option->( 'look-for-autoloader', 'lal', '!' );
- $add_option->( 'look-for-hash-bang', 'x', '!' );
- $add_option->( 'look-for-selfloader', 'lsl', '!' );
- $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
- $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
- $add_option->( 'maximum-line-length', 'l', '=i' );
- $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
+ $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
$add_option->( 'nowant-left-space', 'nwls', '=s' );
$add_option->( 'nowant-right-space', 'nwrs', '=s' );
- $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
- $add_option->( 'opening-brace-always-on-right', 'bar', '' );
- $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
- $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
- $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
- $add_option->( 'outdent-keywords', 'okw', '!' );
- $add_option->( 'outdent-labels', 'ola', '!' );
- $add_option->( 'outdent-long-comments', 'olc', '!' );
- $add_option->( 'outdent-long-quotes', 'olq', '!' );
- $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
- $add_option->( 'outfile', 'o', '=s' );
- $add_option->( 'output-file-extension', 'oext', '=s' );
- $add_option->( 'output-line-ending', 'ole', '=s' );
- $add_option->( 'output-path', 'opath', '=s' );
$add_option->( 'paren-tightness', 'pt', '=i' );
- $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
- $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
- $add_option->( 'pass-version-line', 'pvl', '!' );
- $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
- $add_option->( 'preserve-line-endings', 'ple', '!' );
- $add_option->( 'profile', 'pro', '=s' );
- $add_option->( 'quiet', 'q', '!' );
- $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
- $add_option->( 'show-options', 'opt', '!' );
$add_option->( 'space-after-keyword', 'sak', '=s' );
$add_option->( 'space-for-semicolon', 'sfs', '!' );
+ $add_option->( 'space-function-paren', 'sfp', '!' );
+ $add_option->( 'space-keyword-paren', 'skp', '!' );
$add_option->( 'space-terminal-semicolon', 'sts', '!' );
$add_option->( 'square-bracket-tightness', 'sbt', '=i' );
$add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
$add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
- $add_option->( 'standard-error-output', 'se', '!' );
- $add_option->( 'standard-output', 'st', '!' );
- $add_option->( 'starting-indentation-level', 'sil', '=i' );
- $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
- $add_option->( 'static-block-comments', 'sbc', '!' );
- $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
- $add_option->( 'static-side-comments', 'ssc', '!' );
- $add_option->( 'swallow-optional-blank-lines', 'sob', '!' );
- $add_option->( 'tabs', 't', '!' );
- $add_option->( 'tee-block-comments', 'tbc', '!' );
- $add_option->( 'tee-pod', 'tp', '!' );
- $add_option->( 'tee-side-comments', 'tsc', '!' );
+ $add_option->( 'tight-secret-operators', 'tso', '!' );
$add_option->( 'trim-qw', 'tqw', '!' );
- $add_option->( 'version', 'v', '' );
- $add_option->( 'vertical-tightness', 'vt', '=i' );
- $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
- $add_option->( 'want-break-after', 'wba', '=s' );
- $add_option->( 'want-break-before', 'wbb', '=s' );
+ $add_option->( 'trim-pod', 'trp', '!' );
$add_option->( 'want-left-space', 'wls', '=s' );
$add_option->( 'want-right-space', 'wrs', '=s' );
- $add_option->( 'warning-output', 'w', '!' );
+
+ ########################################
+ $category = 4; # Comment controls
+ ########################################
+ $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
+ $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
+ $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
+ $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
+ $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
+ $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
+ $add_option->( 'closing-side-comments', 'csc', '!' );
+ $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
+ $add_option->( 'format-skipping', 'fs', '!' );
+ $add_option->( 'format-skipping-begin', 'fsb', '=s' );
+ $add_option->( 'format-skipping-end', 'fse', '=s' );
+ $add_option->( 'hanging-side-comments', 'hsc', '!' );
+ $add_option->( 'indent-block-comments', 'ibc', '!' );
+ $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
+ $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
+ $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
+ $add_option->( 'outdent-long-comments', 'olc', '!' );
+ $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
+ $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
+ $add_option->( 'static-block-comments', 'sbc', '!' );
+ $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
+ $add_option->( 'static-side-comments', 'ssc', '!' );
+ $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' );
+
+ ########################################
+ $category = 5; # Linebreak controls
+ ########################################
+ $add_option->( 'add-newlines', 'anl', '!' );
+ $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
+ $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
+ $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
+ $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
+ $add_option->( 'cuddled-else', 'ce', '!' );
+ $add_option->( 'delete-old-newlines', 'dnl', '!' );
+ $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
+ $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
+ $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
+ $add_option->( 'opening-paren-right', 'opr', '!' );
+ $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
+ $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
+ $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
+ $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
+ $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
+ $add_option->( 'stack-closing-block-brace', 'scbb', '!' );
+ $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
+ $add_option->( 'stack-closing-paren', 'scp', '!' );
+ $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
+ $add_option->( 'stack-opening-block-brace', 'sobb', '!' );
+ $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
+ $add_option->( 'stack-opening-paren', 'sop', '!' );
+ $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
+ $add_option->( 'vertical-tightness', 'vt', '=i' );
+ $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
+ $add_option->( 'want-break-after', 'wba', '=s' );
+ $add_option->( 'want-break-before', 'wbb', '=s' );
+ $add_option->( 'break-after-all-operators', 'baao', '!' );
+ $add_option->( 'break-before-all-operators', 'bbao', '!' );
+ $add_option->( 'keep-interior-semicolons', 'kis', '!' );
+
+ ########################################
+ $category = 6; # Controlling list formatting
+ ########################################
+ $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
+ $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
+ $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
+
+ ########################################
+ $category = 7; # Retaining or ignoring existing line breaks
+ ########################################
+ $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
+ $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
+ $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
+ $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
+ $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
+
+ ########################################
+ $category = 8; # Blank line control
+ ########################################
+ $add_option->( 'blanks-before-blocks', 'bbb', '!' );
+ $add_option->( 'blanks-before-comments', 'bbc', '!' );
+ $add_option->( 'blank-lines-before-subs', 'blbs', '=i' );
+ $add_option->( 'blank-lines-before-packages', 'blbp', '=i' );
+ $add_option->( 'long-block-line-count', 'lbl', '=i' );
+ $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
+ $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
+
+ $add_option->( 'blank-lines-after-opening-block', 'blao', '=i' );
+ $add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' );
+ $add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' );
+ $add_option->( 'blank-lines-before-closing-block-list', 'blbcl', '=s' );
+
+ ########################################
+ $category = 9; # Other controls
+ ########################################
+ $add_option->( 'delete-block-comments', 'dbc', '!' );
+ $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
+ $add_option->( 'delete-pod', 'dp', '!' );
+ $add_option->( 'delete-side-comments', 'dsc', '!' );
+ $add_option->( 'tee-block-comments', 'tbc', '!' );
+ $add_option->( 'tee-pod', 'tp', '!' );
+ $add_option->( 'tee-side-comments', 'tsc', '!' );
+ $add_option->( 'look-for-autoloader', 'lal', '!' );
+ $add_option->( 'look-for-hash-bang', 'x', '!' );
+ $add_option->( 'look-for-selfloader', 'lsl', '!' );
+ $add_option->( 'pass-version-line', 'pvl', '!' );
+
+ ########################################
+ $category = 13; # Debugging
+ ########################################
+ $add_option->( 'DEBUG', 'D', '!' );
+ $add_option->( 'DIAGNOSTICS', 'I', '!' );
+ $add_option->( 'dump-defaults', 'ddf', '!' );
+ $add_option->( 'dump-long-names', 'dln', '!' );
+ $add_option->( 'dump-options', 'dop', '!' );
+ $add_option->( 'dump-profile', 'dpro', '!' );
+ $add_option->( 'dump-short-names', 'dsn', '!' );
+ $add_option->( 'dump-token-types', 'dtt', '!' );
+ $add_option->( 'dump-want-left-space', 'dwls', '!' );
+ $add_option->( 'dump-want-right-space', 'dwrs', '!' );
+ $add_option->( 'fuzzy-line-length', 'fll', '!' );
+ $add_option->( 'help', 'h', '' );
+ $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
+ $add_option->( 'show-options', 'opt', '!' );
+ $add_option->( 'version', 'v', '' );
+ $add_option->( 'memoize', 'mem', '!' );
+
+ #---------------------------------------------------------------------
# The Perl::Tidy::HtmlWriter will add its own options to the string
Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
+ ########################################
+ # Set categories 10, 11, 12
+ ########################################
+ # Based on their known order
+ $category = 12; # HTML properties
+ foreach my $opt (@option_string) {
+ my $long_name = $opt;
+ $long_name =~ s/(!|=.*|:.*)$//;
+ unless ( defined( $option_category{$long_name} ) ) {
+ if ( $long_name =~ /^html-linked/ ) {
+ $category = 10; # HTML options
+ }
+ elsif ( $long_name =~ /^pod2html/ ) {
+ $category = 11; # Pod2html
+ }
+ $option_category{$long_name} = $category_name[$category];
+ }
+ }
+
+ #---------------------------------------------------------------
+ # Assign valid ranges to certain options
+ #---------------------------------------------------------------
+ # In the future, these may be used to make preliminary checks
+ # hash keys are long names
+ # If key or value is undefined:
+ # strings may have any value
+ # integer ranges are >=0
+ # If value is defined:
+ # value is [qw(any valid words)] for strings
+ # value is [min, max] for integers
+ # if min is undefined, there is no lower limit
+ # if max is undefined, there is no upper limit
+ # Parameters not listed here have defaults
+ %option_range = (
+ 'format' => [ 'tidy', 'html', 'user' ],
+ 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
+ 'character-encoding' => [ 'none', 'utf8' ],
+
+ 'block-brace-tightness' => [ 0, 2 ],
+ 'brace-tightness' => [ 0, 2 ],
+ 'paren-tightness' => [ 0, 2 ],
+ 'square-bracket-tightness' => [ 0, 2 ],
+
+ 'block-brace-vertical-tightness' => [ 0, 2 ],
+ 'brace-vertical-tightness' => [ 0, 2 ],
+ 'brace-vertical-tightness-closing' => [ 0, 2 ],
+ 'paren-vertical-tightness' => [ 0, 2 ],
+ 'paren-vertical-tightness-closing' => [ 0, 2 ],
+ 'square-bracket-vertical-tightness' => [ 0, 2 ],
+ 'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
+ 'vertical-tightness' => [ 0, 2 ],
+ 'vertical-tightness-closing' => [ 0, 2 ],
+
+ 'closing-brace-indentation' => [ 0, 3 ],
+ 'closing-paren-indentation' => [ 0, 3 ],
+ 'closing-square-bracket-indentation' => [ 0, 3 ],
+ 'closing-token-indentation' => [ 0, 3 ],
+
+ 'closing-side-comment-else-flag' => [ 0, 2 ],
+ 'comma-arrow-breakpoints' => [ 0, 5 ],
+ );
+
+ # Note: we could actually allow negative ci if someone really wants it:
+ # $option_range{'continuation-indentation'} = [ undef, undef ];
+
#---------------------------------------------------------------
# Assign default values to the above options here, except
# for 'outfile' and 'help'.
add-whitespace
blanks-before-blocks
blanks-before-comments
- blanks-before-subs
+ blank-lines-before-subs=1
+ blank-lines-before-packages=1
block-brace-tightness=0
block-brace-vertical-tightness=0
brace-tightness=1
brace-vertical-tightness-closing=0
brace-vertical-tightness=0
break-at-old-logical-breakpoints
- break-at-old-trinary-breakpoints
+ break-at-old-ternary-breakpoints
+ break-at-old-attribute-breakpoints
break-at-old-keyword-breakpoints
- comma-arrow-breakpoints=1
+ comma-arrow-breakpoints=5
nocheck-syntax
closing-side-comment-interval=6
closing-side-comment-maximum-text=20
closing-side-comment-else-flag=0
+ closing-side-comments-balanced
closing-paren-indentation=0
closing-brace-indentation=0
closing-square-bracket-indentation=0
continuation-indentation=2
delete-old-newlines
delete-semicolons
+ extended-syntax
fuzzy-line-length
hanging-side-comments
indent-block-comments
indent-columns=4
+ iterations=1
+ keep-old-blank-lines=1
long-block-line-count=8
look-for-autoloader
look-for-selfloader
maximum-consecutive-blank-lines=1
maximum-fields-per-table=0
maximum-line-length=80
+ memoize
minimum-space-to-comment=4
nobrace-left-and-indent
nocuddled-else
noquiet
noshow-options
nostatic-side-comments
- noswallow-optional-blank-lines
notabs
nowarning-output
+ character-encoding=none
outdent-labels
outdent-long-quotes
outdent-long-comments
paren-vertical-tightness=0
pass-version-line
recombine
+ valign
short-concatenation-item-length=8
space-for-semicolon
square-bracket-tightness=1
trim-qw
format=tidy
backup-file-extension=bak
+ format-skipping
+ default-tabsize=8
pod2html
html-table-of-contents
push @defaults, "perl-syntax-check-flags=-c -T";
- #---------------------------------------------------------------
- # set the defaults by passing the above list through GetOptions
- #---------------------------------------------------------------
- my %Opts = ();
- {
- local @ARGV;
- my $i;
-
- for $i (@defaults) { push @ARGV, "--" . $i }
-
- if ( !GetOptions( \%Opts, @option_string ) ) {
- die "Programming Bug: error in setting default options";
- }
- }
-
#---------------------------------------------------------------
# Define abbreviations which will be expanded into the above primitives.
# These may be defined recursively.
#---------------------------------------------------------------
%expansion = (
%expansion,
- 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
- 'fnl' => [qw(freeze-newlines)],
- 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
- 'fws' => [qw(freeze-whitespace)],
+ 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
+ 'fnl' => [qw(freeze-newlines)],
+ 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
+ 'fws' => [qw(freeze-whitespace)],
+ 'freeze-blank-lines' =>
+ [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
+ 'fbl' => [qw(freeze-blank-lines)],
'indent-only' => [qw(freeze-newlines freeze-whitespace)],
'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
'nooutdent-long-lines' =>
[qw(nooutdent-long-quotes nooutdent-long-comments)],
- 'noll' => [qw(nooutdent-long-lines)],
- 'io' => [qw(indent-only)],
+ 'noll' => [qw(nooutdent-long-lines)],
+ 'io' => [qw(indent-only)],
'delete-all-comments' =>
[qw(delete-block-comments delete-side-comments delete-pod)],
'nodelete-all-comments' =>
[qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
- 'dac' => [qw(delete-all-comments)],
- 'ndac' => [qw(nodelete-all-comments)],
- 'gnu' => [qw(gnu-style)],
+ 'dac' => [qw(delete-all-comments)],
+ 'ndac' => [qw(nodelete-all-comments)],
+ 'gnu' => [qw(gnu-style)],
+ 'pbp' => [qw(perl-best-practices)],
'tee-all-comments' =>
[qw(tee-block-comments tee-side-comments tee-pod)],
'notee-all-comments' =>
'nhtml' => [qw(format=tidy)],
'tidy' => [qw(format=tidy)],
+ 'utf8' => [qw(character-encoding=utf8)],
+ 'UTF8' => [qw(character-encoding=utf8)],
+
+ 'swallow-optional-blank-lines' => [qw(kbl=0)],
+ 'noswallow-optional-blank-lines' => [qw(kbl=1)],
+ 'sob' => [qw(kbl=0)],
+ 'nsob' => [qw(kbl=1)],
+
'break-after-comma-arrows' => [qw(cab=0)],
'nobreak-after-comma-arrows' => [qw(cab=1)],
'baa' => [qw(cab=0)],
'nbaa' => [qw(cab=1)],
+ 'blanks-before-subs' => [qw(blbs=1 blbp=1)],
+ 'bbs' => [qw(blbs=1 blbp=1)],
+ 'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
+ 'nbbs' => [qw(blbs=0 blbp=0)],
+
+ 'break-at-old-trinary-breakpoints' => [qw(bot)],
+
'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
+ 'otr' => [qw(opr ohbr osbr)],
+ 'opening-token-right' => [qw(opr ohbr osbr)],
+ 'notr' => [qw(nopr nohbr nosbr)],
+ 'noopening-token-right' => [qw(nopr nohbr nosbr)],
+
+ 'sot' => [qw(sop sohb sosb)],
+ 'nsot' => [qw(nsop nsohb nsosb)],
+ 'stack-opening-tokens' => [qw(sop sohb sosb)],
+ 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
+
+ 'sct' => [qw(scp schb scsb)],
+ 'stack-closing-tokens' => => [qw(scp schb scsb)],
+ 'nsct' => [qw(nscp nschb nscsb)],
+ 'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
+
+ 'sac' => [qw(sot sct)],
+ 'nsac' => [qw(nsot nsct)],
+ 'stack-all-containers' => [qw(sot sct)],
+ 'nostack-all-containers' => [qw(nsot nsct)],
+
+ 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
+ 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
+ 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
+ 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
+ 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
+ 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
+
+ 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)],
+ 'sobb' => [qw(bbvt=2 bbvtl=*)],
+ 'nostack-opening-block-brace' => [qw(bbvt=0)],
+ 'nsobb' => [qw(bbvt=0)],
+
+ 'converge' => [qw(it=4)],
+ 'noconverge' => [qw(it=1)],
+ 'conv' => [qw(it=4)],
+ 'nconv' => [qw(it=1)],
+
# 'mangle' originally deleted pod and comments, but to keep it
# reversible, it no longer does. But if you really want to
# delete them, just use:
'mangle' => [
qw(
check-syntax
+ keep-old-blank-lines=0
delete-old-newlines
delete-old-whitespace
delete-semicolons
noadd-semicolons
noadd-whitespace
noblanks-before-blocks
- noblanks-before-subs
+ blank-lines-before-subs=0
+ blank-lines-before-packages=0
notabs
)
],
noadd-semicolons
noadd-whitespace
noblanks-before-blocks
- noblanks-before-subs
+ blank-lines-before-subs=0
+ blank-lines-before-packages=0
nofuzzy-line-length
notabs
+ norecombine
)
],
)
],
+ # Style suggested in Damian Conway's Perl Best Practices
+ 'perl-best-practices' => [
+ qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
+q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
+ ],
+
# Additional styles can be added here
);
# Uncomment next line to dump all expansions for debugging:
# dump_short_names(\%expansion);
+ return (
+ \@option_string, \@defaults, \%expansion,
+ \%option_category, \%option_range
+ );
+
+} # end of generate_options
+
+# Memoize process_command_line. Given same @ARGV passed in, return same
+# values and same @ARGV back.
+# This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
+# up masontidy (https://metacpan.org/module/masontidy)
+
+my %process_command_line_cache;
+
+sub process_command_line {
+
+ my (
+ $perltidyrc_stream, $is_Windows, $Windows_type,
+ $rpending_complaint, $dump_options_type
+ ) = @_;
+
+ my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
+ if ($use_cache) {
+ my $cache_key = join( chr(28), @ARGV );
+ if ( my $result = $process_command_line_cache{$cache_key} ) {
+ my ( $argv, @retvals ) = @$result;
+ @ARGV = @$argv;
+ return @retvals;
+ }
+ else {
+ my @retvals = _process_command_line(@_);
+ $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
+ if $retvals[0]->{'memoize'};
+ return @retvals;
+ }
+ }
+ else {
+ return _process_command_line(@_);
+ }
+}
+
+# (note the underscore here)
+sub _process_command_line {
+
+ my (
+ $perltidyrc_stream, $is_Windows, $Windows_type,
+ $rpending_complaint, $dump_options_type
+ ) = @_;
+
+ use Getopt::Long;
+
+ # Save any current Getopt::Long configuration
+ # and set to Getopt::Long defaults. Use eval to avoid
+ # breaking old versions of Perl without these routines.
+ # Previous configuration is reset at the exit of this routine.
+ my $glc;
+ eval { $glc = Getopt::Long::Configure() };
+ unless ($@) {
+ eval { Getopt::Long::ConfigDefaults() };
+ }
+ else { $glc = undef }
+
+ my (
+ $roption_string, $rdefaults, $rexpansion,
+ $roption_category, $roption_range
+ ) = generate_options();
+
+ #---------------------------------------------------------------
+ # set the defaults by passing the above list through GetOptions
+ #---------------------------------------------------------------
+ my %Opts = ();
+ {
+ local @ARGV;
+ my $i;
+
+ # do not load the defaults if we are just dumping perltidyrc
+ unless ( $dump_options_type eq 'perltidyrc' ) {
+ for $i (@$rdefaults) { push @ARGV, "--" . $i }
+ }
+ if ( !GetOptions( \%Opts, @$roption_string ) ) {
+ Die "Programming Bug: error in setting default options";
+ }
+ }
my $word;
my @raw_options = ();
my $config_file = "";
my $saw_ignore_profile = 0;
- my $saw_extrude = 0;
my $saw_dump_profile = 0;
my $i;
}
elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
if ($config_file) {
- warn
+ Warn
"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
}
$config_file = $2;
+
+ # resolve <dir>/.../<file>, meaning look upwards from directory
+ if ( defined($config_file) ) {
+ if ( my ( $start_dir, $search_file ) =
+ ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
+ {
+ $start_dir = '.' if !$start_dir;
+ $start_dir = Cwd::realpath($start_dir);
+ if ( my $found_file =
+ find_file_upwards( $start_dir, $search_file ) )
+ {
+ $config_file = $found_file;
+ }
+ }
+ }
unless ( -e $config_file ) {
- warn "cannot find file given with -pro=$config_file: $!\n";
+ Warn "cannot find file given with -pro=$config_file: $!\n";
$config_file = "";
}
}
elsif ( $i =~ /^-(pro|profile)=?$/ ) {
- die "usage: -pro=filename or --profile=filename, no spaces\n";
- }
- elsif ( $i =~ /^-extrude$/ ) {
- $saw_extrude = 1;
+ Die "usage: -pro=filename or --profile=filename, no spaces\n";
}
- elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
+ elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
usage();
- exit 1;
+ Exit 0;
}
elsif ( $i =~ /^-(version|v)$/ ) {
show_version();
- exit 1;
+ Exit 0;
}
elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
- dump_defaults(@defaults);
- exit 1;
+ dump_defaults(@$rdefaults);
+ Exit 0;
}
elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
- dump_long_names(@option_string);
- exit 1;
+ dump_long_names(@$roption_string);
+ Exit 0;
}
elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
- dump_short_names( \%expansion );
- exit 1;
+ dump_short_names($rexpansion);
+ Exit 0;
}
elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
- exit 1;
+ Exit 0;
}
}
if ( $saw_dump_profile && $saw_ignore_profile ) {
- warn "No profile to dump because of -npro\n";
- exit 1;
+ Warn "No profile to dump because of -npro\n";
+ Exit 1;
}
#---------------------------------------------------------------
# line.
if ($perltidyrc_stream) {
if ($config_file) {
- warn <<EOM;
+ Warn <<EOM;
Conflict: a perltidyrc configuration file was specified both as this
perltidy call parameter: $perltidyrc_stream
and with this -profile=$config_file.
# look for a config file if we don't have one yet
my $rconfig_file_chatter;
$$rconfig_file_chatter = "";
- $config_file =
+ $config_file =
find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
$rpending_complaint )
unless $config_file;
}
if ($saw_dump_profile) {
- if ($saw_dump_profile) {
- dump_config_file( $fh_config, $config_file,
- $rconfig_file_chatter );
- exit 1;
- }
+ dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
+ Exit 0;
}
if ($fh_config) {
- my $rconfig_list =
- read_config_file( $fh_config, $config_file, \%expansion );
+ my ( $rconfig_list, $death_message ) =
+ read_config_file( $fh_config, $config_file, $rexpansion );
+ Die $death_message if ($death_message);
# process any .perltidyrc parameters right now so we can
# localize errors
if (@$rconfig_list) {
local @ARGV = @$rconfig_list;
- expand_command_abbreviations( \%expansion, \@raw_options,
+ expand_command_abbreviations( $rexpansion, \@raw_options,
$config_file );
- if ( !GetOptions( \%Opts, @option_string ) ) {
- die
+ if ( !GetOptions( \%Opts, @$roption_string ) ) {
+ Die
"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
}
+ # Anything left in this local @ARGV is an error and must be
+ # invalid bare words from the configuration file. We cannot
+ # check this earlier because bare words may have been valid
+ # values for parameters. We had to wait for GetOptions to have
+ # a look at @ARGV.
+ if (@ARGV) {
+ my $count = @ARGV;
+ my $str = "\'" . pop(@ARGV) . "\'";
+ while ( my $param = pop(@ARGV) ) {
+ if ( length($str) < 70 ) {
+ $str .= ", '$param'";
+ }
+ else {
+ $str .= ", ...";
+ last;
+ }
+ }
+ Die <<EOM;
+There are $count unrecognized values in the configuration file '$config_file':
+$str
+Use leading dashes for parameters. Use -npro to ignore this file.
+EOM
+ }
+
# Undo any options which cause premature exit. They are not
# appropriate for a config file, and it could be hard to
# diagnose the cause of the premature exit.
}
)
{
+
if ( defined( $Opts{$_} ) ) {
delete $Opts{$_};
- warn "ignoring --$_ in config file: $config_file\n";
+ Warn "ignoring --$_ in config file: $config_file\n";
}
}
}
#---------------------------------------------------------------
# now process the command line parameters
#---------------------------------------------------------------
- expand_command_abbreviations( \%expansion, \@raw_options, $config_file );
+ expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
- if ( !GetOptions( \%Opts, @option_string ) ) {
- die "Error on command line; for help try 'perltidy -h'\n";
+ local $SIG{'__WARN__'} = sub { Warn $_[0] };
+ if ( !GetOptions( \%Opts, @$roption_string ) ) {
+ Die "Error on command line; for help try 'perltidy -h'\n";
}
- if ( $Opts{'dump-options'} ) {
- dump_options( \%Opts );
- exit 1;
- }
+ # reset Getopt::Long configuration back to its previous value
+ eval { Getopt::Long::Configure($glc) } if defined $glc;
+
+ return ( \%Opts, $config_file, \@raw_options, $roption_string,
+ $rexpansion, $roption_category, $roption_range );
+} # end of _process_command_line
+
+sub check_options {
+
+ my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
#---------------------------------------------------------------
- # Now we have to handle any interactions among the options..
+ # check and handle any interactions among the basic options..
#---------------------------------------------------------------
# Since -vt, -vtc, and -cti are abbreviations, but under
# won't be seen. Therefore, we will catch them here if
# they get through.
- if ( defined $Opts{'vertical-tightness'} ) {
- my $vt = $Opts{'vertical-tightness'};
- $Opts{'paren-vertical-tightness'} = $vt;
- $Opts{'square-bracket-vertical-tightness'} = $vt;
- $Opts{'brace-vertical-tightness'} = $vt;
+ if ( defined $rOpts->{'vertical-tightness'} ) {
+ my $vt = $rOpts->{'vertical-tightness'};
+ $rOpts->{'paren-vertical-tightness'} = $vt;
+ $rOpts->{'square-bracket-vertical-tightness'} = $vt;
+ $rOpts->{'brace-vertical-tightness'} = $vt;
}
- if ( defined $Opts{'vertical-tightness-closing'} ) {
- my $vtc = $Opts{'vertical-tightness-closing'};
- $Opts{'paren-vertical-tightness-closing'} = $vtc;
- $Opts{'square-bracket-vertical-tightness-closing'} = $vtc;
- $Opts{'brace-vertical-tightness-closing'} = $vtc;
+ if ( defined $rOpts->{'vertical-tightness-closing'} ) {
+ my $vtc = $rOpts->{'vertical-tightness-closing'};
+ $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
+ $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
+ $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
}
- if ( defined $Opts{'closing-token-indentation'} ) {
- my $cti = $Opts{'closing-token-indentation'};
- $Opts{'closing-square-bracket-indentation'} = $cti;
- $Opts{'closing-brace-indentation'} = $cti;
- $Opts{'closing-paren-indentation'} = $cti;
+ if ( defined $rOpts->{'closing-token-indentation'} ) {
+ my $cti = $rOpts->{'closing-token-indentation'};
+ $rOpts->{'closing-square-bracket-indentation'} = $cti;
+ $rOpts->{'closing-brace-indentation'} = $cti;
+ $rOpts->{'closing-paren-indentation'} = $cti;
}
# In quiet mode, there is no log file and hence no way to report
# results of syntax check, so don't do it.
- if ( $Opts{'quiet'} ) {
- $Opts{'check-syntax'} = 0;
+ if ( $rOpts->{'quiet'} ) {
+ $rOpts->{'check-syntax'} = 0;
}
# can't check syntax if no output
- if ( $Opts{'format'} ne 'tidy' ) {
- $Opts{'check-syntax'} = 0;
+ if ( $rOpts->{'format'} ne 'tidy' ) {
+ $rOpts->{'check-syntax'} = 0;
}
# Never let Windows 9x/Me systems run syntax check -- this will prevent a
# wide variety of nasty problems on these systems, because they cannot
# reliably run backticks. Don't even think about changing this!
- if ( $Opts{'check-syntax'}
+ if ( $rOpts->{'check-syntax'}
&& $is_Windows
&& ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
{
- $Opts{'check-syntax'} = 0;
+ $rOpts->{'check-syntax'} = 0;
}
# It's really a bad idea to check syntax as root unless you wrote
# the script yourself. FIXME: not sure if this works with VMS
unless ($is_Windows) {
- if ( $< == 0 && $Opts{'check-syntax'} ) {
- $Opts{'check-syntax'} = 0;
+ if ( $< == 0 && $rOpts->{'check-syntax'} ) {
+ $rOpts->{'check-syntax'} = 0;
$$rpending_complaint .=
"Syntax check deactivated for safety; you shouldn't run this as root\n";
}
}
- # see if user set a non-negative logfile-gap
- if ( defined( $Opts{'logfile-gap'} ) && $Opts{'logfile-gap'} >= 0 ) {
+ # check iteration count and quietly fix if necessary:
+ # - iterations option only applies to code beautification mode
+ # - the convergence check should stop most runs on iteration 2, and
+ # virtually all on iteration 3. But we'll allow up to 6.
+ if ( $rOpts->{'format'} ne 'tidy' ) {
+ $rOpts->{'iterations'} = 1;
+ }
+ elsif ( defined( $rOpts->{'iterations'} ) ) {
+ if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
+ elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 }
+ }
+ else {
+ $rOpts->{'iterations'} = 1;
+ }
- # a zero gap will be taken as a 1
- if ( $Opts{'logfile-gap'} == 0 ) {
- $Opts{'logfile-gap'} = 1;
+ my $check_blank_count = sub {
+ my ( $key, $abbrev ) = @_;
+ if ( $rOpts->{$key} ) {
+ if ( $rOpts->{$key} < 0 ) {
+ $rOpts->{$key} = 0;
+ Warn "negative value of $abbrev, setting 0\n";
+ }
+ if ( $rOpts->{$key} > 100 ) {
+ Warn "unreasonably large value of $abbrev, reducing\n";
+ $rOpts->{$key} = 100;
+ }
}
+ };
- # setting a non-negative logfile gap causes logfile to be saved
- $Opts{'logfile'} = 1;
- }
+ # check for reasonable number of blank lines and fix to avoid problems
+ $check_blank_count->( 'blank-lines-before-subs', '-blbs' );
+ $check_blank_count->( 'blank-lines-before-packages', '-blbp' );
+ $check_blank_count->( 'blank-lines-after-block-opening', '-blao' );
+ $check_blank_count->( 'blank-lines-before-block-closing', '-blbc' );
- # not setting logfile gap, or setting it negative, causes default of 50
- else {
- $Opts{'logfile-gap'} = 50;
+ # setting a non-negative logfile gap causes logfile to be saved
+ if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
+ $rOpts->{'logfile'} = 1;
}
# set short-cut flag when only indentation is to be done.
# Note that the user may or may not have already set the
# indent-only flag.
- if ( !$Opts{'add-whitespace'}
- && !$Opts{'delete-old-whitespace'}
- && !$Opts{'add-newlines'}
- && !$Opts{'delete-old-newlines'} )
+ if ( !$rOpts->{'add-whitespace'}
+ && !$rOpts->{'delete-old-whitespace'}
+ && !$rOpts->{'add-newlines'}
+ && !$rOpts->{'delete-old-newlines'} )
{
- $Opts{'indent-only'} = 1;
+ $rOpts->{'indent-only'} = 1;
}
# -isbc implies -ibc
- if ( $Opts{'indent-spaced-block-comments'} ) {
- $Opts{'indent-block-comments'} = 1;
+ if ( $rOpts->{'indent-spaced-block-comments'} ) {
+ $rOpts->{'indent-block-comments'} = 1;
}
# -bli flag implies -bl
- if ( $Opts{'brace-left-and-indent'} ) {
- $Opts{'opening-brace-on-new-line'} = 1;
+ if ( $rOpts->{'brace-left-and-indent'} ) {
+ $rOpts->{'opening-brace-on-new-line'} = 1;
}
- if ( $Opts{'opening-brace-always-on-right'}
- && $Opts{'opening-brace-on-new-line'} )
+ if ( $rOpts->{'opening-brace-always-on-right'}
+ && $rOpts->{'opening-brace-on-new-line'} )
{
- warn <<EOM;
+ Warn <<EOM;
Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
'opening-brace-on-new-line' (-bl). Ignoring -bl.
EOM
- $Opts{'opening-brace-on-new-line'} = 0;
+ $rOpts->{'opening-brace-on-new-line'} = 0;
}
# it simplifies things if -bl is 0 rather than undefined
- if ( !defined( $Opts{'opening-brace-on-new-line'} ) ) {
- $Opts{'opening-brace-on-new-line'} = 0;
+ if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
+ $rOpts->{'opening-brace-on-new-line'} = 0;
}
# -sbl defaults to -bl if not defined
- if ( !defined( $Opts{'opening-sub-brace-on-new-line'} ) ) {
- $Opts{'opening-sub-brace-on-new-line'} =
- $Opts{'opening-brace-on-new-line'};
- }
-
- # set shortcut flag if no blanks to be written
- unless ( $Opts{'maximum-consecutive-blank-lines'} ) {
- $Opts{'swallow-optional-blank-lines'} = 1;
+ if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
+ $rOpts->{'opening-sub-brace-on-new-line'} =
+ $rOpts->{'opening-brace-on-new-line'};
}
- if ( $Opts{'entab-leading-whitespace'} ) {
- if ( $Opts{'entab-leading-whitespace'} < 0 ) {
- warn "-et=n must use a positive integer; ignoring -et\n";
- $Opts{'entab-leading-whitespace'} = undef;
+ if ( $rOpts->{'entab-leading-whitespace'} ) {
+ if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
+ Warn "-et=n must use a positive integer; ignoring -et\n";
+ $rOpts->{'entab-leading-whitespace'} = undef;
}
# entab leading whitespace has priority over the older 'tabs' option
- if ( $Opts{'tabs'} ) { $Opts{'tabs'} = 0; }
+ if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
}
- if ( $Opts{'output-line-ending'} ) {
- unless ( is_unix() ) {
- warn "ignoring -ole; only works under unix\n";
- $Opts{'output-line-ending'} = undef;
+ # set a default tabsize to be used in guessing the starting indentation
+ # level if and only if this run does not use tabs and the old code does
+ # use tabs
+ if ( $rOpts->{'default-tabsize'} ) {
+ if ( $rOpts->{'default-tabsize'} < 0 ) {
+ Warn "negative value of -dt, setting 0\n";
+ $rOpts->{'default-tabsize'} = 0;
}
- }
- if ( $Opts{'preserve-line-endings'} ) {
- unless ( is_unix() ) {
- warn "ignoring -ple; only works under unix\n";
- $Opts{'preserve-line-endings'} = undef;
+ if ( $rOpts->{'default-tabsize'} > 20 ) {
+ Warn "unreasonably large value of -dt, reducing\n";
+ $rOpts->{'default-tabsize'} = 20;
}
}
+ else {
+ $rOpts->{'default-tabsize'} = 8;
+ }
+
+ # Define $tabsize, the number of spaces per tab for use in
+ # guessing the indentation of source lines with leading tabs.
+ # Assume same as for this run if tabs are used , otherwise assume
+ # a default value, typically 8
+ my $tabsize =
+ $rOpts->{'entab-leading-whitespace'}
+ ? $rOpts->{'entab-leading-whitespace'}
+ : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
+ : $rOpts->{'default-tabsize'};
+ return $tabsize;
+}
+
+sub find_file_upwards {
+ my ( $search_dir, $search_file ) = @_;
- return ( \%Opts, $config_file, \@raw_options, $saw_extrude );
+ $search_dir =~ s{/+$}{};
+ $search_file =~ s{^/+}{};
-} # end of process_command_line
+ while (1) {
+ my $try_path = "$search_dir/$search_file";
+ if ( -f $try_path ) {
+ return $try_path;
+ }
+ elsif ( $search_dir eq '/' ) {
+ return undef;
+ }
+ else {
+ $search_dir = dirname($search_dir);
+ }
+ }
+}
sub expand_command_abbreviations {
# make sure we are not in an infinite loop
if ( $pass_count == $max_passes ) {
- print STDERR
-"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
- print STDERR "Here are the raw options\n";
local $" = ')(';
- print STDERR "(@$rraw_options)\n";
+ Warn <<EOM;
+I'm tired. We seem to be in an infinite loop trying to expand aliases.
+Here are the raw options;
+(rraw_options)
+EOM
my $num = @new_argv;
-
if ( $num < 50 ) {
- print STDERR "After $max_passes passes here is ARGV\n";
- print STDERR "(@new_argv)\n";
+ Warn <<EOM;
+After $max_passes passes here is ARGV
+(@new_argv)
+EOM
}
else {
- print STDERR "After $max_passes passes ARGV has $num entries\n";
+ Warn <<EOM;
+After $max_passes passes ARGV has $num entries
+EOM
}
if ($config_file) {
- die <<"DIE";
+ Die <<"DIE";
Please check your configuration file $config_file for circular-references.
To deactivate it, use -npro.
DIE
}
else {
- die <<'DIE';
+ Die <<'DIE';
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
# normalise filename, if there are no unescaped dots then append one
$base .= '.' unless $base =~ /(?:^|[^^])\./;
- # if we don't already have an extension then we just append the extention
+ # if we don't already have an extension then we just append the extension
my $separator = ( $base =~ /\.$/ ) ? "" : "_";
return ( $path . $base, $separator );
}
sub Win_OS_Type {
+ # TODO: are these more standard names?
+ # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
+
# Returns a string that determines what MS OS we are on.
- # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net
- # Returns nothing if not an MS system.
- # Contributed by: Yves Orton
+ # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
+ # Returns blank string if not an MS system.
+ # Original code contributed by: Yves Orton
+ # We need to know this to decide where to look for config files
my $rpending_complaint = shift;
- return unless $^O =~ /win32|dos/i; # is it a MS box?
+ my $os = "";
+ return $os unless $^O =~ /win32|dos/i; # is it a MS box?
- # It _should_ have Win32 unless something is really weird
- return unless eval('require Win32');
+ # Systems built from Perl source may not have Win32.pm
+ # But probably have Win32::GetOSVersion() anyway so the
+ # following line is not 'required':
+ # return $os unless eval('require Win32');
# Use the standard API call to determine the version
- my ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion();
+ my ( $undef, $major, $minor, $build, $id );
+ eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
- return "win32s" unless $id; # If id==0 then its a win32s box.
- my $os = { # Magic numbers from MSDN
- # documentation of GetOSVersion
+ #
+ # NAME ID MAJOR MINOR
+ # Windows NT 4 2 4 0
+ # Windows 2000 2 5 0
+ # Windows XP 2 5 1
+ # Windows Server 2003 2 5 2
+
+ return "win32s" unless $id; # If id==0 then its a win32s box.
+ $os = { # Magic numbers from MSDN
+ # documentation of GetOSVersion
1 => {
0 => "95",
10 => "98",
90 => "Me"
},
2 => {
- 0 => "2000",
+ 0 => "2000", # or NT 4, see below
1 => "XP/.Net",
+ 2 => "Win2003",
51 => "NT3.51"
}
}->{$id}->{$minor};
- # This _really_ shouldnt happen. At least not for quite a while
+ # If $os is undefined, the above code is out of date. Suggested updates
+ # are welcome.
unless ( defined $os ) {
+ $os = "";
$$rpending_complaint .= <<EOS;
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
We won't be able to look for a system-wide config file.
EOS
}
- # Unfortunately the logic used for the various versions isnt so clever..
+ # Unfortunately the logic used for the various versions isn't so clever..
# so we have to handle an outside case.
return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
}
sub is_unix {
- return ( $^O !~ /win32|dos/i )
+ return
+ ( $^O !~ /win32|dos/i )
&& ( $^O ne 'VMS' )
&& ( $^O ne 'OS2' )
&& ( $^O ne 'MacOS' );
sub find_config_file {
# look for a .perltidyrc configuration file
+ # For Windows also look for a file named perltidy.ini
my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
$rpending_complaint ) = @_;
$$rconfig_file_chatter .= " $^O\n";
}
- # sub to check file existance and record all tests
+ # sub to check file existence and record all tests
my $exists_config_file = sub {
my $config_file = shift;
return 0 unless $config_file;
# look in current directory first
$config_file = ".perltidyrc";
return $config_file if $exists_config_file->($config_file);
+ if ($is_Windows) {
+ $config_file = "perltidy.ini";
+ return $config_file if $exists_config_file->($config_file);
+ }
# Default environment vars.
my @envs = qw(PERLTIDY HOME);
# network def
push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
- # Now go through the enviornment ...
+ # Now go through the environment ...
foreach my $var (@envs) {
$$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
if ( defined( $ENV{$var} ) ) {
# test ENV as directory:
$config_file = catfile( $ENV{$var}, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
+
+ if ($is_Windows) {
+ $config_file = catfile( $ENV{$var}, "perltidy.ini" );
+ return $config_file if $exists_config_file->($config_file);
+ }
}
else {
$$rconfig_file_chatter .= "\n";
Win_Config_Locs( $rpending_complaint, $Windows_type );
# Check All Users directory, if there is one.
+ # i.e. C:\Documents and Settings\User\perltidy.ini
if ($allusers) {
+
$config_file = catfile( $allusers, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
+
+ $config_file = catfile( $allusers, "perltidy.ini" );
+ return $config_file if $exists_config_file->($config_file);
}
# Check system directory.
+ # retain old code in case someone has been able to create
+ # a file with a leading period.
$config_file = catfile( $system, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
+
+ $config_file = catfile( $system, "perltidy.ini" );
+ return $config_file if $exists_config_file->($config_file);
}
}
# 9x/Me box. Contributed by: Yves Orton.
my $rpending_complaint = shift;
- my $os = (@_) ? shift: Win_OS_Type();
+ my $os = (@_) ? shift : Win_OS_Type();
return unless $os;
my $system = "";
if ( $os =~ /9[58]|Me/ ) {
$system = "C:/Windows";
}
- elsif ( $os =~ /NT|XP|2000/ ) {
+ elsif ( $os =~ /NT|XP|200?/ ) {
$system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
$allusers =
( $os =~ /NT/ )
}
else {
- # This currently would only happen on a win32s computer.
- # I dont have one to test So I am unsure how to proceed.
- # Sorry. :-)
+ # This currently would only happen on a win32s computer. I don't have
+ # one to test, so I am unsure how to proceed. Suggestions welcome!
$$rpending_complaint .=
"I dont know a sensible place to look for config files on an $os system.\n";
return;
print STDOUT "$$rconfig_file_chatter";
if ($fh) {
print STDOUT "# Dump of file: '$config_file'\n";
- while ( $_ = $fh->getline() ) { print STDOUT }
+ while ( my $line = $fh->getline() ) { print STDOUT $line }
eval { $fh->close() };
}
else {
my ( $fh, $config_file, $rexpansion ) = @_;
my @config_list = ();
+ # file is bad if non-empty $death_message is returned
+ my $death_message = "";
+
my $name = undef;
my $line_no;
- while ( $_ = $fh->getline() ) {
+ my $opening_brace_line;
+ while ( my $line = $fh->getline() ) {
$line_no++;
- chomp;
- next if /^\s*#/; # skip full-line comment
- $_ = strip_comment( $_, $config_file, $line_no );
- s/^\s*(.*?)\s*$/$1/; # trim both ends
- next unless $_;
+ chomp $line;
+ ( $line, $death_message ) =
+ strip_comment( $line, $config_file, $line_no );
+ last if ($death_message);
+ next unless $line;
+ $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
+ next unless $line;
+
+ my $body = $line;
+ my $newname;
- # look for something of the general form
- # newname { body }
- # or just
- # body
+ # Look for complete or partial abbreviation definition of the form
+ # name { body } or name { or name { body
+ # See rules in perltidy's perldoc page
+ # Section: Other Controls - Creating a new abbreviation
+ if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
+ my $oldname = $name;
+ ( $name, $body ) = ( $2, $3 );
- if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
- my ( $newname, $body, $curly ) = ( $2, $3, $4 );
+ # Cannot start new abbreviation unless old abbreviation is complete
+ last if ($opening_brace_line);
+
+ $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
# handle a new alias definition
- if ($newname) {
- if ($name) {
- die
-"No '}' seen after $name and before $newname in config file $config_file line $.\n";
- }
- $name = $newname;
+ if ( ${$rexpansion}{$name} ) {
+ local $" = ')(';
+ my @names = sort keys %$rexpansion;
+ $death_message =
+ "Here is a list of all installed aliases\n(@names)\n"
+ . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
+ last;
+ }
+ ${$rexpansion}{$name} = [];
+ }
- if ( ${$rexpansion}{$name} ) {
- local $" = ')(';
- my @names = sort keys %$rexpansion;
- print "Here is a list of all installed aliases\n(@names)\n";
- die
-"Attempting to redefine alias ($name) in config file $config_file line $.\n";
- }
- ${$rexpansion}{$name} = [];
+ # leading opening braces not allowed
+ elsif ( $line =~ /^{/ ) {
+ $opening_brace_line = undef;
+ $death_message =
+ "Unexpected '{' at line $line_no in config file '$config_file'\n";
+ last;
+ }
+
+ # Look for abbreviation closing: body } or }
+ elsif ( $line =~ /^(.*)?\}$/ ) {
+ $body = $1;
+ if ($opening_brace_line) {
+ $opening_brace_line = undef;
+ }
+ else {
+ $death_message =
+"Unexpected '}' at line $line_no in config file '$config_file'\n";
+ last;
}
+ }
- # now do the body
- if ($body) {
+ # Now store any parameters
+ if ($body) {
- my ( $rbody_parts, $msg ) = parse_args($body);
- if ($msg) {
- die <<EOM;
-Error reading file $config_file at line number $line_no.
+ my ( $rbody_parts, $msg ) = parse_args($body);
+ if ($msg) {
+ $death_message = <<EOM;
+Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
- }
-
- if ($name) {
+ last;
+ }
- # remove leading dashes if this is an alias
- foreach (@$rbody_parts) { s/^\-+//; }
- push @{ ${$rexpansion}{$name} }, @$rbody_parts;
- }
+ if ($name) {
- else {
- push( @config_list, @$rbody_parts );
- }
+ # remove leading dashes if this is an alias
+ foreach (@$rbody_parts) { s/^\-+//; }
+ push @{ ${$rexpansion}{$name} }, @$rbody_parts;
}
-
- if ($curly) {
- unless ($name) {
- die
-"Unexpected '}' seen in config file $config_file line $.\n";
- }
- $name = undef;
+ else {
+ push( @config_list, @$rbody_parts );
}
}
}
+
+ if ($opening_brace_line) {
+ $death_message =
+"Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
+ }
eval { $fh->close() };
- return ( \@config_list );
+ return ( \@config_list, $death_message );
}
sub strip_comment {
+ # Strip any comment from a command line
my ( $instr, $config_file, $line_no ) = @_;
+ my $msg = "";
+
+ # check for full-line comment
+ if ( $instr =~ /^\s*#/ ) {
+ return ( "", $msg );
+ }
# nothing to do if no comments
if ( $instr !~ /#/ ) {
- return $instr;
+ return ( $instr, $msg );
}
- # use simple method of no quotes
+ # handle case of no quotes
elsif ( $instr !~ /['"]/ ) {
- $instr =~ s/\s*\#.*$//; # simple trim
- return $instr;
+
+ # We now require a space before the # of a side comment
+ # this allows something like:
+ # -sbcp=#
+ # Otherwise, it would have to be quoted:
+ # -sbcp='#'
+ $instr =~ s/\s+\#.*$//;
+ return ( $instr, $msg );
}
# handle comments and quotes
# error..we reached the end without seeing the ending quote char
else {
- die <<EOM;
+ $msg = <<EOM;
Error reading file $config_file at line number $line_no.
Did not see ending quote character <$quote_char> in this text:
$instr
$outstr .= $1;
$quote_char = $1;
}
+
+ # Note: not yet enforcing the space-before-hash rule for side
+ # comments if the parameter is quoted.
elsif ( $instr =~ /\G#/gc ) {
last;
}
}
}
}
- return $outstr;
+ return ( $outstr, $msg );
}
sub parse_args {
# error..we reached the end without seeing the ending quote char
else {
- if ($part) { push @body_parts, $part; }
+ if ( length($part) ) { push @body_parts, $part; }
$msg = <<EOM;
Did not see ending quote character <$quote_char> in this text:
$body
$quote_char = $1;
}
elsif ( $body =~ /\G(\s+)/gc ) {
- if ($part) { push @body_parts, $part; }
+ if ( length($part) ) { push @body_parts, $part; }
$part = "";
}
elsif ( $body =~ /\G(.)/gc ) {
$part .= $1;
}
else {
- if ($part) { push @body_parts, $part; }
+ if ( length($part) ) { push @body_parts, $part; }
last;
}
}
foreach (@_) { print STDOUT "$_\n" }
}
-sub dump_options {
- my ($rOpts) = @_;
- local $" = "\n";
- print STDOUT "Final parameter set for this run\n";
- foreach ( sort keys %{$rOpts} ) {
- print STDOUT "$_=$rOpts->{$_}\n";
+sub readable_options {
+
+ # return options for this run as a string which could be
+ # put in a perltidyrc file
+ my ( $rOpts, $roption_string ) = @_;
+ my %Getopt_flags;
+ my $rGetopt_flags = \%Getopt_flags;
+ my $readable_options = "# Final parameter set for this run.\n";
+ $readable_options .=
+ "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
+ foreach my $opt ( @{$roption_string} ) {
+ my $flag = "";
+ if ( $opt =~ /(.*)(!|=.*)$/ ) {
+ $opt = $1;
+ $flag = $2;
+ }
+ if ( defined( $rOpts->{$opt} ) ) {
+ $rGetopt_flags->{$opt} = $flag;
+ }
+ }
+ foreach my $key ( sort keys %{$rOpts} ) {
+ my $flag = $rGetopt_flags->{$key};
+ my $value = $rOpts->{$key};
+ my $prefix = '--';
+ my $suffix = "";
+ if ($flag) {
+ if ( $flag =~ /^=/ ) {
+ if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
+ $suffix = "=" . $value;
+ }
+ elsif ( $flag =~ /^!/ ) {
+ $prefix .= "no" unless ($value);
+ }
+ else {
+
+ # shouldn't happen
+ $readable_options .=
+ "# ERROR in dump_options: unrecognized flag $flag for $key\n";
+ }
+ }
+ $readable_options .= $prefix . $key . $suffix . "\n";
}
+ return $readable_options;
}
sub show_version {
- print <<"EOM";
+ print STDOUT <<"EOM";
This is perltidy, v$VERSION
-Copyright 2000-2003, Steve Hancock
+Copyright 2000-2017, Steve Hancock
Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
-npro ignore .perltidyrc configuration command file
-pro=file read configuration commands from file instead of .perltidyrc
-st send output to standard output, STDOUT
- -se send error output to standard error output, STDERR
+ -se send all error output to standard error output, STDERR
-v display version number to standard output and quit
Basic Options:
-bbs add blank line before subs and packages
-bbc add blank line before block comments
-bbb add blank line between major blocks
- -sob swallow optional blank lines
+ -kbl=n keep old blank lines? 0=no, 1=some, 2=all
+ -mbl=n maximum consecutive blank lines to output (default=1)
-ce cuddled else; use this style: '} else {'
-dnl delete old newlines (default)
- -mbl=n maximum consecutive blank lines (default=1)
-l=n maximum line length; default n=80
-bl opening brace on new line
-sbl opening sub brace on new line. value of -bl is used if not given.
-wbb=s want break before tokens in string
Following Old Breakpoints
+ -kis keep interior semicolons. Allows multiple statements per line.
-boc break at old comma breaks: turns off all automatic list formatting
-bol break at old logical breakpoints: or, and, ||, && (default)
-bok break at old list keyword breakpoints such as map, sort (default)
- -bot break at old conditional (trinary ?:) operator breakpoints (default)
+ -bot break at old conditional (ternary ?:) operator breakpoints (default)
+ -boa break at old attribute breakpoints
-cab=n break at commas after a comma-arrow (=>):
n=0 break at all commas after =>
n=1 stable: break unless this breaks an existing one-line container
-ibc indent block comments (default)
-isbc indent spaced block comments; may indent unless no leading space
-msc=n minimum desired spaces to side comment, default 4
+ -fpsc=n fix position for side comments; default 0;
-csc add or update closing side comments after closing BLOCK brace
-dcsc delete closing side comments created by a -csc command
-cscp=s change closing side comment prefix to be other than '## end'
# Use 'perl -c' to make sure that we did not create bad syntax
# This is a very good independent check for programming errors
#
- # Given names of the input and output files, ($ifname, $ofname),
+ # Given names of the input and output files, ($istream, $ostream),
# we do the following:
# - check syntax of the input file
# - if bad, all done (could be an incomplete code snippet)
# - if outfile syntax bad, issue warning; this implies a code bug!
# - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
- my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
+ my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
my $infile_syntax_ok = 0;
my $line_of_dashes = '-' x 42 . "\n";
if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
}
- # this shouldn't happen unless a termporary file couldn't be made
- if ( $ifname eq '-' ) {
+ # this shouldn't happen unless a temporary file couldn't be made
+ if ( $istream eq '-' ) {
$logger_object->write_logfile_entry(
"Cannot run perl -c on STDIN and STDOUT\n");
return $infile_syntax_ok;
$logger_object->write_logfile_entry(
"checking input file syntax with perl $flags\n");
- $logger_object->write_logfile_entry($line_of_dashes);
# Not all operating systems/shells support redirection of the standard
# error output.
my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
- my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
+ my ( $istream_filename, $perl_output ) =
+ do_syntax_check( $istream, $flags, $error_redirection );
+ $logger_object->write_logfile_entry(
+ "Input stream passed to Perl as file $istream_filename\n");
+ $logger_object->write_logfile_entry($line_of_dashes);
$logger_object->write_logfile_entry("$perl_output\n");
if ( $perl_output =~ /syntax\s*OK/ ) {
$logger_object->write_logfile_entry($line_of_dashes);
$logger_object->write_logfile_entry(
"checking output file syntax with perl $flags ...\n");
+ my ( $ostream_filename, $perl_output ) =
+ do_syntax_check( $ostream, $flags, $error_redirection );
+ $logger_object->write_logfile_entry(
+ "Output stream passed to Perl as file $ostream_filename\n");
$logger_object->write_logfile_entry($line_of_dashes);
-
- my $perl_output =
- do_syntax_check( $ofname, $flags, $error_redirection );
$logger_object->write_logfile_entry("$perl_output\n");
unless ( $perl_output =~ /syntax\s*OK/ ) {
$logger_object->write_logfile_entry($line_of_dashes);
$logger_object->warning(
-"The output file has a syntax error when tested with perl $flags $ofname !\n"
+"The output file has a syntax error when tested with perl $flags $ostream !\n"
);
$logger_object->warning(
- "This implies an error in perltidy; the file $ofname is bad\n");
+ "This implies an error in perltidy; the file $ostream is bad\n"
+ );
$logger_object->report_definite_bug();
# the perl version number will be helpful for diagnosing the problem
# Only warn of perl -c syntax errors. Other messages,
# such as missing modules, are too common. They can be
# seen by running with perltidy -w
- $logger_object->complain("A syntax check using perl $flags gives: \n");
+ $logger_object->complain("A syntax check using perl $flags\n");
+ $logger_object->complain(
+ "for the output in file $istream_filename gives:\n");
$logger_object->complain($line_of_dashes);
$logger_object->complain("$perl_output\n");
$logger_object->complain($line_of_dashes);
}
sub do_syntax_check {
- my ( $fname, $flags, $error_redirection ) = @_;
+ my ( $stream, $flags, $error_redirection ) = @_;
+
+ # We need a named input file for executing perl
+ my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
+
+ # TODO: Need to add name of file to log somewhere
+ # otherwise Perl output is hard to read
+ if ( !$stream_filename ) { return $stream_filename, "" }
# We have to quote the filename in case it has unusual characters
# or spaces. Example: this filename #CM11.pm# gives trouble.
- $fname = '"' . $fname . '"';
+ my $quoted_stream_filename = '"' . $stream_filename . '"';
# Under VMS something like -T will become -t (and an error) so we
# will put quotes around the flags. Double quotes seem to work on
$flags = '"' . $flags . '"';
# now wish for luck...
- return qx/perl $flags $fname $error_redirection/;
+ my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
+
+ if ($is_tmpfile) {
+ unlink $stream_filename
+ or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n");
+ }
+ return $stream_filename, $msg;
}
#####################################################################
# Convert a scalar to an array.
# This avoids looking for "\n" on each call to getline
- my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
+ #
+ # NOTES: The -1 count is needed to avoid loss of trailing blank lines
+ # (which might be important in a DATA section).
+ my @array;
+ if ( $rscalar && ${$rscalar} ) {
+ @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
+
+ # remove possible extra blank line introduced with split
+ if ( @array && $array[-1] eq "\n" ) { pop @array }
+ }
my $i_next = 0;
return bless [ \@array, $mode, $i_next ], $package;
}
EOM
}
my $i = $self->[2]++;
- ##my $line = $self->[0]->[$i];
return $self->[0]->[$i];
}
# a getline method which reads lines (mode='r'), or
# a print method which reads lines (mode='w')
#
-# NOTE: this routine assumes that that there aren't any embedded
+# NOTE: this routine assumes that there aren't any embedded
# newlines within any of the array elements. There are no checks
# for that.
#
EOM
}
my $i = $self->[2]++;
- ##my $line = $self->[0]->[$i];
return $self->[0]->[$i];
}
sub new {
my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
- my $input_file_copy = undef;
- my $fh_copy;
my $input_line_ending;
if ( $rOpts->{'preserve-line-endings'} ) {
# The reason is that temporary files cause problems on
# on many systems.
$rOpts->{'check-syntax'} = 0;
- $input_file_copy = '-';
$$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard input is used
return bless {
_fh => $fh,
- _fh_copy => $fh_copy,
_filename => $input_file,
- _input_file_copy => $input_file_copy,
_input_line_ending => $input_line_ending,
_rinput_buffer => [],
_started => 0,
}, $class;
}
-sub get_input_file_copy_name {
- my $self = shift;
- my $ifname = $self->{_input_file_copy};
- unless ($ifname) {
- $ifname = $self->{_filename};
- }
- return $ifname;
-}
-
sub close_input_file {
my $self = shift;
- eval { $self->{_fh}->close() };
- eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
+
+ # Only close physical files, not STDIN and other objects
+ my $filename = $self->{_filename};
+ if ( $filename ne '-' && !ref $filename ) {
+ eval { $self->{_fh}->close() };
+ }
}
sub get_line {
my $self = shift;
my $line = undef;
my $fh = $self->{_fh};
- my $fh_copy = $self->{_fh_copy};
my $rinput_buffer = $self->{_rinput_buffer};
if ( scalar(@$rinput_buffer) ) {
$self->{_started}++;
}
}
- if ( $line && $fh_copy ) { $fh_copy->print($line); }
- return $line;
-}
-
-sub old_get_line {
- my $self = shift;
- my $line = undef;
- my $fh = $self->{_fh};
- my $fh_copy = $self->{_fh_copy};
- $line = $fh->getline();
- if ( $line && $fh_copy ) { $fh_copy->print($line); }
return $line;
}
sub new {
my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
- $rpending_logfile_message )
+ $rpending_logfile_message, $binmode )
= @_;
- my $fh = undef;
- my $fh_copy = undef;
- my $fh_tee = undef;
- my $output_file_copy = "";
+ my $fh = undef;
+ my $fh_tee = undef;
+
my $output_file_open = 0;
if ( $rOpts->{'format'} eq 'tidy' ) {
( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
- unless ($fh) { die "Cannot write to output stream\n"; }
+ unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
$output_file_open = 1;
+ if ($binmode) {
+ if ( $rOpts->{'character-encoding'}
+ && $rOpts->{'character-encoding'} eq 'utf8' )
+ {
+ if ( ref($fh) eq 'IO::File' ) {
+ $fh->binmode(":encoding(UTF-8)");
+ }
+ elsif ( $output_file eq '-' ) {
+ binmode STDOUT, ":encoding(UTF-8)";
+ }
+ }
+ elsif ( $output_file eq '-' ) { binmode STDOUT }
+ }
}
# in order to check output syntax when standard output is used,
# The reason is that temporary files cause problems on
# on many systems.
$rOpts->{'check-syntax'} = 0;
- $output_file_copy = '-';
$$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard output is used
EOM
bless {
_fh => $fh,
- _fh_copy => $fh_copy,
_fh_tee => $fh_tee,
_output_file => $output_file,
_output_file_open => $output_file_open,
- _output_file_copy => $output_file_copy,
_tee_flag => 0,
_tee_file => $tee_file,
_tee_file_opened => 0,
_line_separator => $line_separator,
+ _binmode => $binmode,
}, $class;
}
sub write_line {
- my $self = shift;
- my $fh = $self->{_fh};
- my $fh_copy = $self->{_fh_copy};
+ my $self = shift;
+ my $fh = $self->{_fh};
my $output_file_open = $self->{_output_file_open};
chomp $_[0];
$_[0] .= $self->{_line_separator};
$fh->print( $_[0] ) if ( $self->{_output_file_open} );
- print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
if ( $self->{_tee_flag} ) {
unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
}
}
-sub get_output_file_copy {
- my $self = shift;
- my $ofname = $self->{_output_file_copy};
- unless ($ofname) {
- $ofname = $self->{_output_file};
- }
- return $ofname;
-}
-
sub tee_on {
my $self = shift;
$self->{_tee_flag} = 1;
my $tee_file = $self->{_tee_file};
my $fh_tee;
$fh_tee = IO::File->new(">$tee_file")
- or die("couldn't open TEE file $tee_file: $!\n");
+ or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
+ binmode $fh_tee if $self->{_binmode};
$self->{_tee_file_opened} = 1;
$self->{_fh_tee} = $fh_tee;
}
sub close_output_file {
my $self = shift;
- eval { $self->{_fh}->close() } if $self->{_output_file_open};
- eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
+
+ # Only close physical files, not STDOUT and other objects
+ my $output_file = $self->{_output_file};
+ if ( $output_file ne '-' && !ref $output_file ) {
+ eval { $self->{_fh}->close() } if $self->{_output_file_open};
+ }
$self->close_tee_file();
}
sub close_tee_file {
my $self = shift;
+ # Only close physical files, not STDOUT and other objects
if ( $self->{_tee_file_opened} ) {
- eval { $self->{_fh_tee}->close() };
- $self->{_tee_file_opened} = 0;
+ my $tee_file = $self->{_tee_file};
+ if ( $tee_file ne '-' && !ref $tee_file ) {
+ eval { $self->{_fh_tee}->close() };
+ $self->{_tee_file_opened} = 0;
+ }
}
}
sub new {
my $class = shift;
my $fh;
- my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
+ my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_;
- # remove any old error output file
- unless ( ref($warning_file) ) {
- if ( -e $warning_file ) { unlink($warning_file) }
+ my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
+
+ # remove any old error output file if we might write a new one
+ unless ( $fh_warnings || ref($warning_file) ) {
+ if ( -e $warning_file ) {
+ unlink($warning_file)
+ or Perl::Tidy::Die(
+ "couldn't unlink warning file $warning_file: $!\n");
+ }
}
+ my $logfile_gap =
+ defined( $rOpts->{'logfile-gap'} )
+ ? $rOpts->{'logfile-gap'}
+ : 50;
+ if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
+
bless {
_log_file => $log_file,
- _fh_warnings => undef,
+ _logfile_gap => $logfile_gap,
_rOpts => $rOpts,
- _fh_warnings => undef,
+ _fh_warnings => $fh_warnings,
_last_input_line_written => 0,
_at_end_of_file => 0,
_use_prefix => 1,
}, $class;
}
-sub close_log_file {
-
- my $self = shift;
- if ( $self->{_fh_warnings} ) {
- eval { $self->{_fh_warnings}->close() };
- $self->{_fh_warnings} = undef;
- }
-}
-
sub get_warning_count {
my $self = shift;
return $self->{_warning_count};
if (
(
( $input_line_number - $last_input_line_written ) >=
- $rOpts->{'logfile-gap'}
+ $self->{_logfile_gap}
)
|| ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
)
sub write_logfile_entry {
my $self = shift;
- # add leading >>> to avoid confusing error mesages and code
+ # add leading >>> to avoid confusing error messages and code
$self->logfile_output( ">>>", "@_" );
}
my $line_information_string = "";
if ($input_line_number) {
- my $output_line_number = $self->{_output_line_number};
- my $brace_depth = $line_of_tokens->{_curly_brace_depth};
- my $paren_depth = $line_of_tokens->{_paren_depth};
- my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
- my $python_indentation_level =
- $line_of_tokens->{_python_indentation_level};
+ my $output_line_number = $self->{_output_line_number};
+ my $brace_depth = $line_of_tokens->{_curly_brace_depth};
+ my $paren_depth = $line_of_tokens->{_paren_depth};
+ my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
+ my $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
my $rlevels = $line_of_tokens->{_rlevels};
my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
my $rci_levels = $line_of_tokens->{_rci_levels};
# for longer scripts it doesn't really matter
my $extra_space = "";
$extra_space .=
- ( $input_line_number < 10 ) ? " "
+ ( $input_line_number < 10 ) ? " "
: ( $input_line_number < 100 ) ? " "
- : "";
+ : "";
$extra_space .=
- ( $output_line_number < 10 ) ? " "
+ ( $output_line_number < 10 ) ? " "
: ( $output_line_number < 100 ) ? " "
- : "";
+ : "";
# there are 2 possible nesting strings:
# the original which looks like this: (0 [1 {2
$nesting_string =
$nesting_string_new . " " x ( 8 - length($nesting_string_new) );
}
- if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
$line_information_string =
-"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
+"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
}
return $line_information_string;
}
unless ( $rOpts->{'quiet'} ) {
my $warning_count = $self->{_warning_count};
- unless ($warning_count) {
+ my $fh_warnings = $self->{_fh_warnings};
+ if ( !$fh_warnings ) {
my $warning_file = $self->{_warning_file};
- my $fh_warnings;
- if ( $rOpts->{'standard-error-output'} ) {
- $fh_warnings = *STDERR;
- }
- else {
- ( $fh_warnings, my $filename ) =
- Perl::Tidy::streamhandle( $warning_file, 'w' );
- $fh_warnings or die("couldn't open $filename $!\n");
- warn "## Please see file $filename\n";
- }
+ ( $fh_warnings, my $filename ) =
+ Perl::Tidy::streamhandle( $warning_file, 'w' );
+ $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
+ Perl::Tidy::Warn "## Please see file $filename\n"
+ unless ref($warning_file);
$self->{_fh_warnings} = $fh_warnings;
+ $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
}
- my $fh_warnings = $self->{_fh_warnings};
if ( $warning_count < WARNING_LIMIT ) {
if ( $self->get_use_prefix() > 0 ) {
my $input_line_number =
Perl::Tidy::Tokenizer::get_input_line_number();
- print $fh_warnings "$input_line_number:\t@_";
+ if ( !defined($input_line_number) ) { $input_line_number = -1 }
+ $fh_warnings->print("$input_line_number:\t@_");
$self->write_logfile_entry("WARNING: @_");
}
else {
- print $fh_warnings @_;
+ $fh_warnings->print(@_);
$self->write_logfile_entry(@_);
}
}
$self->{_warning_count} = $warning_count;
if ( $warning_count == WARNING_LIMIT ) {
- print $fh_warnings "No further warnings will be given";
+ $fh_warnings->print("No further warnings will be given\n");
}
}
}
elsif ( $saw_code_bug == 1 ) {
if ( $self->{_saw_extrude} ) {
$self->warning(<<EOM);
-You may have encountered a bug in perltidy. However, since you are
-using the -extrude option, the problem may be with perl itself, which
-has occasional parsing problems with this type of file. If you believe
-that the problem is with perltidy, and the problem is not listed in the
-BUGS file at http://perltidy.sourceforge.net, please report it so that
-it can be corrected. Include the smallest possible script which has the
-problem, along with the .LOG file. See the manual pages for contact
-information.
+
+You may have encountered a bug in perltidy. However, since you are using the
+-extrude option, the problem may be with perl or one of its modules, which have
+occasional problems with this type of file. If you believe that the
+problem is with perltidy, and the problem is not listed in the BUGS file at
+http://perltidy.sourceforge.net, please report it so that it can be corrected.
+Include the smallest possible script which has the problem, along with the .LOG
+file. See the manual pages for contact information.
Thank you!
EOM
}
my $warning_count = $self->{_warning_count};
my $saw_code_bug = $self->{_saw_code_bug};
- my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
+ my $save_logfile =
+ ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
|| $saw_code_bug == 1
|| $rOpts->{'logfile'};
my $log_file = $self->{_log_file};
}
if ( $self->{_saw_brace_error}
- && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
+ && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
{
$self->warning("To save a full .LOG file rerun with -g\n");
}
if ($fh) {
my $routput_array = $self->{_output_array};
foreach ( @{$routput_array} ) { $fh->print($_) }
- eval { $fh->close() };
+ if ( $log_file ne '-' && !ref $log_file ) {
+ eval { $fh->close() };
+ }
}
}
}
( $html_fh, my $html_filename ) =
Perl::Tidy::streamhandle( $html_file, 'w' );
unless ($html_fh) {
- warn("can't open $html_file: $!\n");
+ Perl::Tidy::Warn("can't open $html_file: $!\n");
return undef;
}
$html_file_opened = 1;
else {
eval "use Pod::Html";
if ($@) {
- warn
+ Perl::Tidy::Warn
"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
undef $rOpts->{'pod2html'};
}
my $src_filename;
if ( $rOpts->{'frames'} ) {
unless ($extension) {
- warn
+ Perl::Tidy::Warn
"cannot use frames without a specified output extension; ignoring -frm\n";
undef $rOpts->{'frames'};
}
);
# These token types will all be called identifiers for now
- # FIXME: need to separate user defined modules as separate type
- my @identifier = qw" i t U C Y Z G :: ";
+ # FIXME: could separate user defined modules as separate type
+ my @identifier = qw" i t U C Y Z G :: CORE::";
@token_short_names{@identifier} = ('i') x scalar(@identifier);
# These token types will be called 'structure'
# my @list = qw" == != < > <= <=> ";
# @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
#
- # my @list = qw" && || ! &&= ||= ";
+ # my @list = qw" && || ! &&= ||= //= ";
# @token_long_names{@list} = ('logical') x scalar(@list);
#
# my @list = qw" . .= =~ !~ x x= ";
# write style sheet to STDOUT and die if requested
if ( defined( $rOpts->{'stylesheet'} ) ) {
write_style_sheet_file('-');
- exit 1;
+ Perl::Tidy::Exit 0;
}
# make sure user gives a file name after -css
if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
$css_linkname = $rOpts->{'html-linked-style-sheet'};
if ( $css_linkname =~ /^-/ ) {
- die "You must specify a valid filename after -css\n";
+ Perl::Tidy::Die "You must specify a valid filename after -css\n";
}
}
my $css_filename = shift;
my $fh;
unless ( $fh = IO::File->new("> $css_filename") ) {
- die "can't open $css_filename: $!\n";
+ Perl::Tidy::Die "can't open $css_filename: $!\n";
}
write_style_sheet_data($fh);
eval { $fh->close };
}
# Pod::Html requires a real temporary filename
- # If we are making a frame, we have a name available
- # Otherwise, we have to fine one
- my $tmpfile;
- if ( $rOpts->{'frames'} ) {
- $tmpfile = $self->{_toc_filename};
- }
- else {
- $tmpfile = Perl::Tidy::make_temporary_filename();
- }
- my $fh_tmp = IO::File->new( $tmpfile, 'w' );
+ my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
unless ($fh_tmp) {
- warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
+ Perl::Tidy::Warn
+ "unable to open temporary file $tmpfile; cannot use pod2html\n";
return $success_flag;
}
# Must clean up if pod2html dies (it can);
# Be careful not to overwrite callers __DIE__ routine
local $SIG{__DIE__} = sub {
- print $_[0];
unlink $tmpfile if -e $tmpfile;
- exit 1;
+ Perl::Tidy::Die $_[0];
};
pod2html(@args);
unless ($fh_tmp) {
# this error shouldn't happen ... we just used this filename
- warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
+ Perl::Tidy::Warn
+ "unable to open temporary file $tmpfile; cannot use pod2html\n";
goto RETURN;
}
my $html_fh = $self->{_html_fh};
my @toc;
my $in_toc;
+ my $ul_level = 0;
my $no_print;
# This routine will write the html selectively and store the toc
$title = escape_html($title);
$html_print->("<h1>$title</h1>\n");
}
+
+ # check for start of index, old pod2html
+ # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
+ # <!-- INDEX BEGIN -->
+ # <ul>
+ # ...
+ # </ul>
+ # <!-- INDEX END -->
+ #
elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
- $in_toc = 1;
+ $in_toc = 'INDEX';
+
+ # when frames are used, an extra table of contents in the
+ # contents panel is confusing, so don't print it
+ $no_print = $rOpts->{'frames'}
+ || !$rOpts->{'html-table-of-contents'};
+ $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
+ $html_print->($line);
+ }
+
+ # check for start of index, new pod2html
+ # After Pod::Html VERSION 1.15_02 it is delimited as:
+ # <ul id="index">
+ # ...
+ # </ul>
+ elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
+ $in_toc = 'UL';
+ $ul_level = 1;
# when frames are used, an extra table of contents in the
# contents panel is confusing, so don't print it
$html_print->($line);
}
- # Copy the perltidy toc, if any, after the Pod::Html toc
+ # Check for end of index, old pod2html
elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
$saw_index = 1;
$html_print->($line);
+
+ # Copy the perltidy toc, if any, after the Pod::Html toc
if ($toc_string) {
$html_print->("<hr />\n") if $rOpts->{'frames'};
$html_print->("<h2>Code Index:</h2>\n");
my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
$html_print->(@toc);
}
- $in_toc = 0;
+ $in_toc = "";
$no_print = 0;
}
+ # must track <ul> depth level for new pod2html
+ elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
+ $ul_level++;
+ $html_print->($line);
+ }
+
+ # Check for end of index, for new pod2html
+ elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
+ $ul_level--;
+ $html_print->($line);
+
+ # Copy the perltidy toc, if any, after the Pod::Html toc
+ if ( $ul_level <= 0 ) {
+ $saw_index = 1;
+ if ($toc_string) {
+ $html_print->("<hr />\n") if $rOpts->{'frames'};
+ $html_print->("<h2>Code Index:</h2>\n");
+ my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
+ $html_print->(@toc);
+ }
+ $in_toc = "";
+ $ul_level = 0;
+ $no_print = 0;
+ }
+ }
+
# Copy one perltidy section after each marker
elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
$line = $2;
# shouldn't happen: we stored a string before writing
# each marker.
- warn
+ Perl::Tidy::Warn
"Problem merging html stream with pod2html; order may be wrong\n";
}
$html_print->($line);
$success_flag = 1;
unless ($saw_body) {
- warn "Did not see <body> in pod2html output\n";
+ Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
$success_flag = 0;
}
unless ($saw_body_end) {
- warn "Did not see </body> in pod2html output\n";
+ Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
$success_flag = 0;
}
unless ($saw_index) {
- warn "Did not find INDEX END in pod2html output\n";
+ Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
$success_flag = 0;
}
# note that we have to unlink tmpfile before making frames
# because the tmpfile may be one of the names used for frames
- unlink $tmpfile if -e $tmpfile;
+ if ( -e $tmpfile ) {
+ unless ( unlink($tmpfile) ) {
+ Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n");
+ $success_flag = 0;
+ }
+ }
+
if ( $success_flag && $rOpts->{'frames'} ) {
$self->make_frame( \@toc );
}
# 1. Make the table of contents panel, with appropriate changes
# to the anchor names
my $src_frame_name = 'SRC';
- my $first_anchor =
+ my $first_anchor =
write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
$src_frame_name );
# 2. The current .html filename is renamed to be the contents panel
rename( $html_filename, $src_filename )
- or die "Cannot rename $html_filename to $src_filename:$!\n";
+ or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
# 3. Then use the original html filename for the frame
write_frame_html(
# write a separate html table of contents file for frames
my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
my $fh = IO::File->new( $toc_filename, 'w' )
- or die "Cannot open $toc_filename:$!\n";
+ or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
$fh->print(<<EOM);
<html>
<head>
my (
$title, $frame_filename, $top_basename,
$toc_basename, $src_basename, $src_frame_name
- )
- = @_;
+ ) = @_;
my $fh = IO::File->new( $frame_filename, 'w' )
- or die "Cannot open $toc_basename:$!\n";
+ or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
$fh->print(<<EOM);
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
my $self = shift;
my ( $token, $type ) = @_;
- return $token if ( $type eq 'b' ); # skip a blank token
+ return $token if ( $type eq 'b' ); # skip a blank token
return $token if ( $token =~ /^\s*$/ ); # skip a blank line
$token = escape_html($token);
elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
- elsif ( $line_type eq 'END_START' ) {
+ elsif ( $line_type eq 'END_START' ) {
$line_character = 'k';
$self->add_toc_item( '__END__', '__END__' );
}
# add the line number if requested
if ( $rOpts->{'html-line-numbers'} ) {
my $extra_space .=
- ( $line_number < 10 ) ? " "
+ ( $line_number < 10 ) ? " "
: ( $line_number < 100 ) ? " "
: ( $line_number < 1000 ) ? " "
- : "";
+ : "";
$html_line = $extra_space . $line_number . " " . $html_line;
}
# Caution: these debug flags produce a lot of output
# They should all be 0 except when debugging small scripts
- use constant FORMATTER_DEBUG_FLAG_BOND => 0;
- use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
- use constant FORMATTER_DEBUG_FLAG_CI => 0;
- use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
- use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
- use constant FORMATTER_DEBUG_FLAG_LIST => 0;
- use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
- use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
- use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
- use constant FORMATTER_DEBUG_FLAG_STORE => 0;
- use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
- use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
+ use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
+ use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
+ use constant FORMATTER_DEBUG_FLAG_BOND => 0;
+ use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
+ use constant FORMATTER_DEBUG_FLAG_CI => 0;
+ use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
+ use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
+ use constant FORMATTER_DEBUG_FLAG_LIST => 0;
+ use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
+ use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
+ use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
+ use constant FORMATTER_DEBUG_FLAG_STORE => 0;
+ use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
+ use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
my $debug_warning = sub {
- print "FORMATTER_DEBUGGING with key $_[0]\n";
+ print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
};
- FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
- FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
- FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
- FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
- FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
- FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
- FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
- FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
- FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
- FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
- FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
- FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
+ FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
+ FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
+ FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
+ FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
+ FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
+ FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
+ FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
+ FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
+ FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
+ FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
+ FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
+ FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
+ FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
+ FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
}
use Carp;
$last_indentation_written
$last_unadjusted_indentation
$last_leading_token
+ $last_output_short_opening_token
$saw_VERSION_in_this_file
$saw_END_or_DATA_
@container_environment_to_go
@bond_strength_to_go
@forced_breakpoint_to_go
- @lengths_to_go
+ @token_lengths_to_go
+ @summed_lengths_to_go
@levels_to_go
@leading_spaces_to_go
@reduced_spaces_to_go
@old_breakpoint_to_go
@tokens_to_go
@types_to_go
+ @inext_to_go
+ @iprev_to_go
%saved_opening_indentation
$last_last_nonblank_token_to_go
@nonblank_lines_at_depth
$starting_in_quote
+ $ending_in_quote
+ @whitespace_level_stack
+ $whitespace_last_level
+
+ $in_format_skipping_section
+ $format_skipping_pattern_begin
+ $format_skipping_pattern_end
$forced_breakpoint_count
$forced_breakpoint_undo_count
$added_semicolon_count
$first_added_semicolon_at
$last_added_semicolon_at
- $saw_negative_indentation
$first_tabbing_disagreement
$last_tabbing_disagreement
$in_tabbing_disagreement
%block_leading_text
%block_opening_line_number
$csc_new_statement_ok
+ $csc_last_label
+ %csc_block_label
$accumulating_text_for_block
$leading_block_text
$rleading_block_if_elsif_text
$closing_side_comment_prefix_pattern
$closing_side_comment_list_pattern
+ $blank_lines_after_opening_block_pattern
+ $blank_lines_before_closing_block_pattern
+
$last_nonblank_token
$last_nonblank_type
$last_last_nonblank_token
%is_assignment
%is_chain_operator
%is_if_unless_and_or_last_next_redo_return
+ %ok_to_add_semicolon_for_block_type
@has_broken_sublist
@dont_align
@want_comma_break
+ $is_static_block_comment
$index_start_one_line_block
$semicolons_before_block_self_destruct
$index_max_forced_break
%opening_vertical_tightness
%closing_vertical_tightness
%closing_token_indentation
+ $some_closing_token_indentation
+
+ %opening_token_right
+ %stack_opening_token
+ %stack_closing_token
+
$block_brace_vertical_tightness_pattern
$rOpts_add_newlines
$rOpts_break_at_old_keyword_breakpoints
$rOpts_break_at_old_comma_breakpoints
$rOpts_break_at_old_logical_breakpoints
- $rOpts_break_at_old_trinary_breakpoints
+ $rOpts_break_at_old_ternary_breakpoints
+ $rOpts_break_at_old_attribute_breakpoints
$rOpts_closing_side_comment_else_flag
$rOpts_closing_side_comment_maximum_text
$rOpts_continuation_indentation
$rOpts_line_up_parentheses
$rOpts_maximum_fields_per_table
$rOpts_maximum_line_length
+ $rOpts_variable_maximum_line_length
$rOpts_short_concatenation_item_length
- $rOpts_swallow_optional_blank_lines
- $rOpts_ignore_old_line_breaks
-
- $half_maximum_line_length
+ $rOpts_keep_old_blank_lines
+ $rOpts_ignore_old_breakpoints
+ $rOpts_format_skipping
+ $rOpts_space_function_paren
+ $rOpts_space_keyword_paren
+ $rOpts_keep_interior_semicolons
+ $rOpts_ignore_side_comment_lengths
+ $rOpts_stack_closing_block_brace
+ $rOpts_whitespace_cycle
+ $rOpts_tight_secret_operators
%is_opening_type
%is_closing_type
%is_opening_type
%is_closing_token
%is_opening_token
+
+ $SUB_PATTERN
+ $ASUB_PATTERN
};
BEGIN {
$bli_list_string = 'if else elsif unless while for foreach do : sub';
@_ = qw(
- .. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <>
+ .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
<= >= == =~ !~ != ++ -- /= x=
);
@is_digraph{@_} = (1) x scalar(@_);
- @_ = qw( ... **= <<= >>= &&= ||= <=> );
+ @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
@is_trigraph{@_} = (1) x scalar(@_);
@_ = qw(
= **= += *= &= <<= &&=
- -= /= |= >>= ||=
+ -= /= |= >>= ||= //=
.= %= ^=
x=
);
);
@is_keyword_returning_list{@_} = (1) x scalar(@_);
- @_ = qw(is if unless and or last next redo return);
+ @_ = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
@_ = qw(last next redo return);
@_ = qw(if unless);
@is_if_unless{@_} = (1) x scalar(@_);
- @_ = qw(and or);
+ @_ = qw(and or err);
@is_and_or{@_} = (1) x scalar(@_);
+ # Identify certain operators which often occur in chains.
+ # Note: the minus (-) causes a side effect of padding of the first line in
+ # something like this (by sub set_logical_padding):
+ # Checkbutton => 'Transmission checked',
+ # -variable => \$TRANS
+ # This usually improves appearance so it seems ok.
+ @_ = qw(&& || and or : ? . + - * /);
+ @is_chain_operator{@_} = (1) x scalar(@_);
+
# We can remove semicolons after blocks preceded by these keywords
- @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
- unless while until for foreach);
+ @_ =
+ qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
+ unless while until for foreach given when default);
@is_block_without_semicolon{@_} = (1) x scalar(@_);
+ # We will allow semicolons to be added within these block types
+ # as well as sub and package blocks.
+ # NOTES:
+ # 1. Note that these keywords are omitted:
+ # switch case given when default sort map grep
+ # 2. It is also ok to add for sub and package blocks and a labeled block
+ # 3. But not okay for other perltidy types including:
+ # { } ; G t
+ # 4. Test files: blktype.t, blktype1.t, semicolon.t
+ @_ =
+ qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
+ unless do while until eval for foreach );
+ @ok_to_add_semicolon_for_block_type{@_} = (1) x scalar(@_);
+
# 'L' is token for opening { at hash key
@_ = qw" L { ( [ ";
@is_opening_type{@_} = (1) x scalar(@_);
@_ = qw" } ) ] ";
@is_closing_token{@_} = (1) x scalar(@_);
+
+ # Patterns for standardizing matches to block types for regular subs and
+ # anonymous subs. Examples
+ # 'sub process' is a named sub
+ # 'sub ::m' is a named sub
+ # 'sub' is an anonymous sub
+ # 'sub:' is a label, not a sub
+ # 'substr' is a keyword
+ $SUB_PATTERN = '^sub\s+(::|\w)';
+ $ASUB_PATTERN = '^sub$';
}
# whitespace codes
sub _decrement_count { --$_count }
}
+sub trim {
+
+ # trim leading and trailing whitespace from a string
+ $_[0] =~ s/\s+$//;
+ $_[0] =~ s/^\s+//;
+ return $_[0];
+}
+
+sub max {
+ my $max = shift;
+ foreach (@_) {
+ $max = ( $max < $_ ) ? $_ : $max;
+ }
+ return $max;
+}
+
+sub min {
+ my $min = shift;
+ foreach (@_) {
+ $min = ( $min > $_ ) ? $_ : $min;
+ }
+ return $min;
+}
+
+sub split_words {
+
+ # given a string containing words separated by whitespace,
+ # return the list of words
+ my ($str) = @_;
+ return unless $str;
+ $str =~ s/\s+$//;
+ $str =~ s/^\s+//;
+ return split( /\s+/, $str );
+}
+
# interface to Perl::Tidy::Logger routines
sub warning {
if ($logger_object) {
$max_gnu_stack_index = 0;
$max_gnu_item_index = -1;
$gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
- @gnu_item_list = ();
- $last_output_indentation = 0;
- $last_indentation_written = 0;
- $last_unadjusted_indentation = 0;
- $last_leading_token = "";
+ @gnu_item_list = ();
+ $last_output_indentation = 0;
+ $last_indentation_written = 0;
+ $last_unadjusted_indentation = 0;
+ $last_leading_token = "";
+ $last_output_short_opening_token = 0;
$saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
$saw_END_or_DATA_ = 0;
@container_environment_to_go = ();
@bond_strength_to_go = ();
@forced_breakpoint_to_go = ();
- @lengths_to_go = (); # line length to start of ith token
+ @summed_lengths_to_go = (); # line length to start of ith token
+ @token_lengths_to_go = ();
@levels_to_go = ();
@matching_token_to_go = ();
@mate_index_to_go = ();
@types_to_go = ();
@leading_spaces_to_go = ();
@reduced_spaces_to_go = ();
+ @inext_to_go = ();
+ @iprev_to_go = ();
+
+ @whitespace_level_stack = ();
+ $whitespace_last_level = -1;
@dont_align = ();
@has_broken_sublist = ();
@want_comma_break = ();
@ci_stack = ("");
- $saw_negative_indentation = 0;
$first_tabbing_disagreement = 0;
$last_tabbing_disagreement = 0;
$tabbing_disagreement_count = 0;
$first_added_semicolon_at = 0;
$last_added_semicolon_at = 0;
$last_line_had_side_comment = 0;
+ $is_static_block_comment = 0;
%postponed_breakpoint = ();
# variables for adding side comments
%block_leading_text = ();
%block_opening_line_number = ();
$csc_new_statement_ok = 1;
+ %csc_block_label = ();
- %saved_opening_indentation = ();
+ %saved_opening_indentation = ();
+ $in_format_skipping_section = 0;
reset_block_text_accumulator();
$forced_breakpoint_count = 0;
$forced_breakpoint_undo_count = 0;
$rbrace_follower = undef;
- $lengths_to_go[0] = 0;
+ $summed_lengths_to_go[0] = 0;
$old_line_count_in_batch = 1;
$comma_count_in_batch = 0;
$starting_in_quote = 0;
my $line_type = $line_of_tokens->{_line_type};
my $input_line = $line_of_tokens->{_line_text};
- my $want_blank_line_next = 0;
+ if ( $rOpts->{notidy} ) {
+ write_unindented_line($input_line);
+ $last_line_type = $line_type;
+ return;
+ }
# _line_type codes are:
# SYSTEM - system-specific code before hash-bang line
# END_START - __END__ line
# END - unidentified text following __END__
# ERROR - we are in big trouble, probably not a perl script
- #
+
+ # put a blank line after an =cut which comes before __END__ and __DATA__
+ # (required by podchecker)
+ if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
+ }
+
# handle line of code..
if ( $line_type eq 'CODE' ) {
my $tee_line = 0;
if ( $line_type =~ /^POD/ ) {
- # Pod docs should have a preceding blank line. But be
- # very careful in __END__ and __DATA__ sections, because:
- # 1. the user may be using this section for any purpose whatsoever
- # 2. the blank counters are not active there
- # It should be safe to request a blank line between an
- # __END__ or __DATA__ and an immediately following '=head'
- # type line, (types END_START and DATA_START), but not for
- # any other lines of type END or DATA.
+ # Pod docs should have a preceding blank line. But stay
+ # out of __END__ and __DATA__ sections, because
+ # the user may be using this section for any purpose whatsoever
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
- if ( !$skip_line
+ if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
+ if ( !$skip_line
&& $line_type eq 'POD_START'
- && $last_line_type !~ /^(END|DATA)$/ )
+ && !$saw_END_or_DATA_ )
{
want_blank_line();
}
-
- # patch to put a blank line after =cut
- # (required by podchecker)
- if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $want_blank_line_next = 1;
- }
}
# leave the blank counters in a predictable state
if ( !$skip_line ) {
if ($tee_line) { $file_writer_object->tee_on() }
write_unindented_line($input_line);
- if ($tee_line) { $file_writer_object->tee_off() }
- if ($want_blank_line_next) { want_blank_line(); }
+ if ($tee_line) { $file_writer_object->tee_off() }
}
}
$last_line_type = $line_type;
# return the number of indentation spaces for a token in the output stream;
# these were previously stored by 'set_leading_whitespace'.
- return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
+ my $ii = shift;
+ if ( $ii < 0 ) { $ii = 0 }
+ return get_SPACES( $leading_spaces_to_go[$ii] );
}
# define: space count of leading string which would apply if it
# were the first token of a new line.
- my ( $level, $ci_level, $in_continued_quote ) = @_;
+ my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
+
+ # Adjust levels if necessary to recycle whitespace:
+ # given $level_abs, the absolute level
+ # define $level, a possibly reduced level for whitespace
+ my $level = $level_abs;
+ if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
+ if ( $level_abs < $whitespace_last_level ) {
+ pop(@whitespace_level_stack);
+ }
+ if ( !@whitespace_level_stack ) {
+ push @whitespace_level_stack, $level_abs;
+ }
+ elsif ( $level_abs > $whitespace_last_level ) {
+ $level = $whitespace_level_stack[-1] +
+ ( $level_abs - $whitespace_last_level );
+
+ if (
+ # 1 Try to break at a block brace
+ (
+ $level > $rOpts_whitespace_cycle
+ && $last_nonblank_type eq '{'
+ && $last_nonblank_token eq '{'
+ )
+
+ # 2 Then either a brace or bracket
+ || ( $level > $rOpts_whitespace_cycle + 1
+ && $last_nonblank_token =~ /^[\{\[]$/ )
+
+ # 3 Then a paren too
+ || $level > $rOpts_whitespace_cycle + 2
+ )
+ {
+ $level = 1;
+ }
+ push @whitespace_level_stack, $level;
+ }
+ $level = $whitespace_level_stack[-1];
+ }
+ $whitespace_last_level = $level_abs;
# modify for -bli, which adds one continuation indentation for
# opening braces
# handle the standard indentation scheme
#-------------------------------------------
unless ($rOpts_line_up_parentheses) {
- my $space_count = $ci_level * $rOpts_continuation_indentation + $level *
- $rOpts_indent_columns;
+ my $space_count =
+ $ci_level * $rOpts_continuation_indentation +
+ $level * $rOpts_indent_columns;
my $ci_spaces =
( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
my $space_count = 0;
my $available_space = 0;
$level = -1; # flag to prevent storing in item_list
- $leading_spaces_to_go[$max_index_to_go] =
+ $leading_spaces_to_go[$max_index_to_go] =
$reduced_spaces_to_go[$max_index_to_go] =
new_lp_indentation_item( $space_count, $level, $ci_level,
$available_space, 0 );
# find the position if we break at the '='
my $i_test = $last_equals;
if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+
+ # TESTING
+ ##my $too_close = ($i_test==$max_index_to_go-1);
+
my $test_position = total_line_length( $i_test, $max_index_to_go );
+ my $mll = maximum_line_length($i_test);
if (
+ # the equals is not just before an open paren (testing)
+ ##!$too_close &&
+
# if we are beyond the midpoint
- $gnu_position_predictor > $half_maximum_line_length
+ $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
- # or if we can save some space by breaking at the '='
- # without obscuring the second line by the first
- || ( $test_position > 1 +
- total_line_length( $line_start_index_to_go, $last_equals ) )
+ # or we are beyond the 1/4 point and there was an old
+ # break at the equals
+ || (
+ $gnu_position_predictor >
+ $mll - $rOpts_maximum_line_length * 3 / 4
+ && (
+ $old_breakpoint_to_go[$last_equals]
+ || ( $last_equals > 0
+ && $old_breakpoint_to_go[ $last_equals - 1 ] )
+ || ( $last_equals > 1
+ && $types_to_go[ $last_equals - 1 ] eq 'b'
+ && $old_breakpoint_to_go[ $last_equals - 2 ] )
+ )
+ )
)
{
}
}
+ my $halfway =
+ maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
+
# Check for decreasing depth ..
# Note that one token may have both decreasing and then increasing
# depth. For example, (level, ci) can go from (1,1) to (2,0). So,
# to this minimum standard indentation. But the most deeply
# nested container will still probably be able to shift its
# parameters to the right for proper alignment, so in most
- # cases this will not be noticable.
- if ( $available_space > 0
- && $space_count > $half_maximum_line_length )
- {
+ # cases this will not be noticeable.
+ if ( $available_space > 0 && $space_count > $halfway ) {
$gnu_stack[$max_gnu_stack_index]
->tentatively_decrease_AVAILABLE_SPACES($available_space);
}
$last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
# and it is significantly to the right
- || $gnu_position_predictor > $half_maximum_line_length
+ || $gnu_position_predictor > $halfway
)
)
)
total_line_length( $line_start_index_to_go, $max_index_to_go );
}
else {
- $gnu_position_predictor = $space_count +
- token_sequence_length( $max_index_to_go, $max_index_to_go );
+ $gnu_position_predictor =
+ $space_count + $token_lengths_to_go[$max_index_to_go];
}
# store the indentation object for this token
# keep 2 extra free because they are needed in some cases
# (result of trial-and-error testing)
my $spaces_needed =
- $gnu_position_predictor - $rOpts_maximum_line_length + 2;
+ $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
- return if ( $spaces_needed < 0 );
+ return if ( $spaces_needed <= 0 );
# We are over the limit, so try to remove a requested number of
# spaces from leading whitespace. We are only allowed to remove
for ( ; $i <= $max_gnu_item_index ; $i++ ) {
my $old_spaces = $gnu_item_list[$i]->get_SPACES();
- if ( $old_spaces > $deleted_spaces ) {
+ if ( $old_spaces >= $deleted_spaces ) {
$gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
}
sub finish_lp_batch {
- # This routine is called once after each each output stream batch is
+ # This routine is called once after each output stream batch is
# finished to undo indentation for all incomplete -lp
# indentation levels. It is too risky to leave a level open,
# because then we can't backtrack in case of a long line to follow.
sub token_sequence_length {
- # return length of tokens ($ifirst .. $ilast) including first & last
- # returns 0 if $ifirst > $ilast
- my $ifirst = shift;
- my $ilast = shift;
- return 0 if ( $ilast < 0 || $ifirst > $ilast );
- return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
- return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
+ # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
+ # returns 0 if $ibeg > $iend (shouldn't happen)
+ my ( $ibeg, $iend ) = @_;
+ return 0 if ( $iend < 0 || $ibeg > $iend );
+ return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
+ return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
}
sub total_line_length {
- # return length of a line of tokens ($ifirst .. $ilast)
- my $ifirst = shift;
- my $ilast = shift;
- if ( $ifirst < 0 ) { $ifirst = 0 }
+ # return length of a line of tokens ($ibeg .. $iend)
+ my ( $ibeg, $iend ) = @_;
+ return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+}
+
+sub maximum_line_length_for_level {
+
+ # return maximum line length for line starting with a given level
+ my $maximum_line_length = $rOpts_maximum_line_length;
+
+ # Modify if -vmll option is selected
+ if ($rOpts_variable_maximum_line_length) {
+ my $level = shift;
+ if ( $level < 0 ) { $level = 0 }
+ $maximum_line_length += $level * $rOpts_indent_columns;
+ }
+ return $maximum_line_length;
+}
+
+sub maximum_line_length {
+
+ # return maximum line length for line starting with the token at given index
+ return maximum_line_length_for_level( $levels_to_go[ $_[0] ] );
- return leading_spaces_to_go($ifirst) +
- token_sequence_length( $ifirst, $ilast );
}
sub excess_line_length {
- # return number of characters by which a line of tokens ($ifirst..$ilast)
+ # return number of characters by which a line of tokens ($ibeg..$iend)
# exceeds the allowable line length.
- my $ifirst = shift;
- my $ilast = shift;
- if ( $ifirst < 0 ) { $ifirst = 0 }
- return leading_spaces_to_go($ifirst) +
- token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
+ my ( $ibeg, $iend ) = @_;
+ return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg);
}
sub finish_formatting {
write_logfile_entry("No indentation disagreement seen\n");
}
}
+ if ($first_tabbing_disagreement) {
+ write_logfile_entry(
+"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
+ );
+ }
write_logfile_entry("\n");
$vertical_aligner_object->report_anything_unusual();
# This routine is called to check the Opts hash after it is defined
($rOpts) = @_;
- my ( $tabbing_string, $tab_msg );
make_static_block_comment_pattern();
make_static_side_comment_pattern();
make_closing_side_comment_prefix();
make_closing_side_comment_list_pattern();
+ $format_skipping_pattern_begin =
+ make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
+ $format_skipping_pattern_end =
+ make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
# If closing side comments ARE selected, then we can safely
# delete old closing side comments unless closing side comment
make_bli_pattern();
make_block_brace_vertical_tightness_pattern();
+ make_blank_line_pattern();
if ( $rOpts->{'line-up-parentheses'} ) {
|| !$rOpts->{'add-newlines'}
|| !$rOpts->{'delete-old-newlines'} )
{
- warn <<EOM;
+ Perl::Tidy::Warn <<EOM;
-----------------------------------------------------------------------
Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
}
}
- # At present, tabs are not compatable with the line-up-parentheses style
+ # At present, tabs are not compatible with the line-up-parentheses style
# (it would be possible to entab the total leading whitespace
# just prior to writing the line, if desired).
if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
- warn <<EOM;
+ Perl::Tidy::Warn <<EOM;
Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
- # Likewise, tabs are not compatable with outdenting..
+ # Likewise, tabs are not compatible with outdenting..
if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
- warn <<EOM;
+ Perl::Tidy::Warn <<EOM;
Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
- warn <<EOM;
+ Perl::Tidy::Warn <<EOM;
Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
# implement outdenting preferences for keywords
%outdent_keyword = ();
-
- # load defaults
- @_ = qw(next last redo goto return);
-
- # override defaults if requested
- if ( $_ = $rOpts->{'outdent-keyword-list'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
+ @_ = qw(next last redo goto return); # defaults
}
# FUTURE: if not a keyword, assume that it is an identifier
$outdent_keyword{$_} = 1;
}
else {
- warn "ignoring '$_' in -okwl list; not a perl keyword";
+ Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
}
}
# implement user whitespace preferences
- if ( $_ = $rOpts->{'want-left-space'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
@want_left_space{@_} = (1) x scalar(@_);
}
- if ( $_ = $rOpts->{'want-right-space'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
@want_right_space{@_} = (1) x scalar(@_);
}
- if ( $_ = $rOpts->{'nowant-left-space'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+
+ if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
@want_left_space{@_} = (-1) x scalar(@_);
}
- if ( $_ = $rOpts->{'nowant-right-space'} ) {
- s/^\s+//;
- s/\s+$//;
+ if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
@want_right_space{@_} = (-1) x scalar(@_);
}
if ( $rOpts->{'dump-want-left-space'} ) {
dump_want_left_space(*STDOUT);
- exit 1;
+ Perl::Tidy::Exit 0;
}
if ( $rOpts->{'dump-want-right-space'} ) {
dump_want_right_space(*STDOUT);
- exit 1;
+ Perl::Tidy::Exit 0;
}
# default keywords for which space is introduced before an opening paren
# (at present, including them messes up vertical alignment)
- @_ = qw(my local our and or eq ne if else elsif until
- unless while for foreach return switch case given when);
+ @_ = qw(my local our and or err eq ne if else elsif until
+ unless while for foreach return switch case given when catch);
@space_after_keyword{@_} = (1) x scalar(@_);
- # allow user to modify these defaults
- if ( $_ = $rOpts->{'space-after-keyword'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
- @space_after_keyword{@_} = (1) x scalar(@_);
- }
+ # first remove any or all of these if desired
+ if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
- if ( $_ = $rOpts->{'nospace-after-keyword'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ # -nsak='*' selects all the above keywords
+ if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
@space_after_keyword{@_} = (0) x scalar(@_);
}
+ # then allow user to add to these defaults
+ if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
+ @space_after_keyword{@_} = (1) x scalar(@_);
+ }
+
# implement user break preferences
- if ( $_ = $rOpts->{'want-break-after'} ) {
- @_ = split /\s+/;
+ my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ . : ? && || and or err xor
+ );
+
+ my $break_after = sub {
foreach my $tok (@_) {
if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
my $lbs = $left_bond_strength{$tok};
( $lbs, $rbs );
}
}
- }
+ };
- if ( $_ = $rOpts->{'want-break-before'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ my $break_before = sub {
foreach my $tok (@_) {
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
( $lbs, $rbs );
}
}
- }
+ };
+
+ $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
+ $break_before->(@all_operators)
+ if ( $rOpts->{'break-before-all-operators'} );
+
+ $break_after->( split_words( $rOpts->{'want-break-after'} ) );
+ $break_before->( split_words( $rOpts->{'want-break-before'} ) );
# make note if breaks are before certain key types
%want_break_before = ();
-
- foreach my $tok ( '.', ',', ':', '?', '&&', '||', 'and', 'or', 'xor' ) {
+ foreach my $tok ( @all_operators, ',' ) {
$want_break_before{$tok} =
$left_bond_strength{$tok} < $right_bond_strength{$tok};
}
# Define here tokens which may follow the closing brace of a do statement
# on the same line, as in:
# } while ( $something);
- @_ = qw(until while unless if ; );
+ @_ = qw(until while unless if ; : );
push @_, ',';
@is_do_follower{@_} = (1) x scalar(@_);
%is_else_brace_follower = ();
# what can follow a multi-line anonymous sub definition closing curly:
- @_ = qw# ; : => or and && || ) #;
+ @_ = qw# ; : => or and && || ~~ !~~ ) #;
push @_, ',';
@is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
- # what can follow a one-line anonynomous sub closing curly:
- # one-line anonumous subs also have ']' here...
+ # what can follow a one-line anonymous sub closing curly:
+ # one-line anonymous subs also have ']' here...
# see tk3.t and PP.pm
- @_ = qw# ; : => or and && || ) ] #;
+ @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
push @_, ',';
@is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
$rOpts->{'long-block-line-count'} = 1000000;
}
+ my $enc = $rOpts->{'character-encoding'};
+ if ( $enc && $enc !~ /^(none|utf8)$/i ) {
+ Perl::Tidy::Die <<EOM;
+Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
+EOM
+ }
+
my $ole = $rOpts->{'output-line-ending'};
- ##if ($^O =~ /^(VMS|
if ($ole) {
my %endings = (
dos => "\015\012",
mac => "\015",
unix => "\012",
);
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join " ", keys %endings;
- die <<EOM;
+
+ # Patch for RT #99514, a memoization issue.
+ # Normally, the user enters one of 'dos', 'win', etc, and we change the
+ # value in the options parameter to be the corresponding line ending
+ # character. But, if we are using memoization, on later passes through
+ # here the option parameter will already have the desired ending
+ # character rather than the keyword 'dos', 'win', etc. So
+ # we must check to see if conversion has already been done and, if so,
+ # bypass the conversion step.
+ my %endings_inverted = (
+ "\015\012" => 'dos',
+ "\015\012" => 'win',
+ "\015" => 'mac',
+ "\012" => 'unix',
+ );
+
+ if ( defined( $endings_inverted{$ole} ) ) {
+
+ # we already have valid line ending, nothing more to do
+ }
+ else {
+ $ole = lc $ole;
+ unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
+ my $str = join " ", keys %endings;
+ Perl::Tidy::Die <<EOM;
Unrecognized line ending '$ole'; expecting one of: $str
EOM
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- warn "Ignoring -ple; conflicts with -ole\n";
- $rOpts->{'preserve-line-endings'} = undef;
+ }
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
+ $rOpts->{'preserve-line-endings'} = undef;
+ }
}
}
);
# frequently used parameters
- $rOpts_add_newlines = $rOpts->{'add-newlines'};
- $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
- $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
+ $rOpts_add_newlines = $rOpts->{'add-newlines'};
+ $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
+ $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
$rOpts_block_brace_vertical_tightness =
$rOpts->{'block-brace-vertical-tightness'};
$rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
$rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
- $rOpts_break_at_old_trinary_breakpoints =
- $rOpts->{'break-at-old-trinary-breakpoints'};
+ $rOpts_break_at_old_ternary_breakpoints =
+ $rOpts->{'break-at-old-ternary-breakpoints'};
+ $rOpts_break_at_old_attribute_breakpoints =
+ $rOpts->{'break-at-old-attribute-breakpoints'};
$rOpts_break_at_old_comma_breakpoints =
$rOpts->{'break-at-old-comma-breakpoints'};
$rOpts_break_at_old_keyword_breakpoints =
$rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
$rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+ $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
+
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
$rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
- $rOpts_swallow_optional_blank_lines =
- $rOpts->{'swallow-optional-blank-lines'};
- $rOpts_ignore_old_line_breaks = $rOpts->{'ignore-old-line-breaks'};
- $half_maximum_line_length = $rOpts_maximum_line_length / 2;
+
+ $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
+ $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
+ $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
+ $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
+ $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
+ $rOpts_ignore_side_comment_lengths =
+ $rOpts->{'ignore-side-comment-lengths'};
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
);
+ $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'};
+
# assume flag for '>' same as ')' for closing qw quotes
%closing_token_indentation = (
')' => $rOpts->{'closing-paren-indentation'},
']' => $rOpts->{'closing-square-bracket-indentation'},
'>' => $rOpts->{'closing-paren-indentation'},
);
+
+ # flag indicating if any closing tokens are indented
+ $some_closing_token_indentation =
+ $rOpts->{'closing-paren-indentation'}
+ || $rOpts->{'closing-brace-indentation'}
+ || $rOpts->{'closing-square-bracket-indentation'}
+ || $rOpts->{'indent-closing-brace'};
+
+ %opening_token_right = (
+ '(' => $rOpts->{'opening-paren-right'},
+ '{' => $rOpts->{'opening-hash-brace-right'},
+ '[' => $rOpts->{'opening-square-bracket-right'},
+ );
+
+ %stack_opening_token = (
+ '(' => $rOpts->{'stack-opening-paren'},
+ '{' => $rOpts->{'stack-opening-hash-brace'},
+ '[' => $rOpts->{'stack-opening-square-bracket'},
+ );
+
+ %stack_closing_token = (
+ ')' => $rOpts->{'stack-closing-paren'},
+ '}' => $rOpts->{'stack-closing-hash-brace'},
+ ']' => $rOpts->{'stack-closing-square-bracket'},
+ );
+ $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
}
sub make_static_block_comment_pattern {
# create the pattern used to identify static block comments
- $static_block_comment_pattern = '^(\s*)##';
+ $static_block_comment_pattern = '^\s*##';
# allow the user to change it
if ( $rOpts->{'static-block-comment-prefix'} ) {
my $prefix = $rOpts->{'static-block-comment-prefix'};
$prefix =~ s/^\s*//;
- if ( $prefix !~ /^#/ ) {
- die "ERROR: the -sbcp prefix '$prefix' must begin with '#'\n";
+ my $pattern = $prefix;
+ # user may give leading caret to force matching left comments only
+ if ( $prefix !~ /^\^#/ ) {
+ if ( $prefix !~ /^#/ ) {
+ Perl::Tidy::Die
+"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
+ }
+ $pattern = '^\s*' . $prefix;
}
- my $pattern = '^(\s*)' . $prefix;
eval "'##'=~/$pattern/";
if ($@) {
- die
+ Perl::Tidy::Die
"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
}
$static_block_comment_pattern = $pattern;
}
}
+sub make_format_skipping_pattern {
+ my ( $opt_name, $default ) = @_;
+ my $param = $rOpts->{$opt_name};
+ unless ($param) { $param = $default }
+ $param =~ s/^\s*//;
+ if ( $param !~ /^#/ ) {
+ Perl::Tidy::Die
+ "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
+ }
+ my $pattern = '^' . $param . '\s';
+ eval "'#'=~/$pattern/";
+ if ($@) {
+ Perl::Tidy::Die
+"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
+ }
+ return $pattern;
+}
+
sub make_closing_side_comment_list_pattern {
# turn any input list into a regex for recognizing selected block types
sub make_bli_pattern {
- if (
- defined(
- $rOpts->{'brace-left-and-indent-list'}
- && $rOpts->{'brace-left-and-indent-list'}
- )
- )
+ if ( defined( $rOpts->{'brace-left-and-indent-list'} )
+ && $rOpts->{'brace-left-and-indent-list'} )
{
$bli_list_string = $rOpts->{'brace-left-and-indent-list'};
}
# turn any input list into a regex for recognizing selected block types
$block_brace_vertical_tightness_pattern =
'^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
-
- if (
- defined(
- $rOpts->{'block-brace-vertical-tightness-list'}
- && $rOpts->{'block-brace-vertical-tightness-list'}
- )
- )
+ if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
+ && $rOpts->{'block-brace-vertical-tightness-list'} )
{
$block_brace_vertical_tightness_pattern =
make_block_pattern( '-bbvtl',
}
}
+sub make_blank_line_pattern {
+
+ $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
+ my $key = 'blank-lines-before-closing-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_before_closing_block_pattern =
+ make_block_pattern( '-blbcl', $rOpts->{$key} );
+ }
+
+ $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
+ $key = 'blank-lines-after-opening-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_after_opening_block_pattern =
+ make_block_pattern( '-blaol', $rOpts->{$key} );
+ }
+}
+
sub make_block_pattern {
# given a string of block-type keywords, return a regex to match them
# input string: "if else elsif unless while for foreach do : sub";
# pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+ # Minor Update:
+ #
+ # To distinguish between anonymous subs and named subs, use 'sub' to
+ # indicate a named sub, and 'asub' to indicate an anonymous sub
+
my ( $abbrev, $string ) = @_;
- $string =~ s/^\s+//;
- $string =~ s/\s+$//;
- my @list = split /\s+/, $string;
+ my @list = split_words($string);
my @words = ();
my %seen;
for my $i (@list) {
+ if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
next if $seen{$i};
$seen{$i} = 1;
if ( $i eq 'sub' ) {
}
+ elsif ( $i eq 'asub' ) {
+ }
+ elsif ( $i eq ';' ) {
+ push @words, ';';
+ }
+ elsif ( $i eq '{' ) {
+ push @words, '\{';
+ }
elsif ( $i eq ':' ) {
push @words, '\w+:';
}
push @words, $i;
}
else {
- warn "unrecognized block type $i after $abbrev, ignoring\n";
+ Perl::Tidy::Warn
+ "unrecognized block type $i after $abbrev, ignoring\n";
}
}
my $pattern = '(' . join( '|', @words ) . ')$';
+ my $sub_patterns = "";
if ( $seen{'sub'} ) {
- $pattern = '(' . $pattern . '|sub)';
+ $sub_patterns .= '|' . $SUB_PATTERN;
+ }
+ if ( $seen{'asub'} ) {
+ $sub_patterns .= '|' . $ASUB_PATTERN;
+ }
+ if ($sub_patterns) {
+ $pattern = '(' . $pattern . $sub_patterns . ')';
}
$pattern = '^' . $pattern;
return $pattern;
my $pattern = '^' . $prefix;
eval "'##'=~/$pattern/";
if ($@) {
- die
+ Perl::Tidy::Die
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
}
$static_side_comment_pattern = $pattern;
# shouldn't happen..must have screwed up escaping, above
report_definite_bug();
- warn
+ Perl::Tidy::Warn
"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
# just warn and keep going with defaults
- warn "Please consider using a simpler -cscp prefix\n";
- warn "Using default -cscp instead; please check output\n";
+ Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
+ Perl::Tidy::Warn
+ "Using default -cscp instead; please check output\n";
}
else {
$csc_prefix = $test_csc_prefix;
sub is_essential_whitespace {
- # Essential whitespace means whitespace which cannot be safely deleted.
+ # Essential whitespace means whitespace which cannot be safely deleted
+ # without risking the introduction of a syntax error.
# We are given three tokens and their types:
# ($tokenl, $typel) is the token to the left of the space in question
# ($tokenr, $typer) is the token to the right of the space in question
#
# This is a slow routine but is not needed too often except when -mangle
# is used.
+ #
+ # Note: This routine should almost never need to be changed. It is
+ # for avoiding syntax problems rather than for formatting.
my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
- # never combine two bare words or numbers
- my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) )
+ my $result =
+
+ # never combine two bare words or numbers
+ # examples: and ::ok(1)
+ # return ::spw(...)
+ # for bla::bla:: abc
+ # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+ # $input eq"quit" to make $inputeq"quit"
+ # my $size=-s::SINK if $file; <==OK but we won't do it
+ # don't join something like: for bla::bla:: abc
+ # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+ ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
+ && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
- # do not combine a number with a concatination dot
+ # do not combine a number with a concatenation dot
# example: pom.caputo:
# $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
|| ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
# || ($tokenr eq '-')
# keep a space between a quote and a bareword to prevent the
- # bareword from becomming a quote modifier.
+ # bareword from becoming a quote modifier.
|| ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
# keep a space between a token ending in '$' and any word;
# retain any space after possible filehandle
# (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
- || ( $typel eq 'Z' || $typell eq 'Z' )
+ || ( $typel eq 'Z' )
+
+ # Perl is sensitive to whitespace after the + here:
+ # $b = xvals $a + 0.1 * yvals $a;
+ || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
# keep paren separate in 'use Foo::Bar ()'
|| ( $tokenr eq '('
- && $typel eq 'w'
- && $typell eq 'k'
+ && $typel eq 'w'
+ && $typell eq 'k'
&& $tokenll eq 'use' )
# keep any space between filehandle and paren:
# retain any space after here doc operator ( hereerr.t)
|| ( $typel eq 'h' )
- # FIXME: this needs some further work; extrude.t has test cases
- # it is safest to retain any space after start of ? : operator
- # because of perl's quirky parser.
- # ie, this line will fail if you remove the space after the '?':
- # $b=join $comma ? ',' : ':', @_; # ok
- # $b=join $comma ?',' : ':', @_; # error!
- # but this is ok :)
- # $b=join $comma?',' : ':', @_; # not a problem!
- ## || ($typel eq '?')
-
# be careful with a space around ++ and --, to avoid ambiguity as to
# which token it applies
|| ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
$tokenl eq 'my'
# /^(for|foreach)$/
- && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/
+ && $is_for_foreach{$tokenll}
+ && $tokenr =~ /^\$/
)
# must have space between grep and left paren; "grep(" will fail
#use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
|| ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
- # don't join something like: for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) )
+ # We must be sure that a space between a ? and a quoted string
+ # remains if the space before the ? remains. [Loca.pm, lockarea]
+ # ie,
+ # $b=join $comma ? ',' : ':', @_; # ok
+ # $b=join $comma?',' : ':', @_; # ok!
+ # $b=join $comma ?',' : ':', @_; # error!
+ # Not really required:
+ ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
+
+ # do not remove space between an '&' and a bare word because
+ # it may turn into a function evaluation, like here
+ # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
+ # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
+ || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+
+ # space stacked labels (TODO: check if really necessary)
+ || ( $typel eq 'J' && $typer eq 'J' )
+
; # the value of this long logic sequence is the result we want
return $result;
}
}
-sub set_white_space_flag {
+{
+ my %secret_operators;
+ my %is_leading_secret_token;
- # This routine examines each pair of nonblank tokens and
- # sets values for array @white_space_flag.
+ BEGIN {
+
+ # token lists for perl secret operators as compiled by Philippe Bruhat
+ # at: https://metacpan.org/module/perlsecret
+ %secret_operators = (
+ 'Goatse' => [qw#= ( ) =#], #=( )=
+ 'Venus1' => [qw#0 +#], # 0+
+ 'Venus2' => [qw#+ 0#], # +0
+ 'Enterprise' => [qw#) x ! !#], # ()x!!
+ 'Kite1' => [qw#~ ~ <>#], # ~~<>
+ 'Kite2' => [qw#~~ <>#], # ~~<>
+ 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
+ );
+
+ # The following operators and constants are not included because they
+ # are normally kept tight by perltidy:
+ # !! ~~ <~>
+ #
+
+ # Make a lookup table indexed by the first token of each operator:
+ # first token => [list, list, ...]
+ foreach my $value ( values(%secret_operators) ) {
+ my $tok = $value->[0];
+ push @{ $is_leading_secret_token{$tok} }, $value;
+ }
+ }
+
+ sub secret_operator_whitespace {
+
+ my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_;
+
+ # Loop over all tokens in this line
+ my ( $j, $token, $type );
+ for ( $j = 0 ; $j <= $jmax ; $j++ ) {
+
+ $token = $$rtokens[$j];
+ $type = $$rtoken_type[$j];
+
+ # Skip unless this token might start a secret operator
+ next if ( $type eq 'b' );
+ next unless ( $is_leading_secret_token{$token} );
+
+ # Loop over all secret operators with this leading token
+ foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
+ my $jend = $j - 1;
+ foreach my $tok ( @{$rpattern} ) {
+ $jend++;
+ $jend++
+
+ if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' );
+ if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) {
+ $jend = undef;
+ last;
+ }
+ }
+
+ if ($jend) {
+
+ # set flags to prevent spaces within this operator
+ for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) {
+ $rwhite_space_flag->[$jj] = WS_NO;
+ }
+ $j = $jend;
+ last;
+ }
+ } ## End Loop over all operators
+ } ## End loop over all tokens
+ } # End sub
+}
+
+sub set_white_space_flag {
+
+ # This routine examines each pair of nonblank tokens and
+ # sets values for array @white_space_flag.
#
# $white_space_flag[$j] is a flag indicating whether a white space
# BEFORE token $j is needed, with the following values:
#
- # -1 do not want a space before token $j
- # 0 optional space or $j is a whitespace
- # 1 want a space before token $j
+ # WS_NO = -1 do not want a space before token $j
+ # WS_OPTIONAL= 0 optional space or $j is a whitespace
+ # WS_YES = 1 want a space before token $j
#
#
# The values for the first token will be defined based
@is_closing_type{@_} = (1) x scalar(@_);
my @spaces_both_sides = qw"
- + - * / % ? = . : x < > | & ^ .. << >> ** && .. || => += -=
- .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>=
- &&= ||= <=> A k f w F n C Y U G v
+ + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
+ .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
+ &&= ||= //= <=> A k f w F n C Y U G v
";
my @spaces_left_side = qw"
; } ) ] R J ++ -- **=
";
push( @spaces_right_side, ',' ); # avoids warning message
+
+ # Note that we are in a BEGIN block here. Later in processing
+ # the values of %want_left_space and %want_right_space
+ # may be overridden by any user settings specified by the
+ # -wls and -wrs parameters. However the binary_whitespace_rules
+ # are hardwired and have priority.
@want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
@want_right_space{@spaces_both_sides} =
(1) x scalar(@spaces_both_sides);
(-1) x scalar(@spaces_right_side);
@want_right_space{@spaces_right_side} =
(1) x scalar(@spaces_right_side);
- $want_left_space{'L'} = WS_NO;
- $want_left_space{'->'} = WS_NO;
- $want_right_space{'->'} = WS_NO;
- $want_left_space{'**'} = WS_NO;
- $want_right_space{'**'} = WS_NO;
-
+ $want_left_space{'->'} = WS_NO;
+ $want_right_space{'->'} = WS_NO;
+ $want_left_space{'**'} = WS_NO;
+ $want_right_space{'**'} = WS_NO;
+ $want_right_space{'CORE::'} = WS_NO;
+
+ # These binary_ws_rules are hardwired and have priority over the above
+ # settings. It would be nice to allow adjustment by the user,
+ # but it would be complicated to specify.
+ #
# hash type information must stay tightly bound
# as in : ${xxxx}
$binary_ws_rules{'i'}{'L'} = WS_NO;
$binary_ws_rules{'@'}{'L'} = WS_NO;
$binary_ws_rules{'@'}{'{'} = WS_NO;
$binary_ws_rules{'='}{'L'} = WS_YES;
+ $binary_ws_rules{'J'}{'J'} = WS_YES;
# the following includes ') {'
# as in : if ( xxx ) { yyy }
$binary_ws_rules{'R'}{'++'} = WS_NO;
$binary_ws_rules{'R'}{'--'} = WS_NO;
- $binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
- $binary_ws_rules{'w'}{':'} = WS_NO;
$binary_ws_rules{'i'}{'Q'} = WS_YES;
$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
- # FIXME: we need to split 'i' into variables and functions
+ # FIXME: we could to split 'i' into variables and functions
# and have no space for functions but space for variables. For now,
# I have a special patch in the special rules below
$binary_ws_rules{'i'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'{'} = WS_YES;
- }
+ } ## end BEGIN block
+
my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
my ( $last_token, $last_type, $last_block_type, $token, $type,
$block_type );
$token = $tokens_to_go[$max_index_to_go];
$type = $types_to_go[$max_index_to_go];
$block_type = $block_type_to_go[$max_index_to_go];
+
+ #---------------------------------------------------------------
+ # Patch due to splitting of tokens with leading ->
+ #---------------------------------------------------------------
+ #
+ # This routine is dealing with the raw tokens from the tokenizer,
+ # but to get started it needs the previous token, which will
+ # have been stored in the '_to_go' arrays.
+ #
+ # This patch avoids requiring two iterations to
+ # converge for cases such as the following, where a paren
+ # comes in on a line following a variable with leading arrow:
+ # $self->{main}->add_content_defer_opening
+ # ($name, $wmkf, $self->{attrs}, $self);
+ # In this case when we see the opening paren on line 2 we need
+ # to know if the last token on the previous line had an arrow,
+ # but it has already been split off so we have to add it back
+ # in to avoid getting an unwanted space before the paren.
+ if ( $type =~ /^[wi]$/ ) {
+ my $im = $iprev_to_go[$max_index_to_go];
+ my $tm = ( $im >= 0 ) ? $types_to_go[$im] : "";
+ if ( $tm eq '->' ) { $token = $tm . $token }
+ }
+
+ #---------------------------------------------------------------
+ # End patch due to splitting of tokens with leading ->
+ #---------------------------------------------------------------
}
else {
$token = ' ';
$block_type = '';
}
- # loop over all tokens
my ( $j, $ws );
+ # main loop over all tokens to define the whitespace flags
for ( $j = 0 ; $j <= $jmax ; $j++ ) {
if ( $$rtoken_type[$j] eq 'b' ) {
$block_type = $$rblock_type[$j];
#---------------------------------------------------------------
- # section 1:
- # handle space on the inside of opening braces
+ # Whitespace Rules Section 1:
+ # Handle space on the inside of opening braces.
#---------------------------------------------------------------
# /^[L\{\(\[]$/
}
else { $tightness = $tightness{$last_token} }
+ #=============================================================
+ # Patch for test problem fabrice_bug.pl
+ # We must always avoid spaces around a bare word beginning
+ # with ^ as in:
+ # my $before = ${^PREMATCH};
+ # Because all of the following cause an error in perl:
+ # my $before = ${ ^PREMATCH };
+ # my $before = ${ ^PREMATCH};
+ # my $before = ${^PREMATCH };
+ # So if brace tightness flag is -bt=0 we must temporarily reset
+ # to bt=1. Note that here we must set tightness=1 and not 2 so
+ # that the closing space
+ # is also avoided (via the $j_tight_closing_paren flag in coding)
+ if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
+
+ #=============================================================
+
if ( $tightness <= 0 ) {
$ws = WS_YES;
}
my $j_here = $j;
++$j_here
if ( $token eq '-'
- && $last_token eq '{'
+ && $last_token eq '{'
&& $$rtoken_type[ $j + 1 ] eq 'w' );
# $j_next is where a closing token should be if
# but watch out for this: [ [ ] (misc.t)
&& $last_token ne $token
+
+ # double diamond is usually spaced
+ && $token ne '<<>>'
+
)
{
}
}
}
- } # done with opening braces and brackets
+ } # end setting space flag inside opening tokens
my $ws_1 = $ws
if FORMATTER_DEBUG_FLAG_WHITE;
#---------------------------------------------------------------
- # section 2:
- # handle space on inside of closing brace pairs
+ # Whitespace Rules Section 2:
+ # Handle space on inside of closing brace pairs.
#---------------------------------------------------------------
# /[\}\)\]R]/
$ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
}
}
- }
+ } # end setting space flag inside closing tokens
my $ws_2 = $ws
if FORMATTER_DEBUG_FLAG_WHITE;
#---------------------------------------------------------------
- # section 3:
- # use the binary table
+ # Whitespace Rules Section 3:
+ # Use the binary rule table.
#---------------------------------------------------------------
if ( !defined($ws) ) {
$ws = $binary_ws_rules{$last_type}{$type};
if FORMATTER_DEBUG_FLAG_WHITE;
#---------------------------------------------------------------
- # section 4:
- # some special cases
+ # Whitespace Rules Section 4:
+ # Handle some special cases.
#---------------------------------------------------------------
if ( $token eq '(' ) {
# This will have to be tweaked as tokenization changes.
- # We want a space after certain block types:
+ # We usually want a space at '} (', for example:
# map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
#
# But not others:
- # &{ $_->[1] } ( delete $_[$#_]{ $_->[0] } );
- # At present, the & block is not marked as a code block, so
- # this works:
- if ( $last_type eq '}' ) {
+ # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
+ # At present, the above & block is marked as type L/R so this case
+ # won't go through here.
+ if ( $last_type eq '}' ) { $ws = WS_YES }
- if ( $is_sort_map_grep{$last_block_type} ) {
- $ws = WS_YES;
- }
- else {
- $ws = WS_NO;
- }
+ # NOTE: some older versions of Perl had occasional problems if
+ # spaces are introduced between keywords or functions and opening
+ # parens. So the default is not to do this except is certain
+ # cases. The current Perl seems to tolerate spaces.
+
+ # Space between keyword and '('
+ elsif ( $last_type eq 'k' ) {
+ $ws = WS_NO
+ unless ( $rOpts_space_keyword_paren
+ || $space_after_keyword{$last_token} );
}
+ # Space between function and '('
# -----------------------------------------------------
# 'w' and 'i' checks for something like:
# myfun( &myfun( ->myfun(
# -----------------------------------------------------
- if ( ( $last_type =~ /^[wkU]$/ )
+ elsif (( $last_type =~ /^[wUG]$/ )
|| ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
{
-
- # Do not introduce new space between keyword or function
- # ( except in special cases) because this can
- # introduce errors in some cases ( prnterr1.t )
- unless ( $last_type eq 'k'
- && $space_after_keyword{$last_token} )
- {
- $ws = WS_NO;
- }
+ $ws = WS_NO unless ($rOpts_space_function_paren);
}
# space between something like $i and ( in
# allow constant function followed by '()' to retain no space
elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
- ;
$ws = WS_NO;
}
}
# patch for SWITCH/CASE: make space at ']{' optional
# since the '{' might begin a case or when block
- elsif ( $token eq '{' && $last_token eq ']' ) {
+ elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
$ws = WS_OPTIONAL;
}
elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
# always preserver whatever space was used after a possible
- # filehandle or here doc operator
- if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) {
+ # filehandle (except _) or here doc operator
+ if (
+ $type ne '#'
+ && ( ( $last_type eq 'Z' && $last_token ne '_' )
+ || $last_type eq 'h' )
+ )
+ {
$ws = WS_OPTIONAL;
}
if FORMATTER_DEBUG_FLAG_WHITE;
#---------------------------------------------------------------
- # section 5:
- # default rules not covered above
+ # Whitespace Rules Section 5:
+ # Apply default rules not covered above.
#---------------------------------------------------------------
- # if we fall through to here,
- # look at the pre-defined hash tables for the two tokens, and
- # if (they are equal) use the common value
- # if (either is zero or undef) use the other
- # if (either is -1) use it
+
+ # If we fall through to here, look at the pre-defined hash tables for
+ # the two tokens, and:
+ # if (they are equal) use the common value
+ # if (either is zero or undef) use the other
+ # if (either is -1) use it
# That is,
# left vs right
# 1 vs 1 --> 1
if ( !defined($ws_2) ) { $ws_2 = "*" }
if ( !defined($ws_3) ) { $ws_3 = "*" }
if ( !defined($ws_4) ) { $ws_4 = "*" }
- print
+ print STDOUT
"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
};
+ } ## end main loop
+
+ if ($rOpts_tight_secret_operators) {
+ secret_operator_whitespace( $jmax, $rtokens, $rtoken_type,
+ \@white_space_flag );
}
+
return \@white_space_flag;
-}
+} ## end sub set_white_space_flag
{ # begin print_line_of_tokens
my $rnesting_blocks;
my $in_quote;
- my $python_indentation_level;
+ my $guessed_indentation_level;
# These local token variables are stored by store_token_to_go:
my $block_type;
$nesting_blocks, $no_internal_newlines,
$slevel, $token,
$type, $type_sequence,
- )
- = @saved_token;
+ ) = @saved_token;
}
}
+ sub token_length {
+
+ # Returns the length of a token, given:
+ # $token=text of the token
+ # $type = type
+ # $not_first_token = should be TRUE if this is not the first token of
+ # the line. It might the index of this token in an array. It is
+ # used to test for a side comment vs a block comment.
+ # Note: Eventually this should be the only routine determining the
+ # length of a token in this package.
+ my ( $token, $type, $not_first_token ) = @_;
+ my $token_length = length($token);
+
+ # We mark lengths of side comments as just 1 if we are
+ # ignoring their lengths when setting line breaks.
+ $token_length = 1
+ if ( $rOpts_ignore_side_comment_lengths
+ && $not_first_token
+ && $type eq '#' );
+ return $token_length;
+ }
+
+ sub rtoken_length {
+
+ # return length of ith token in @{$rtokens}
+ my ($i) = @_;
+ return token_length( $$rtokens[$i], $$rtoken_type[$i], $i );
+ }
+
# Routine to place the current token into the output stream.
# Called once per output token.
sub store_token_to_go {
$ci_levels_to_go[$max_index_to_go] = $ci_level;
$mate_index_to_go[$max_index_to_go] = -1;
$matching_token_to_go[$max_index_to_go] = '';
+ $bond_strength_to_go[$max_index_to_go] = 0;
# Note: negative levels are currently retained as a diagnostic so that
# the 'final indentation level' is correctly reported for bad scripts.
# If this becomes too much of a problem, we might give up and just clip
# them at zero.
## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
- $levels_to_go[$max_index_to_go] = $level;
+ $levels_to_go[$max_index_to_go] = $level;
$nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
- $lengths_to_go[ $max_index_to_go + 1 ] =
- $lengths_to_go[$max_index_to_go] + length($token);
+
+ # link the non-blank tokens
+ my $iprev = $max_index_to_go - 1;
+ $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
+ $iprev_to_go[$max_index_to_go] = $iprev;
+ $inext_to_go[$iprev] = $max_index_to_go
+ if ( $iprev >= 0 && $type ne 'b' );
+ $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
+
+ $token_lengths_to_go[$max_index_to_go] =
+ token_length( $token, $type, $max_index_to_go );
+
+ # We keep a running sum of token lengths from the start of this batch:
+ # summed_lengths_to_go[$i] = total length to just before token $i
+ # summed_lengths_to_go[$i+1] = total length to just after token $i
+ $summed_lengths_to_go[ $max_index_to_go + 1 ] =
+ $summed_lengths_to_go[$max_index_to_go] +
+ $token_lengths_to_go[$max_index_to_go];
# Define the indentation that this token would have if it started
# a new line. We have to do this now because we need to know this
# when considering one-line blocks.
set_leading_whitespace( $level, $ci_level, $in_continued_quote );
+ # remember previous nonblank tokens seen
if ( $type ne 'b' ) {
$last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
$last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
FORMATTER_DEBUG_FLAG_STORE && do {
my ( $a, $b, $c ) = caller();
- print
+ print STDOUT
"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
};
}
return;
}
- my %is_until_while_for_if_elsif_else;
-
- BEGIN {
-
- # always break after a closing curly of these block types:
- @_ = qw(until while for if elsif else);
- @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
-
- }
-
sub print_line_of_tokens {
my $line_of_tokens = shift;
# processing. This routine decides if there should be
# whitespace between each pair of non-white tokens, so later
# routines only need to decide on any additional line breaks.
- # Any whitespace is initally a single space character. Later,
+ # Any whitespace is initially a single space character. Later,
# the vertical aligner may expand that to be multiple space
# characters if necessary for alignment.
$in_continued_quote = $starting_in_quote =
$line_of_tokens->{_starting_in_quote};
- $in_quote = $line_of_tokens->{_ending_in_quote};
- $python_indentation_level =
- $line_of_tokens->{_python_indentation_level};
+ $in_quote = $line_of_tokens->{_ending_in_quote};
+ $ending_in_quote = $in_quote;
+ $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
my $j;
my $j_next;
my $next_nonblank_token_type;
my $rwhite_space_flag;
- $jmax = @$rtokens - 1;
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
- $no_internal_newlines = 1 - $rOpts_add_newlines;
+ $jmax = @$rtokens - 1;
+ $block_type = "";
+ $container_type = "";
+ $container_environment = "";
+ $type_sequence = "";
+ $no_internal_newlines = 1 - $rOpts_add_newlines;
+ $is_static_block_comment = 0;
# Handle a continued quote..
if ($in_continued_quote) {
$last_line_had_side_comment = 0;
return;
}
+ }
- # prior to version 20010406, perltidy had a bug which placed
- # continuation indentation before the last line of some multiline
- # quotes and patterns -- exactly the lines passing this way.
- # To help find affected lines in scripts run with these
- # versions, run with '-chk', and it will warn of any quotes or
- # patterns which might have been modified by these early
- # versions.
- if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
- warning(
-"-chk: please check this line for extra leading whitespace\n"
- );
+ # Write line verbatim if we are in a formatting skip section
+ if ($in_format_skipping_section) {
+ write_unindented_line("$input_line");
+ $last_line_had_side_comment = 0;
+
+ # Note: extra space appended to comment simplifies pattern matching
+ if ( $jmax == 0
+ && $$rtoken_type[0] eq '#'
+ && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
+ {
+ $in_format_skipping_section = 0;
+ write_logfile_entry("Exiting formatting skip section\n");
+ $file_writer_object->reset_consecutive_blank_lines();
}
+ return;
+ }
+
+ # See if we are entering a formatting skip section
+ if ( $rOpts_format_skipping
+ && $jmax == 0
+ && $$rtoken_type[0] eq '#'
+ && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
+ {
+ flush();
+ $in_format_skipping_section = 1;
+ write_logfile_entry("Entering formatting skip section\n");
+ write_unindented_line("$input_line");
+ $last_line_had_side_comment = 0;
+ return;
}
# delete trailing blank tokens
# Handle a blank line..
if ( $jmax < 0 ) {
- # For the 'swallow-optional-blank-lines' option, we delete all
+ # If keep-old-blank-lines is zero, we delete all
# old blank lines and let the blank line rules generate any
# needed blanks.
- if ( !$rOpts_swallow_optional_blank_lines ) {
+ if ($rOpts_keep_old_blank_lines) {
flush();
- $file_writer_object->write_blank_code_line();
+ $file_writer_object->write_blank_code_line(
+ $rOpts_keep_old_blank_lines == 2 );
$last_line_leading_type = 'b';
}
$last_line_had_side_comment = 0;
return;
}
- # see if this is a static block comment (starts with ##)
- my $is_static_block_comment = 0;
+ # see if this is a static block comment (starts with ## by default)
my $is_static_block_comment_without_leading_space = 0;
if ( $jmax == 0
&& $$rtoken_type[0] eq '#'
&& $rOpts->{'static-block-comments'}
&& $input_line =~ /$static_block_comment_pattern/o )
{
- $is_static_block_comment = 1;
+ $is_static_block_comment = 1;
$is_static_block_comment_without_leading_space =
- ( length($1) <= 0 );
+ substr( $input_line, 0, 1 ) eq '#';
+ }
+
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
+ if (
+ $jmax == 0
+ && $$rtoken_type[0] eq '#'
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
+ )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space = 1;
}
# create a hanging side comment if appropriate
+ my $is_hanging_side_comment;
if (
$jmax == 0
- && $$rtoken_type[0] eq '#' # only token is a comment
- && $last_line_had_side_comment # last line had side comment
- && $input_line =~ /^\s/ # there is some leading space
+ && $$rtoken_type[0] eq '#' # only token is a comment
+ && $last_line_had_side_comment # last line had side comment
+ && $input_line =~ /^\s/ # there is some leading space
&& !$is_static_block_comment # do not make static comment hanging
- && $rOpts->{'hanging-side-comments'} # user is allowing this
+ && $rOpts->{'hanging-side-comments'} # user is allowing
+ # hanging side comments
+ # like this
)
{
# We will insert an empty qw string at the start of the token list
# to force this comment to be a side comment. The vertical aligner
# should then line it up with the previous side comment.
+ $is_hanging_side_comment = 1;
unshift @$rtoken_type, 'q';
unshift @$rtokens, '';
unshift @$rlevels, $$rlevels[0];
# output a blank line before block comments
if (
- $last_line_leading_type !~ /^[#b]$/
- && $rOpts->{'blanks-before-comments'} # only if allowed
- && !
- $is_static_block_comment # never before static block comments
+ # unless we follow a blank or comment line
+ $last_line_leading_type !~ /^[#b]$/
+
+ # only if allowed
+ && $rOpts->{'blanks-before-comments'}
+
+ # not if this is an empty comment line
+ && $$rtokens[0] ne '#'
+
+ # not after a short line ending in an opening token
+ # because we already have space above this comment.
+ # Note that the first comment in this if block, after
+ # the 'if (', does not get a blank line because of this.
+ && !$last_output_short_opening_token
+
+ # never before static block comments
+ && !$is_static_block_comment
)
{
- flush(); # switching to new output stream
+ flush(); # switching to new output stream
$file_writer_object->write_blank_code_line();
$last_line_leading_type = 'b';
}
# TRIM COMMENTS -- This could be turned off as a option
- $$rtokens[0] =~ s/\s*$//; # trim right end
+ $$rtokens[0] =~ s/\s*$//; # trim right end
if (
$rOpts->{'indent-block-comments'}
- && ( !$rOpts->{'indent-spaced-block-comments'}
+ && ( !$rOpts->{'indent-spaced-block-comments'}
|| $input_line =~ /^\s+/ )
&& !$is_static_block_comment_without_leading_space
)
# Note: this test is placed here because we know the continuation flag
# at this point, which allows us to avoid non-meaningful checks.
my $structural_indentation_level = $$rlevels[0];
- compare_indentation_levels( $python_indentation_level,
+ compare_indentation_levels( $guessed_indentation_level,
$structural_indentation_level )
- unless ( $python_indentation_level < 0
- || ( $$rci_levels[0] > 0 )
- || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
- );
+ unless ( $is_hanging_side_comment
+ || $$rci_levels[0] > 0
+ || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' );
# Patch needed for MakeMaker. Do not break a statement
# in which $VERSION may be calculated. See MakeMaker.pm;
# /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
# Examples:
# *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.46 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
# We will pass such a line straight through without breaking
- # it unless -npvl is used
+ # it unless -npvl is used.
+
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
my $is_VERSION_statement = 0;
-
- if (
- !$saw_VERSION_in_this_file
- && $input_line =~ /VERSION/ # quick check to reject most lines
- && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- )
+ if ( !$saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
{
$saw_VERSION_in_this_file = 1;
$is_VERSION_statement = 1;
}
# take care of indentation-only
- # also write a line which is entirely a 'qw' list
- if ( $rOpts->{'indent-only'}
- || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) )
- {
+ # NOTE: In previous versions we sent all qw lines out immediately here.
+ # No longer doing this: also write a line which is entirely a 'qw' list
+ # to allow stacking of opening and closing tokens. Note that interior
+ # qw lines will still go out at the end of this routine.
+ if ( $rOpts->{'indent-only'} ) {
flush();
- $input_line =~ s/^\s*//; # trim left end
- $input_line =~ s/\s*$//; # trim right end
+ my $line = $input_line;
+
+ # delete side comments if requested with -io, but
+ # we will not allow deleting of closing side comments with -io
+ # because the coding would be more complex
+ if ( $rOpts->{'delete-side-comments'}
+ && $rtoken_type->[$jmax] eq '#' )
+ {
+ $line = join "", @{$rtokens}[ 0 .. $jmax - 1 ];
+ }
+ trim($line);
extract_token(0);
- $token = $input_line;
+ $token = $line;
$type = 'q';
$block_type = "";
$container_type = "";
($rwhite_space_flag) =
set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
- # find input tabbing to allow checks for tabbing disagreement
- ## not used for now
- ##$input_line_tabbing = "";
- ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
-
# if the buffer hasn't been flushed, add a leading space if
# necessary to keep essential whitespace. This is really only
# necessary if we are squeezing out all ws.
}
# This is a good place to kill incomplete one-line blocks
- if ( ( $semicolons_before_block_self_destruct == 0 )
- && ( $max_index_to_go >= 0 )
- && ( $types_to_go[$max_index_to_go] eq ';' )
- && ( $$rtokens[0] ne '}' ) )
+ if (
+ (
+ ( $semicolons_before_block_self_destruct == 0 )
+ && ( $max_index_to_go >= 0 )
+ && ( $types_to_go[$max_index_to_go] eq ';' )
+ && ( $$rtokens[0] ne '}' )
+ )
+
+ # Patch for RT #98902. Honor request to break at old commas.
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && $max_index_to_go >= 0
+ && $types_to_go[$max_index_to_go] eq ',' )
+ )
{
+ $forced_breakpoint_to_go[$max_index_to_go] = 1
+ if ($rOpts_break_at_old_comma_breakpoints);
destroy_one_line_block();
output_line_to_go();
}
$token =~ s/\s*//g;
}
- if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
+ # Split identifiers with leading arrows, inserting blanks if
+ # necessary. It is easier and safer here than in the
+ # tokenizer. For example '->new' becomes two tokens, '->' and
+ # 'new' with a possible blank between.
+ #
+ # Note: there is a related patch in sub set_white_space_flag
+ if ( $token =~ /^\-\>(.*)$/ && $1 ) {
+ my $token_save = $1;
+ my $type_save = $type;
+
+ # store a blank to left of arrow if necessary
+ if ( $max_index_to_go >= 0
+ && $types_to_go[$max_index_to_go] ne 'b'
+ && $want_left_space{'->'} == WS_YES )
+ {
+ insert_new_token_to_go( ' ', 'b', $slevel,
+ $no_internal_newlines );
+ }
+
+ # then store the arrow
+ $token = '->';
+ $type = $token;
+ store_token_to_go();
+
+ # then reset the current token to be the remainder,
+ # and reset the whitespace flag according to the arrow
+ $$rwhite_space_flag[$j] = $want_right_space{'->'};
+ $token = $token_save;
+ $type = $type_save;
+ }
+
+ if ( $token =~ /$SUB_PATTERN/ ) { $token =~ s/\s+/ /g }
+
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ if ( $type eq 'i' ) { $token =~ s/\s+$//g }
}
# change 'LABEL :' to 'LABEL:'
# make note of something like '$var = s/xxx/yyy/;'
# in case it should have been '$var =~ s/xxx/yyy/;'
if (
- $token =~ /^(s|tr|y|m|\/)/
+ $token =~ /^(s|tr|y|m|\/)/
&& $last_nonblank_token =~ /^(=|==|!=)$/
- # precededed by simple scalar
+ # preceded by simple scalar
&& $last_last_nonblank_type eq 'i'
&& $last_last_nonblank_token =~ /^\$/
# (but give complaint if we can's see far enough ahead)
&& $next_nonblank_token =~ /^[; \)\}]$/
- # scalar is not decleared
+ # scalar is not declared
&& !(
$types_to_go[0] eq 'k'
&& $tokens_to_go[0] =~ /^(my|our|local)$/
my $want_break =
# use -bl flag if not a sub block of any type
- $block_type !~ /^sub/
+ #$block_type !~ /^sub/
+ $block_type !~ /^sub\b/
? $rOpts->{'opening-brace-on-new-line'}
- # use -sbl flag unless this is an anonymous sub block
- : $block_type !~ /^sub\W*$/
+ # use -sbl flag for a named sub block
+ : $block_type !~ /$ASUB_PATTERN/
? $rOpts->{'opening-sub-brace-on-new-line'}
- # do not break for anonymous subs
- : 0;
+ # use -asbl flag for an anonymous sub block
+ : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
# Break before an opening '{' ...
if (
# and we don't have one
&& ( $last_nonblank_type ne ';' )
- # patch until some block type issues are fixed:
- # Do not add semi-colon for block types '{',
- # '}', and ';' because we cannot be sure yet
- # that this is a block and not an anonomyous
- # hash (blktype.t, blktype1.t)
- && ( $block_type !~ /^[\{\};]$/ )
-
- # it seems best not to add semicolons in these
- # special block types: sort|map|grep
- && ( !$is_sort_map_grep{$block_type} )
-
# and we are allowed to do so.
&& $rOpts->{'add-semicolons'}
+
+ # and we are allowed to for this block type
+ && ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ )
+
)
{
#
# But make a line break if the curly ends a
# significant block:
- if ( $is_until_while_for_if_elsif_else{$block_type} ) {
+ if (
+ (
+ $is_block_without_semicolon{$block_type}
+
+ # Follow users break point for
+ # one line block types U & G, such as a 'try' block
+ || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
+ )
+
+ # if needless semicolon follows we handle it later
+ && $next_nonblank_token ne ';'
+ )
+ {
output_line_to_go() unless ($no_internal_newlines);
}
}
}
# anonymous sub
- elsif ( $block_type =~ /^sub\W*$/ ) {
+ elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
if ($is_one_line_block) {
$rbrace_follower = \%is_anon_sub_1_brace_follower;
}
}
- # TESTING ONLY for SWITCH/CASE - this is where to start
- # recoding to retain else's on the same line as a case,
- # but there is a lot more that would need to be done.
- ##elsif ($block_type eq 'case') {$rbrace_follower = {else=>1};}
-
# None of the above: specify what can follow a closing
# brace of a block which is not an
# if/elsif/else/do/sort/map/grep/eval
&& (
$is_block_without_semicolon{
$last_nonblank_block_type}
- || $last_nonblank_block_type =~ /^sub\s+\w/
+ || $last_nonblank_block_type =~ /$SUB_PATTERN/
|| $last_nonblank_block_type =~ /^\w+:$/ )
)
|| $last_nonblank_type eq ';'
output_line_to_go()
unless ( $no_internal_newlines
+ || ( $rOpts_keep_interior_semicolons && $j < $jmax )
|| ( $next_nonblank_token eq '}' ) );
}
# if there is a side comment
( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
- # if this line which ends in a quote
+ # if this line ends in a quote
+ # NOTE: This is critically important for insuring that quoted lines
+ # do not get processed by things like -sot and -sct
|| $in_quote
# if this is a VERSION statement
|| $is_VERSION_statement
- # to keep a label on one line if that is how it is now
- || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
+ # to keep a label at the end of a line
+ || $type eq 'J'
# if we are instructed to keep all old line breaks
|| !$rOpts->{'delete-old-newlines'}
}
# mark old line breakpoints in current output stream
- if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_line_breaks ) {
+ if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
$old_breakpoint_to_go[$max_index_to_go] = 1;
}
- }
-} # end print_line_of_tokens
+ } ## end sub print_line_of_tokens
+} ## end block print_line_of_tokens
-sub note_added_semicolon {
- $last_added_semicolon_at = $input_line_number;
- if ( $added_semicolon_count == 0 ) {
- $first_added_semicolon_at = $last_added_semicolon_at;
- }
- $added_semicolon_count++;
- write_logfile_entry("Added ';' here\n");
-}
+# sub output_line_to_go sends one logical line of tokens on down the
+# pipeline to the VerticalAligner package, breaking the line into continuation
+# lines as necessary. The line of tokens is ready to go in the "to_go"
+# arrays.
+sub output_line_to_go {
-sub note_deleted_semicolon {
- $last_deleted_semicolon_at = $input_line_number;
- if ( $deleted_semicolon_count == 0 ) {
- $first_deleted_semicolon_at = $last_deleted_semicolon_at;
- }
- $deleted_semicolon_count++;
- write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
-}
+ # debug stuff; this routine can be called from many points
+ FORMATTER_DEBUG_FLAG_OUTPUT && do {
+ my ( $a, $b, $c ) = caller;
+ write_diagnostics(
+"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
+ );
+ my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
+ write_diagnostics("$output_str\n");
+ };
-sub note_embedded_tab {
- $embedded_tab_count++;
- $last_embedded_tab_at = $input_line_number;
- if ( !$first_embedded_tab_at ) {
- $first_embedded_tab_at = $last_embedded_tab_at;
+ # just set a tentative breakpoint if we might be in a one-line block
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ set_forced_breakpoint($max_index_to_go);
+ return;
}
- if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry("Embedded tabs in quote or pattern\n");
- }
-}
+ my $cscw_block_comment;
+ $cscw_block_comment = add_closing_side_comment()
+ if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
-sub starting_one_line_block {
+ my $comma_arrow_count_contained = match_opening_and_closing_tokens();
- # after seeing an opening curly brace, look for the closing brace
- # and see if the entire block will fit on a line. This routine is
- # not always right because it uses the old whitespace, so a check
- # is made later (at the closing brace) to make sure we really
- # have a one-line block. We have to do this preliminary check,
- # though, because otherwise we would always break at a semicolon
- # within a one-line block if the block contains multiple statements.
+ # tell the -lp option we are outputting a batch so it can close
+ # any unfinished items in its stack
+ finish_lp_batch();
- my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
- $rblock_type )
- = @_;
+ # If this line ends in a code block brace, set breaks at any
+ # previous closing code block braces to breakup a chain of code
+ # blocks on one line. This is very rare but can happen for
+ # user-defined subs. For example we might be looking at this:
+ # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
+ my $saw_good_break = 0; # flag to force breaks even if short line
+ if (
- # kill any current block - we can only go 1 deep
- destroy_one_line_block();
+ # looking for opening or closing block brace
+ $block_type_to_go[$max_index_to_go]
- # return value:
- # 1=distance from start of block to opening brace exceeds line length
- # 0=otherwise
+ # but not one of these which are never duplicated on a line:
+ # until|while|for|if|elsif|else
+ && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
+ )
+ {
+ my $lev = $nesting_depth_to_go[$max_index_to_go];
- my $i_start = 0;
+ # Walk backwards from the end and
+ # set break at any closing block braces at the same level.
+ # But quit if we are not in a chain of blocks.
+ for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
+ last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
+ next if ( $levels_to_go[$i] > $lev ); # skip past higher level
- # shouldn't happen: there must have been a prior call to
- # store_token_to_go to put the opening brace in the output stream
- if ( $max_index_to_go < 0 ) {
- warning("program bug: store_token_to_go called incorrectly\n");
- report_definite_bug();
- }
- else {
+ if ( $block_type_to_go[$i] ) {
+ if ( $tokens_to_go[$i] eq '}' ) {
+ set_forced_breakpoint($i);
+ $saw_good_break = 1;
+ }
+ }
- # cannot use one-line blocks with cuddled else else/elsif lines
- if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
- return 0;
+ # quit if we see anything besides words, function, blanks
+ # at this level
+ elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
}
}
- my $block_type = $$rblock_type[$j];
-
- # find the starting keyword for this block (such as 'if', 'else', ...)
+ my $imin = 0;
+ my $imax = $max_index_to_go;
- if ( $block_type =~ /^[\{\}\;\:]$/ ) {
- $i_start = $max_index_to_go;
+ # trim any blank tokens
+ if ( $max_index_to_go >= 0 ) {
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
}
- elsif ( $last_last_nonblank_token_to_go eq ')' ) {
+ # anything left to write?
+ if ( $imin <= $imax ) {
- # For something like "if (xxx) {", the keyword "if" will be
- # just after the most recent break. This will be 0 unless
- # we have just killed a one-line block and are starting another.
- # (doif.t)
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
+ # add a blank line before certain key types but not after a comment
+ if ( $last_line_leading_type !~ /^[#]/ ) {
+ my $want_blank = 0;
+ my $leading_token = $tokens_to_go[$imin];
+ my $leading_type = $types_to_go[$imin];
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
- return 0;
+ # blank lines before subs except declarations and one-liners
+ # MCONVERSION LOCATION - for sub tokenization change
+ if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if (
+ terminal_type( \@types_to_go, \@block_type_to_go, $imin,
+ $imax ) !~ /^[\;\}]$/
+ );
+ }
+
+ # break before all package declarations
+ # MCONVERSION LOCATION - for tokenizaton change
+ elsif ($leading_token =~ /^(package\s)/
+ && $leading_type eq 'i' )
+ {
+ $want_blank = $rOpts->{'blank-lines-before-packages'};
+ }
+
+ # break before certain key blocks except one-liners
+ if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if (
+ terminal_type( \@types_to_go, \@block_type_to_go, $imin,
+ $imax ) ne '}'
+ );
+ }
+
+ # Break before certain block types if we haven't had a
+ # break at this level for a while. This is the
+ # difficult decision..
+ elsif ($leading_type eq 'k'
+ && $last_line_leading_type ne 'b'
+ && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
+ {
+ my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
+ if ( !defined($lc) ) { $lc = 0 }
+
+ $want_blank =
+ $rOpts->{'blanks-before-blocks'}
+ && $lc >= $rOpts->{'long-block-line-count'}
+ && $file_writer_object->get_consecutive_nonblank_lines() >=
+ $rOpts->{'long-block-line-count'}
+ && (
+ terminal_type( \@types_to_go, \@block_type_to_go, $imin,
+ $imax ) ne '}'
+ );
+ }
+
+ # Check for blank lines wanted before a closing brace
+ if ( $leading_token eq '}' ) {
+ if ( $rOpts->{'blank-lines-before-closing-block'}
+ && $block_type_to_go[$imin]
+ && $block_type_to_go[$imin] =~
+ /$blank_lines_before_closing_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
+ if ( $nblanks > $want_blank ) {
+ $want_blank = $nblanks;
+ }
+ }
+ }
+
+ if ($want_blank) {
+
+ # future: send blank line down normal path to VerticalAligner
+ Perl::Tidy::VerticalAligner::flush();
+ $file_writer_object->require_blank_code_lines($want_blank);
+ }
+ }
+
+ # update blank line variables and count number of consecutive
+ # non-blank, non-comment lines at this level
+ $last_last_line_leading_level = $last_line_leading_level;
+ $last_line_leading_level = $levels_to_go[$imin];
+ if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
+ $last_line_leading_type = $types_to_go[$imin];
+ if ( $last_line_leading_level == $last_last_line_leading_level
+ && $last_line_leading_type ne 'b'
+ && $last_line_leading_type ne '#'
+ && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
+ {
+ $nonblank_lines_at_depth[$last_line_leading_level]++;
+ }
+ else {
+ $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ }
+
+ FORMATTER_DEBUG_FLAG_FLUSH && do {
+ my ( $package, $file, $line ) = caller;
+ print STDOUT
+"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
+ };
+
+ # add a couple of extra terminal blank tokens
+ pad_array_to_go();
+
+ # set all forced breakpoints for good list formatting
+ my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
+
+ if (
+ $is_long_line
+ || $old_line_count_in_batch > 1
+
+ # must always call scan_list() with unbalanced batches because it
+ # is maintaining some stacks
+ || is_unbalanced_batch()
+
+ # call scan_list if we might want to break at commas
+ || (
+ $comma_count_in_batch
+ && ( $rOpts_maximum_fields_per_table > 0
+ || $rOpts_comma_arrow_breakpoints == 0 )
+ )
+
+ # call scan_list if user may want to break open some one-line
+ # hash references
+ || ( $comma_arrow_count_contained
+ && $rOpts_comma_arrow_breakpoints != 3 )
+ )
+ {
+ ## This caused problems in one version of perl for unknown reasons:
+ ## $saw_good_break ||= scan_list();
+ my $sgb = scan_list();
+ $saw_good_break ||= $sgb;
+ }
+
+ # let $ri_first and $ri_last be references to lists of
+ # first and last tokens of line fragments to output..
+ my ( $ri_first, $ri_last );
+
+ # write a single line if..
+ if (
+
+ # we aren't allowed to add any newlines
+ !$rOpts_add_newlines
+
+ # or, we don't already have an interior breakpoint
+ # and we didn't see a good breakpoint
+ || (
+ !$forced_breakpoint_count
+ && !$saw_good_break
+
+ # and this line is 'short'
+ && !$is_long_line
+ )
+ )
+ {
+ @$ri_first = ($imin);
+ @$ri_last = ($imax);
+ }
+
+ # otherwise use multiple lines
+ else {
+
+ ( $ri_first, $ri_last, my $colon_count ) =
+ set_continuation_breaks($saw_good_break);
+
+ break_all_chain_tokens( $ri_first, $ri_last );
+
+ break_equals( $ri_first, $ri_last );
+
+ # now we do a correction step to clean this up a bit
+ # (The only time we would not do this is for debugging)
+ if ( $rOpts->{'recombine'} ) {
+ ( $ri_first, $ri_last ) =
+ recombine_breakpoints( $ri_first, $ri_last );
+ }
+
+ insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
+ }
+
+ # do corrector step if -lp option is used
+ my $do_not_pad = 0;
+ if ($rOpts_line_up_parentheses) {
+ $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
}
+ send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
+
+ # Insert any requested blank lines after an opening brace. We have to
+ # skip back before any side comment to find the terminal token
+ my $iterm;
+ for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
+ next if $types_to_go[$iterm] eq '#';
+ next if $types_to_go[$iterm] eq 'b';
+ last;
+ }
+
+ # write requested number of blank lines after an opening block brace
+ if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
+ if ( $rOpts->{'blank-lines-after-opening-block'}
+ && $block_type_to_go[$iterm]
+ && $block_type_to_go[$iterm] =~
+ /$blank_lines_after_opening_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
+ Perl::Tidy::VerticalAligner::flush();
+ $file_writer_object->require_blank_code_lines($nblanks);
+ }
+ }
+ }
+
+ prepare_for_new_input_lines();
+
+ # output any new -cscw block comment
+ if ($cscw_block_comment) {
+ flush();
+ $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
+ }
+}
+
+sub note_added_semicolon {
+ $last_added_semicolon_at = $input_line_number;
+ if ( $added_semicolon_count == 0 ) {
+ $first_added_semicolon_at = $last_added_semicolon_at;
+ }
+ $added_semicolon_count++;
+ write_logfile_entry("Added ';' here\n");
+}
+
+sub note_deleted_semicolon {
+ $last_deleted_semicolon_at = $input_line_number;
+ if ( $deleted_semicolon_count == 0 ) {
+ $first_deleted_semicolon_at = $last_deleted_semicolon_at;
+ }
+ $deleted_semicolon_count++;
+ write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
+}
+
+sub note_embedded_tab {
+ $embedded_tab_count++;
+ $last_embedded_tab_at = $input_line_number;
+ if ( !$first_embedded_tab_at ) {
+ $first_embedded_tab_at = $last_embedded_tab_at;
+ }
+
+ if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry("Embedded tabs in quote or pattern\n");
+ }
+}
+
+sub starting_one_line_block {
+
+ # after seeing an opening curly brace, look for the closing brace
+ # and see if the entire block will fit on a line. This routine is
+ # not always right because it uses the old whitespace, so a check
+ # is made later (at the closing brace) to make sure we really
+ # have a one-line block. We have to do this preliminary check,
+ # though, because otherwise we would always break at a semicolon
+ # within a one-line block if the block contains multiple statements.
+
+ my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
+ $rblock_type )
+ = @_;
+
+ # kill any current block - we can only go 1 deep
+ destroy_one_line_block();
+
+ # return value:
+ # 1=distance from start of block to opening brace exceeds line length
+ # 0=otherwise
+
+ my $i_start = 0;
+
+ # shouldn't happen: there must have been a prior call to
+ # store_token_to_go to put the opening brace in the output stream
+ if ( $max_index_to_go < 0 ) {
+ warning("program bug: store_token_to_go called incorrectly\n");
+ report_definite_bug();
+ }
+ else {
+
+ # cannot use one-line blocks with cuddled else/elsif lines
+ if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
+ return 0;
+ }
+ }
+
+ my $block_type = $$rblock_type[$j];
+
+ # find the starting keyword for this block (such as 'if', 'else', ...)
+
+ if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
+ $i_start = $max_index_to_go;
}
# the previous nonblank token should start these block types
- elsif (
- ( $last_last_nonblank_token_to_go eq $block_type )
- || ( $block_type =~ /^sub/
- && $last_last_nonblank_token_to_go =~ /^sub/ )
- )
+ elsif (( $last_last_nonblank_token_to_go eq $block_type )
+ || ( $block_type =~ /^sub\b/ )
+ || $block_type =~ /\(\)/ )
{
$i_start = $last_last_nonblank_index_to_go;
+
+ # For signatures and extended syntax ...
+ # If this brace follows a parenthesized list, we should look back to
+ # find the keyword before the opening paren because otherwise we might
+ # form a one line block which stays intack, and cause the parenthesized
+ # expression to break open. That looks bad. However, actually
+ # searching for the opening paren is slow and tedius.
+ # The actual keyword is often at the start of a line, but might not be.
+ # For example, we might have an anonymous sub with signature list
+ # following a =>. It is safe to mark the start anywhere before the
+ # opening paren, so we just go back to the prevoious break (or start of
+ # the line) if that is before the opening paren. The minor downside is
+ # that we may very occasionally break open a block unnecessarily.
+ if ( $tokens_to_go[$i_start] eq ')' ) {
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
+ my $lev = $levels_to_go[$i_start];
+ if ( $lev > $level ) { return 0 }
+ }
+ }
+
+ elsif ( $last_last_nonblank_token_to_go eq ')' ) {
+
+ # For something like "if (xxx) {", the keyword "if" will be
+ # just after the most recent break. This will be 0 unless
+ # we have just killed a one-line block and are starting another.
+ # (doif.t)
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
+
+ # Patch to avoid breaking short blocks defined with extended_syntax:
+ # Strip off any trailing () which was added in the parser to mark
+ # the opening keyword. For example, in the following
+ # create( TypeFoo $e) {$bubba}
+ # the blocktype would be marked as create()
+ my $stripped_block_type = $block_type;
+ $stripped_block_type =~ s/\(\)$//;
+
+ unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
+ return 0;
+ }
}
# patch for SWITCH/CASE to retain one-line case/when blocks
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
+
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
my $i;
# see if length is too long to even start
- if ( $pos > $rOpts_maximum_line_length ) {
+ if ( $pos > maximum_line_length($i_start) ) {
return 1;
}
for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
# old whitespace could be arbitrarily large, so don't use it
- if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
- else { $pos += length( $$rtokens[$i] ) }
+ if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
+ else { $pos += rtoken_length($i) }
# Return false result if we exceed the maximum line length,
- if ( $pos > $rOpts_maximum_line_length ) {
+ if ( $pos > maximum_line_length($i_start) ) {
return 0;
}
my $i_nonblank =
( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
- if ( $$rtoken_type[$i_nonblank] eq '#' ) {
- $pos += length( $$rtokens[$i_nonblank] );
+ # Patch for one-line sort/map/grep/eval blocks with side comments:
+ # We will ignore the side comment length for sort/map/grep/eval
+ # because this can lead to statements which change every time
+ # perltidy is run. Here is an example from Denis Moskowitz which
+ # oscillates between these two states without this patch:
+
+## --------
+## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+##
+## grep {
+## $_->foo ne 'bar'
+## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+## --------
+
+ # When the first line is input it gets broken apart by the main
+ # line break logic in sub print_line_of_tokens.
+ # When the second line is input it gets recombined by
+ # print_line_of_tokens and passed to the output routines. The
+ # output routines (set_continuation_breaks) do not break it apart
+ # because the bond strengths are set to the highest possible value
+ # for grep/map/eval/sort blocks, so the first version gets output.
+ # It would be possible to fix this by changing bond strengths,
+ # but they are high to prevent errors in older versions of perl.
+
+ if ( $$rtoken_type[$i_nonblank] eq '#'
+ && !$is_sort_map_grep{$block_type} )
+ {
+
+ $pos += rtoken_length($i_nonblank);
if ( $i_nonblank > $i + 1 ) {
- $pos += length( $$rtokens[ $i + 1 ] );
+
+ # source whitespace could be anything, assume
+ # at least one space before the hash on output
+ if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 }
+ else { $pos += rtoken_length( $i + 1 ) }
}
- if ( $pos > $rOpts_maximum_line_length ) {
+ if ( $pos >= maximum_line_length($i_start) ) {
return 0;
}
}
sub want_blank_line {
flush();
- $file_writer_object->want_blank_line();
+ $file_writer_object->want_blank_line() unless $in_format_skipping_section;
}
sub write_unindented_line {
$file_writer_object->write_line( $_[0] );
}
-sub undo_lp_ci {
+sub undo_ci {
- # If there is a single, long parameter within parens, like this:
- #
- # $self->command( "/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?" );
- #
- # we can remove the continuation indentation of the 2nd and higher lines
- # to achieve this effect, which is more pleasing:
- #
- # $self->command("/msg "
+ # Undo continuation indentation in certain sequences
+ # For example, we can undo continuation indentation in sort/map/grep chains
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
+ # To align the map/sort/grep keywords like this:
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
+ my ( $ri_first, $ri_last ) = @_;
+ my ( $line_1, $line_2, $lev_last );
+ my $this_line_is_semicolon_terminated;
+ my $max_line = @$ri_first - 1;
+
+ # looking at each line of this batch..
+ # We are looking at leading tokens and looking for a sequence
+ # all at the same level and higher level than enclosing lines.
+ foreach my $line ( 0 .. $max_line ) {
+
+ my $ibeg = $$ri_first[$line];
+ my $lev = $levels_to_go[$ibeg];
+ if ( $line > 0 ) {
+
+ # if we have started a chain..
+ if ($line_1) {
+
+ # see if it continues..
+ if ( $lev == $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+
+ # chain continues...
+ # check for chain ending at end of a statement
+ if ( $line == $max_line ) {
+
+ # see of this line ends a statement
+ my $iend = $$ri_last[$line];
+ $this_line_is_semicolon_terminated =
+ $types_to_go[$iend] eq ';'
+
+ # with possible side comment
+ || ( $types_to_go[$iend] eq '#'
+ && $iend - $ibeg >= 2
+ && $types_to_go[ $iend - 2 ] eq ';'
+ && $types_to_go[ $iend - 1 ] eq 'b' );
+ }
+ $line_2 = $line if ($this_line_is_semicolon_terminated);
+ }
+ else {
+
+ # kill chain
+ $line_1 = undef;
+ }
+ }
+ elsif ( $lev < $lev_last ) {
+
+ # chain ends with previous line
+ $line_2 = $line - 1;
+ }
+ elsif ( $lev > $lev_last ) {
+
+ # kill chain
+ $line_1 = undef;
+ }
+
+ # undo the continuation indentation if a chain ends
+ if ( defined($line_2) && defined($line_1) ) {
+ my $continuation_line_count = $line_2 - $line_1 + 1;
+ @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
+ (0) x ($continuation_line_count);
+ @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
+ @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
+ $line_1 = undef;
+ }
+ }
+
+ # not in a chain yet..
+ else {
+
+ # look for start of a new sort/map/grep chain
+ if ( $lev > $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+ $line_1 = $line;
+ }
+ }
+ }
+ }
+ $lev_last = $lev;
+ }
+}
+
+sub undo_lp_ci {
+
+ # If there is a single, long parameter within parens, like this:
+ #
+ # $self->command( "/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?" );
+ #
+ # we can remove the continuation indentation of the 2nd and higher lines
+ # to achieve this effect, which is more pleasing:
+ #
+ # $self->command("/msg "
# . $infoline->chan
# . " You said $1, but did you know that it's square was "
# . $1 * $1 . " ?");
@reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
}
-{
+sub pad_token {
- # Identify certain operators which often occur in chains.
- # We will try to improve alignment when these lead a line.
- my %is_chain_operator;
+ # insert $pad_spaces before token number $ipad
+ my ( $ipad, $pad_spaces ) = @_;
+ if ( $pad_spaces > 0 ) {
+ $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
+ }
+ elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
+ $tokens_to_go[$ipad] = "";
+ }
+ else {
+
+ # shouldn't happen
+ return;
+ }
+
+ $token_lengths_to_go[$ipad] += $pad_spaces;
+ for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) {
+ $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
+ }
+}
+
+{
+ my %is_math_op;
BEGIN {
- @_ = qw(&& || and or : ? .);
- @is_chain_operator{@_} = (1) x scalar(@_);
+
+ @_ = qw( + - * / );
+ @is_math_op{@_} = (1) x scalar(@_);
}
sub set_logical_padding {
my $max_line = @$ri_first - 1;
my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line,
- $pad_spaces, $tok_next, $has_leading_op_next, $has_leading_op );
+ $pad_spaces,
+ $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
# looking at each line of this batch..
foreach $line ( 0 .. $max_line - 1 ) {
# see if the next line begins with a logical operator
- $ibeg = $$ri_first[$line];
- $iend = $$ri_last[$line];
- $ibeg_next = $$ri_first[ $line + 1 ];
- $tok_next = $tokens_to_go[$ibeg_next];
- $has_leading_op_next = $is_chain_operator{$tok_next};
+ $ibeg = $$ri_first[$line];
+ $iend = $$ri_last[$line];
+ $ibeg_next = $$ri_first[ $line + 1 ];
+ $tok_next = $tokens_to_go[$ibeg_next];
+ $type_next = $types_to_go[$ibeg_next];
+
+ $has_leading_op_next = ( $tok_next =~ /^\w/ )
+ ? $is_chain_operator{$tok_next} # + - * / : ? && ||
+ : $is_chain_operator{$type_next}; # and, or
+
next unless ($has_leading_op_next);
# next line must not be at lesser depth
# if this is not first line of the batch ...
if ( $line > 0 ) {
- # and we have leading operator
+ # and we have leading operator..
next if $has_leading_op;
- # and ..
+ # Introduce padding if..
# 1. the previous line is at lesser depth, or
# 2. the previous line ends in an assignment
- #
+ # 3. the previous line ends in a 'return'
+ # 4. the previous line ends in a comma
# Example 1: previous line at lesser depth
# if ( ( $Year < 1601 ) # <- we are here but
# || ( $Year > 2899 ) # list has not yet
# : $year % 100 ? 1
# : $year % 400 ? 0
# : 1;
+ #
+ # Example 3: previous line ending in comma:
+ # push @expr,
+ # /test/ ? undef
+ # : eval($_) ? 1
+ # : eval($_) ? 1
+ # : 0;
+
+ # be sure levels agree (do not indent after an indented 'if')
+ next
+ if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
+
+ # allow padding on first line after a comma but only if:
+ # (1) this is line 2 and
+ # (2) there are at more than three lines and
+ # (3) lines 3 and 4 have the same leading operator
+ # These rules try to prevent padding within a long
+ # comma-separated list.
+ my $ok_comma;
+ if ( $types_to_go[$iendm] eq ','
+ && $line == 1
+ && $max_line > 2 )
+ {
+ my $ibeg_next_next = $$ri_first[ $line + 2 ];
+ my $tok_next_next = $tokens_to_go[$ibeg_next_next];
+ $ok_comma = $tok_next_next eq $tok_next;
+ }
+
next
unless (
- $is_assignment{ $types_to_go[$iendm] }
+ $is_assignment{ $types_to_go[$iendm] }
+ || $ok_comma
|| ( $nesting_depth_to_go[$ibegm] <
$nesting_depth_to_go[$ibeg] )
+ || ( $types_to_go[$iendm] eq 'k'
+ && $tokens_to_go[$iendm] eq 'return' )
);
# we will add padding before the first token
# we might pad token $ibeg, so be sure that it
# is at the same depth as the next line.
next
- if ( $nesting_depth_to_go[ $ibeg + 1 ] !=
+ if ( $nesting_depth_to_go[$ibeg] !=
$nesting_depth_to_go[$ibeg_next] );
# We can pad on line 1 of a statement if at least 3
# lines will be aligned. Otherwise, it
# can look very confusing.
- if ( $max_line > 2 ) {
+
+ # We have to be careful not to pad if there are too few
+ # lines. The current rule is:
+ # (1) in general we require at least 3 consecutive lines
+ # with the same leading chain operator token,
+ # (2) but an exception is that we only require two lines
+ # with leading colons if there are no more lines. For example,
+ # the first $i in the following snippet would get padding
+ # by the second rule:
+ #
+ # $i == 1 ? ( "First", "Color" )
+ # : $i == 2 ? ( "Then", "Rarity" )
+ # : ( "Then", "Name" );
+
+ if ( $max_line > 1 ) {
my $leading_token = $tokens_to_go[$ibeg_next];
+ my $tokens_differ;
# never indent line 1 of a '.' series because
# previous line is most likely at same level.
my $count = 1;
foreach my $l ( 2 .. 3 ) {
+ last if ( $line + $l > $max_line );
my $ibeg_next_next = $$ri_first[ $line + $l ];
- next
- unless $tokens_to_go[$ibeg_next_next] eq
- $leading_token;
+ if ( $tokens_to_go[$ibeg_next_next] ne
+ $leading_token )
+ {
+ $tokens_differ = 1;
+ last;
+ }
$count++;
}
- next unless $count == 3;
+ next if ($tokens_differ);
+ next if ( $count < 3 && $leading_token ne ':' );
$ipad = $ibeg;
}
else {
&& $mate_index_to_go[$i] > $iend );
# find next nonblank token to pad
- $ipad = $i + 1;
- if ( $types_to_go[$ipad] eq 'b' ) {
- $ipad++;
- last if ( $ipad > $iend );
- }
+ $ipad = $inext_to_go[$i];
+ last if ( $ipad > $iend );
}
last unless $ipad;
}
+ # We cannot pad a leading token at the lowest level because
+ # it could cause a bug in which the starting indentation
+ # level is guessed incorrectly each time the code is run
+ # though perltidy, thus causing the code to march off to
+ # the right. For example, the following snippet would have
+ # this problem:
+
+## ov_method mycan( $package, '(""' ), $package
+## or ov_method mycan( $package, '(0+' ), $package
+## or ov_method mycan( $package, '(bool' ), $package
+## or ov_method mycan( $package, '(nomethod' ), $package;
+
+ # If this snippet is within a block this won't happen
+ # unless the user just processes the snippet alone within
+ # an editor. In that case either the user will see and
+ # fix the problem or it will be corrected next time the
+ # entire file is processed with perltidy.
+ next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
+
+## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
+## IT DID MORE HARM THAN GOOD
+## ceil(
+## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
+## / $upem
+## ),
+##? # do not put leading padding for just 2 lines of math
+##? if ( $ipad == $ibeg
+##? && $line > 0
+##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
+##? && $is_math_op{$type_next}
+##? && $line + 2 <= $max_line )
+##? {
+##? my $ibeg_next_next = $$ri_first[ $line + 2 ];
+##? my $type_next_next = $types_to_go[$ibeg_next_next];
+##? next if !$is_math_op{$type_next_next};
+##? }
+
# next line must not be at greater depth
my $iend_next = $$ri_last[ $line + 1 ];
next
$nesting_depth_to_go[$ipad] );
# lines must be somewhat similar to be padded..
- my $inext_next = $ibeg_next + 1;
- if ( $types_to_go[$inext_next] eq 'b' ) {
- $inext_next++;
- }
- my $type = $types_to_go[$ipad];
+ my $inext_next = $inext_to_go[$ibeg_next];
+ my $type = $types_to_go[$ipad];
+ my $type_next = $types_to_go[ $ipad + 1 ];
# see if there are multiple continuation lines
my $logical_continuation_lines = 1;
$logical_continuation_lines++;
}
}
+
+ # see if leading types match
+ my $types_match = $types_to_go[$inext_next] eq $type;
+ my $matches_without_bang;
+
+ # if first line has leading ! then compare the following token
+ if ( !$types_match && $type eq '!' ) {
+ $types_match = $matches_without_bang =
+ $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
+ }
+
if (
# either we have multiple continuation lines to follow
|| (
# types must match
- $types_to_go[$inext_next] eq $type
+ $types_match
# and keywords must match if keyword
&& !(
)
{
- #----------------------begin special check---------------
+ #----------------------begin special checks--------------
#
- # One more check is needed before we can make the pad.
+ # SPECIAL CHECK 1:
+ # A check is needed before we can make the pad.
# If we are in a list with some long items, we want each
# item to stand out. So in the following example, the
- # first line begining with '$casefold->' would look good
+ # first line beginning with '$casefold->' would look good
# padded to align with the next line, but then it
# would be indented more than the last line, so we
# won't do it.
);
}
}
+
+ # SPECIAL CHECK 2:
+ # a minus may introduce a quoted variable, and we will
+ # add the pad only if this line begins with a bare word,
+ # such as for the word 'Button' here:
+ # [
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ #
+ # On the other hand, if 'Button' is quoted, it looks best
+ # not to pad:
+ # [
+ # 'Button' => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ if ( $types_to_go[$ibeg_next] eq 'm' ) {
+ $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
+ }
+
next unless $ok_to_pad;
#----------------------end special check---------------
my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
$pad_spaces = $length_2 - $length_1;
+ # If the first line has a leading ! and the second does
+ # not, then remove one space to try to align the next
+ # leading characters, which are often the same. For example:
+ # if ( !$ts
+ # || $ts == $self->Holder
+ # || $self->Holder->Type eq "Arena" )
+ #
+ # This usually helps readability, but if there are subsequent
+ # ! operators things will still get messed up. For example:
+ #
+ # if ( !exists $Net::DNS::typesbyname{$qtype}
+ # && exists $Net::DNS::classesbyname{$qtype}
+ # && !exists $Net::DNS::classesbyname{$qclass}
+ # && exists $Net::DNS::typesbyname{$qclass} )
+ # We can't fix that.
+ if ($matches_without_bang) { $pad_spaces-- }
+
# make sure this won't change if -lp is used
my $indentation_1 = $leading_spaces_to_go[$ibeg];
if ( ref($indentation_1) ) {
# we might be able to handle a pad of -1 by removing a blank
# token
if ( $pad_spaces < 0 ) {
+
if ( $pad_spaces == -1 ) {
if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
{
- $tokens_to_go[ $ipad - 1 ] = '';
+ pad_token( $ipad - 1, $pad_spaces );
}
}
$pad_spaces = 0;
# now apply any padding for alignment
if ( $ipad >= 0 && $pad_spaces ) {
+
my $length_t = total_line_length( $ibeg, $iend );
- if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length )
+ if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
{
- $tokens_to_go[$ipad] =
- ' ' x $pad_spaces . $tokens_to_go[$ipad];
+ pad_token( $ipad, $pad_spaces );
}
}
}
# skip closed container on this line
if ( $i > $ibeg ) {
- my $im = $i - 1;
- if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
+ my $im = max( $ibeg, $iprev_to_go[$i] );
if ( $type_sequence_to_go[$im]
&& $mate_index_to_go[$im] <= $iend )
{
$max_length = $length_t;
}
}
- $right_margin = $rOpts_maximum_line_length - $max_length;
+ $right_margin = maximum_line_length($ibeg) - $max_length;
if ( $right_margin < 0 ) { $right_margin = 0 }
}
# then we are probably vertically aligned. We could set
# an exact flag in sub scan_list, but this is good
# enough.
- my $indentation_count = keys %saw_indentation;
+ my $indentation_count = keys %saw_indentation;
my $is_vertically_aligned =
( $i == $ibeg
&& $first_line_comma_count > 1
Perl::Tidy::VerticalAligner::flush();
}
-# output_line_to_go sends one logical line of tokens on down the
-# pipeline to the VerticalAligner package, breaking the line into continuation
-# lines as necessary. The line of tokens is ready to go in the "to_go"
-# arrays.
-
-sub output_line_to_go {
+sub reset_block_text_accumulator {
- # debug stuff; this routine can be called from many points
- FORMATTER_DEBUG_FLAG_OUTPUT && do {
- my ( $a, $b, $c ) = caller;
- write_diagnostics(
-"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
- );
- my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
- write_diagnostics("$output_str\n");
- };
+ # save text after 'if' and 'elsif' to append after 'else'
+ if ($accumulating_text_for_block) {
- # just set a tentative breakpoint if we might be in a one-line block
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- set_forced_breakpoint($max_index_to_go);
- return;
+ if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
+ push @{$rleading_block_if_elsif_text}, $leading_block_text;
+ }
}
+ $accumulating_text_for_block = "";
+ $leading_block_text = "";
+ $leading_block_text_level = 0;
+ $leading_block_text_length_exceeded = 0;
+ $leading_block_text_line_number = 0;
+ $leading_block_text_line_length = 0;
+}
- my $cscw_block_comment;
- $cscw_block_comment = add_closing_side_comment()
- if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
+sub set_block_text_accumulator {
+ my $i = shift;
+ $accumulating_text_for_block = $tokens_to_go[$i];
+ if ( $accumulating_text_for_block !~ /^els/ ) {
+ $rleading_block_if_elsif_text = [];
+ }
+ $leading_block_text = "";
+ $leading_block_text_level = $levels_to_go[$i];
+ $leading_block_text_line_number =
+ $vertical_aligner_object->get_output_line_number();
+ $leading_block_text_length_exceeded = 0;
- match_opening_and_closing_tokens();
+ # this will contain the column number of the last character
+ # of the closing side comment
+ $leading_block_text_line_length =
+ length($csc_last_label) +
+ length($accumulating_text_for_block) +
+ length( $rOpts->{'closing-side-comment-prefix'} ) +
+ $leading_block_text_level * $rOpts_indent_columns + 3;
+}
- # tell the -lp option we are outputting a batch so it can close
- # any unfinished items in its stack
- finish_lp_batch();
+sub accumulate_block_text {
+ my $i = shift;
- my $imin = 0;
- my $imax = $max_index_to_go;
+ # accumulate leading text for -csc, ignoring any side comments
+ if ( $accumulating_text_for_block
+ && !$leading_block_text_length_exceeded
+ && $types_to_go[$i] ne '#' )
+ {
- # trim any blank tokens
- if ( $max_index_to_go >= 0 ) {
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- }
+ my $added_length = $token_lengths_to_go[$i];
+ $added_length += 1 if $i == 0;
+ my $new_line_length = $leading_block_text_line_length + $added_length;
- # anything left to write?
- if ( $imin <= $imax ) {
+ # we can add this text if we don't exceed some limits..
+ if (
- # add a blank line before certain key types
- if ( $last_line_leading_type !~ /^[#b]/ ) {
- my $want_blank = 0;
- my $leading_token = $tokens_to_go[$imin];
- my $leading_type = $types_to_go[$imin];
+ # we must not have already exceeded the text length limit
+ length($leading_block_text) <
+ $rOpts_closing_side_comment_maximum_text
- # blank lines before subs except declarations and one-liners
- # MCONVERSION LOCATION - for sub tokenization change
- if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
- $want_blank = ( $rOpts->{'blanks-before-subs'} )
- && (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) !~ /^[\;\}]$/
- );
- }
+ # and either:
+ # the new total line length must be below the line length limit
+ # or the new length must be below the text length limit
+ # (ie, we may allow one token to exceed the text length limit)
+ && (
+ $new_line_length <
+ maximum_line_length_for_level($leading_block_text_level)
- # break before all package declarations
- # MCONVERSION LOCATION - for tokenizaton change
- elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) {
- $want_blank = ( $rOpts->{'blanks-before-subs'} );
- }
+ || length($leading_block_text) + $added_length <
+ $rOpts_closing_side_comment_maximum_text
+ )
- # break before certain key blocks except one-liners
- if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
- $want_blank = ( $rOpts->{'blanks-before-subs'} )
- && (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) ne '}'
- );
- }
+ # UNLESS: we are adding a closing paren before the brace we seek.
+ # This is an attempt to avoid situations where the ... to be
+ # added are longer than the omitted right paren, as in:
- # Break before certain block types if we haven't had a break at this
- # level for a while. This is the difficult decision..
- elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
- && $leading_type eq 'k' )
- {
- my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
- if ( !defined($lc) ) { $lc = 0 }
+ # foreach my $item (@a_rather_long_variable_name_here) {
+ # &whatever;
+ # } ## end foreach my $item (@a_rather_long_variable_name_here...
- $want_blank = $rOpts->{'blanks-before-blocks'}
- && $lc >= $rOpts->{'long-block-line-count'}
- && $file_writer_object->get_consecutive_nonblank_lines() >=
- $rOpts->{'long-block-line-count'}
- && (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) ne '}'
- );
- }
+ || (
+ $tokens_to_go[$i] eq ')'
+ && (
+ (
+ $i + 1 <= $max_index_to_go
+ && $block_type_to_go[ $i + 1 ] eq
+ $accumulating_text_for_block
+ )
+ || ( $i + 2 <= $max_index_to_go
+ && $block_type_to_go[ $i + 2 ] eq
+ $accumulating_text_for_block )
+ )
+ )
+ )
+ {
- if ($want_blank) {
+ # add an extra space at each newline
+ if ( $i == 0 ) { $leading_block_text .= ' ' }
- # future: send blank line down normal path to VerticalAligner
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->write_blank_code_line();
- }
+ # add the token text
+ $leading_block_text .= $tokens_to_go[$i];
+ $leading_block_text_line_length = $new_line_length;
}
- # update blank line variables and count number of consecutive
- # non-blank, non-comment lines at this level
- $last_last_line_leading_level = $last_line_leading_level;
- $last_line_leading_level = $levels_to_go[$imin];
- if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
- $last_line_leading_type = $types_to_go[$imin];
- if ( $last_line_leading_level == $last_last_line_leading_level
- && $last_line_leading_type ne 'b'
- && $last_line_leading_type ne '#'
- && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
- {
- $nonblank_lines_at_depth[$last_line_leading_level]++;
- }
- else {
- $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ # show that text was truncated if necessary
+ elsif ( $types_to_go[$i] ne 'b' ) {
+ $leading_block_text_length_exceeded = 1;
+## Please see file perltidy.ERR
+ $leading_block_text .= '...';
}
-
- FORMATTER_DEBUG_FLAG_FLUSH && do {
- my ( $package, $file, $line ) = caller;
- print
-"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
- };
-
- # add a couple of extra terminal blank tokens
- pad_array_to_go();
-
- # set all forced breakpoints for good list formatting
- my $saw_good_break = 0;
- my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
-
- if (
- $max_index_to_go > 0
- && (
- $is_long_line
- || $old_line_count_in_batch > 1
- || is_unbalanced_batch()
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
- )
- )
- {
- $saw_good_break = scan_list();
- }
-
- # let $ri_first and $ri_last be references to lists of
- # first and last tokens of line fragments to output..
- my ( $ri_first, $ri_last );
-
- # write a single line if..
- if (
-
- # we aren't allowed to add any newlines
- !$rOpts_add_newlines
-
- # or, we don't already have an interior breakpoint
- # and we didn't see a good breakpoint
- || (
- !$forced_breakpoint_count
- && !$saw_good_break
-
- # and this line is 'short'
- && !$is_long_line
- )
- )
- {
- @$ri_first = ($imin);
- @$ri_last = ($imax);
- }
-
- # otherwise use multiple lines
- else {
-
- ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
-
- # now we do a correction step to clean this up a bit
- # (The only time we would not do this is for debugging)
- if ( $rOpts->{'recombine'} ) {
- ( $ri_first, $ri_last ) =
- recombine_breakpoints( $ri_first, $ri_last );
- }
- }
-
- # do corrector step if -lp option is used
- my $do_not_pad = 0;
- if ($rOpts_line_up_parentheses) {
- $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
- }
- send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
- }
- prepare_for_new_input_lines();
-
- # output any new -cscw block comment
- if ($cscw_block_comment) {
- flush();
- $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
- }
-}
-
-sub reset_block_text_accumulator {
-
- # save text after 'if' and 'elsif' to append after 'else'
- if ($accumulating_text_for_block) {
-
- if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
- push @{$rleading_block_if_elsif_text}, $leading_block_text;
- }
- }
- $accumulating_text_for_block = "";
- $leading_block_text = "";
- $leading_block_text_level = 0;
- $leading_block_text_length_exceeded = 0;
- $leading_block_text_line_number = 0;
- $leading_block_text_line_length = 0;
-}
-
-sub set_block_text_accumulator {
- my $i = shift;
- $accumulating_text_for_block = $tokens_to_go[$i];
- if ( $accumulating_text_for_block !~ /^els/ ) {
- $rleading_block_if_elsif_text = [];
- }
- $leading_block_text = "";
- $leading_block_text_level = $levels_to_go[$i];
- $leading_block_text_line_number =
- $vertical_aligner_object->get_output_line_number();
- $leading_block_text_length_exceeded = 0;
-
- # this will contain the column number of the last character
- # of the closing side comment
- $leading_block_text_line_length =
- length($accumulating_text_for_block) +
- length( $rOpts->{'closing-side-comment-prefix'} ) +
- $leading_block_text_level * $rOpts_indent_columns + 3;
-}
-
-sub accumulate_block_text {
- my $i = shift;
-
- # accumulate leading text for -csc, ignoring any side comments
- if ( $accumulating_text_for_block
- && !$leading_block_text_length_exceeded
- && $types_to_go[$i] ne '#' )
- {
-
- my $added_length = length( $tokens_to_go[$i] );
- $added_length += 1 if $i == 0;
- my $new_line_length = $leading_block_text_line_length + $added_length;
-
- # we can add this text if we don't exceed some limits..
- if (
-
- # we must not have already exceeded the text length limit
- length($leading_block_text) <
- $rOpts_closing_side_comment_maximum_text
-
- # and either:
- # the new total line length must be below the line length limit
- # or the new length must be below the text length limit
- # (ie, we may allow one token to exceed the text length limit)
- && ( $new_line_length < $rOpts_maximum_line_length
- || length($leading_block_text) + $added_length <
- $rOpts_closing_side_comment_maximum_text )
-
- # UNLESS: we are adding a closing paren before the brace we seek.
- # This is an attempt to avoid situations where the ... to be
- # added are longer than the omitted right paren, as in:
-
- # foreach my $item (@a_rather_long_variable_name_here) {
- # &whatever;
- # } ## end foreach my $item (@a_rather_long_variable_name_here...
-
- || (
- $tokens_to_go[$i] eq ')'
- && (
- (
- $i + 1 <= $max_index_to_go
- && $block_type_to_go[ $i + 1 ] eq
- $accumulating_text_for_block
- )
- || ( $i + 2 <= $max_index_to_go
- && $block_type_to_go[ $i + 2 ] eq
- $accumulating_text_for_block )
- )
- )
- )
- {
-
- # add an extra space at each newline
- if ( $i == 0 ) { $leading_block_text .= ' ' }
-
- # add the token text
- $leading_block_text .= $tokens_to_go[$i];
- $leading_block_text_line_length = $new_line_length;
- }
-
- # show that text was truncated if necessary
- elsif ( $types_to_go[$i] ne 'b' ) {
- $leading_block_text_length_exceeded = 1;
- $leading_block_text .= '...';
- }
- }
-}
+ }
+}
{
my %is_if_elsif_else_unless_while_until_for_foreach;
# curly. Note: 'else' does not, but must be included to allow trailing
# if/elsif text to be appended.
# patch for SWITCH/CASE: added 'case' and 'when'
- @_ = qw(if elsif else unless while until for foreach case when);
- @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
+ @_ = qw(if elsif else unless while until for foreach case when catch);
+ @is_if_elsif_else_unless_while_until_for_foreach{@_} =
+ (1) x scalar(@_);
}
sub accumulate_csc_text {
my $i_terminal = 0; # index of last nonblank token
my $terminal_block_type = "";
+ # update most recent statement label
+ $csc_last_label = "" unless ($csc_last_label);
+ if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
+ my $block_label = $csc_last_label;
+
+ # Loop over all tokens of this batch
for my $i ( 0 .. $max_index_to_go ) {
my $type = $types_to_go[$i];
my $block_type = $block_type_to_go[$i];
# restore any leading text saved when we entered this block
if ( defined( $block_leading_text{$type_sequence} ) ) {
- ( $block_leading_text, $rblock_leading_if_elsif_text ) =
- @{ $block_leading_text{$type_sequence} };
+ ( $block_leading_text, $rblock_leading_if_elsif_text )
+ = @{ $block_leading_text{$type_sequence} };
$i_block_leading_text = $i;
delete $block_leading_text{$type_sequence};
$rleading_block_if_elsif_text =
$rblock_leading_if_elsif_text;
}
+ if ( defined( $csc_block_label{$type_sequence} ) ) {
+ $block_label = $csc_block_label{$type_sequence};
+ delete $csc_block_label{$type_sequence};
+ }
+
# if we run into a '}' then we probably started accumulating
# at something like a trailing 'if' clause..no harm done.
if ( $accumulating_text_for_block
{
my $output_line_number =
$vertical_aligner_object->get_output_line_number();
- $block_line_count = $output_line_number -
+ $block_line_count =
+ $output_line_number -
$block_opening_line_number{$type_sequence} + 1;
delete $block_opening_line_number{$type_sequence};
}
$vertical_aligner_object->get_output_line_number();
$block_opening_line_number{$type_sequence} = $line_number;
+ # set a label for this block, except for
+ # a bare block which already has the label
+ # A label can only be used on the next {
+ if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
+ $csc_block_label{$type_sequence} = $csc_last_label;
+ $csc_last_label = "";
+
if ( $accumulating_text_for_block
&& $levels_to_go[$i] == $leading_block_text_level )
{
$block_leading_text, $rblock_leading_if_elsif_text );
}
+ # if this line ends in a label then remember it for the next pass
+ $csc_last_label = "";
+ if ( $terminal_type eq 'J' ) {
+ $csc_last_label = $tokens_to_go[$i_terminal];
+ }
+
return ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count );
+ $block_leading_text, $block_line_count, $block_label );
}
}
my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
my $csc_text = $block_leading_text;
- if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
+ if ( $block_type eq 'elsif'
+ && $rOpts_closing_side_comment_else_flag == 0 )
{
return $csc_text;
}
# undo it if line length exceeded
my $length =
- length($csc_text) + length($block_type) +
+ length($csc_text) +
+ length($block_type) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
- if ( $length > $rOpts_maximum_line_length ) {
+ if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
$csc_text = $saved_text;
}
return $csc_text;
}
+{ # sub balance_csc_text
+
+ my %matching_char;
+
+ BEGIN {
+ %matching_char = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '}' => '{',
+ ')' => '(',
+ ']' => '[',
+ );
+ }
+
+ sub balance_csc_text {
+
+ # Append characters to balance a closing side comment so that editors
+ # such as vim can correctly jump through code.
+ # Simple Example:
+ # input = ## end foreach my $foo ( sort { $b ...
+ # output = ## end foreach my $foo ( sort { $b ...})
+
+ # NOTE: This routine does not currently filter out structures within
+ # quoted text because the bounce algorithms in text editors do not
+ # necessarily do this either (a version of vim was checked and
+ # did not do this).
+
+ # Some complex examples which will cause trouble for some editors:
+ # while ( $mask_string =~ /\{[^{]*?\}/g ) {
+ # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
+ # if ( $1 eq '{' ) {
+ # test file test1/braces.pl has many such examples.
+
+ my ($csc) = @_;
+
+ # loop to examine characters one-by-one, RIGHT to LEFT and
+ # build a balancing ending, LEFT to RIGHT.
+ for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
+
+ my $char = substr( $csc, $pos, 1 );
+
+ # ignore everything except structural characters
+ next unless ( $matching_char{$char} );
+
+ # pop most recently appended character
+ my $top = chop($csc);
+
+ # push it back plus the mate to the newest character
+ # unless they balance each other.
+ $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
+ }
+
+ # return the balanced string
+ return $csc;
+ }
+}
+
sub add_closing_side_comment {
# add closing side comments after closing block braces if -csc used
#---------------------------------------------------------------
my ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count )
+ $block_leading_text, $block_line_count, $block_label )
= accumulate_csc_text();
#---------------------------------------------------------------
&& $block_type_to_go[$i_terminal] =~
/$closing_side_comment_list_pattern/o
+ # .. but not an anonymous sub
+ # These are not normally of interest, and their closing braces are
+ # often followed by commas or semicolons anyway. This also avoids
+ # possible erratic output due to line numbering inconsistencies
+ # in the cases where their closing braces terminate a line.
+ && $block_type_to_go[$i_terminal] ne 'sub'
+
# ..and the corresponding opening brace must is not in this batch
# (because we do not need to tag one-line blocks, although this
# should also be caught with a positive -csci value)
# ..and either
&& (
- # this is the last token (line doesnt have a side comment)
+ # this is the last token (line doesn't have a side comment)
!$have_side_comment
# or the old side comment is a closing side comment
{
# then make the closing side comment text
+ if ($block_label) { $block_label .= " " }
my $token =
-"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
+"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
# append any extra descriptive text collected above
if ( $i_block_leading_text == $i_terminal ) {
$token .= $block_leading_text;
}
+
+ $token = balance_csc_text($token)
+ if $rOpts->{'closing-side-comments-balanced'};
+
$token =~ s/\s*$//; # trim any trailing whitespace
# handle case of existing closing side comment
if ( $rOpts->{'closing-side-comment-warnings'} ) {
my $old_csc = $tokens_to_go[$max_index_to_go];
my $new_csc = $token;
- $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
- my $new_trailing_dots = $1;
- $old_csc =~ s/\.\.\.\s*$//;
$new_csc =~ s/\s+//g; # trim all whitespace
- $old_csc =~ s/\s+//g;
+ $old_csc =~ s/\s+//g; # trim all whitespace
+ $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
+ my $new_trailing_dots = $1;
+ $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
# Patch to handle multiple closing side comments at
# else and elsif's. These have become too complicated
# if the new comment is shorter and has been limited,
# only compare the common part.
- if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
+ if ( length($new_csc) < length($old_csc)
+ && $new_trailing_dots )
{
$old_csc = substr( $old_csc, 0, length($new_csc) );
}
else {
# insert the new side comment into the output token stream
- my $type = '#';
- my $block_type = '';
- my $type_sequence = '';
+ my $type = '#';
+ my $block_type = '';
+ my $type_sequence = '';
my $container_environment =
$container_environment_to_go[$max_index_to_go];
my $level = $levels_to_go[$max_index_to_go];
}
sub previous_nonblank_token {
- my ($i) = @_;
- if ( $i <= 0 ) {
- return "";
- }
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
- return $tokens_to_go[ $i - 1 ];
- }
- elsif ( $i > 1 ) {
- return $tokens_to_go[ $i - 2 ];
- }
- else {
- return "";
+ my ($i) = @_;
+ my $name = "";
+ my $im = $i - 1;
+ return "" if ( $im < 0 );
+ if ( $types_to_go[$im] eq 'b' ) { $im--; }
+ return "" if ( $im < 0 );
+ $name = $tokens_to_go[$im];
+
+ # prepend any sub name to an isolated -> to avoid unwanted alignments
+ # [test case is test8/penco.pl]
+ if ( $name eq '->' ) {
+ $im--;
+ if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
+ $name = $tokens_to_go[$im] . $name;
+ }
}
+ return $name;
}
sub send_lines_to_vertical_aligner {
my $rindentation_list = [0]; # ref to indentations for each line
+ # define the array @matching_token_to_go for the output tokens
+ # which will be non-blank for each special token (such as =>)
+ # for which alignment is required.
set_vertical_alignment_markers( $ri_first, $ri_last );
# flush if necessary to avoid unwanted alignment
Perl::Tidy::VerticalAligner::flush();
}
+ undo_ci( $ri_first, $ri_last );
+
set_logical_padding( $ri_first, $ri_last );
# loop to prepare each line for shipment
my $ibeg = $$ri_first[$n];
my $iend = $$ri_last[$n];
- my @patterns = ();
- my @tokens = ();
- my @fields = ();
- my $i_start = $ibeg;
- my $i;
+ my ( $rtokens, $rfields, $rpatterns ) =
+ make_alignment_patterns( $ibeg, $iend );
- my $depth = 0;
- my @container_name = ("");
- my @multiple_comma_arrows = (undef);
+ # Set flag to show how much level changes between this line
+ # and the next line, if we have it.
+ my $ljump = 0;
+ if ( $n < $n_last_line ) {
+ my $ibegp = $$ri_first[ $n + 1 ];
+ $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
+ }
- my $j = 0; # field index
+ my ( $indentation, $lev, $level_end, $terminal_type,
+ $is_semicolon_terminated, $is_outdented_line )
+ = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
+ $ri_first, $ri_last, $rindentation_list, $ljump );
- $patterns[0] = "";
- for $i ( $ibeg .. $iend ) {
+ # we will allow outdenting of long lines..
+ my $outdent_long_lines = (
- # Keep track of containers balanced on this line only.
- # These are used below to prevent unwanted cross-line alignments.
- # Unbalanced containers already avoid aligning across
- # container boundaries.
- if ( $tokens_to_go[$i] eq '(' ) {
- my $i_mate = $mate_index_to_go[$i];
- if ( $i_mate > $i && $i_mate <= $iend ) {
- $depth++;
- my $seqno = $type_sequence_to_go[$i];
- my $count = comma_arrow_count($seqno);
- $multiple_comma_arrows[$depth] = $count && $count > 1;
- my $name = previous_nonblank_token($i);
- $name =~ s/^->//;
- $container_name[$depth] = "+" . $name;
- }
- }
- elsif ( $tokens_to_go[$i] eq ')' ) {
- $depth-- if $depth > 0;
- }
+ # which are long quotes, if allowed
+ ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
- # if we find a new synchronization token, we are done with
- # a field
- if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
+ # which are long block comments, if allowed
+ || (
+ $types_to_go[$ibeg] eq '#'
+ && $rOpts->{'outdent-long-comments'}
- my $tok = my $raw_tok = $matching_token_to_go[$i];
+ # but not if this is a static block comment
+ && !$is_static_block_comment
+ )
+ );
+
+ my $level_jump =
+ $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
+
+ my $rvertical_tightness_flags =
+ set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
+ $ri_first, $ri_last );
+
+ # flush an outdented line to avoid any unwanted vertical alignment
+ Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+
+ # Set a flag at the final ':' of a ternary chain to request
+ # vertical alignment of the final term. Here is a
+ # slightly complex example:
+ #
+ # $self->{_text} = (
+ # !$section ? ''
+ # : $type eq 'item' ? "the $section entry"
+ # : "the section on $section"
+ # )
+ # . (
+ # $page
+ # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+ # : ' elsewhere in this document'
+ # );
+ #
+ my $is_terminal_ternary = 0;
+ if ( $tokens_to_go[$ibeg] eq ':'
+ || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
+ {
+ my $last_leading_type = ":";
+ if ( $n > 0 ) {
+ my $iprev = $$ri_first[ $n - 1 ];
+ $last_leading_type = $types_to_go[$iprev];
+ }
+ if ( $terminal_type ne ';'
+ && $n_last_line > $n
+ && $level_end == $lev )
+ {
+ my $inext = $$ri_first[ $n + 1 ];
+ $level_end = $levels_to_go[$inext];
+ $terminal_type = $types_to_go[$inext];
+ }
+
+ $is_terminal_ternary = $last_leading_type eq ':'
+ && ( ( $terminal_type eq ';' && $level_end <= $lev )
+ || ( $terminal_type ne ':' && $level_end < $lev ) )
+
+ # the terminal term must not contain any ternary terms, as in
+ # my $ECHO = (
+ # $Is_MSWin32 ? ".\\echo$$"
+ # : $Is_MacOS ? ":echo$$"
+ # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
+ # );
+ && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
+ }
+
+ # send this new line down the pipe
+ my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
+ Perl::Tidy::VerticalAligner::valign_input(
+ $lev,
+ $level_end,
+ $indentation,
+ $rfields,
+ $rtokens,
+ $rpatterns,
+ $forced_breakpoint_to_go[$iend] || $in_comma_list,
+ $outdent_long_lines,
+ $is_terminal_ternary,
+ $is_semicolon_terminated,
+ $do_not_pad,
+ $rvertical_tightness_flags,
+ $level_jump,
+ );
+ $in_comma_list =
+ $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
+
+ # flush an outdented line to avoid any unwanted vertical alignment
+ Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+
+ $do_not_pad = 0;
+
+ # Set flag indicating if this line ends in an opening
+ # token and is very short, so that a blank line is not
+ # needed if the subsequent line is a comment.
+ # Examples of what we are looking for:
+ # {
+ # && (
+ # BEGIN {
+ # default {
+ # sub {
+ $last_output_short_opening_token
+
+ # line ends in opening token
+ = $types_to_go[$iend] =~ /^[\{\(\[L]$/
+
+ # and either
+ && (
+ # line has either single opening token
+ $iend == $ibeg
+
+ # or is a single token followed by opening token.
+ # Note that sub identifiers have blanks like 'sub doit'
+ || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
+ )
+
+ # and limit total to 10 character widths
+ && token_sequence_length( $ibeg, $iend ) <= 10;
+
+ } # end of loop to output each line
+
+ # remember indentation of lines containing opening containers for
+ # later use by sub set_adjusted_indentation
+ save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+}
+
+{ # begin make_alignment_patterns
+
+ my %block_type_map;
+ my %keyword_map;
+
+ BEGIN {
+
+ # map related block names into a common name to
+ # allow alignment
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
+
+ # map certain keywords to the same 'if' class to align
+ # long if/elsif sequences. [elsif.pl]
+ %keyword_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'given',
+ 'default' => 'given',
+ 'case' => 'switch',
+
+ # treat an 'undef' similar to numbers and quotes
+ 'undef' => 'Q',
+ );
+ }
+
+ sub make_alignment_patterns {
+
+ # Here we do some important preliminary work for the
+ # vertical aligner. We create three arrays for one
+ # output line. These arrays contain strings that can
+ # be tested by the vertical aligner to see if
+ # consecutive lines can be aligned vertically.
+ #
+ # The three arrays are indexed on the vertical
+ # alignment fields and are:
+ # @tokens - a list of any vertical alignment tokens for this line.
+ # These are tokens, such as '=' '&&' '#' etc which
+ # we want to might align vertically. These are
+ # decorated with various information such as
+ # nesting depth to prevent unwanted vertical
+ # alignment matches.
+ # @fields - the actual text of the line between the vertical alignment
+ # tokens.
+ # @patterns - a modified list of token types, one for each alignment
+ # field. These should normally each match before alignment is
+ # allowed, even when the alignment tokens match.
+ my ( $ibeg, $iend ) = @_;
+ my @tokens = ();
+ my @fields = ();
+ my @patterns = ();
+ my $i_start = $ibeg;
+ my $i;
+
+ my $depth = 0;
+ my @container_name = ("");
+ my @multiple_comma_arrows = (undef);
+
+ my $j = 0; # field index
+
+ $patterns[0] = "";
+ for $i ( $ibeg .. $iend ) {
+
+ # Keep track of containers balanced on this line only.
+ # These are used below to prevent unwanted cross-line alignments.
+ # Unbalanced containers already avoid aligning across
+ # container boundaries.
+ if ( $tokens_to_go[$i] eq '(' ) {
+
+ # if container is balanced on this line...
+ my $i_mate = $mate_index_to_go[$i];
+ if ( $i_mate > $i && $i_mate <= $iend ) {
+ $depth++;
+ my $seqno = $type_sequence_to_go[$i];
+ my $count = comma_arrow_count($seqno);
+ $multiple_comma_arrows[$depth] = $count && $count > 1;
+
+ # Append the previous token name to make the container name
+ # more unique. This name will also be given to any commas
+ # within this container, and it helps avoid undesirable
+ # alignments of different types of containers.
+ my $name = previous_nonblank_token($i);
+ $name =~ s/^->//;
+ $container_name[$depth] = "+" . $name;
+
+ # Make the container name even more unique if necessary.
+ # If we are not vertically aligning this opening paren,
+ # append a character count to avoid bad alignment because
+ # it usually looks bad to align commas within containers
+ # for which the opening parens do not align. Here
+ # is an example very BAD alignment of commas (because
+ # the atan2 functions are not all aligned):
+ # $XY =
+ # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
+ # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
+ # $X * atan2( $X, 1 ) -
+ # $Y * atan2( $Y, 1 );
+ #
+ # On the other hand, it is usually okay to align commas if
+ # opening parens align, such as:
+ # glVertex3d( $cx + $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy + $s * $ys, $z );
+ # glVertex3d( $cx - $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy - $s * $ys, $z );
+ #
+ # To distinguish between these situations, we will
+ # append the length of the line from the previous matching
+ # token, or beginning of line, to the function name. This
+ # will allow the vertical aligner to reject undesirable
+ # matches.
+
+ # if we are not aligning on this paren...
+ if ( $matching_token_to_go[$i] eq '' ) {
+
+ # Sum length from previous alignment, or start of line.
+ my $len =
+ ( $i_start == $ibeg )
+ ? total_line_length( $i_start, $i - 1 )
+ : token_sequence_length( $i_start, $i - 1 );
+
+ # tack length onto the container name to make unique
+ $container_name[$depth] .= "-" . $len;
+ }
+ }
+ }
+ elsif ( $tokens_to_go[$i] eq ')' ) {
+ $depth-- if $depth > 0;
+ }
+
+ # if we find a new synchronization token, we are done with
+ # a field
+ if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
+
+ my $tok = my $raw_tok = $matching_token_to_go[$i];
# make separators in different nesting depths unique
# by appending the nesting depth digit.
$tok .= "$nesting_depth_to_go[$i]";
}
- # do any special decorations for commas to avoid unwanted
- # cross-line alignments.
- if ( $raw_tok eq ',' ) {
+ # also decorate commas with any container name to avoid
+ # unwanted cross-line alignments.
+ if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
if ( $container_name[$depth] ) {
$tok .= $container_name[$depth];
}
}
- # decorate '=>' with:
- # - Nothing if this container is unbalanced on this line.
- # - The previous token if it is balanced and multiple '=>'s
- # - The container name if it is bananced and no other '=>'s
- elsif ( $raw_tok eq '=>' ) {
- if ( $container_name[$depth] ) {
- if ( $multiple_comma_arrows[$depth] ) {
- $tok .= "+" . previous_nonblank_token($i);
- }
- else {
- $tok .= $container_name[$depth];
- }
+ # Patch to avoid aligning leading and trailing if, unless.
+ # Mark trailing if, unless statements with container names.
+ # This makes them different from leading if, unless which
+ # are not so marked at present. If we ever need to name
+ # them too, we could use ci to distinguish them.
+ # Example problem to avoid:
+ # return ( 2, "DBERROR" )
+ # if ( $retval == 2 );
+ # if ( scalar @_ ) {
+ # my ( $a, $b, $c, $d, $e, $f ) = @_;
+ # }
+ if ( $raw_tok eq '(' ) {
+ my $ci = $ci_levels_to_go[$ibeg];
+ if ( $container_name[$depth] =~ /^\+(if|unless)/
+ && $ci )
+ {
+ $tok .= $container_name[$depth];
}
}
+ # Decorate block braces with block types to avoid
+ # unwanted alignments such as the following:
+ # foreach ( @{$routput_array} ) { $fh->print($_) }
+ # eval { $fh->close() };
+ if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
+ my $block_type = $block_type_to_go[$i];
+
+ # map certain related block types to allow
+ # else blocks to align
+ $block_type = $block_type_map{$block_type}
+ if ( defined( $block_type_map{$block_type} ) );
+
+ # remove sub names to allow one-line sub braces to align
+ # regardless of name
+ #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
+ if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
+
+ # allow all control-type blocks to align
+ if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
+
+ $tok .= $block_type;
+ }
+
# concatenate the text of the consecutive tokens to form
# the field
push( @fields,
# Mark most things before arrows as a quote to
# get them to line up. Testfile: mixed.pl.
if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
- my $next_type = $types_to_go[ $i + 1 ];
+ my $next_type = $types_to_go[ $i + 1 ];
my $i_next_nonblank =
( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
$type = 'Q';
+
+ # Patch to ignore leading minus before words,
+ # by changing pattern 'mQ' into just 'Q',
+ # so that we can align things like this:
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
}
}
- # minor patch to make numbers and quotes align
+ # patch to make numbers and quotes align
if ( $type eq 'n' ) { $type = 'Q' }
+ # patch to ignore any ! in patterns
+ if ( $type eq '!' ) { $type = '' }
+
$patterns[$j] .= $type;
}
# for keywords we have to use the actual text
else {
- # map certain keywords to the same 'if' class to align
- # long if/elsif sequences. my testfile: elsif.pl
my $tok = $tokens_to_go[$i];
- if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
- $tok = 'if';
- }
+
+ # but map certain keywords to a common string to allow
+ # alignment.
+ $tok = $keyword_map{$tok}
+ if ( defined( $keyword_map{$tok} ) );
$patterns[$j] .= $tok;
}
}
# done with this line .. join text of tokens to make the last field
push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
+ return ( \@tokens, \@fields, \@patterns );
+ }
- my ( $indentation, $lev, $level_end, $is_semicolon_terminated,
- $is_outdented_line )
- = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
- $ri_first, $ri_last, $rindentation_list );
-
- # we will allow outdenting of long lines..
- my $outdent_long_lines = (
-
- # which are long quotes, if allowed
- ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
-
- # which are long block comments, if allowed
- || (
- $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-long-comments'}
-
- # but not if this is a static block comment
- && !(
- $rOpts->{'static-block-comments'}
- && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o
- )
- )
- );
-
- my $level_jump =
- $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
-
- my $rvertical_tightness_flags =
- set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
- $ri_first, $ri_last );
-
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
- # send this new line down the pipe
- my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
- Perl::Tidy::VerticalAligner::append_line(
- $lev,
- $level_end,
- $indentation,
- \@fields,
- \@tokens,
- \@patterns,
- $forced_breakpoint_to_go[$iend] || $in_comma_list,
- $outdent_long_lines,
- $is_semicolon_terminated,
- $do_not_pad,
- $rvertical_tightness_flags,
- $level_jump,
- );
- $in_comma_list =
- $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
-
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
- $do_not_pad = 0;
-
- } # end of loop to output each line
-
- # remember indentation of lines containing opening containers for
- # later use by sub set_adjusted_indentation
- save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
-}
+} # end make_alignment_patterns
-{ # begin unmatched_indexes
+{ # begin unmatched_indexes
# closure to keep track of unbalanced containers.
# arrays shared by the routines in this block:
@unmatched_opening_indexes_in_this_batch = ();
@unmatched_closing_indexes_in_this_batch = ();
%comma_arrow_count = ();
+ my $comma_arrow_count_contained = 0;
my ( $i, $i_mate, $token );
foreach $i ( 0 .. $max_index_to_go ) {
{
$mate_index_to_go[$i] = $i_mate;
$mate_index_to_go[$i_mate] = $i;
+ my $seqno = $type_sequence_to_go[$i];
+ if ( $comma_arrow_count{$seqno} ) {
+ $comma_arrow_count_contained +=
+ $comma_arrow_count{$seqno};
+ }
}
else {
push @unmatched_opening_indexes_in_this_batch,
}
}
}
+ return $comma_arrow_count_contained;
}
sub save_opening_indentation {
# first, see if the opening token is in the current batch
my $i_opening = $mate_index_to_go[$i_closing];
- my ( $indent, $offset );
+ my ( $indent, $offset, $is_leading, $exists );
+ $exists = 1;
if ( $i_opening >= 0 ) {
# it is..look up the indentation
- ( $indent, $offset ) =
+ ( $indent, $offset, $is_leading ) =
lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
$rindentation_list );
}
my $seqno = $type_sequence_to_go[$i_closing];
if ($seqno) {
if ( $saved_opening_indentation{$seqno} ) {
- ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
+ ( $indent, $offset, $is_leading ) =
+ @{ $saved_opening_indentation{$seqno} };
+ }
+
+ # some kind of serious error
+ # (example is badfile.t)
+ else {
+ $indent = 0;
+ $offset = 0;
+ $is_leading = 0;
+ $exists = 0;
}
}
# if no sequence number it must be an unbalanced container
else {
- $indent = 0;
- $offset = 0;
+ $indent = 0;
+ $offset = 0;
+ $is_leading = 0;
+ $exists = 0;
}
}
- return ( $indent, $offset );
+ return ( $indent, $offset, $is_leading, $exists );
}
sub lookup_opening_indentation {
$rindentation_list->[0] =
$nline; # save line number to start looking next call
- my $ibeg = $ri_start->[$nline];
- my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
- return ( $rindentation_list->[ $nline + 1 ], $offset );
+ my $ibeg = $ri_start->[$nline];
+ my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
+ my $is_leading = ( $ibeg == $i_opening );
+ return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}
-sub set_adjusted_indentation {
+{
+ my %is_if_elsif_else_unless_while_until_for_foreach;
- # This routine has the final say regarding the actual indentation of
- # a line. It starts with the basic indentation which has been
- # defined for the leading token, and then takes into account any
- # options that the user has set regarding special indenting and
- # outdenting.
+ BEGIN {
- my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
- $rindentation_list )
- = @_;
+ # These block types may have text between the keyword and opening
+ # curly. Note: 'else' does not, but must be included to allow trailing
+ # if/elsif text to be appended.
+ # patch for SWITCH/CASE: added 'case' and 'when'
+ @_ = qw(if elsif else unless while until for foreach case when);
+ @is_if_elsif_else_unless_while_until_for_foreach{@_} =
+ (1) x scalar(@_);
+ }
- # we need to know the last token of this line
- my ( $terminal_type, $i_terminal ) =
- terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
+ sub set_adjusted_indentation {
- my $is_outdented_line = 0;
+ # This routine has the final say regarding the actual indentation of
+ # a line. It starts with the basic indentation which has been
+ # defined for the leading token, and then takes into account any
+ # options that the user has set regarding special indenting and
+ # outdenting.
- my $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
+ my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
+ $rindentation_list, $level_jump )
+ = @_;
- # Most lines are indented according to the initial token.
- # But it is common to outdent to the level just after the
- # terminal token in certain cases...
- # adjust_indentation flag:
- # 0 - do not adjust
- # 1 - outdent
- # 2 - vertically align with opening token
- # 3 - indent
- my $adjust_indentation = 0;
- my $default_adjust_indentation = $adjust_indentation;
+ # we need to know the last token of this line
+ my ( $terminal_type, $i_terminal ) =
+ terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
- my ( $opening_indentation, $opening_offset );
+ my $is_outdented_line = 0;
- # if we are at a closing token of some type..
- if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
+ my $is_semicolon_terminated = $terminal_type eq ';'
+ && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
- # get the indentation of the line containing the corresponding
- # opening token
- ( $opening_indentation, $opening_offset ) =
- get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
+ ##########################################################
+ # Section 1: set a flag and a default indentation
+ #
+ # Most lines are indented according to the initial token.
+ # But it is common to outdent to the level just after the
+ # terminal token in certain cases...
+ # adjust_indentation flag:
+ # 0 - do not adjust
+ # 1 - outdent
+ # 2 - vertically align with opening token
+ # 3 - indent
+ ##########################################################
+ my $adjust_indentation = 0;
+ my $default_adjust_indentation = $adjust_indentation;
- # First set the default behavior:
- # default behavior is to outdent closing lines
- # of the form: "); }; ]; )->xxx;"
- if (
- $is_semicolon_terminated
+ my (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ );
- # and 'cuddled parens' of the form: ")->pack("
- || (
- $terminal_type eq '('
- && $types_to_go[$ibeg] eq ')'
- && ( $nesting_depth_to_go[$iend] + 1 ==
- $nesting_depth_to_go[$ibeg] )
- )
- )
- {
- $adjust_indentation = 1;
- }
+ # if we are at a closing token of some type..
+ if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
- # TESTING: outdent something like '),'
- if (
- $terminal_type eq ','
+ # get the indentation of the line containing the corresponding
+ # opening token
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
- # allow just one character before the comma
- && $i_terminal == $ibeg + 1
+ # First set the default behavior:
+ if (
- # requre LIST environment; otherwise, we may outdent too much --
- # this can happen in calls without parentheses (overload.t);
- && $container_environment_to_go[$i_terminal] eq 'LIST'
- )
- {
- $adjust_indentation = 1;
- }
+ # default behavior is to outdent closing lines
+ # of the form: "); }; ]; )->xxx;"
+ $is_semicolon_terminated
- # undo continuation indentation of a terminal closing token if
- # it is the last token before a level decrease. This will allow
- # a closing token to line up with its opening counterpart, and
- # avoids a indentation jump larger than 1 level.
- if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
- && $i_terminal == $ibeg )
- {
- my $ci = $ci_levels_to_go[$ibeg];
- my $lev = $levels_to_go[$ibeg];
- my $next_type = $types_to_go[ $ibeg + 1 ];
- my $i_next_nonblank =
- ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
- if ( $i_next_nonblank <= $max_index_to_go
- && $levels_to_go[$i_next_nonblank] < $lev )
+ # and 'cuddled parens' of the form: ")->pack("
+ || (
+ $terminal_type eq '('
+ && $types_to_go[$ibeg] eq ')'
+ && ( $nesting_depth_to_go[$iend] + 1 ==
+ $nesting_depth_to_go[$ibeg] )
+ )
+
+ # and when the next line is at a lower indentation level
+ # PATCH: and only if the style allows undoing continuation
+ # for all closing token types. We should really wait until
+ # the indentation of the next line is known and then make
+ # a decision, but that would require another pass.
+ || ( $level_jump < 0 && !$some_closing_token_indentation )
+ )
{
$adjust_indentation = 1;
}
- }
- $default_adjust_indentation = $adjust_indentation;
-
- # Now modify default behavior according to user request:
- # handle option to indent non-blocks of the form ); }; ];
- # But don't do special indentation to something like ')->pack('
- if ( !$block_type_to_go[$ibeg] ) {
- my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
+ # outdent something like '),'
if (
- $cti == 1
- && ( $i_terminal <= $ibeg + 1
- || $is_semicolon_terminated )
+ $terminal_type eq ','
+
+ # allow just one character before the comma
+ && $i_terminal == $ibeg + 1
+
+ # require LIST environment; otherwise, we may outdent too much -
+ # this can happen in calls without parentheses (overload.t);
+ && $container_environment_to_go[$i_terminal] eq 'LIST'
)
{
- $adjust_indentation = 2;
+ $adjust_indentation = 1;
}
- elsif ($cti == 2
- && $is_semicolon_terminated
- && $i_terminal == $ibeg + 1 )
+
+ # undo continuation indentation of a terminal closing token if
+ # it is the last token before a level decrease. This will allow
+ # a closing token to line up with its opening counterpart, and
+ # avoids a indentation jump larger than 1 level.
+ if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
+ && $i_terminal == $ibeg )
{
- $adjust_indentation = 3;
+ my $ci = $ci_levels_to_go[$ibeg];
+ my $lev = $levels_to_go[$ibeg];
+ my $next_type = $types_to_go[ $ibeg + 1 ];
+ my $i_next_nonblank =
+ ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
+ if ( $i_next_nonblank <= $max_index_to_go
+ && $levels_to_go[$i_next_nonblank] < $lev )
+ {
+ $adjust_indentation = 1;
+ }
+
+ # Patch for RT #96101, in which closing brace of anonymous subs
+ # was not outdented. We should look ahead and see if there is
+ # a level decrease at the next token (i.e., a closing token),
+ # but right now we do not have that information. For now
+ # we see if we are in a list, and this works well.
+ # See test files 'sub*.t' for good test cases.
+ if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
+ && $container_environment_to_go[$i_terminal] eq 'LIST'
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_SPACES($indentation) >
+ get_SPACES($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
+ }
}
- }
- # handle option to indent blocks
- else {
- if (
- $rOpts->{'indent-closing-brace'}
- && (
- $i_terminal == $ibeg # isolated terminal '}'
- || $is_semicolon_terminated
- )
- ) # } xxxx ;
+ # YVES patch 1 of 2:
+ # Undo ci of line with leading closing eval brace,
+ # but not beyond the indention of the line with
+ # the opening brace.
+ if ( $block_type_to_go[$ibeg] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'} )
{
- $adjust_indentation = 3;
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_SPACES($indentation) >
+ get_SPACES($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
}
- }
- }
-
- # if at ');', '};', '>;', and '];' of a terminal qw quote
- elsif ( $$rpatterns[0] =~ /^qb*;$/ && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) {
- if ( $closing_token_indentation{$1} == 0 ) {
- $adjust_indentation = 1;
- }
- else {
- $adjust_indentation = 3;
- }
- }
-
- # Handle variation in indentation styles...
- # Select the indentation object to define leading
- # whitespace. If we are outdenting something like '} } );'
- # then we want to use one level below the last token
- # ($i_terminal) in order to get it to fully outdent through
- # all levels.
- my $indentation;
- my $lev;
- my $level_end = $levels_to_go[$iend];
-
- if ( $adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- $lev = $levels_to_go[$ibeg];
- }
- elsif ( $adjust_indentation == 1 ) {
- $indentation = $reduced_spaces_to_go[$i_terminal];
- $lev = $levels_to_go[$i_terminal];
- }
-
- # handle indented closing token which aligns with opening token
- elsif ( $adjust_indentation == 2 ) {
-
- # handle option to align closing token with opening token
- $lev = $levels_to_go[$ibeg];
- # calculate spaces needed to align with opening token
- my $space_count = get_SPACES($opening_indentation) + $opening_offset;
+ $default_adjust_indentation = $adjust_indentation;
- # Indent less than the previous line.
- #
- # Problem: For -lp we don't exactly know what it was if there were
- # recoverable spaces sent to the aligner. A good solution would be to
- # force a flush of the vertical alignment buffer, so that we would
- # know. For now, this rule is used for -lp:
- #
- # When the last line did not start with a closing token we will be
- # optimistic that the aligner will recover everything wanted.
- #
- # This rule will prevent us from breaking a hierarchy of closing
- # tokens, and in a worst case will leave a closing paren too far
- # indented, but this is better than frequently leaving it not indented
- # enough.
- my $last_spaces = get_SPACES($last_indentation_written);
- if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
- $last_spaces += get_RECOVERABLE_SPACES($last_indentation_written);
- }
-
- # reset the indentation to the new space count if it works
- # only options are all or none: nothing in-between looks good
- $lev = $levels_to_go[$ibeg];
- if ( $space_count < $last_spaces ) {
- if ($rOpts_line_up_parentheses) {
- my $lev = $levels_to_go[$ibeg];
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ # Now modify default behavior according to user request:
+ # handle option to indent non-blocks of the form ); }; ];
+ # But don't do special indentation to something like ')->pack('
+ if ( !$block_type_to_go[$ibeg] ) {
+ my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
+ if ( $cti == 1 ) {
+ if ( $i_terminal <= $ibeg + 1
+ || $is_semicolon_terminated )
+ {
+ $adjust_indentation = 2;
+ }
+ else {
+ $adjust_indentation = 0;
+ }
+ }
+ elsif ( $cti == 2 ) {
+ if ($is_semicolon_terminated) {
+ $adjust_indentation = 3;
+ }
+ else {
+ $adjust_indentation = 0;
+ }
+ }
+ elsif ( $cti == 3 ) {
+ $adjust_indentation = 3;
+ }
}
+
+ # handle option to indent blocks
else {
- $indentation = $space_count;
+ if (
+ $rOpts->{'indent-closing-brace'}
+ && (
+ $i_terminal == $ibeg # isolated terminal '}'
+ || $is_semicolon_terminated
+ )
+ ) # } xxxx ;
+ {
+ $adjust_indentation = 3;
+ }
}
}
- # revert to default if it doesnt work
- else {
- $space_count = leading_spaces_to_go($ibeg);
- if ( $default_adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
+ # if at ');', '};', '>;', and '];' of a terminal qw quote
+ elsif ($$rpatterns[0] =~ /^qb*;$/
+ && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
+ {
+ if ( $closing_token_indentation{$1} == 0 ) {
+ $adjust_indentation = 1;
}
- elsif ( $default_adjust_indentation == 1 ) {
- $indentation = $reduced_spaces_to_go[$i_terminal];
- $lev = $levels_to_go[$i_terminal];
+ else {
+ $adjust_indentation = 3;
}
}
- }
-
- # Full indentaion of closing tokens (-icb and -icp or -cti=2)
- else {
- # There are two ways to handle -icb and -icp...
- # One way is to use the indentation of the previous line:
- # $indentation = $last_indentation_written;
+ # if line begins with a ':', align it with any
+ # previous line leading with corresponding ?
+ elsif ( $types_to_go[$ibeg] eq ':' ) {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ if ($is_leading) { $adjust_indentation = 2; }
+ }
- # The other way is to use the indentation that the previous line
- # would have had if it hadn't been adjusted:
- $indentation = $last_unadjusted_indentation;
+ ##########################################################
+ # Section 2: set indentation according to flag set above
+ #
+ # Select the indentation object to define leading
+ # whitespace. If we are outdenting something like '} } );'
+ # then we want to use one level below the last token
+ # ($i_terminal) in order to get it to fully outdent through
+ # all levels.
+ ##########################################################
+ my $indentation;
+ my $lev;
+ my $level_end = $levels_to_go[$iend];
- # Current method: use the minimum of the two. This avoids inconsistent
- # indentation.
- if ( get_SPACES($last_indentation_written) < get_SPACES($indentation) )
- {
- $indentation = $last_indentation_written;
+ if ( $adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_to_go[$ibeg];
+ $lev = $levels_to_go[$ibeg];
+ }
+ elsif ( $adjust_indentation == 1 ) {
+ $indentation = $reduced_spaces_to_go[$i_terminal];
+ $lev = $levels_to_go[$i_terminal];
}
- # use previous indentation but use own level
- # to cause list to be flushed properly
- $lev = $levels_to_go[$ibeg];
- }
+ # handle indented closing token which aligns with opening token
+ elsif ( $adjust_indentation == 2 ) {
- # remember indentation except for multi-line quotes, which get
- # no indentation
- unless ( $types_to_go[$ibeg] eq 'Q' && $lev == 0 ) {
- $last_indentation_written = $indentation;
- $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
- $last_leading_token = $tokens_to_go[$ibeg];
- }
+ # handle option to align closing token with opening token
+ $lev = $levels_to_go[$ibeg];
- # be sure lines with leading closing tokens are not outdented more
- # than the line which contained the corresponding opening token.
- my $is_isolated_block_brace =
- ( $iend == $ibeg ) && $block_type_to_go[$ibeg];
- if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
- if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
- $indentation = $opening_indentation;
- }
- }
+ # calculate spaces needed to align with opening token
+ my $space_count =
+ get_SPACES($opening_indentation) + $opening_offset;
- # remember the indentation of each line of this batch
- push @{$rindentation_list}, $indentation;
+ # Indent less than the previous line.
+ #
+ # Problem: For -lp we don't exactly know what it was if there
+ # were recoverable spaces sent to the aligner. A good solution
+ # would be to force a flush of the vertical alignment buffer, so
+ # that we would know. For now, this rule is used for -lp:
+ #
+ # When the last line did not start with a closing token we will
+ # be optimistic that the aligner will recover everything wanted.
+ #
+ # This rule will prevent us from breaking a hierarchy of closing
+ # tokens, and in a worst case will leave a closing paren too far
+ # indented, but this is better than frequently leaving it not
+ # indented enough.
+ my $last_spaces = get_SPACES($last_indentation_written);
+ if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
+ $last_spaces +=
+ get_RECOVERABLE_SPACES($last_indentation_written);
+ }
+
+ # reset the indentation to the new space count if it works
+ # only options are all or none: nothing in-between looks good
+ $lev = $levels_to_go[$ibeg];
+ if ( $space_count < $last_spaces ) {
+ if ($rOpts_line_up_parentheses) {
+ my $lev = $levels_to_go[$ibeg];
+ $indentation =
+ new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ }
+ else {
+ $indentation = $space_count;
+ }
+ }
- # outdent lines with certain leading tokens...
- if (
+ # revert to default if it doesn't work
+ else {
+ $space_count = leading_spaces_to_go($ibeg);
+ if ( $default_adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_to_go[$ibeg];
+ }
+ elsif ( $default_adjust_indentation == 1 ) {
+ $indentation = $reduced_spaces_to_go[$i_terminal];
+ $lev = $levels_to_go[$i_terminal];
+ }
+ }
+ }
- # must be first word of this batch
- $ibeg == 0
+ # Full indentaion of closing tokens (-icb and -icp or -cti=2)
+ else {
- # and ...
- && (
+ # handle -icb (indented closing code block braces)
+ # Updated method for indented block braces: indent one full level if
+ # there is no continuation indentation. This will occur for major
+ # structures such as sub, if, else, but not for things like map
+ # blocks.
+ #
+ # Note: only code blocks without continuation indentation are
+ # handled here (if, else, unless, ..). In the following snippet,
+ # the terminal brace of the sort block will have continuation
+ # indentation as shown so it will not be handled by the coding
+ # here. We would have to undo the continuation indentation to do
+ # this, but it probably looks ok as is. This is a possible future
+ # update for semicolon terminated lines.
+ #
+ # if ($sortby eq 'date' or $sortby eq 'size') {
+ # @files = sort {
+ # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
+ # or $a cmp $b
+ # } @files;
+ # }
+ #
+ if ( $block_type_to_go[$ibeg]
+ && $ci_levels_to_go[$i_terminal] == 0 )
+ {
+ my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
+ $indentation = $spaces + $rOpts_indent_columns;
- # certain leading keywords if requested
- (
- $rOpts->{'outdent-keywords'}
- && $types_to_go[$ibeg] eq 'k'
- && $outdent_keyword{ $tokens_to_go[$ibeg] }
- )
+ # NOTE: for -lp we could create a new indentation object, but
+ # there is probably no need to do it
+ }
- # or labels if requested
- || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
+ # handle -icp and any -icb block braces which fall through above
+ # test such as the 'sort' block mentioned above.
+ else {
- # or static block comments if requested
- || ( $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-static-block-comments'}
- && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o
- && $rOpts->{'static-block-comments'} )
- )
- )
+ # There are currently two ways to handle -icp...
+ # One way is to use the indentation of the previous line:
+ # $indentation = $last_indentation_written;
- {
- my $space_count = leading_spaces_to_go($ibeg);
- if ( $space_count > 0 ) {
- $space_count -= $rOpts_continuation_indentation;
- $is_outdented_line = 1;
- if ( $space_count < 0 ) { $space_count = 0 }
+ # The other way is to use the indentation that the previous line
+ # would have had if it hadn't been adjusted:
+ $indentation = $last_unadjusted_indentation;
- # do not promote a spaced static block comment to non-spaced;
- # this is not normally necessary but could be for some
- # unusual user inputs (such as -ci = -i)
- if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
- $space_count = 1;
+ # Current method: use the minimum of the two. This avoids
+ # inconsistent indentation.
+ if ( get_SPACES($last_indentation_written) <
+ get_SPACES($indentation) )
+ {
+ $indentation = $last_indentation_written;
+ }
}
- if ($rOpts_line_up_parentheses) {
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ # use previous indentation but use own level
+ # to cause list to be flushed properly
+ $lev = $levels_to_go[$ibeg];
+ }
+
+ # remember indentation except for multi-line quotes, which get
+ # no indentation
+ unless ( $ibeg == 0 && $starting_in_quote ) {
+ $last_indentation_written = $indentation;
+ $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
+ $last_leading_token = $tokens_to_go[$ibeg];
+ }
+
+ # be sure lines with leading closing tokens are not outdented more
+ # than the line which contained the corresponding opening token.
+
+ #############################################################
+ # updated per bug report in alex_bug.pl: we must not
+ # mess with the indentation of closing logical braces so
+ # we must treat something like '} else {' as if it were
+ # an isolated brace my $is_isolated_block_brace = (
+ # $iend == $ibeg ) && $block_type_to_go[$ibeg];
+ #############################################################
+ my $is_isolated_block_brace = $block_type_to_go[$ibeg]
+ && ( $iend == $ibeg
+ || $is_if_elsif_else_unless_while_until_for_foreach{
+ $block_type_to_go[$ibeg]
+ } );
+
+ # only do this for a ':; which is aligned with its leading '?'
+ my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+ if ( defined($opening_indentation)
+ && !$is_isolated_block_brace
+ && !$is_unaligned_colon )
+ {
+ if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
+ $indentation = $opening_indentation;
}
- else {
- $indentation = $space_count;
+ }
+
+ # remember the indentation of each line of this batch
+ push @{$rindentation_list}, $indentation;
+
+ # outdent lines with certain leading tokens...
+ if (
+
+ # must be first word of this batch
+ $ibeg == 0
+
+ # and ...
+ && (
+
+ # certain leading keywords if requested
+ (
+ $rOpts->{'outdent-keywords'}
+ && $types_to_go[$ibeg] eq 'k'
+ && $outdent_keyword{ $tokens_to_go[$ibeg] }
+ )
+
+ # or labels if requested
+ || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
+
+ # or static block comments if requested
+ || ( $types_to_go[$ibeg] eq '#'
+ && $rOpts->{'outdent-static-block-comments'}
+ && $is_static_block_comment )
+ )
+ )
+
+ {
+ my $space_count = leading_spaces_to_go($ibeg);
+ if ( $space_count > 0 ) {
+ $space_count -= $rOpts_continuation_indentation;
+ $is_outdented_line = 1;
+ if ( $space_count < 0 ) { $space_count = 0 }
+
+ # do not promote a spaced static block comment to non-spaced;
+ # this is not normally necessary but could be for some
+ # unusual user inputs (such as -ci = -i)
+ if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
+ $space_count = 1;
+ }
+
+ if ($rOpts_line_up_parentheses) {
+ $indentation =
+ new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ }
+ else {
+ $indentation = $space_count;
+ }
}
}
- }
- return ( $indentation, $lev, $level_end, $is_semicolon_terminated,
- $is_outdented_line );
+ return ( $indentation, $lev, $level_end, $terminal_type,
+ $is_semicolon_terminated, $is_outdented_line );
+ }
}
sub set_vertical_tightness_flags {
# if we should combine this line with the next line to achieve the
# desired vertical tightness. The array of parameters contains:
#
- # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace
+ # [0] type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
+ #
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
# if closing: spaces of padding to use
# [2] sequence number of container
# These flags are used by sub set_leading_whitespace in
# the vertical aligner
- my $rvertical_tightness_flags;
+ my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1:
+ # Handle Lines 1 .. n-1 but not the last line
# For non-BLOCK tokens, we will need to examine the next line
# too, so we won't consider the last line.
+ #--------------------------------------------------------------
if ( $n < $n_last_line ) {
- # see if last token is an opening token...not a BLOCK...
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1a:
+ # Look for Type 1, last token of this line is a non-block opening token
+ #--------------------------------------------------------------
my $ibeg_next = $$ri_first[ $n + 1 ];
my $token_end = $tokens_to_go[$iend];
my $iend_next = $$ri_last[ $n + 1 ];
}
}
- # see if first token of next line is a closing token...
- # ..and be sure this line does not have a side comment
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1b:
+ # Look for Type 2, first token of next line is a non-block closing
+ # token .. and be sure this line does not have a side comment
+ #--------------------------------------------------------------
my $token_next = $tokens_to_go[$ibeg_next];
if ( $type_sequence_to_go[$ibeg_next]
&& !$block_type_to_go[$ibeg_next]
}
}
}
+
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1c:
+ # Implement the Opening Token Right flag (Type 2)..
+ # If requested, move an isolated trailing opening token to the end of
+ # the previous line which ended in a comma. We could do this
+ # in sub recombine_breakpoints but that would cause problems
+ # with -lp formatting. The problem is that indentation will
+ # quickly move far to the right in nested expressions. By
+ # doing it after indentation has been set, we avoid changes
+ # to the indentation. Actual movement of the token takes place
+ # in sub valign_output_step_B.
+ #--------------------------------------------------------------
+ if (
+ $opening_token_right{ $tokens_to_go[$ibeg_next] }
+
+ # previous line is not opening
+ # (use -sot to combine with it)
+ && !$is_opening_token{$token_end}
+
+ # previous line ended in one of these
+ # (add other cases if necessary; '=>' and '.' are not necessary
+ && !$block_type_to_go[$ibeg_next]
+
+ # this is a line with just an opening token
+ && ( $iend_next == $ibeg_next
+ || $iend_next == $ibeg_next + 2
+ && $types_to_go[$iend_next] eq '#' )
+
+ # looks bad if we align vertically with the wrong container
+ && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
+ )
+ {
+ my $valid_flag = 1;
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
+ @{$rvertical_tightness_flags} =
+ ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
+ }
+
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1d:
+ # Stacking of opening and closing tokens (Type 2)
+ #--------------------------------------------------------------
+ my $stackable;
+ my $token_beg_next = $tokens_to_go[$ibeg_next];
+
+ # patch to make something like 'qw(' behave like an opening paren
+ # (aran.t)
+ if ( $types_to_go[$ibeg_next] eq 'q' ) {
+ if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
+ $token_beg_next = $1;
+ }
+ }
+
+ if ( $is_closing_token{$token_end}
+ && $is_closing_token{$token_beg_next} )
+ {
+ $stackable = $stack_closing_token{$token_beg_next}
+ unless ( $block_type_to_go[$ibeg_next] )
+ ; # shouldn't happen; just checking
+ }
+ elsif ($is_opening_token{$token_end}
+ && $is_opening_token{$token_beg_next} )
+ {
+ $stackable = $stack_opening_token{$token_beg_next}
+ unless ( $block_type_to_go[$ibeg_next] )
+ ; # shouldn't happen; just checking
+ }
+
+ if ($stackable) {
+
+ my $is_semicolon_terminated;
+ if ( $n + 1 == $n_last_line ) {
+ my ( $terminal_type, $i_terminal ) = terminal_type(
+ \@types_to_go, \@block_type_to_go,
+ $ibeg_next, $iend_next
+ );
+ $is_semicolon_terminated = $terminal_type eq ';'
+ && $nesting_depth_to_go[$iend_next] <
+ $nesting_depth_to_go[$ibeg_next];
+ }
+
+ # this must be a line with just an opening token
+ # or end in a semicolon
+ if (
+ $is_semicolon_terminated
+ || ( $iend_next == $ibeg_next
+ || $iend_next == $ibeg_next + 2
+ && $types_to_go[$iend_next] eq '#' )
+ )
+ {
+ my $valid_flag = 1;
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
+ @{$rvertical_tightness_flags} =
+ ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
+ );
+ }
+ }
}
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 2:
+ # Handle type 3, opening block braces on last line of the batch
# Check for a last line with isolated opening BLOCK curly
+ #--------------------------------------------------------------
elsif ($rOpts_block_brace_vertical_tightness
- && $ibeg eq $iend
+ && $ibeg eq $iend
&& $types_to_go[$iend] eq '{'
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/o )
( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
}
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 3:
+ # Handle type 4, a closing block brace on the last line of the batch Check
+ # for a last line with isolated closing BLOCK curly
+ #--------------------------------------------------------------
+ elsif ($rOpts_stack_closing_block_brace
+ && $ibeg eq $iend
+ && $block_type_to_go[$iend]
+ && $types_to_go[$iend] eq '}' )
+ {
+ my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
+ @{$rvertical_tightness_flags} =
+ ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
+ }
+
+ # pack in the sequence numbers of the ends of this line
+ $rvertical_tightness_flags->[4] = get_seqno($ibeg);
+ $rvertical_tightness_flags->[5] = get_seqno($iend);
return $rvertical_tightness_flags;
}
+sub get_seqno {
+
+ # get opening and closing sequence numbers of a token for the vertical
+ # aligner. Assign qw quotes a value to allow qw opening and closing tokens
+ # to be treated somewhat like opening and closing tokens for stacking
+ # tokens by the vertical aligner.
+ my ($ii) = @_;
+ my $seqno = $type_sequence_to_go[$ii];
+ if ( $types_to_go[$ii] eq 'q' ) {
+ my $SEQ_QW = -1;
+ if ( $ii > 0 ) {
+ $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
+ }
+ else {
+ if ( !$ending_in_quote ) {
+ $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
+ }
+ }
+ }
+ return ($seqno);
+}
+
{
my %is_vertical_alignment_type;
my %is_vertical_alignment_keyword;
+ my %is_terminal_alignment_type;
BEGIN {
+ # Removed =~ from list to improve chances of alignment
@_ = qw#
- = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=
- { ? : => =~ && ||
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ { ? : => && || // ~~ !~~
#;
@is_vertical_alignment_type{@_} = (1) x scalar(@_);
- @_ = qw(if unless and or eq ne for foreach while until);
+ # only align these at end of line
+ @_ = qw(&& ||);
+ @is_terminal_alignment_type{@_} = (1) x scalar(@_);
+
+ # eq and ne were removed from this list to improve alignment chances
+ @_ = qw(if unless and or err for foreach while until);
@is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
}
sub set_vertical_alignment_markers {
- # Look at the tokens in this output batch and define the array
- # 'matching_token_to_go' which marks tokens at which we would
+ # This routine takes the first step toward vertical alignment of the
+ # lines of output text. It looks for certain tokens which can serve as
+ # vertical alignment markers (such as an '=').
+ #
+ # Method: We look at each token $i in this output batch and set
+ # $matching_token_to_go[$i] equal to those tokens at which we would
# accept vertical alignment.
# nothing to do if we aren't allowed to change whitespace
my ( $ri_first, $ri_last ) = @_;
+ # remember the index of last nonblank token before any sidecomment
+ my $i_terminal = $max_index_to_go;
+ if ( $types_to_go[$i_terminal] eq '#' ) {
+ if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
+ if ( $i_terminal > 0 ) { --$i_terminal }
+ }
+ }
+
# look at each line of this batch..
my $last_vertical_alignment_before_index;
my $vert_last_nonblank_type;
my $max_line = @$ri_first - 1;
my ( $i, $type, $token, $block_type, $alignment_type );
my ( $ibeg, $iend, $line );
+
foreach $line ( 0 .. $max_line ) {
$ibeg = $$ri_first[$line];
$iend = $$ri_last[$line];
# align before the first token and 2) the second
# token must be a blank if we are to align before
# the third
- if ( $i < $ibeg + 2 ) {
- }
+ if ( $i < $ibeg + 2 ) { }
# must follow a blank token
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
- }
+ elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
# align a side comment --
elsif ( $type eq '#' ) {
# otherwise, do not align two in a row to create a
# blank field
- elsif ( $last_vertical_alignment_before_index == $i - 2 ) {
- }
+ elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
# align before one of these keywords
# (within a line, since $i>1)
elsif ( $is_vertical_alignment_type{$type} ) {
$alignment_type = $token;
+ # Do not align a terminal token. Although it might
+ # occasionally look ok to do this, this has been found to be
+ # a good general rule. The main problems are:
+ # (1) that the terminal token (such as an = or :) might get
+ # moved far to the right where it is hard to see because
+ # nothing follows it, and
+ # (2) doing so may prevent other good alignments.
+ # Current exceptions are && and ||
+ if ( $i == $iend || $i >= $i_terminal ) {
+ $alignment_type = ""
+ unless ( $is_terminal_alignment_type{$type} );
+ }
+
+ # Do not align leading ': (' or '. ('. This would prevent
+ # alignment in something like the following:
+ # $extra_space .=
+ # ( $input_line_number < 10 ) ? " "
+ # : ( $input_line_number < 100 ) ? " "
+ # : "";
+ # or
+ # $code =
+ # ( $case_matters ? $accessor : " lc($accessor) " )
+ # . ( $yesno ? " eq " : " ne " )
+ if ( $i == $ibeg + 2
+ && $types_to_go[$ibeg] =~ /^[\.\:]$/
+ && $types_to_go[ $i - 1 ] eq 'b' )
+ {
+ $alignment_type = "";
+ }
+
# For a paren after keyword, only align something like this:
# if ( $a ) { &a }
# elsif ( $b ) { &b }
# if ($token ne $type) {$alignment_type .= $type}
}
- # NOTE: This is deactivated until the new vertical aligner
- # is finished because it causes the previous if/elsif alignment
- # to fail
- #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) {
- # $alignment_type = $type;
- #}
+ # NOTE: This is deactivated because it causes the previous
+ # if/elsif alignment to fail
+ #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
+ #{ $alignment_type = $type; }
if ($alignment_type) {
$last_vertical_alignment_before_index = $i;
}
else {
- # start at end and walk bakwards..
+ # start at end and walk backwards..
for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
# skip past any side comment and blanks
}
}
-{
+{ # set_bond_strengths
+
my %is_good_keyword_breakpoint;
my %is_lt_gt_le_ge;
+ my %binary_bond_strength;
+ my %nobreak_lhs;
+ my %nobreak_rhs;
+
+ my @bias_tokens;
+ my $delta_bias;
+
+ sub bias_table_key {
+ my ( $type, $token ) = @_;
+ my $bias_table_key = $type;
+ if ( $type eq 'k' ) {
+ $bias_table_key = $token;
+ if ( $token eq 'err' ) { $bias_table_key = 'or' }
+ }
+ return $bias_table_key;
+ }
+
sub set_bond_strengths {
BEGIN {
@_ = qw(lt gt le ge);
@is_lt_gt_le_ge{@_} = (1) x scalar(@_);
+ #
+ # The decision about where to break a line depends upon a "bond
+ # strength" between tokens. The LOWER the bond strength, the MORE
+ # likely a break. A bond strength may be any value but to simplify
+ # things there are several pre-defined strength levels:
+
+ # NO_BREAK => 10000;
+ # VERY_STRONG => 100;
+ # STRONG => 2.1;
+ # NOMINAL => 1.1;
+ # WEAK => 0.8;
+ # VERY_WEAK => 0.55;
+
+ # The strength values are based on trial-and-error, and need to be
+ # tweaked occasionally to get desired results. Some comments:
+ #
+ # 1. Only relative strengths are important. small differences
+ # in strengths can make big formatting differences.
+ # 2. Each indentation level adds one unit of bond strength.
+ # 3. A value of NO_BREAK makes an unbreakable bond
+ # 4. A value of VERY_WEAK is the strength of a ','
+ # 5. Values below NOMINAL are considered ok break points.
+ # 6. Values above NOMINAL are considered poor break points.
+ #
+ # The bond strengths should roughly follow precedence order where
+ # possible. If you make changes, please check the results very
+ # carefully on a variety of scripts. Testing with the -extrude
+ # options is particularly helpful in exercising all of the rules.
- ###############################################################
- # NOTE: NO_BREAK's set here are HINTS which may not be honored;
- # essential NO_BREAKS's must be enforced in section 2, below.
- ###############################################################
+ # Wherever possible, bond strengths are defined in the following
+ # tables. There are two main stages to setting bond strengths and
+ # two types of tables:
+ #
+ # The first stage involves looking at each token individually and
+ # defining left and right bond strengths, according to if we want
+ # to break to the left or right side, and how good a break point it
+ # is. For example tokens like =, ||, && make good break points and
+ # will have low strengths, but one might want to break on either
+ # side to put them at the end of one line or beginning of the next.
+ #
+ # The second stage involves looking at certain pairs of tokens and
+ # defining a bond strength for that particular pair. This second
+ # stage has priority.
+
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 1.
+ # Set left and right bond strengths of individual tokens.
+ #---------------------------------------------------------------
- # adding NEW_TOKENS: add a left and right bond strength by
- # mimmicking what is done for an existing token type. You
- # can skip this step at first and take the default, then
- # tweak later to get desired results.
+ # NOTE: NO_BREAK's set in this section first are HINTS which will
+ # probably not be honored. Essential NO_BREAKS's should be set in
+ # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
+ # of this subroutine.
- # The bond strengths should roughly follow precenence order where
- # possible. If you make changes, please check the results very
- # carefully on a variety of scripts.
+ # Note that we are setting defaults in this section. The user
+ # cannot change bond strengths but can cause the left and right
+ # bond strengths of any token type to be swapped through the use of
+ # the -wba and -wbb flags. In this way the user can determine if a
+ # breakpoint token should appear at the end of one line or the
+ # beginning of the next line.
+
+ # The hash keys in this section are token types, plus the text of
+ # certain keywords like 'or', 'and'.
# no break around possible filehandle
$left_bond_strength{'Z'} = NO_BREAK;
# example print (STDERR, "bla"); will fail with break after (
$left_bond_strength{'w'} = NO_BREAK;
- # blanks always have infinite strength to force breaks after real tokens
+ # blanks always have infinite strength to force breaks after
+ # real tokens
$right_bond_strength{'b'} = NO_BREAK;
# try not to break on exponentation
$left_bond_strength{'->'} = STRONG;
$right_bond_strength{'->'} = VERY_STRONG;
- # breaking AFTER these is just ok:
- @_ = qw" % + - * / x ";
+ $left_bond_strength{'CORE::'} = NOMINAL;
+ $right_bond_strength{'CORE::'} = NO_BREAK;
+
+ # breaking AFTER modulus operator is ok:
+ @_ = qw" % ";
+ @left_bond_strength{@_} = (STRONG) x scalar(@_);
+ @right_bond_strength{@_} =
+ ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
+
+ # Break AFTER math operators * and /
+ @_ = qw" * / x ";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} = (NOMINAL) x scalar(@_);
+ # Break AFTER weakest math operators + and -
+ # Make them weaker than * but a bit stronger than '.'
+ @_ = qw" + - ";
+ @left_bond_strength{@_} = (STRONG) x scalar(@_);
+ @right_bond_strength{@_} =
+ ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
+
# breaking BEFORE these is just ok:
@_ = qw" >> << ";
@right_bond_strength{@_} = (STRONG) x scalar(@_);
@left_bond_strength{@_} = (NOMINAL) x scalar(@_);
- # I prefer breaking before the string concatenation operator
+ # breaking before the string concatenation operator seems best
# because it can be hard to see at the end of a line
- # swap these to break after a '.'
- # this could be a future option
$right_bond_strength{'.'} = STRONG;
$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
- @_ = qw"} ] ) ";
+ @_ = qw"} ] ) R";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} = (NOMINAL) x scalar(@_);
# make these a little weaker than nominal so that they get
# favored for end-of-line characters
- @_ = qw"!= == =~ !~";
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
+ @_ = qw"!= == =~ !~ ~~ !~~";
+ @left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
# break AFTER these
- @_ = qw" < > | & >= <=";
- @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
+ @_ = qw" < > | & >= <=";
+ @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
$left_bond_strength{'G'} = NOMINAL;
$right_bond_strength{'G'} = STRONG;
- # it is very good to break AFTER various assignment operators
+ # assignment operators
@_ = qw(
= **= += *= &= <<= &&=
- -= /= |= >>= ||=
+ -= /= |= >>= ||= //=
.= %= ^=
x=
);
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
+
+ # Default is to break AFTER various assignment operators
+ @left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
- # break BEFORE '&&' and '||'
+ # Default is to break BEFORE '&&' and '||' and '//'
# set strength of '||' to same as '=' so that chains like
# $a = $b || $c || $d will break before the first '||'
$right_bond_strength{'||'} = NOMINAL;
$left_bond_strength{'||'} = $right_bond_strength{'='};
+ # same thing for '//'
+ $right_bond_strength{'//'} = NOMINAL;
+ $left_bond_strength{'//'} = $right_bond_strength{'='};
+
# set strength of && a little higher than ||
$right_bond_strength{'&&'} = NOMINAL;
$left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
$left_bond_strength{','} = VERY_STRONG;
$right_bond_strength{','} = VERY_WEAK;
+ # remaining digraphs and trigraphs not defined above
+ @_ = qw( :: <> ++ --);
+ @left_bond_strength{@_} = (WEAK) x scalar(@_);
+ @right_bond_strength{@_} = (STRONG) x scalar(@_);
+
# Set bond strengths of certain keywords
- # make 'or', 'and' slightly weaker than a ','
+ # make 'or', 'err', 'and' slightly weaker than a ','
$left_bond_strength{'and'} = VERY_WEAK - 0.01;
$left_bond_strength{'or'} = VERY_WEAK - 0.02;
+ $left_bond_strength{'err'} = VERY_WEAK - 0.02;
$left_bond_strength{'xor'} = NOMINAL;
$right_bond_strength{'and'} = NOMINAL;
$right_bond_strength{'or'} = NOMINAL;
+ $right_bond_strength{'err'} = NOMINAL;
$right_bond_strength{'xor'} = STRONG;
- }
-
- # patch-its always ok to break at end of line
- $nobreak_to_go[$max_index_to_go] = 0;
- # adding a small 'bias' to strengths is a simple way to make a line
- # break at the first of a sequence of identical terms. For example,
- # to force long string of conditional operators to break with
- # each line ending in a ':', we can add a small number to the bond
- # strength of each ':'
- my $colon_bias = 0;
- my $amp_bias = 0;
- my $bar_bias = 0;
- my $and_bias = 0;
- my $or_bias = 0;
- my $dot_bias = 0;
- my $f_bias = 0;
- my $code_bias = -.01;
- my $type = 'b';
- my $token = ' ';
- my $last_type;
- my $last_nonblank_type = $type;
- my $last_nonblank_token = $token;
- my $delta_bias = 0.0001;
- my $list_str = $left_bond_strength{'?'};
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 2.
+ # Set binary rules for bond strengths between certain token types.
+ #---------------------------------------------------------------
- my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
- $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
- );
+ # We have a little problem making tables which apply to the
+ # container tokens. Here is a list of container tokens and
+ # their types:
+ #
+ # type tokens // meaning
+ # { {, [, ( // indent
+ # } }, ], ) // outdent
+ # [ [ // left non-structural [ (enclosing an array index)
+ # ] ] // right non-structural square bracket
+ # ( ( // left non-structural paren
+ # ) ) // right non-structural paren
+ # L { // left non-structural curly brace (enclosing a key)
+ # R } // right non-structural curly brace
+ #
+ # Some rules apply to token types and some to just the token
+ # itself. We solve the problem by combining type and token into a
+ # new hash key for the container types.
+ #
+ # If a rule applies to a token 'type' then we need to make rules
+ # for each of these 'type.token' combinations:
+ # Type Type.Token
+ # { {{, {[, {(
+ # [ [[
+ # ( ((
+ # L L{
+ # } }}, }], })
+ # ] ]]
+ # ) ))
+ # R R}
+ #
+ # If a rule applies to a token then we need to make rules for
+ # these 'type.token' combinations:
+ # Token Type.Token
+ # { {{, L{
+ # [ {[, [[
+ # ( {(, ((
+ # } }}, R}
+ # ] }], ]]
+ # ) }), ))
- # preliminary loop to compute bond strengths
- for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
- $last_type = $type;
- if ( $type ne 'b' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- }
- $type = $types_to_go[$i];
+ # allow long lines before final { in an if statement, as in:
+ # if (..........
+ # ..........)
+ # {
+ #
+ # Otherwise, the line before the { tends to be too short.
- # strength on both sides of a blank is the same
- if ( $type eq 'b' && $last_type ne 'b' ) {
- $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
- next;
- }
+ $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
+ $binary_bond_strength{'(('}{'{{'} = NOMINAL;
- $token = $tokens_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $i_next = $i + 1;
- $next_type = $types_to_go[$i_next];
- $next_token = $tokens_to_go[$i_next];
- $total_nesting_depth = $nesting_depth_to_go[$i_next];
- $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ # break on something like '} (', but keep this stronger than a ','
+ # example is in 'howe.pl'
+ $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+ $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
- # Some token chemistry... The decision about where to break a
- # line depends upon a "bond strength" between tokens. The LOWER
- # the bond strength, the MORE likely a break. The strength
- # values are based on trial-and-error, and need to be tweaked
- # occasionally to get desired results. Things to keep in mind
- # are:
- # 1. relative strengths are important. small differences
- # in strengths can make big formatting differences.
- # 2. each indentation level adds one unit of bond strength
- # 3. a value of NO_BREAK makes an unbreakable bond
- # 4. a value of VERY_WEAK is the strength of a ','
- # 5. values below NOMINAL are considered ok break points
- # 6. values above NOMINAL are considered poor break points
- # We are computing the strength of the bond between the current
- # token and the NEXT token.
- my $bond_str = VERY_STRONG; # a default, high strength
+ # keep matrix and hash indices together
+ # but make them a little below STRONG to allow breaking open
+ # something like {'some-word'}{'some-very-long-word'} at the }{
+ # (bracebrk.t)
+ $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+
+ # increase strength to the point where a break in the following
+ # will be after the opening paren rather than at the arrow:
+ # $a->$b($c);
+ $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
+
+ $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+
+ $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
#---------------------------------------------------------------
- # section 1:
- # use minimum of left and right bond strengths if defined;
- # digraphs and trigraphs like to break on their left
+ # Binary NO_BREAK rules
#---------------------------------------------------------------
- my $bsr = $right_bond_strength{$type};
- if ( !defined($bsr) ) {
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
+ $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
- if ( $is_digraph{$type} || $is_trigraph{$type} ) {
- $bsr = STRONG;
- }
- else {
- $bsr = VERY_STRONG;
- }
- }
+ # Never break between a bareword and a following paren because
+ # perl may give an error. For example, if a break is placed
+ # between 'to_filehandle' and its '(' the following line will
+ # give a syntax error [Carp.pm]: my( $no) =fileno(
+ # to_filehandle( $in)) ;
+ $binary_bond_strength{'C'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'C'}{'{('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'{('} = NO_BREAK;
- # define right bond strengths of certain keywords
- if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
- $bsr = $right_bond_strength{$token};
- }
- elsif ( $token eq 'ne' or $token eq 'eq' ) {
- $bsr = NOMINAL;
- }
- my $bsl = $left_bond_strength{$next_nonblank_type};
+ # use strict requires that bare word within braces not start new
+ # line
+ $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
- # set terminal bond strength to the nominal value
- # this will cause good preceding breaks to be retained
- if ( $i_next_nonblank > $max_index_to_go ) {
- $bsl = NOMINAL;
- }
+ $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
- if ( !defined($bsl) ) {
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
- if ( $is_digraph{$next_nonblank_type}
- || $is_trigraph{$next_nonblank_type} )
- {
- $bsl = WEAK;
- }
- else {
- $bsl = VERY_STRONG;
- }
- }
+ # use strict does not allow separating type info from trailing { }
+ # testfile is readmail.pl
+ $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
+ $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
- # define right bond strengths of certain keywords
- if ( $next_nonblank_type eq 'k'
- && defined( $left_bond_strength{$next_nonblank_token} ) )
- {
- $bsl = $left_bond_strength{$next_nonblank_token};
- }
- elsif ($next_nonblank_token eq 'ne'
- or $next_nonblank_token eq 'eq' )
- {
- $bsl = NOMINAL;
- }
- elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
- $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
- }
+ # As a defensive measure, do not break between a '(' and a
+ # filehandle. In some cases, this can cause an error. For
+ # example, the following program works:
+ # my $msg="hi!\n";
+ # print
+ # ( STDOUT
+ # $msg
+ # );
+ #
+ # But this program fails:
+ # my $msg="hi!\n";
+ # print
+ # (
+ # STDOUT
+ # $msg
+ # );
+ #
+ # This is normally only a problem with the 'extrude' option
+ $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
+ $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
- # Note: it might seem that we would want to keep a NO_BREAK if
- # either token has this value. This didn't work, because in an
- # arrow list, it prevents the comma from separating from the
- # following bare word (which is probably quoted by its arrow).
- # So necessary NO_BREAK's have to be handled as special cases
- # in the final section.
- $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
- my $bond_str_1 = $bond_str;
+ # never break between sub name and opening paren
+ $binary_bond_strength{'w'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'w'}{'{('} = NO_BREAK;
+
+ # keep '}' together with ';'
+ $binary_bond_strength{'}}'}{';'} = NO_BREAK;
+
+ # Breaking before a ++ can cause perl to guess wrong. For
+ # example the following line will cause a syntax error
+ # with -extrude if we break between '$i' and '++' [fixstyle2]
+ # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
+ $nobreak_lhs{'++'} = NO_BREAK;
+
+ # Do not break before a possible file handle
+ $nobreak_lhs{'Z'} = NO_BREAK;
+
+ # use strict hates bare words on any new line. For
+ # example, a break before the underscore here provokes the
+ # wrath of use strict:
+ # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
+ $nobreak_rhs{'F'} = NO_BREAK;
+ $nobreak_rhs{'CORE::'} = NO_BREAK;
#---------------------------------------------------------------
- # section 2:
- # special cases
+ # Bond Strength BEGIN Section 3.
+ # Define tables and values for applying a small bias to the above
+ # values.
#---------------------------------------------------------------
+ # Adding a small 'bias' to strengths is a simple way to make a line
+ # break at the first of a sequence of identical terms. For
+ # example, to force long string of conditional operators to break
+ # with each line ending in a ':', we can add a small number to the
+ # bond strength of each ':' (colon.t)
+ @bias_tokens = qw( : && || f and or . ); # tokens which get bias
+ $delta_bias = 0.0001; # a very small strength level
- # allow long lines before final { in an if statement, as in:
- # if (..........
- # ..........)
- # {
- #
- # Otherwise, the line before the { tends to be too short.
- if ( $type eq ')' ) {
- if ( $next_nonblank_type eq '{' ) {
- $bond_str = VERY_WEAK + 0.03;
- }
- }
+ } ## end BEGIN
- elsif ( $type eq '(' ) {
- if ( $next_nonblank_type eq '{' ) {
- $bond_str = NOMINAL;
- }
- }
+ # patch-its always ok to break at end of line
+ $nobreak_to_go[$max_index_to_go] = 0;
- # break on something like '} (', but keep this stronger than a ','
- # example is in 'howe.pl'
- elsif ( $type eq 'R' or $type eq '}' ) {
- if ( $next_nonblank_type eq '(' ) {
- $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
- }
- }
+ # we start a new set of bias values for each line
+ my %bias;
+ @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
+ my $code_bias = -.01; # bias for closing block braces
- #-----------------------------------------------------------------
- # adjust bond strength bias
- #-----------------------------------------------------------------
+ my $type = 'b';
+ my $token = ' ';
+ my $last_type;
+ my $last_nonblank_type = $type;
+ my $last_nonblank_token = $token;
+ my $list_str = $left_bond_strength{'?'};
- elsif ( $type eq 'f' ) {
- $bond_str += $f_bias;
- $f_bias += $delta_bias;
- }
+ my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
+ $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
+ );
- # in long ?: conditionals, bias toward just one set per line (colon.t)
- elsif ( $type eq ':' ) {
- if ( !$want_break_before{$type} ) {
- $bond_str += $colon_bias;
- $colon_bias += $delta_bias;
- }
+ # main loop to compute bond strengths between each pair of tokens
+ for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
+ $last_type = $type;
+ if ( $type ne 'b' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
}
+ $type = $types_to_go[$i];
- if ( $next_nonblank_type eq ':'
- && $want_break_before{$next_nonblank_type} )
- {
- $bond_str += $colon_bias;
- $colon_bias += $delta_bias;
+ # strength on both sides of a blank is the same
+ if ( $type eq 'b' && $last_type ne 'b' ) {
+ $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
+ next;
}
- # if leading '.' is used, align all but 'short' quotes;
- # the idea is to not place something like "\n" on a single line.
- elsif ( $next_nonblank_type eq '.' ) {
- if ( $want_break_before{'.'} ) {
- unless (
- $last_nonblank_type eq '.'
- && (
- length($token) <=
- $rOpts_short_concatenation_item_length )
- && ( $token !~ /^[\)\]\}]$/ )
- )
- {
- $dot_bias += $delta_bias;
- }
- $bond_str += $dot_bias;
- }
- }
- elsif ($next_nonblank_type eq '&&'
- && $want_break_before{$next_nonblank_type} )
- {
- $bond_str += $amp_bias;
- $amp_bias += $delta_bias;
- }
- elsif ($next_nonblank_type eq '||'
- && $want_break_before{$next_nonblank_type} )
- {
- $bond_str += $bar_bias;
- $bar_bias += $delta_bias;
- }
- elsif ( $next_nonblank_type eq 'k' ) {
+ $token = $tokens_to_go[$i];
+ $block_type = $block_type_to_go[$i];
+ $i_next = $i + 1;
+ $next_type = $types_to_go[$i_next];
+ $next_token = $tokens_to_go[$i_next];
+ $total_nesting_depth = $nesting_depth_to_go[$i_next];
+ $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- if ( $next_nonblank_token eq 'and'
- && $want_break_before{$next_nonblank_token} )
- {
- $bond_str += $and_bias;
- $and_bias += $delta_bias;
- }
- elsif ($next_nonblank_token eq 'or'
- && $want_break_before{$next_nonblank_token} )
- {
- $bond_str += $or_bias;
- $or_bias += $delta_bias;
- }
+ # We are computing the strength of the bond between the current
+ # token and the NEXT token.
- # FIXME: needs more testing
- elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
- $bond_str = $list_str if ( $bond_str > $list_str );
- }
- }
+ #---------------------------------------------------------------
+ # Bond Strength Section 1:
+ # First Approximation.
+ # Use minimum of individual left and right tabulated bond
+ # strengths.
+ #---------------------------------------------------------------
+ my $bsr = $right_bond_strength{$type};
+ my $bsl = $left_bond_strength{$next_nonblank_type};
- if ( $type eq ':'
- && !$want_break_before{$type} )
- {
- $bond_str += $colon_bias;
- $colon_bias += $delta_bias;
- }
- elsif ( $type eq '&&'
- && !$want_break_before{$type} )
- {
- $bond_str += $amp_bias;
- $amp_bias += $delta_bias;
+ # define right bond strengths of certain keywords
+ if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
+ $bsr = $right_bond_strength{$token};
}
- elsif ( $type eq '||'
- && !$want_break_before{$type} )
- {
- $bond_str += $bar_bias;
- $bar_bias += $delta_bias;
+ elsif ( $token eq 'ne' or $token eq 'eq' ) {
+ $bsr = NOMINAL;
}
- elsif ( $type eq 'k' ) {
- if ( $token eq 'and'
- && !$want_break_before{$token} )
- {
- $bond_str += $and_bias;
- $and_bias += $delta_bias;
- }
- elsif ( $token eq 'or'
- && !$want_break_before{$token} )
- {
- $bond_str += $or_bias;
- $or_bias += $delta_bias;
- }
+ # set terminal bond strength to the nominal value
+ # this will cause good preceding breaks to be retained
+ if ( $i_next_nonblank > $max_index_to_go ) {
+ $bsl = NOMINAL;
}
- # keep matrix and hash indices together
- # but make them a little below STRONG to allow breaking open
- # something like {'some-word'}{'some-very-long-word'} at the }{
- # (bracebrk.t)
- if ( ( $type eq ']' or $type eq 'R' )
- && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
- )
+ # define right bond strengths of certain keywords
+ if ( $next_nonblank_type eq 'k'
+ && defined( $left_bond_strength{$next_nonblank_token} ) )
{
- $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
+ $bsl = $left_bond_strength{$next_nonblank_token};
}
-
- if ( $next_nonblank_token =~ /^->/ ) {
-
- # increase strength to the point where a break in the following
- # will be after the opening paren rather than at the arrow:
- # $a->$b($c);
- if ( $type eq 'i' ) {
- $bond_str = 1.45 * STRONG;
- }
-
- elsif ( $type =~ /^[\)\]\}R]$/ ) {
- $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
- }
-
- # otherwise make strength before an '->' a little over a '+'
- else {
- if ( $bond_str <= NOMINAL ) {
- $bond_str = NOMINAL + 0.01;
- }
- }
+ elsif ($next_nonblank_token eq 'ne'
+ or $next_nonblank_token eq 'eq' )
+ {
+ $bsl = NOMINAL;
}
-
- if ( $token eq ')' && $next_nonblank_token eq '[' ) {
- $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
+ elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
+ $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
}
- # map1.t -- correct for a quirk in perl
- if ( $token eq '('
- && $next_nonblank_type eq 'i'
- && $last_nonblank_type eq 'k'
- && $is_sort_map_grep{$last_nonblank_token} )
+ # Use the minimum of the left and right strengths. Note: it might
+ # seem that we would want to keep a NO_BREAK if either token has
+ # this value. This didn't work, for example because in an arrow
+ # list, it prevents the comma from separating from the following
+ # bare word (which is probably quoted by its arrow). So necessary
+ # NO_BREAK's have to be handled as special cases in the final
+ # section.
+ if ( !defined($bsr) ) { $bsr = VERY_STRONG }
+ if ( !defined($bsl) ) { $bsl = VERY_STRONG }
+ my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
+ my $bond_str_1 = $bond_str;
- # /^(sort|map|grep)$/ )
- {
- $bond_str = NO_BREAK;
- }
+ #---------------------------------------------------------------
+ # Bond Strength Section 2:
+ # Apply hardwired rules..
+ #---------------------------------------------------------------
- # extrude.t: do not break before paren at:
- # -l pid_filename(
- if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
- $bond_str = NO_BREAK;
+ # Patch to put terminal or clauses on a new line: Weaken the bond
+ # at an || followed by die or similar keyword to make the terminal
+ # or clause fall on a new line, like this:
+ #
+ # my $class = shift
+ # || die "Cannot add broadcast: No class identifier found";
+ #
+ # Otherwise the break will be at the previous '=' since the || and
+ # = have the same starting strength and the or is biased, like
+ # this:
+ #
+ # my $class =
+ # shift || die "Cannot add broadcast: No class identifier found";
+ #
+ # In any case if the user places a break at either the = or the ||
+ # it should remain there.
+ if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
+ if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
+ if ( $want_break_before{$token} && $i > 0 ) {
+ $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
+ }
+ else {
+ $bond_str -= $delta_bias;
+ }
+ }
}
# good to break after end of code blocks
- if ( $type eq '}' && $block_type ) {
+ if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
$bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
$code_bias += $delta_bias;
$bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
}
-# Don't break after keyword my. This is a quick fix for a
-# rare problem with perl. An example is this line from file
-# Container.pm:
-# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
+ # Don't break after keyword my. This is a quick fix for a
+ # rare problem with perl. An example is this line from file
+ # Container.pm:
+
+ # foreach my $question( Debian::DebConf::ConfigDb::gettree(
+ # $this->{'question'} ) )
if ( $token eq 'my' ) {
$bond_str = NO_BREAK;
$bond_str = VERY_WEAK;
}
- if ( $next_nonblank_type eq 'k' ) {
+ if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
+
+ # FIXME: needs more testing
+ if ( $is_keyword_returning_list{$next_nonblank_token} ) {
+ $bond_str = $list_str if ( $bond_str > $list_str );
+ }
# keywords like 'unless', 'if', etc, within statements
# make good breaks
if ( $bond_str < STRONG ) { $bond_str = STRONG }
}
- #----------------------------------------------------------------------
- # only set NO_BREAK's from here on
- #----------------------------------------------------------------------
- if ( $type eq 'C' or $type eq 'U' ) {
+ #---------------------------------------------------------------
+ # Additional hardwired NOBREAK rules
+ #---------------------------------------------------------------
- # use strict requires that bare word and => not be separated
- if ( $next_nonblank_type eq '=>' ) {
- $bond_str = NO_BREAK;
- }
+ # map1.t -- correct for a quirk in perl
+ if ( $token eq '('
+ && $next_nonblank_type eq 'i'
+ && $last_nonblank_type eq 'k'
+ && $is_sort_map_grep{$last_nonblank_token} )
+ # /^(sort|map|grep)$/ )
+ {
+ $bond_str = NO_BREAK;
}
- # use strict requires that bare word within braces not start new line
- elsif ( $type eq 'L' ) {
-
- if ( $next_nonblank_type eq 'w' ) {
- $bond_str = NO_BREAK;
- }
+ # extrude.t: do not break before paren at:
+ # -l pid_filename(
+ if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
+ $bond_str = NO_BREAK;
}
# in older version of perl, use strict can cause problems with
# breaks before bare words following opening parens. For example,
# this will fail under older versions if a break is made between
- # '(' and 'MAIL':
- # use strict;
- # open( MAIL, "a long filename or command");
- # close MAIL;
- elsif ( $type eq '{' ) {
+ # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
+ # command"); close MAIL;
+ if ( $type eq '{' ) {
if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
$next_next_type = $types_to_go[$i_next_next_nonblank];
}
- ##if ( $next_next_type ne '=>' ) {
- # these are ok: '->xxx', '=>', '('
-
- # We'll check for an old breakpoint and keep a leading
- # bareword if it was that way in the input file. Presumably
- # it was ok that way. For example, the following would remain
- # unchanged:
- #
- # @months = (
- # January, February, March, April,
- # May, June, July, August,
- # September, October, November, December,
- # );
- #
- # This should be sufficient:
- if ( !$old_breakpoint_to_go[$i]
- && ( $next_next_type eq ',' || $next_next_type eq '}' )
+ # We'll check for an old breakpoint and keep a leading
+ # bareword if it was that way in the input file.
+ # Presumably it was ok that way. For example, the
+ # following would remain unchanged:
+ #
+ # @months = (
+ # January, February, March, April,
+ # May, June, July, August,
+ # September, October, November, December,
+ # );
+ #
+ # This should be sufficient:
+ if (
+ !$old_breakpoint_to_go[$i]
+ && ( $next_next_type eq ','
+ || $next_next_type eq '}' )
)
{
$bond_str = NO_BREAK;
}
}
- elsif ( $type eq 'w' ) {
-
- if ( $next_nonblank_type eq 'R' ) {
- $bond_str = NO_BREAK;
- }
-
- # use strict requires that bare word and => not be separated
- if ( $next_nonblank_type eq '=>' ) {
- $bond_str = NO_BREAK;
- }
- }
-
- # in fact, use strict hates bare words on any new line. For example,
- # a break before the underscore here provokes the wrath of use strict:
- # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
- elsif ( $type eq 'F' ) {
- $bond_str = NO_BREAK;
- }
-
- # use strict does not allow separating type info from trailing { }
- # testfile is readmail.pl
- elsif ( $type eq 't' or $type eq 'i' ) {
-
- if ( $next_nonblank_type eq 'L' ) {
- $bond_str = NO_BREAK;
- }
- }
-
- # Do not break between a possible filehandle and a ? or /
- # and do not introduce a break after it if there is no blank (extrude.t)
+ # Do not break between a possible filehandle and a ? or / and do
+ # not introduce a break after it if there is no blank
+ # (extrude.t)
elsif ( $type eq 'Z' ) {
- # dont break..
+ # don't break..
if (
# if there is no blank and we do not want one. Examples:
}
}
- # Do not break before a possible file handle
- if ( $next_nonblank_type eq 'Z' ) {
- $bond_str = NO_BREAK;
+ # Breaking before a ? before a quote can cause trouble if
+ # they are not separated by a blank.
+ # Example: a syntax error occurs if you break before the ? here
+ # my$logic=join$all?' && ':' || ',@regexps;
+ # From: Professional_Perl_Programming_Code/multifind.pl
+ if ( $next_nonblank_type eq '?' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
}
- # As a defensive measure, do not break between a '(' and a
- # filehandle. In some cases, this can cause an error. For
- # example, the following program works:
- # my $msg="hi!\n";
- # print
- # ( STDOUT
- # $msg
- # );
- #
- # But this program fails:
- # my $msg="hi!\n";
- # print
- # (
- # STDOUT
- # $msg
- # );
- #
- # This is normally only a problem with the 'extrude' option
- if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
- $bond_str = NO_BREAK;
+ # Breaking before a . followed by a number
+ # can cause trouble if there is no intervening space
+ # Example: a syntax error occurs if you break before the .2 here
+ # $str .= pack($endian.2, ensurrogate($ord));
+ # From: perl58/Unicode.pm
+ elsif ( $next_nonblank_type eq '.' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
}
# patch to put cuddled elses back together when on multiple
$bond_str = NO_BREAK;
}
}
+ my $bond_str_2 = $bond_str;
- # keep '}' together with ';'
- if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
- $bond_str = NO_BREAK;
- }
+ #---------------------------------------------------------------
+ # End of hardwired rules
+ #---------------------------------------------------------------
- # never break between sub name and opening paren
- if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
- $bond_str = NO_BREAK;
+ #---------------------------------------------------------------
+ # Bond Strength Section 3:
+ # Apply table rules. These have priority over the above
+ # hardwired rules.
+ #---------------------------------------------------------------
+
+ my $tabulated_bond_str;
+ my $ltype = $type;
+ my $rtype = $next_nonblank_type;
+ if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
+ if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
+ $rtype = $next_nonblank_type . $next_nonblank_token;
+ }
+
+ if ( $binary_bond_strength{$ltype}{$rtype} ) {
+ $bond_str = $binary_bond_strength{$ltype}{$rtype};
+ $tabulated_bond_str = $bond_str;
+ }
+
+ if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
+ $bond_str = NO_BREAK;
+ $tabulated_bond_str = $bond_str;
+ }
+ my $bond_str_3 = $bond_str;
+
+ # If the hardwired rules conflict with the tabulated bond
+ # strength then there is an inconsistency that should be fixed
+ FORMATTER_DEBUG_FLAG_BOND_TABLES
+ && $tabulated_bond_str
+ && $bond_str_1
+ && $bond_str_1 != $bond_str_2
+ && $bond_str_2 != $tabulated_bond_str
+ && do {
+ print STDERR
+"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
+ };
+
+ #-----------------------------------------------------------------
+ # Bond Strength Section 4:
+ # Modify strengths of certain tokens which often occur in sequence
+ # by adding a small bias to each one in turn so that the breaks
+ # occur from left to right.
+ #
+ # Note that we only changing strengths by small amounts here,
+ # and usually increasing, so we should not be altering any NO_BREAKs.
+ # Other routines which check for NO_BREAKs will use a tolerance
+ # of one to avoid any problem.
+ #-----------------------------------------------------------------
+
+ # The bias tables use special keys
+ my $left_key = bias_table_key( $type, $token );
+ my $right_key =
+ bias_table_key( $next_nonblank_type, $next_nonblank_token );
+
+ # add any bias set by sub scan_list at old comma break points.
+ if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
+
+ # bias left token
+ elsif ( defined( $bias{$left_key} ) ) {
+ if ( !$want_break_before{$left_key} ) {
+ $bias{$left_key} += $delta_bias;
+ $bond_str += $bias{$left_key};
+ }
+ }
+
+ # bias right token
+ if ( defined( $bias{$right_key} ) ) {
+ if ( $want_break_before{$right_key} ) {
+
+ # for leading '.' align all but 'short' quotes; the idea
+ # is to not place something like "\n" on a single line.
+ if ( $right_key eq '.' ) {
+ unless (
+ $last_nonblank_type eq '.'
+ && (
+ length($token) <=
+ $rOpts_short_concatenation_item_length )
+ && ( $token !~ /^[\)\]\}]$/ )
+ )
+ {
+ $bias{$right_key} += $delta_bias;
+ }
+ }
+ else {
+ $bias{$right_key} += $delta_bias;
+ }
+ $bond_str += $bias{$right_key};
+ }
}
+ my $bond_str_4 = $bond_str;
#---------------------------------------------------------------
- # section 3:
- # now take nesting depth into account
+ # Bond Strength Section 5:
+ # Fifth Approximation.
+ # Take nesting depth into account by adding the nesting depth
+ # to the bond strength.
#---------------------------------------------------------------
- # final strength incorporates the bond strength and nesting depth
my $strength;
if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
FORMATTER_DEBUG_FLAG_BOND && do {
my $str = substr( $token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
- print
-"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
+ print STDOUT
+"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
};
- }
- }
-
+ } ## end main loop
+ } ## end sub set_bond_strengths
}
sub pad_array_to_go {
# to simplify coding in scan_list and set_bond_strengths, it helps
# to create some extra blank tokens at the end of the arrays
- $tokens_to_go[ $max_index_to_go + 1 ] = '';
- $tokens_to_go[ $max_index_to_go + 2 ] = '';
- $types_to_go[ $max_index_to_go + 1 ] = 'b';
- $types_to_go[ $max_index_to_go + 2 ] = 'b';
+ $tokens_to_go[ $max_index_to_go + 1 ] = '';
+ $tokens_to_go[ $max_index_to_go + 2 ] = '';
+ $types_to_go[ $max_index_to_go + 1 ] = 'b';
+ $types_to_go[ $max_index_to_go + 2 ] = 'b';
$nesting_depth_to_go[ $max_index_to_go + 1 ] =
$nesting_depth_to_go[$max_index_to_go];
{ # begin scan_list
my (
- $block_type, $current_depth,
- $depth, $i,
- $i_last_nonblank_token, $last_colon_sequence_number,
- $last_nonblank_token, $last_nonblank_type,
- $last_old_breakpoint_count, $minimum_depth,
- $next_nonblank_block_type, $next_nonblank_token,
- $next_nonblank_type, $old_breakpoint_count,
- $starting_breakpoint_count, $starting_depth,
- $token, $type,
- $type_sequence,
+ $block_type, $current_depth,
+ $depth, $i,
+ $i_last_nonblank_token, $last_colon_sequence_number,
+ $last_nonblank_token, $last_nonblank_type,
+ $last_nonblank_block_type, $last_old_breakpoint_count,
+ $minimum_depth, $next_nonblank_block_type,
+ $next_nonblank_token, $next_nonblank_type,
+ $old_breakpoint_count, $starting_breakpoint_count,
+ $starting_depth, $token,
+ $type, $type_sequence,
);
my (
my $dd = shift;
my $bp_count = 0;
my $do_not_break_apart = 0;
- if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
-
- my $fbc = $forced_breakpoint_count;
-
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
-
- set_comma_breakpoints_do(
- $dd,
- $opening_structure_index_stack[$dd],
- $i,
- $item_count_stack[$dd],
- $identifier_count_stack[$dd],
- $comma_index[$dd],
- $next_nonblank_type,
- $container_type[$dd],
- $interrupted_list[$dd],
- \$do_not_break_apart,
- $must_break_open,
- );
- $bp_count = $forced_breakpoint_count - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
+
+ # anything to do?
+ if ( $item_count_stack[$dd] ) {
+
+ # handle commas not in containers...
+ if ( $dont_align[$dd] ) {
+ do_uncontained_comma_breaks($dd);
+ }
+
+ # handle commas within containers...
+ else {
+ my $fbc = $forced_breakpoint_count;
+
+ # always open comma lists not preceded by keywords,
+ # barewords, identifiers (that is, anything that doesn't
+ # look like a function call)
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+
+ set_comma_breakpoints_do(
+ $dd,
+ $opening_structure_index_stack[$dd],
+ $i,
+ $item_count_stack[$dd],
+ $identifier_count_stack[$dd],
+ $comma_index[$dd],
+ $next_nonblank_type,
+ $container_type[$dd],
+ $interrupted_list[$dd],
+ \$do_not_break_apart,
+ $must_break_open,
+ );
+ $bp_count = $forced_breakpoint_count - $fbc;
+ $do_not_break_apart = 0 if $must_break_open;
+ }
}
return ( $bp_count, $do_not_break_apart );
}
+ sub do_uncontained_comma_breaks {
+
+ # Handle commas not in containers...
+ # This is a catch-all routine for commas that we
+ # don't know what to do with because the don't fall
+ # within containers. We will bias the bond strength
+ # to break at commas which ended lines in the input
+ # file. This usually works better than just trying
+ # to put as many items on a line as possible. A
+ # downside is that if the input file is garbage it
+ # won't work very well. However, the user can always
+ # prevent following the old breakpoints with the
+ # -iob flag.
+ my $dd = shift;
+ my $bias = -.01;
+ my $old_comma_break_count = 0;
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $old_comma_break_count++;
+ $bond_strength_to_go[$ii] = $bias;
+
+ # reduce bias magnitude to force breaks in order
+ $bias *= 0.99;
+ }
+ }
+
+ # Also put a break before the first comma if
+ # (1) there was a break there in the input, and
+ # (2) there was exactly one old break before the first comma break
+ # (3) OLD: there are multiple old comma breaks
+ # (3) NEW: there are one or more old comma breaks (see return example)
+ #
+ # For example, we will follow the user and break after
+ # 'print' in this snippet:
+ # print
+ # "conformability (Not the same dimension)\n",
+ # "\t", $have, " is ", text_unit($hu), "\n",
+ # "\t", $want, " is ", text_unit($wu), "\n",
+ # ;
+ #
+ # Another example, just one comma, where we will break after
+ # the return:
+ # return
+ # $x * cos($a) - $y * sin($a),
+ # $x * sin($a) + $y * cos($a);
+
+ # Breaking a print statement:
+ # print SAVEOUT
+ # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
+ # ( $? & 128 ) ? " -- core dumped" : "", "\n";
+ #
+ # But we will not force a break after the opening paren here
+ # (causes a blinker):
+ # $heap->{stream}->set_output_filter(
+ # poe::filter::reference->new('myotherfreezer') ),
+ # ;
+ #
+ my $i_first_comma = $comma_index[$dd]->[0];
+ if ( $old_breakpoint_to_go[$i_first_comma] ) {
+ my $level_comma = $levels_to_go[$i_first_comma];
+ my $ibreak = -1;
+ my $obp_count = 0;
+ for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $obp_count++;
+ last if ( $obp_count > 1 );
+ $ibreak = $ii
+ if ( $levels_to_go[$ii] == $level_comma );
+ }
+ }
+
+ # Changed rule from multiple old commas to just one here:
+ if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
+ {
+ # Do not to break before an opening token because
+ # it can lead to "blinkers".
+ my $ibreakm = $ibreak;
+ $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
+ if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
+ {
+ set_forced_breakpoint($ibreak);
+ }
+ }
+ }
+ }
+
my %is_logical_container;
BEGIN {
- @_ = qw# if elsif unless while and or not && | || ? : ! #;
+ @_ = qw# if elsif unless while and or err not && | || ? : ! #;
@is_logical_container{@_} = (1) x scalar(@_);
}
$item_count_stack[$dd] == 0
&& $is_logical_container{ $container_type[$dd] }
- # TESTING:
|| $has_old_logical_breakpoints[$dd]
)
{
$last_colon_sequence_number = -1;
$last_nonblank_token = ';';
$last_nonblank_type = ';';
+ $last_nonblank_block_type = ' ';
$last_old_breakpoint_count = 0;
$minimum_depth = $current_depth + 1; # forces update in check below
$old_breakpoint_count = 0;
$type = ';';
$type_sequence = '';
+ my $total_depth_variation = 0;
+ my $i_old_assignment_break;
+ my $depth_last = $starting_depth;
+
check_for_new_minimum_depth($current_depth);
my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
# loop over all tokens in this batch
while ( ++$i <= $max_index_to_go ) {
if ( $type ne 'b' ) {
- $i_last_nonblank_token = $i - 1;
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- }
+ $i_last_nonblank_token = $i - 1;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ $last_nonblank_block_type = $block_type;
+ } ## end if ( $type ne 'b' )
$type = $types_to_go[$i];
$block_type = $block_type_to_go[$i];
$token = $tokens_to_go[$i];
# as '}') which forms a one-line block, this break might
# get undone.
$want_previous_breakpoint = $i;
+ } ## end if ( $next_nonblank_type...)
+ } ## end if ($rOpts_break_at_old_keyword_breakpoints)
+
+ # Break before attributes if user broke there
+ if ($rOpts_break_at_old_attribute_breakpoints) {
+ if ( $next_nonblank_type eq 'A' ) {
+ $want_previous_breakpoint = $i;
}
}
- }
+
+ # remember an = break as possible good break point
+ if ( $is_assignment{$type} ) {
+ $i_old_assignment_break = $i;
+ }
+ elsif ( $is_assignment{$next_nonblank_type} ) {
+ $i_old_assignment_break = $i_next_nonblank;
+ }
+ } ## end if ( $old_breakpoint_to_go...)
next if ( $type eq 'b' );
$depth = $nesting_depth_to_go[ $i + 1 ];
+ $total_depth_variation += abs( $depth - $depth_last );
+ $depth_last = $depth;
+
# safety check - be sure we always break after a comment
# Shouldn't happen .. an error here probably means that the
# nobreak flag did not get turned off correctly during
report_definite_bug();
$nobreak_to_go[$i] = 0;
set_forced_breakpoint($i);
- }
- }
+ } ## end if ( $i != $max_index_to_go)
+ } ## end if ( $type eq '#' )
# Force breakpoints at certain tokens in long lines.
# Note that such breakpoints will be undone later if these tokens
# are fully contained within parens on a line.
if (
- $type eq 'k'
+
+ # break before a keyword within a line
+ $type eq 'k'
&& $i > 0
- && $token =~ /^(if|unless)$/
+
+ # if one of these keywords:
+ && $token =~ /^(if|unless|while|until|for)$/
+
+ # but do not break at something like '1 while'
+ && ( $last_nonblank_type ne 'n' || $i > 2 )
+
+ # and let keywords follow a closing 'do' brace
+ && $last_nonblank_block_type ne 'do'
+
&& (
$is_long_line
)
{
set_forced_breakpoint( $i - 1 );
- }
+ } ## end if ( $type eq 'k' && $i...)
# remember locations of '||' and '&&' for possible breaks if we
# decide this is a long logical expression.
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
- }
+ } ## end if ( $type eq '||' )
elsif ( $type eq '&&' ) {
push @{ $rand_or_list[$depth][3] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
- }
+ } ## end elsif ( $type eq '&&' )
elsif ( $type eq 'f' ) {
push @{ $rfor_semicolon_list[$depth] }, $i;
}
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
- }
+ } ## end if ( $token eq 'and' )
# break immediately at 'or's which are probably not in a logical
# block -- but we will break in logical breaks below so that
{
$saw_good_breakpoint = 1;
}
- }
- }
+ } ## end else [ if ( $is_logical_container...)]
+ } ## end elsif ( $token eq 'or' )
elsif ( $token eq 'if' || $token eq 'unless' ) {
push @{ $rand_or_list[$depth][4] }, $i;
if ( ( $i == $i_line_start || $i == $i_line_end )
{
set_forced_breakpoint($i);
}
- }
- }
+ } ## end elsif ( $token eq 'if' ||...)
+ } ## end elsif ( $type eq 'k' )
elsif ( $is_assignment{$type} ) {
$i_equals[$depth] = $i;
}
if ( $type eq ':' ) {
$last_colon_sequence_number = $type_sequence;
- # TESTING: retain break at a ':' line break
+ # retain break at a ':' line break
if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_trinary_breakpoints )
+ && $rOpts_break_at_old_ternary_breakpoints )
{
- # TESTING:
set_forced_breakpoint($i);
# break at previous '='
set_forced_breakpoint( $i_equals[$depth] );
$i_equals[$depth] = -1;
}
- }
- }
+ } ## end if ( ( $i == $i_line_start...))
+ } ## end if ( $type eq ':' )
if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
my $inc = ( $type eq ':' ) ? 0 : 1;
set_forced_breakpoint( $i - $inc );
delete $postponed_breakpoint{$type_sequence};
}
- }
+ } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
# set breaks at ?/: if they will get separated (and are
# not a ?/: chain), or if the '?' is at the end of the
|| $tokens_to_go[$max_index_to_go] eq '#'
);
set_closing_breakpoint($i);
- }
- }
- }
+ } ## end if ( $i_colon <= 0 ||...)
+ } ## end elsif ( $token eq '?' )
+ } ## end if ($type_sequence)
#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
$rfor_semicolon_list[$depth] = [];
$i_equals[$depth] = -1;
$want_comma_break[$depth] = 0;
- $container_type[$depth] =
+ $container_type[$depth] =
( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
? $last_nonblank_token
: "";
# and user wants brace to left
&& !$rOpts->{'opening-brace-always-on-right'}
- && ( $type eq '{' ) # should be true
+ && ( $type eq '{' ) # should be true
&& ( $token eq '{' ) # should be true
)
{
set_forced_breakpoint( $i - 1 );
- }
- }
+ } ## end if ( $block_type && ( ...))
+ } ## end if ( $depth > $current_depth)
#------------------------------------------------------------
# Handle Decreasing Depth..
&& !$rOpts->{'opening-brace-always-on-right'} )
{
set_forced_breakpoint($i);
- }
+ } ## end if ( $token eq ')' && ...
#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
# this term is long if we had to break at interior commas..
my $is_long_term = $bp_count > 0;
- # ..or if the length between opening and closing parens exceeds
- # allowed line length
+ # If this is a short container with one or more comma arrows,
+ # then we will mark it as a long term to open it if requested.
+ # $rOpts_comma_arrow_breakpoints =
+ # 0 - open only if comma precedes closing brace
+ # 1 - stable: except for one line blocks
+ # 2 - try to form 1 line blocks
+ # 3 - ignore =>
+ # 4 - always open up if vt=0
+ # 5 - stable: even for one line blocks if vt=0
+ if ( !$is_long_term
+ && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
+ && $index_before_arrow[ $depth + 1 ] > 0
+ && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
+ )
+ {
+ $is_long_term = $rOpts_comma_arrow_breakpoints == 4
+ || ( $rOpts_comma_arrow_breakpoints == 0
+ && $last_nonblank_token eq ',' )
+ || ( $rOpts_comma_arrow_breakpoints == 5
+ && $old_breakpoint_to_go[$i_opening] );
+ } ## end if ( !$is_long_term &&...)
+
+ # mark term as long if the length between opening and closing
+ # parens exceeds allowed line length
if ( !$is_long_term && $saw_opening_structure ) {
my $i_opening_minus = find_token_starting_list($i_opening);
# semicolon, hence the '>=' here (oneline.t)
$is_long_term =
excess_line_length( $i_opening_minus, $i ) >= 0;
- }
+ } ## end if ( !$is_long_term &&...)
# We've set breaks after all comma-arrows. Now we have to
# undo them if this can be a one-line block
# user doesn't require breaking after all comma-arrows
( $rOpts_comma_arrow_breakpoints != 0 )
+ && ( $rOpts_comma_arrow_breakpoints != 4 )
# and if the opening structure is in this batch
&& $saw_opening_structure
{
undo_forced_breakpoint_stack(
$breakpoint_undo_stack[$current_depth] );
- }
+ } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
# now see if we have any comma breakpoints left
my $has_comma_breakpoints =
$forced_breakpoint_count );
# update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
+ $has_broken_sublist[$depth] =
+ $has_broken_sublist[$depth]
|| $has_broken_sublist[$current_depth]
|| $is_long_term
|| $has_comma_breakpoints;
else {
set_logical_breakpoints($current_depth);
}
- }
+ } ## end if ( $item_count_stack...)
if ( $is_long_term
&& @{ $rfor_semicolon_list[$current_depth] } )
# leading term alignment unless -lp is used.
$has_comma_breakpoints = 1
unless $rOpts_line_up_parentheses;
- }
+ } ## end if ( $is_long_term && ...)
if (
if ( $rOpts_line_up_parentheses && $saw_opening_structure )
{
my $item = $leading_spaces_to_go[ $i_opening + 1 ];
+ if ( $i_opening + 1 < $max_index_to_go
+ && $types_to_go[ $i_opening + 1 ] eq 'b' )
+ {
+ $item = $leading_spaces_to_go[ $i_opening + 2 ];
+ }
if ( defined($item) ) {
my $i_start_2 = $item->get_STARTING_INDEX();
if (
if ( $test2 == $test1 ) {
set_forced_breakpoint( $i_start_2 - 1 );
}
- }
- }
- }
+ } ## end if ( defined($i_start_2...))
+ } ## end if ( defined($item) )
+ } ## end if ( $rOpts_line_up_parentheses...)
# break after opening structure.
# note: break before closing structure will be automatic
unless ( $do_not_break_apart
|| is_unbreakable_container($current_depth) );
+ # break at ',' of lower depth level before opening token
+ if ( $last_comma_index[$depth] ) {
+ set_forced_breakpoint( $last_comma_index[$depth] );
+ }
+
# break at '.' of lower depth level before opening token
if ( $last_dot_index[$depth] ) {
set_forced_breakpoint( $last_dot_index[$depth] );
}
- # break before opening structure if preeced by another
+ # break before opening structure if preceded by another
# closing structure and a comma. This is normally
# done by the previous closing brace, but not
# if it was a one-line block.
if ( $want_break_before{$token_prev} ) {
set_forced_breakpoint($i_prev);
}
- }
- }
- }
+ } ## end elsif ( $types_to_go[$i_prev...])
+ } ## end if ( $i_opening > 2 )
+ } ## end if ( $minimum_depth <=...)
# break after comma following closing structure
if ( $next_type eq ',' ) {
)
{
set_forced_breakpoint($i);
- }
+ } ## end if ( $is_assignment{$next_nonblank_type...})
# break at any comma before the opening structure Added
# for -lp, but seems to be good in general. It isn't
# must set fake breakpoint to alert outer containers that
# they are complex
set_fake_breakpoint();
- }
- }
+ } ## end elsif ($is_long_term)
+
+ } ## end elsif ( $depth < $current_depth)
#------------------------------------------------------------
# Handle this token
$want_comma_break[$depth] = 1;
$index_before_arrow[$depth] = $i_last_nonblank_token;
next;
- }
+ } ## end if ( $type eq '=>' )
elsif ( $type eq '.' ) {
$last_dot_index[$depth] = $i;
$dont_align[$depth] = 1;
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
- }
+ } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
# now just handle any commas
next unless ( $type eq ',' );
if ( $want_comma_break[$depth] ) {
if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- next;
+ if ($rOpts_comma_arrow_breakpoints) {
+ $want_comma_break[$depth] = 0;
+ ##$index_before_arrow[$depth] = -1;
+ next;
+ }
}
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
# break before the previous token if it looks safe
# Example of something that we will not try to break before:
# DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
+ # Also we don't want to break at a binary operator (like +):
+ # $c->createOval(
+ # $x + $R, $y +
+ # $R => $x - $R,
+ # $y - $R, -fill => 'black',
+ # );
my $ibreak = $index_before_arrow[$depth] - 1;
if ( $ibreak > 0
&& $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
{
if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) {
- set_forced_breakpoint($ibreak);
- }
- }
+ if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
+
+ # don't break pointer calls, such as the following:
+ # File::Spec->curdir => 1,
+ # (This is tokenized as adjacent 'w' tokens)
+ ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+
+ # And don't break before a comma, as in the following:
+ # ( LONGER_THAN,=> 1,
+ # EIGHTY_CHARACTERS,=> 2,
+ # CAUSES_FORMATTING,=> 3,
+ # LIKE_THIS,=> 4,
+ # );
+ # This example is for -tso but should be general rule
+ if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
+ && $tokens_to_go[ $ibreak + 1 ] ne ',' )
+ {
+ set_forced_breakpoint($ibreak);
+ }
+ } ## end if ( $types_to_go[$ibreak...])
+ } ## end if ( $ibreak > 0 && $tokens_to_go...)
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
# treat any list items so far as an interrupted list
$interrupted_list[$depth] = 1;
next;
- }
-
- # skip past these commas if we are not supposed to format them
- next if ( $dont_align[$depth] );
+ } ## end if ( $want_comma_break...)
# break after all commas above starting depth
- if ( $depth < $starting_depth ) {
+ if ( $depth < $starting_depth && !$dont_align[$depth] ) {
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
next;
}
&& $container_environment_to_go[$i] eq 'BLOCK' )
{
$dont_align[$depth] = 1;
- next;
}
- }
+ } ## end if ( $item_count == 0 )
$comma_index[$depth][$item_count] = $i;
++$item_count_stack[$depth];
if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
$identifier_count_stack[$depth]++;
}
- }
+ } ## end while ( ++$i <= $max_index_to_go)
#-------------------------------------------
# end of loop over all tokens in this batch
&& $i_opening >= $max_index_to_go - 2
&& $token =~ /^['"]$/ )
);
- }
+ } ## end for ( my $dd = $current_depth...)
# Return a flag indicating if the input file had some good breakpoints.
# This flag will be used to force a break in a line shorter than the
if ( $has_old_logical_breakpoints[$current_depth] ) {
$saw_good_breakpoint = 1;
}
+
+ # A complex line with one break at an = has a good breakpoint.
+ # This is not complex ($total_depth_variation=0):
+ # $res1
+ # = 10;
+ #
+ # This is complex ($total_depth_variation=6):
+ # $res2 =
+ # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
+ elsif ($i_old_assignment_break
+ && $total_depth_variation > 4
+ && $old_breakpoint_count == 1 )
+ {
+ $saw_good_breakpoint = 1;
+ } ## end elsif ( $i_old_assignment_break...)
+
return $saw_good_breakpoint;
- }
+ } ## end sub scan_list
} # end scan_list
sub find_token_starting_list {
$item_count, $identifier_count, $rcomma_index,
$next_nonblank_type, $list_type, $interrupted,
$rdo_not_break_apart, $must_break_open,
- )
- = @_;
+ ) = @_;
# nothing to do if no commas seen
return if ( $item_count < 1 );
}
#my ( $a, $b, $c ) = caller();
-#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
+#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
#---------------------------------------------------------------
# Interrupted List Rule:
- # A list is is forced to use old breakpoints if it was interrupted
+ # A list is forced to use old breakpoints if it was interrupted
# by side comments or blank lines, or requested by user.
#---------------------------------------------------------------
if ( $rOpts_break_at_old_comma_breakpoints
# Looks like a list of items. We have to look at it and size it up.
#---------------------------------------------------------------
- my $opening_token = $tokens_to_go[$i_opening_paren];
+ my $opening_token = $tokens_to_go[$i_opening_paren];
my $opening_environment =
$container_environment_to_go[$i_opening_paren];
# exceeds the available space after the '('.
my $need_lp_break_open = $must_break_open;
if ( $rOpts_line_up_parentheses && !$must_break_open ) {
- my $columns_if_unbroken = $rOpts_maximum_line_length -
+ my $columns_if_unbroken =
+ maximum_line_length($i_opening_minus) -
total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
+ $need_lp_break_open =
+ ( $max_length[0] > $columns_if_unbroken )
|| ( $max_length[1] > $columns_if_unbroken )
|| ( $first_term_length > $columns_if_unbroken );
}
# Field width parameters
my $pair_width = ( $max_length[0] + $max_length[1] );
- my $max_width =
+ my $max_width =
( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
# Number of free columns across the page width for laying out tables
# )
# if $style eq 'all';
- my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
- my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
+ my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
+ my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
my $long_first_term =
excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
# or if this is a sublist of a larger list
|| $in_hierarchical_list
- # or if multiple commas and we dont have a long first or last
+ # or if multiple commas and we don't have a long first or last
# term
|| ( $comma_count > 1
&& !( $long_last_term || $long_first_term ) )
if ( $number_of_fields > 1 ) {
$formatted_columns =
- ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
- $max_width );
+ ( $pair_width * ( int( $item_count / 2 ) ) +
+ ( $item_count % 2 ) * $max_width );
}
else {
$formatted_columns = $max_width * $item_count;
# align; high sparsity does not look good, especially with few lines
my $sparsity = ($unused_columns) / ($formatted_columns);
my $max_allowed_sparsity =
- ( $item_count < 3 ) ? 0.1
+ ( $item_count < 3 ) ? 0.1
: ( $packed_lines == 1 ) ? 0.15
: ( $packed_lines == 2 ) ? 0.4
- : 0.7;
+ : 0.7;
# Begin check for shortcut methods, which avoid treating a list
# as a table for relatively small parenthesized lists. These
# are usually easier to read if not formatted as tables.
if (
- $packed_lines <= 2 # probably can fit in 2 lines
- && $item_count < 9 # doesn't have too many items
+ $packed_lines <= 2 # probably can fit in 2 lines
+ && $item_count < 9 # doesn't have too many items
&& $opening_environment eq 'BLOCK' # not a sub-container
- && $opening_token eq '(' # is paren list
+ && $opening_token eq '(' # is paren list
)
{
# Shortcut method 1: for -lp and just one comma:
# This is a no-brainer, just break at the comma.
if (
- $rOpts_line_up_parentheses # -lp
- && $item_count == 2 # two items, one comma
+ $rOpts_line_up_parentheses # -lp
+ && $item_count == 2 # two items, one comma
&& !$must_break_open
)
{
)
{
- my $break_count =
- set_ragged_breakpoints( \@i_term_comma,
+ my $break_count = set_ragged_breakpoints( \@i_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
# debug stuff
FORMATTER_DEBUG_FLAG_SPARSE && do {
- print
+ print STDOUT
"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
};
# imprecise, but not too bad. (steve.t)
if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
- $too_long =
- excess_line_length( $i_opening_minus,
+ $too_long = excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
my $i_opening_minus = $i_opening_paren - 4;
if ( $i_opening_minus >= 0 ) {
- $too_long =
- excess_line_length( $i_opening_minus,
+ $too_long = excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
}
# let the continuation logic handle it if 2 lines
else {
- my $break_count =
- set_ragged_breakpoints( \@i_term_comma,
+ my $break_count = set_ragged_breakpoints( \@i_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
sub table_columns_available {
my $i_first_comma = shift;
- my $columns =
- $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
+ my $columns =
+ maximum_line_length($i_first_comma) -
+ leading_spaces_to_go($i_first_comma);
# Patch: the vertical formatter does not line up lines whose lengths
# exactly equal the available line length because of allowances
FORMATTER_DEBUG_FLAG_NOBREAK && do {
my ( $a, $b, $c ) = caller();
- print(
-"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
- );
+ print STDOUT
+"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
};
@nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
else {
FORMATTER_DEBUG_FLAG_NOBREAK && do {
my ( $a, $b, $c ) = caller();
- print(
-"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
- );
+ print STDOUT
+ "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
};
}
}
# if we break before or after it
my $token = $tokens_to_go[$i];
- if ( $token =~ /^([\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
+ if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
}
FORMATTER_DEBUG_FLAG_FORCE && do {
my ( $a, $b, $c ) = caller();
- print
-"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
+ print STDOUT
+"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
};
if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
FORMATTER_DEBUG_FLAG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
- print(
-"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
- );
+ print STDOUT
+"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
};
}
else {
FORMATTER_DEBUG_FLAG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
- print(
-"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
- );
+ print STDOUT
+"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
};
}
}
}
-sub recombine_breakpoints {
+{ # begin recombine_breakpoints
- # sub set_continuation_breaks is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Occasionally small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine, recombine_breakpoints.
- my ( $ri_first, $ri_last ) = @_;
- my $more_to_do = 1;
+ my %is_amp_amp;
+ my %is_ternary;
+ my %is_math_op;
+ my %is_plus_minus;
+ my %is_mult_div;
- # Keep looping until there are no more possible recombinations
- my $nmax_last = @$ri_last;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $n;
- my $nmax = @$ri_last - 1;
+ BEGIN {
- # safety check..
- unless ( $nmax < $nmax_last ) {
+ @_ = qw( && || );
+ @is_amp_amp{@_} = (1) x scalar(@_);
- # shouldn't happen because splice below decreases nmax on each pass:
- # but i get paranoid sometimes
- die "Program bug-infinite loop in recombine breakpoints\n";
- }
- $nmax_last = $nmax;
- $more_to_do = 0;
+ @_ = qw( ? : );
+ @is_ternary{@_} = (1) x scalar(@_);
- # loop over all remaining lines...
- for $n ( 1 .. $nmax ) {
+ @_ = qw( + - * / );
+ @is_math_op{@_} = (1) x scalar(@_);
- #----------------------------------------------------------
- # Indexes of the endpoints of the two lines are:
- #
- # ---left---- | ---right---
- # $if $imid | $imidr $il
- #
- # We want to decide if we should join tokens $imid to $imidr
- #----------------------------------------------------------
- my $if = $$ri_first[ $n - 1 ];
- my $il = $$ri_last[$n];
- my $imid = $$ri_last[ $n - 1 ];
- my $imidr = $$ri_first[$n];
-
-#print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
-
- #----------------------------------------------------------
- # Start of special recombination rules
- # These are ad-hoc rules which have been found to work ok.
- # Skip to next pair to avoid re-combination.
- #----------------------------------------------------------
-
- # a terminal '{' should stay where it is
- next if ( $n == $nmax && $types_to_go[$imidr] eq '{' );
-
- #----------------------------------------------------------
- # examine token at $imid (right end of first line of pair)
- #----------------------------------------------------------
-
- # an isolated '}' may join with a ';' terminated segment
- if ( $types_to_go[$imid] eq '}' ) {
- next
- unless (
+ @_ = qw( + - );
+ @is_plus_minus{@_} = (1) x scalar(@_);
- # join } and ;
- ( ( $if == $imid ) && ( $types_to_go[$il] eq ';' ) )
+ @_ = qw( * / );
+ @is_mult_div{@_} = (1) x scalar(@_);
+ }
- # handle '.' and '?' below
- || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
- );
- }
+ sub DUMP_BREAKPOINTS {
- # do not recombine lines with ending &&, ||, or :
- elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
- next unless $want_break_before{ $types_to_go[$imid] };
+ # Debug routine to dump current breakpoints...not normally called
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $ri_beg, $ri_end, $msg ) = @_;
+ print STDERR "----Dumping breakpoints from: $msg----\n";
+ for my $n ( 0 .. @{$ri_end} - 1 ) {
+ my $ibeg = $$ri_beg[$n];
+ my $iend = $$ri_end[$n];
+ my $text = "";
+ foreach my $i ( $ibeg .. $iend ) {
+ $text .= $tokens_to_go[$i];
}
+ print STDERR "$n ($ibeg:$iend) $text\n";
+ }
+ print STDERR "----\n";
+ }
- # for lines ending in a comma...
- elsif ( $types_to_go[$imid] eq ',' ) {
+ sub recombine_breakpoints {
- # an isolated '},' may join with an identifier + ';'
- # this is useful for the class of a 'bless' statement (bless.t)
- if ( $types_to_go[$if] eq '}'
- && $types_to_go[$imidr] eq 'i' )
+ # sub set_continuation_breaks is very liberal in setting line breaks
+ # for long lines, always setting breaks at good breakpoints, even
+ # when that creates small lines. Sometimes small line fragments
+ # are produced which would look better if they were combined.
+ # That's the task of this routine.
+ #
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $ri_beg, $ri_end ) = @_;
+
+ # Make a list of all good joining tokens between the lines
+ # n-1 and n.
+ my @joint;
+ my $nmax = @$ri_end - 1;
+ for my $n ( 1 .. $nmax ) {
+ my $ibeg_1 = $$ri_beg[ $n - 1 ];
+ my $iend_1 = $$ri_end[ $n - 1 ];
+ my $iend_2 = $$ri_end[$n];
+ my $ibeg_2 = $$ri_beg[$n];
+
+ my ( $itok, $itokp, $itokm );
+
+ foreach my $itest ( $iend_1, $ibeg_2 ) {
+ my $type = $types_to_go[$itest];
+ if ( $is_math_op{$type}
+ || $is_amp_amp{$type}
+ || $is_assignment{$type}
+ || $type eq ':' )
{
- next
- unless ( ( $if == ( $imid - 1 ) )
- && ( $il == ( $imidr + 1 ) )
- && ( $types_to_go[$il] eq ';' ) );
-
- # override breakpoint
- $forced_breakpoint_to_go[$imid] = 0;
- }
-
- # but otherwise, do not recombine unless this will leave
- # just 1 more line
- else {
- next unless ( $n + 1 >= $nmax );
+ $itok = $itest;
}
}
+ $joint[$n] = [$itok];
+ }
- # opening paren..
- elsif ( $types_to_go[$imid] eq '(' ) {
+ my $more_to_do = 1;
- # No longer doing this
- }
+ # We keep looping over all of the lines of this batch
+ # until there are no more possible recombinations
+ my $nmax_last = @$ri_end;
+ while ($more_to_do) {
+ my $n_best = 0;
+ my $bs_best;
+ my $n;
+ my $nmax = @$ri_end - 1;
- elsif ( $types_to_go[$imid] eq ')' ) {
+ # Safety check for infinite loop
+ unless ( $nmax < $nmax_last ) {
- # No longer doing this
+ # Shouldn't happen because splice below decreases nmax on each
+ # pass.
+ Perl::Tidy::Die
+ "Program bug-infinite loop in recombine breakpoints\n";
}
+ $nmax_last = $nmax;
+ $more_to_do = 0;
+ my $previous_outdentable_closing_paren;
+ my $leading_amp_count = 0;
+ my $this_line_is_semicolon_terminated;
- # keep a terminal colon
- elsif ( $types_to_go[$imid] eq ':' ) {
- next;
- }
+ # loop over all remaining lines in this batch
+ for $n ( 1 .. $nmax ) {
- # keep a terminal for-semicolon
- elsif ( $types_to_go[$imid] eq 'f' ) {
- next;
- }
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
+ #
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # between the tokens at $iend_1 and $ibeg_2
+ #
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just issue a 'next'
+ # command if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
+ #
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $$ri_beg[ $n - 1 ];
+ my $iend_1 = $$ri_end[ $n - 1 ];
+ my $iend_2 = $$ri_end[$n];
+ my $ibeg_2 = $$ri_beg[$n];
+ my $ibeg_nmax = $$ri_beg[$nmax];
+
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
+
+ # some beginning indexes of other lines, which may not exist
+ my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
+
+ my $bs_tweak = 0;
+
+ #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
+ # $nesting_depth_to_go[$ibeg_1] );
+
+ FORMATTER_DEBUG_FLAG_RECOMBINE && do {
+ print STDERR
+"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
+ };
+
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ if ( $n == $nmax ) {
+
+ # a terminal '{' should stay where it is
+ next if $type_ibeg_2 eq '{';
+
+ # set flag if statement $n ends in ';'
+ $this_line_is_semicolon_terminated = $type_iend_2 eq ';'
+
+ # with possible side comment
+ || ( $type_iend_2 eq '#'
+ && $iend_2 - $ibeg_2 >= 2
+ && $types_to_go[ $iend_2 - 2 ] eq ';'
+ && $types_to_go[ $iend_2 - 1 ] eq 'b' );
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 1:
+ # Examine the special token joining this line pair, if any.
+ # Put as many tests in this section to avoid duplicate code and
+ # to make formatting independent of whether breaks are to the
+ # left or right of an operator.
+ #----------------------------------------------------------
+
+ my ($itok) = @{ $joint[$n] };
+ if ($itok) {
+
+ # FIXME: Patch - may not be necessary
+ my $iend_1 =
+ $type_iend_1 eq 'b'
+ ? $iend_1 - 1
+ : $iend_1;
+
+ my $iend_2 =
+ $type_iend_2 eq 'b'
+ ? $iend_2 - 1
+ : $iend_2;
+ ## END PATCH
+
+ my $type = $types_to_go[$itok];
- # if '=' at end of line ...
- elsif ( $is_assignment{ $types_to_go[$imid] } ) {
+ if ( $type eq ':' ) {
- # otherwise always ok to join isolated '='
- unless ( $if == $imid ) {
+ # do not join at a colon unless it disobeys the break request
+ if ( $itok eq $iend_1 ) {
+ next unless $want_break_before{$type};
+ }
+ else {
+ $leading_amp_count++;
+ next if $want_break_before{$type};
+ }
+ } ## end if ':'
- my $is_math = (
- ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ )
+ # handle math operators + - * /
+ elsif ( $is_math_op{$type} ) {
- # note no '$' in pattern because -> can
- # start long identifier
- && !grep { $_ =~ /^(->|=>|[\,])/ }
- @types_to_go[ $imidr .. $il ]
- );
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
- # retain the break after the '=' unless ...
- next
- unless (
+ # This can be important in math-intensive code.
- # '=' is followed by a number and looks like math
- ( $types_to_go[$imidr] eq 'n' && $is_math )
+ my $good_combo;
- # or followed by a scalar and looks like math
- || ( ( $types_to_go[$imidr] eq 'i' )
- && ( $tokens_to_go[$imidr] =~ /^\$/ )
- && $is_math )
+ my $itokp = min( $inext_to_go[$itok], $iend_2 );
+ my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
+ my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
+ my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
- # or followed by a single "short" token
- # ('12' is arbitrary)
- || ( $il == $imidr
- && token_sequence_length( $imidr, $imidr ) < 12 )
+ # check for a number on the right
+ if ( $types_to_go[$itokp] eq 'n' ) {
- );
- }
- unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$imid] = 0;
- }
- }
+ # ok if nothing else on right
+ if ( $itokp == $iend_2 ) {
+ $good_combo = 1;
+ }
+ else {
- # for keywords..
- elsif ( $types_to_go[$imid] eq 'k' ) {
+ # look one more token to right..
+ # okay if math operator or some termination
+ $good_combo =
+ ( ( $itokpp == $iend_2 )
+ && $is_math_op{ $types_to_go[$itokpp] } )
+ || $types_to_go[$itokpp] =~ /^[#,;]$/;
+ }
+ }
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
+ # check for a number on the left
+ if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$imid] }
- );
+ # okay if nothing else to left
+ if ( $itokm == $ibeg_1 ) {
+ $good_combo = 1;
+ }
- if ( $is_and_or{ $tokens_to_go[$imid] } ) {
- next unless $want_break_before{ $tokens_to_go[$imid] };
- }
- }
-
- #----------------------------------------------------------
- # examine token at $imidr (left end of second line of pair)
- #----------------------------------------------------------
-
- # do not recombine lines with leading &&, ||, or :
- if ( $types_to_go[$imidr] =~ /^(|:|\&\&|\|\|)$/ ) {
- next if $want_break_before{ $types_to_go[$imidr] };
- }
-
- # Identify and recombine a broken ?/: chain
- elsif ( $types_to_go[$imidr] eq '?' ) {
-
- # indexes of line first tokens --
- # mm - line before previous line
- # f - previous line
- # <-- this line
- # ff - next line
- # fff - line after next
- my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
- my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
- my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1;
- my $seqno = $type_sequence_to_go[$imidr];
- my $f_ok =
- ( $types_to_go[$if] eq ':'
- && $type_sequence_to_go[$if] ==
- $seqno - TYPE_SEQUENCE_INCREMENT );
- my $mm_ok =
- ( $imm >= 0
- && $types_to_go[$imm] eq ':'
- && $type_sequence_to_go[$imm] ==
- $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
-
- my $ff_ok =
- ( $iff > 0
- && $types_to_go[$iff] eq ':'
- && $type_sequence_to_go[$iff] == $seqno );
- my $fff_ok =
- ( $ifff > 0
- && $types_to_go[$ifff] eq ':'
- && $type_sequence_to_go[$ifff] ==
- $seqno + TYPE_SEQUENCE_INCREMENT );
-
- # we require that this '?' be part of a correct sequence
- # of 3 in a row or else no recombination is done.
- next
- unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
- $forced_breakpoint_to_go[$imid] = 0;
- }
+ # otherwise look one more token to left
+ else {
- # do not recombine lines with leading '.'
- elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
- my $i_next_nonblank = $imidr + 1;
- if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
- $i_next_nonblank++;
- }
+ # okay if math operator, comma, or assignment
+ $good_combo = ( $itokmm == $ibeg_1 )
+ && ( $is_math_op{ $types_to_go[$itokmm] }
+ || $types_to_go[$itokmm] =~ /^[,]$/
+ || $is_assignment{ $types_to_go[$itokmm] }
+ );
+ }
+ }
- next
- unless (
+ # look for a single short token either side of the
+ # operator
+ if ( !$good_combo ) {
+
+ # Slight adjustment factor to make results
+ # independent of break before or after operator in
+ # long summed lists. (An operator and a space make
+ # two spaces).
+ my $two = ( $itok eq $iend_1 ) ? 2 : 0;
+
+ $good_combo =
+
+ # numbers or id's on both sides of this joint
+ $types_to_go[$itokp] =~ /^[in]$/
+ && $types_to_go[$itokm] =~ /^[in]$/
+
+ # one of the two lines must be short:
+ && (
+ (
+ # no more than 2 nonblank tokens right of
+ # joint
+ $itokpp == $iend_2
+
+ # short
+ && token_sequence_length( $itokp, $iend_2 )
+ < $two +
+ $rOpts_short_concatenation_item_length
+ )
+ || (
+ # no more than 2 nonblank tokens left of
+ # joint
+ $itokmm == $ibeg_1
+
+ # short
+ && token_sequence_length( $ibeg_1, $itokm )
+ < 2 - $two +
+ $rOpts_short_concatenation_item_length
+ )
- # ... unless there is just one and we can reduce this to
- # two lines if we do. For example, this :
- #
- # $bodyA .=
- # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
- #
- # looks better than this:
- # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
- # . '$args .= $pat;'
+ )
- (
- $n == 2
- && $n == $nmax
- && $types_to_go[$if] ne $types_to_go[$imidr]
- )
+ # keep pure terms; don't mix +- with */
+ && !(
+ $is_plus_minus{$type}
+ && ( $is_mult_div{ $types_to_go[$itokmm] }
+ || $is_mult_div{ $types_to_go[$itokpp] } )
+ )
+ && !(
+ $is_mult_div{$type}
+ && ( $is_plus_minus{ $types_to_go[$itokmm] }
+ || $is_plus_minus{ $types_to_go[$itokpp] } )
+ )
- #
- # ... or this would strand a short quote , like this
- # . "some long qoute"
- # . "\n";
- #
+ ;
+ }
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $il - 1
- && length( $tokens_to_go[$i_next_nonblank] ) <
- $rOpts_short_concatenation_item_length )
- );
- }
+ # it is also good to combine if we can reduce to 2 lines
+ if ( !$good_combo ) {
- # handle leading keyword..
- elsif ( $types_to_go[$imidr] eq 'k' ) {
+ # index on other line where same token would be in a
+ # long chain.
+ my $iother =
+ ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
- # handle leading "and" and "or"
- if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
+ $good_combo =
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$iother] ne $type;
+ }
- # Decide if we will combine a single terminal 'and' and
- # 'or' after an 'if' or 'unless'. We should consider the
- # possible vertical alignment, and visual clutter.
+ next unless ($good_combo);
- # This looks best with the 'and' on the same line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- # Eventually, it would be nice to look for similarities (such as 'this' or
- # 'Parents'), but for now I'm using a simple rule that says that the
- # resulting line length must not be more than half the maximum line length
- # (making it 80/2 = 40 characters by default).
+ } ## end math
+
+ elsif ( $is_amp_amp{$type} ) {
+ ##TBD
+ } ## end &&, ||
+
+ elsif ( $is_assignment{$type} ) {
+ ##TBD
+ } ## end assignment
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 2:
+ # Examine token at $iend_1 (right end of first line of pair)
+ #----------------------------------------------------------
+
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $type_iend_1 eq '}' ) {
+
+ # Check for cases where combining a semicolon terminated
+ # statement with a previous isolated closing paren will
+ # allow the combined line to be outdented. This is
+ # generally a good move. For example, we can join up
+ # the last two lines here:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # )
+ # = stat($file);
+ #
+ # to get:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # ) = stat($file);
+ #
+ # which makes the parens line up.
+ #
+ # Another example, from Joe Matarazzo, probably looks best
+ # with the 'or' clause appended to the trailing paren:
+ # $self->some_method(
+ # PARAM1 => 'foo',
+ # PARAM2 => 'bar'
+ # ) or die "Some_method didn't work";
+ #
+ # But we do not want to do this for something like the -lp
+ # option where the paren is not outdentable because the
+ # trailing clause will be far to the right.
+ #
+ # The logic here is synchronized with the logic in sub
+ # sub set_adjusted_indentation, which actually does
+ # the outdenting.
+ #
+ $previous_outdentable_closing_paren =
+ $this_line_is_semicolon_terminated
+
+ # only one token on last line
+ && $ibeg_1 == $iend_1
+
+ # must be structural paren
+ && $tokens_to_go[$iend_1] eq ')'
+
+ # style must allow outdenting,
+ && !$closing_token_indentation{')'}
+
+ # only leading '&&', '||', and ':' if no others seen
+ # (but note: our count made below could be wrong
+ # due to intervening comments)
+ && ( $leading_amp_count == 0
+ || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
+
+ # but leading colons probably line up with a
+ # previous colon or question (count could be wrong).
+ && $type_ibeg_2 ne ':'
+
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
+
+ # YVES patch 2 of 2:
+ # Allow cuddled eval chains, like this:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # } or do {
+ # #handle error
+ # };
+ # This patch works together with a patch in
+ # setting adjusted indentation (where the closing eval
+ # brace is outdented if possible).
+ # The problem is that an 'eval' block has continuation
+ # indentation and it looks better to undo it in some
+ # cases. If we do not use this patch we would get:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # }
+ # or do {
+ # #handle error
+ # };
+ # The alternative, for uncuddled style, is to create
+ # a patch in set_adjusted_indentation which undoes
+ # the indentation of a leading line like 'or do {'.
+ # This doesn't work well with -icb through
+ if (
+ $block_type_to_go[$iend_1] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'}
+ && $tokens_to_go[$iend_2] eq '{'
+ && (
+ ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
+ || ( $type_ibeg_2 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_2] } )
+ || $is_if_unless{ $tokens_to_go[$ibeg_2] }
+ )
+ )
+ {
+ $previous_outdentable_closing_paren ||= 1;
+ }
next
unless (
- $n == $nmax # if this is the last line
- && $types_to_go[$il] eq ';' # ending in ';'
- && $types_to_go[$if] eq 'k' # after 'if' or 'unless'
- # /^(if|unless)$/
- && $is_if_unless{ $tokens_to_go[$if] }
-
- # and if this doesn't make a long last line
- && total_line_length( $if, $il ) <=
- $half_maximum_line_length
+ $previous_outdentable_closing_paren
+
+ # handle '.' and '?' specially below
+ || ( $type_ibeg_2 =~ /^[\.\?]$/ )
);
+ }
+
+ # YVES
+ # honor breaks at opening brace
+ # Added to prevent recombining something like this:
+ # } || eval { package main;
+ elsif ( $type_iend_1 eq '{' ) {
+ next if $forced_breakpoint_to_go[$iend_1];
+ }
- # override breakpoint
- $forced_breakpoint_to_go[$imid] = 0;
+ # do not recombine lines with ending &&, ||,
+ elsif ( $is_amp_amp{$type_iend_1} ) {
+ next unless $want_break_before{$type_iend_1};
}
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_iend_1 eq '?' ) {
- # FIXME: This is still experimental..may not be too useful
+ # Do not recombine different levels
next
- unless (
- $n == $nmax # if this is the last line
- && $types_to_go[$il] eq ';' # ending in ';'
- && $types_to_go[$if] eq 'k'
+ if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
- # /^(and|or)$/
- && $is_and_or{ $tokens_to_go[$if] }
+ # do not recombine unless next line ends in :
+ next unless $type_iend_2 eq ':';
+ }
- # and if this doesn't make a long last line
- && total_line_length( $if, $il ) <=
- $half_maximum_line_length
- );
+ # for lines ending in a comma...
+ elsif ( $type_iend_1 eq ',' ) {
- # override breakpoint
- $forced_breakpoint_to_go[$imid] = 0;
- }
+ # Do not recombine at comma which is following the
+ # input bias.
+ # TODO: might be best to make a special flag
+ next if ( $old_breakpoint_to_go[$iend_1] );
- # handle all other leading keywords
- else {
+ # an isolated '},' may join with an identifier + ';'
+ # this is useful for the class of a 'bless' statement (bless.t)
+ if ( $type_ibeg_1 eq '}'
+ && $type_ibeg_2 eq 'i' )
+ {
+ next
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
- # keywords look best at start of lines,
- # but combine things like "1 while"
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # but otherwise ..
+ else {
+
+ # do not recombine after a comma unless this will leave
+ # just 1 more line
+ next unless ( $n + 1 >= $nmax );
- unless ( $is_assignment{ $types_to_go[$imid] } ) {
+ # do not recombine if there is a change in indentation depth
next
- if ( ( $types_to_go[$imid] ne 'k' )
- && ( $tokens_to_go[$imidr] !~ /^(while)$/ ) );
+ if (
+ $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
+
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means no parens.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
+ }
+ }
+ next if $saw_paren;
}
}
- }
- # similar treatment of && and || as above for 'and' and 'or':
- elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
+ # opening paren..
+ elsif ( $type_iend_1 eq '(' ) {
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+ # No longer doing this
+ }
- next
- unless (
- $n == $nmax # if this is the last line
- && $types_to_go[$il] eq ';' # ending in ';'
- && $types_to_go[$if] eq 'k' # after an 'if' or 'unless'
- # /^(if|unless)$/
- && $is_if_unless{ $tokens_to_go[$if] }
-
- # and if this doesn't make a long last line
- && total_line_length( $if, $il ) <=
- $half_maximum_line_length
- );
+ elsif ( $type_iend_1 eq ')' ) {
- # override breakpoint
- $forced_breakpoint_to_go[$imid] = 0;
- }
+ # No longer doing this
+ }
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$imid] > 0 );
+ # keep a terminal for-semicolon
+ elsif ( $type_iend_1 eq 'f' ) {
+ next;
+ }
- #----------------------------------------------------------
- # end of special recombination rules
- #----------------------------------------------------------
+ # if '=' at end of line ...
+ elsif ( $is_assignment{$type_iend_1} ) {
- my $bs = $bond_strength_to_go[$imid];
+ # keep break after = if it was in input stream
+ # this helps prevent 'blinkers'
+ next if $old_breakpoint_to_go[$iend_1]
- # combined line cannot be too long
- next
- if excess_line_length( $if, $il ) > 0;
+ # don't strand an isolated '='
+ && $iend_1 != $ibeg_1;
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $$ri_first[ $n + 1 ];
- next
- if (
- $levels_to_go[$if] < $levels_to_go[$imidr]
- && $levels_to_go[$imidr] < $levels_to_go[$if_next]
+ my $is_short_quote =
+ ( $type_ibeg_2 eq 'Q'
+ && $ibeg_2 == $iend_2
+ && token_sequence_length( $ibeg_2, $ibeg_2 ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary =
+ ( $type_ibeg_1 eq '?'
+ && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $imid - $if <= 2
- && $types_to_go[$if] eq 'k'
- && $tokens_to_go[$if] eq 'if'
- && $tokens_to_go[$imid] ne '('
- )
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
+ {
+ next
+ unless (
+ (
- #
- );
- }
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
- # honor no-break's
- next if ( $bs == NO_BREAK );
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- if ( $bs > $bs_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
+ # or the next line ends in an open paren or brace
+ # and the break hasn't been forced [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $type_iend_2 eq '{' )
+ )
+
+ # do not recombine if the two lines might align well
+ # this is a very approximate test for this
+ && ( $ibeg_3 >= 0
+ && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
+ );
+
+ if (
+
+ # Recombine if we can make two lines
+ $nmax >= $n + 2
+
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used we have
+ # ending comma
+ && ( !$rOpts_line_up_parentheses
+ || $type_iend_2 ne ',' )
+ )
+ {
+
+ # otherwise, scan the rhs line up to last token for
+ # complexity. Note that we are not counting the last
+ # token in case it is an opening paren.
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 1 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
+ # ok to recombine if no level changes before last token
+ if ( $tv > 0 ) {
+
+ # otherwise, do not recombine if more than two
+ # level changes.
+ next if ( $tv > 1 );
+
+ # check total complexity of the two adjacent lines
+ # that will occur if we do this join
+ my $istop =
+ ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
+ for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 2 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
+ # do not recombine if total is more than 2 level changes
+ next if ( $tv > 2 );
+ }
+ }
+ }
+
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+ }
+
+ # for keywords..
+ elsif ( $type_iend_1 eq 'k' ) {
+
+ # make major control keywords stand out
+ # (recombine.t)
+ next
+ if (
+
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
+
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
+
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ next
+ unless $want_break_before{ $tokens_to_go[$iend_1] };
+ }
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 3:
+ # Examine token at $ibeg_2 (left end of second line of pair)
+ #----------------------------------------------------------
+
+ # join lines identified above as capable of
+ # causing an outdented line with leading closing paren
+ # Note that we are skipping the rest of this section
+ if ($previous_outdentable_closing_paren) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # handle lines with leading &&, ||
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
+
+ $leading_amp_count++;
+
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{$type_ibeg_1}
+ && $tokens_to_go[$iend_2] eq '(' )
+
+ # or is followed by a ? or : at same depth
+ #
+ # We are looking for something like this. We can
+ # recombine the && line with the line above to make the
+ # structure more clear:
+ # return
+ # exists $G->{Attr}->{V}
+ # && exists $G->{Attr}->{V}->{$u}
+ # ? %{ $G->{Attr}->{V}->{$u} }
+ # : ();
+ #
+ # We should probably leave something like this alone:
+ # return
+ # exists $G->{Attr}->{E}
+ # && exists $G->{Attr}->{E}->{$u}
+ # && exists $G->{Attr}->{E}->{$u}->{$v}
+ # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
+ # : ();
+ # so that we either have all of the &&'s (or ||'s)
+ # on one line, as in the first example, or break at
+ # each one as in the second example. However, it
+ # sometimes makes things worse to check for this because
+ # it prevents multiple recombinations. So this is not done.
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
+
+ next if !$ok && $want_break_before{$type_ibeg_2};
+ $forced_breakpoint_to_go[$iend_1] = 0;
+
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_ibeg_2 eq '?' ) {
+
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ next if ( $lev ne $levels_to_go[$ibeg_1] );
+
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
+ my $precedes_colon =
+ $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ next unless ( $follows_colon || $precedes_colon );
+
+ # we will always combining a ? line following a : line
+ if ( !$follows_colon ) {
+
+ # ...otherwise recombine only if it looks like a chain.
+ # we will just look at a few nearby lines to see if
+ # this looks like a chain.
+ my $local_count = 0;
+ foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ next unless ( $local_count > 1 );
+ }
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # do not recombine lines with leading '.'
+ elsif ( $type_ibeg_2 eq '.' ) {
+ my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
+ next
+ unless (
+
+ # ... unless there is just one and we can reduce
+ # this to two lines if we do. For example, this
+ #
+ #
+ # $bodyA .=
+ # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
+ #
+ # looks better than this:
+ # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
+ # . '$args .= $pat;'
+
+ (
+ $n == 2
+ && $n == $nmax
+ && $type_ibeg_1 ne $type_ibeg_2
+ )
+
+ # ... or this would strand a short quote , like this
+ # . "some long quote"
+ # . "\n";
+
+ || ( $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && $token_lengths_to_go[$i_next_nonblank] <
+ $rOpts_short_concatenation_item_length )
+ );
+ }
+
+ # handle leading keyword..
+ elsif ( $type_ibeg_2 eq 'k' ) {
+
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ # important: only combine a very simple or
+ # statement because the step below may have
+ # combined a trailing 'and' with this or,
+ # and we do not want to then combine
+ # everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ );
+##X: RT #81854
+ $forced_breakpoint_to_go[$iend_1] = 0
+ unless $old_breakpoint_to_go[$iend_1];
+ }
+
+ # handle leading 'and'
+ elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
+
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
+
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
+ )
+ );
+ }
+
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
+
+ # FIXME: This is still experimental..may not be too useful
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+
+ # previous line begins with 'and' or 'or'
+ && $type_ibeg_1 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
+
+ );
+ }
+
+ # handle all other leading keywords
+ else {
+
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{$type_iend_1} ) {
+ next
+ if ( ( $type_iend_1 ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
+ }
+ }
+ }
+
+ # similar treatment of && and || as above for 'and' and 'or':
+ # NOTE: This block of code is currently bypassed because
+ # of a previous block but is retained for possible future use.
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
+
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+
+ # previous line begins with an 'if' or 'unless' keyword
+ && $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ );
+ }
+
+ # handle line with leading = or similar
+ elsif ( $is_assignment{$type_ibeg_2} ) {
+ next unless ( $n == 1 || $n == $nmax );
+ next if $old_breakpoint_to_go[$iend_1];
+ next
+ unless (
+
+ # unless we can reduce this to two lines
+ $nmax == 2
+
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
+
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
+
+ # or this is a short line ending in ;
+ || ( $n == $nmax && $this_line_is_semicolon_terminated )
+ );
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 4:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
+
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
+
+ my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
+
+ # combined line cannot be too long
+ my $excess = excess_line_length( $ibeg_1, $iend_2 );
+ next if ( $excess > 0 );
+
+ # Require a few extra spaces before recombining lines if we are
+ # at an old breakpoint unless this is a simple list or terminal
+ # line. The goal is to avoid oscillating between two
+ # quasi-stable end states. For example this snippet caused
+ # problems:
+## my $this =
+## bless {
+## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
+## },
+## $type;
+ next
+ if ( $old_breakpoint_to_go[$iend_1]
+ && !$this_line_is_semicolon_terminated
+ && $n < $nmax
+ && $excess + 4 > 0
+ && $type_iend_2 ne ',' );
+
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $$ri_beg[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
+
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $type_ibeg_1 eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
+
+ # honor no-break's
+ next if ( $bs >= NO_BREAK - 1 );
+
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ else {
+
+ if ( $bs > $bs_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ }
+ }
+
+ # recombine the pair with the greatest bond strength
+ if ($n_best) {
+ splice @$ri_beg, $n_best, 1;
+ splice @$ri_end, $n_best - 1, 1;
+ splice @joint, $n_best, 1;
- # we have 2 or more candidates, so need another pass
+ # keep going if we are still making progress
$more_to_do++;
}
}
+ return ( $ri_beg, $ri_end );
+ }
+} # end recombine_breakpoints
+
+sub break_all_chain_tokens {
+
+ # scan the current breakpoints looking for breaks at certain "chain
+ # operators" (. : && || + etc) which often occur repeatedly in a long
+ # statement. If we see a break at any one, break at all similar tokens
+ # within the same container.
+ #
+ my ( $ri_left, $ri_right ) = @_;
+
+ my %saw_chain_type;
+ my %left_chain_type;
+ my %right_chain_type;
+ my %interior_chain_type;
+ my $nmax = @$ri_right - 1;
+
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ $typel = '+' if ( $typel eq '-' ); # treat + and - the same
+ $typer = '+' if ( $typer eq '-' );
+ $typel = '*' if ( $typel eq '/' ); # treat * and / the same
+ $typer = '*' if ( $typer eq '/' );
+ my $tokenl = $tokens_to_go[$il];
+ my $tokenr = $tokens_to_go[$ir];
+
+ if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
+ next if ( $typel eq '?' );
+ push @{ $left_chain_type{$typel} }, $il;
+ $saw_chain_type{$typel} = 1;
+ $count++;
+ }
+ if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
+ next if ( $typer eq '?' );
+ push @{ $right_chain_type{$typer} }, $ir;
+ $saw_chain_type{$typer} = 1;
+ $count++;
+ }
+ }
+ return unless $count;
+
+ # now look for any interior tokens of the same types
+ $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
+ my $type = $types_to_go[$i];
+ $type = '+' if ( $type eq '-' );
+ $type = '*' if ( $type eq '/' );
+ if ( $saw_chain_type{$type} ) {
+ push @{ $interior_chain_type{$type} }, $i;
+ $count++;
+ }
+ }
+ }
+ return unless $count;
+
+ # now make a list of all new break points
+ my @insert_list;
+
+ # loop over all chain types
+ foreach my $type ( keys %saw_chain_type ) {
+
+ # quit if just ONE continuation line with leading . For example--
+ # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
+ # . $contents;
+ last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
+
+ # loop over all interior chain tokens
+ foreach my $itest ( @{ $interior_chain_type{$type} } ) {
+
+ # loop over all left end tokens of same type
+ if ( $left_chain_type{$type} ) {
+ next if $nobreak_to_go[ $itest - 1 ];
+ foreach my $i ( @{ $left_chain_type{$type} } ) {
+ next unless in_same_container( $i, $itest );
+ push @insert_list, $itest - 1;
+
+ # Break at matching ? if this : is at a different level.
+ # For example, the ? before $THRf_DEAD in the following
+ # should get a break if its : gets a break.
+ #
+ # my $flags =
+ # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
+ # : ( $_ & 4 ) ? $THRf_R_DETACHED
+ # : $THRf_R_JOINABLE;
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question > 0 ) {
+ push @insert_list, $i_question - 1;
+ }
+ }
+ last;
+ }
+ }
+
+ # loop over all right end tokens of same type
+ if ( $right_chain_type{$type} ) {
+ next if $nobreak_to_go[$itest];
+ foreach my $i ( @{ $right_chain_type{$type} } ) {
+ next unless in_same_container( $i, $itest );
+ push @insert_list, $itest;
+
+ # break at matching ? if this : is at a different level
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
+ }
+ last;
+ }
+ }
+ }
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+}
+
+sub break_equals {
+
+ # Look for assignment operators that could use a breakpoint.
+ # For example, in the following snippet
+ #
+ # $HOME = $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # we could break at the = to get this, which is a little nicer:
+ # $HOME =
+ # $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # The logic here follows the logic in set_logical_padding, which
+ # will add the padding in the second line to improve alignment.
+ #
+ my ( $ri_left, $ri_right ) = @_;
+ my $nmax = @$ri_right - 1;
+ return unless ( $nmax >= 2 );
+
+ # scan the left ends of first two lines
+ my $tokbeg = "";
+ my $depth_beg;
+ for my $n ( 1 .. 2 ) {
+ my $il = $$ri_left[$n];
+ my $typel = $types_to_go[$il];
+ my $tokenl = $tokens_to_go[$il];
+
+ my $has_leading_op = ( $tokenl =~ /^\w/ )
+ ? $is_chain_operator{$tokenl} # + - * / : ? && ||
+ : $is_chain_operator{$typel}; # and, or
+ return unless ($has_leading_op);
+ if ( $n > 1 ) {
+ return
+ unless ( $tokenl eq $tokbeg
+ && $nesting_depth_to_go[$il] eq $depth_beg );
+ }
+ $tokbeg = $tokenl;
+ $depth_beg = $nesting_depth_to_go[$il];
+ }
+
+ # now look for any interior tokens of the same types
+ my $il = $$ri_left[0];
+ my $ir = $$ri_right[0];
+
+ # now make a list of all new break points
+ my @insert_list;
+ for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
+ my $type = $types_to_go[$i];
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ if ( $want_break_before{$type} ) {
+ push @insert_list, $i - 1;
+ }
+ else {
+ push @insert_list, $i;
+ }
+ }
+ }
+
+ # Break after a 'return' followed by a chain of operators
+ # return ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # To give:
+ # return
+ # ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ my $i = 0;
+ if ( $types_to_go[$i] eq 'k'
+ && $tokens_to_go[$i] eq 'return'
+ && $ir > $il
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ push @insert_list, $i;
+ }
+
+ return unless (@insert_list);
+
+ # One final check...
+ # scan second and third lines and be sure there are no assignments
+ # we want to avoid breaking at an = to make something like this:
+ # unless ( $icon =
+ # $html_icons{"$type-$state"}
+ # or $icon = $html_icons{$type}
+ # or $icon = $html_icons{$state} )
+ for my $n ( 1 .. 2 ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
+ my $type = $types_to_go[$i];
+ return
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg );
+ }
+ }
+
+ # ok, insert any new break point
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+}
+
+sub insert_final_breaks {
+
+ my ( $ri_left, $ri_right ) = @_;
+
+ my $nmax = @$ri_right - 1;
+
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ my $i_first_colon = -1;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ return if ( $typel eq '?' );
+ return if ( $typer eq '?' );
+ if ( $typel eq ':' ) { $i_first_colon = $il; last; }
+ elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ }
+
+ # For long ternary chains,
+ # if the first : we see has its # ? is in the interior
+ # of a preceding line, then see if there are any good
+ # breakpoints before the ?.
+ if ( $i_first_colon > 0 ) {
+ my $i_question = $mate_index_to_go[$i_first_colon];
+ if ( $i_question > 0 ) {
+ my @insert_list;
+ for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ my $token = $tokens_to_go[$ii];
+ my $type = $types_to_go[$ii];
+
+ # For now, a good break is either a comma or a 'return'.
+ if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
+ && in_same_container( $ii, $i_question ) )
+ {
+ push @insert_list, $ii;
+ last;
+ }
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ }
+ }
+}
+
+sub in_same_container {
+
+ # check to see if tokens at i1 and i2 are in the
+ # same container, and not separated by a comma, ? or :
+ my ( $i1, $i2 ) = @_;
+ my $type = $types_to_go[$i1];
+ my $depth = $nesting_depth_to_go[$i1];
+ return unless ( $nesting_depth_to_go[$i2] == $depth );
+ if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @$ri_first, $n_best, 1;
- splice @$ri_last, $n_best - 1, 1;
+ ###########################################################
+ # This is potentially a very slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ # TODO: replace this loop with a data structure
+ ###########################################################
+ return if ( $i2 - $i1 > 200 );
+
+ for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
+ next if ( $nesting_depth_to_go[$i] > $depth );
+ return if ( $nesting_depth_to_go[$i] < $depth );
+
+ my $tok = $tokens_to_go[$i];
+ $tok = ',' if $tok eq '=>'; # treat => same as ,
+
+ # Example: we would not want to break at any of these .'s
+ # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
+ if ( $type ne ':' ) {
+ return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
+ }
+ else {
+ return if ( $tok =~ /^[\,]$/ );
}
}
- return ( $ri_first, $ri_last );
+ return 1;
}
sub set_continuation_breaks {
# Define an array of indexes for inserting newline characters to
# keep the line lengths below the maximum desired length. There is
# an implied break after the last token, so it need not be included.
- # We'll break at points where the bond strength is lowest.
+
+ # Method:
+ # This routine is part of series of routines which adjust line
+ # lengths. It is only called if a statement is longer than the
+ # maximum line length, or if a preliminary scanning located
+ # desirable break points. Sub scan_list has already looked at
+ # these tokens and set breakpoints (in array
+ # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
+ # after commas, after opening parens, and before closing parens).
+ # This routine will honor these breakpoints and also add additional
+ # breakpoints as necessary to keep the line length below the maximum
+ # requested. It bases its decision on where the 'bond strength' is
+ # lowest.
+
+ # Output: returns references to the arrays:
+ # @i_first
+ # @i_last
+ # which contain the indexes $i of the first and last tokens on each
+ # line.
+
+ # In addition, the array:
+ # $forced_breakpoint_to_go[$i]
+ # may be updated to be =1 for any index $i after which there must be
+ # a break. This signals later routines not to undo the breakpoint.
my $saw_good_break = shift;
my @i_first = (); # the first index to output
my $imax = $max_index_to_go;
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_begin = $imin;
+ my $i_begin = $imin; # index for starting next iteration
my $leading_spaces = leading_spaces_to_go($imin);
my $line_count = 0;
# see if any ?/:'s are in order
my $colons_in_order = 1;
my $last_tok = "";
- my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
+ my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
+ my $colon_count = @colon_list;
foreach (@colon_list) {
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
$last_tok = $_;
# This is a sufficient but not necessary condition for colon chain
my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
+ #-------------------------------------------------------
+ # BEGINNING of main loop to set continuation breakpoints
+ # Keep iterating until we reach the end
+ #-------------------------------------------------------
while ( $i_begin <= $imax ) {
my $lowest_strength = NO_BREAK;
- my $starting_sum = $lengths_to_go[$i_begin];
+ my $starting_sum = $summed_lengths_to_go[$i_begin];
my $i_lowest = -1;
my $i_test = -1;
my $lowest_next_token = '';
my $lowest_next_type = 'b';
my $i_lowest_next_nonblank = -1;
- # loop to find next break point
+ #-------------------------------------------------------
+ # BEGINNING of inner loop to find the best next breakpoint
+ #-------------------------------------------------------
for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
- my $type = $types_to_go[$i_test];
- my $token = $tokens_to_go[$i_test];
- my $next_type = $types_to_go[ $i_test + 1 ];
- my $next_token = $tokens_to_go[ $i_test + 1 ];
- my $i_next_nonblank =
- ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
+ my $type = $types_to_go[$i_test];
+ my $token = $tokens_to_go[$i_test];
+ my $next_type = $types_to_go[ $i_test + 1 ];
+ my $next_token = $tokens_to_go[ $i_test + 1 ];
+ my $i_next_nonblank = $inext_to_go[$i_test];
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
my $strength = $bond_strength_to_go[$i_test];
- my $must_break = 0;
+ my $maximum_line_length = maximum_line_length($i_begin);
+
+ # use old breaks as a tie-breaker. For example to
+ # prevent blinkers with -pbp in this code:
- # FIXME: TESTING: Might want to be able to break after these
- # force an immediate break at certain operators
- # with lower level than the start of the line
+##@keywords{
+## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
+## = ();
+
+ # At the same time try to prevent a leading * in this code
+ # with the default formatting:
+ #
+## return
+## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
+## * ( $x**( $a - 1 ) )
+## * ( ( 1 - $x )**( $b - 1 ) );
+
+ # reduce strength a bit to break ties at an old breakpoint ...
+ if (
+ $old_breakpoint_to_go[$i_test]
+
+ # which is a 'good' breakpoint, meaning ...
+ # we don't want to break before it
+ && !$want_break_before{$type}
+
+ # and either we want to break before the next token
+ # or the next token is not short (i.e. not a '*', '/' etc.)
+ && $i_next_nonblank <= $imax
+ && ( $want_break_before{$next_nonblank_type}
+ || $token_lengths_to_go[$i_next_nonblank] > 2
+ || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
+ )
+ {
+ $strength -= $tiny_bias;
+ }
+
+ # otherwise increase strength a bit if this token would be at the
+ # maximum line length. This is necessary to avoid blinking
+ # in the above example when the -iob flag is added.
+ else {
+ my $len =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum;
+ if ( $len >= $maximum_line_length ) {
+ $strength += $tiny_bias;
+ }
+ }
+
+ my $must_break = 0;
+
+ # Force an immediate break at certain operators
+ # with lower level than the start of the line,
+ # unless we've already seen a better break.
+ #
+ ##############################################
+ # Note on an issue with a preceding ?
+ ##############################################
+ # We don't include a ? in the above list, but there may
+ # be a break at a previous ? if the line is long.
+ # Because of this we do not want to force a break if
+ # there is a previous ? on this line. For now the best way
+ # to do this is to not break if we have seen a lower strength
+ # point, which is probably a ?.
+ #
+ # Example of unwanted breaks we are avoiding at a '.' following a ?
+ # from pod2html using perltidy -gnu:
+ # )
+ # ? "\n<A NAME=\""
+ # . $value
+ # . "\">\n$text</A>\n"
+ # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
if (
(
$next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
)
&& ( $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_next_nonblank] )
+ && ( $strength <= $lowest_strength )
)
{
set_forced_breakpoint($i_next_nonblank);
# break between ) { in a continued line so that the '{' can
# be outdented
# See similar logic in scan_list which catches instances
- # where a line is just something like ') {'
- || ( $line_count
+ # where a line is just something like ') {'. We have to
+ # be careful because the corresponding block keyword might
+ # not be on the first line, such as 'for' here:
+ #
+ # eval {
+ # for ("a") {
+ # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
+ # }
+ # };
+ #
+ || (
+ $line_count
&& ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
- && !$rOpts->{'opening-brace-always-on-right'} )
+ && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
+
+ # RT #104427: Dont break before opening sub brace because
+ # sub block breaks handled at higher level, unless
+ # it looks like the preceeding list is long and broken
+ && !(
+ $next_nonblank_block_type =~ /^sub\b/
+ && ( $nesting_depth_to_go[$i_begin] ==
+ $nesting_depth_to_go[$i_next_nonblank] )
+ )
+
+ && !$rOpts->{'opening-brace-always-on-right'}
+ )
# There is an implied forced break at a terminal opening brace
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
-
)
{
# Forced breakpoints must sometimes be overridden, for example
# because of a side comment causing a NO_BREAK. It is easier
# to catch this here than when they are set.
- if ( $strength < NO_BREAK ) {
+ if ( $strength < NO_BREAK - 1 ) {
$strength = $lowest_strength - $tiny_bias;
$must_break = 1;
}
&& ( $next_nonblank_type =~ /^[\;\,]$/ )
&& (
(
- $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ]
- - $starting_sum
- ) > $rOpts_maximum_line_length
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
+ $starting_sum
+ ) > $maximum_line_length
)
)
{
# Avoid a break which would strand a single punctuation
# token. For example, we do not want to strand a leading
# '.' which is followed by a long quoted string.
+ # But note that we do want to do this with -extrude (l=1)
+ # so please test any changes to this code on -extrude.
if (
!$must_break
&& ( $i_test == $i_begin )
&& ( $token eq $type )
&& (
(
- $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
$starting_sum
- ) <= $rOpts_maximum_line_length
+ ) < $maximum_line_length
)
)
{
- $i_test++;
-
- if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
- $i_test++;
- }
+ $i_test = min( $imax, $inext_to_go[$i_test] );
redo;
}
# break It is only called if a breakpoint is required or
# desired. This will probably need some adjustments
# over time. A goal is to try to be sure that, if a new
- # side comment is introduced into formated text, then
+ # side comment is introduced into formatted text, then
# the same breakpoints will occur. scbreak.t
last
if (
- $i_test == $imax # we are at the end
- && !$forced_breakpoint_count #
- && $saw_good_break # old line had good break
- && $type =~ /^[#;\{]$/ # and this line ends in
- # ';' or side comment
- && $i_last_break < 0 # and we haven't made a break
- && $i_lowest > 0 # and we saw a possible break
- && $i_lowest < $imax - 1 # (but not just before this ;)
+ $i_test == $imax # we are at the end
+ && !$forced_breakpoint_count #
+ && $saw_good_break # old line had good break
+ && $type =~ /^[#;\{]$/ # and this line ends in
+ # ';' or side comment
+ && $i_last_break < 0 # and we haven't made a break
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $imax - 1 # (but not just before this ;)
&& $strength - $lowest_strength < 0.5 * WEAK # and it's good
);
+ # Do not skip past an important break point in a short final
+ # segment. For example, without this check we would miss the
+ # break at the final / in the following code:
+ #
+ # $depth_stop =
+ # ( $tau * $mass_pellet * $q_0 *
+ # ( 1. - exp( -$t_stop / $tau ) ) -
+ # 4. * $pi * $factor * $k_ice *
+ # ( $t_melt - $t_ice ) *
+ # $r_pellet *
+ # $t_stop ) /
+ # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
+ #
+ if ( $line_count > 2
+ && $i_lowest < $i_test
+ && $i_test > $imax - 2
+ && $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_lowest]
+ && $lowest_strength < $last_break_strength - .5 * WEAK )
+ {
+ # Make this break for math operators for now
+ my $ir = $inext_to_go[$i_lowest];
+ my $il = $iprev_to_go[$ir];
+ last
+ if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
+ || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
+ }
+
+ # Update the minimum bond strength location
$lowest_strength = $strength;
$i_lowest = $i_test;
$lowest_next_token = $next_nonblank_token;
# set flags to remember if a break here will produce a
# leading alignment of certain common tokens
- if (
- $line_count > 0
+ if ( $line_count > 0
&& $i_test < $imax
&& ( $lowest_strength - $last_break_strength <= $max_bias )
- && ( $nesting_depth_to_go[$i_begin] >=
- $nesting_depth_to_go[$i_next_nonblank] )
- && (
- (
- $types_to_go[$i_begin] =~ /^(\.|\&\&|\|\||:)$/
- && $types_to_go[$i_begin] eq $next_nonblank_type
- )
- || ( $tokens_to_go[$i_begin] =~ /^(and|or)$/
- && $tokens_to_go[$i_begin] eq $next_nonblank_token )
- )
)
{
- $leading_alignment_token = $next_nonblank_token;
- $leading_alignment_type = $next_nonblank_type;
+ my $i_last_end = $iprev_to_go[$i_begin];
+ my $tok_beg = $tokens_to_go[$i_begin];
+ my $type_beg = $types_to_go[$i_begin];
+ if (
+
+ # check for leading alignment of certain tokens
+ (
+ $tok_beg eq $next_nonblank_token
+ && $is_chain_operator{$tok_beg}
+ && ( $type_beg eq 'k'
+ || $type_beg eq $tok_beg )
+ && $nesting_depth_to_go[$i_begin] >=
+ $nesting_depth_to_go[$i_next_nonblank]
+ )
+
+ || ( $tokens_to_go[$i_last_end] eq $token
+ && $is_chain_operator{$token}
+ && ( $type eq 'k' || $type eq $token )
+ && $nesting_depth_to_go[$i_last_end] >=
+ $nesting_depth_to_go[$i_test] )
+ )
+ {
+ $leading_alignment_token = $next_nonblank_token;
+ $leading_alignment_type = $next_nonblank_type;
+ }
}
}
- my $too_long =
- ( $i_test >= $imax )
- ? 1
- : (
- (
- $leading_spaces + $lengths_to_go[ $i_test + 2 ] -
- $starting_sum
- ) > $rOpts_maximum_line_length
- );
+ my $too_long = ( $i_test >= $imax );
+ if ( !$too_long ) {
+ my $next_length =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 2 ] -
+ $starting_sum;
+ $too_long = $next_length > $maximum_line_length;
+
+ # To prevent blinkers we will avoid leaving a token exactly at
+ # the line length limit unless it is the last token or one of
+ # several "good" types.
+ #
+ # The following code was a blinker with -pbp before this
+ # modification:
+## $last_nonblank_token eq '('
+## && $is_indirect_object_taker{ $paren_type
+## [$paren_depth] }
+ # The issue causing the problem is that if the
+ # term [$paren_depth] gets broken across a line then
+ # the whitespace routine doesn't see both opening and closing
+ # brackets and will format like '[ $paren_depth ]'. This
+ # leads to an oscillation in length depending if we break
+ # before the closing bracket or not.
+ if ( !$too_long
+ && $i_test + 1 < $imax
+ && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
+ {
+ $too_long = $next_length >= $maximum_line_length;
+ }
+ }
FORMATTER_DEBUG_FLAG_BREAK
- && print
-"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
+ && do {
+ my $ltok = $token;
+ my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
+ my $i_testp2 = $i_test + 2;
+ if ( $i_testp2 > $max_index_to_go + 1 ) {
+ $i_testp2 = $max_index_to_go + 1;
+ }
+ if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
+ if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
+ print STDOUT
+"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
+ };
# allow one extra terminal token after exceeding line length
# if it would strand this token.
if ( $rOpts_fuzzy_line_length
&& $too_long
- && ( $i_lowest == $i_test )
- && ( length($token) > 1 )
- && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
+ && $i_lowest == $i_test
+ && $token_lengths_to_go[$i_test] > 1
+ && $next_nonblank_type =~ /^[\;\,]$/ )
{
$too_long = 0;
}
);
}
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint
+ # Now decide exactly where to put the breakpoint
+ #-------------------------------------------------------
+
# it's always ok to break at imax if no other break was found
if ( $i_lowest < 0 ) { $i_lowest = $imax }
# semi-final index calculation
- my $i_next_nonblank = (
- ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
- ? $i_lowest + 2
- : $i_lowest + 1
- );
+ my $i_next_nonblank = $inext_to_go[$i_lowest];
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
last;
}
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint:
+ # Break the line after the token with index i=$i_lowest
+ #-------------------------------------------------------
+
# final index calculation
- $i_next_nonblank = (
- ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
- ? $i_lowest + 2
- : $i_lowest + 1
- );
+ $i_next_nonblank = $inext_to_go[$i_lowest];
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
FORMATTER_DEBUG_FLAG_BREAK
- && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
+ && print STDOUT
+ "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
#-------------------------------------------------------
# ?/: rule 2 : if we break at a '?', then break at its ':'
}
}
+ #-------------------------------------------------------
+ # END of main loop to set continuation breakpoints
+ # Now go back and make any necessary corrections
+ #-------------------------------------------------------
+
#-------------------------------------------------------
# ?/: rule 4 -- if we broke at a ':', then break at
# corresponding '?' unless this is a chain of ?: expressions
my $i_question = $mate_index_to_go[$_];
if ( $i_question >= 0 ) {
if ( $want_break_before{'?'} ) {
- $i_question--;
- if ( $i_question > 0
- && $types_to_go[$i_question] eq 'b' )
- {
- $i_question--;
- }
+ $i_question = $iprev_to_go[$i_question];
}
if ( $i_question >= 0 ) {
}
}
}
- return \@i_first, \@i_last;
+ return ( \@i_first, \@i_last, $colon_count );
}
sub insert_additional_breaks {
my $i_l;
my $line_number = 0;
my $i_break_left;
- foreach $i_break_left ( sort @$ri_break_list ) {
+ foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
$i_f = $$ri_first[$line_number];
$i_l = $$ri_last[$line_number];
$i_l = $$ri_last[$line_number];
}
- my $i_break_right = $i_break_left + 1;
- if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
+ # Do not leave a blank at the end of a line; back up if necessary
+ if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
+ my $i_break_right = $inext_to_go[$i_break_left];
if ( $i_break_left >= $i_f
&& $i_break_left < $i_l
&& $i_break_right > $i_f
}
}
-# check to see if output line tabbing agrees with input line
-# this can be very useful for debugging a script which has an extra
-# or missing brace
sub compare_indentation_levels {
- my ( $python_indentation_level, $structural_indentation_level ) = @_;
- if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
+ # check to see if output line tabbing agrees with input line
+ # this can be very useful for debugging a script which has an extra
+ # or missing brace
+ my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
+ if ( $guessed_indentation_level ne $structural_indentation_level ) {
$last_tabbing_disagreement = $input_line_number;
if ($in_tabbing_disagreement) {
if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
-"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
+"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
);
}
$in_tabbing_disagreement = $input_line_number;
$ci_level, $available_spaces, $index,
$gnu_sequence_number, $align_paren, $stack_depth,
$starting_index,
- )
- = @_;
+ ) = @_;
my $closed = -1;
my $arrow_count = 0;
my $comma_count = 0;
my ( $item, $spaces_needed ) = @_;
my $available_spaces = $item->get_AVAILABLE_SPACES();
- my $deleted_spaces =
+ my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
# caller.
my ( $item, $spaces_needed ) = @_;
my $available_spaces = $item->get_AVAILABLE_SPACES();
- my $deleted_spaces =
+ my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
# attempts to line up certain common tokens, such as => and #, which are
# identified by the calling routine.
#
-# There are two main routines: append_line and flush. Append acts as a
+# There are two main routines: valign_input and flush. Append acts as a
# storage buffer, collecting lines into a group which can be vertically
# aligned. When alignment is no longer possible or desirable, it dumps
# the group to flush.
#
-# append_line -----> flush
+# valign_input -----> flush
#
# collects writes
# vertical one
use constant VALIGN_DEBUG_FLAG_APPEND => 0;
use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
+ use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
+ use constant VALIGN_DEBUG_FLAG_TABS => 0;
my $debug_warning = sub {
- print "VALIGN_DEBUGGING with key $_[0]\n";
+ print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
};
VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
+ VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
+ VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
}
$group_type
$group_maximum_gap
$marginal_match
- $last_group_level_written
+ $last_level_written
$last_leading_space_count
$extra_indent_ok
$zero_count
$file_writer_object
@side_comment_history
$comment_leading_space_count
+ $is_matching_terminal_line
+ $consecutive_block_comments
$cached_line_text
$cached_line_type
$cached_line_flag
$cached_seqno
$cached_line_valid
+ $cached_line_leading_space_count
+ $cached_seqno_string
+
+ $valign_buffer_filling
+ @valign_buffer
+
+ $seqno_string
+ $last_nonblank_seqno_string
$rOpts
$rOpts_maximum_line_length
+ $rOpts_variable_maximum_line_length
$rOpts_continuation_indentation
$rOpts_indent_columns
$rOpts_tabs
$rOpts_entab_leading_whitespace
+ $rOpts_valign
+ $rOpts_fixed_position_side_comment
$rOpts_minimum_space_to_comment
);
= @_;
# variables describing the entire space group:
-
$ralignment_list = [];
$group_level = 0;
- $last_group_level_written = -1;
+ $last_level_written = -1;
$extra_indent_ok = 0; # can we move all lines to the right?
$last_side_comment_length = 0;
$maximum_jmax_seen = 0;
$last_outdented_line_at = 0;
$last_side_comment_line_number = 0;
$last_side_comment_level = -1;
+ $is_matching_terminal_line = 0;
# most recent 3 side comments; [ line number, column ]
$side_comment_history[0] = [ -300, 0 ];
$side_comment_history[1] = [ -200, 0 ];
$side_comment_history[2] = [ -100, 0 ];
- # write_leader_and_string cache:
- $cached_line_text = "";
- $cached_line_type = 0;
- $cached_line_flag = 0;
- $cached_seqno = 0;
- $cached_line_valid = 0;
+ # valign_output_step_B cache:
+ $cached_line_text = "";
+ $cached_line_type = 0;
+ $cached_line_flag = 0;
+ $cached_seqno = 0;
+ $cached_line_valid = 0;
+ $cached_line_leading_space_count = 0;
+ $cached_seqno_string = "";
+
+ # string of sequence numbers joined together
+ $seqno_string = "";
+ $last_nonblank_seqno_string = "";
# frequently used parameters
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_tabs = $rOpts->{'tabs'};
$rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
+ $rOpts_fixed_position_side_comment =
+ $rOpts->{'fixed-position-side-comment'};
$rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
+ $rOpts_valign = $rOpts->{'valign'};
+ $consecutive_block_comments = 0;
forget_side_comment();
initialize_for_new_group();
}
sub dump_alignments {
- print
+ print STDOUT
"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
for my $i ( 0 .. $maximum_alignment_index ) {
my $column = $ralignment_list->[$i]->get_column();
my $matching_token = $ralignment_list->[$i]->get_matching_token();
my $starting_line = $ralignment_list->[$i]->get_starting_line();
my $ending_line = $ralignment_list->[$i]->get_ending_line();
- print
+ print STDOUT
"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
}
}
$last_comment_column = 0;
}
-sub append_line {
+sub maximum_line_length_for_level {
+
+ # return maximum line length for line starting with a given level
+ my $maximum_line_length = $rOpts_maximum_line_length;
+ if ($rOpts_variable_maximum_line_length) {
+ my $level = shift;
+ if ( $level < 0 ) { $level = 0 }
+ $maximum_line_length += $level * $rOpts_indent_columns;
+ }
+ return $maximum_line_length;
+}
+
+sub valign_input {
- # sub append is called to place one line in the current vertical group.
+ # Place one line in the current vertical group.
#
# The input parameters are:
# $level = indentation level of this line
# first one is always at zero. The interior columns are at the start of
# the matching tokens, and the last one tracks the maximum line length.
#
- # Basically, each time a new line comes in, it joins the current vertical
+ # Each time a new line comes in, it joins the current vertical
# group if possible. Otherwise it causes the current group to be dumped
# and a new group is started.
#
# The log file warns the user if there are any such tabs.
my (
- $level, $level_end,
- $indentation, $rfields,
- $rtokens, $rpatterns,
- $is_forced_break, $outdent_long_lines,
- $is_terminal_statement, $do_not_pad,
- $rvertical_tightness_flags, $level_jump,
- )
- = @_;
+ $level, $level_end,
+ $indentation, $rfields,
+ $rtokens, $rpatterns,
+ $is_forced_break, $outdent_long_lines,
+ $is_terminal_ternary, $is_terminal_statement,
+ $do_not_pad, $rvertical_tightness_flags,
+ $level_jump,
+ ) = @_;
# number of fields is $jmax
# number of tokens between fields is $jmax-1
my $jmax = $#{$rfields};
- $previous_minimum_jmax_seen = $minimum_jmax_seen;
- $previous_maximum_jmax_seen = $maximum_jmax_seen;
my $leading_space_count = get_SPACES($indentation);
( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
$is_outdented = 0 if $is_hanging_side_comment;
+ # Forget side comment alignment after seeing 2 or more block comments
+ my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
+ if ($is_block_comment) {
+ $consecutive_block_comments++;
+ }
+ else {
+ if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
+ $consecutive_block_comments = 0;
+ }
+
VALIGN_DEBUG_FLAG_APPEND0 && do {
- print
+ print STDOUT
"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
};
if ($rvertical_tightness_flags) {
if ( $maximum_line_index <= 0
&& $cached_line_type
+ && $cached_seqno
+ && $rvertical_tightness_flags->[2]
&& $rvertical_tightness_flags->[2] == $cached_seqno )
{
$rvertical_tightness_flags->[3] ||= 1;
- $cached_line_valid ||= 1;
+ $cached_line_valid ||= 1;
}
}
if ( $level < 0 ) { $level = 0 }
# do not align code across indentation level changes
- if ( $level != $group_level || $is_outdented ) {
+ # or if vertical alignment is turned off for debugging
+ if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
# we are allowed to shift a group of lines to the right if its
# level is greater than the previous and next group
$extra_indent_ok =
- ( $level < $group_level && $last_group_level_written < $group_level );
+ ( $level < $group_level && $last_level_written < $group_level );
my_flush();
# Patch to collect outdentable block COMMENTS
# --------------------------------------------------------------------
my $is_blank_line = "";
- my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
if ( $group_type eq 'COMMENT' ) {
if (
(
}
}
+ # --------------------------------------------------------------------
+ # add dummy fields for terminal ternary
+ # --------------------------------------------------------------------
+ my $j_terminal_match;
+ if ( $is_terminal_ternary && $current_line ) {
+ $j_terminal_match =
+ fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
+ $jmax = @{$rfields} - 1;
+ }
+
+ # --------------------------------------------------------------------
+ # add dummy fields for else statement
+ # --------------------------------------------------------------------
+ if ( $rfields->[0] =~ /^else\s*$/
+ && $current_line
+ && $level_jump == 0 )
+ {
+ $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
+ $jmax = @{$rfields} - 1;
+ }
+
# --------------------------------------------------------------------
# Step 1. Handle simple line of code with no fields to match.
# --------------------------------------------------------------------
# and no space recovery is needed.
if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
{
- write_leader_and_string( $leading_space_count, $$rfields[0], 0,
- $outdent_long_lines, $rvertical_tightness_flags );
+ valign_output_step_B( $leading_space_count, $$rfields[0], 0,
+ $outdent_long_lines, $rvertical_tightness_flags, $level );
return;
}
}
outdent_long_lines => $outdent_long_lines,
list_type => "",
is_hanging_side_comment => $is_hanging_side_comment,
- maximum_line_length => $rOpts->{'maximum-line-length'},
+ maximum_line_length => maximum_line_length_for_level($level),
rvertical_tightness_flags => $rvertical_tightness_flags,
);
+ # Initialize a global flag saying if the last line of the group should
+ # match end of group and also terminate the group. There should be no
+ # returns between here and where the flag is handled at the bottom.
+ my $col_matching_terminal = 0;
+ if ( defined($j_terminal_match) ) {
+
+ # remember the column of the terminal ? or { to match with
+ $col_matching_terminal = $current_line->get_column($j_terminal_match);
+
+ # set global flag for sub decide_if_aligned
+ $is_matching_terminal_line = 1;
+ }
+
# --------------------------------------------------------------------
# It simplifies things to create a zero length side comment
# if none exists.
# --------------------------------------------------------------------
# Append this line to the current group (or start new group)
# --------------------------------------------------------------------
- accept_line($new_line);
+ add_to_group($new_line);
# Future update to allow this to vary:
$current_line = $new_line if ( $maximum_line_index == 0 );
+ # output this group if it ends in a terminal else or ternary line
+ if ( defined($j_terminal_match) ) {
+
+ # if there is only one line in the group (maybe due to failure to match
+ # perfectly with previous lines), then align the ? or { of this
+ # terminal line with the previous one unless that would make the line
+ # too long
+ if ( $maximum_line_index == 0 ) {
+ my $col_now = $current_line->get_column($j_terminal_match);
+ my $pad = $col_matching_terminal - $col_now;
+ my $padding_available =
+ $current_line->get_available_space_on_right();
+ if ( $pad > 0 && $pad <= $padding_available ) {
+ $current_line->increase_field_width( $j_terminal_match, $pad );
+ }
+ }
+ my_flush();
+ $is_matching_terminal_line = 0;
+ }
+
# --------------------------------------------------------------------
# Step 8. Some old debugging stuff
# --------------------------------------------------------------------
VALIGN_DEBUG_FLAG_APPEND && do {
- print "APPEND fields:";
+ print STDOUT "APPEND fields:";
dump_array(@$rfields);
- print "APPEND tokens:";
+ print STDOUT "APPEND tokens:";
dump_array(@$rtokens);
- print "APPEND patterns:";
+ print STDOUT "APPEND patterns:";
dump_array(@$rpatterns);
dump_alignments();
};
+
+ return;
}
sub join_hanging_comment {
my $old_line = shift;
my $maximum_field_index = $old_line->get_jmax();
+ ###############################################
# this line must have fewer fields
return unless $maximum_field_index > $jmax;
+ ###############################################
# Identify specific cases where field elimination is allowed:
# case=1: both lines have comma-separated lists, and the first
my $case = 1;
# See if case 2: both lines have leading '='
- # We'll require smiliar leading patterns in this case
+ # We'll require similar leading patterns in this case
my $old_rtokens = $old_line->get_rtokens();
my $rtokens = $new_line->get_rtokens();
my $rpatterns = $new_line->get_rpatterns();
my $old_rpatterns = $old_line->get_rpatterns();
if ( $rtokens->[0] =~ /^=\d*$/
- && $old_rtokens->[0] eq $rtokens->[0]
+ && $old_rtokens->[0] eq $rtokens->[0]
&& $old_rpatterns->[0] eq $rpatterns->[0] )
{
$case = 2;
sub eliminate_new_fields {
return unless ( $maximum_line_index >= 0 );
- my $new_line = shift;
- my $old_line = shift;
- my $jmax = $new_line->get_jmax();
+ my ( $new_line, $old_line ) = @_;
+ my $jmax = $new_line->get_jmax();
- my $old_rtokens = $old_line->get_rtokens();
- my $rtokens = $new_line->get_rtokens();
+ my $old_rtokens = $old_line->get_rtokens();
+ my $rtokens = $new_line->get_rtokens();
my $is_assignment =
( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
my $rpatterns = $new_line->get_rpatterns();
my $old_rpatterns = $old_line->get_rpatterns();
- # loop over all old tokens except comment
+ # loop over all OLD tokens except comment and check match
my $match = 1;
my $k;
for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
}
}
- # first tokens agree, so combine new tokens
+ # first tokens agree, so combine extra new tokens
if ($match) {
for $k ( $maximum_field_index .. $jmax - 1 ) {
$new_line->set_jmax($jmax);
}
-sub check_match {
+sub fix_terminal_ternary {
- my $new_line = shift;
- my $old_line = shift;
+ # Add empty fields as necessary to align a ternary term
+ # like this:
+ #
+ # my $leapyear =
+ # $year % 4 ? 0
+ # : $year % 100 ? 1
+ # : $year % 400 ? 0
+ # : 1;
+ #
+ # returns 1 if the terminal item should be indented
+
+ my ( $rfields, $rtokens, $rpatterns ) = @_;
+
+ my $jmax = @{$rfields} - 1;
+ my $old_line = $group_lines[$maximum_line_index];
+ my $rfields_old = $old_line->get_rfields();
- my $jmax = $new_line->get_jmax();
+ my $rpatterns_old = $old_line->get_rpatterns();
+ my $rtokens_old = $old_line->get_rtokens();
my $maximum_field_index = $old_line->get_jmax();
- # flush if this line has too many fields
- if ( $jmax > $maximum_field_index ) { my_flush(); return }
+ # look for the question mark after the :
+ my ($jquestion);
+ my $depth_question;
+ my $pad = "";
+ for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
+ my $tok = $rtokens_old->[$j];
+ if ( $tok =~ /^\?(\d+)$/ ) {
+ $depth_question = $1;
- # flush if adding this line would make a non-monotonic field count
- if (
- ( $maximum_field_index > $jmax ) # this has too few fields
- && (
- ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic
- || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
- )
- )
- {
- my_flush();
- return;
+ # depth must be correct
+ next unless ( $depth_question eq $group_level );
+
+ $jquestion = $j;
+ if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
+ $pad = " " x length($1);
+ }
+ else {
+ return; # shouldn't happen
+ }
+ last;
+ }
}
+ return unless ( defined($jquestion) ); # shouldn't happen
- # otherwise append this line if everything matches
- my $jmax_original_line = $new_line->get_jmax_original_line();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $list_type = $new_line->get_list_type();
+ # Now splice the tokens and patterns of the previous line
+ # into the else line to insure a match. Add empty fields
+ # as necessary.
+ my $jadd = $jquestion;
+
+ # Work on copies of the actual arrays in case we have
+ # to return due to an error
+ my @fields = @{$rfields};
+ my @patterns = @{$rpatterns};
+ my @tokens = @{$rtokens};
+
+ VALIGN_DEBUG_FLAG_TERNARY && do {
+ local $" = '><';
+ print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
+ print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
+ print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
+ print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
+ print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
+ print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
+ };
+
+ # handle cases of leading colon on this line
+ if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
+
+ my ( $colon, $therest ) = ( $1, $2 );
+
+ # Handle sub-case of first field with leading colon plus additional code
+ # This is the usual situation as at the '1' below:
+ # ...
+ # : $year % 400 ? 0
+ # : 1;
+ if ($therest) {
+
+ # Split the first field after the leading colon and insert padding.
+ # Note that this padding will remain even if the terminal value goes
+ # out on a separate line. This does not seem to look to bad, so no
+ # mechanism has been included to undo it.
+ my $field1 = shift @fields;
+ unshift @fields, ( $colon, $pad . $therest );
- my $group_list_type = $old_line->get_list_type();
- my $old_rpatterns = $old_line->get_rpatterns();
- my $old_rtokens = $old_line->get_rtokens();
+ # change the leading pattern from : to ?
+ return unless ( $patterns[0] =~ s/^\:/?/ );
- my $jlimit = $jmax - 1;
- if ( $maximum_field_index > $jmax ) {
- $jlimit = $jmax_original_line;
- --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
+ # install leading tokens and patterns of existing line
+ unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
+ unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
+
+ # insert appropriate number of empty fields
+ splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ }
+
+ # handle sub-case of first field just equal to leading colon.
+ # This can happen for example in the example below where
+ # the leading '(' would create a new alignment token
+ # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
+ # : ( $mname = $name . '->' );
+ else {
+
+ return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
+
+ # prepend a leading ? onto the second pattern
+ $patterns[1] = "?b" . $patterns[1];
+
+ # pad the second field
+ $fields[1] = $pad . $fields[1];
+
+ # install leading tokens and patterns of existing line, replacing
+ # leading token and inserting appropriate number of empty fields
+ splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
+ splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
+ splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ }
}
- my $everything_matches = 1;
+ # Handle case of no leading colon on this line. This will
+ # be the case when -wba=':' is used. For example,
+ # $year % 400 ? 0 :
+ # 1;
+ else {
- # common list types always match
- unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
- || $is_hanging_side_comment )
- {
+ # install leading tokens and patterns of existing line
+ $patterns[0] = '?' . 'b' . $patterns[0];
+ unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
+ unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
- my $leading_space_count = $new_line->get_leading_space_count();
- my $saw_equals = 0;
- for my $j ( 0 .. $jlimit ) {
- my $match = 1;
+ # insert appropriate number of empty fields
+ $jadd = $jquestion + 1;
+ $fields[0] = $pad . $fields[0];
+ splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
+ }
- my $old_tok = $$old_rtokens[$j];
- my $new_tok = $$rtokens[$j];
+ VALIGN_DEBUG_FLAG_TERNARY && do {
+ local $" = '><';
+ print STDOUT "MODIFIED TOKENS=<@tokens>\n";
+ print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
+ print STDOUT "MODIFIED FIELDS=<@fields>\n";
+ };
- # dumb down the match after an equals
- if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
- $new_tok = $1;
- $old_tok =~ s/\+.*$//;
- }
- if ( $new_tok =~ /^=\d*$/ ) { $saw_equals = 1 }
+ # all ok .. update the arrays
+ @{$rfields} = @fields;
+ @{$rtokens} = @tokens;
+ @{$rpatterns} = @patterns;
- # we never match if the matching tokens differ
- if ( $j < $jlimit
- && $old_tok ne $new_tok )
- {
- $match = 0;
- }
+ # force a flush after this line
+ return $jquestion;
+}
- # otherwise, if patterns match, we always have a match.
- # However, if patterns don't match, we have to be careful...
- elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
+sub fix_terminal_else {
- # We have to be very careful about aligning commas when the
- # pattern's don't match, because it can be worse to create an
- # alignment where none is needed than to omit one. The current
- # rule: if we are within a matching sub call (indicated by '+'
- # in the matching token), we'll allow a marginal match, but
- # otherwise not.
- #
- # Here's an example where we'd like to align the '='
- # my $cfile = File::Spec->catfile( 't', 'callext.c' );
- # my $inc = File::Spec->catdir( 'Basic', 'Core' );
- # because the function names differ.
- # Future alignment logic should make this unnecessary.
- #
- # Here's an example where the ','s are not contained in a call.
- # The first line below should probably not match the next two:
- # ( $a, $b ) = ( $b, $r );
- # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
- # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
- if ( $new_tok =~ /^,/ ) {
- if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
- $marginal_match = 1;
- }
- else {
- $match = 0;
- }
- }
+ # Add empty fields as necessary to align a balanced terminal
+ # else block to a previous if/elsif/unless block,
+ # like this:
+ #
+ # if ( 1 || $x ) { print "ok 13\n"; }
+ # else { print "not ok 13\n"; }
+ #
+ # returns 1 if the else block should be indented
+ #
+ my ( $rfields, $rtokens, $rpatterns ) = @_;
+ my $jmax = @{$rfields} - 1;
+ return unless ( $jmax > 0 );
+
+ # check for balanced else block following if/elsif/unless
+ my $rfields_old = $current_line->get_rfields();
+
+ # TBD: add handling for 'case'
+ return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
+
+ # look for the opening brace after the else, and extract the depth
+ my $tok_brace = $rtokens->[0];
+ my $depth_brace;
+ if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
+
+ # probably: "else # side_comment"
+ else { return }
+
+ my $rpatterns_old = $current_line->get_rpatterns();
+ my $rtokens_old = $current_line->get_rtokens();
+ my $maximum_field_index = $current_line->get_jmax();
+
+ # be sure the previous if/elsif is followed by an opening paren
+ my $jparen = 0;
+ my $tok_paren = '(' . $depth_brace;
+ my $tok_test = $rtokens_old->[$jparen];
+ return unless ( $tok_test eq $tok_paren ); # shouldn't happen
+
+ # Now find the opening block brace
+ my ($jbrace);
+ for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
+ my $tok = $rtokens_old->[$j];
+ if ( $tok eq $tok_brace ) {
+ $jbrace = $j;
+ last;
+ }
+ }
+ return unless ( defined($jbrace) ); # shouldn't happen
- # parens don't align well unless patterns match
- elsif ( $new_tok =~ /^\(/ ) {
- $match = 0;
- }
+ # Now splice the tokens and patterns of the previous line
+ # into the else line to insure a match. Add empty fields
+ # as necessary.
+ my $jadd = $jbrace - $jparen;
+ splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
+ splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
+ splice( @{$rfields}, 1, 0, ('') x $jadd );
+
+ # force a flush after this line if it does not follow a case
+ return $jbrace
+ unless ( $rfields_old->[0] =~ /^case\s*$/ );
+}
- # Handle an '=' alignment with different patterns to
- # the left.
- elsif ( $new_tok =~ /^=\d*$/ ) {
+{ # sub check_match
+ my %is_good_alignment;
- $saw_equals = 1;
+ BEGIN {
+
+ # Vertically aligning on certain "good" tokens is usually okay
+ # so we can be less restrictive in marginal cases.
+ @_ = qw( { ? => = );
+ push @_, (',');
+ @is_good_alignment{@_} = (1) x scalar(@_);
+ }
+
+ sub check_match {
- # It is best to be a little restrictive when
- # aligning '=' tokens. Here is an example of
- # two lines that we will not align:
- # my $variable=6;
- # $bb=4;
- # The problem is that one is a 'my' declaration,
- # and the other isn't, so they're not very similar.
- # We will filter these out by comparing the first
- # letter of the pattern. This is crude, but works
- # well enough.
+ # See if the current line matches the current vertical alignment group.
+ # If not, flush the current group.
+ my $new_line = shift;
+ my $old_line = shift;
+
+ # uses global variables:
+ # $previous_minimum_jmax_seen
+ # $maximum_jmax_seen
+ # $maximum_line_index
+ # $marginal_match
+ my $jmax = $new_line->get_jmax();
+ my $maximum_field_index = $old_line->get_jmax();
+
+ # flush if this line has too many fields
+ if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
+
+ # flush if adding this line would make a non-monotonic field count
+ if (
+ ( $maximum_field_index > $jmax ) # this has too few fields
+ && (
+ ( $previous_minimum_jmax_seen <
+ $jmax ) # and wouldn't be monotonic
+ || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
+ )
+ )
+ {
+ goto NO_MATCH;
+ }
+
+ # otherwise see if this line matches the current group
+ my $jmax_original_line = $new_line->get_jmax_original_line();
+ my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
+ my $rtokens = $new_line->get_rtokens();
+ my $rfields = $new_line->get_rfields();
+ my $rpatterns = $new_line->get_rpatterns();
+ my $list_type = $new_line->get_list_type();
+
+ my $group_list_type = $old_line->get_list_type();
+ my $old_rpatterns = $old_line->get_rpatterns();
+ my $old_rtokens = $old_line->get_rtokens();
+
+ my $jlimit = $jmax - 1;
+ if ( $maximum_field_index > $jmax ) {
+ $jlimit = $jmax_original_line;
+ --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
+ }
+
+ # handle comma-separated lists ..
+ if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
+ for my $j ( 0 .. $jlimit ) {
+ my $old_tok = $$old_rtokens[$j];
+ next unless $old_tok;
+ my $new_tok = $$rtokens[$j];
+ next unless $new_tok;
+
+ # lists always match ...
+ # unless they would align any '=>'s with ','s
+ goto NO_MATCH
+ if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
+ || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
+ }
+ }
+
+ # do detailed check for everything else except hanging side comments
+ elsif ( !$is_hanging_side_comment ) {
+
+ my $leading_space_count = $new_line->get_leading_space_count();
+
+ my $max_pad = 0;
+ my $min_pad = 0;
+ my $saw_good_alignment;
+
+ for my $j ( 0 .. $jlimit ) {
+
+ my $old_tok = $$old_rtokens[$j];
+ my $new_tok = $$rtokens[$j];
+
+ # Note on encoding used for alignment tokens:
+ # -------------------------------------------
+ # Tokens are "decorated" with information which can help
+ # prevent unwanted alignments. Consider for example the
+ # following two lines:
+ # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
+ # local ( $i, $f ) = &'bdiv( $xn, $xd );
+ # There are three alignment tokens in each line, a comma,
+ # an =, and a comma. In the first line these three tokens
+ # are encoded as:
+ # ,4+local-18 =3 ,4+split-7
+ # and in the second line they are encoded as
+ # ,4+local-18 =3 ,4+&'bdiv-8
+ # Tokens always at least have token name and nesting
+ # depth. So in this example the ='s are at depth 3 and
+ # the ,'s are at depth 4. This prevents aligning tokens
+ # of different depths. Commas contain additional
+ # information, as follows:
+ # , {depth} + {container name} - {spaces to opening paren}
+ # This allows us to reject matching the rightmost commas
+ # in the above two lines, since they are for different
+ # function calls. This encoding is done in
+ # 'sub send_lines_to_vertical_aligner'.
+
+ # Pick off actual token.
+ # Everything up to the first digit is the actual token.
+ my $alignment_token = $new_tok;
+ if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
+
+ # see if the decorated tokens match
+ my $tokens_match = $new_tok eq $old_tok
+
+ # Exception for matching terminal : of ternary statement..
+ # consider containers prefixed by ? and : a match
+ || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
+
+ # No match if the alignment tokens differ...
+ if ( !$tokens_match ) {
+
+ # ...Unless this is a side comment
if (
- substr( $$old_rpatterns[$j], 0, 1 ) ne
- substr( $$rpatterns[$j], 0, 1 ) )
+ $j == $jlimit
+
+ # and there is either at least one alignment token
+ # or this is a single item following a list. This
+ # latter rule is required for 'December' to join
+ # the following list:
+ # my (@months) = (
+ # '', 'January', 'February', 'March',
+ # 'April', 'May', 'June', 'July',
+ # 'August', 'September', 'October', 'November',
+ # 'December'
+ # );
+ # If it doesn't then the -lp formatting will fail.
+ && ( $j > 0 || $old_tok =~ /^,/ )
+ )
{
- $match = 0;
+ $marginal_match = 1
+ if ( $marginal_match == 0
+ && $maximum_line_index == 0 );
+ last;
}
- # If we pass that test, we'll call it a marginal match.
- # Here is an example of a marginal match:
- # $done{$$op} = 1;
- # $op = compile_bblock($op);
- # The left tokens are both identifiers, but
- # one accesses a hash and the other doesn't.
- # We'll let this be a tentative match and undo
- # it later if we don't find more than 2 lines
- # in the group.
- elsif ( $maximum_line_index == 0 ) {
- $marginal_match = 1;
- }
+ goto NO_MATCH;
}
- }
- # Don't let line with fewer fields increase column widths
- # ( align3.t )
- if ( $maximum_field_index > $jmax ) {
+ # Calculate amount of padding required to fit this in.
+ # $pad is the number of spaces by which we must increase
+ # the current field to squeeze in this field.
my $pad =
length( $$rfields[$j] ) - $old_line->current_field_width($j);
+ if ( $j == 0 ) { $pad += $leading_space_count; }
- if ( $j == 0 ) {
- $pad += $leading_space_count;
+ # remember max pads to limit marginal cases
+ if ( $alignment_token ne '#' ) {
+ if ( $pad > $max_pad ) { $max_pad = $pad }
+ if ( $pad < $min_pad ) { $min_pad = $pad }
+ }
+ if ( $is_good_alignment{$alignment_token} ) {
+ $saw_good_alignment = 1;
}
- # TESTING: suspend this rule to allow last lines to join
- if ( $pad > 0 ) { $match = 0; }
- }
+ # If patterns don't match, we have to be careful...
+ if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
- unless ($match) {
- $everything_matches = 0;
- last;
- }
- }
- }
+ # flag this as a marginal match since patterns differ
+ $marginal_match = 1
+ if ( $marginal_match == 0 && $maximum_line_index == 0 );
+
+ # We have to be very careful about aligning commas
+ # when the pattern's don't match, because it can be
+ # worse to create an alignment where none is needed
+ # than to omit one. Here's an example where the ','s
+ # are not in named containers. The first line below
+ # should not match the next two:
+ # ( $a, $b ) = ( $b, $r );
+ # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
+ # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
+ if ( $alignment_token eq ',' ) {
+
+ # do not align commas unless they are in named containers
+ goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
+ }
+
+ # do not align parens unless patterns match;
+ # large ugly spaces can occur in math expressions.
+ elsif ( $alignment_token eq '(' ) {
+
+ # But we can allow a match if the parens don't
+ # require any padding.
+ if ( $pad != 0 ) { goto NO_MATCH }
+ }
+
+ # Handle an '=' alignment with different patterns to
+ # the left.
+ elsif ( $alignment_token eq '=' ) {
+
+ # It is best to be a little restrictive when
+ # aligning '=' tokens. Here is an example of
+ # two lines that we will not align:
+ # my $variable=6;
+ # $bb=4;
+ # The problem is that one is a 'my' declaration,
+ # and the other isn't, so they're not very similar.
+ # We will filter these out by comparing the first
+ # letter of the pattern. This is crude, but works
+ # well enough.
+ if (
+ substr( $$old_rpatterns[$j], 0, 1 ) ne
+ substr( $$rpatterns[$j], 0, 1 ) )
+ {
+ goto NO_MATCH;
+ }
- if ( $maximum_field_index > $jmax ) {
+ # If we pass that test, we'll call it a marginal match.
+ # Here is an example of a marginal match:
+ # $done{$$op} = 1;
+ # $op = compile_bblock($op);
+ # The left tokens are both identifiers, but
+ # one accesses a hash and the other doesn't.
+ # We'll let this be a tentative match and undo
+ # it later if we don't find more than 2 lines
+ # in the group.
+ elsif ( $maximum_line_index == 0 ) {
+ $marginal_match =
+ 2; # =2 prevents being undone below
+ }
+ }
+ }
- if ($everything_matches) {
+ # Don't let line with fewer fields increase column widths
+ # ( align3.t )
+ if ( $maximum_field_index > $jmax ) {
+
+ # Exception: suspend this rule to allow last lines to join
+ if ( $pad > 0 ) { goto NO_MATCH; }
+ }
+ } ## end for my $j ( 0 .. $jlimit)
+
+ # Turn off the "marginal match" flag in some cases...
+ # A "marginal match" occurs when the alignment tokens agree
+ # but there are differences in the other tokens (patterns).
+ # If we leave the marginal match flag set, then the rule is that we
+ # will align only if there are more than two lines in the group.
+ # We will turn of the flag if we almost have a match
+ # and either we have seen a good alignment token or we
+ # just need a small pad (2 spaces) to fit. These rules are
+ # the result of experimentation. Tokens which misaligned by just
+ # one or two characters are annoying. On the other hand,
+ # large gaps to less important alignment tokens are also annoying.
+ if ( $marginal_match == 1
+ && $jmax == $maximum_field_index
+ && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
+ )
+ {
+ $marginal_match = 0;
+ }
+ ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
+ }
+ # We have a match (even if marginal).
+ # If the current line has fewer fields than the current group
+ # but otherwise matches, copy the remaining group fields to
+ # make it a perfect match.
+ if ( $maximum_field_index > $jmax ) {
my $comment = $$rfields[$jmax];
for $jmax ( $jlimit .. $maximum_field_index ) {
$$rtokens[$jmax] = $$old_rtokens[$jmax];
$$rfields[$jmax] = $comment;
$new_line->set_jmax($jmax);
}
- }
+ return;
- my_flush() unless ($everything_matches);
+ NO_MATCH:
+ ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
+ my_flush();
+ return;
+ }
}
sub check_fit {
my $maximum_field_index = $old_line->get_jmax();
for $j ( 0 .. $jmax ) {
- ## testing patch to avoid excessive gaps in previous lines,
- # due to a line of fewer fields.
- # return join( ".",
- # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
- # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
- ## MOVED BELOW AS A TEST
- ##next if ($jmax < $maximum_field_index && $j==$jmax-1);
-
$pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
if ( $j == 0 ) {
next if $pad < 0;
+ ## This patch helps sometimes, but it doesn't check to see if
+ ## the line is too long even without the side comment. It needs
+ ## to be reworked.
+ ##don't let a long token with no trailing side comment push
+ ##side comments out, or end a group. (sidecmt1.t)
+ ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
+
# This line will need space; lets see if we want to accept it..
if (
last;
}
- # TESTING PATCH moved from above to be sure we fit
+ # patch to avoid excessive gaps in previous lines,
+ # due to a line of fewer fields.
+ # return join( ".",
+ # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
+ # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
# looks ok, squeeze this field in
}
}
-sub accept_line {
+sub add_to_group {
+ # The current line either starts a new alignment group or is
+ # accepted into the current alignment group.
my $new_line = shift;
$group_lines[ ++$maximum_line_index ] = $new_line;
$group_lines[ $maximum_line_index - 1 ]->get_alignments();
$new_line->set_alignments(@new_alignments);
}
+
+ # remember group jmax extremes for next call to valign_input
+ $previous_minimum_jmax_seen = $minimum_jmax_seen;
+ $previous_maximum_jmax_seen = $maximum_jmax_seen;
}
sub dump_array {
# debug routine to dump array contents
local $" = ')(';
- print "(@_)\n";
+ print STDOUT "(@_)\n";
}
# flush() sends the current Perl::Tidy::VerticalAligner group down the
# pipeline to Perl::Tidy::FileWriter.
-# This is the external flush, which also empties the cache
+# This is the external flush, which also empties the buffer and cache
sub flush {
+ # the buffer must be emptied first, then any cached text
+ dump_valign_buffer();
+
if ( $maximum_line_index < 0 ) {
if ($cached_line_type) {
- $file_writer_object->write_code_line( $cached_line_text . "\n" );
- $cached_line_type = 0;
- $cached_line_text = "";
+ $seqno_string = $cached_seqno_string;
+ valign_output_step_C( $cached_line_text,
+ $cached_line_leading_space_count,
+ $last_level_written );
+ $cached_line_type = 0;
+ $cached_line_text = "";
+ $cached_seqno_string = "";
}
}
else {
}
}
+sub reduce_valign_buffer_indentation {
+
+ my ($diff) = @_;
+ if ( $valign_buffer_filling && $diff ) {
+ my $max_valign_buffer = @valign_buffer;
+ for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) {
+ my ( $line, $leading_space_count, $level ) =
+ @{ $valign_buffer[$i] };
+ my $ws = substr( $line, 0, $diff );
+ if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
+ $line = substr( $line, $diff );
+ }
+ if ( $leading_space_count >= $diff ) {
+ $leading_space_count -= $diff;
+ $level = level_change( $leading_space_count, $diff, $level );
+ }
+ $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
+ }
+ }
+}
+
+sub level_change {
+
+ # compute decrease in level when we remove $diff spaces from the
+ # leading spaces
+ my ( $leading_space_count, $diff, $level ) = @_;
+ if ($rOpts_indent_columns) {
+ my $olev =
+ int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
+ my $nlev = int( $leading_space_count / $rOpts_indent_columns );
+ $level -= ( $olev - $nlev );
+ if ( $level < 0 ) { $level = 0 }
+ }
+ return $level;
+}
+
+sub dump_valign_buffer {
+ if (@valign_buffer) {
+ foreach (@valign_buffer) {
+ valign_output_step_D( @{$_} );
+ }
+ @valign_buffer = ();
+ }
+ $valign_buffer_filling = "";
+}
+
# This is the internal flush, which leaves the cache intact
sub my_flush {
VALIGN_DEBUG_FLAG_APPEND0 && do {
my ( $a, $b, $c ) = caller();
- print
+ print STDOUT
"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
};
# zero leading space count if any lines are too long
my $max_excess = 0;
for my $i ( 0 .. $maximum_line_index ) {
- my $str = $group_lines[$i];
+ my $str = $group_lines[$i];
my $excess =
- length($str) + $leading_space_count - $rOpts_maximum_line_length;
+ length($str) +
+ $leading_space_count -
+ maximum_line_length_for_level($group_level);
if ( $excess > $max_excess ) {
$max_excess = $excess;
}
# write the group of lines
my $outdent_long_lines = 0;
for my $i ( 0 .. $maximum_line_index ) {
- write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
- $outdent_long_lines, "" );
+ valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
+ $outdent_long_lines, "", $group_level );
}
}
my $group_list_type = $group_lines[0]->get_list_type();
my ( $a, $b, $c ) = caller();
my $maximum_field_index = $group_lines[0]->get_jmax();
- print
+ print STDOUT
"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
};
my $group_leader_length = $group_lines[0]->get_leading_space_count();
# add extra leading spaces if helpful
- my $min_ci_gap =
- improve_continuation_indentation( $do_not_align,
+ my $min_ci_gap = improve_continuation_indentation( $do_not_align,
$group_leader_length );
# loop to output all lines
for my $i ( 0 .. $maximum_line_index ) {
my $line = $group_lines[$i];
- write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
+ valign_output_step_A( $line, $min_ci_gap, $do_not_align,
$group_leader_length, $extra_leading_spaces );
}
}
# Do not try to align two lines which are not really similar
return unless $maximum_line_index == 1;
+ return if ($is_matching_terminal_line);
my $group_list_type = $group_lines[0]->get_list_type();
|| $group_maximum_gap > 12
# or lines with differing number of alignment tokens
+ # TODO: this could be improved. It occasionally rejects
+ # good matches.
|| $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
)
);
if ( $move >= 0
&& $last_side_comment_length > 0
&& ( $first_side_comment_line == 0 )
- && $group_level == $last_group_level_written )
+ && $group_level == $last_level_written )
{
$min_move = 0;
}
$move = $min_move;
}
- # prevously, an upper bound was placed on $move here,
+ # previously, an upper bound was placed on $move here,
# (maximum_space_to_comment), but it was not helpful
# don't exceed the available space
# 'tan' => \&tan,
# 'atan2' => \&atan2,
- ## BUB: Deactivated####################
+ ## Deactivated####################
# The trouble with this patch is that it may, for example,
# move in some 'or's or ':'s, and leave some out, so that the
# left edge alignment suffers.
my $maximum_field_index = $group_lines[0]->get_jmax();
- my $min_ci_gap = $rOpts_maximum_line_length;
+ my $min_ci_gap = maximum_line_length_for_level($group_level);
if ( $maximum_field_index > 1 && !$do_not_align ) {
for my $i ( 0 .. $maximum_line_index ) {
my $leading_space_count = $line->get_leading_space_count();
my $rfields = $line->get_rfields();
- my $gap = $line->get_column(0) - $leading_space_count -
+ my $gap =
+ $line->get_column(0) -
+ $leading_space_count -
length( $$rfields[0] );
if ( $leading_space_count > $group_leader_length ) {
}
}
- if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
+ if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) {
$min_ci_gap = 0;
}
}
return $min_ci_gap;
}
-sub write_vertically_aligned_line {
+sub valign_output_step_A {
+
+ ###############################################################
+ # This is Step A in writing vertically aligned lines.
+ # The line is prepared according to the alignments which have
+ # been found and shipped to the next step.
+ ###############################################################
my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
$extra_leading_spaces )
: $rOpts_minimum_space_to_comment - 1;
}
+ # if the -fpsc flag is set, move the side comment to the selected
+ # column if and only if it is possible, ignoring constraints on
+ # line length and minimum space to comment
+ if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
+ {
+ my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
+ if ( $newpad >= 0 ) { $pad = $newpad; }
+ }
+
# accumulate the padding
if ( $pad > 0 ) { $total_pad_count += $pad; }
$total_pad_count = 0;
$str .= $$rfields[$j];
}
+ else {
+ $total_pad_count = 0;
+ }
# update side comment history buffer
if ( $j == $maximum_field_index ) {
my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
# ship this line off
- write_leader_and_string( $leading_space_count + $extra_leading_spaces,
+ valign_output_step_B( $leading_space_count + $extra_leading_spaces,
$str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags );
+ $rvertical_tightness_flags, $group_level );
}
sub get_extra_leading_spaces {
# list before it sees everything. When this happens, it sets
# the indentation to the standard scheme, but notes how
# many spaces it would have liked to use. We may be able
- # to recover that space here in the event that that all of the
+ # to recover that space here in the event that all of the
# lines of a list are back together again.
#----------------------------------------------------------
sub combine_fields {
# combine all fields except for the comment field ( sidecmt.t )
+ # Uses global variables:
+ # @group_lines
+ # $maximum_line_index
my ( $j, $k );
my $maximum_field_index = $group_lines[0]->get_jmax();
for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
1 + $maximum_line_index + $file_writer_object->get_output_line_number();
}
-sub write_leader_and_string {
+sub valign_output_step_B {
+
+ ###############################################################
+ # This is Step B in writing vertically aligned lines.
+ # Vertical tightness is applied according to preset flags.
+ # In particular this routine handles stacking of opening
+ # and closing tokens.
+ ###############################################################
my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags )
+ $rvertical_tightness_flags, $level )
= @_;
- my $leading_string = get_leading_string($leading_space_count);
-
# handle outdenting of long lines:
if ($outdent_long_lines) {
my $excess =
- length($str) - $side_comment_length + $leading_space_count -
- $rOpts_maximum_line_length;
+ length($str) -
+ $side_comment_length +
+ $leading_space_count -
+ maximum_line_length_for_level($level);
if ( $excess > 0 ) {
- $leading_string = "";
+ $leading_space_count = 0;
$last_outdented_line_at =
$file_writer_object->get_output_line_number();
}
}
+ # Make preliminary leading whitespace. It could get changed
+ # later by entabbing, so we have to keep track of any changes
+ # to the leading_space_count from here on.
+ my $leading_string =
+ $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
+
# Unpack any recombination data; it was packed by
# sub send_lines_to_vertical_aligner. Contents:
#
- # [0] type: 1=opening 2=closing 3=opening block brace
+ # [0] type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
# if closing: spaces of padding to use
# [2] sequence number of container
# [3] valid flag: do not append if this flag is false
#
- my ( $open_or_close, $tightness_flag, $seqno, $valid );
+ my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
+ $seqno_end );
if ($rvertical_tightness_flags) {
- ( $open_or_close, $tightness_flag, $seqno, $valid ) =
- @{$rvertical_tightness_flags};
+ (
+ $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
+ $seqno_end
+ ) = @{$rvertical_tightness_flags};
}
+ $seqno_string = $seqno_end;
+
# handle any cached line ..
# either append this line to it or write it out
- if ($cached_line_text) {
+ if ( length($cached_line_text) ) {
+ # Dump an invalid cached line
if ( !$cached_line_valid ) {
- $file_writer_object->write_code_line( $cached_line_text . "\n" );
+ valign_output_step_C( $cached_line_text,
+ $cached_line_leading_space_count,
+ $last_level_written );
}
- # handle cached line with opening container token
+ # Handle cached line ending in OPENING tokens
elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
my $gap = $leading_space_count - length($cached_line_text);
}
}
- if ( $gap >= 0 ) {
- $leading_string = $cached_line_text . ' ' x $gap;
+ if ( $gap >= 0 && defined($seqno_beg) ) {
+ $leading_string = $cached_line_text . ' ' x $gap;
+ $leading_space_count = $cached_line_leading_space_count;
+ $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+ $level = $last_level_written;
}
else {
- $file_writer_object->write_code_line(
- $cached_line_text . "\n" );
+ valign_output_step_C( $cached_line_text,
+ $cached_line_leading_space_count,
+ $last_level_written );
}
}
- # handle cached line to place before this closing container token
+ # Handle cached line ending in CLOSING tokens
else {
my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
+ if (
+
+ # The new line must start with container
+ $seqno_beg
+
+ # The container combination must be okay..
+ && (
+
+ # okay to combine like types
+ ( $open_or_close == $cached_line_type )
+
+ # closing block brace may append to non-block
+ || ( $cached_line_type == 2 && $open_or_close == 4 )
+
+ # something like ');'
+ || ( !$open_or_close && $cached_line_type == 2 )
+
+ )
+
+ # The combined line must fit
+ && (
+ length($test_line) <=
+ maximum_line_length_for_level($last_level_written) )
+ )
+ {
+
+ $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+
+ # Patch to outdent closing tokens ending # in ');'
+ # If we are joining a line like ');' to a previous stacked
+ # set of closing tokens, then decide if we may outdent the
+ # combined stack to the indentation of the ');'. Since we
+ # should not normally outdent any of the other tokens more than
+ # the indentation of the lines that contained them, we will
+ # only do this if all of the corresponding opening
+ # tokens were on the same line. This can happen with
+ # -sot and -sct. For example, it is ok here:
+ # __PACKAGE__->load_components( qw(
+ # PK::Auto
+ # Core
+ # ));
+ #
+ # But, for example, we do not outdent in this example because
+ # that would put the closing sub brace out farther than the
+ # opening sub brace:
+ #
+ # perltidy -sot -sct
+ # $c->Tk::bind(
+ # '<Control-f>' => sub {
+ # my ($c) = @_;
+ # my $e = $c->XEvent;
+ # itemsUnderArea $c;
+ # } );
+ #
+ if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
+
+ # The way to tell this is if the stacked sequence numbers
+ # of this output line are the reverse of the stacked
+ # sequence numbers of the previous non-blank line of
+ # sequence numbers. So we can join if the previous
+ # nonblank string of tokens is the mirror image. For
+ # example if stack )}] is 13:8:6 then we are looking for a
+ # leading stack like [{( which is 6:8:13 We only need to
+ # check the two ends, because the intermediate tokens must
+ # fall in order. Note on speed: having to split on colons
+ # and eliminate multiple colons might appear to be slow,
+ # but it's not an issue because we almost never come
+ # through here. In a typical file we don't.
+ $seqno_string =~ s/^:+//;
+ $last_nonblank_seqno_string =~ s/^:+//;
+ $seqno_string =~ s/:+/:/g;
+ $last_nonblank_seqno_string =~ s/:+/:/g;
+
+ # how many spaces can we outdent?
+ my $diff =
+ $cached_line_leading_space_count - $leading_space_count;
+ if ( $diff > 0
+ && length($seqno_string)
+ && length($last_nonblank_seqno_string) ==
+ length($seqno_string) )
+ {
+ my @seqno_last =
+ ( split ':', $last_nonblank_seqno_string );
+ my @seqno_now = ( split ':', $seqno_string );
+ if ( $seqno_now[-1] == $seqno_last[0]
+ && $seqno_now[0] == $seqno_last[-1] )
+ {
- if ( length($test_line) <= $rOpts_maximum_line_length ) {
- $str = $test_line;
- $leading_string = "";
+ # OK to outdent ..
+ # for absolute safety, be sure we only remove
+ # whitespace
+ my $ws = substr( $test_line, 0, $diff );
+ if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
+
+ $test_line = substr( $test_line, $diff );
+ $cached_line_leading_space_count -= $diff;
+ $last_level_written =
+ level_change(
+ $cached_line_leading_space_count,
+ $diff, $last_level_written );
+ reduce_valign_buffer_indentation($diff);
+ }
+
+ # shouldn't happen, but not critical:
+ ##else {
+ ## ERROR transferring indentation here
+ ##}
+ }
+ }
+ }
+
+ $str = $test_line;
+ $leading_string = "";
+ $leading_space_count = $cached_line_leading_space_count;
+ $level = $last_level_written;
}
else {
- $file_writer_object->write_code_line(
- $cached_line_text . "\n" );
+ valign_output_step_C( $cached_line_text,
+ $cached_line_leading_space_count,
+ $last_level_written );
}
}
}
$cached_line_type = 0;
$cached_line_text = "";
+ # make the line to be written
my $line = $leading_string . $str;
# write or cache this line
- if ( !$rvertical_tightness_flags || $side_comment_length > 0 ) {
- $file_writer_object->write_code_line( $line . "\n" );
+ if ( !$open_or_close || $side_comment_length > 0 ) {
+ valign_output_step_C( $line, $leading_space_count, $level );
}
else {
- $cached_line_text = $line;
- $cached_line_type = $open_or_close;
- $cached_line_flag = $tightness_flag;
- $cached_seqno = $seqno;
- $cached_line_valid = $valid;
+ $cached_line_text = $line;
+ $cached_line_type = $open_or_close;
+ $cached_line_flag = $tightness_flag;
+ $cached_seqno = $seqno;
+ $cached_line_valid = $valid;
+ $cached_line_leading_space_count = $leading_space_count;
+ $cached_seqno_string = $seqno_string;
}
- $last_group_level_written = $group_level;
+ $last_level_written = $level;
$last_side_comment_length = $side_comment_length;
$extra_indent_ok = 0;
}
-{ # begin get_leading_string
+sub valign_output_step_C {
- my @leading_string_cache;
+ ###############################################################
+ # This is Step C in writing vertically aligned lines.
+ # Lines are either stored in a buffer or passed along to the next step.
+ # The reason for storing lines is that we may later want to reduce their
+ # indentation when -sot and -sct are both used.
+ ###############################################################
+ my @args = @_;
- sub get_leading_string {
+ # Dump any saved lines if we see a line with an unbalanced opening or
+ # closing token.
+ dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
- # define the leading whitespace string for this line..
- my $leading_whitespace_count = shift;
+ # Either store or write this line
+ if ($valign_buffer_filling) {
+ push @valign_buffer, [@args];
+ }
+ else {
+ valign_output_step_D(@args);
+ }
- # Handle case of zero whitespace, which includes multi-line quotes
- # (which may have a finite level; this prevents tab problems)
- if ( $leading_whitespace_count <= 0 ) {
- return "";
- }
+ # For lines starting or ending with opening or closing tokens..
+ if ($seqno_string) {
+ $last_nonblank_seqno_string = $seqno_string;
- # look for previous result
- elsif ( $leading_string_cache[$leading_whitespace_count] ) {
- return $leading_string_cache[$leading_whitespace_count];
- }
+ # Start storing lines when we see a line with multiple stacked opening
+ # tokens.
+ # patch for RT #94354, requested by Colin Williams
+ if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
+ {
+
+ # This test is efficient but a little subtle: The first test says
+ # that we have multiple sequence numbers and hence multiple opening
+ # or closing tokens in this line. The second part of the test
+ # rejects stacked closing and ternary tokens. So if we get here
+ # then we should have stacked unbalanced opening tokens.
+
+ # Here is a complex example:
+
+ # Foo($Bar[0], { # (side comment)
+ # baz => 1,
+ # });
+
+ # The first line has sequence 6::4. It does not begin with
+ # a closing token or ternary, so it passes the test and must be
+ # stacked opening tokens.
+
+ # The last line has sequence 4:6 but is a stack of closing tokens,
+ # so it gets rejected.
+
+ # Note that the sequence number of an opening token for a qw quote
+ # is a negative number and will be rejected.
+ # For example, for the following line:
+ # skip_symbols([qw(
+ # $seqno_string='10:5:-1'. It would be okay to accept it but
+ # I decided not to do this after testing.
+
+ $valign_buffer_filling = $seqno_string;
+
+ }
+ }
+}
+
+sub valign_output_step_D {
+
+ ###############################################################
+ # This is Step D in writing vertically aligned lines.
+ # Write one vertically aligned line of code to the output object.
+ ###############################################################
+
+ my ( $line, $leading_space_count, $level ) = @_;
+
+ # The line is currently correct if there is no tabbing (recommended!)
+ # We may have to lop off some leading spaces and replace with tabs.
+ if ( $leading_space_count > 0 ) {
+
+ # Nothing to do if no tabs
+ if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
+ || $rOpts_indent_columns <= 0 )
+ {
+
+ # nothing to do
+ }
+
+ # Handle entab option
+ elsif ($rOpts_entab_leading_whitespace) {
+ my $space_count =
+ $leading_space_count % $rOpts_entab_leading_whitespace;
+ my $tab_count =
+ int( $leading_space_count / $rOpts_entab_leading_whitespace );
+ my $leading_string = "\t" x $tab_count . ' ' x $space_count;
+ if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
+ substr( $line, 0, $leading_space_count ) = $leading_string;
+ }
+ else {
+
+ # shouldn't happen - program error counting whitespace
+ # - skip entabbing
+ VALIGN_DEBUG_FLAG_TABS
+ && warning(
+"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
+ );
+ }
+ }
+
+ # Handle option of one tab per level
+ else {
+ my $leading_string = ( "\t" x $level );
+ my $space_count =
+ $leading_space_count - $level * $rOpts_indent_columns;
+
+ # shouldn't happen:
+ if ( $space_count < 0 ) {
+
+ # But it could be an outdented comment
+ if ( $line !~ /^\s*#/ ) {
+ VALIGN_DEBUG_FLAG_TABS
+ && warning(
+"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
+ );
+ }
+ $leading_string = ( ' ' x $leading_space_count );
+ }
+ else {
+ $leading_string .= ( ' ' x $space_count );
+ }
+ if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
+ substr( $line, 0, $leading_space_count ) = $leading_string;
+ }
+ else {
+
+ # shouldn't happen - program error counting whitespace
+ # we'll skip entabbing
+ VALIGN_DEBUG_FLAG_TABS
+ && warning(
+"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
+ );
+ }
+ }
+ }
+ $file_writer_object->write_code_line( $line . "\n" );
+}
+
+{ # begin get_leading_string
+
+ my @leading_string_cache;
+
+ sub get_leading_string {
+
+ # define the leading whitespace string for this line..
+ my $leading_whitespace_count = shift;
+
+ # Handle case of zero whitespace, which includes multi-line quotes
+ # (which may have a finite level; this prevents tab problems)
+ if ( $leading_whitespace_count <= 0 ) {
+ return "";
+ }
+
+ # look for previous result
+ elsif ( $leading_string_cache[$leading_whitespace_count] ) {
+ return $leading_string_cache[$leading_whitespace_count];
+ }
# must compute a string for this number of spaces
my $leading_string;
elsif ($rOpts_entab_leading_whitespace) {
my $space_count =
$leading_whitespace_count % $rOpts_entab_leading_whitespace;
- my $tab_count =
- int(
+ my $tab_count = int(
$leading_whitespace_count / $rOpts_entab_leading_whitespace );
$leading_string = "\t" x $tab_count . ' ' x $space_count;
}
# shouldn't happen:
if ( $space_count < 0 ) {
- warning(
-"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
- );
+ VALIGN_DEBUG_FLAG_TABS
+ && warning(
+"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
+ );
+
+ # -- skip entabbing
$leading_string = ( ' ' x $leading_whitespace_count );
}
else {
}
}
+sub require_blank_code_lines {
+
+ # write out the requested number of blanks regardless of the value of -mbl
+ # unless -mbl=0. This allows extra blank lines to be written for subs and
+ # packages even with the default -mbl=1
+ my $self = shift;
+ my $count = shift;
+ my $need = $count - $self->{_consecutive_blank_lines};
+ my $rOpts = $self->{_rOpts};
+ my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
+ for ( my $i = 0 ; $i < $need ; $i++ ) {
+ $self->write_blank_code_line($forced);
+ }
+}
+
sub write_blank_code_line {
- my $self = shift;
- my $rOpts = $self->{_rOpts};
+ my $self = shift;
+ my $forced = shift;
+ my $rOpts = $self->{_rOpts};
return
- if ( $self->{_consecutive_blank_lines} >=
+ if (!$forced
+ && $self->{_consecutive_blank_lines} >=
$rOpts->{'maximum-consecutive-blank-lines'} );
$self->{_consecutive_blank_lines}++;
$self->{_consecutive_nonblank_lines} = 0;
my $debug_file = $self->{_debug_file};
my $fh;
unless ( $fh = IO::File->new("> $debug_file") ) {
- warn("can't open $debug_file: $!\n");
+ Perl::Tidy::Warn("can't open $debug_file: $!\n");
}
$self->{_debug_file_opened} = 1;
$self->{_fh} = $fh;
$pattern .= $$rtoken_type[$j];
}
$reconstructed_original .= $$rtokens[$j];
- $block_str .= "($$rblock_type[$j])";
+ $block_str .= "($$rblock_type[$j])";
$num = length( $$rtokens[$j] );
my $type_str = $$rtoken_type[$j];
use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
my $debug_warning = sub {
- print "TOKENIZER_DEBUGGING with key $_[0]\n";
+ print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
};
TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
}
use Carp;
+
+# PACKAGE VARIABLES for processing an entire FILE.
use vars qw{
$tokenizer_self
- $level_in_tokenizer
- $slevel_in_tokenizer
- $nesting_token_string
- $nesting_type_string
- $nesting_block_string
- $nesting_block_flag
- $nesting_list_string
- $nesting_list_flag
- $saw_negative_indentation
- $id_scan_state
+
$last_nonblank_token
$last_nonblank_type
$last_nonblank_block_type
- $last_nonblank_container_type
- $last_nonblank_type_sequence
- $last_last_nonblank_token
- $last_last_nonblank_type
- $last_last_nonblank_block_type
- $last_last_nonblank_container_type
- $last_last_nonblank_type_sequence
- $last_nonblank_prototype
$statement_type
- $identifier
- $in_quote
- $quote_type
- $quote_character
- $quote_pos
- $quote_depth
- $allowed_quote_modifiers
+ $in_attribute_list
+ $current_package
+ $context
+
+ %is_constant
+ %is_user_function
+ %user_function_prototype
+ %is_block_function
+ %is_block_list_function
+ %saw_function_definition
+
+ $brace_depth
$paren_depth
+ $square_bracket_depth
+
+ @current_depth
+ @total_depth
+ $total_depth
+ @nesting_sequence_number
+ @current_sequence_number
@paren_type
@paren_semicolon_count
@paren_structural_type
- $brace_depth
@brace_type
@brace_structural_type
- @brace_statement_type
@brace_context
@brace_package
- $square_bracket_depth
@square_bracket_type
@square_bracket_structural_type
@depth_array
+ @nested_ternary_flag
+ @nested_statement_type
@starting_line_of_current_depth
- @current_depth
- @current_sequence_number
- @nesting_sequence_number
- @lower_case_labels_at
- $saw_v_string
- %is_constant
- %is_user_function
- %user_function_prototype
- %saw_function_definition
- $max_token_index
- $peeked_ahead
- $current_package
- $unexpected_error_count
- $input_line
- $input_line_number
- $rpretokens
- $rpretoken_map
- $rpretoken_type
- $want_paren
- $context
- @slevel_stack
- $ci_string_in_tokenizer
- $continuation_string_in_tokenizer
- $in_statement_continuation
- $started_looking_for_here_target_at
- $nearly_matched_here_target_at
+};
+# GLOBAL CONSTANTS for routines in this package
+use vars qw{
%is_indirect_object_taker
%is_block_operator
%expecting_operator_token
%expecting_operator_types
%expecting_term_types
%expecting_term_token
- %is_block_function
- %is_block_list_function
%is_digraph
%is_file_test_operator
%is_trigraph
+ %is_tetragraph
%is_valid_token_type
%is_keyword
%is_code_block_token
# Note: 'tabs' and 'indent_columns' are temporary and should be
# removed asap
my %defaults = (
- source_object => undef,
- debugger_object => undef,
- diagnostics_object => undef,
- logger_object => undef,
- starting_level => undef,
- indent_columns => 4,
- tabs => 0,
- look_for_hash_bang => 0,
- trim_qw => 1,
- look_for_autoloader => 1,
- look_for_selfloader => 1,
+ source_object => undef,
+ debugger_object => undef,
+ diagnostics_object => undef,
+ logger_object => undef,
+ starting_level => undef,
+ indent_columns => 4,
+ tabsize => 8,
+ look_for_hash_bang => 0,
+ trim_qw => 1,
+ look_for_autoloader => 1,
+ look_for_selfloader => 1,
+ starting_line_number => 1,
+ extended_syntax => 0,
);
my %args = ( %defaults, @_ );
# _in_data flag set if we are in __DATA__ section
# _in_end flag set if we are in __END__ section
# _in_format flag set if we are in a format description
+ # _in_attribute_list flag telling if we are looking for attributes
# _in_quote flag telling if we are chasing a quote
# _starting_level indentation level of first line
- # _input_tabstr string denoting one indentation level of input file
- # _know_input_tabstr flag indicating if we know _input_tabstr
# _line_buffer_object object with get_line() method to supply source code
# _diagnostics_object place to write debugging information
+ # _unexpected_error_count error count used to limit output
+ # _lower_case_labels_at line numbers where lower case labels seen
$tokenizer_self = {
- _rhere_target_list => undef,
- _in_here_doc => 0,
- _here_doc_target => "",
- _here_quote_character => "",
- _in_data => 0,
- _in_end => 0,
- _in_format => 0,
- _in_error => 0,
- _in_pod => 0,
- _in_quote => 0,
- _quote_target => "",
- _line_start_quote => -1,
- _starting_level => $args{starting_level},
- _know_starting_level => defined( $args{starting_level} ),
- _tabs => $args{tabs},
- _indent_columns => $args{indent_columns},
- _look_for_hash_bang => $args{look_for_hash_bang},
- _trim_qw => $args{trim_qw},
- _input_tabstr => "",
- _know_input_tabstr => -1,
- _last_line_number => 0,
- _saw_perl_dash_P => 0,
- _saw_perl_dash_w => 0,
- _saw_use_strict => 0,
- _look_for_autoloader => $args{look_for_autoloader},
- _look_for_selfloader => $args{look_for_selfloader},
- _saw_autoloader => 0,
- _saw_selfloader => 0,
- _saw_hash_bang => 0,
- _saw_end => 0,
- _saw_data => 0,
- _saw_lc_filehandle => 0,
- _started_tokenizing => 0,
- _line_buffer_object => $line_buffer_object,
- _debugger_object => $args{debugger_object},
- _diagnostics_object => $args{diagnostics_object},
- _logger_object => $args{logger_object},
+ _rhere_target_list => [],
+ _in_here_doc => 0,
+ _here_doc_target => "",
+ _here_quote_character => "",
+ _in_data => 0,
+ _in_end => 0,
+ _in_format => 0,
+ _in_error => 0,
+ _in_pod => 0,
+ _in_attribute_list => 0,
+ _in_quote => 0,
+ _quote_target => "",
+ _line_start_quote => -1,
+ _starting_level => $args{starting_level},
+ _know_starting_level => defined( $args{starting_level} ),
+ _tabsize => $args{tabsize},
+ _indent_columns => $args{indent_columns},
+ _look_for_hash_bang => $args{look_for_hash_bang},
+ _trim_qw => $args{trim_qw},
+ _continuation_indentation => $args{continuation_indentation},
+ _outdent_labels => $args{outdent_labels},
+ _last_line_number => $args{starting_line_number} - 1,
+ _saw_perl_dash_P => 0,
+ _saw_perl_dash_w => 0,
+ _saw_use_strict => 0,
+ _saw_v_string => 0,
+ _look_for_autoloader => $args{look_for_autoloader},
+ _look_for_selfloader => $args{look_for_selfloader},
+ _saw_autoloader => 0,
+ _saw_selfloader => 0,
+ _saw_hash_bang => 0,
+ _saw_end => 0,
+ _saw_data => 0,
+ _saw_negative_indentation => 0,
+ _started_tokenizing => 0,
+ _line_buffer_object => $line_buffer_object,
+ _debugger_object => $args{debugger_object},
+ _diagnostics_object => $args{diagnostics_object},
+ _logger_object => $args{logger_object},
+ _unexpected_error_count => 0,
+ _started_looking_for_here_target_at => 0,
+ _nearly_matched_here_target_at => undef,
+ _line_text => "",
+ _rlower_case_labels_at => undef,
+ _extended_syntax => $args{extended_syntax},
};
prepare_for_a_new_file();
warning("hit EOF while in format description\n");
}
- # this check may be removed after a year or so
- if ( $tokenizer_self->{_saw_lc_filehandle} ) {
-
- warning( <<'EOM' );
-------------------------------------------------------------------------
-PLEASE NOTE: If you get this message, it is because perltidy noticed
-possible ambiguous syntax at one or more places in your script, as
-noted above. The problem is with statements accepting indirect objects,
-such as print and printf statements of the form
-
- print bareword ( $etc
-
-Perltidy needs your help in deciding if 'bareword' is a filehandle or a
-function call. The problem is the space between 'bareword' and '('. If
-'bareword' is a function call, you should remove the trailing space. If
-'bareword' is a filehandle, you should avoid the opening paren or else
-globally capitalize 'bareword' to be BAREWORD. So the above line
-would be:
-
- print bareword( $etc # function
-or
- print bareword @list # filehandle
-or
- print BAREWORD ( $etc # filehandle
-
-If you want to keep the line as it is, and are sure it is correct,
-you can use -w=0 to prevent this message.
-------------------------------------------------------------------------
-EOM
-
- }
-
if ( $tokenizer_self->{_in_pod} ) {
# Just write log entry if this is after __END__ or __DATA__
if ( $tokenizer_self->{_in_here_doc} ) {
my $here_doc_target = $tokenizer_self->{_here_doc_target};
+ my $started_looking_for_here_target_at =
+ $tokenizer_self->{_started_looking_for_here_target_at};
if ($here_doc_target) {
warning(
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
);
}
+ my $nearly_matched_here_target_at =
+ $tokenizer_self->{_nearly_matched_here_target_at};
if ($nearly_matched_here_target_at) {
warning(
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
if ( $tokenizer_self->{_in_quote} ) {
my $line_start_quote = $tokenizer_self->{_line_start_quote};
my $quote_target = $tokenizer_self->{_quote_target};
+ my $what =
+ ( $tokenizer_self->{_in_attribute_list} )
+ ? "attribute list"
+ : "quote/pattern";
warning(
-"hit EOF seeking end of quote/pattern starting at line $line_start_quote ending in $quote_target\n"
+"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
);
}
write_logfile_entry("Suggest including 'use strict;'\n");
}
- # it is suggested that lables have at least one upper case character
+ # it is suggested that labels have at least one upper case character
# for legibility and to avoid code breakage as new keywords are introduced
- if (@lower_case_labels_at) {
- my $num = @lower_case_labels_at;
+ if ( $tokenizer_self->{_rlower_case_labels_at} ) {
+ my @lower_case_labels_at =
+ @{ $tokenizer_self->{_rlower_case_labels_at} };
write_logfile_entry(
"Suggest using upper case characters in label(s)\n");
local $" = ')(';
# warn if this version can't handle v-strings
my $tok = shift;
- $saw_v_string = $input_line_number;
+ unless ( $tokenizer_self->{_saw_v_string} ) {
+ $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
+ }
if ( $] < 5.006 ) {
warning(
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
my $self = shift;
+ # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
+ # $square_bracket_depth, $paren_depth
+
my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
+ $tokenizer_self->{_line_text} = $input_line;
return undef unless ($input_line);
- $tokenizer_self->{_last_line_number}++;
+ my $input_line_number = ++$tokenizer_self->{_last_line_number};
# Find and remove what characters terminate this line, including any
# control r
$input_line_separator = $2 . $input_line_separator;
}
- # for backwards compatability we keep the line text terminated with
+ # for backwards compatibility we keep the line text terminated with
# a newline character
$input_line .= "\n";
-
- my $input_line_number = $tokenizer_self->{_last_line_number};
+ $tokenizer_self->{_line_text} = $input_line; # update
# create a data structure describing this line which will be
# returned to the caller.
# _ending_in_quote - this line ends in a multi-line quote
# (so don't trim trailing blanks!)
my $line_of_tokens = {
- _line_type => 'EOF',
- _line_text => $input_line,
- _line_number => $input_line_number,
- _rtoken_type => undef,
- _rtokens => undef,
- _rlevels => undef,
- _rslevels => undef,
- _rblock_type => undef,
- _rcontainer_type => undef,
- _rcontainer_environment => undef,
- _rtype_sequence => undef,
- _rnesting_tokens => undef,
- _rci_levels => undef,
- _rnesting_blocks => undef,
- _python_indentation_level => -1, ## 0,
- _starting_in_quote =>
- ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ),
+ _line_type => 'EOF',
+ _line_text => $input_line,
+ _line_number => $input_line_number,
+ _rtoken_type => undef,
+ _rtokens => undef,
+ _rlevels => undef,
+ _rslevels => undef,
+ _rblock_type => undef,
+ _rcontainer_type => undef,
+ _rcontainer_environment => undef,
+ _rtype_sequence => undef,
+ _rnesting_tokens => undef,
+ _rci_levels => undef,
+ _rnesting_blocks => undef,
+ _guessed_indentation_level => 0,
+ _starting_in_quote => 0, # to be set by subroutine
_ending_in_quote => 0,
_curly_brace_depth => $brace_depth,
_square_bracket_depth => $square_bracket_depth,
my $candidate_target = $input_line;
chomp $candidate_target;
if ( $candidate_target eq $here_doc_target ) {
- $nearly_matched_here_target_at = undef;
- $line_of_tokens->{_line_type} = 'HERE_END';
+ $tokenizer_self->{_nearly_matched_here_target_at} = undef;
+ $line_of_tokens->{_line_type} = 'HERE_END';
write_logfile_entry("Exiting HERE document $here_doc_target\n");
my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
if (@$rhere_target_list) { # there can be multiple here targets
( $here_doc_target, $here_quote_character ) =
@{ shift @$rhere_target_list };
- $tokenizer_self->{_here_doc_target} = $here_doc_target;
+ $tokenizer_self->{_here_doc_target} = $here_doc_target;
$tokenizer_self->{_here_quote_character} =
$here_quote_character;
write_logfile_entry(
"Entering HERE document $here_doc_target\n");
- $nearly_matched_here_target_at = undef;
- $started_looking_for_here_target_at = $input_line_number;
+ $tokenizer_self->{_nearly_matched_here_target_at} = undef;
+ $tokenizer_self->{_started_looking_for_here_target_at} =
+ $input_line_number;
}
else {
$tokenizer_self->{_in_here_doc} = 0;
$candidate_target =~ s/\s*$//;
$candidate_target =~ s/^\s*//;
if ( $candidate_target eq $here_doc_target ) {
- $nearly_matched_here_target_at = $input_line_number;
+ $tokenizer_self->{_nearly_matched_here_target_at} =
+ $input_line_number;
}
}
return $line_of_tokens;
$tokenizer_self->{_in_pod} = 0;
}
if ( $input_line =~ /^\#\!.*perl\b/ ) {
- warning("Hash-bang in pod can cause perl to fail! \n");
+ warning(
+ "Hash-bang in pod can cause older versions of perl to fail! \n"
+ );
}
return $line_of_tokens;
}
# must print line unchanged if we have seen a severe error (i.e., we
- # are seeing illegal tokens and connot continue. Syntax errors do
+ # are seeing illegal tokens and cannot continue. Syntax errors do
# not pass this route). Calling routine can decide what to do, but
# the default can be to just pass all lines as if they were after __END__
elsif ( $tokenizer_self->{_in_error} ) {
if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
complain("=cut while not in pod ignored\n");
$tokenizer_self->{_in_pod} = 0;
- $line_of_tokens->{_line_type} = 'POD_STOP';
+ $line_of_tokens->{_line_type} = 'POD_END';
}
else {
- $line_of_tokens->{_line_type} = 'POD_END';
+ $line_of_tokens->{_line_type} = 'POD_START';
complain(
"=cut starts a pod section .. this can fool pod utilities.\n"
);
# update indentation levels for log messages
if ( $input_line !~ /^\s*$/ ) {
- my $rlevels = $line_of_tokens->{_rlevels};
- my $structural_indentation_level = $$rlevels[0];
- my ( $python_indentation_level, $msg ) =
- find_indentation_level( $input_line, $structural_indentation_level );
- if ($msg) { write_logfile_entry("$msg") }
- if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
- $line_of_tokens->{_python_indentation_level} =
- $python_indentation_level;
- }
+ my $rlevels = $line_of_tokens->{_rlevels};
+ $line_of_tokens->{_guessed_indentation_level} =
+ guess_old_indentation_level($input_line);
}
# see if this line contains here doc targets
my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
if (@$rhere_target_list) {
- #my $here_doc_target = shift @$rhere_target_list;
my ( $here_doc_target, $here_quote_character ) =
@{ shift @$rhere_target_list };
$tokenizer_self->{_in_here_doc} = 1;
$tokenizer_self->{_here_doc_target} = $here_doc_target;
$tokenizer_self->{_here_quote_character} = $here_quote_character;
write_logfile_entry("Entering HERE document $here_doc_target\n");
- $started_looking_for_here_target_at = $input_line_number;
+ $tokenizer_self->{_started_looking_for_here_target_at} =
+ $input_line_number;
}
# NOTE: __END__ and __DATA__ statements are written unformatted
$line_of_tokens->{_line_type} = 'CODE';
# remember if we have seen any real code
- if ( !$tokenizer_self->{_started_tokenizing}
+ if ( !$tokenizer_self->{_started_tokenizing}
&& $input_line !~ /^\s*$/
&& $input_line !~ /^\s*#/ )
{
and ( $tokenizer_self->{_line_start_quote} < 0 ) )
{
- if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
+ #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
+ if (
+ ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
+ {
$tokenizer_self->{_line_start_quote} = $input_line_number;
- $tokenizer_self->{_quote_target} = $quote_target;
write_logfile_entry(
"Start multi-line quote or pattern ending in $quote_target\n");
}
sub find_starting_indentation_level {
- my $starting_level = 0;
- my $know_input_tabstr = -1; # flag for find_indentation_level
+ # We need to find the indentation level of the first line of the
+ # script being formatted. Often it will be zero for an entire file,
+ # but if we are formatting a local block of code (within an editor for
+ # example) it may not be zero. The user may specify this with the
+ # -sil=n parameter but normally doesn't so we have to guess.
+ #
+ # USES GLOBAL VARIABLES: $tokenizer_self
+ my $starting_level = 0;
# use value if given as parameter
if ( $tokenizer_self->{_know_starting_level} ) {
# otherwise figure it out from the input file
else {
my $line;
- my $i = 0;
- my $structural_indentation_level = -1; # flag for find_indentation_level
+ my $i = 0;
+ # keep looking at lines until we find a hash bang or piece of code
my $msg = "";
while ( $line =
$tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
$starting_level = 0;
last;
}
- next if ( $line =~ /^\s*#/ ); # must not be comment
- next if ( $line =~ /^\s*$/ ); # must not be blank
- ( $starting_level, $msg ) =
- find_indentation_level( $line, $structural_indentation_level );
- if ($msg) { write_logfile_entry("$msg") }
+ next if ( $line =~ /^\s*#/ ); # skip past comments
+ next if ( $line =~ /^\s*$/ ); # skip past blank lines
+ $starting_level = guess_old_indentation_level($line);
last;
}
$msg = "Line $i implies starting-indentation-level = $starting_level\n";
-
- if ( $starting_level > 0 ) {
-
- my $input_tabstr = $tokenizer_self->{_input_tabstr};
- if ( $input_tabstr eq "\t" ) {
- $msg .= "by guessing input tabbing uses 1 tab per level\n";
- }
- else {
- my $cols = length($input_tabstr);
- $msg .=
- "by guessing input tabbing uses $cols blanks per level\n";
- }
- }
write_logfile_entry("$msg");
}
$tokenizer_self->{_starting_level} = $starting_level;
reset_indentation_level($starting_level);
}
-# Find indentation level given a input line. At the same time, try to
-# figure out the input tabbing scheme.
-#
-# There are two types of calls:
-#
-# Type 1: $structural_indentation_level < 0
-# In this case we have to guess $input_tabstr to figure out the level.
-#
-# Type 2: $structural_indentation_level >= 0
-# In this case the level of this line is known, and this routine can
-# update the tabbing string, if still unknown, to make the level correct.
+sub guess_old_indentation_level {
+ my ($line) = @_;
-sub find_indentation_level {
- my ( $line, $structural_indentation_level ) = @_;
+ # Guess the indentation level of an input line.
+ #
+ # For the first line of code this result will define the starting
+ # indentation level. It will mainly be non-zero when perltidy is applied
+ # within an editor to a local block of code.
+ #
+ # This is an impossible task in general because we can't know what tabs
+ # meant for the old script and how many spaces were used for one
+ # indentation level in the given input script. For example it may have
+ # been previously formatted with -i=7 -et=3. But we can at least try to
+ # make sure that perltidy guesses correctly if it is applied repeatedly to
+ # a block of code within an editor, so that the block stays at the same
+ # level when perltidy is applied repeatedly.
+ #
+ # USES GLOBAL VARIABLES: $tokenizer_self
my $level = 0;
- my $msg = "";
- my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
- my $input_tabstr = $tokenizer_self->{_input_tabstr};
+ # find leading tabs, spaces, and any statement label
+ my $spaces = 0;
+ if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
- # find leading whitespace
- my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
+ # If there are leading tabs, we use the tab scheme for this run, if
+ # any, so that the code will remain stable when editing.
+ if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
- # make first guess at input tabbing scheme if necessary
- if ( $know_input_tabstr < 0 ) {
+ if ($2) { $spaces += length($2) }
- $know_input_tabstr = 0;
+ # correct for outdented labels
+ if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
+ $spaces += $tokenizer_self->{_continuation_indentation};
+ }
+ }
- if ( $tokenizer_self->{_tabs} ) {
- $input_tabstr = "\t";
- if ( length($leading_whitespace) > 0 ) {
- if ( $leading_whitespace !~ /\t/ ) {
+ # compute indentation using the value of -i for this run.
+ # If -i=0 is used for this run (which is possible) it doesn't matter
+ # what we do here but we'll guess that the old run used 4 spaces per level.
+ my $indent_columns = $tokenizer_self->{_indent_columns};
+ $indent_columns = 4 if ( !$indent_columns );
+ $level = int( $spaces / $indent_columns );
+ return ($level);
+}
- my $cols = $tokenizer_self->{_indent_columns};
+# This is a currently unused debug routine
+sub dump_functions {
- if ( length($leading_whitespace) < $cols ) {
- $cols = length($leading_whitespace);
- }
- $input_tabstr = " " x $cols;
- }
+ my $fh = *STDOUT;
+ my ( $pkg, $sub );
+ foreach $pkg ( keys %is_user_function ) {
+ print $fh "\nnon-constant subs in package $pkg\n";
+
+ foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
+ my $msg = "";
+ if ( $is_block_list_function{$pkg}{$sub} ) {
+ $msg = 'block_list';
}
- }
- else {
- $input_tabstr = " " x $tokenizer_self->{_indent_columns};
- if ( length($leading_whitespace) > 0 ) {
- if ( $leading_whitespace =~ /^\t/ ) {
- $input_tabstr = "\t";
- }
+ if ( $is_block_function{$pkg}{$sub} ) {
+ $msg = 'block';
}
+ print $fh "$sub $msg\n";
}
- $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
- $tokenizer_self->{_input_tabstr} = $input_tabstr;
}
- # determine the input tabbing scheme if possible
- if ( ( $know_input_tabstr == 0 )
- && ( length($leading_whitespace) > 0 )
- && ( $structural_indentation_level > 0 ) )
- {
- my $saved_input_tabstr = $input_tabstr;
+ foreach $pkg ( keys %is_constant ) {
+ print $fh "\nconstants and constant subs in package $pkg\n";
- # check for common case of one tab per indentation level
- if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
- if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
- $input_tabstr = "\t";
- $msg = "Guessing old indentation was tab character\n";
- }
+ foreach $sub ( keys %{ $is_constant{$pkg} } ) {
+ print $fh "$sub\n";
}
+ }
+}
- else {
+sub ones_count {
- # detab any tabs based on 8 blanks per tab
- my $entabbed = "";
- if ( $leading_whitespace =~ s/^\t+/ /g ) {
- $entabbed = "entabbed";
- }
+ # count number of 1's in a string of 1's and 0's
+ # example: ones_count("010101010101") gives 6
+ return ( my $cis = $_[0] ) =~ tr/1/0/;
+}
- # now compute tabbing from number of spaces
- my $columns =
- length($leading_whitespace) / $structural_indentation_level;
- if ( $columns == int $columns ) {
- $msg =
- "Guessing old indentation was $columns $entabbed spaces\n";
- }
- else {
- $columns = int $columns;
- $msg =
-"old indentation is unclear, using $columns $entabbed spaces\n";
- }
- $input_tabstr = " " x $columns;
- }
- $know_input_tabstr = 1;
- $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
- $tokenizer_self->{_input_tabstr} = $input_tabstr;
+sub prepare_for_a_new_file {
- # see if mistakes were made
- if ( ( $tokenizer_self->{_starting_level} > 0 )
- && !$tokenizer_self->{_know_starting_level} )
- {
+ # previous tokens needed to determine what to expect next
+ $last_nonblank_token = ';'; # the only possible starting state which
+ $last_nonblank_type = ';'; # will make a leading brace a code block
+ $last_nonblank_block_type = '';
- if ( $input_tabstr ne $saved_input_tabstr ) {
- complain(
-"I made a bad starting level guess; rerun with a value for -sil \n"
- );
- }
- }
- }
+ # scalars for remembering statement types across multiple lines
+ $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
+ $in_attribute_list = 0;
+
+ # scalars for remembering where we are in the file
+ $current_package = "main";
+ $context = UNKNOWN_CONTEXT;
+
+ # hashes used to remember function information
+ %is_constant = (); # user-defined constants
+ %is_user_function = (); # user-defined functions
+ %user_function_prototype = (); # their prototypes
+ %is_block_function = ();
+ %is_block_list_function = ();
+ %saw_function_definition = ();
+
+ # variables used to track depths of various containers
+ # and report nesting errors
+ $paren_depth = 0;
+ $brace_depth = 0;
+ $square_bracket_depth = 0;
+ @current_depth[ 0 .. $#closing_brace_names ] =
+ (0) x scalar @closing_brace_names;
+ $total_depth = 0;
+ @total_depth = ();
+ @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
+ ( 0 .. $#closing_brace_names );
+ @current_sequence_number = ();
+ $paren_type[$paren_depth] = '';
+ $paren_semicolon_count[$paren_depth] = 0;
+ $paren_structural_type[$brace_depth] = '';
+ $brace_type[$brace_depth] = ';'; # identify opening brace as code block
+ $brace_structural_type[$brace_depth] = '';
+ $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
+ $brace_package[$paren_depth] = $current_package;
+ $square_bracket_type[$square_bracket_depth] = '';
+ $square_bracket_structural_type[$square_bracket_depth] = '';
- # use current guess at input tabbing to get input indentation level
- #
- # Patch to handle a common case of entabbed leading whitespace
- # If the leading whitespace equals 4 spaces and we also have
- # tabs, detab the input whitespace assuming 8 spaces per tab.
- if ( length($input_tabstr) == 4 ) {
- $leading_whitespace =~ s/^\t+/ /g;
- }
+ initialize_tokenizer_state();
+}
- if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
- my $pos = 0;
+{ # begin tokenize_this_line
- while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
- {
- $pos += $len_tab;
- $level++;
- }
- }
- return ( $level, $msg );
-}
+ use constant BRACE => 0;
+ use constant SQUARE_BRACKET => 1;
+ use constant PAREN => 2;
+ use constant QUESTION_COLON => 3;
-sub dump_token_types {
- my $class = shift;
- my $fh = shift;
+ # TV1: scalars for processing one LINE.
+ # Re-initialized on each entry to sub tokenize_this_line.
+ my (
+ $block_type, $container_type, $expecting,
+ $i, $i_tok, $input_line,
+ $input_line_number, $last_nonblank_i, $max_token_index,
+ $next_tok, $next_type, $peeked_ahead,
+ $prototype, $rhere_target_list, $rtoken_map,
+ $rtoken_type, $rtokens, $tok,
+ $type, $type_sequence, $indent_flag,
+ );
- # This should be the latest list of token types in use
- # adding NEW_TOKENS: add a comment here
- print $fh <<'END_OF_LIST';
+ # TV2: refs to ARRAYS for processing one LINE
+ # Re-initialized on each call.
+ my $routput_token_list = []; # stack of output token indexes
+ my $routput_token_type = []; # token types
+ my $routput_block_type = []; # types of code block
+ my $routput_container_type = []; # paren types, such as if, elsif, ..
+ my $routput_type_sequence = []; # nesting sequential number
+ my $routput_indent_flag = []; #
+
+ # TV3: SCALARS for quote variables. These are initialized with a
+ # subroutine call and continually updated as lines are processed.
+ my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
+
+ # TV4: SCALARS for multi-line identifiers and
+ # statements. These are initialized with a subroutine call
+ # and continually updated as lines are processed.
+ my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
+
+ # TV5: SCALARS for tracking indentation level.
+ # Initialized once and continually updated as lines are
+ # processed.
+ my (
+ $nesting_token_string, $nesting_type_string,
+ $nesting_block_string, $nesting_block_flag,
+ $nesting_list_string, $nesting_list_flag,
+ $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
+ $in_statement_continuation, $level_in_tokenizer,
+ $slevel_in_tokenizer, $rslevel_stack,
+ );
-Here is a list of the token types currently used for lines of type 'CODE'.
-For the following tokens, the "type" of a token is just the token itself.
+ # TV6: SCALARS for remembering several previous
+ # tokens. Initialized once and continually updated as
+ # lines are processed.
+ my (
+ $last_nonblank_container_type, $last_nonblank_type_sequence,
+ $last_last_nonblank_token, $last_last_nonblank_type,
+ $last_last_nonblank_block_type, $last_last_nonblank_container_type,
+ $last_last_nonblank_type_sequence, $last_nonblank_prototype,
+ );
-.. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <>
-( ) <= >= == =~ !~ != ++ -- /= x=
-... **= <<= >>= &&= ||= <=>
-, + - / * | % ! x ~ = \ ? : . < > ^ &
+ # ----------------------------------------------------------------
+ # beginning of tokenizer variable access and manipulation routines
+ # ----------------------------------------------------------------
+
+ sub initialize_tokenizer_state {
+
+ # TV1: initialized on each call
+ # TV2: initialized on each call
+ # TV3:
+ $in_quote = 0;
+ $quote_type = 'Q';
+ $quote_character = "";
+ $quote_pos = 0;
+ $quote_depth = 0;
+ $quoted_string_1 = "";
+ $quoted_string_2 = "";
+ $allowed_quote_modifiers = "";
+
+ # TV4:
+ $id_scan_state = '';
+ $identifier = '';
+ $want_paren = "";
+ $indented_if_level = 0;
+
+ # TV5:
+ $nesting_token_string = "";
+ $nesting_type_string = "";
+ $nesting_block_string = '1'; # initially in a block
+ $nesting_block_flag = 1;
+ $nesting_list_string = '0'; # initially not in a list
+ $nesting_list_flag = 0; # initially not in a list
+ $ci_string_in_tokenizer = "";
+ $continuation_string_in_tokenizer = "0";
+ $in_statement_continuation = 0;
+ $level_in_tokenizer = 0;
+ $slevel_in_tokenizer = 0;
+ $rslevel_stack = [];
+
+ # TV6:
+ $last_nonblank_container_type = '';
+ $last_nonblank_type_sequence = '';
+ $last_last_nonblank_token = ';';
+ $last_last_nonblank_type = ';';
+ $last_last_nonblank_block_type = '';
+ $last_last_nonblank_container_type = '';
+ $last_last_nonblank_type_sequence = '';
+ $last_nonblank_prototype = "";
+ }
+
+ sub save_tokenizer_state {
+
+ my $rTV1 = [
+ $block_type, $container_type, $expecting,
+ $i, $i_tok, $input_line,
+ $input_line_number, $last_nonblank_i, $max_token_index,
+ $next_tok, $next_type, $peeked_ahead,
+ $prototype, $rhere_target_list, $rtoken_map,
+ $rtoken_type, $rtokens, $tok,
+ $type, $type_sequence, $indent_flag,
+ ];
+
+ my $rTV2 = [
+ $routput_token_list, $routput_token_type,
+ $routput_block_type, $routput_container_type,
+ $routput_type_sequence, $routput_indent_flag,
+ ];
+
+ my $rTV3 = [
+ $in_quote, $quote_type,
+ $quote_character, $quote_pos,
+ $quote_depth, $quoted_string_1,
+ $quoted_string_2, $allowed_quote_modifiers,
+ ];
+
+ my $rTV4 =
+ [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
+
+ my $rTV5 = [
+ $nesting_token_string, $nesting_type_string,
+ $nesting_block_string, $nesting_block_flag,
+ $nesting_list_string, $nesting_list_flag,
+ $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
+ $in_statement_continuation, $level_in_tokenizer,
+ $slevel_in_tokenizer, $rslevel_stack,
+ ];
+
+ my $rTV6 = [
+ $last_nonblank_container_type,
+ $last_nonblank_type_sequence,
+ $last_last_nonblank_token,
+ $last_last_nonblank_type,
+ $last_last_nonblank_block_type,
+ $last_last_nonblank_container_type,
+ $last_last_nonblank_type_sequence,
+ $last_nonblank_prototype,
+ ];
+ return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
+ }
+
+ sub restore_tokenizer_state {
+ my ($rstate) = @_;
+ my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
+ (
+ $block_type, $container_type, $expecting,
+ $i, $i_tok, $input_line,
+ $input_line_number, $last_nonblank_i, $max_token_index,
+ $next_tok, $next_type, $peeked_ahead,
+ $prototype, $rhere_target_list, $rtoken_map,
+ $rtoken_type, $rtokens, $tok,
+ $type, $type_sequence, $indent_flag,
+ ) = @{$rTV1};
-The following additional token types are defined:
+ (
+ $routput_token_list, $routput_token_type,
+ $routput_block_type, $routput_container_type,
+ $routput_type_sequence, $routput_type_sequence,
+ ) = @{$rTV2};
- type meaning
- b blank (white space)
- { indent: opening structural curly brace or square bracket or paren
- (code block, anonymous hash reference, or anonymous array reference)
- } outdent: right structural curly brace or square bracket or paren
- [ left non-structural square bracket (enclosing an array index)
- ] right non-structural square bracket
- ( left non-structural paren (all but a list right of an =)
- ) right non-structural parena
- L left non-structural curly brace (enclosing a key)
- R right non-structural curly brace
- ; terminal semicolon
- f indicates a semicolon in a "for" statement
- h here_doc operator <<
- # a comment
- Q indicates a quote or pattern
- q indicates a qw quote block
- k a perl keyword
- C user-defined constant or constant function (with void prototype = ())
- U user-defined function taking parameters
- G user-defined function taking block parameter (like grep/map/eval)
- M (unused, but reserved for subroutine definition name)
- P (unused, but -html uses it to label pod text)
- t type indicater such as %,$,@,*,&,sub
- w bare word (perhaps a subroutine call)
- i identifier of some type (with leading %, $, @, *, &, sub, -> )
- n a number
- v a v-string
- F a file test operator (like -e)
- Y File handle
- Z identifier in indirect object slot: may be file handle, object
- J LABEL: code block label
- j LABEL after next, last, redo, goto
- p unary +
- m unary -
- pp pre-increment operator ++
- mm pre-decrement operator --
- A : used as attribute separator
-
- Here are the '_line_type' codes used internally:
- SYSTEM - system-specific code before hash-bang line
- CODE - line of perl code (including comments)
- POD_START - line starting pod, such as '=head'
- POD - pod documentation text
- POD_END - last line of pod section, '=cut'
- HERE - text of here-document
- HERE_END - last line of here-doc (target word)
- FORMAT - format section
- FORMAT_END - last line of format section, '.'
- DATA_START - __DATA__ line
- DATA - unidentified text following __DATA__
- END_START - __END__ line
- END - unidentified text following __END__
- ERROR - we are in big trouble, probably not a perl script
-END_OF_LIST
-}
+ (
+ $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
+ ) = @{$rTV3};
-# This is a currently unused debug routine
-sub dump_functions {
+ ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
+ @{$rTV4};
- my $fh = *STDOUT;
- my ( $pkg, $sub );
- foreach $pkg ( keys %is_user_function ) {
- print $fh "\nnon-constant subs in package $pkg\n";
+ (
+ $nesting_token_string, $nesting_type_string,
+ $nesting_block_string, $nesting_block_flag,
+ $nesting_list_string, $nesting_list_flag,
+ $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
+ $in_statement_continuation, $level_in_tokenizer,
+ $slevel_in_tokenizer, $rslevel_stack,
+ ) = @{$rTV5};
- foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
- my $msg = "";
- if ( $is_block_list_function{$pkg}{$sub} ) {
- $msg = 'block_list';
- }
+ (
+ $last_nonblank_container_type,
+ $last_nonblank_type_sequence,
+ $last_last_nonblank_token,
+ $last_last_nonblank_type,
+ $last_last_nonblank_block_type,
+ $last_last_nonblank_container_type,
+ $last_last_nonblank_type_sequence,
+ $last_nonblank_prototype,
+ ) = @{$rTV6};
+ }
- if ( $is_block_function{$pkg}{$sub} ) {
- $msg = 'block';
- }
- print $fh "$sub $msg\n";
- }
+ sub get_indentation_level {
+
+ # patch to avoid reporting error if indented if is not terminated
+ if ($indented_if_level) { return $level_in_tokenizer - 1 }
+ return $level_in_tokenizer;
}
- foreach $pkg ( keys %is_constant ) {
- print $fh "\nconstants and constant subs in package $pkg\n";
+ sub reset_indentation_level {
+ $level_in_tokenizer = $_[0];
+ $slevel_in_tokenizer = $_[0];
+ push @{$rslevel_stack}, $slevel_in_tokenizer;
+ }
- foreach $sub ( keys %{ $is_constant{$pkg} } ) {
- print $fh "$sub\n";
- }
+ sub peeked_ahead {
+ $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
}
-}
-sub prepare_for_a_new_file {
- $saw_negative_indentation = 0;
- $id_scan_state = '';
- $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
- $last_nonblank_token = ';'; # the only possible starting state which
- $last_nonblank_type = ';'; # will make a leading brace a code block
- $last_nonblank_block_type = '';
- $last_nonblank_container_type = '';
- $last_nonblank_type_sequence = '';
- $last_last_nonblank_token = ';';
- $last_last_nonblank_type = ';';
- $last_last_nonblank_block_type = '';
- $last_last_nonblank_container_type = '';
- $last_last_nonblank_type_sequence = '';
- $last_nonblank_prototype = "";
- $identifier = '';
- $in_quote = 0; # flag telling if we are chasing a quote, and what kind
- $quote_type = 'Q';
- $quote_character = ""; # character we seek if chasing a quote
- $quote_pos = 0; # next character index to check for case of alphanum char
- $quote_depth = 0;
- $allowed_quote_modifiers = "";
- $paren_depth = 0;
- $brace_depth = 0;
- $square_bracket_depth = 0;
- $current_package = "main";
- @current_depth[ 0 .. $#closing_brace_names ] =
- (0) x scalar @closing_brace_names;
- @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
- ( 0 .. $#closing_brace_names );
- @current_sequence_number = ();
+ # ------------------------------------------------------------
+ # end of tokenizer variable access and manipulation routines
+ # ------------------------------------------------------------
- $paren_type[$paren_depth] = '';
- $paren_semicolon_count[$paren_depth] = 0;
- $brace_type[$brace_depth] = ';'; # identify opening brace as code block
- $brace_structural_type[$brace_depth] = '';
- $brace_statement_type[$brace_depth] = "";
- $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
- $paren_structural_type[$brace_depth] = '';
- $square_bracket_type[$square_bracket_depth] = '';
- $square_bracket_structural_type[$square_bracket_depth] = '';
- $brace_package[$paren_depth] = $current_package;
- %is_constant = (); # user-defined constants
- %is_user_function = (); # user-defined functions
- %user_function_prototype = (); # their prototypes
- %is_block_function = ();
- %is_block_list_function = ();
- %saw_function_definition = ();
- $unexpected_error_count = 0;
- $want_paren = "";
- $context = UNKNOWN_CONTEXT;
- @slevel_stack = ();
- $ci_string_in_tokenizer = "";
- $continuation_string_in_tokenizer = "0";
- $in_statement_continuation = 0;
- @lower_case_labels_at = ();
- $saw_v_string = 0; # for warning of v-strings on older perl
- $nesting_token_string = "";
- $nesting_type_string = "";
- $nesting_block_string = '1'; # initially in a block
- $nesting_block_flag = 1;
- $nesting_list_string = '0'; # initially not in a list
- $nesting_list_flag = 0; # initially not in a list
- $nearly_matched_here_target_at = undef;
-}
-
-sub get_quote_target {
- return matching_end_token($quote_character);
-}
-
-sub get_indentation_level {
- return $level_in_tokenizer;
-}
-
-sub reset_indentation_level {
- $level_in_tokenizer = $_[0];
- $slevel_in_tokenizer = $_[0];
- push @slevel_stack, $slevel_in_tokenizer;
-}
-
-{ # begin tokenize_this_line
+ # ------------------------------------------------------------
+ # beginning of various scanner interface routines
+ # ------------------------------------------------------------
+ sub scan_replacement_text {
- use constant BRACE => 0;
- use constant SQUARE_BRACKET => 1;
- use constant PAREN => 2;
- use constant QUESTION_COLON => 3;
+ # check for here-docs in replacement text invoked by
+ # a substitution operator with executable modifier 'e'.
+ #
+ # given:
+ # $replacement_text
+ # return:
+ # $rht = reference to any here-doc targets
+ my ($replacement_text) = @_;
+
+ # quick check
+ return undef unless ( $replacement_text =~ /<</ );
+
+ write_logfile_entry("scanning replacement text for here-doc targets\n");
+
+ # save the logger object for error messages
+ my $logger_object = $tokenizer_self->{_logger_object};
+
+ # localize all package variables
+ local (
+ $tokenizer_self, $last_nonblank_token,
+ $last_nonblank_type, $last_nonblank_block_type,
+ $statement_type, $in_attribute_list,
+ $current_package, $context,
+ %is_constant, %is_user_function,
+ %user_function_prototype, %is_block_function,
+ %is_block_list_function, %saw_function_definition,
+ $brace_depth, $paren_depth,
+ $square_bracket_depth, @current_depth,
+ @total_depth, $total_depth,
+ @nesting_sequence_number, @current_sequence_number,
+ @paren_type, @paren_semicolon_count,
+ @paren_structural_type, @brace_type,
+ @brace_structural_type, @brace_context,
+ @brace_package, @square_bracket_type,
+ @square_bracket_structural_type, @depth_array,
+ @starting_line_of_current_depth, @nested_ternary_flag,
+ @nested_statement_type,
+ );
- my (
- $block_type, $container_type, $expecting,
- $here_doc_target, $here_quote_character, $i,
- $i_tok, $last_nonblank_i, $next_tok,
- $next_type, $prototype, $rtoken_map,
- $rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
- );
+ # save all lexical variables
+ my $rstate = save_tokenizer_state();
+ _decrement_count(); # avoid error check for multiple tokenizers
- my @output_token_list = (); # stack of output token indexes
- my @output_token_type = (); # token types
- my @output_block_type = (); # types of code block
- my @output_container_type = (); # paren types, such as if, elsif, ..
- my @output_type_sequence = (); # nesting sequential number
+ # make a new tokenizer
+ my $rOpts = {};
+ my $rpending_logfile_message;
+ my $source_object =
+ Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
+ $rpending_logfile_message );
+ my $tokenizer = Perl::Tidy::Tokenizer->new(
+ source_object => $source_object,
+ logger_object => $logger_object,
+ starting_line_number => $input_line_number,
+ );
- my @here_target_list = (); # list of here-doc target strings
+ # scan the replacement text
+ 1 while ( $tokenizer->get_line() );
+
+ # remove any here doc targets
+ my $rht = undef;
+ if ( $tokenizer_self->{_in_here_doc} ) {
+ $rht = [];
+ push @{$rht},
+ [
+ $tokenizer_self->{_here_doc_target},
+ $tokenizer_self->{_here_quote_character}
+ ];
+ if ( $tokenizer_self->{_rhere_target_list} ) {
+ push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
+ $tokenizer_self->{_rhere_target_list} = undef;
+ }
+ $tokenizer_self->{_in_here_doc} = undef;
+ }
+
+ # now its safe to report errors
+ $tokenizer->report_tokenization_errors();
+
+ # restore all tokenizer lexical variables
+ restore_tokenizer_state($rstate);
+
+ # return the here doc targets
+ return $rht;
+ }
- # ------------------------------------------------------------
- # beginning of various scanner interfaces to simplify coding
- # ------------------------------------------------------------
sub scan_bare_identifier {
( $i, $tok, $type, $prototype ) =
scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
- $rtoken_map );
+ $rtoken_map, $max_token_index );
}
sub scan_identifier {
( $i, $tok, $type, $id_scan_state, $identifier ) =
- scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens );
+ scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
+ $max_token_index, $expecting, $paren_type[$paren_depth] );
}
sub scan_id {
( $i, $tok, $type, $id_scan_state ) =
scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
- $id_scan_state );
+ $id_scan_state, $max_token_index );
}
- my $number;
-
sub scan_number {
+ my $number;
( $i, $type, $number ) =
- scan_number_do( $input_line, $i, $rtoken_map, $type );
+ scan_number_do( $input_line, $i, $rtoken_map, $type,
+ $max_token_index );
+ return $number;
}
# a sub to warn if token found where term expected
sub error_if_expecting_TERM {
if ( $expecting == TERM ) {
if ( $really_want_term{$last_nonblank_type} ) {
- unexpected( $tok, "term", $i_tok, $last_nonblank_i );
+ unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
+ $rtoken_type, $input_line );
1;
}
}
sub error_if_expecting_OPERATOR {
if ( $expecting == OPERATOR ) {
my $thing = defined $_[0] ? $_[0] : $tok;
- unexpected( $thing, "operator", $i_tok, $last_nonblank_i );
+ unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
+ $rtoken_map, $rtoken_type, $input_line );
if ( $i_tok == 0 ) {
interrupt_logfile();
warning("Missing ';' above?\n");
# keyword ( .... ) { BLOCK }
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
my %is_blocktype_with_paren;
- @_ = qw(if elsif unless while until for foreach switch case given when);
+ @_ =
+ qw(if elsif unless while until for foreach switch case given when catch);
@is_blocktype_with_paren{@_} = (1) x scalar(@_);
# ------------------------------------------------------------
## '^=' => undef,
## '|=' => undef,
## '||=' => undef,
+## '//=' => undef,
## '~' => undef,
+## '~~' => undef,
+## '!~~' => undef,
'>' => sub {
error_if_expecting_TERM()
$tokenizer_self->{_saw_perl_dash_w} = 1;
}
- # Check for indentifier in indirect object slot
+ # Check for identifier in indirect object slot
# (vorboard.pl, sort.t). Something like:
# /^(print|printf|sort|exec|system)$/
if (
$container_type = $want_paren;
$want_paren = "";
}
+ elsif ( $statement_type =~ /^sub\b/ ) {
+ $container_type = $statement_type;
+ }
else {
$container_type = $last_nonblank_token;
# error; for example, we might have a constant pi and
# invoke it with pi() or just pi;
my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
+ find_next_nonblank_token( $i, $rtokens,
+ $max_token_index );
if ( $next_nonblank_token ne ')' ) {
my $hint;
error_if_expecting_OPERATOR('(');
} ## end if ( $expecting == OPERATOR...
}
$paren_type[$paren_depth] = $container_type;
- $type_sequence = increase_nesting_depth( PAREN, $i_tok );
+ ( $type_sequence, $indent_flag ) =
+ increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
# propagate types down through nested parens
# for example: the second paren in 'if ((' would be structural
},
')' => sub {
- $type_sequence = decrease_nesting_depth( PAREN, $i_tok );
+ ( $type_sequence, $indent_flag ) =
+ decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
if ( $paren_structural_type[$paren_depth] eq '{' ) {
$type = '}';
$container_type = $paren_type[$paren_depth];
+ # restore statement type as 'sub' at closing paren of a signature
+ # so that a subsequent ':' is identified as an attribute
+ if ( $container_type =~ /^sub\b/ ) {
+ $statement_type = $container_type;
+ }
+
# /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
my $num_sc = $paren_semicolon_count[$paren_depth];
if ( $last_nonblank_type eq ',' ) {
complain("Repeated ','s \n");
}
+
+ # patch for operator_expected: note if we are in the list (use.t)
+ if ( $statement_type eq 'use' ) { $statement_type = '_use' }
## FIXME: need to move this elsewhere, perhaps check after a '('
## elsif ($last_nonblank_token eq '(') {
## warning("Leading ','s illegal in some versions of perl\n");
';' => sub {
$context = UNKNOWN_CONTEXT;
$statement_type = '';
+ $want_paren = "";
# /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } )
'/' => sub {
my $is_pattern;
- if ( $expecting == UNKNOWN ) { # indeterminte, must guess..
+ if ( $expecting == UNKNOWN ) { # indeterminate, must guess..
my $msg;
( $is_pattern, $msg ) =
- guess_if_pattern_or_division( $i, $rtokens, $rtoken_map );
+ guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
+ $max_token_index );
if ($msg) {
write_diagnostics("DIVIDE:$msg\n");
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[cgimosx]';
+ $allowed_quote_modifiers = '[msixpodualngc]';
}
else { # not a pattern; check for a /= token
$type = $tok;
}
- #DEBUG - collecting info on what tokens follow a divide
- # for development of guessing algorithm
- #if ( numerator_expected( $i, $rtokens ) < 0 ) {
- # #write_diagnostics( "DIVIDE? $input_line\n" );
- #}
+ #DEBUG - collecting info on what tokens follow a divide
+ # for development of guessing algorithm
+ #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
+ # #write_diagnostics( "DIVIDE? $input_line\n" );
+ #}
}
},
'{' => sub {
# check for syntax error here;
unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
- my $list = join( ' ', sort keys %is_blocktype_with_paren );
- warning(
- "syntax error at ') {', didn't see one of: $list\n");
+ if ( $tokenizer_self->{'_extended_syntax'} ) {
+
+ # we append a trailing () to mark this as an unknown
+ # block type. This allows perltidy to format some
+ # common extensions of perl syntax.
+ # This is used by sub code_block_type
+ $last_nonblank_token .= '()';
+ }
+ else {
+ my $list =
+ join( ' ', sort keys %is_blocktype_with_paren );
+ warning(
+"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
+ );
+ }
}
}
# allow paren-less identifier after 'when'
# if the brace is preceded by a space
if ( $statement_type eq 'when'
- && $last_nonblank_type eq 'i'
+ && $last_nonblank_type eq 'i'
&& $last_last_nonblank_type eq 'k'
&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
{
# which will be blank for an anonymous hash
else {
- $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type );
+ $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
+ $max_token_index );
# patch to promote bareword type to function taking block
if ( $block_type
&& $last_nonblank_type eq 'w'
&& $last_nonblank_i >= 0 )
{
- if ( $output_token_type[$last_nonblank_i] eq 'w' ) {
- $output_token_type[$last_nonblank_i] = 'G';
+ if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
+ $routput_token_type->[$last_nonblank_i] = 'G';
}
}
}
}
}
- $brace_type[ ++$brace_depth ] = $block_type;
- $brace_package[$brace_depth] = $current_package;
- $type_sequence = increase_nesting_depth( BRACE, $i_tok );
+
+ $brace_type[ ++$brace_depth ] = $block_type;
+ $brace_package[$brace_depth] = $current_package;
$brace_structural_type[$brace_depth] = $type;
$brace_context[$brace_depth] = $context;
- $brace_statement_type[$brace_depth] = $statement_type;
+ ( $type_sequence, $indent_flag ) =
+ increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
},
'}' => sub {
$block_type = $brace_type[$brace_depth];
# can happen on brace error (caught elsewhere)
else {
}
- $type_sequence = decrease_nesting_depth( BRACE, $i_tok );
+ ( $type_sequence, $indent_flag ) =
+ decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
if ( $brace_structural_type[$brace_depth] eq 'L' ) {
$type = 'R';
}
- # propagate type information for 'do' and 'eval' blocks.
- # This is necessary to enable us to know if an operator
- # or term is expected next
- if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
- $tok = $brace_type[$brace_depth];
+ # propagate type information for 'do' and 'eval' blocks, and also
+ # for smartmatch operator. This is necessary to enable us to know
+ # if an operator or term is expected next.
+ if ( $is_block_operator{$block_type} ) {
+ $tok = $block_type;
}
- $context = $brace_context[$brace_depth];
- $statement_type = $brace_statement_type[$brace_depth];
+ $context = $brace_context[$brace_depth];
if ( $brace_depth > 0 ) { $brace_depth--; }
},
'&' => sub { # maybe sub call? start looking
# got mistaken as a q operator in an early version:
# print BODY &q(<<'EOT');
if ( $expecting != OPERATOR ) {
- scan_identifier();
+
+ # But only look for a sub call if we are expecting a term or
+ # if there is no existing space after the &.
+ # For example we probably don't want & as sub call here:
+ # Fcntl::S_IRUSR & $mode;
+ if ( $expecting == TERM || $next_type ne 'b' ) {
+ scan_identifier();
+ }
}
else {
}
if ( $expecting != OPERATOR ) {
( $i, $type ) =
find_angle_operator_termination( $input_line, $i, $rtoken_map,
- $expecting );
+ $expecting, $max_token_index );
+ if ( $type eq '<' && $expecting == TERM ) {
+ error_if_expecting_TERM();
+ interrupt_logfile();
+ warning("Unterminated <> operator?\n");
+ resume_logfile();
+ }
}
else {
}
my $msg;
( $is_pattern, $msg ) =
- guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map );
+ guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
+ $max_token_index );
if ($msg) { write_logfile_entry($msg) }
}
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this
+ $allowed_quote_modifiers = '[msixpodualngc]';
}
else {
-
- $type_sequence =
- increase_nesting_depth( QUESTION_COLON, $i_tok );
+ ( $type_sequence, $indent_flag ) =
+ increase_nesting_depth( QUESTION_COLON,
+ $$rtoken_map[$i_tok] );
}
},
'*' => sub { # typeglob, or multiply?
# ATTRS: check for a ':' which introduces an attribute list
# (this might eventually get its own token type)
- elsif ( $statement_type =~ /^sub/ ) {
- $type = 'A';
+ elsif ( $statement_type =~ /^sub\b/ ) {
+ $type = 'A';
+ $in_attribute_list = 1;
}
# check for scalar attribute, such as
elsif ($is_my_our{$statement_type}
&& $current_depth[QUESTION_COLON] == 0 )
{
- $type = 'A';
+ $type = 'A';
+ $in_attribute_list = 1;
}
# otherwise, it should be part of a ?/: operator
else {
- $type_sequence =
- decrease_nesting_depth( QUESTION_COLON, $i_tok );
+ ( $type_sequence, $indent_flag ) =
+ decrease_nesting_depth( QUESTION_COLON,
+ $$rtoken_map[$i_tok] );
if ( $last_nonblank_token eq '?' ) {
warning("Syntax error near ? :\n");
}
'+' => sub { # what kind of plus?
if ( $expecting == TERM ) {
- scan_number();
+ my $number = scan_number();
# unary plus is safest assumption if not a number
if ( !defined($number) ) { $type = 'p'; }
'[' => sub {
$square_bracket_type[ ++$square_bracket_depth ] =
$last_nonblank_token;
- $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok );
+ ( $type_sequence, $indent_flag ) =
+ increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
# It may seem odd, but structural square brackets have
# type '{' and '}'. This simplifies the indentation logic.
$square_bracket_structural_type[$square_bracket_depth] = $type;
},
']' => sub {
- $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok );
+ ( $type_sequence, $indent_flag ) =
+ decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
{
$type = '}';
}
+
+ # propagate type information for smartmatch operator. This is
+ # necessary to enable us to know if an operator or term is expected
+ # next.
+ if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
+ $tok = $square_bracket_type[$square_bracket_depth];
+ }
+
if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
},
'-' => sub { # what kind of minus?
if ( ( $expecting != OPERATOR )
&& $is_file_test_operator{$next_tok} )
{
- $i++;
- $tok .= $next_tok;
- $type = 'F';
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i + 1, $rtokens,
+ $max_token_index );
+
+ # check for a quoted word like "-w=>xx";
+ # it is sufficient to just check for a following '='
+ if ( $next_nonblank_token eq '=' ) {
+ $type = 'm';
+ }
+ else {
+ $i++;
+ $tok .= $next_tok;
+ $type = 'F';
+ }
}
elsif ( $expecting == TERM ) {
- scan_number();
+ my $number = scan_number();
# maybe part of bareword token? unary is safest
if ( !defined($number) ) { $type = 'm'; }
; # here-doc not possible if end of line
if ( $expecting != OPERATOR ) {
- my ($found_target);
- ( $found_target, $here_doc_target, $here_quote_character, $i ) =
- find_here_doc( $expecting, $i, $rtokens, $rtoken_map );
+ my ( $found_target, $here_doc_target, $here_quote_character,
+ $saw_error );
+ (
+ $found_target, $here_doc_target, $here_quote_character, $i,
+ $saw_error
+ )
+ = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
+ $max_token_index );
if ($found_target) {
- push @here_target_list,
+ push @{$rhere_target_list},
[ $here_doc_target, $here_quote_character ];
$type = 'h';
if ( length($here_doc_target) > 80 ) {
}
}
elsif ( $expecting == TERM ) {
+ unless ($saw_error) {
- # shouldn't happen..
- warning("Program bug; didn't find here doc target\n");
- report_definite_bug();
+ # shouldn't happen..
+ warning("Program bug; didn't find here doc target\n");
+ report_definite_bug();
+ }
}
}
else {
if ( $expecting == TERM ) { $type = 'pp' }
elsif ( $expecting == UNKNOWN ) {
my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
}
},
if ( $last_nonblank_type eq $tok ) {
complain("Repeated '=>'s \n");
}
+
+ # patch for operator_expected: note if we are in the list (use.t)
+ # TODO: make version numbers a new token type
+ if ( $statement_type eq 'use' ) { $statement_type = '_use' }
},
# type = 'mm' for pre-decrement, '--' for post-decrement
if ( $expecting == TERM ) { $type = 'mm' }
elsif ( $expecting == UNKNOWN ) {
my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
}
},
error_if_expecting_TERM()
if ( $expecting == TERM );
},
+
+ '//' => sub {
+ error_if_expecting_TERM()
+ if ( $expecting == TERM );
+ },
};
# ------------------------------------------------------------
# These block types terminate statements and do not need a trailing
# semicolon
- # patched for SWITCH/CASE:
+ # patched for SWITCH/CASE/
my %is_zero_continuation_block_type;
- @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
+ @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
if elsif else unless while until for foreach switch case given when);
@is_zero_continuation_block_type{@_} = (1) x scalar(@_);
@is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
my %is_logical_container;
- @_ = qw(if elsif unless while and or not && ! || for foreach);
+ @_ = qw(if elsif unless while and or err not && ! || for foreach);
@is_logical_container{@_} = (1) x scalar(@_);
my %is_binary_type;
@is_binary_type{@_} = (1) x scalar(@_);
my %is_binary_keyword;
- @_ = qw(and or eq ne cmp);
+ @_ = qw(and or err eq ne cmp);
@is_binary_keyword{@_} = (1) x scalar(@_);
# 'L' is token for opening { at hash key
'__DATA__' => '_in_data',
);
- # ref: camel 3 p 147,
+ # original ref: camel 3 p 147,
# but perl may accept undocumented flags
+ # perl 5.10 adds 'p' (preserve)
+ # Perl version 5.22 added 'n'
+ # From http://perldoc.perl.org/perlop.html we have
+ # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
+ # s/PATTERN/REPLACEMENT/msixpodualngcer
+ # y/SEARCHLIST/REPLACEMENTLIST/cdsr
+ # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
+ # qr/STRING/msixpodualn
my %quote_modifiers = (
- 's' => '[cegimosx]',
- 'y' => '[cds]',
- 'tr' => '[cds]',
- 'm' => '[cgimosx]',
- 'qr' => '[imosx]',
+ 's' => '[msixpodualngcer]',
+ 'y' => '[cdsr]',
+ 'tr' => '[cdsr]',
+ 'm' => '[msixpodualngc]',
+ 'qr' => '[msixpodualn]',
'q' => "",
'qq' => "",
'qw' => "",
# For example, I used 'v' for v-strings.
#
# *. Implement coding to recognize the $type of the token in this routine.
- # This is the hardest part, and is best done by immitating or modifying
+ # This is the hardest part, and is best done by imitating or modifying
# some of the existing coding. For example, to recognize v-strings, I
# patched 'sub scan_bare_identifier' to recognize v-strings beginning with
# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
# *, then run diff between the output of the previous version and the
# current version.
#
+ # *. For another example, search for the smartmatch operator '~~'
+ # with your editor to see where updates were made for it.
+ #
# -----------------------------------------------------------------------
my $line_of_tokens = shift;
# extract line number for use in error messages
$input_line_number = $line_of_tokens->{_line_number};
+ # reinitialize for multi-line quote
+ $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
+
# check for pod documentation
if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
# must not be in multi-line quote
- # and must not be in an eqn
+ # and must not be in an equation
if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
{
$tokenizer_self->{_in_pod} = 1;
$input_line =~ s/^\s*//; # trim left end
}
+ # Set a flag to indicate if we might be at an __END__ or __DATA__ line
+ # This will be used below to avoid quoting a bare word followed by
+ # a fat comma.
+ my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/;
+
+ # update the copy of the line for use in error messages
+ # This must be exactly what we give the pre_tokenizer
+ $tokenizer_self->{_line_text} = $input_line;
+
# re-initialize for the main loop
- @output_token_list = (); # stack of output token indexes
- @output_token_type = (); # token types
- @output_block_type = (); # types of code block
- @output_container_type = (); # paren types, such as if, elsif, ..
- @output_type_sequence = (); # nesting sequential number
+ $routput_token_list = []; # stack of output token indexes
+ $routput_token_type = []; # token types
+ $routput_block_type = []; # types of code block
+ $routput_container_type = []; # paren types, such as if, elsif, ..
+ $routput_type_sequence = []; # nesting sequential number
+
+ $rhere_target_list = [];
$tok = $last_nonblank_token;
$type = $last_nonblank_type;
$block_type = $last_nonblank_block_type;
$container_type = $last_nonblank_container_type;
$type_sequence = $last_nonblank_type_sequence;
- @here_target_list = (); # list of here-doc target strings
-
- $peeked_ahead = 0;
+ $indent_flag = 0;
+ $peeked_ahead = 0;
# tokenization is done in two stages..
# stage 1 is a very simple pre-tokenization
}
# start by breaking the line into pre-tokens
- ( $rpretokens, $rpretoken_map, $rpretoken_type ) =
+ ( $rtokens, $rtoken_map, $rtoken_type ) =
pre_tokenize( $input_line, $max_tokens_wanted );
- $max_token_index = scalar(@$rpretokens) - 1;
- push( @$rpretokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
- push( @$rpretoken_map, 0, 0, 0 ); # shouldn't be referenced
- push( @$rpretoken_type, 'b', 'b', 'b' );
-
- # temporary copies while coding change is underway
- ( $rtokens, $rtoken_map, $rtoken_type ) =
- ( $rpretokens, $rpretoken_map, $rpretoken_type );
+ $max_token_index = scalar(@$rtokens) - 1;
+ push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
+ push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
+ push( @$rtoken_type, 'b', 'b', 'b' );
# initialize for main loop
for $i ( 0 .. $max_token_index + 3 ) {
- $output_token_type[$i] = "";
- $output_block_type[$i] = "";
- $output_container_type[$i] = "";
- $output_type_sequence[$i] = "";
+ $routput_token_type->[$i] = "";
+ $routput_block_type->[$i] = "";
+ $routput_container_type->[$i] = "";
+ $routput_type_sequence->[$i] = "";
+ $routput_indent_flag->[$i] = 0;
}
$i = -1;
$i_tok = -1;
if ($in_quote) { # continue looking for end of a quote
$type = $quote_type;
- unless (@output_token_list) { # initialize if continuation line
- push( @output_token_list, $i );
- $output_token_type[$i] = $type;
+ unless ( @{$routput_token_list} )
+ { # initialize if continuation line
+ push( @{$routput_token_list}, $i );
+ $routput_token_type->[$i] = $type;
}
$tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
# scan for the end of the quote or pattern
- ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
- do_quote( $i, $in_quote, $quote_character, $quote_pos,
- $quote_depth, $rtokens, $rtoken_map );
+ (
+ $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string_1, $quoted_string_2
+ )
+ = do_quote(
+ $i, $in_quote, $quote_character,
+ $quote_pos, $quote_depth, $quoted_string_1,
+ $quoted_string_2, $rtokens, $rtoken_map,
+ $max_token_index
+ );
# all done if we didn't find it
last if ($in_quote);
+ # save pattern and replacement text for rescanning
+ my $qs1 = $quoted_string_1;
+ my $qs2 = $quoted_string_2;
+
# re-initialize for next search
$quote_character = '';
$quote_pos = 0;
$quote_type = 'Q';
+ $quoted_string_1 = "";
+ $quoted_string_2 = "";
last if ( ++$i > $max_token_index );
# look for any modifiers
# check for exact quote modifiers
if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
my $str = $$rtokens[$i];
- while ( $str =~ /\G$allowed_quote_modifiers/gc ) { }
+ my $saw_modifier_e;
+ while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
+ my $pos = pos($str);
+ my $char = substr( $str, $pos - 1, 1 );
+ $saw_modifier_e ||= ( $char eq 'e' );
+ }
+
+ # For an 'e' quote modifier we must scan the replacement
+ # text for here-doc targets.
+ if ($saw_modifier_e) {
+
+ my $rht = scan_replacement_text($qs1);
+
+ # Change type from 'Q' to 'h' for quotes with
+ # here-doc targets so that the formatter (see sub
+ # print_line_of_tokens) will not make any line
+ # breaks after this point.
+ if ($rht) {
+ push @{$rhere_target_list}, @{$rht};
+ $type = 'h';
+ if ( $i_tok < 0 ) {
+ my $ilast = $routput_token_list->[-1];
+ $routput_token_type->[$ilast] = $type;
+ }
+ }
+ }
if ( defined( pos($str) ) ) {
}
}
- unless ( $tok =~ /^\s*$/ ) {
+ unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
# try to catch some common errors
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
}
}
- $last_last_nonblank_token = $last_nonblank_token;
- $last_last_nonblank_type = $last_nonblank_type;
- $last_last_nonblank_block_type = $last_nonblank_block_type;
+ $last_last_nonblank_token = $last_nonblank_token;
+ $last_last_nonblank_type = $last_nonblank_type;
+ $last_last_nonblank_block_type = $last_nonblank_block_type;
$last_last_nonblank_container_type =
$last_nonblank_container_type;
$last_last_nonblank_type_sequence =
# store previous token type
if ( $i_tok >= 0 ) {
- $output_token_type[$i_tok] = $type;
- $output_block_type[$i_tok] = $block_type;
- $output_container_type[$i_tok] = $container_type;
- $output_type_sequence[$i_tok] = $type_sequence;
+ $routput_token_type->[$i_tok] = $type;
+ $routput_block_type->[$i_tok] = $block_type;
+ $routput_container_type->[$i_tok] = $container_type;
+ $routput_type_sequence->[$i_tok] = $type_sequence;
+ $routput_indent_flag->[$i_tok] = $indent_flag;
}
my $pre_tok = $$rtokens[$i]; # get the next pre-token
my $pre_type = $$rtoken_type[$i]; # and type
$block_type = ""; # blank for all tokens except code block braces
$container_type = ""; # blank for all tokens except some parens
$type_sequence = ""; # blank for all tokens except ?/:
+ $indent_flag = 0;
$prototype = ""; # blank for all tokens except user defined subs
$i_tok = $i;
# this pre-token will start an output token
- push( @output_token_list, $i_tok );
+ push( @{$routput_token_list}, $i_tok );
# continue gathering identifier if necessary
# but do not start on blanks and comments
# I have allowed tokens starting with <, such as <=,
# because I don't think these could be valid angle operators.
# test file: storrs4.pl
- my $test_tok = $tok . $$rtokens[ $i + 1 ];
+ my $test_tok = $tok . $$rtokens[ $i + 1 ];
+ my $combine_ok = $is_digraph{$test_tok};
+
+ # check for special cases which cannot be combined
+ if ($combine_ok) {
+
+ # '//' must be defined_or operator if an operator is expected.
+ # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
+ # could be migrated here for clarity
+
+ # Patch for RT#102371, misparsing a // in the following snippet:
+ # state $b //= ccc();
+ # The solution is to always accept the digraph (or trigraph) after
+ # token type 'Z' (possible file handle). The reason is that
+ # sub operator_expected gives TERM expected here, which is
+ # wrong in this case.
+ if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
+ my $next_type = $$rtokens[ $i + 1 ];
+ my $expecting =
+ operator_expected( $prev_type, $tok, $next_type );
+
+ # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
+ $combine_ok = 0 if ( $expecting == TERM );
+ }
+ }
if (
- $is_digraph{$test_tok}
+ $combine_ok
&& ( $test_tok ne '/=' ) # might be pattern
&& ( $test_tok ne 'x=' ) # might be $x
&& ( $test_tok ne '**' ) # typeglob?
$tok = $test_tok;
$i++;
}
+
+ # The only current tetragraph is the double diamond operator
+ # and its first three characters are not a trigraph, so
+ # we do can do a special test for it
+ elsif ( $test_tok eq '<<>' ) {
+ $test_tok .= $$rtokens[ $i + 2 ];
+ if ( $is_tetragraph{$test_tok} ) {
+ $tok = $test_tok;
+ $i += 2;
+ }
+ }
}
+
$type = $tok;
$next_tok = $$rtokens[ $i + 1 ];
$next_type = $$rtoken_type[ $i + 1 ];
$brace_type[$brace_depth], $paren_depth,
$paren_type[$paren_depth]
);
- print "TOKENIZE:(@debug_list)\n";
+ print STDOUT "TOKENIZE:(@debug_list)\n";
};
+ # turn off attribute list on first non-blank, non-bareword
+ if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
+
###############################################################
# We have the next token, $tok.
# Now we have to examine this token and decide what it is
if ( $pre_type eq 'w' ) {
$expecting = operator_expected( $prev_type, $tok, $next_type );
my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ # ATTRS: handle sub and variable attributes
+ if ($in_attribute_list) {
+
+ # treat bare word followed by open paren like qw(
+ if ( $next_nonblank_token eq '(' ) {
+ $in_quote = $quote_items{'q'};
+ $allowed_quote_modifiers = $quote_modifiers{'q'};
+ $type = 'q';
+ $quote_type = 'q';
+ next;
+ }
+
+ # handle bareword not followed by open paren
+ else {
+ $type = 'w';
+ next;
+ }
+ }
# quote a word followed by => operator
- if ( $next_nonblank_token eq '=' ) {
+ # unless the word __END__ or __DATA__ and the only word on
+ # the line.
+ if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) {
if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
if ( $is_constant{$current_package}{$tok} ) {
$type = 'C';
}
elsif ( $is_user_function{$current_package}{$tok} ) {
- $type = 'U';
+ $type = 'U';
$prototype =
$user_function_prototype{$current_package}{$tok};
}
elsif ( $tok =~ /^v\d+$/ ) {
$type = 'v';
- unless ($saw_v_string) { report_v_string($tok) }
+ report_v_string($tok);
}
else { $type = 'w' }
}
}
- # quote a bare word within braces..like xxx->{s}; note that we
- # must be sure this is not a structural brace, to avoid
- # mistaking {s} in the following for a quoted bare word:
- # for(@[){s}bla}BLA}
- if ( ( $last_nonblank_type eq 'L' )
- && ( $next_nonblank_token eq '}' ) )
+ # quote a bare word within braces..like xxx->{s}; note that we
+ # must be sure this is not a structural brace, to avoid
+ # mistaking {s} in the following for a quoted bare word:
+ # for(@[){s}bla}BLA}
+ # Also treat q in something like var{-q} as a bare word, not qoute operator
+ if (
+ $next_nonblank_token eq '}'
+ && (
+ $last_nonblank_type eq 'L'
+ || ( $last_nonblank_type eq 'm'
+ && $last_last_nonblank_type eq 'L' )
+ )
+ )
{
$type = 'w';
next;
$type = 'n';
}
}
-
+ elsif ( $tok_kw eq 'CORE::' ) {
+ $type = $tok = $tok_kw;
+ $i += 2;
+ }
elsif ( ( $tok eq 'strict' )
and ( $last_nonblank_token eq 'use' ) )
{
{
scan_bare_identifier();
my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
+ find_next_nonblank_token( $i, $rtokens,
+ $max_token_index );
if ($next_nonblank_token) {
if ( $is_keyword{$next_nonblank_token} ) {
- warning(
+
+ # Assume qw is used as a quote and okay, as in:
+ # use constant qw{ DEBUG 0 };
+ # Not worth trying to parse for just a warning
+
+ # NOTE: This warning is deactivated because recent
+ # versions of perl do not complain here, but
+ # the coding is retained for reference.
+ if ( 0 && $next_nonblank_token ne 'qw' ) {
+ warning(
"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
- );
+ );
+ }
}
# FIXME: could check for error in which next token is
# not a word (number, punctuation, ..)
else {
- $is_constant{$current_package}
- {$next_nonblank_token} = 1;
+ $is_constant{$current_package}{$next_nonblank_token}
+ = 1;
}
}
}
# various quote operators
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
+##NICOL PATCH
if ( $expecting == OPERATOR ) {
- # patch for paren-less for/foreach glitch, part 1
- # perl will accept this construct as valid:
+ # Be careful not to call an error for a qw quote
+ # where a parenthesized list is allowed. For example,
+ # it could also be a for/foreach construct such as
#
# foreach my $key qw\Uno Due Tres Quadro\ {
# print "Set $key\n";
# }
- unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
+ #
+
+ # Or it could be a function call.
+ # NOTE: Braces in something like &{ xxx } are not
+ # marked as a block, we might have a method call.
+ # &method(...), $method->(..), &{method}(...),
+ # $ref[2](list) is ok & short for $ref[2]->(list)
+ #
+ # See notes in 'sub code_block_type' and
+ # 'sub is_non_structural_brace'
+
+ unless (
+ $tok eq 'qw'
+ && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
+ || $is_for_foreach{$want_paren} )
+ )
{
error_if_expecting_OPERATOR();
}
elsif (
( $next_nonblank_token eq ':' )
&& ( $$rtokens[ $i_next + 1 ] ne ':' )
- && ( $i_next <= $max_token_index ) # colon on same line
+ && ( $i_next <= $max_token_index ) # colon on same line
&& label_ok()
)
{
- if ( $tok !~ /A-Z/ ) {
- push @lower_case_labels_at, $input_line_number;
+ if ( $tok !~ /[A-Z]/ ) {
+ push @{ $tokenizer_self->{_rlower_case_labels_at} },
+ $input_line_number;
}
$type = 'J';
$tok .= ':';
elsif ( $tok eq 'else' ) {
# patched for SWITCH/CASE
- if ( $last_nonblank_token ne ';'
+ if (
+ $last_nonblank_token ne ';'
&& $last_nonblank_block_type !~
- /^(if|elsif|unless|case|when)$/ )
+ /^(if|elsif|unless|case|when)$/
+
+ # patch to avoid an unwanted error message for
+ # the case of a parenless 'case' (RT 105484):
+ # switch ( 1 ) { case x { 2 } else { } }
+ && $statement_type !~
+ /^(if|elsif|unless|case|when)$/
+ )
{
warning(
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
# note: ';' '{' and '}' in list above
# because continues can follow bare blocks;
# ':' is labeled block
- warning("'$tok' should follow a block\n");
+ #
+ ############################################
+ # NOTE: This check has been deactivated because
+ # continue has an alternative usage for given/when
+ # blocks in perl 5.10
+ ## warning("'$tok' should follow a block\n");
+ ############################################
}
}
elsif ( $tok eq 'when' || $tok eq 'case' ) {
$statement_type = $tok; # next '{' is block
}
+
+ #
+ # indent trailing if/unless/while/until
+ # outdenting will be handled by later indentation loop
+## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
+##$opt_o = 1
+## if !(
+## $opt_b
+## || $opt_c
+## || $opt_d
+## || $opt_f
+## || $opt_i
+## || $opt_l
+## || $opt_o
+## || $opt_x
+## );
+## if ( $tok =~ /^(if|unless|while|until)$/
+## && $next_nonblank_token ne '(' )
+## {
+## $indent_flag = 1;
+## }
}
# check for inline label following
$type = 'U';
}
- # mark bare words following a file test operator as
- # something that will expect an operator next.
- # patch 072901: unless followed immediately by a paren,
- # in which case it must be a function call (pid.t)
- if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) {
- $type = 'C';
+ # underscore after file test operator is file handle
+ if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
+ $type = 'Z';
}
# patch for SWITCH/CASE if 'case' and 'when are
# not treated as keywords:
if (
(
- $tok eq 'case'
+ $tok eq 'case'
&& $brace_type[$brace_depth] eq 'switch'
)
|| ( $tok eq 'when'
$expecting = operator_expected( $prev_type, $tok, $next_type );
error_if_expecting_OPERATOR("Number")
if ( $expecting == OPERATOR );
- scan_number();
+ my $number = scan_number();
if ( !defined($number) ) {
# shouldn't happen - we should always get a number
# -----------------------------
if ( $i_tok >= 0 ) {
- $output_token_type[$i_tok] = $type;
- $output_block_type[$i_tok] = $block_type;
- $output_container_type[$i_tok] = $container_type;
- $output_type_sequence[$i_tok] = $type_sequence;
+ $routput_token_type->[$i_tok] = $type;
+ $routput_block_type->[$i_tok] = $block_type;
+ $routput_container_type->[$i_tok] = $container_type;
+ $routput_type_sequence->[$i_tok] = $type_sequence;
+ $routput_indent_flag->[$i_tok] = $indent_flag;
}
unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
my $container_environment = '';
my $im = -1; # previous $i value
my $num;
- my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
+ my $ci_string_sum = ones_count($ci_string_in_tokenizer);
-# =head1 Computing Token Indentation
+# Computing Token Indentation
#
# The final section of the tokenizer forms tokens and also computes
# parameters needed to find indentation. It is much easier to do it
# running value of this variable is $level_in_tokenizer.
#
# The total continuation is much more difficult to compute, and requires
-# several variables. These veriables are:
+# several variables. These variables are:
#
# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
# each indentation level, if there are intervening open secondary
# indentation level, if the level is of type BLOCK or not.
# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
# $nesting_list_string = a string of 1's and 0's indicating, for each
-# indentation level, if it is is appropriate for list formatting.
+# indentation level, if it is appropriate for list formatting.
# If so, continuation indentation is used to indent long list items.
# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
-# @slevel_stack = a stack of total nesting depths at each
+# @{$rslevel_stack} = a stack of total nesting depths at each
# structural indentation level, where "total nesting depth" means
# the nesting depth that would occur if every nesting token -- '{', '[',
# and '(' -- , regardless of context, is used to compute a nesting
$nesting_list_string_i, $nesting_token_string_i,
$nesting_type_string_i, );
- foreach $i (@output_token_list) { # scan the list of pre-tokens indexes
+ foreach $i ( @{$routput_token_list} )
+ { # scan the list of pre-tokens indexes
# self-checking for valid token types
- my $type = $output_token_type[$i];
+ my $type = $routput_token_type->[$i];
+ my $forced_indentation_flag = $routput_indent_flag->[$i];
+
+ # See if we should undo the $forced_indentation_flag.
+ # Forced indentation after 'if', 'unless', 'while' and 'until'
+ # expressions without trailing parens is optional and doesn't
+ # always look good. It is usually okay for a trailing logical
+ # expression, but if the expression is a function call, code block,
+ # or some kind of list it puts in an unwanted extra indentation
+ # level which is hard to remove.
+ #
+ # Example where extra indentation looks ok:
+ # return 1
+ # if $det_a < 0 and $det_b > 0
+ # or $det_a > 0 and $det_b < 0;
+ #
+ # Example where extra indentation is not needed because
+ # the eval brace also provides indentation:
+ # print "not " if defined eval {
+ # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
+ # };
+ #
+ # The following rule works fairly well:
+ # Undo the flag if the end of this line, or start of the next
+ # line, is an opening container token or a comma.
+ # This almost always works, but if not after another pass it will
+ # be stable.
+ if ( $forced_indentation_flag && $type eq 'k' ) {
+ my $ixlast = -1;
+ my $ilast = $routput_token_list->[$ixlast];
+ my $toklast = $routput_token_type->[$ilast];
+ if ( $toklast eq '#' ) {
+ $ixlast--;
+ $ilast = $routput_token_list->[$ixlast];
+ $toklast = $routput_token_type->[$ilast];
+ }
+ if ( $toklast eq 'b' ) {
+ $ixlast--;
+ $ilast = $routput_token_list->[$ixlast];
+ $toklast = $routput_token_type->[$ilast];
+ }
+ if ( $toklast =~ /^[\{,]$/ ) {
+ $forced_indentation_flag = 0;
+ }
+ else {
+ ( $toklast, my $i_next ) =
+ find_next_nonblank_token( $max_token_index, $rtokens,
+ $max_token_index );
+ if ( $toklast =~ /^[\{,]$/ ) {
+ $forced_indentation_flag = 0;
+ }
+ }
+ }
+
+ # if we are already in an indented if, see if we should outdent
+ if ($indented_if_level) {
+
+ # don't try to nest trailing if's - shouldn't happen
+ if ( $type eq 'k' ) {
+ $forced_indentation_flag = 0;
+ }
+
+ # check for the normal case - outdenting at next ';'
+ elsif ( $type eq ';' ) {
+ if ( $level_in_tokenizer == $indented_if_level ) {
+ $forced_indentation_flag = -1;
+ $indented_if_level = 0;
+ }
+ }
+
+ # handle case of missing semicolon
+ elsif ( $type eq '}' ) {
+ if ( $level_in_tokenizer == $indented_if_level ) {
+ $indented_if_level = 0;
+
+ # TBD: This could be a subroutine call
+ $level_in_tokenizer--;
+ if ( @{$rslevel_stack} > 1 ) {
+ pop( @{$rslevel_stack} );
+ }
+ if ( length($nesting_block_string) > 1 )
+ { # true for valid script
+ chop $nesting_block_string;
+ chop $nesting_list_string;
+ }
+
+ }
+ }
+ }
+
my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
$level_i = $level_in_tokenizer;
# Note: these are set so that the leading braces have a HIGHER
# level than their CONTENTS, which is convenient for indentation
# Also, define continuation indentation for each token.
- if ( $type eq '{' || $type eq 'L' ) {
+ if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
+ {
# use environment before updating
$container_environment =
$nesting_block_flag ? 'BLOCK'
: $nesting_list_flag ? 'LIST'
- : "";
+ : "";
# if the difference between total nesting levels is not 1,
# there are intervening non-structural nesting types between
# this '{' and the previous unclosed '{'
my $intervening_secondary_structure = 0;
- if (@slevel_stack) {
+ if ( @{$rslevel_stack} ) {
$intervening_secondary_structure =
- $slevel_in_tokenizer - $slevel_stack[-1];
+ $slevel_in_tokenizer - $rslevel_stack->[-1];
}
- # =head1 Continuation Indentation
+ # Continuation Indentation
#
# Having tried setting continuation indentation both in the formatter and
# in the tokenizer, I can say that setting it in the tokenizer is much,
# variable.
# save the current states
- push( @slevel_stack, 1 + $slevel_in_tokenizer );
+ push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
$level_in_tokenizer++;
- if ( $output_block_type[$i] ) {
- $nesting_block_flag = 1;
- $nesting_block_string .= '1';
+ if ($forced_indentation_flag) {
+
+ # break BEFORE '?' when there is forced indentation
+ if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
+ if ( $type eq 'k' ) {
+ $indented_if_level = $level_in_tokenizer;
+ }
+
+ # do not change container environment here if we are not
+ # at a real list. Adding this check prevents "blinkers"
+ # often near 'unless" clauses, such as in the following
+ # code:
+## next
+## unless -e (
+## $archive =
+## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
+## );
+
+ $nesting_block_string .= "$nesting_block_flag";
}
else {
- $nesting_block_flag = 0;
- $nesting_block_string .= '0';
- }
- # we will use continuation indentation within containers
- # which are not blocks and not logical expressions
+ if ( $routput_block_type->[$i] ) {
+ $nesting_block_flag = 1;
+ $nesting_block_string .= '1';
+ }
+ else {
+ $nesting_block_flag = 0;
+ $nesting_block_string .= '0';
+ }
+ }
+
+ # we will use continuation indentation within containers
+ # which are not blocks and not logical expressions
my $bit = 0;
- if ( !$output_block_type[$i] ) {
+ if ( !$routput_block_type->[$i] ) {
# propagate flag down at nested open parens
- if ( $output_container_type[$i] eq '(' ) {
+ if ( $routput_container_type->[$i] eq '(' ) {
$bit = 1 if $nesting_list_flag;
}
else {
$bit = 1
unless
- $is_logical_container{ $output_container_type[$i] };
+ $is_logical_container{ $routput_container_type->[$i]
+ };
}
}
$nesting_list_string .= $bit;
$ci_string_in_tokenizer .=
( $intervening_secondary_structure != 0 ) ? '1' : '0';
- $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
+ $ci_string_sum = ones_count($ci_string_in_tokenizer);
$continuation_string_in_tokenizer .=
( $in_statement_continuation > 0 ) ? '1' : '0';
my $total_ci = $ci_string_sum;
if (
- !$output_block_type[$i] # patch: skip for BLOCK
+ !$routput_block_type->[$i] # patch: skip for BLOCK
&& ($in_statement_continuation)
+ && !( $forced_indentation_flag && $type eq ':' )
)
{
$total_ci += $in_statement_continuation
$in_statement_continuation = 0;
}
- elsif ( $type eq '}' || $type eq 'R' ) {
+ elsif ($type eq '}'
+ || $type eq 'R'
+ || $forced_indentation_flag < 0 )
+ {
# only a nesting error in the script would prevent popping here
- if ( @slevel_stack > 1 ) { pop(@slevel_stack); }
+ if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
$level_i = --$level_in_tokenizer;
$nesting_list_flag = ( $nesting_list_string =~ /1$/ );
chop $ci_string_in_tokenizer;
- $ci_string_sum =
- ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
+ $ci_string_sum = ones_count($ci_string_in_tokenizer);
$in_statement_continuation =
chop $continuation_string_in_tokenizer;
# zero continuation flag at terminal BLOCK '}' which
# ends a statement.
- if ( $output_block_type[$i] ) {
+ if ( $routput_block_type->[$i] ) {
# ...These include non-anonymous subs
# note: could be sub ::abc { or sub 'abc
- if ( $output_block_type[$i] =~ m/^sub\s*/gc ) {
+ if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
# note: older versions of perl require the /gc modifier
# here or else the \G does not work.
- if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) {
+ if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
+ {
$in_statement_continuation = 0;
}
}
# ...and include all block types except user subs with
# block prototypes and these: (sort|grep|map|do|eval)
-# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
+# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
elsif (
- $is_zero_continuation_block_type{ $output_block_type
- [$i] } )
+ $is_zero_continuation_block_type{
+ $routput_block_type->[$i]
+ } )
{
$in_statement_continuation = 0;
}
# /^(sort|grep|map|do|eval)$/ )
elsif (
$is_not_zero_continuation_block_type{
- $output_block_type[$i] } )
+ $routput_block_type->[$i]
+ } )
{
}
# ..and a block introduced by a label
# /^\w+\s*:$/gc ) {
- elsif ( $output_block_type[$i] =~ /:$/ ) {
+ elsif ( $routput_block_type->[$i] =~ /:$/ ) {
$in_statement_continuation = 0;
}
- # ..nor user function with block prototype
+ # user function with block prototype
else {
+ $in_statement_continuation = 0;
}
}
# If we are in a list, then
- # we must set continuatoin indentation at the closing
+ # we must set continuation indentation at the closing
# paren of something like this (paren after $check):
# assert(
# __LINE__,
# );
elsif ( $tok eq ')' ) {
$in_statement_continuation = 1
- if $output_container_type[$i] =~ /^[;,\{\}]$/;
+ if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
}
+
+ elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
}
# use environment after updating
$container_environment =
$nesting_block_flag ? 'BLOCK'
: $nesting_list_flag ? 'LIST'
- : "";
+ : "";
$ci_string_i = $ci_string_sum + $in_statement_continuation;
$nesting_block_string_i = $nesting_block_string;
$nesting_list_string_i = $nesting_list_string;
$container_environment =
$nesting_block_flag ? 'BLOCK'
: $nesting_list_flag ? 'LIST'
- : "";
+ : "";
# zero the continuation indentation at certain tokens so
# that they will be at the same level as its container. For
$in_statement_continuation = 0;
}
- # otherwise, the next token after a ',' starts a new term
- elsif ( $type eq ',' ) {
+ # otherwise, the token after a ',' starts a new term
+
+ # Patch FOR RT#99961; no continuation after a ';'
+ # This is needed because perltidy currently marks
+ # a block preceded by a type character like % or @
+ # as a non block, to simplify formatting. But these
+ # are actually blocks and can have semicolons.
+ # See code_block_type() and is_non_structural_brace().
+ elsif ( $type eq ',' || $type eq ';' ) {
$in_statement_continuation = 0;
}
}
if ( $level_in_tokenizer < 0 ) {
- unless ($saw_negative_indentation) {
- $saw_negative_indentation = 1;
+ unless ( $tokenizer_self->{_saw_negative_indentation} ) {
+ $tokenizer_self->{_saw_negative_indentation} = 1;
warning("Starting negative indentation\n");
}
}
- # set secondary nesting levels based on all continment token types
+ # set secondary nesting levels based on all containment token types
# Note: these are set so that the nesting depth is the depth
# of the PREVIOUS TOKEN, which is convenient for setting
- # the stength of token bonds
+ # the strength of token bonds
my $slevel_i = $slevel_in_tokenizer;
# /^[L\{\(\[]$/
}
}
- push( @block_type, $output_block_type[$i] );
+ push( @block_type, $routput_block_type->[$i] );
push( @ci_string, $ci_string_i );
push( @container_environment, $container_environment );
- push( @container_type, $output_container_type[$i] );
+ push( @container_type, $routput_container_type->[$i] );
push( @levels, $level_i );
push( @nesting_tokens, $nesting_token_string_i );
push( @nesting_types, $nesting_type_string_i );
push( @slevels, $slevel_i );
push( @token_type, $fix_type );
- push( @type_sequence, $output_type_sequence[$i] );
+ push( @type_sequence, $routput_type_sequence->[$i] );
push( @nesting_blocks, $nesting_block_string );
push( @nesting_lists, $nesting_list_string );
push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
}
+ $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
$tokenizer_self->{_in_quote} = $in_quote;
- $tokenizer_self->{_rhere_target_list} = \@here_target_list;
+ $tokenizer_self->{_quote_target} =
+ $in_quote ? matching_end_token($quote_character) : "";
+ $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
$line_of_tokens->{_rtoken_type} = \@token_type;
$line_of_tokens->{_rtokens} = \@tokens;
}
} # end tokenize_this_line
+#########i#############################################################
+# Tokenizer routines which assist in identifying token types
+#######################################################################
+
+sub operator_expected {
+
+ # Many perl symbols have two or more meanings. For example, '<<'
+ # can be a shift operator or a here-doc operator. The
+ # interpretation of these symbols depends on the current state of
+ # the tokenizer, which may either be expecting a term or an
+ # operator. For this example, a << would be a shift if an operator
+ # is expected, and a here-doc if a term is expected. This routine
+ # is called to make this decision for any current token. It returns
+ # one of three possible values:
+ #
+ # OPERATOR - operator expected (or at least, not a term)
+ # UNKNOWN - can't tell
+ # TERM - a term is expected (or at least, not an operator)
+ #
+ # The decision is based on what has been seen so far. This
+ # information is stored in the "$last_nonblank_type" and
+ # "$last_nonblank_token" variables. For example, if the
+ # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
+ # if $last_nonblank_type is 'n' (numeric), we are expecting an
+ # OPERATOR.
+ #
+ # If a UNKNOWN is returned, the calling routine must guess. A major
+ # goal of this tokenizer is to minimize the possibility of returning
+ # UNKNOWN, because a wrong guess can spoil the formatting of a
+ # script.
+ #
+ # adding NEW_TOKENS: it is critically important that this routine be
+ # updated to allow it to determine if an operator or term is to be
+ # expected after the new token. Doing this simply involves adding
+ # the new token character to one of the regexes in this routine or
+ # to one of the hash lists
+ # that it uses, which are initialized in the BEGIN section.
+ # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
+ # $statement_type
+
+ my ( $prev_type, $tok, $next_type ) = @_;
+
+ my $op_expected = UNKNOWN;
+
+##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
+
+# Note: function prototype is available for token type 'U' for future
+# program development. It contains the leading and trailing parens,
+# and no blanks. It might be used to eliminate token type 'C', for
+# example (prototype = '()'). Thus:
+# if ($last_nonblank_type eq 'U') {
+# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
+# }
+
+ # A possible filehandle (or object) requires some care...
+ if ( $last_nonblank_type eq 'Z' ) {
+
+ # angle.t
+ if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
+ $op_expected = UNKNOWN;
+ }
+
+ # For possible file handle like "$a", Perl uses weird parsing rules.
+ # For example:
+ # print $a/2,"/hi"; - division
+ # print $a / 2,"/hi"; - division
+ # print $a/ 2,"/hi"; - division
+ # print $a /2,"/hi"; - pattern (and error)!
+ elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
+ $op_expected = TERM;
+ }
+
+ # Note when an operation is being done where a
+ # filehandle might be expected, since a change in whitespace
+ # could change the interpretation of the statement.
+ else {
+ if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
+ complain("operator in print statement not recommended\n");
+ $op_expected = OPERATOR;
+ }
+ }
+ }
+
+ # Check for smartmatch operator before preceding brace or square bracket.
+ # For example, at the ? after the ] in the following expressions we are
+ # expecting an operator:
+ #
+ # qr/3/ ~~ ['1234'] ? 1 : 0;
+ # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
+ elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
+ $op_expected = OPERATOR;
+ }
+
+ # handle something after 'do' and 'eval'
+ elsif ( $is_block_operator{$last_nonblank_token} ) {
+
+ # something like $a = eval "expression";
+ # ^
+ if ( $last_nonblank_type eq 'k' ) {
+ $op_expected = TERM; # expression or list mode following keyword
+ }
+
+ # something like $a = do { BLOCK } / 2;
+ # or this ? after a smartmatch anonynmous hash or array reference:
+ # qr/3/ ~~ ['1234'] ? 1 : 0;
+ # ^
+ else {
+ $op_expected = OPERATOR; # block mode following }
+ }
+ }
+
+ # handle bare word..
+ elsif ( $last_nonblank_type eq 'w' ) {
+
+ # unfortunately, we can't tell what type of token to expect next
+ # after most bare words
+ $op_expected = UNKNOWN;
+ }
+
+ # operator, but not term possible after these types
+ # Note: moved ')' from type to token because parens in list context
+ # get marked as '{' '}' now. This is a minor glitch in the following:
+ # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
+ #
+ elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
+ || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
+ {
+ $op_expected = OPERATOR;
+
+ # in a 'use' statement, numbers and v-strings are not true
+ # numbers, so to avoid incorrect error messages, we will
+ # mark them as unknown for now (use.t)
+ # TODO: it would be much nicer to create a new token V for VERSION
+ # number in a use statement. Then this could be a check on type V
+ # and related patches which change $statement_type for '=>'
+ # and ',' could be removed. Further, it would clean things up to
+ # scan the 'use' statement with a separate subroutine.
+ if ( ( $statement_type eq 'use' )
+ && ( $last_nonblank_type =~ /^[nv]$/ ) )
+ {
+ $op_expected = UNKNOWN;
+ }
+
+ # expecting VERSION or {} after package NAMESPACE
+ elsif ($statement_type =~ /^package\b/
+ && $last_nonblank_token =~ /^package\b/ )
+ {
+ $op_expected = TERM;
+ }
+ }
+
+ # no operator after many keywords, such as "die", "warn", etc
+ elsif ( $expecting_term_token{$last_nonblank_token} ) {
+
+ # patch for dor.t (defined or).
+ # perl functions which may be unary operators
+ # TODO: This list is incomplete, and these should be put
+ # into a hash.
+ if ( $tok eq '/'
+ && $next_type eq '/'
+ && $last_nonblank_type eq 'k'
+ && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
+ {
+ $op_expected = OPERATOR;
+ }
+ else {
+ $op_expected = TERM;
+ }
+ }
+
+ # no operator after things like + - ** (i.e., other operators)
+ elsif ( $expecting_term_types{$last_nonblank_type} ) {
+ $op_expected = TERM;
+ }
+
+ # a few operators, like "time", have an empty prototype () and so
+ # take no parameters but produce a value to operate on
+ elsif ( $expecting_operator_token{$last_nonblank_token} ) {
+ $op_expected = OPERATOR;
+ }
+
+ # post-increment and decrement produce values to be operated on
+ elsif ( $expecting_operator_types{$last_nonblank_type} ) {
+ $op_expected = OPERATOR;
+ }
+
+ # no value to operate on after sub block
+ elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
+
+ # a right brace here indicates the end of a simple block.
+ # all non-structural right braces have type 'R'
+ # all braces associated with block operator keywords have been given those
+ # keywords as "last_nonblank_token" and caught above.
+ # (This statement is order dependent, and must come after checking
+ # $last_nonblank_token).
+ elsif ( $last_nonblank_type eq '}' ) {
+
+ # patch for dor.t (defined or).
+ if ( $tok eq '/'
+ && $next_type eq '/'
+ && $last_nonblank_token eq ']' )
+ {
+ $op_expected = OPERATOR;
+ }
+
+ # Patch for RT #116344: misparse a ternary operator after an anonymous
+ # hash, like this:
+ # return ref {} ? 1 : 0;
+ # The right brace should really be marked type 'R' in this case, and
+ # it is safest to return an UNKNOWN here. Expecting a TERM will
+ # cause the '?' to always be interpreted as a pattern delimiter
+ # rather than introducing a ternary operator.
+ elsif ( $tok eq '?' ) {
+ $op_expected = UNKNOWN;
+ }
+ else {
+ $op_expected = TERM;
+ }
+ }
+
+ # something else..what did I forget?
+ else {
+
+ # collecting diagnostics on unknown operator types..see what was missed
+ $op_expected = UNKNOWN;
+ write_diagnostics(
+"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
+ );
+ }
+
+ TOKENIZER_DEBUG_FLAG_EXPECT && do {
+ print STDOUT
+"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
+ };
+ return $op_expected;
+}
+
sub new_statement_ok {
# return true if the current token can start a new statement
+ # USES GLOBAL VARIABLES: $last_nonblank_type
return label_ok() # a label would be ok here
sub label_ok {
# Decide if a bare word followed by a colon here is a label
+ # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
+ # $brace_depth, @brace_type
# if it follows an opening or closing code block curly brace..
if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
return $brace_type[$brace_depth];
}
- # otherwise, it is a label if and only if it follows a ';'
- # (real or fake)
+ # otherwise, it is a label if and only if it follows a ';' (real or fake)
+ # or another label
else {
- return ( $last_nonblank_type eq ';' );
+ return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
}
}
# Returns "" if not code block, otherwise returns 'last_nonblank_token'
# to indicate the type of code block. (For example, 'last_nonblank_token'
# might be 'if' for an if block, 'else' for an else block, etc).
+ # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
+ # $last_nonblank_block_type, $brace_depth, @brace_type
# handle case of multiple '{'s
# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
- my ( $i, $rtokens, $rtoken_type ) = @_;
+ my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
if ( $last_nonblank_token eq '{'
&& $last_nonblank_type eq $last_nonblank_token )
{
# opening brace where a statement may appear is probably
# a code block but might be and anonymous hash reference
if ( $brace_type[$brace_depth] ) {
- return decide_if_code_block( $i, $rtokens, $rtoken_type );
+ return decide_if_code_block( $i, $rtokens, $rtoken_type,
+ $max_token_index );
}
# cannot start a code block within an anonymous hash
# an opening brace where a statement may appear is probably
# a code block but might be and anonymous hash reference
- return decide_if_code_block( $i, $rtokens, $rtoken_type );
+ return decide_if_code_block( $i, $rtokens, $rtoken_type,
+ $max_token_index );
}
# handle case of '}{'
# a } { situation ...
# could be hash reference after code block..(blktype1.t)
if ($last_nonblank_block_type) {
- return decide_if_code_block( $i, $rtokens, $rtoken_type );
+ return decide_if_code_block( $i, $rtokens, $rtoken_type,
+ $max_token_index );
}
# must be a block if it follows a closing hash reference
}
}
+ ################################################################
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub is_non_structural_brace.
- # elsif ( $last_nonblank_type eq 't' ) {
- # return $last_nonblank_token;
- # }
+ ################################################################
+
+## elsif ( $last_nonblank_type eq 't' ) {
+## return $last_nonblank_token;
+## }
# brace after label:
elsif ( $last_nonblank_type eq 'J' ) {
# otherwise, look at previous token. This must be a code block if
# it follows any of these:
-# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
+# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
elsif ( $is_code_block_token{$last_nonblank_token} ) {
- return $last_nonblank_token;
+
+ # Bug Patch: Note that the opening brace after the 'if' in the following
+ # snippet is an anonymous hash ref and not a code block!
+ # print 'hi' if { x => 1, }->{x};
+ # We can identify this situation because the last nonblank type
+ # will be a keyword (instead of a closing peren)
+ if ( $last_nonblank_token =~ /^(if|unless)$/
+ && $last_nonblank_type eq 'k' )
+ {
+ return "";
+ }
+ else {
+ return $last_nonblank_token;
+ }
}
- # or a sub definition
+ # or a sub or package BLOCK
elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
- && $last_nonblank_token =~ /^sub\b/ )
+ && $last_nonblank_token =~ /^(sub|package)\b/ )
{
return $last_nonblank_token;
}
+ elsif ( $statement_type =~ /^(sub|package)\b/ ) {
+ return $statement_type;
+ }
+
# user-defined subs with block parameters (like grep/map/eval)
elsif ( $last_nonblank_type eq 'G' ) {
return $last_nonblank_token;
# check bareword
elsif ( $last_nonblank_type eq 'w' ) {
- return decide_if_code_block( $i, $rtokens, $rtoken_type );
+ return decide_if_code_block( $i, $rtokens, $rtoken_type,
+ $max_token_index );
+ }
+
+ # Patch for bug # RT #94338 reported by Daniel Trizen
+ # for-loop in a parenthesized block-map triggering an error message:
+ # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
+ # Check for a code block within a parenthesized function call
+ elsif ( $last_nonblank_token eq '(' ) {
+ my $paren_type = $paren_type[$paren_depth];
+ if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
+
+ # We will mark this as a code block but use type 't' instead
+ # of the name of the contining function. This will allow for
+ # correct parsing but will usually produce better formatting.
+ # Braces with block type 't' are not broken open automatically
+ # in the formatter as are other code block types, and this usually
+ # works best.
+ return 't'; # (Not $paren_type)
+ }
+ else {
+ return "";
+ }
+ }
+
+ # handle unknown syntax ') {'
+ # we previously appended a '()' to mark this case
+ elsif ( $last_nonblank_token =~ /\(\)$/ ) {
+ return $last_nonblank_token;
}
# anything else must be anonymous hash reference
sub decide_if_code_block {
- my ( $i, $rtokens, $rtoken_type ) = @_;
+ # USES GLOBAL VARIABLES: $last_nonblank_token
+ my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
+
my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
# we are at a '{' where a statement may appear.
# We must decide if this brace starts an anonymous hash or a code
# We are only going to look ahead one more (nonblank/comment) line.
# Strange formatting could cause a bad guess, but that's unlikely.
- my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
- my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+ my @pre_types;
+ my @pre_tokens;
+
+ # Ignore the rest of this line if it is a side comment
+ if ( $next_nonblank_token ne '#' ) {
+ @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
+ @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+ }
my ( $rpre_tokens, $rpre_types ) =
peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
# generous, and prevents
push @pre_tokens, @$rpre_tokens;
}
- # put a sentinal token to simplify stopping the search
+ # put a sentinel token to simplify stopping the search
+ push @pre_types, '}';
push @pre_types, '}';
my $jbeg = 0;
$j++;
}
elsif ( $pre_types[$j] eq 'w' ) {
- unless ( $is_keyword{ $pre_tokens[$j] } ) {
- $j++;
- }
+ $j++;
}
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
$j++;
$j++ if $pre_types[$j] eq 'b';
- # it's a hash ref if a comma or => follow next
- if ( $pre_types[$j] eq ','
- || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
+ # Patched for RT #95708
+ if (
+
+ # it is a comma which is not a pattern delimeter except for qw
+ (
+ $pre_types[$j] eq ','
+ && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
+ )
+
+ # or a =>
+ || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
+ )
{
$code_block_type = "";
}
sub unexpected {
# report unexpected token type and show where it is
- my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_;
- $unexpected_error_count++;
- if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) {
+ # USES GLOBAL VARIABLES: $tokenizer_self
+ my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
+ $rpretoken_type, $input_line )
+ = @_;
+
+ if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
my $msg = "found $found where $expecting expected";
my $pos = $$rpretoken_map[$i_tok];
interrupt_logfile();
+ my $input_line_number = $tokenizer_self->{_last_line_number};
my ( $offset, $numbered_line, $underline ) =
make_numbered_line( $input_line_number, $input_line, $pos );
$underline = write_on_underline( $underline, $pos - $offset, '^' );
}
}
-sub indicate_error {
- my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
- interrupt_logfile();
- warning($msg);
- write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
- resume_logfile();
-}
-
-sub write_error_indicator_pair {
- my ( $line_number, $input_line, $pos, $carrat ) = @_;
- my ( $offset, $numbered_line, $underline ) =
- make_numbered_line( $line_number, $input_line, $pos );
- $underline = write_on_underline( $underline, $pos - $offset, $carrat );
- warning( $numbered_line . "\n" );
- $underline =~ s/\s*$//;
- warning( $underline . "\n" );
-}
-
-sub make_numbered_line {
-
- # Given an input line, its line number, and a character position of
- # interest, create a string not longer than 80 characters of the form
- # $lineno: sub_string
- # such that the sub_string of $str contains the position of interest
- #
- # Here is an example of what we want, in this case we add trailing
- # '...' because the line is long.
- #
- # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
- #
- # Here is another example, this time in which we used leading '...'
- # because of excessive length:
- #
- # 2: ... er of the World Wide Web Consortium's
- #
- # input parameters are:
- # $lineno = line number
- # $str = the text of the line
- # $pos = position of interest (the error) : 0 = first character
- #
- # We return :
- # - $offset = an offset which corrects the position in case we only
- # display part of a line, such that $pos-$offset is the effective
- # position from the start of the displayed line.
- # - $numbered_line = the numbered line as above,
- # - $underline = a blank 'underline' which is all spaces with the same
- # number of characters as the numbered line.
-
- my ( $lineno, $str, $pos ) = @_;
- my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
- my $excess = length($str) - $offset - 68;
- my $numc = ( $excess > 0 ) ? 68 : undef;
-
- if ( defined($numc) ) {
- if ( $offset == 0 ) {
- $str = substr( $str, $offset, $numc - 4 ) . " ...";
- }
- else {
- $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
- }
- }
- else {
-
- if ( $offset == 0 ) {
- }
- else {
- $str = "... " . substr( $str, $offset + 4 );
- }
- }
-
- my $numbered_line = sprintf( "%d: ", $lineno );
- $offset -= length($numbered_line);
- $numbered_line .= $str;
- my $underline = " " x length($numbered_line);
- return ( $offset, $numbered_line, $underline );
-}
-
-sub write_on_underline {
-
- # The "underline" is a string that shows where an error is; it starts
- # out as a string of blanks with the same length as the numbered line of
- # code above it, and we have to add marking to show where an error is.
- # In the example below, we want to write the string '--^' just below
- # the line of bad code:
- #
- # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
- # ---^
- # We are given the current underline string, plus a position and a
- # string to write on it.
- #
- # In the above example, there will be 2 calls to do this:
- # First call: $pos=19, pos_chr=^
- # Second call: $pos=16, pos_chr=---
- #
- # This is a trivial thing to do with substr, but there is some
- # checking to do.
-
- my ( $underline, $pos, $pos_chr ) = @_;
-
- # check for error..shouldn't happen
- unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
- return $underline;
- }
- my $excess = length($pos_chr) + $pos - length($underline);
- if ( $excess > 0 ) {
- $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
- }
- substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
- return ($underline);
-}
-
-sub is_non_structural_brace {
+sub is_non_structural_brace {
# Decide if a brace or bracket is structural or non-structural
# by looking at the previous token and type
+ # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
# Tentatively deactivated because it caused the wrong operator expectation
# return 0;
# }
+ ################################################################
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub code_block_type
- # if ($last_nonblank_type eq 't') {return 0}
+ ################################################################
+
+ ##if ($last_nonblank_type eq 't') {return 0}
# otherwise, it is non-structural if it is decorated
# by type information.
);
}
-sub operator_expected {
-
- # Many perl symbols have two or more meanings. For example, '<<'
- # can be a shift operator or a here-doc operator. The
- # interpretation of these symbols depends on the current state of
- # the tokenizer, which may either be expecting a term or an
- # operator. For this example, a << would be a shift if an operator
- # is expected, and a here-doc if a term is expected. This routine
- # is called to make this decision for any current token. It returns
- # one of three possible values:
- #
- # OPERATOR - operator expected (or at least, not a term)
- # UNKNOWN - can't tell
- # TERM - a term is expected (or at least, not an operator)
- #
- # The decision is based on what has been seen so far. This
- # information is stored in the "$last_nonblank_type" and
- # "$last_nonblank_token" variables. For example, if the
- # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
- # if $last_nonblank_type is 'n' (numeric), we are expecting an
- # OPERATOR.
- #
- # If a UNKNOWN is returned, the calling routine must guess. A major
- # goal of this tokenizer is to minimize the possiblity of returning
- # UNKNOWN, because a wrong guess can spoil the formatting of a
- # script.
- #
- # adding NEW_TOKENS: it is critically important that this routine be
- # updated to allow it to determine if an operator or term is to be
- # expected after the new token. Doing this simply involves adding
- # the new token character to one of the regexes in this routine or
- # to one of the hash lists
- # that it uses, which are initialized in the BEGIN section.
+#########i#############################################################
+# Tokenizer routines for tracking container nesting depths
+#######################################################################
- my ( $prev_type, $tok, $next_type ) = @_;
- my $op_expected = UNKNOWN;
+# The following routines keep track of nesting depths of the nesting
+# types, ( [ { and ?. This is necessary for determining the indentation
+# level, and also for debugging programs. Not only do they keep track of
+# nesting depths of the individual brace types, but they check that each
+# of the other brace types is balanced within matching pairs. For
+# example, if the program sees this sequence:
+#
+# { ( ( ) }
+#
+# then it can determine that there is an extra left paren somewhere
+# between the { and the }. And so on with every other possible
+# combination of outer and inner brace types. For another
+# example:
+#
+# ( [ ..... ] ] )
+#
+# which has an extra ] within the parens.
+#
+# The brace types have indexes 0 .. 3 which are indexes into
+# the matrices.
+#
+# The pair ? : are treated as just another nesting type, with ? acting
+# as the opening brace and : acting as the closing brace.
+#
+# The matrix
+#
+# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
+#
+# saves the nesting depth of brace type $b (where $b is either of the other
+# nesting types) when brace type $a enters a new depth. When this depth
+# decreases, a check is made that the current depth of brace types $b is
+# unchanged, or otherwise there must have been an error. This can
+# be very useful for localizing errors, particularly when perl runs to
+# the end of a large file (such as this one) and announces that there
+# is a problem somewhere.
+#
+# A numerical sequence number is maintained for every nesting type,
+# so that each matching pair can be uniquely identified in a simple
+# way.
-# Note: function prototype is available for token type 'U' for future
-# program development. It contains the leading and trailing parens,
-# and no blanks. It might be used to eliminate token type 'C', for
-# example (prototype = '()'). Thus:
-# if ($last_nonblank_type eq 'U') {
-# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
-# }
+sub increase_nesting_depth {
+ my ( $aa, $pos ) = @_;
+
+ # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
+ # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
+ # $statement_type
+ my $bb;
+ $current_depth[$aa]++;
+ $total_depth++;
+ $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
+ my $input_line_number = $tokenizer_self->{_last_line_number};
+ my $input_line = $tokenizer_self->{_line_text};
- # A possible filehandle (or object) requires some care...
- if ( $last_nonblank_type eq 'Z' ) {
+ # Sequence numbers increment by number of items. This keeps
+ # a unique set of numbers but still allows the relative location
+ # of any type to be determined.
+ $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
+ my $seqno = $nesting_sequence_number[$aa];
+ $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
- # angle.t
- if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
- $op_expected = UNKNOWN;
- }
+ $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
+ [ $input_line_number, $input_line, $pos ];
- # For possible file handle like "$a", Perl uses weird parsing rules.
- # For example:
- # print $a/2,"/hi"; - division
- # print $a / 2,"/hi"; - division
- # print $a/ 2,"/hi"; - division
- # print $a /2,"/hi"; - pattern (and error)!
- elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
- $op_expected = TERM;
- }
+ for $bb ( 0 .. $#closing_brace_names ) {
+ next if ( $bb == $aa );
+ $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
+ }
- # Note when an operation is being done where a
- # filehandle might be expected, since a change in whitespace
- # could change the interpretation of the statement.
- else {
- if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
- complain("operator in print statement not recommended\n");
- $op_expected = OPERATOR;
+ # set a flag for indenting a nested ternary statement
+ my $indent = 0;
+ if ( $aa == QUESTION_COLON ) {
+ $nested_ternary_flag[ $current_depth[$aa] ] = 0;
+ if ( $current_depth[$aa] > 1 ) {
+ if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
+ my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
+ if ( $pdepth == $total_depth - 1 ) {
+ $indent = 1;
+ $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
+ }
}
}
}
+ $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
+ $statement_type = "";
+ return ( $seqno, $indent );
+}
- # handle something after 'do' and 'eval'
- elsif ( $is_block_operator{$last_nonblank_token} ) {
-
- # something like $a = eval "expression";
- # ^
- if ( $last_nonblank_type eq 'k' ) {
- $op_expected = TERM; # expression or list mode following keyword
- }
-
- # something like $a = do { BLOCK } / 2;
- # ^
- else {
- $op_expected = OPERATOR; # block mode following }
- }
- }
+sub decrease_nesting_depth {
- # handle bare word..
- elsif ( $last_nonblank_type eq 'w' ) {
+ my ( $aa, $pos ) = @_;
- # unfortunately, we can't tell what type of token to expect next
- # after most bare words
- $op_expected = UNKNOWN;
- }
+ # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
+ # @current_sequence_number, @depth_array, @starting_line_of_current_depth
+ # $statement_type
+ my $bb;
+ my $seqno = 0;
+ my $input_line_number = $tokenizer_self->{_last_line_number};
+ my $input_line = $tokenizer_self->{_line_text};
- # operator, but not term possible after these types
- # Note: moved ')' from type to token because parens in list context
- # get marked as '{' '}' now. This is a minor glitch in the following:
- # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
- #
- elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
- || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
- {
- $op_expected = OPERATOR;
+ my $outdent = 0;
+ $total_depth--;
+ if ( $current_depth[$aa] > 0 ) {
- # in a 'use' statement, numbers and v-strings are not really
- # numbers, so to avoid incorrect error messages, we will
- # mark them as unknown for now (use.t)
- if ( ( $statement_type eq 'use' )
- && ( $last_nonblank_type =~ /^[nv]$/ ) )
- {
- $op_expected = UNKNOWN;
+ # set a flag for un-indenting after seeing a nested ternary statement
+ $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
+ if ( $aa == QUESTION_COLON ) {
+ $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
}
- }
+ $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
- # no operator after many keywords, such as "die", "warn", etc
- elsif ( $expecting_term_token{$last_nonblank_token} ) {
- $op_expected = TERM;
- }
+ # check that any brace types $bb contained within are balanced
+ for $bb ( 0 .. $#closing_brace_names ) {
+ next if ( $bb == $aa );
- # no operator after things like + - ** (i.e., other operators)
- elsif ( $expecting_term_types{$last_nonblank_type} ) {
- $op_expected = TERM;
- }
-
- # a few operators, like "time", have an empty prototype () and so
- # take no parameters but produce a value to operate on
- elsif ( $expecting_operator_token{$last_nonblank_token} ) {
- $op_expected = OPERATOR;
- }
-
- # post-increment and decrement produce values to be operated on
- elsif ( $expecting_operator_types{$last_nonblank_type} ) {
- $op_expected = OPERATOR;
- }
-
- # no value to operate on after sub block
- elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
-
- # a right brace here indicates the end of a simple block.
- # all non-structural right braces have type 'R'
- # all braces associated with block operator keywords have been given those
- # keywords as "last_nonblank_token" and caught above.
- # (This statement is order dependent, and must come after checking
- # $last_nonblank_token).
- elsif ( $last_nonblank_type eq '}' ) {
- $op_expected = TERM;
- }
-
- # something else..what did I forget?
- else {
-
- # collecting diagnostics on unknown operator types..see what was missed
- $op_expected = UNKNOWN;
- write_diagnostics(
-"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
- );
- }
-
- TOKENIZER_DEBUG_FLAG_EXPECT && do {
- print
-"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
- };
- return $op_expected;
-}
-
-# The following routines keep track of nesting depths of the nesting
-# types, ( [ { and ?. This is necessary for determining the indentation
-# level, and also for debugging programs. Not only do they keep track of
-# nesting depths of the individual brace types, but they check that each
-# of the other brace types is balanced within matching pairs. For
-# example, if the program sees this sequence:
-#
-# { ( ( ) }
-#
-# then it can determine that there is an extra left paren somewhere
-# between the { and the }. And so on with every other possible
-# combination of outer and inner brace types. For another
-# example:
-#
-# ( [ ..... ] ] )
-#
-# which has an extra ] within the parens.
-#
-# The brace types have indexes 0 .. 3 which are indexes into
-# the matrices.
-#
-# The pair ? : are treated as just another nesting type, with ? acting
-# as the opening brace and : acting as the closing brace.
-#
-# The matrix
-#
-# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
-#
-# saves the nesting depth of brace type $b (where $b is either of the other
-# nesting types) when brace type $a enters a new depth. When this depth
-# decreases, a check is made that the current depth of brace types $b is
-# unchanged, or otherwise there must have been an error. This can
-# be very useful for localizing errors, particularly when perl runs to
-# the end of a large file (such as this one) and announces that there
-# is a problem somewhere.
-#
-# A numerical sequence number is maintained for every nesting type,
-# so that each matching pair can be uniquely identified in a simple
-# way.
-
-sub increase_nesting_depth {
- my ( $a, $i_tok ) = @_;
- my $b;
- $current_depth[$a]++;
-
- # Sequence numbers increment by number of items. This keeps
- # a unique set of numbers but still allows the relative location
- # of any type to be determined.
- $nesting_sequence_number[$a] += scalar(@closing_brace_names);
- my $seqno = $nesting_sequence_number[$a];
- $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
-
- my $pos = $$rpretoken_map[$i_tok];
- $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
- [ $input_line_number, $input_line, $pos ];
-
- for $b ( 0 .. $#closing_brace_names ) {
- next if ( $b == $a );
- $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
- }
- return $seqno;
-}
-
-sub decrease_nesting_depth {
-
- my ( $a, $i_tok ) = @_;
- my $pos = $$rpretoken_map[$i_tok];
- my $b;
- my $seqno = 0;
-
- if ( $current_depth[$a] > 0 ) {
-
- $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
-
- # check that any brace types $b contained within are balanced
- for $b ( 0 .. $#closing_brace_names ) {
- next if ( $b == $a );
-
- unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
- $current_depth[$b] )
- {
- my $diff = $current_depth[$b] -
- $depth_array[$a][$b][ $current_depth[$a] ];
+ unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
+ $current_depth[$bb] )
+ {
+ my $diff =
+ $current_depth[$bb] -
+ $depth_array[$aa][$bb][ $current_depth[$aa] ];
# don't whine too many times
my $saw_brace_error = get_saw_brace_error();
if (
$saw_brace_error <= MAX_NAG_MESSAGES
- # if too many closing types have occured, we probably
+ # if too many closing types have occurred, we probably
# already caught this error
&& ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
)
{
interrupt_logfile();
my $rsl =
- $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+ $starting_line_of_current_depth[$aa]
+ [ $current_depth[$aa] ];
my $sl = $$rsl[0];
my $rel = [ $input_line_number, $input_line, $pos ];
my $el = $$rel[0];
}
my $bname =
( $diff > 0 )
- ? $opening_brace_names[$b]
- : $closing_brace_names[$b];
+ ? $opening_brace_names[$bb]
+ : $closing_brace_names[$bb];
write_error_indicator_pair( @$rsl, '^' );
my $msg = <<"EOM";
-Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
+Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM
if ( $diff > 0 ) {
my $rml =
- $starting_line_of_current_depth[$b]
- [ $current_depth[$b] ];
+ $starting_line_of_current_depth[$bb]
+ [ $current_depth[$bb] ];
my $ml = $$rml[0];
$msg .=
" The most recent un-matched $bname is on line $ml\n";
increment_brace_error();
}
}
- $current_depth[$a]--;
+ $current_depth[$aa]--;
}
else {
my $saw_brace_error = get_saw_brace_error();
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
my $msg = <<"EOM";
-There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
+There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
}
increment_brace_error();
}
- return $seqno;
+ return ( $seqno, $outdent );
}
sub check_final_nesting_depths {
- my ($a);
+ my ($aa);
- for $a ( 0 .. $#closing_brace_names ) {
+ # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
- if ( $current_depth[$a] ) {
- my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+ for $aa ( 0 .. $#closing_brace_names ) {
+
+ if ( $current_depth[$aa] ) {
+ my $rsl =
+ $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
my $sl = $$rsl[0];
my $msg = <<"EOM";
-Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
-The most recent un-matched $opening_brace_names[$a] is on line $sl
+Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
+The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
indicate_error( $msg, @$rsl, '^' );
increment_brace_error();
}
}
-sub numerator_expected {
-
- # this is a filter for a possible numerator, in support of guessing
- # for the / pattern delimiter token.
- # returns -
- # 1 - yes
- # 0 - can't tell
- # -1 - no
- # Note: I am using the convention that variables ending in
- # _expected have these 3 possible values.
- my ( $i, $rtokens ) = @_;
- my $next_token = $$rtokens[ $i + 1 ];
- if ( $next_token eq '=' ) { $i++; } # handle /=
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
-
- if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
- 1;
- }
- else {
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
- 0;
- }
- else {
- -1;
- }
- }
-}
-
-sub pattern_expected {
-
- # This is the start of a filter for a possible pattern.
- # It looks at the token after a possbible pattern and tries to
- # determine if that token could end a pattern.
- # returns -
- # 1 - yes
- # 0 - can't tell
- # -1 - no
- my ( $i, $rtokens ) = @_;
- my $next_token = $$rtokens[ $i + 1 ];
- if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
-
- # list of tokens which may follow a pattern
- # (can probably be expanded)
- if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
- {
- 1;
- }
- else {
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
- 0;
- }
- else {
- -1;
- }
- }
-}
-
-sub find_next_nonblank_token_on_this_line {
- my ( $i, $rtokens ) = @_;
- my $next_nonblank_token;
-
- if ( $i < $max_token_index ) {
- $next_nonblank_token = $$rtokens[ ++$i ];
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
-
- if ( $i < $max_token_index ) {
- $next_nonblank_token = $$rtokens[ ++$i ];
- }
- }
- }
- else {
- $next_nonblank_token = "";
- }
- return ( $next_nonblank_token, $i );
-}
-
-sub find_next_nonblank_token {
- my ( $i, $rtokens ) = @_;
-
- if ( $i >= $max_token_index ) {
-
- if ( !$peeked_ahead ) {
- $peeked_ahead = 1;
- $rtokens = peek_ahead_for_nonblank_token($rtokens);
- }
- }
- my $next_nonblank_token = $$rtokens[ ++$i ];
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
- $next_nonblank_token = $$rtokens[ ++$i ];
- }
- return ( $next_nonblank_token, $i );
-}
+#########i#############################################################
+# Tokenizer routines for looking ahead in input stream
+#######################################################################
sub peek_ahead_for_n_nonblank_pre_tokens {
# returns next n pretokens if they exist
# returns undef's if hits eof without seeing any pretokens
+ # USES GLOBAL VARIABLES: $tokenizer_self
my $max_pretokens = shift;
my $line;
my $i = 0;
# look ahead for next non-blank, non-comment line of code
sub peek_ahead_for_nonblank_token {
- my $rtokens = shift;
+
+ # USES GLOBAL VARIABLES: $tokenizer_self
+ my ( $rtokens, $max_token_index ) = @_;
my $line;
my $i = 0;
return $rtokens;
}
-sub pre_tokenize {
-
- # Break a string, $str, into a sequence of preliminary tokens. We
- # are interested in these types of tokens:
- # words (type='w'), example: 'max_tokens_wanted'
- # digits (type = 'd'), example: '0755'
- # whitespace (type = 'b'), example: ' '
- # any other single character (i.e. punct; type = the character itself).
- # We cannot do better than this yet because we might be in a quoted
- # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
- # tokens.
- my ( $str, $max_tokens_wanted ) = @_;
+#########i#############################################################
+# Tokenizer guessing routines for ambiguous situations
+#######################################################################
- # we return references to these 3 arrays:
- my @tokens = (); # array of the tokens themselves
- my @token_map = (0); # string position of start of each token
- my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
+sub guess_if_pattern_or_conditional {
- do {
+ # this routine is called when we have encountered a ? following an
+ # unknown bareword, and we must decide if it starts a pattern or not
+ # input parameters:
+ # $i - token index of the ? starting possible pattern
+ # output parameters:
+ # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
+ # msg = a warning or diagnostic message
+ # USES GLOBAL VARIABLES: $last_nonblank_token
+ my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+ my $is_pattern = 0;
+ my $msg = "guessing that ? after $last_nonblank_token starts a ";
- # whitespace
- if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
+ if ( $i >= $max_token_index ) {
+ $msg .= "conditional (no end to pattern found on the line)\n";
+ }
+ else {
+ my $ibeg = $i;
+ $i = $ibeg + 1;
+ my $next_token = $$rtokens[$i]; # first token after ?
- # numbers
- # note that this must come before words!
- elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
+ # look for a possible ending ? on this line..
+ my $in_quote = 1;
+ my $quote_depth = 0;
+ my $quote_character = '';
+ my $quote_pos = 0;
+ my $quoted_string;
+ (
+ $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string
+ )
+ = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+ $quote_pos, $quote_depth, $max_token_index );
- # words
- elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
+ if ($in_quote) {
- # single-character punctuation
- elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
+ # we didn't find an ending ? on this line,
+ # so we bias towards conditional
+ $is_pattern = 0;
+ $msg .= "conditional (no ending ? on this line)\n";
- # that's all..
- else {
- return ( \@tokens, \@token_map, \@type );
+ # we found an ending ?, so we bias towards a pattern
}
+ else {
- push @tokens, $1;
- push @token_map, pos($str);
-
- } while ( --$max_tokens_wanted != 0 );
-
- return ( \@tokens, \@token_map, \@type );
-}
-
-sub show_tokens {
-
- # this is an old debug routine
- my ( $rtokens, $rtoken_map ) = @_;
- my $num = scalar(@$rtokens);
- my $i;
-
- for ( $i = 0 ; $i < $num ; $i++ ) {
- my $len = length( $$rtokens[$i] );
- print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
- }
-}
-
-sub find_angle_operator_termination {
-
- # We are looking at a '<' and want to know if it is an angle operator.
- # We are to return:
- # $i = pretoken index of ending '>' if found, current $i otherwise
- # $type = 'Q' if found, '>' otherwise
- my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_;
- my $i = $i_beg;
- my $type = '<';
- pos($input_line) = 1 + $$rtoken_map[$i];
-
- my $filter;
-
- # we just have to find the next '>' if a term is expected
- if ( $expecting == TERM ) { $filter = '[\>]' }
-
- # we have to guess if we don't know what is expected
- elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
-
- # shouldn't happen - we shouldn't be here if operator is expected
- else { warning("Program Bug in find_angle_operator_termination\n") }
-
- # To illustrate what we might be looking at, in case we are
- # guessing, here are some examples of valid angle operators
- # (or file globs):
- # <tmp_imp/*>
- # <FH>
- # <$fh>
- # <*.c *.h>
- # <_>
- # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
- # <${PREFIX}*img*.$IMAGE_TYPE>
- # <img*.$IMAGE_TYPE>
- # <Timg*.$IMAGE_TYPE>
- # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
- #
- # Here are some examples of lines which do not have angle operators:
- # return undef unless $self->[2]++ < $#{$self->[1]};
- # < 2 || @$t >
- #
- # the following line from dlister.pl caused trouble:
- # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
- #
- # If the '<' starts an angle operator, it must end on this line and
- # it must not have certain characters like ';' and '=' in it. I use
- # this to limit the testing. This filter should be improved if
- # possible.
-
- if ( $input_line =~ /($filter)/g ) {
-
- if ( $1 eq '>' ) {
-
- # We MAY have found an angle operator termination if we get
- # here, but we need to do more to be sure we haven't been
- # fooled.
- my $pos = pos($input_line);
-
- my $pos_beg = $$rtoken_map[$i];
- my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
-
- ######################################debug#####
- #write_diagnostics( "ANGLE? :$str\n");
- #print "ANGLE: found $1 at pos=$pos\n";
- ######################################debug#####
- $type = 'Q';
- my $error;
- ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
-
- # It may be possible that a quote ends midway in a pretoken.
- # If this happens, it may be necessary to split the pretoken.
- if ($error) {
- warning(
- "Possible tokinization error..please check this line\n");
- report_possible_bug();
- }
-
- # Now let's see where we stand....
- # OK if math op not possible
- if ( $expecting == TERM ) {
- }
-
- # OK if there are no more than 2 pre-tokens inside
- # (not possible to write 2 token math between < and >)
- # This catches most common cases
- elsif ( $i <= $i_beg + 3 ) {
- write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
- }
-
- # Not sure..
- else {
-
- # Let's try a Brace Test: any braces inside must balance
- my $br = 0;
- while ( $str =~ /\{/g ) { $br++ }
- while ( $str =~ /\}/g ) { $br-- }
- my $sb = 0;
- while ( $str =~ /\[/g ) { $sb++ }
- while ( $str =~ /\]/g ) { $sb-- }
- my $pr = 0;
- while ( $str =~ /\(/g ) { $pr++ }
- while ( $str =~ /\)/g ) { $pr-- }
-
- # if braces do not balance - not angle operator
- if ( $br || $sb || $pr ) {
- $i = $i_beg;
- $type = '<';
- write_diagnostics(
- "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
- }
-
- # we should keep doing more checks here...to be continued
- # Tentatively accepting this as a valid angle operator.
- # There are lots more things that can be checked.
- else {
- write_diagnostics(
- "ANGLE-Guessing yes: $str expecting=$expecting\n");
- write_logfile_entry("Guessing angle operator here: $str\n");
- }
- }
- }
-
- # didn't find ending >
- else {
- if ( $expecting == TERM ) {
- warning("No ending > for angle operator\n");
- }
- }
- }
- return ( $i, $type );
-}
-
-sub inverse_pretoken_map {
-
- # Starting with the current pre_token index $i, scan forward until
- # finding the index of the next pre_token whose position is $pos.
- my ( $i, $pos, $rtoken_map ) = @_;
- my $error = 0;
-
- while ( ++$i <= $max_token_index ) {
-
- if ( $pos <= $$rtoken_map[$i] ) {
-
- # Let the calling routine handle errors in which we do not
- # land on a pre-token boundary. It can happen by running
- # perltidy on some non-perl scripts, for example.
- if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
- $i--;
- last;
- }
- }
- return ( $i, $error );
-}
-
-sub guess_if_pattern_or_conditional {
-
- # this routine is called when we have encountered a ? following an
- # unknown bareword, and we must decide if it starts a pattern or not
- # input parameters:
- # $i - token index of the ? starting possible pattern
- # output parameters:
- # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
- # msg = a warning or diagnostic message
- my ( $i, $rtokens, $rtoken_map ) = @_;
- my $is_pattern = 0;
- my $msg = "guessing that ? after $last_nonblank_token starts a ";
-
- if ( $i >= $max_token_index ) {
- $msg .= "conditional (no end to pattern found on the line)\n";
- }
- else {
- my $ibeg = $i;
- $i = $ibeg + 1;
- my $next_token = $$rtokens[$i]; # first token after ?
-
- # look for a possible ending ? on this line..
- my $in_quote = 1;
- my $quote_depth = 0;
- my $quote_character = '';
- my $quote_pos = 0;
- ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
- follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth );
-
- if ($in_quote) {
-
- # we didn't find an ending ? on this line,
- # so we bias towards conditional
- $is_pattern = 0;
- $msg .= "conditional (no ending ? on this line)\n";
-
- # we found an ending ?, so we bias towards a pattern
- }
- else {
-
- if ( pattern_expected( $i, $rtokens ) >= 0 ) {
+ if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
$is_pattern = 1;
$msg .= "pattern (found ending ? and pattern expected)\n";
}
# output parameters:
# $is_pattern = 0 if probably division, =1 if probably a pattern
# msg = a warning or diagnostic message
- my ( $i, $rtokens, $rtoken_map ) = @_;
+ # USES GLOBAL VARIABLES: $last_nonblank_token
+ my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
my $is_pattern = 0;
my $msg = "guessing that / after $last_nonblank_token starts a ";
if ( $i >= $max_token_index ) {
- "division (no end to pattern found on the line)\n";
+ $msg .= "division (no end to pattern found on the line)\n";
}
else {
my $ibeg = $i;
- my $divide_expected = numerator_expected( $i, $rtokens );
+ my $divide_expected =
+ numerator_expected( $i, $rtokens, $max_token_index );
$i = $ibeg + 1;
my $next_token = $$rtokens[$i]; # first token after slash
my $quote_depth = 0;
my $quote_character = '';
my $quote_pos = 0;
- ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
- follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth );
+ my $quoted_string;
+ (
+ $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string
+ )
+ = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+ $quote_pos, $quote_depth, $max_token_index );
if ($in_quote) {
# we found an ending /, so we bias towards a pattern
else {
- if ( pattern_expected( $i, $rtokens ) >= 0 ) {
+ if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
if ( $divide_expected >= 0 ) {
return ( $is_pattern, $msg );
}
-sub find_here_doc {
+# try to resolve here-doc vs. shift by looking ahead for
+# non-code or the end token (currently only looks for end token)
+# returns 1 if it is probably a here doc, 0 if not
+sub guess_if_here_doc {
- # find the target of a here document, if any
- # input parameters:
- # $i - token index of the second < of <<
- # ($i must be less than the last token index if this is called)
- # output parameters:
- # $found_target = 0 didn't find target; =1 found target
- # HERE_TARGET - the target string (may be empty string)
- # $i - unchanged if not here doc,
- # or index of the last token of the here target
- my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
- my $ibeg = $i;
- my $found_target = 0;
- my $here_doc_target = '';
- my $here_quote_character = '';
- my ( $next_nonblank_token, $i_next_nonblank, $next_token );
- $next_token = $$rtokens[ $i + 1 ];
+ # This is how many lines we will search for a target as part of the
+ # guessing strategy. It is a constant because there is probably
+ # little reason to change it.
+ # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
+ # %is_constant,
+ use constant HERE_DOC_WINDOW => 40;
- # perl allows a backslash before the target string (heredoc.t)
- my $backslash = 0;
- if ( $next_token eq '\\' ) {
- $backslash = 1;
- $next_token = $$rtokens[ $i + 2 ];
- }
+ my $next_token = shift;
+ my $here_doc_expected = 0;
+ my $line;
+ my $k = 0;
+ my $msg = "checking <<";
- ( $next_nonblank_token, $i_next_nonblank ) =
- find_next_nonblank_token_on_this_line( $i, $rtokens );
+ while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
+ {
+ chomp $line;
- if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
+ if ( $line =~ /^$next_token$/ ) {
+ $msg .= " -- found target $next_token ahead $k lines\n";
+ $here_doc_expected = 1; # got it
+ last;
+ }
+ last if ( $k >= HERE_DOC_WINDOW );
+ }
- my $in_quote = 1;
- my $quote_depth = 0;
- my $quote_pos = 0;
+ unless ($here_doc_expected) {
- ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) =
- follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
- $here_quote_character, $quote_pos, $quote_depth );
-
- if ($in_quote) { # didn't find end of quote, so no target found
- $i = $ibeg;
- }
- else { # found ending quote
- my $j;
- $found_target = 1;
-
- my $tokj;
- for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
- $tokj = $$rtokens[$j];
-
- # we have to remove any backslash before the quote character
- # so that the here-doc-target exactly matches this string
- next
- if ( $tokj eq "\\"
- && $j < $i - 1
- && $$rtokens[ $j + 1 ] eq $here_quote_character );
- $here_doc_target .= $tokj;
- }
- }
- }
-
- elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
- $found_target = 1;
- write_logfile_entry(
- "found blank here-target after <<; suggest using \"\"\n");
- $i = $ibeg;
- }
- elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
-
- my $here_doc_expected;
- if ( $expecting == UNKNOWN ) {
- $here_doc_expected = guess_if_here_doc($next_token);
- }
- else {
- $here_doc_expected = 1;
- }
-
- if ($here_doc_expected) {
- $found_target = 1;
- $here_doc_target = $next_token;
- $i = $ibeg + 1;
- }
-
- }
- else {
-
- if ( $expecting == TERM ) {
- $found_target = 1;
- write_logfile_entry("Note: bare here-doc operator <<\n");
- }
- else {
- $i = $ibeg;
- }
- }
-
- # patch to neglect any prepended backslash
- if ( $found_target && $backslash ) { $i++ }
-
- return ( $found_target, $here_doc_target, $here_quote_character, $i );
-}
-
-# try to resolve here-doc vs. shift by looking ahead for
-# non-code or the end token (currently only looks for end token)
-# returns 1 if it is probably a here doc, 0 if not
-sub guess_if_here_doc {
-
- # This is how many lines we will search for a target as part of the
- # guessing strategy. It is a constant because there is probably
- # little reason to change it.
- use constant HERE_DOC_WINDOW => 40;
-
- my $next_token = shift;
- my $here_doc_expected = 0;
- my $line;
- my $k = 0;
- my $msg = "checking <<";
-
- while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
- {
- chomp $line;
-
- if ( $line =~ /^$next_token$/ ) {
- $msg .= " -- found target $next_token ahead $k lines\n";
- $here_doc_expected = 1; # got it
- last;
- }
- last if ( $k >= HERE_DOC_WINDOW );
- }
-
- unless ($here_doc_expected) {
-
- if ( !defined($line) ) {
- $here_doc_expected = -1; # hit eof without seeing target
- $msg .= " -- must be shift; target $next_token not in file\n";
+ if ( !defined($line) ) {
+ $here_doc_expected = -1; # hit eof without seeing target
+ $msg .= " -- must be shift; target $next_token not in file\n";
}
else { # still unsure..taking a wild guess
return $here_doc_expected;
}
-sub do_quote {
-
- # follow (or continue following) quoted string or pattern
- # $in_quote return code:
- # 0 - ok, found end
- # 1 - still must find end of quote whose target is $quote_character
- # 2 - still looking for end of first of two quotes
- my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
- $rtoken_map )
- = @_;
-
- if ( $in_quote == 2 ) { # two quotes/patterns to follow
- my $ibeg = $i;
- ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
- follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth );
-
- if ( $in_quote == 1 ) {
- if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
- $quote_character = '';
- }
- }
-
- if ( $in_quote == 1 ) { # one (more) quote to follow
- my $ibeg = $i;
- ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
- follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth );
- }
- return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
-}
-
-sub scan_number_do {
-
- # scan a number in any of the formats that Perl accepts
- # Underbars (_) are allowed in decimal numbers.
- # input parameters -
- # $input_line - the string to scan
- # $i - pre_token index to start scanning
- # $rtoken_map - reference to the pre_token map giving starting
- # character position in $input_line of token $i
- # output parameters -
- # $i - last pre_token index of the number just scanned
- # number - the number (characters); or undef if not a number
-
- my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
- my $pos_beg = $$rtoken_map[$i];
- my $pos;
- my $i_begin = $i;
- my $number = undef;
- my $type = $input_type;
-
- my $first_char = substr( $input_line, $pos_beg, 1 );
-
- # Look for bad starting characters; Shouldn't happen..
- if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
- warning("Program bug - scan_number given character $first_char\n");
- report_definite_bug();
- return ( $i, $type, $number );
- }
-
- # handle v-string without leading 'v' character ('Two Dot' rule)
- # (vstring.t)
- pos($input_line) = $pos_beg;
- if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
- $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'v';
- unless ($saw_v_string) { report_v_string($number) }
- }
-
- # handle octal, hex, binary
- if ( !defined($number) ) {
- pos($input_line) = $pos_beg;
- if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
- {
- $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'n';
- }
- }
-
- # handle decimal
- if ( !defined($number) ) {
- pos($input_line) = $pos_beg;
-
- if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
- $pos = pos($input_line);
-
- # watch out for things like 0..40 which would give 0. by this;
- if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
- && ( substr( $input_line, $pos, 1 ) eq '.' ) )
- {
- $pos--;
- }
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'n';
- }
- }
-
- # filter out non-numbers like e + - . e2 .e3 +e6
- # the rule: at least one digit, and any 'e' must be preceded by a digit
- if (
- $number !~ /\d/ # no digits
- || ( $number =~ /^(.*)[eE]/
- && $1 !~ /\d/ ) # or no digits before the 'e'
- )
- {
- $number = undef;
- $type = $input_type;
- return ( $i, $type, $number );
- }
-
- # Found a number; now we must convert back from character position
- # to pre_token index. An error here implies user syntax error.
- # An example would be an invalid octal number like '009'.
- my $error;
- ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
- if ($error) { warning("Possibly invalid number\n") }
-
- return ( $i, $type, $number );
-}
+#########i#############################################################
+# Tokenizer Routines for scanning identifiers and related items
+#######################################################################
sub scan_bare_identifier_do {
# this routine is called to scan a token starting with an alphanumeric
# variable or package separator, :: or '.
+ # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
+ # $last_nonblank_type,@paren_type, $paren_depth
- my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_;
+ my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
+ $max_token_index )
+ = @_;
my $i_begin = $i;
my $package = undef;
if ( $type eq 'w' ) {
# check for v-string with leading 'v' type character
- # (This seems to have presidence over filehandle, type 'Y')
- if ( $tok =~ /^v\d+$/ ) {
+ # (This seems to have precedence over filehandle, type 'Y')
+ if ( $tok =~ /^v\d[_\d]*$/ ) {
# we only have the first part - something like 'v101' -
# look for more
- if ( $input_line =~ m/\G(\.\d+)+/gc ) {
+ if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
$pos = pos($input_line);
$numc = $pos - $pos_beg;
$tok = substr( $input_line, $pos_beg, $numc );
$type = 'v';
# warn if this version can't handle v-strings
- unless ($saw_v_string) { report_v_string($tok) }
+ report_v_string($tok);
}
elsif ( $is_constant{$package}{$sub_name} ) {
# doesn't get in the way of good scripts.
# Complain if a filehandle has any lower case
- # letters. This is suggested good practice, but the
- # main reason for this warning is that prior to
- # release 20010328, perltidy incorrectly parsed a
- # function call after a print/printf, with the
- # result that a space got added before the opening
- # paren, thereby converting the function name to a
- # filehandle according to perl's weird rules. This
- # will not usually generate a syntax error, so this
- # is a potentially serious bug. By warning
- # of filehandles with any lower case letters,
- # followed by opening parens, we will help the user
- # find almost all of these older errors.
- # use 'sub_name' because something like
+ # letters. This is suggested good practice.
+ # Use 'sub_name' because something like
# main::MYHANDLE is ok for filehandle
if ( $sub_name =~ /[a-z]/ ) {
# to pre_token index.
# I don't think an error flag can occur here ..but who knows
my $error;
- ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
+ ( $i, $error ) =
+ inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
if ($error) {
warning("scan_bare_identifier: Possibly invalid tokenization\n");
}
sub scan_id_do {
- # This is the new scanner and will eventually replace scan_identifier.
- # Only type 'sub' and 'package' are implemented.
- # Token types $ * % @ & -> are not yet implemented.
- #
- # Scan identifier following a type token.
- # The type of call depends on $id_scan_state: $id_scan_state = ''
- # for starting call, in which case $tok must be the token defining
- # the type.
- #
- # If the type token is the last nonblank token on the line, a value
- # of $id_scan_state = $tok is returned, indicating that further
- # calls must be made to get the identifier. If the type token is
- # not the last nonblank token on the line, the identifier is
- # scanned and handled and a value of '' is returned.
-
- my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_;
+# This is the new scanner and will eventually replace scan_identifier.
+# Only type 'sub' and 'package' are implemented.
+# Token types $ * % @ & -> are not yet implemented.
+#
+# Scan identifier following a type token.
+# The type of call depends on $id_scan_state: $id_scan_state = ''
+# for starting call, in which case $tok must be the token defining
+# the type.
+#
+# If the type token is the last nonblank token on the line, a value
+# of $id_scan_state = $tok is returned, indicating that further
+# calls must be made to get the identifier. If the type token is
+# not the last nonblank token on the line, the identifier is
+# scanned and handled and a value of '' is returned.
+# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
+# $statement_type, $tokenizer_self
+
+ my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
+ $max_token_index )
+ = @_;
my $type = '';
my ( $i_beg, $pos_beg );
if ( $next_nonblank_token =~ /^\s/ ) {
( $next_nonblank_token, $i_beg ) =
- find_next_nonblank_token_on_this_line( $i_beg, $rtokens );
+ find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
+ $max_token_index );
if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
$blank_line = 1;
}
unless ($blank_line) {
if ( $id_scan_state eq 'sub' ) {
- ( $i, $tok, $type, $id_scan_state ) =
- do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens,
- $rtoken_map, $id_scan_state );
+ ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
+ $input_line, $i, $i_beg,
+ $tok, $type, $rtokens,
+ $rtoken_map, $id_scan_state, $max_token_index
+ );
}
elsif ( $id_scan_state eq 'package' ) {
( $i, $tok, $type ) =
do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
- $rtoken_map );
+ $rtoken_map, $max_token_index );
$id_scan_state = '';
}
}
TOKENIZER_DEBUG_FLAG_NSCAN && do {
- print
+ print STDOUT
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
};
return ( $i, $tok, $type, $id_scan_state );
}
-{
+sub check_prototype {
+ my ( $proto, $package, $subname ) = @_;
+ return unless ( defined($package) && defined($subname) );
+ if ( defined($proto) ) {
+ $proto =~ s/^\s*\(\s*//;
+ $proto =~ s/\s*\)$//;
+ if ($proto) {
+ $is_user_function{$package}{$subname} = 1;
+ $user_function_prototype{$package}{$subname} = "($proto)";
- # saved package and subnames in case prototype is on separate line
- my ( $package_saved, $subname_saved );
+ # prototypes containing '&' must be treated specially..
+ if ( $proto =~ /\&/ ) {
- sub do_scan_sub {
+ # right curly braces of prototypes ending in
+ # '&' may be followed by an operator
+ if ( $proto =~ /\&$/ ) {
+ $is_block_function{$package}{$subname} = 1;
+ }
- # do_scan_sub parses a sub name and prototype
- # it is called with $i_beg equal to the index of the first nonblank
- # token following a 'sub' token.
+ # right curly braces of prototypes NOT ending in
+ # '&' may NOT be followed by an operator
+ elsif ( $proto !~ /\&$/ ) {
+ $is_block_list_function{$package}{$subname} = 1;
+ }
+ }
+ }
+ else {
+ $is_constant{$package}{$subname} = 1;
+ }
+ }
+ else {
+ $is_user_function{$package}{$subname} = 1;
+ }
+}
- # TODO: add future error checks to be sure we have a valid
- # sub name. For example, 'sub &doit' is wrong. Also, be sure
- # a name is given if and only if a non-anonymous sub is
- # appropriate.
-
- my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
- $id_scan_state )
- = @_;
- $id_scan_state = ""; # normally we get everything in one call
- my $subname = undef;
- my $package = undef;
- my $proto = undef;
- my $attrs = undef;
- my $match;
-
- my $pos_beg = $$rtoken_map[$i_beg];
- pos($input_line) = $pos_beg;
-
- # sub NAME PROTO ATTRS
- if (
- $input_line =~ m/\G\s*
- ((?:\w*(?:'|::))*) # package - something that ends in :: or '
- (\w+) # NAME - required
- (\s*\([^){]*\))? # PROTO - something in parens
- (\s*:)? # ATTRS - leading : of attribute list
- /gcx
- )
- {
- $match = 1;
- $subname = $2;
- $proto = $3;
- $attrs = $4;
-
- $package = ( defined($1) && $1 ) ? $1 : $current_package;
- $package =~ s/\'/::/g;
- if ( $package =~ /^\:/ ) { $package = 'main' . $package }
- $package =~ s/::$//;
- my $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
- $type = 'i';
- }
-
- # Look for prototype/attributes not preceded on this line by subname;
- # This might be an anonymous sub with attributes,
- # or a prototype on a separate line from its sub name
- elsif (
- $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
- (\s*:)? # ATTRS leading ':'
- /gcx
- && ( $1 || $2 )
- )
- {
- $match = 1;
- $proto = $1;
- $attrs = $2;
-
- # Handle prototype on separate line from subname
- if ($subname_saved) {
- $package = $package_saved;
- $subname = $subname_saved;
- $tok = $last_nonblank_token;
- }
- $type = 'i';
- }
-
- if ($match) {
-
- # ATTRS: if there are attributes, back up and let the ':' be
- # found later by the scanner.
- my $pos = pos($input_line);
- if ($attrs) {
- $pos -= length($attrs);
- }
-
- my $next_nonblank_token = $tok;
-
- # catch case of line with leading ATTR ':' after anonymous sub
- if ( $pos == $pos_beg && $tok eq ':' ) {
- $type = 'A';
- }
-
- # We must convert back from character position
- # to pre_token index.
- else {
-
- # I don't think an error flag can occur here ..but ?
- my $error;
- ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
- if ($error) { warning("Possibly invalid sub\n") }
-
- # check for multiple definitions of a sub
- ( $next_nonblank_token, my $i_next ) =
- find_next_nonblank_token_on_this_line( $i, $rtokens );
- }
-
- if ( $next_nonblank_token =~ /^(\s*|#)$/ )
- { # skip blank or side comment
- my ( $rpre_tokens, $rpre_types ) =
- peek_ahead_for_n_nonblank_pre_tokens(1);
- if ( defined($rpre_tokens) && @$rpre_tokens ) {
- $next_nonblank_token = $rpre_tokens->[0];
- }
- else {
- $next_nonblank_token = '}';
- }
- }
- $package_saved = "";
- $subname_saved = "";
- if ( $next_nonblank_token eq '{' ) {
- if ($subname) {
- if ( $saw_function_definition{$package}{$subname} ) {
- my $lno = $saw_function_definition{$package}{$subname};
- warning(
-"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
- );
- }
- $saw_function_definition{$package}{$subname} =
- $input_line_number;
- }
- }
- elsif ( $next_nonblank_token eq ';' ) {
- }
- elsif ( $next_nonblank_token eq '}' ) {
- }
-
- # ATTRS - if an attribute list follows, remember the name
- # of the sub so the next opening brace can be labeled.
- # Setting 'statement_type' causes any ':'s to introduce
- # attributes.
- elsif ( $next_nonblank_token eq ':' ) {
- $statement_type = $tok;
- }
-
- # see if PROTO follows on another line:
- elsif ( $next_nonblank_token eq '(' ) {
- if ( $attrs || $proto ) {
- warning(
-"unexpected '(' after definition or declaration of sub '$subname'\n"
- );
- }
- else {
- $id_scan_state = 'sub'; # we must come back to get proto
- $statement_type = $tok;
- $package_saved = $package;
- $subname_saved = $subname;
- }
- }
- elsif ($next_nonblank_token) { # EOF technically ok
- warning(
-"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
- );
- }
- check_prototype( $proto, $package, $subname );
- }
-
- # no match but line not blank
- else {
- }
- return ( $i, $tok, $type, $id_scan_state );
- }
-}
-
-sub check_prototype {
- my ( $proto, $package, $subname ) = @_;
- return unless ( defined($package) && defined($subname) );
- if ( defined($proto) ) {
- $proto =~ s/^\s*\(\s*//;
- $proto =~ s/\s*\)$//;
- if ($proto) {
- $is_user_function{$package}{$subname} = 1;
- $user_function_prototype{$package}{$subname} = "($proto)";
-
- # prototypes containing '&' must be treated specially..
- if ( $proto =~ /\&/ ) {
-
- # right curly braces of prototypes ending in
- # '&' may be followed by an operator
- if ( $proto =~ /\&$/ ) {
- $is_block_function{$package}{$subname} = 1;
- }
-
- # right curly braces of prototypes NOT ending in
- # '&' may NOT be followed by an operator
- elsif ( $proto !~ /\&$/ ) {
- $is_block_list_function{$package}{$subname} = 1;
- }
- }
- }
- else {
- $is_constant{$package}{$subname} = 1;
- }
- }
- else {
- $is_user_function{$package}{$subname} = 1;
- }
-}
-
-sub do_scan_package {
+sub do_scan_package {
# do_scan_package parses a package name
# it is called with $i_beg equal to the index of the first nonblank
# token following a 'package' token.
+ # USES GLOBAL VARIABLES: $current_package,
- my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_;
+ # package NAMESPACE
+ # package NAMESPACE VERSION
+ # package NAMESPACE BLOCK
+ # package NAMESPACE VERSION BLOCK
+ #
+ # If VERSION is provided, package sets the $VERSION variable in the given
+ # namespace to a version object with the VERSION provided. VERSION must be
+ # a "strict" style version number as defined by the version module: a
+ # positive decimal number (integer or decimal-fraction) without
+ # exponentiation or else a dotted-decimal v-string with a leading 'v'
+ # character and at least three components.
+ # reference http://perldoc.perl.org/functions/package.html
+
+ my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
+ $max_token_index )
+ = @_;
my $package = undef;
my $pos_beg = $$rtoken_map[$i_beg];
pos($input_line) = $pos_beg;
$package =~ s/::$//;
my $pos = pos($input_line);
my $numc = $pos - $pos_beg;
- $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
+ $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
$type = 'i';
# Now we must convert back from character position
# to pre_token index.
# I don't think an error flag can occur here ..but ?
my $error;
- ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
+ ( $i, $error ) =
+ inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
if ($error) { warning("Possibly invalid package\n") }
$current_package = $package;
- # check for error
+ # we should now have package NAMESPACE
+ # now expecting VERSION, BLOCK, or ; to follow ...
+ # package NAMESPACE VERSION
+ # package NAMESPACE BLOCK
+ # package NAMESPACE VERSION BLOCK
my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens );
- if ( $next_nonblank_token !~ /^[;\}]$/ ) {
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ # check that something recognizable follows, but do not parse.
+ # A VERSION number will be parsed later as a number or v-string in the
+ # normal way. What is important is to set the statement type if
+ # everything looks okay so that the operator_expected() routine
+ # knows that the number is in a package statement.
+ # Examples of valid primitive tokens that might follow are:
+ # 1235 . ; { } v3 v
+ if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
+ $statement_type = $tok;
+ }
+ else {
warning(
"Unexpected '$next_nonblank_token' after package name '$tok'\n"
);
# scan state, id_scan_state. It updates id_scan_state based upon
# current id_scan_state and token, and returns an updated
# id_scan_state and the next index after the identifier.
+ # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
+ # $last_nonblank_type
- my ( $i, $id_scan_state, $identifier, $rtokens ) = @_;
+ my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
+ $expecting, $container_type )
+ = @_;
my $i_begin = $i;
my $type = '';
my $tok_begin = $$rtokens[$i_begin];
my $tok = $tok_begin;
my $message = "";
+ my $in_prototype_or_signature = $container_type =~ /^sub/;
+
# these flags will be used to help figure out the type:
my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
my $saw_type;
while ( $i < $max_token_index ) {
$i_save = $i unless ( $tok =~ /^\s*$/ );
- $tok = $$rtokens[ ++$i ];
+ $tok = $$rtokens[ ++$i ];
if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
$tok = '::';
last;
}
}
+
+ # POSTDEFREF ->@ ->% ->& ->*
+ elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
+ $identifier .= $tok;
+ }
elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
$saw_alpha = 1;
$id_scan_state = ':'; # now need ::
# howdy::123::bubba();
#
}
- elsif ( $tok =~ /^[0-9]/ ) { # numeric
+ elsif ( $tok =~ /^[0-9]/ ) { # numeric
$saw_alpha = 1;
- $id_scan_state = ':'; # now need ::
+ $id_scan_state = ':'; # now need ::
$identifier .= $tok;
}
elsif ( $tok eq '::' ) {
$id_scan_state = 'A';
$identifier .= $tok;
}
- elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
+
+ # $# and POSTDEFREF ->$#
+ elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
$identifier .= $tok; # keep same state, a $ could follow
}
elsif ( $tok eq '{' ) {
# check for something like ${#} or ${©}
- if ( $identifier eq '$'
+ ##if ( $identifier eq '$'
+ if (
+ (
+ $identifier eq '$'
+ || $identifier eq '@'
+ || $identifier eq '$#'
+ )
&& $i + 2 <= $max_token_index
&& $$rtokens[ $i + 2 ] eq '}'
- && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
+ && $$rtokens[ $i + 1 ] !~ /[\s\w]/
+ )
{
my $next2 = $$rtokens[ $i + 2 ];
my $next1 = $$rtokens[ $i + 1 ];
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
$identifier .= $tok;
$id_scan_state = 'A';
+
+ # Perl accepts '$^]' or '@^]', but
+ # there must not be a space before the ']'.
+ my $next1 = $$rtokens[ $i + 1 ];
+ if ( $next1 eq ']' ) {
+ $i++;
+ $identifier .= $next1;
+ $id_scan_state = "";
+ last;
+ }
}
else {
$id_scan_state = '';
}
else { # something else
+ if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
+ $id_scan_state = '';
+ $i = $i_save;
+ $type = 'i'; # probably punctuation variable
+ last;
+ }
+
# check for various punctuation variables
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
$identifier .= $tok;
}
+ # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
+ elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
+ $identifier .= $tok;
+ }
+
elsif ( $identifier eq '$#' ) {
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
# $a = ${$:};
$i = $i_save;
- if ( $tok eq '{' ) { $type = 't' }
- else { $type = 'i' }
+ if ( $tok eq '{' ) { $type = 't' }
+ else { $type = 'i' }
}
elsif ( $identifier eq '->' ) {
$i = $i_save;
# punctuation variable?
# testfile: cunningham4.pl
- if ( $identifier eq '&' ) {
+ #
+ # We have to be careful here. If we are in an unknown state,
+ # we will reject the punctuation variable. In the following
+ # example the '&' is a binary operator but we are in an unknown
+ # state because there is no sigil on 'Prima', so we don't
+ # know what it is. But it is a bad guess that
+ # '&~' is a function variable.
+ # $self->{text}->{colorMap}->[
+ # Prima::PodView::COLOR_CODE_FOREGROUND
+ # & ~tb::COLOR_INDEX ] =
+ # $sec->{ColorCode}
+ if ( $identifier eq '&' && $expecting ) {
$identifier .= $tok;
}
else {
}
}
- if ( $id_scan_state eq ')' ) {
- warning("Hit end of line while seeking ) to end prototype\n");
+ if ( $id_scan_state eq ')' ) {
+ warning("Hit end of line while seeking ) to end prototype\n");
+ }
+
+ # once we enter the actual identifier, it may not extend beyond
+ # the end of the current line
+ if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
+ $id_scan_state = '';
+ }
+ if ( $i < 0 ) { $i = 0 }
+
+ unless ($type) {
+
+ if ($saw_type) {
+
+ if ($saw_alpha) {
+ if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
+ $type = 'w';
+ }
+ else { $type = 'i' }
+ }
+ elsif ( $identifier eq '->' ) {
+ $type = '->';
+ }
+ elsif (
+ ( length($identifier) > 1 )
+
+ # In something like '@$=' we have an identifier '@$'
+ # In something like '$${' we have type '$$' (and only
+ # part of an identifier)
+ && !( $identifier =~ /\$$/ && $tok eq '{' )
+ && ( $identifier !~ /^(sub |package )$/ )
+ )
+ {
+ $type = 'i';
+ }
+ else { $type = 't' }
+ }
+ elsif ($saw_alpha) {
+
+ # type 'w' includes anything without leading type info
+ # ($,%,@,*) including something like abc::def::ghi
+ $type = 'w';
+ }
+ else {
+ $type = '';
+ } # this can happen on a restart
+ }
+
+ if ($identifier) {
+ $tok = $identifier;
+ if ($message) { write_logfile_entry($message) }
+ }
+ else {
+ $tok = $tok_begin;
+ $i = $i_begin;
+ }
+
+ TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
+ my ( $a, $b, $c ) = caller;
+ print STDOUT
+"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
+ print STDOUT
+"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
+ };
+ return ( $i, $tok, $type, $id_scan_state, $identifier );
+}
+
+{
+
+ # saved package and subnames in case prototype is on separate line
+ my ( $package_saved, $subname_saved );
+
+ sub do_scan_sub {
+
+ # do_scan_sub parses a sub name and prototype
+ # it is called with $i_beg equal to the index of the first nonblank
+ # token following a 'sub' token.
+
+ # TODO: add future error checks to be sure we have a valid
+ # sub name. For example, 'sub &doit' is wrong. Also, be sure
+ # a name is given if and only if a non-anonymous sub is
+ # appropriate.
+ # USES GLOBAL VARS: $current_package, $last_nonblank_token,
+ # $in_attribute_list, %saw_function_definition,
+ # $statement_type
+
+ my (
+ $input_line, $i, $i_beg,
+ $tok, $type, $rtokens,
+ $rtoken_map, $id_scan_state, $max_token_index
+ ) = @_;
+ $id_scan_state = ""; # normally we get everything in one call
+ my $subname = undef;
+ my $package = undef;
+ my $proto = undef;
+ my $attrs = undef;
+ my $match;
+
+ my $pos_beg = $$rtoken_map[$i_beg];
+ pos($input_line) = $pos_beg;
+
+ # Look for the sub NAME
+ if (
+ $input_line =~ m/\G\s*
+ ((?:\w*(?:'|::))*) # package - something that ends in :: or '
+ (\w+) # NAME - required
+ /gcx
+ )
+ {
+ $match = 1;
+ $subname = $2;
+
+ $package = ( defined($1) && $1 ) ? $1 : $current_package;
+ $package =~ s/\'/::/g;
+ if ( $package =~ /^\:/ ) { $package = 'main' . $package }
+ $package =~ s/::$//;
+ my $pos = pos($input_line);
+ my $numc = $pos - $pos_beg;
+ $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
+ $type = 'i';
+ }
+
+ # Now look for PROTO ATTRS
+ # Look for prototype/attributes which are usually on the same
+ # line as the sub name but which might be on a separate line.
+ # For example, we might have an anonymous sub with attributes,
+ # or a prototype on a separate line from its sub name
+
+ # NOTE: We only want to parse PROTOTYPES here. If we see anything that
+ # does not look like a prototype, we assume it is a SIGNATURE and we
+ # will stop and let the the standard tokenizer handle it. In
+ # particular, we stop if we see any nested parens, braces, or commas.
+ my $saw_opening_paren = $input_line =~ /\G\s*\(/;
+ if (
+ $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
+ (\s*:)? # ATTRS leading ':'
+ /gcx
+ && ( $1 || $2 )
+ )
+ {
+ $proto = $1;
+ $attrs = $2;
+
+ # If we also found the sub name on this call then append PROTO.
+ # This is not necessary but for compatability with previous
+ # versions when the -csc flag is used:
+ if ( $match && $proto ) {
+ $tok .= $proto;
+ }
+ $match ||= 1;
+
+ # Handle prototype on separate line from subname
+ if ($subname_saved) {
+ $package = $package_saved;
+ $subname = $subname_saved;
+ $tok = $last_nonblank_token;
+ }
+ $type = 'i';
+ }
+
+ if ($match) {
+
+ # ATTRS: if there are attributes, back up and let the ':' be
+ # found later by the scanner.
+ my $pos = pos($input_line);
+ if ($attrs) {
+ $pos -= length($attrs);
+ }
+
+ my $next_nonblank_token = $tok;
+
+ # catch case of line with leading ATTR ':' after anonymous sub
+ if ( $pos == $pos_beg && $tok eq ':' ) {
+ $type = 'A';
+ $in_attribute_list = 1;
+ }
+
+ # Otherwise, if we found a match we must convert back from
+ # string position to the pre_token index for continued parsing.
+ else {
+
+ # I don't think an error flag can occur here ..but ?
+ my $error;
+ ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
+ $max_token_index );
+ if ($error) { warning("Possibly invalid sub\n") }
+
+ # check for multiple definitions of a sub
+ ( $next_nonblank_token, my $i_next ) =
+ find_next_nonblank_token_on_this_line( $i, $rtokens,
+ $max_token_index );
+ }
+
+ if ( $next_nonblank_token =~ /^(\s*|#)$/ )
+ { # skip blank or side comment
+ my ( $rpre_tokens, $rpre_types ) =
+ peek_ahead_for_n_nonblank_pre_tokens(1);
+ if ( defined($rpre_tokens) && @$rpre_tokens ) {
+ $next_nonblank_token = $rpre_tokens->[0];
+ }
+ else {
+ $next_nonblank_token = '}';
+ }
+ }
+ $package_saved = "";
+ $subname_saved = "";
+
+ # See what's next...
+ if ( $next_nonblank_token eq '{' ) {
+ if ($subname) {
+
+ # Check for multiple definitions of a sub, but
+ # it is ok to have multiple sub BEGIN, etc,
+ # so we do not complain if name is all caps
+ if ( $saw_function_definition{$package}{$subname}
+ && $subname !~ /^[A-Z]+$/ )
+ {
+ my $lno = $saw_function_definition{$package}{$subname};
+ warning(
+"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
+ );
+ }
+ $saw_function_definition{$package}{$subname} =
+ $tokenizer_self->{_last_line_number};
+ }
+ }
+ elsif ( $next_nonblank_token eq ';' ) {
+ }
+ elsif ( $next_nonblank_token eq '}' ) {
+ }
+
+ # ATTRS - if an attribute list follows, remember the name
+ # of the sub so the next opening brace can be labeled.
+ # Setting 'statement_type' causes any ':'s to introduce
+ # attributes.
+ elsif ( $next_nonblank_token eq ':' ) {
+ $statement_type = $tok;
+ }
+
+ # if we stopped before an open paren ...
+ elsif ( $next_nonblank_token eq '(' ) {
+
+ # If we DID NOT see this paren above then it must be on the
+ # next line so we will set a flag to come back here and see if
+ # it is a PROTOTYPE
+
+ # Otherwise, we assume it is a SIGNATURE rather than a
+ # PROTOTYPE and let the normal tokenizer handle it as a list
+ if ( !$saw_opening_paren ) {
+ $id_scan_state = 'sub'; # we must come back to get proto
+ $package_saved = $package;
+ $subname_saved = $subname;
+ }
+ $statement_type = $tok;
+ }
+ elsif ($next_nonblank_token) { # EOF technically ok
+ warning(
+"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
+ );
+ }
+ check_prototype( $proto, $package, $subname );
+ }
+
+ # no match but line not blank
+ else {
+ }
+ return ( $i, $tok, $type, $id_scan_state );
+ }
+}
+
+#########i###############################################################
+# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
+#########################################################################
+
+sub find_next_nonblank_token {
+ my ( $i, $rtokens, $max_token_index ) = @_;
+
+ if ( $i >= $max_token_index ) {
+ if ( !peeked_ahead() ) {
+ peeked_ahead(1);
+ $rtokens =
+ peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
+ }
+ }
+ my $next_nonblank_token = $$rtokens[ ++$i ];
+
+ if ( $next_nonblank_token =~ /^\s*$/ ) {
+ $next_nonblank_token = $$rtokens[ ++$i ];
+ }
+ return ( $next_nonblank_token, $i );
+}
+
+sub numerator_expected {
+
+ # this is a filter for a possible numerator, in support of guessing
+ # for the / pattern delimiter token.
+ # returns -
+ # 1 - yes
+ # 0 - can't tell
+ # -1 - no
+ # Note: I am using the convention that variables ending in
+ # _expected have these 3 possible values.
+ my ( $i, $rtokens, $max_token_index ) = @_;
+ my $next_token = $$rtokens[ $i + 1 ];
+ if ( $next_token eq '=' ) { $i++; } # handle /=
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
+ 1;
+ }
+ else {
+
+ if ( $next_nonblank_token =~ /^\s*$/ ) {
+ 0;
+ }
+ else {
+ -1;
+ }
+ }
+}
+
+sub pattern_expected {
+
+ # This is the start of a filter for a possible pattern.
+ # It looks at the token after a possible pattern and tries to
+ # determine if that token could end a pattern.
+ # returns -
+ # 1 - yes
+ # 0 - can't tell
+ # -1 - no
+ my ( $i, $rtokens, $max_token_index ) = @_;
+ my $next_token = $$rtokens[ $i + 1 ];
+ if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ # list of tokens which may follow a pattern
+ # (can probably be expanded)
+ if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
+ {
+ 1;
+ }
+ else {
+
+ if ( $next_nonblank_token =~ /^\s*$/ ) {
+ 0;
+ }
+ else {
+ -1;
+ }
+ }
+}
+
+sub find_next_nonblank_token_on_this_line {
+ my ( $i, $rtokens, $max_token_index ) = @_;
+ my $next_nonblank_token;
+
+ if ( $i < $max_token_index ) {
+ $next_nonblank_token = $$rtokens[ ++$i ];
+
+ if ( $next_nonblank_token =~ /^\s*$/ ) {
+
+ if ( $i < $max_token_index ) {
+ $next_nonblank_token = $$rtokens[ ++$i ];
+ }
+ }
+ }
+ else {
+ $next_nonblank_token = "";
+ }
+ return ( $next_nonblank_token, $i );
+}
+
+sub find_angle_operator_termination {
+
+ # We are looking at a '<' and want to know if it is an angle operator.
+ # We are to return:
+ # $i = pretoken index of ending '>' if found, current $i otherwise
+ # $type = 'Q' if found, '>' otherwise
+ my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
+ my $i = $i_beg;
+ my $type = '<';
+ pos($input_line) = 1 + $$rtoken_map[$i];
+
+ my $filter;
+
+ # we just have to find the next '>' if a term is expected
+ if ( $expecting == TERM ) { $filter = '[\>]' }
+
+ # we have to guess if we don't know what is expected
+ elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
+
+ # shouldn't happen - we shouldn't be here if operator is expected
+ else { warning("Program Bug in find_angle_operator_termination\n") }
+
+ # To illustrate what we might be looking at, in case we are
+ # guessing, here are some examples of valid angle operators
+ # (or file globs):
+ # <tmp_imp/*>
+ # <FH>
+ # <$fh>
+ # <*.c *.h>
+ # <_>
+ # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
+ # <${PREFIX}*img*.$IMAGE_TYPE>
+ # <img*.$IMAGE_TYPE>
+ # <Timg*.$IMAGE_TYPE>
+ # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
+ #
+ # Here are some examples of lines which do not have angle operators:
+ # return undef unless $self->[2]++ < $#{$self->[1]};
+ # < 2 || @$t >
+ #
+ # the following line from dlister.pl caused trouble:
+ # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
+ #
+ # If the '<' starts an angle operator, it must end on this line and
+ # it must not have certain characters like ';' and '=' in it. I use
+ # this to limit the testing. This filter should be improved if
+ # possible.
+
+ if ( $input_line =~ /($filter)/g ) {
+
+ if ( $1 eq '>' ) {
+
+ # We MAY have found an angle operator termination if we get
+ # here, but we need to do more to be sure we haven't been
+ # fooled.
+ my $pos = pos($input_line);
+
+ my $pos_beg = $$rtoken_map[$i];
+ my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
+
+ # Reject if the closing '>' follows a '-' as in:
+ # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
+ if ( $expecting eq UNKNOWN ) {
+ my $check = substr( $input_line, $pos - 2, 1 );
+ if ( $check eq '-' ) {
+ return ( $i, $type );
+ }
+ }
+
+ ######################################debug#####
+ #write_diagnostics( "ANGLE? :$str\n");
+ #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
+ ######################################debug#####
+ $type = 'Q';
+ my $error;
+ ( $i, $error ) =
+ inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+
+ # It may be possible that a quote ends midway in a pretoken.
+ # If this happens, it may be necessary to split the pretoken.
+ if ($error) {
+ warning(
+ "Possible tokinization error..please check this line\n");
+ report_possible_bug();
+ }
+
+ # Now let's see where we stand....
+ # OK if math op not possible
+ if ( $expecting == TERM ) {
+ }
+
+ # OK if there are no more than 2 pre-tokens inside
+ # (not possible to write 2 token math between < and >)
+ # This catches most common cases
+ elsif ( $i <= $i_beg + 3 ) {
+ write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
+ }
+
+ # Not sure..
+ else {
+
+ # Let's try a Brace Test: any braces inside must balance
+ my $br = 0;
+ while ( $str =~ /\{/g ) { $br++ }
+ while ( $str =~ /\}/g ) { $br-- }
+ my $sb = 0;
+ while ( $str =~ /\[/g ) { $sb++ }
+ while ( $str =~ /\]/g ) { $sb-- }
+ my $pr = 0;
+ while ( $str =~ /\(/g ) { $pr++ }
+ while ( $str =~ /\)/g ) { $pr-- }
+
+ # if braces do not balance - not angle operator
+ if ( $br || $sb || $pr ) {
+ $i = $i_beg;
+ $type = '<';
+ write_diagnostics(
+ "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
+ }
+
+ # we should keep doing more checks here...to be continued
+ # Tentatively accepting this as a valid angle operator.
+ # There are lots more things that can be checked.
+ else {
+ write_diagnostics(
+ "ANGLE-Guessing yes: $str expecting=$expecting\n");
+ write_logfile_entry("Guessing angle operator here: $str\n");
+ }
+ }
+ }
+
+ # didn't find ending >
+ else {
+ if ( $expecting == TERM ) {
+ warning("No ending > for angle operator\n");
+ }
+ }
+ }
+ return ( $i, $type );
+}
+
+sub scan_number_do {
+
+ # scan a number in any of the formats that Perl accepts
+ # Underbars (_) are allowed in decimal numbers.
+ # input parameters -
+ # $input_line - the string to scan
+ # $i - pre_token index to start scanning
+ # $rtoken_map - reference to the pre_token map giving starting
+ # character position in $input_line of token $i
+ # output parameters -
+ # $i - last pre_token index of the number just scanned
+ # number - the number (characters); or undef if not a number
+
+ my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
+ my $pos_beg = $$rtoken_map[$i];
+ my $pos;
+ my $i_begin = $i;
+ my $number = undef;
+ my $type = $input_type;
+
+ my $first_char = substr( $input_line, $pos_beg, 1 );
+
+ # Look for bad starting characters; Shouldn't happen..
+ if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
+ warning("Program bug - scan_number given character $first_char\n");
+ report_definite_bug();
+ return ( $i, $type, $number );
+ }
+
+ # handle v-string without leading 'v' character ('Two Dot' rule)
+ # (vstring.t)
+ # TODO: v-strings may contain underscores
+ pos($input_line) = $pos_beg;
+ if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
+ $pos = pos($input_line);
+ my $numc = $pos - $pos_beg;
+ $number = substr( $input_line, $pos_beg, $numc );
+ $type = 'v';
+ report_v_string($number);
+ }
+
+ # handle octal, hex, binary
+ if ( !defined($number) ) {
+ pos($input_line) = $pos_beg;
+ if ( $input_line =~
+ /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
+ {
+ $pos = pos($input_line);
+ my $numc = $pos - $pos_beg;
+ $number = substr( $input_line, $pos_beg, $numc );
+ $type = 'n';
+ }
+ }
+
+ # handle decimal
+ if ( !defined($number) ) {
+ pos($input_line) = $pos_beg;
+
+ if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
+ $pos = pos($input_line);
+
+ # watch out for things like 0..40 which would give 0. by this;
+ if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
+ && ( substr( $input_line, $pos, 1 ) eq '.' ) )
+ {
+ $pos--;
+ }
+ my $numc = $pos - $pos_beg;
+ $number = substr( $input_line, $pos_beg, $numc );
+ $type = 'n';
+ }
+ }
+
+ # filter out non-numbers like e + - . e2 .e3 +e6
+ # the rule: at least one digit, and any 'e' must be preceded by a digit
+ if (
+ $number !~ /\d/ # no digits
+ || ( $number =~ /^(.*)[eE]/
+ && $1 !~ /\d/ ) # or no digits before the 'e'
+ )
+ {
+ $number = undef;
+ $type = $input_type;
+ return ( $i, $type, $number );
+ }
+
+ # Found a number; now we must convert back from character position
+ # to pre_token index. An error here implies user syntax error.
+ # An example would be an invalid octal number like '009'.
+ my $error;
+ ( $i, $error ) =
+ inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+ if ($error) { warning("Possibly invalid number\n") }
+
+ return ( $i, $type, $number );
+}
+
+sub inverse_pretoken_map {
+
+ # Starting with the current pre_token index $i, scan forward until
+ # finding the index of the next pre_token whose position is $pos.
+ my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
+ my $error = 0;
+
+ while ( ++$i <= $max_token_index ) {
+
+ if ( $pos <= $$rtoken_map[$i] ) {
+
+ # Let the calling routine handle errors in which we do not
+ # land on a pre-token boundary. It can happen by running
+ # perltidy on some non-perl scripts, for example.
+ if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
+ $i--;
+ last;
+ }
+ }
+ return ( $i, $error );
+}
+
+sub find_here_doc {
+
+ # find the target of a here document, if any
+ # input parameters:
+ # $i - token index of the second < of <<
+ # ($i must be less than the last token index if this is called)
+ # output parameters:
+ # $found_target = 0 didn't find target; =1 found target
+ # HERE_TARGET - the target string (may be empty string)
+ # $i - unchanged if not here doc,
+ # or index of the last token of the here target
+ # $saw_error - flag noting unbalanced quote on here target
+ my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+ my $ibeg = $i;
+ my $found_target = 0;
+ my $here_doc_target = '';
+ my $here_quote_character = '';
+ my $saw_error = 0;
+ my ( $next_nonblank_token, $i_next_nonblank, $next_token );
+ $next_token = $$rtokens[ $i + 1 ];
+
+ # perl allows a backslash before the target string (heredoc.t)
+ my $backslash = 0;
+ if ( $next_token eq '\\' ) {
+ $backslash = 1;
+ $next_token = $$rtokens[ $i + 2 ];
+ }
+
+ ( $next_nonblank_token, $i_next_nonblank ) =
+ find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
+
+ if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
+
+ my $in_quote = 1;
+ my $quote_depth = 0;
+ my $quote_pos = 0;
+ my $quoted_string;
+
+ (
+ $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
+ $quoted_string
+ )
+ = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
+ $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
+
+ if ($in_quote) { # didn't find end of quote, so no target found
+ $i = $ibeg;
+ if ( $expecting == TERM ) {
+ warning(
+"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
+ );
+ $saw_error = 1;
+ }
+ }
+ else { # found ending quote
+ my $j;
+ $found_target = 1;
+
+ my $tokj;
+ for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
+ $tokj = $$rtokens[$j];
+
+ # we have to remove any backslash before the quote character
+ # so that the here-doc-target exactly matches this string
+ next
+ if ( $tokj eq "\\"
+ && $j < $i - 1
+ && $$rtokens[ $j + 1 ] eq $here_quote_character );
+ $here_doc_target .= $tokj;
+ }
+ }
+ }
+
+ elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
+ $found_target = 1;
+ write_logfile_entry(
+ "found blank here-target after <<; suggest using \"\"\n");
+ $i = $ibeg;
}
+ elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
+
+ my $here_doc_expected;
+ if ( $expecting == UNKNOWN ) {
+ $here_doc_expected = guess_if_here_doc($next_token);
+ }
+ else {
+ $here_doc_expected = 1;
+ }
+
+ if ($here_doc_expected) {
+ $found_target = 1;
+ $here_doc_target = $next_token;
+ $i = $ibeg + 1;
+ }
- # once we enter the actual identifier, it may not extend beyond
- # the end of the current line
- if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
- $id_scan_state = '';
}
- if ( $i < 0 ) { $i = 0 }
+ else {
- unless ($type) {
+ if ( $expecting == TERM ) {
+ $found_target = 1;
+ write_logfile_entry("Note: bare here-doc operator <<\n");
+ }
+ else {
+ $i = $ibeg;
+ }
+ }
- if ($saw_type) {
+ # patch to neglect any prepended backslash
+ if ( $found_target && $backslash ) { $i++ }
- if ($saw_alpha) {
- if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
- $type = 'w';
- }
- else { $type = 'i' }
- }
- elsif ( $identifier eq '->' ) {
- $type = '->';
- }
- elsif (
- ( length($identifier) > 1 )
+ return ( $found_target, $here_doc_target, $here_quote_character, $i,
+ $saw_error );
+}
- # In something like '@$=' we have an identifier '@$'
- # In something like '$${' we have type '$$' (and only
- # part of an identifier)
- && !( $identifier =~ /\$$/ && $tok eq '{' )
- && ( $identifier !~ /^(sub |package )$/ )
- )
- {
- $type = 'i';
- }
- else { $type = 't' }
- }
- elsif ($saw_alpha) {
+sub do_quote {
- # type 'w' includes anything without leading type info
- # ($,%,@,*) including something like abc::def::ghi
- $type = 'w';
+ # follow (or continue following) quoted string(s)
+ # $in_quote return code:
+ # 0 - ok, found end
+ # 1 - still must find end of quote whose target is $quote_character
+ # 2 - still looking for end of first of two quotes
+ #
+ # Returns updated strings:
+ # $quoted_string_1 = quoted string seen while in_quote=1
+ # $quoted_string_2 = quoted string seen while in_quote=2
+ my (
+ $i, $in_quote, $quote_character,
+ $quote_pos, $quote_depth, $quoted_string_1,
+ $quoted_string_2, $rtokens, $rtoken_map,
+ $max_token_index
+ ) = @_;
+
+ my $in_quote_starting = $in_quote;
+
+ my $quoted_string;
+ if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
+ my $ibeg = $i;
+ (
+ $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string
+ )
+ = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
+ $quote_pos, $quote_depth, $max_token_index );
+ $quoted_string_2 .= $quoted_string;
+ if ( $in_quote == 1 ) {
+ if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
+ $quote_character = '';
}
else {
- $type = '';
- } # this can happen on a restart
+ $quoted_string_2 .= "\n";
+ }
}
- if ($identifier) {
- $tok = $identifier;
- if ($message) { write_logfile_entry($message) }
- }
- else {
- $tok = $tok_begin;
- $i = $i_begin;
+ if ( $in_quote == 1 ) { # one (more) quote to follow
+ my $ibeg = $i;
+ (
+ $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string
+ )
+ = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+ $quote_pos, $quote_depth, $max_token_index );
+ $quoted_string_1 .= $quoted_string;
+ if ( $in_quote == 1 ) {
+ $quoted_string_1 .= "\n";
+ }
}
-
- TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
- my ( $a, $b, $c ) = caller;
- print
-"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
- print
-"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
- };
- return ( $i, $tok, $type, $id_scan_state, $identifier );
+ return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+ $quoted_string_1, $quoted_string_2 );
}
sub follow_quoted_string {
# $beginning_tok = the starting quote character
# $quote_pos = index to check next for alphanumeric delimiter
# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
- my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth )
+ # $quoted_string = the text of the quote (without quotation tokens)
+ my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
+ $max_token_index )
= @_;
my ( $tok, $end_tok );
- my $i = $i_beg - 1;
+ my $i = $i_beg - 1;
+ my $quoted_string = "";
TOKENIZER_DEBUG_FLAG_QUOTE && do {
- print
+ print STDOUT
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
};
}
}
- # There are two different loops which search for the ending quote
- # character. In the rare case of an alphanumeric quote delimiter, we
- # have to look through alphanumeric tokens character-by-character, since
- # the pre-tokenization process combines multiple alphanumeric
- # characters, whereas for a non-alphanumeric delimiter, only tokens of
- # length 1 can match.
+ # There are two different loops which search for the ending quote
+ # character. In the rare case of an alphanumeric quote delimiter, we
+ # have to look through alphanumeric tokens character-by-character, since
+ # the pre-tokenization process combines multiple alphanumeric
+ # characters, whereas for a non-alphanumeric delimiter, only tokens of
+ # length 1 can match.
+
+ ###################################################################
+ # Case 1 (rare): loop for case of alphanumeric quote delimiter..
+ # "quote_pos" is the position the current word to begin searching
+ ###################################################################
+ if ( $beginning_tok =~ /\w/ ) {
+
+ # Note this because it is not recommended practice except
+ # for obfuscated perl contests
+ if ( $in_quote == 1 ) {
+ write_logfile_entry(
+ "Note: alphanumeric quote delimiter ($beginning_tok) \n");
+ }
+
+ while ( $i < $max_token_index ) {
+
+ if ( $quote_pos == 0 || ( $i < 0 ) ) {
+ $tok = $$rtokens[ ++$i ];
+
+ if ( $tok eq '\\' ) {
+
+ # retain backslash unless it hides the end token
+ $quoted_string .= $tok
+ unless $$rtokens[ $i + 1 ] eq $end_tok;
+ $quote_pos++;
+ last if ( $i >= $max_token_index );
+ $tok = $$rtokens[ ++$i ];
+ }
+ }
+ my $old_pos = $quote_pos;
+
+ unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
+ {
+
+ }
+ $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
+
+ if ( $quote_pos > 0 ) {
+
+ $quoted_string .=
+ substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
+
+ $quote_depth--;
+
+ if ( $quote_depth == 0 ) {
+ $in_quote--;
+ last;
+ }
+ }
+ else {
+ $quoted_string .= substr( $tok, $old_pos );
+ }
+ }
+ }
+
+ ########################################################################
+ # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
+ ########################################################################
+ else {
+
+ while ( $i < $max_token_index ) {
+ $tok = $$rtokens[ ++$i ];
+
+ if ( $tok eq $end_tok ) {
+ $quote_depth--;
+
+ if ( $quote_depth == 0 ) {
+ $in_quote--;
+ last;
+ }
+ }
+ elsif ( $tok eq $beginning_tok ) {
+ $quote_depth++;
+ }
+ elsif ( $tok eq '\\' ) {
+
+ # retain backslash unless it hides the beginning or end token
+ $tok = $$rtokens[ ++$i ];
+ $quoted_string .= '\\'
+ unless ( $tok eq $end_tok || $tok eq $beginning_tok );
+ }
+ $quoted_string .= $tok;
+ }
+ }
+ if ( $i > $max_token_index ) { $i = $max_token_index }
+ return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
+ $quoted_string );
+}
+
+sub indicate_error {
+ my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
+ interrupt_logfile();
+ warning($msg);
+ write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
+ resume_logfile();
+}
+
+sub write_error_indicator_pair {
+ my ( $line_number, $input_line, $pos, $carrat ) = @_;
+ my ( $offset, $numbered_line, $underline ) =
+ make_numbered_line( $line_number, $input_line, $pos );
+ $underline = write_on_underline( $underline, $pos - $offset, $carrat );
+ warning( $numbered_line . "\n" );
+ $underline =~ s/\s*$//;
+ warning( $underline . "\n" );
+}
+
+sub make_numbered_line {
+
+ # Given an input line, its line number, and a character position of
+ # interest, create a string not longer than 80 characters of the form
+ # $lineno: sub_string
+ # such that the sub_string of $str contains the position of interest
+ #
+ # Here is an example of what we want, in this case we add trailing
+ # '...' because the line is long.
+ #
+ # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
+ #
+ # Here is another example, this time in which we used leading '...'
+ # because of excessive length:
+ #
+ # 2: ... er of the World Wide Web Consortium's
+ #
+ # input parameters are:
+ # $lineno = line number
+ # $str = the text of the line
+ # $pos = position of interest (the error) : 0 = first character
+ #
+ # We return :
+ # - $offset = an offset which corrects the position in case we only
+ # display part of a line, such that $pos-$offset is the effective
+ # position from the start of the displayed line.
+ # - $numbered_line = the numbered line as above,
+ # - $underline = a blank 'underline' which is all spaces with the same
+ # number of characters as the numbered line.
+
+ my ( $lineno, $str, $pos ) = @_;
+ my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
+ my $excess = length($str) - $offset - 68;
+ my $numc = ( $excess > 0 ) ? 68 : undef;
+
+ if ( defined($numc) ) {
+ if ( $offset == 0 ) {
+ $str = substr( $str, $offset, $numc - 4 ) . " ...";
+ }
+ else {
+ $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
+ }
+ }
+ else {
+
+ if ( $offset == 0 ) {
+ }
+ else {
+ $str = "... " . substr( $str, $offset + 4 );
+ }
+ }
+
+ my $numbered_line = sprintf( "%d: ", $lineno );
+ $offset -= length($numbered_line);
+ $numbered_line .= $str;
+ my $underline = " " x length($numbered_line);
+ return ( $offset, $numbered_line, $underline );
+}
+
+sub write_on_underline {
- # loop for case of alphanumeric quote delimiter..
- # "quote_pos" is the position the current word to begin searching
- if ( $beginning_tok =~ /\w/ ) {
+ # The "underline" is a string that shows where an error is; it starts
+ # out as a string of blanks with the same length as the numbered line of
+ # code above it, and we have to add marking to show where an error is.
+ # In the example below, we want to write the string '--^' just below
+ # the line of bad code:
+ #
+ # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
+ # ---^
+ # We are given the current underline string, plus a position and a
+ # string to write on it.
+ #
+ # In the above example, there will be 2 calls to do this:
+ # First call: $pos=19, pos_chr=^
+ # Second call: $pos=16, pos_chr=---
+ #
+ # This is a trivial thing to do with substr, but there is some
+ # checking to do.
- # Note this because it is not recommended practice except
- # for obfuscated perl contests
- if ( $in_quote == 1 ) {
- write_logfile_entry(
- "Note: alphanumeric quote delimiter ($beginning_tok) \n");
- }
+ my ( $underline, $pos, $pos_chr ) = @_;
- while ( $i < $max_token_index ) {
+ # check for error..shouldn't happen
+ unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
+ return $underline;
+ }
+ my $excess = length($pos_chr) + $pos - length($underline);
+ if ( $excess > 0 ) {
+ $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
+ }
+ substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
+ return ($underline);
+}
- if ( $quote_pos == 0 || ( $i < 0 ) ) {
- $tok = $$rtokens[ ++$i ];
+sub pre_tokenize {
- if ( $tok eq '\\' ) {
+ # Break a string, $str, into a sequence of preliminary tokens. We
+ # are interested in these types of tokens:
+ # words (type='w'), example: 'max_tokens_wanted'
+ # digits (type = 'd'), example: '0755'
+ # whitespace (type = 'b'), example: ' '
+ # any other single character (i.e. punct; type = the character itself).
+ # We cannot do better than this yet because we might be in a quoted
+ # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
+ # tokens.
+ my ( $str, $max_tokens_wanted ) = @_;
- $quote_pos++;
- last if ( $i >= $max_token_index );
- $tok = $$rtokens[ ++$i ];
+ # we return references to these 3 arrays:
+ my @tokens = (); # array of the tokens themselves
+ my @token_map = (0); # string position of start of each token
+ my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
- }
- }
- my $old_pos = $quote_pos;
+ do {
- unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
- {
+ # whitespace
+ if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
- }
- $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
+ # numbers
+ # note that this must come before words!
+ elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
- if ( $quote_pos > 0 ) {
+ # words
+ elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
- $quote_depth--;
+ # single-character punctuation
+ elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
- if ( $quote_depth == 0 ) {
- $in_quote--;
- last;
- }
- }
+ # that's all..
+ else {
+ return ( \@tokens, \@token_map, \@type );
}
- }
- # loop for case of a non-alphanumeric quote delimiter..
- else {
+ push @tokens, $1;
+ push @token_map, pos($str);
- while ( $i < $max_token_index ) {
- $tok = $$rtokens[ ++$i ];
+ } while ( --$max_tokens_wanted != 0 );
- if ( $tok eq $end_tok ) {
- $quote_depth--;
+ return ( \@tokens, \@token_map, \@type );
+}
- if ( $quote_depth == 0 ) {
- $in_quote--;
- last;
- }
- }
- elsif ( $tok eq $beginning_tok ) {
- $quote_depth++;
- }
- elsif ( $tok eq '\\' ) {
- $i++;
- }
- }
+sub show_tokens {
+
+ # this is an old debug routine
+ my ( $rtokens, $rtoken_map ) = @_;
+ my $num = scalar(@$rtokens);
+ my $i;
+
+ for ( $i = 0 ; $i < $num ; $i++ ) {
+ my $len = length( $$rtokens[$i] );
+ print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
}
- if ( $i > $max_token_index ) { $i = $max_token_index }
- return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth );
}
sub matching_end_token {
}
}
+sub dump_token_types {
+ my $class = shift;
+ my $fh = shift;
+
+ # This should be the latest list of token types in use
+ # adding NEW_TOKENS: add a comment here
+ print $fh <<'END_OF_LIST';
+
+Here is a list of the token types currently used for lines of type 'CODE'.
+For the following tokens, the "type" of a token is just the token itself.
+
+.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
+( ) <= >= == =~ !~ != ++ -- /= x=
+... **= <<= >>= &&= ||= //= <=>
+, + - / * | % ! x ~ = \ ? : . < > ^ &
+
+The following additional token types are defined:
+
+ type meaning
+ b blank (white space)
+ { indent: opening structural curly brace or square bracket or paren
+ (code block, anonymous hash reference, or anonymous array reference)
+ } outdent: right structural curly brace or square bracket or paren
+ [ left non-structural square bracket (enclosing an array index)
+ ] right non-structural square bracket
+ ( left non-structural paren (all but a list right of an =)
+ ) right non-structural paren
+ L left non-structural curly brace (enclosing a key)
+ R right non-structural curly brace
+ ; terminal semicolon
+ f indicates a semicolon in a "for" statement
+ h here_doc operator <<
+ # a comment
+ Q indicates a quote or pattern
+ q indicates a qw quote block
+ k a perl keyword
+ C user-defined constant or constant function (with void prototype = ())
+ U user-defined function taking parameters
+ G user-defined function taking block parameter (like grep/map/eval)
+ M (unused, but reserved for subroutine definition name)
+ P (unused, but -html uses it to label pod text)
+ t type indicater such as %,$,@,*,&,sub
+ w bare word (perhaps a subroutine call)
+ i identifier of some type (with leading %, $, @, *, &, sub, -> )
+ n a number
+ v a v-string
+ F a file test operator (like -e)
+ Y File handle
+ Z identifier in indirect object slot: may be file handle, object
+ J LABEL: code block label
+ j LABEL after next, last, redo, goto
+ p unary +
+ m unary -
+ pp pre-increment operator ++
+ mm pre-decrement operator --
+ A : used as attribute separator
+
+ Here are the '_line_type' codes used internally:
+ SYSTEM - system-specific code before hash-bang line
+ CODE - line of perl code (including comments)
+ POD_START - line starting pod, such as '=head'
+ POD - pod documentation text
+ POD_END - last line of pod section, '=cut'
+ HERE - text of here-document
+ HERE_END - last line of here-doc (target word)
+ FORMAT - format section
+ FORMAT_END - last line of format section, '.'
+ DATA_START - __DATA__ line
+ DATA - unidentified text following __DATA__
+ END_START - __END__ line
+ END - unidentified text following __END__
+ ERROR - we are in big trouble, probably not a perl script
+END_OF_LIST
+}
+
BEGIN {
# These names are used in error messages
@closing_brace_names = qw# '}' ']' ')' ':' #;
my @digraphs = qw(
- .. :: << >> ** && .. || -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x=
+ .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
+ <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
);
@is_digraph{@digraphs} = (1) x scalar(@digraphs);
- my @trigraphs = qw( ... **= <<= >>= &&= ||= <=> );
+ my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.=);
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
+ my @tetragraphs = qw( <<>> );
+ @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
+
# make a hash of all valid token types for self-checking the tokenizer
# (adding NEW_TOKENS : select a new character and add to this list)
my @valid_token_types = qw#
#;
push( @valid_token_types, @digraphs );
push( @valid_token_types, @trigraphs );
- push( @valid_token_types, '#' );
- push( @valid_token_types, ',' );
+ push( @valid_token_types, @tetragraphs );
+ push( @valid_token_types, ( '#', ',', 'CORE::' ) );
@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
# a list of file test letters, as in -e (Table 3-4 of 'camel 3')
# these functions have prototypes of the form (&), so when they are
# followed by a block, that block MAY BE followed by an operator.
+ # Smartmatch operator ~~ may be followed by anonymous hash or array ref
@_ = qw( do eval );
@is_block_operator{@_} = (1) x scalar(@_);
# these functions allow an identifier in the indirect object slot
- @_ = qw( print printf sort exec system );
+ @_ = qw( print printf sort exec system say);
@is_indirect_object_taker{@_} = (1) x scalar(@_);
# These tokens may precede a code block
- # patched for SWITCH/CASE
- @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
+ # patched for SWITCH/CASE/CATCH. Actually these could be removed
+ # now and we could let the extended-syntax coding handle them
+ @_ =
+ qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach map grep sort
- switch case given when);
+ switch case given when catch try finally);
@is_code_block_token{@_} = (1) x scalar(@_);
# I'll build the list of keywords incrementally
LE
LT
NE
+ UNITCHECK
abs
accept
alarm
bind
binmode
bless
+ break
caller
chdir
chmod
case
given
when
+ err
+ say
+
+ catch
);
- # patched above for SWITCH/CASE
+ # patched above for SWITCH/CASE given/when err say
+ # 'err' is a fairly safe addition.
+ # TODO: 'default' still needed if appropriate
+ # 'use feature' seen, but perltidy works ok without it.
+ # Concerned that 'default' could break code.
push( @Keywords, @value_requestor );
# These are treated the same but are not keywords:
# these token TYPES expect trailing operator but not a term
# note: ++ and -- are post-increment and decrement, 'C' = constant
- my @operator_requestor_types = qw( ++ -- C );
+ my @operator_requestor_types = qw( ++ -- C <> q );
@expecting_operator_types{@operator_requestor_types} =
(1) x scalar(@operator_requestor_types);
# note: pp and mm are pre-increment and decrement
# f=semicolon in for, F=file test operator
my @value_requestor_type = qw#
- L { ( [ ~ !~ =~ ; . .. ... A : && ! || = + - x
- **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||=
- <= >= == != => \ > < % * / ? & | ** <=>
- f F pp mm Y p m U J G
+ L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
+ **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
+ <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
+ f F pp mm Y p m U J G j >> << ^ t
+ ~. ^. |. &. ^.= |.= &.=
#;
push( @value_requestor_type, ',' )
; # (perl doesn't like a ',' in a qw block)
@expecting_term_types{@value_requestor_type} =
(1) x scalar(@value_requestor_type);
+ # Note: the following valid token types are not assigned here to
+ # hashes requesting to be followed by values or terms, but are
+ # instead currently hard-coded into sub operator_expected:
+ # ) -> :: Q R Z ] b h i k n v w } #
+
# For simple syntax checking, it is nice to have a list of operators which
# will really be unhappy if not followed by a term. This includes most
# of the above...
vec
warn
while
+ given
+ when
);
@is_keyword_taking_list{@keyword_taking_list} =
(1) x scalar(@keyword_taking_list);
# These are not used in any way yet
# my @unused_keywords = qw(
- # CORE
# __FILE__
# __LINE__
# __PACKAGE__
# );
- # The list of keywords was extracted from function 'keyword' in
+ # The list of keywords was originally extracted from function 'keyword' in
# perl file toke.c version 5.005.03, using this utility, plus a
# little editing: (file getkwd.pl):
# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
@is_keyword{@Keywords} = (1) x scalar(@Keywords);
}
1;
-__END__
-
-=head1 NAME
-
-Perl::Tidy - Parses and beautifies perl source
-
-=head1 SYNOPSIS
-
- use Perl::Tidy;
-
- Perl::Tidy::perltidy(
- source => $source,
- destination => $destination,
- stderr => $stderr,
- argv => $argv,
- perltidyrc => $perltidyrc,
- logfile => $logfile,
- errorfile => $errorfile,
- formatter => $formatter, # callback object (see below)
- );
-
-=head1 DESCRIPTION
-
-This module makes the functionality of the perltidy utility available to perl
-scripts. Any or all of the input parameters may be omitted, in which case the
-@ARGV array will be used to provide input parameters as described
-in the perltidy(1) man page.
-
-For example, the perltidy script is basically just this:
-
- use Perl::Tidy;
- Perl::Tidy::perltidy();
-
-The module accepts input and output streams by a variety of methods.
-The following list of parameters may be any of a the following: a
-filename, an ARRAY reference, a SCALAR reference, or an object with
-either a B<getline> or B<print> method, as appropriate.
-
- source - the source of the script to be formatted
- destination - the destination of the formatted output
- stderr - standard error output
- perltidyrc - the .perltidyrc file
- logfile - the .LOG file stream, if any
- errorfile - the .ERR file stream, if any
-
-The following chart illustrates the logic used to decide how to
-treat a parameter.
-
- ref($param) $param is assumed to be:
- ----------- ---------------------
- undef a filename
- SCALAR ref to string
- ARRAY ref to array
- (other) object with getline (if source) or print method
-
-If the parameter is an object, and the object has a B<close> method, that
-close method will be called at the end of the stream.
-
-=over 4
-
-=item source
-
-If the B<source> parameter is given, it defines the source of the
-input stream.
-
-=item destination
-
-If the B<destination> parameter is given, it will be used to define the
-file or memory location to receive output of perltidy.
-
-=item stderr
-
-The B<stderr> parameter allows the calling program to capture the output
-to what would otherwise go to the standard error output device.
-
-=item perltidyrc
-
-If the B<perltidyrc> file is given, it will be used instead of any
-F<.perltidyrc> configuration file that would otherwise be used.
-
-=item argv
-
-If the B<argv> parameter is given, it will be used instead of the
-B<@ARGV> array. The B<argv> parameter may be a string, a reference to a
-string, or a reference to an array. If it is a string or reference to a
-string, it will be parsed into an array of items just as if it were a
-command line string.
-
-=back
-
-=head1 EXAMPLE
-
-The following example passes perltidy a snippet as a reference
-to a string and receives the result back in a reference to
-an array.
-
- use Perl::Tidy;
-
- # some messy source code to format
- my $source = <<'EOM';
- use strict;
- my @editors=('Emacs', 'Vi '); my $rand = rand();
- print "A poll of 10 random programmers gave these results:\n";
- foreach(0..10) {
- my $i=int ($rand+rand());
- print " $editors[$i] users are from Venus" . ", " .
- "$editors[1-$i] users are from Mars" .
- "\n";
- }
- EOM
-
- # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
- my @dest;
- perltidy( source => \$source, destination => \@dest );
- foreach (@dest) {print}
-
-=head1 Using the B<formatter> Callback Object
-
-The B<formatter> parameter is an optional callback object which allows
-the calling program to receive tokenized lines directly from perltidy for
-further specialized processing. When this parameter is used, the two
-formatting options which are built into perltidy (beautification or
-html) are ignored. The following diagram illustrates the logical flow:
-
- |-- (normal route) -> code beautification
- caller->perltidy->|-- (-html flag ) -> create html
- |-- (formatter given)-> callback to write_line
-
-This can be useful for processing perl scripts in some way. The
-parameter C<$formatter> in the perltidy call,
-
- formatter => $formatter,
-
-is an object created by the caller with a C<write_line> method which
-will accept and process tokenized lines, one line per call. Here is
-a simple example of a C<write_line> which merely prints the line number,
-the line type (as determined by perltidy), and the text of the line:
-
- sub write_line {
-
- # This is called from perltidy line-by-line
- my $self = shift;
- my $line_of_tokens = shift;
- my $line_type = $line_of_tokens->{_line_type};
- my $input_line_number = $line_of_tokens->{_line_number};
- my $input_line = $line_of_tokens->{_line_text};
- print "$input_line_number:$line_type:$input_line";
- }
-
-The complete program, B<perllinetype>, is contained in the examples section of
-the source distribution. As this example shows, the callback method
-receives a parameter B<$line_of_tokens>, which is a reference to a hash
-of other useful information. This example uses these hash entries:
-
- $line_of_tokens->{_line_number} - the line number (1,2,...)
- $line_of_tokens->{_line_text} - the text of the line
- $line_of_tokens->{_line_type} - the type of the line, one of:
-
- SYSTEM - system-specific code before hash-bang line
- CODE - line of perl code (including comments)
- POD_START - line starting pod, such as '=head'
- POD - pod documentation text
- POD_END - last line of pod section, '=cut'
- HERE - text of here-document
- HERE_END - last line of here-doc (target word)
- FORMAT - format section
- FORMAT_END - last line of format section, '.'
- DATA_START - __DATA__ line
- DATA - unidentified text following __DATA__
- END_START - __END__ line
- END - unidentified text following __END__
- ERROR - we are in big trouble, probably not a perl script
-
-Most applications will be only interested in lines of type B<CODE>. For
-another example, let's write a program which checks for one of the
-so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
-can slow down processing. Here is a B<write_line>, from the example
-program B<find_naughty.pl>, which does that:
-
- sub write_line {
-
- # This is called back from perltidy line-by-line
- # We're looking for $`, $&, and $'
- my ( $self, $line_of_tokens ) = @_;
-
- # pull out some stuff we might need
- my $line_type = $line_of_tokens->{_line_type};
- my $input_line_number = $line_of_tokens->{_line_number};
- my $input_line = $line_of_tokens->{_line_text};
- my $rtoken_type = $line_of_tokens->{_rtoken_type};
- my $rtokens = $line_of_tokens->{_rtokens};
- chomp $input_line;
-
- # skip comments, pod, etc
- return if ( $line_type ne 'CODE' );
-
- # loop over tokens looking for $`, $&, and $'
- for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
-
- # we only want to examine token types 'i' (identifier)
- next unless $$rtoken_type[$j] eq 'i';
-
- # pull out the actual token text
- my $token = $$rtokens[$j];
-
- # and check it
- if ( $token =~ /^\$[\`\&\']$/ ) {
- print STDERR
- "$input_line_number: $token\n";
- }
- }
- }
-
-This example pulls out these tokenization variables from the $line_of_tokens
-hash reference:
-
- $rtoken_type = $line_of_tokens->{_rtoken_type};
- $rtokens = $line_of_tokens->{_rtokens};
-
-The variable C<$rtoken_type> is a reference to an array of token type codes,
-and C<$rtokens> is a reference to a corresponding array of token text.
-These are obviously only defined for lines of type B<CODE>.
-Perltidy classifies tokens into types, and has a brief code for each type.
-You can get a complete list at any time by running perltidy from the
-command line with
-
- perltidy --dump-token-types
-
-In the present example, we are only looking for tokens of type B<i>
-(identifiers), so the for loop skips past all other types. When an
-identifier is found, its actual text is checked to see if it is one
-being sought. If so, the above write_line prints the token and its
-line number.
-
-The B<formatter> feature is relatively new in perltidy, and further
-documentation needs to be written to complete its description. However,
-several example programs have been written and can be found in the
-B<examples> section of the source distribution. Probably the best way
-to get started is to find one of the examples which most closely matches
-your application and start modifying it.
-
-For help with perltidy's pecular way of breaking lines into tokens, you
-might run, from the command line,
-
- perltidy -D filename
-
-where F<filename> is a short script of interest. This will produce
-F<filename.DEBUG> with interleaved lines of text and their token types.
-The -D flag has been in perltidy from the beginning for this purpose.
-If you want to see the code which creates this file, it is
-C<write_debug_entry> in Tidy.pm.
-
-=head1 EXPORT
-
- &perltidy
-
-=head1 CREDITS
-
-Thanks to Hugh Myers who developed the initial modular interface
-to perltidy.
-
-=head1 VERSION
-
-This man page documents Perl::Tidy version 20031021.
-
-=head1 AUTHOR
-
- Steve Hancock
- perltidy at users.sourceforge.net
-
-=head1 SEE ALSO
-
-The perltidy(1) man page describes all of the features of perltidy. It
-can be found at http://perltidy.sourceforge.net.
-
-=cut