#
# perltidy - a perl script indenter and formatter
#
-# Copyright (c) 2000-2018 by Steve Hancock
+# Copyright (c) 2000-2019 by Steve Hancock
# Distributed under the GPL license agreement; see file COPYING
#
# This program is free software; you can redistribute it and/or modify
use strict;
use Exporter;
use Carp;
-$|++;
+use Digest::MD5 qw(md5_hex);
+use Perl::Tidy::Debugger;
+use Perl::Tidy::DevNull;
+use Perl::Tidy::Diagnostics;
+use Perl::Tidy::FileWriter;
+use Perl::Tidy::Formatter;
+use Perl::Tidy::HtmlWriter;
+use Perl::Tidy::IOScalar;
+use Perl::Tidy::IOScalarArray;
+use Perl::Tidy::IndentationItem;
+use Perl::Tidy::LineSink;
+use Perl::Tidy::LineSource;
+use Perl::Tidy::Logger;
+use Perl::Tidy::Tokenizer;
+use Perl::Tidy::VerticalAligner;
+local $| = 1;
use vars qw{
$VERSION
$missing_file_spec
$fh_stderr
$rOpts_character_encoding
+ $Warn_count
};
@ISA = qw( Exporter );
use File::Temp qw(tempfile);
BEGIN {
- ( $VERSION = q($Id: Tidy.pm,v 1.74 2018/02/20 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+
+ # Release version is the approximate YYMMDD of the release.
+ # Development version is (Last Release).(Development Number)
+
+ # To make the number continually increasing, the Development Number is a
+ # 2-digit number which starts at 01 after a release and is bumped along
+ # at significant points during development. If it ever reaches 99 then the
+ # Release version must be bumped, and it is probably past time for a
+ # release anyway.
+
+ $VERSION = '20200110';
}
sub streamhandle {
sub find_input_line_ending {
# Peek at a file and return first line ending character.
- # Quietly return undef in case of any trouble.
+ # Return undefined value in case of any trouble.
my ($input_file) = @_;
my $ending;
my @parts = @_;
- #BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
BEGIN {
eval { require File::Spec };
$missing_file_spec = $@;
# Perl 5.004 systems may not have File::Spec so we'll make
# a simple try. We assume File::Basename is available.
- # return undef if not successful.
+ # return if not successful.
my $name = pop @parts;
my $path = join '/', @parts;
my $test_file = $path . $name;
my ( $test_name, $test_path ) = fileparse($test_file);
return $test_file if ( $test_name eq $name );
- return if ( $^O eq 'VMS' );
+ return if ( $^O eq 'VMS' );
# this should work at least for Windows and Unix:
$test_file = $path . '/' . $name;
$fh_stderr = *STDERR;
}
- sub Warn { my $msg = shift; $fh_stderr->print($msg); return }
+ sub Warn { my $msg = shift; $fh_stderr->print($msg); $Warn_count++; return }
sub Exit {
+ # Leave by jumping to one of the exit labels: a true $flag selects
+ # ERROR_EXIT, anything else selects NORMAL_EXIT. The croak below is
+ # only reached if the goto unexpectedly returns.
my $flag = shift;
if ($flag) { goto ERROR_EXIT }
else { goto NORMAL_EXIT }
+ croak "unexpected return to Exit";
+ }
+
+ sub Die {
+ my $msg = shift;
+ Warn($msg);
+ Exit(1);
+ croak "unexpected return to Die";
}
- sub Die { my $msg = shift; Warn($msg); Exit(1); }
+ my $md5_hex = sub {
+ my ($buf) = @_;
+
+ # Return the hexadecimal MD5 digest of the string $buf.
+ # History: patch for [rt.cpan.org #88020]
+ # Use utf8::encode since md5_hex() only operates on bytes.
+ # my $digest = md5_hex( utf8::encode($sink_buffer) );
+
+ # Note added 20180114: the above patch did not work correctly, presumably
+ # because utf8::encode() converts in place and returns nothing (TODO confirm).
+ # The method recommended in the Perl 5 Encode documentation works: either
+ # $octets = encode_utf8($string) or equivalently
+ # $octets = encode("utf8",$string)
+ # and then calculate the checksum. So:
+ my $octets = Encode::encode( "utf8", $buf );
+ my $digest = md5_hex($octets);
+ return $digest;
+ };
# extract various dump parameters
my $dump_options_type = $input_hash{'dump_options_type'};
else {
my ( $rargv, $msg ) = parse_args($argv);
if ($msg) {
- Die <<EOM;
+ Die(<<EOM);
Error parsing this string passed to to perltidy with 'argv':
$msg
EOM
$rpending_complaint, $dump_options_type,
);
- my $saw_extrude = ( grep m/^-extrude$/, @{$rraw_options} ) ? 1 : 0;
+ my $saw_extrude = ( grep { m/^-extrude$/ } @{$rraw_options} ) ? 1 : 0;
my $saw_pbp =
- ( grep m/^-(pbp|perl-best-practices)$/, @{$rraw_options} ) ? 1 : 0;
+ ( grep { m/^-(pbp|perl-best-practices)$/ } @{$rraw_options} ) ? 1 : 0;
#---------------------------------------------------------------
# Handle requests to dump information
%{$dump_options} = %{$rOpts};
}
- Exit 0 if ($quit_now);
+ Exit(0) if ($quit_now);
# make printable string of options for this run as possible diagnostic
my $readable_options = readable_options( $rOpts, $roption_string );
# dump from command line
if ( $rOpts->{'dump-options'} ) {
print STDOUT $readable_options;
- Exit 0;
+ Exit(0);
}
#---------------------------------------------------------------
my $formats = join ' ',
sort map { "'" . $_ . "'" } keys %default_file_extension;
my $fmt = $rOpts->{'format'};
- Die "-format='$fmt' but must be one of: $formats\n";
+ Die("-format='$fmt' but must be one of: $formats\n");
}
my $output_extension = make_extension( $rOpts->{'output-file-extension'},
# At present only one forward slash is allowed. In the future multiple
# slashes may be allowed to allow for other options
if ( $delete_backup > 1 ) {
- Die "-bext=$bext contains more than one '/'\n";
+ Die("-bext=$bext contains more than one '/'\n");
}
my $backup_extension =
}
Perl::Tidy::Formatter::check_options($rOpts);
+ Perl::Tidy::Tokenizer::check_options($rOpts);
if ( $rOpts->{'format'} eq 'html' ) {
Perl::Tidy::HtmlWriter->check_options($rOpts);
}
# no filenames should be given if input is from an array
if ($source_stream) {
if ( @ARGV > 0 ) {
- Die
-"You may not specify any filenames when a source array is given\n";
+ Die(
+"You may not specify any filenames when a source array is given\n"
+ );
}
# we'll stuff the source array into ARGV
while ( my $input_file = shift @ARGV ) {
my $fileroot;
- my $input_file_permissions;
+ my @input_file_stat;
+ my $display_name;
#---------------------------------------------------------------
# prepare this input stream
#---------------------------------------------------------------
if ($source_stream) {
- $fileroot = "perltidy";
+ $fileroot = "perltidy";
+ $display_name = "<source_stream>";
# If the source is from an array or string, then .LOG output
# is only possible if a logfile stream is specified. This prevents
}
}
elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
- $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
+ $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
+ $display_name = "<stdin>";
$in_place_modify = 0;
}
else {
- $fileroot = $input_file;
+ $fileroot = $input_file;
+ $display_name = $input_file;
unless ( -e $input_file ) {
# file doesn't exist - check for a file glob
}
}
}
- Warn "skipping file: '$input_file': no matches found\n";
+ Warn("skipping file: '$input_file': no matches found\n");
next;
}
unless ( -f $input_file ) {
- Warn "skipping file: $input_file: not a regular file\n";
+ Warn("skipping file: $input_file: not a regular file\n");
next;
}
# the old .tdy or .bak files might still exist so we
# shouldn't overwrite them with zero length files.
unless ( -s $input_file ) {
- Warn "skipping file: $input_file: Zero size\n";
+ Warn("skipping file: $input_file: Zero size\n");
next;
}
unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
- Warn
- "skipping file: $input_file: Non-text (override with -f)\n";
+ Warn(
+ "skipping file: $input_file: Non-text (override with -f)\n"
+ );
next;
}
# we should have a valid filename now
- $fileroot = $input_file;
- $input_file_permissions = ( stat $input_file )[2] & oct(7777);
+ $fileroot = $input_file;
+ @input_file_stat = stat($input_file);
if ( $^O eq 'VMS' ) {
( $fileroot, $dot ) = check_vms_filename($fileroot);
my $new_path = $rOpts->{'output-path'};
unless ( -d $new_path ) {
unless ( mkdir $new_path, 0777 ) {
- Die "unable to create directory $new_path: $!\n";
+ Die("unable to create directory $new_path: $!\n");
}
}
my $path = $new_path;
$fileroot = catfile( $path, $base );
unless ($fileroot) {
- Die <<EOM;
+ Die(<<EOM);
------------------------------------------------------------------------
Problem combining $new_path and $base to make a filename; check -opath
------------------------------------------------------------------------
|| $input_file eq 'DIAGNOSTICS' )
)
{
- Warn "skipping file: $input_file: wrong extension\n";
+ Warn("skipping file: $input_file: wrong extension\n");
next;
}
$rpending_logfile_message );
next unless ($source_object);
+ my $max_iterations = $rOpts->{'iterations'};
+ my $do_convergence_test = $max_iterations > 1;
+ my $convergence_log_message;
+ my %saw_md5;
+ my $digest_input = 0;
+
# Prefilters and postfilters: The prefilter is a code reference
# that will be applied to the source before tidying, and the
# postfilter is a code reference to the result before outputting.
$prefilter
|| ( $rOpts_character_encoding
&& $rOpts_character_encoding eq 'utf8' )
+ || $rOpts->{'assert-tidy'}
+ || $rOpts->{'assert-untidy'}
+ || $do_convergence_test
)
{
my $buf = '';
$buf .= $line;
}
- $buf = $prefilter->($buf) if $prefilter;
-
if ( $rOpts_character_encoding
&& $rOpts_character_encoding eq 'utf8'
&& !utf8::is_utf8($buf) )
Encode::FB_CROAK | Encode::LEAVE_SRC );
};
if ($@) {
- Warn
-"skipping file: $input_file: Unable to decode source as UTF-8\n";
+ Warn(
+"skipping file: $input_file: Unable to decode source as UTF-8\n"
+ );
next;
}
}
+ # MD5 sum of input file is evaluated before any prefilter
+ if ( $rOpts->{'assert-tidy'} || $rOpts->{'assert-untidy'} ) {
+ $digest_input = $md5_hex->($buf);
+ }
+
+ $buf = $prefilter->($buf) if $prefilter;
+
+ # starting MD5 sum for convergence test is evaluated after any prefilter
+ if ($do_convergence_test) {
+ my $digest = $md5_hex->($buf);
+ $saw_md5{$digest} = 1;
+ }
+
$source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
$rpending_logfile_message );
}
if ( $rOpts->{'standard-output'} ) {
my $msg = "You may not use -o and -st together";
$msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
- Die "$msg\n";
+ Die("$msg\n");
}
elsif ($destination_stream) {
- Die
-"You may not specify a destination array and -o together\n";
+ Die(
+"You may not specify a destination array and -o together\n"
+ );
}
elsif ( defined( $rOpts->{'output-path'} ) ) {
- Die "You may not specify -o and -opath together\n";
+ Die("You may not specify -o and -opath together\n");
}
elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
- Die "You may not specify -o and -oext together\n";
+ Die("You may not specify -o and -oext together\n");
}
$output_file = $rOpts->{outfile};
# make sure user gives a file name after -o
if ( $output_file =~ /^-/ ) {
- Die "You must specify a valid filename after -o\n";
+ Die("You must specify a valid filename after -o\n");
}
# do not overwrite input file with -o
- if ( defined($input_file_permissions)
- && ( $output_file eq $input_file ) )
- {
- Die "Use 'perltidy -b $input_file' to modify in-place\n";
+ if ( @input_file_stat && ( $output_file eq $input_file ) ) {
+ Die("Use 'perltidy -b $input_file' to modify in-place\n");
}
}
else {
- Die "You may not use -o with more than one input file\n";
+ Die("You may not use -o with more than one input file\n");
}
}
elsif ( $rOpts->{'standard-output'} ) {
my $msg =
"You may not specify a destination array and -st together\n";
$msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
- Die "$msg\n";
+ Die("$msg\n");
}
$output_file = '-';
if ( $number_of_files <= 1 ) {
}
else {
- Die "You may not use -st with more than one input file\n";
+ Die("You may not use -st with more than one input file\n");
}
}
elsif ($destination_stream) {
else {
if ($in_place_modify) {
$output_file = IO::File->new_tmpfile()
- or Die "cannot open temp file for -b option: $!\n";
+ or Die("cannot open temp file for -b option: $!\n");
}
else {
$actual_output_extension = $output_extension;
$line_separator = "\n" unless defined($line_separator);
my ( $sink_object, $postfilter_buffer );
- if ($postfilter) {
+ if ( $postfilter
+ || $rOpts->{'assert-tidy'}
+ || $rOpts->{'assert-untidy'} )
+ {
$sink_object =
Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
$line_separator, $rOpts, $rpending_logfile_message, $binmode );
my $logger_object =
Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
- $fh_stderr, $saw_extrude );
+ $fh_stderr, $saw_extrude, $display_name );
write_logfile_header(
$rOpts, $logger_object, $config_file,
$rraw_options, $Windows_type, $readable_options,
# loop over iterations for one source stream
#---------------------------------------------------------------
- # We will do a convergence test if 3 or more iterations are allowed.
- # It would be pointless for fewer because we have to make at least
- # two passes before we can see if we are converged, and the test
- # would just slow things down.
- my $max_iterations = $rOpts->{'iterations'};
- my $convergence_log_message;
- my %saw_md5;
- my $do_convergence_test = $max_iterations > 2;
- if ($do_convergence_test) {
- eval "use Digest::MD5 qw(md5_hex)";
- $do_convergence_test = !$@;
-
- ### Trying to avoid problems with ancient versions of perl
- ##eval { my $string = "perltidy"; utf8::encode($string) };
- ##$do_convergence_test = $do_convergence_test && !$@;
- }
-
# save objects to allow redirecting output during iterations
my $sink_object_final = $sink_object;
my $debugger_object_final = $debugger_object;
);
}
else {
- Die "I don't know how to do -format=$rOpts->{'format'}\n";
+ Die("I don't know how to do -format=$rOpts->{'format'}\n");
}
unless ($formatter) {
- Die "Unable to continue with $rOpts->{'format'} formatting\n";
+ Die("Unable to continue with $rOpts->{'format'} formatting\n");
}
#---------------------------------------------------------------
$rpending_logfile_message );
# stop iterations if errors or converged
- #my $stop_now = $logger_object->{_warning_count};
my $stop_now = $tokenizer->report_tokenization_errors();
+ $stop_now ||= $tokenizer->get_unexpected_error_count();
if ($stop_now) {
$convergence_log_message = <<EOM;
Stopping iterations because of severe errors.
}
elsif ($do_convergence_test) {
- # Patch for [rt.cpan.org #88020]
- # Use utf8::encode since md5_hex() only operates on bytes.
- # my $digest = md5_hex( utf8::encode($sink_buffer) );
-
- # Note added 20180114: this patch did not work correctly.
- # I'm not sure why. But switching to the method
- # recommended in the Perl 5 documentation for Encode
- # worked. According to this we can either use
- # $octets = encode_utf8($string) or equivalently
- # $octets = encode("utf8",$string)
- # and then calculate the checksum. So:
- my $octets = Encode::encode( "utf8", $sink_buffer );
- my $digest = md5_hex($octets);
+ my $digest = $md5_hex->($sink_buffer);
if ( !$saw_md5{$digest} ) {
$saw_md5{$digest} = $iter;
}
#---------------------------------------------------------------
# Perform any postfilter operation
#---------------------------------------------------------------
- if ($postfilter) {
+ if ( $postfilter
+ || $rOpts->{'assert-tidy'}
+ || $rOpts->{'assert-untidy'} )
+ {
$sink_object->close_output_file();
$sink_object =
Perl::Tidy::LineSink->new( $output_file, $tee_file,
$line_separator, $rOpts, $rpending_logfile_message, $binmode );
- my $buf = $postfilter->($postfilter_buffer);
+
+ my $buf =
+ $postfilter
+ ? $postfilter->($postfilter_buffer)
+ : $postfilter_buffer;
+
+ # Check if file changed if requested, but only after any postfilter
+ if ( $rOpts->{'assert-tidy'} ) {
+ my $digest_output = $md5_hex->($buf);
+ if ( $digest_output ne $digest_input ) {
+ $logger_object->warning(
+"assertion failure: '--assert-tidy' is set but output differs from input\n"
+ );
+ }
+ }
+ if ( $rOpts->{'assert-untidy'} ) {
+ my $digest_output = $md5_hex->($buf);
+ if ( $digest_output eq $digest_input ) {
+ $logger_object->warning(
+"assertion failure: '--assert-untidy' is set but output equals input\n"
+ );
+ }
+ }
+
$source_object =
Perl::Tidy::LineSource->new( \$buf, $rOpts,
$rpending_logfile_message );
# oh, oh, no real file to backup ..
# shouldn't happen because of numerous preliminary checks
- Die
-"problem with -b backing up input file '$input_file': not a file\n";
+ Die(
+"problem with -b backing up input file '$input_file': not a file\n"
+ );
}
my $backup_name = $input_file . $backup_extension;
if ( -f $backup_name ) {
unlink($backup_name)
- or Die
-"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
+ or Die(
+"unable to remove previous '$backup_name' for -b option; check permissions: $!\n"
+ );
}
# backup the input file
# we use copy for symlinks, move for regular files
if ( -l $input_file ) {
File::Copy::copy( $input_file, $backup_name )
- or Die "File::Copy failed trying to backup source: $!";
+ or Die("File::Copy failed trying to backup source: $!");
}
else {
rename( $input_file, $backup_name )
- or Die
-"problem renaming $input_file to $backup_name for -b option: $!\n";
+ or Die(
+"problem renaming $input_file to $backup_name for -b option: $!\n"
+ );
}
$ifname = $backup_name;
# handle of an open nameless temporary file so we would lose
# everything if we closed it.
seek( $output_file, 0, 0 )
- or Die "unable to rewind a temporary file for -b option: $!\n";
+ or Die("unable to rewind a temporary file for -b option: $!\n");
my $fout = IO::File->new("> $input_file")
- or Die
-"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
+ or Die(
+"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n"
+ );
if ($binmode) {
if ( $rOpts->{'character-encoding'}
&& $rOpts->{'character-encoding'} eq 'utf8' )
{
- binmode $fout, ":encoding(UTF-8)";
+ binmode $fout, ":raw:encoding(UTF-8)";
}
else { binmode $fout }
}
# set output file permissions
if ( $output_file && -f $output_file && !-l $output_file ) {
- if ($input_file_permissions) {
+ if (@input_file_stat) {
- # give output script same permissions as input script, but
- # make it user-writable or else we can't run perltidy again.
- # Thus we retain whatever executable flags were set.
+ # Set file ownership and permissions
if ( $rOpts->{'format'} eq 'tidy' ) {
- chmod( $input_file_permissions | oct(600), $output_file );
+ my ( $mode_i, $uid_i, $gid_i ) =
+ @input_file_stat[ 2, 4, 5 ];
+ my ( $uid_o, $gid_o ) = ( stat($output_file) )[ 4, 5 ];
+ my $input_file_permissions = $mode_i & oct(7777);
+ my $output_file_permissions = $input_file_permissions;
+
+ #rt128477: avoid inconsistent owner/group and suid/sgid
+ if ( $uid_i != $uid_o || $gid_i != $gid_o ) {
+
+ # try to change owner and group to match input file if in -b mode
+ # note: chown returns number of files successfully changed
+ if ( $in_place_modify
+ && chown( $uid_i, $gid_i, $output_file ) )
+ {
+ # owner/group successfully changed
+ }
+ else {
+
+ # owner or group differ: do not copy suid and sgid
+ $output_file_permissions = $mode_i & oct(777);
+ if ( $input_file_permissions !=
+ $output_file_permissions )
+ {
+ Warn(
+"Unable to copy setuid and/or setgid bits for output file '$output_file'\n"
+ );
+ }
+ }
+ }
+
+ # Make the output file for rw unless we are in -b mode.
+ # Explanation: perltidy does not unlink existing output
+ # files before writing to them, for safety. If a
+ # designated output file exists and is not writable,
+ # perltidy will halt. This can prevent a data loss if a
+ # user accidentally enters "perltidy infile -o
+ # important_ro_file", or "perltidy infile -st
+ # >important_ro_file". But it also means that perltidy can
+ # get locked out of rerunning unless it marks its own
+ # output files writable. The alternative, of always
+ # unlinking the designated output file, is less safe and
+ # not always possible, except in -b mode, where there is an
+ # assumption that a previous backup can be unlinked even if
+ # not writable.
+ if ( !$in_place_modify ) {
+ $output_file_permissions |= oct(600);
+ }
+
+ if ( !chmod( $output_file_permissions, $output_file ) ) {
+
+ # couldn't change file permissions
+ my $operm = sprintf "%04o", $output_file_permissions;
+ Warn(
+"Unable to set permissions for output file '$output_file' to $operm\n"
+ );
+ }
}
# else use default permissions for html and any other format
}
else {
unlink($ifname)
- or Die
-"unable to remove previous '$ifname' for -b option; check permissions: $!\n";
+ or Die(
+"unable to remove previous '$ifname' for -b option; check permissions: $!\n"
+ );
}
}
if $logger_object;
} # end of main loop to process all files
+ # Fix for RT #130297: return a true value if anything was written to the
+ # standard error output, even non-fatal warning messages, otherwise return
+ # false.
+
+ # These exit codes are returned:
+ # 0 = perltidy ran to completion with no errors
+ # 1 = perltidy could not run to completion due to errors
+ # 2 = perltidy ran to completion with error messages
+
+ # Note that if perltidy is run with multiple files, any single file with
+ # errors or warnings will write a line like
+ # '## Please see file testing.t.ERR'
+ # to standard output for each file with errors, so the flag will be true,
+ # even if only some of the multiple files may have had errors.
+
NORMAL_EXIT:
- return 0;
+ my $ret = $Warn_count ? 2 : 0;
+ return $ret;
ERROR_EXIT:
return 1;
if ($short_name) {
if ( $expansion{$short_name} ) {
my $existing_name = $expansion{$short_name}[0];
- Die
-"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
+ Die(
+"redefining abbreviation $short_name for $long_name; already used for $existing_name\n"
+ );
}
$expansion{$short_name} = [$long_name];
if ( $flag eq '!' ) {
my $nolong_name = 'no' . $long_name;
if ( $expansion{$nshort_name} ) {
my $existing_name = $expansion{$nshort_name}[0];
- Die
-"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
+ Die(
+"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"
+ );
}
$expansion{$nshort_name} = [$nolong_name];
}
$add_option->( 'tabs', 't', '!' );
$add_option->( 'default-tabsize', 'dt', '=i' );
$add_option->( 'extended-syntax', 'xs', '!' );
+ $add_option->( 'assert-tidy', 'ast', '!' );
+ $add_option->( 'assert-untidy', 'asu', '!' );
+ $add_option->( 'sub-alias-list', 'sal', '=s' );
########################################
$category = 2; # Code indentation control
$add_option->( 'trim-pod', 'trp', '!' );
$add_option->( 'want-left-space', 'wls', '=s' );
$add_option->( 'want-right-space', 'wrs', '=s' );
+ $add_option->( 'space-prototype-paren', 'spp', '=i' );
########################################
$category = 4; # Comment controls
$add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
$add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
$add_option->( 'cuddled-else', 'ce', '!' );
- $add_option->( 'cuddled-blocks', 'cb', '!' );
$add_option->( 'cuddled-block-list', 'cbl', '=s' );
+ $add_option->( 'cuddled-block-list-exclusive', 'cblx', '!' );
$add_option->( 'cuddled-break-option', 'cbo', '=i' );
$add_option->( 'delete-old-newlines', 'dnl', '!' );
$add_option->( 'opening-brace-always-on-right', 'bar', '!' );
$add_option->( 'break-after-all-operators', 'baao', '!' );
$add_option->( 'break-before-all-operators', 'bbao', '!' );
$add_option->( 'keep-interior-semicolons', 'kis', '!' );
+ $add_option->( 'one-line-block-semicolons', 'olbs', '=i' );
+ $add_option->( 'one-line-block-nesting', 'olbn', '=i' );
########################################
$category = 6; # Controlling list formatting
########################################
$add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
$add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
+ $add_option->( 'break-at-old-method-breakpoints', 'bom', '!' );
$add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
$add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
$add_option->( 'ignore-old-breakpoints', 'iob', '!' );
$add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
$add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
+ $add_option->( 'keyword-group-blanks-list', 'kgbl', '=s' );
+ $add_option->( 'keyword-group-blanks-size', 'kgbs', '=s' );
+ $add_option->( 'keyword-group-blanks-repeat-count', 'kgbr', '=i' );
+ $add_option->( 'keyword-group-blanks-before', 'kgbb', '=i' );
+ $add_option->( 'keyword-group-blanks-after', 'kgba', '=i' );
+ $add_option->( 'keyword-group-blanks-inside', 'kgbi', '!' );
+ $add_option->( 'keyword-group-blanks-delete', 'kgbd', '!' );
+
$add_option->( 'blank-lines-after-opening-block', 'blao', '=i' );
$add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' );
$add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' );
$add_option->( 'help', 'h', '' );
$add_option->( 'short-concatenation-item-length', 'scl', '=i' );
$add_option->( 'show-options', 'opt', '!' );
+ $add_option->( 'timestamp', 'ts', '!' );
$add_option->( 'version', 'v', '' );
$add_option->( 'memoize', 'mem', '!' );
$add_option->( 'file-size-order', 'fso', '!' );
'closing-side-comment-else-flag' => [ 0, 2 ],
'comma-arrow-breakpoints' => [ 0, 5 ],
+
+ 'keyword-group-blanks-before' => [ 0, 2 ],
+ 'keyword-group-blanks-after' => [ 0, 2 ],
+
+ 'space-prototype-paren' => [ 0, 2 ],
);
# Note: we could actually allow negative ci if someone really wants it:
blanks-before-comments
blank-lines-before-subs=1
blank-lines-before-packages=1
+
+ keyword-group-blanks-size=5
+ keyword-group-blanks-repeat-count=0
+ keyword-group-blanks-before=1
+ keyword-group-blanks-after=1
+ nokeyword-group-blanks-inside
+ nokeyword-group-blanks-delete
+
block-brace-tightness=0
block-brace-vertical-tightness=0
brace-tightness=1
minimum-space-to-comment=4
nobrace-left-and-indent
nocuddled-else
- nocuddled-blocks
nodelete-old-whitespace
nohtml
nologfile
notabs
nowarning-output
character-encoding=none
+ one-line-block-semicolons=1
+ one-line-block-nesting=0
outdent-labels
outdent-long-quotes
outdent-long-comments
short-concatenation-item-length=8
space-for-semicolon
space-backslash-quote=1
+ space-prototype-paren=1
square-bracket-tightness=1
square-bracket-vertical-tightness-closing=0
square-bracket-vertical-tightness=0
static-block-comments
+ timestamp
trim-qw
format=tidy
backup-file-extension=bak
'nhtml' => [qw(format=tidy)],
'tidy' => [qw(format=tidy)],
+ # -cb is now a synonym for -ce
+ 'cb' => [qw(cuddled-else)],
+ 'cuddled-blocks' => [qw(cuddled-else)],
+
'utf8' => [qw(character-encoding=utf8)],
'UTF8' => [qw(character-encoding=utf8)],
'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
'nbbs' => [qw(blbs=0 blbp=0)],
+ 'keyword-group-blanks' => [qw(kgbb=2 kgbi kgba=2)],
+ 'kgb' => [qw(kgbb=2 kgbi kgba=2)],
+ 'nokeyword-group-blanks' => [qw(kgbb=1 nkgbi kgba=1)],
+ 'nkgb' => [qw(kgbb=1 nkgbi kgba=1)],
+
'break-at-old-trinary-breakpoints' => [qw(bot)],
'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
sub process_command_line {
+ my @q = @_;
my (
$perltidyrc_stream, $is_Windows, $Windows_type,
$rpending_complaint, $dump_options_type
- ) = @_;
+ ) = @q;
my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
if ($use_cache) {
return @retvals;
}
else {
- my @retvals = _process_command_line(@_);
+ my @retvals = _process_command_line(@q);
$process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
if $retvals[0]->{'memoize'};
return @retvals;
}
}
else {
- return _process_command_line(@_);
+ return _process_command_line(@q);
}
}
#---------------------------------------------------------------
my %Opts = ();
{
- local @ARGV;
+ local @ARGV = ();
# do not load the defaults if we are just dumping perltidyrc
unless ( $dump_options_type eq 'perltidyrc' ) {
for my $i ( @{$rdefaults} ) { push @ARGV, "--" . $i }
}
if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
- Die
-"Programming Bug reported by 'GetOptions': error in setting default options";
+ Die(
+"Programming Bug reported by 'GetOptions': error in setting default options"
+ );
}
}
}
elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
if ($config_file) {
- Warn
-"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
+ Warn(
+"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"
+ );
}
$config_file = $2;
}
}
unless ( -e $config_file ) {
- Warn "cannot find file given with -pro=$config_file: $!\n";
+ Warn("cannot find file given with -pro=$config_file: $!\n");
$config_file = "";
}
}
elsif ( $i =~ /^-(pro|profile)=?$/ ) {
- Die "usage: -pro=filename or --profile=filename, no spaces\n";
+ Die("usage: -pro=filename or --profile=filename, no spaces\n");
}
elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
usage();
- Exit 0;
+ Exit(0);
}
elsif ( $i =~ /^-(version|v)$/ ) {
show_version();
- Exit 0;
+ Exit(0);
}
elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
dump_defaults( @{$rdefaults} );
- Exit 0;
+ Exit(0);
}
elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
dump_long_names( @{$roption_string} );
- Exit 0;
+ Exit(0);
}
elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
dump_short_names($rexpansion);
- Exit 0;
+ Exit(0);
}
elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
- Exit 0;
+ Exit(0);
}
}
if ( $saw_dump_profile && $saw_ignore_profile ) {
- Warn "No profile to dump because of -npro\n";
- Exit 1;
+ Warn("No profile to dump because of -npro\n");
+ Exit(1);
}
#---------------------------------------------------------------
# line.
if ($perltidyrc_stream) {
if ($config_file) {
- Warn <<EOM;
+ Warn(<<EOM);
Conflict: a perltidyrc configuration file was specified both as this
perltidy call parameter: $perltidyrc_stream
and with this -profile=$config_file.
if ($saw_dump_profile) {
dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
- Exit 0;
+ Exit(0);
}
if ($fh_config) {
my ( $rconfig_list, $death_message ) =
read_config_file( $fh_config, $config_file, $rexpansion );
- Die $death_message if ($death_message);
+ Die($death_message) if ($death_message);
# process any .perltidyrc parameters right now so we can
# localize errors
$config_file );
if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
- Die
-"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
+ Die(
+"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n"
+ );
}
# Anything left in this local @ARGV is an error and must be
last;
}
}
- Die <<EOM;
+ Die(<<EOM);
There are $count unrecognized values in the configuration file '$config_file':
$str
Use leading dashes for parameters. Use -npro to ignore this file.
if ( defined( $Opts{$_} ) ) {
delete $Opts{$_};
- Warn "ignoring --$_ in config file: $config_file\n";
+ Warn("ignoring --$_ in config file: $config_file\n");
}
}
}
#---------------------------------------------------------------
expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
- local $SIG{'__WARN__'} = sub { Warn $_[0] };
+ local $SIG{'__WARN__'} = sub { Warn( $_[0] ) };
if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
- Die "Error on command line; for help try 'perltidy -h'\n";
+ Die("Error on command line; for help try 'perltidy -h'\n");
}
# reset Getopt::Long configuration back to its previous value
$rOpts->{'check-syntax'} = 0;
}
+ ###########################################################################
# Added Dec 2017: Deactivating check-syntax for all systems for safety
# because unexpected results can occur when code in BEGIN blocks is
# executed. This flag was included to help check for perltidy mistakes,
# and may still be useful for debugging. To activate for testing comment
- # out the next three lines.
+ # out the next three lines. Also fix sub 'do_check_syntax' in this file.
+ ###########################################################################
else {
$rOpts->{'check-syntax'} = 0;
}
if ( $rOpts->{$key} ) {
if ( $rOpts->{$key} < 0 ) {
$rOpts->{$key} = 0;
- Warn "negative value of $abbrev, setting 0\n";
+ Warn("negative value of $abbrev, setting 0\n");
}
if ( $rOpts->{$key} > 100 ) {
- Warn "unreasonably large value of $abbrev, reducing\n";
+ Warn("unreasonably large value of $abbrev, reducing\n");
$rOpts->{$key} = 100;
}
}
if ( $rOpts->{'opening-brace-always-on-right'}
&& $rOpts->{'opening-brace-on-new-line'} )
{
- Warn <<EOM;
+ Warn(<<EOM);
Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
'opening-brace-on-new-line' (-bl). Ignoring -bl.
EOM
if ( $rOpts->{'entab-leading-whitespace'} ) {
if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
- Warn "-et=n must use a positive integer; ignoring -et\n";
+ Warn("-et=n must use a positive integer; ignoring -et\n");
$rOpts->{'entab-leading-whitespace'} = undef;
}
# use tabs
if ( $rOpts->{'default-tabsize'} ) {
if ( $rOpts->{'default-tabsize'} < 0 ) {
- Warn "negative value of -dt, setting 0\n";
+ Warn("negative value of -dt, setting 0\n");
$rOpts->{'default-tabsize'} = 0;
}
if ( $rOpts->{'default-tabsize'} > 20 ) {
- Warn "unreasonably large value of -dt, reducing\n";
+ Warn("unreasonably large value of -dt, reducing\n");
$rOpts->{'default-tabsize'} = 20;
}
}
$rOpts->{'default-tabsize'} = 8;
}
+ # Check and clean up any sub-alias-list.
+ # The option value is a list of words separated by commas and/or
+ # whitespace.  It is rewritten in place as a single space-separated
+ # string which always starts with 'sub', with duplicate words and
+ # malformed words removed.
+ if ( $rOpts->{'sub-alias-list'} ) {
+     my $sub_alias_string = $rOpts->{'sub-alias-list'};
+     $sub_alias_string =~ s/,/ /g;    # allow commas
+     $sub_alias_string =~ s/^\s+//;
+     $sub_alias_string =~ s/\s+$//;
+     my @sub_alias_list     = split /\s+/, $sub_alias_string;
+     my @filtered_word_list = ('sub');
+     my %seen;
+
+     # include 'sub' for later convenience
+     $seen{sub}++;
+     foreach my $word (@sub_alias_list) {
+
+         # skip empty (false) fields
+         next unless ($word);
+
+         # A malformed word must really be dropped, as the message says;
+         # previously it was reported but still added to the list.
+         if ( $word !~ /^\w[\w\d]*$/ ) {
+             Warn("unexpected sub alias '$word' - ignoring\n");
+             next;
+         }
+
+         # keep only the first occurrence of each word
+         if ( !$seen{$word} ) {
+             $seen{$word}++;
+             push @filtered_word_list, $word;
+         }
+     }
+     $rOpts->{'sub-alias-list'} = join ' ', @filtered_word_list;
+ }
+
# Define $tabsize, the number of spaces per tab for use in
# guessing the indentation of source lines with leading tabs.
# Assume same as for this run if tabs are used , otherwise assume
sub find_file_upwards {
my ( $search_dir, $search_file ) = @_;
- $search_dir =~ s{/+$}{};
+ $search_dir =~ s{/+$}{};
$search_file =~ s{^/+}{};
while (1) {
$search_dir = dirname($search_dir);
}
}
+
+ # This return is for Perl-Critic.
+ # We shouldn't get out of the while loop without a return
+ return;
}
sub expand_command_abbreviations {
# make sure we are not in an infinite loop
if ( $pass_count == $max_passes ) {
local $" = ')(';
- Warn <<EOM;
+ Warn(<<EOM);
I'm tired. We seem to be in an infinite loop trying to expand aliases.
Here are the raw options;
(rraw_options)
EOM
my $num = @new_argv;
if ( $num < 50 ) {
- Warn <<EOM;
+ Warn(<<EOM);
After $max_passes passes here is ARGV
(@new_argv)
EOM
}
else {
- Warn <<EOM;
+ Warn(<<EOM);
After $max_passes passes ARGV has $num entries
EOM
}
if ($config_file) {
- Die <<"DIE";
+ Die(<<"DIE");
Please check your configuration file $config_file for circular-references.
To deactivate it, use -npro.
DIE
}
else {
- Die <<'DIE';
+ Die(<<'DIE');
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
# are welcome.
unless ( defined $os ) {
$os = "";
- ${$rpending_complaint} .= <<EOS;
+
+ # Deactivated this message 20180322 because it was needlessly
+ # causing some test scripts to fail. Need help from someone
+ # with expertise in Windows to decide what is possible with windows.
+ ${$rpending_complaint} .= <<EOS if (0);
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
We won't be able to look for a system-wide config file.
EOS
# Directory, and All Users Directory. All Users will be empty on a
# 9x/Me box. Contributed by: Yves Orton.
- # my ( $rpending_complaint, $os ) = @_;
- # if ( !$os ) { $os = Win_OS_Type(); }
+ # Original coding:
+ # my $rpending_complaint = shift;
+ # my $os = (@_) ? shift : Win_OS_Type();
+
+ my ( $rpending_complaint, $os ) = @_;
+ if ( !$os ) { $os = Win_OS_Type(); }
- my $rpending_complaint = shift;
- my $os = (@_) ? shift : Win_OS_Type();
return unless $os;
my $system = "";
print STDOUT <<"EOM";
This is perltidy, v$VERSION
-Copyright 2000-2018, Steve Hancock
+Copyright 2000-2019, Steve Hancock
Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
Basic Options:
-i=n use n columns per indentation level (default n=4)
- -t tabs: use one tab character per indentation level, not recommeded
+ -t tabs: use one tab character per indentation level, not recommended
-nt no tabs: use n spaces per indentation level (default)
-et=n entab leading whitespace n spaces per tab; not recommended
-io "indent only": just do indentation, no other formatting.
-kis keep interior semicolons. Allows multiple statements per line.
-boc break at old comma breaks: turns off all automatic list formatting
-bol break at old logical breakpoints: or, and, ||, && (default)
+ -bom break at old method call breakpoints: ->
-bok break at old list keyword breakpoints such as map, sort (default)
-bot break at old conditional (ternary ?:) operator breakpoints (default)
-boa break at old attribute breakpoints
$logger_object->report_definite_bug();
# the perl version number will be helpful for diagnosing the problem
- $logger_object->write_logfile_entry(
- qx/perl -v $error_redirection/ . "\n" );
+ $logger_object->write_logfile_entry( $^V . "\n" );
}
}
else {
}
sub do_syntax_check {
- my ( $stream, $flags, $error_redirection ) = @_;
-
- ############################################################
- # This code is not reachable because syntax check is deactivated,
- # but it is retained for reference.
- ############################################################
-
- # We need a named input file for executing perl
- my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
-
- # TODO: Need to add name of file to log somewhere
- # otherwise Perl output is hard to read
- if ( !$stream_filename ) { return $stream_filename, "" }
-
- # We have to quote the filename in case it has unusual characters
- # or spaces. Example: this filename #CM11.pm# gives trouble.
- my $quoted_stream_filename = '"' . $stream_filename . '"';
-
- # Under VMS something like -T will become -t (and an error) so we
- # will put quotes around the flags. Double quotes seem to work on
- # Unix/Windows/VMS, but this may not work on all systems. (Single
- # quotes do not work under Windows). It could become necessary to
- # put double quotes around each flag, such as: -"c" -"T"
- # We may eventually need some system-dependent coding here.
- $flags = '"' . $flags . '"';
-
- # now wish for luck...
- my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
-
- if ($is_tmpfile) {
- unlink $stream_filename
- or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n");
- }
- return $stream_filename, $msg;
-}
-
-#####################################################################
-#
-# This is a stripped down version of IO::Scalar
-# Given a reference to a scalar, it supplies either:
-# a getline method which reads lines (mode='r'), or
-# a print method which reads lines (mode='w')
-#
-#####################################################################
-package Perl::Tidy::IOScalar;
-use Carp;
-
-sub new {
- my ( $package, $rscalar, $mode ) = @_;
- my $ref = ref $rscalar;
- if ( $ref ne 'SCALAR' ) {
- confess <<EOM;
-------------------------------------------------------------------------
-expecting ref to SCALAR but got ref to ($ref); trace follows:
-------------------------------------------------------------------------
-EOM
-
- }
- if ( $mode eq 'w' ) {
- ${$rscalar} = "";
- return bless [ $rscalar, $mode ], $package;
- }
- elsif ( $mode eq 'r' ) {
-
- # Convert a scalar to an array.
- # This avoids looking for "\n" on each call to getline
- #
- # NOTES: The -1 count is needed to avoid loss of trailing blank lines
- # (which might be important in a DATA section).
- my @array;
- if ( $rscalar && ${$rscalar} ) {
- @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
-
- # remove possible extra blank line introduced with split
- if ( @array && $array[-1] eq "\n" ) { pop @array }
- }
- my $i_next = 0;
- return bless [ \@array, $mode, $i_next ], $package;
- }
- else {
- confess <<EOM;
-------------------------------------------------------------------------
-expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
-------------------------------------------------------------------------
-EOM
- }
-}
-
-sub getline {
- my $self = shift;
- my $mode = $self->[1];
- if ( $mode ne 'r' ) {
- confess <<EOM;
-------------------------------------------------------------------------
-getline call requires mode = 'r' but mode = ($mode); trace follows:
-------------------------------------------------------------------------
-EOM
- }
- my $i = $self->[2]++;
- return $self->[0]->[$i];
-}
-
-sub print {
- my ( $self, $msg ) = @_;
- my $mode = $self->[1];
- if ( $mode ne 'w' ) {
- confess <<EOM;
-------------------------------------------------------------------------
-print call requires mode = 'w' but mode = ($mode); trace follows:
-------------------------------------------------------------------------
-EOM
- }
- ${ $self->[0] } .= $msg;
-}
-sub close { return }
-
-#####################################################################
-#
-# This is a stripped down version of IO::ScalarArray
-# Given a reference to an array, it supplies either:
-# a getline method which reads lines (mode='r'), or
-# a print method which reads lines (mode='w')
-#
-# NOTE: this routine assumes that there aren't any embedded
-# newlines within any of the array elements. There are no checks
-# for that.
-#
-#####################################################################
-package Perl::Tidy::IOScalarArray;
-use Carp;
-
-sub new {
- my ( $package, $rarray, $mode ) = @_;
- my $ref = ref $rarray;
- if ( $ref ne 'ARRAY' ) {
- confess <<EOM;
-------------------------------------------------------------------------
-expecting ref to ARRAY but got ref to ($ref); trace follows:
-------------------------------------------------------------------------
-EOM
-
- }
- if ( $mode eq 'w' ) {
- @{$rarray} = ();
- return bless [ $rarray, $mode ], $package;
- }
- elsif ( $mode eq 'r' ) {
- my $i_next = 0;
- return bless [ $rarray, $mode, $i_next ], $package;
- }
- else {
- confess <<EOM;
-------------------------------------------------------------------------
-expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
-------------------------------------------------------------------------
-EOM
- }
-}
-
-sub getline {
- my $self = shift;
- my $mode = $self->[1];
- if ( $mode ne 'r' ) {
- confess <<EOM;
-------------------------------------------------------------------------
-getline requires mode = 'r' but mode = ($mode); trace follows:
-------------------------------------------------------------------------
-EOM
- }
- my $i = $self->[2]++;
- return $self->[0]->[$i];
-}
-
-sub print {
- my ( $self, $msg ) = @_;
- my $mode = $self->[1];
- if ( $mode ne 'w' ) {
- confess <<EOM;
-------------------------------------------------------------------------
-print requires mode = 'w' but mode = ($mode); trace follows:
-------------------------------------------------------------------------
-EOM
- }
- push @{ $self->[0] }, $msg;
-}
-sub close { return }
-
-#####################################################################
-#
-# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
-# which returns the next line to be parsed
-#
-#####################################################################
-
-package Perl::Tidy::LineSource;
-
-sub new {
-
- my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
-
- my $input_line_ending;
- if ( $rOpts->{'preserve-line-endings'} ) {
- $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
- }
-
- ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
- return unless $fh;
-
- # in order to check output syntax when standard output is used,
- # or when it is an object, we have to make a copy of the file
- if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
- {
-
- # Turning off syntax check when input output is used.
- # The reason is that temporary files cause problems on
- # on many systems.
- $rOpts->{'check-syntax'} = 0;
-
- ${$rpending_logfile_message} .= <<EOM;
-Note: --syntax check will be skipped because standard input is used
-EOM
-
- }
-
- return bless {
- _fh => $fh,
- _filename => $input_file,
- _input_line_ending => $input_line_ending,
- _rinput_buffer => [],
- _started => 0,
- }, $class;
-}
-
-sub close_input_file {
- my $self = shift;
-
- # Only close physical files, not STDIN and other objects
- my $filename = $self->{_filename};
- if ( $filename ne '-' && !ref $filename ) {
- eval { $self->{_fh}->close() };
- }
- return;
-}
-
-sub get_line {
- my $self = shift;
- my $line = undef;
- my $fh = $self->{_fh};
- my $rinput_buffer = $self->{_rinput_buffer};
-
- if ( scalar( @{$rinput_buffer} ) ) {
- $line = shift @{$rinput_buffer};
- }
- else {
- $line = $fh->getline();
-
- # patch to read raw mac files under unix, dos
- # see if the first line has embedded \r's
- if ( $line && !$self->{_started} ) {
- if ( $line =~ /[\015][^\015\012]/ ) {
-
- # found one -- break the line up and store in a buffer
- @{$rinput_buffer} = map { $_ . "\n" } split /\015/, $line;
- my $count = @{$rinput_buffer};
- $line = shift @{$rinput_buffer};
- }
- $self->{_started}++;
- }
- }
- return $line;
-}
-
-#####################################################################
-#
-# the Perl::Tidy::LineSink class supplies a write_line method for
-# actual file writing
-#
-#####################################################################
-
-package Perl::Tidy::LineSink;
-
-sub new {
-
- my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
- $rpending_logfile_message, $binmode )
- = @_;
- my $fh = undef;
- my $fh_tee = undef;
-
- my $output_file_open = 0;
-
- if ( $rOpts->{'format'} eq 'tidy' ) {
- ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
- unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
- $output_file_open = 1;
- if ($binmode) {
- if ( $rOpts->{'character-encoding'}
- && $rOpts->{'character-encoding'} eq 'utf8' )
- {
- if ( ref($fh) eq 'IO::File' ) {
- $fh->binmode(":encoding(UTF-8)");
- }
- elsif ( $output_file eq '-' ) {
- binmode STDOUT, ":encoding(UTF-8)";
- }
- }
-
- # Patch for RT 122030
- elsif ( ref($fh) eq 'IO::File' ) { $fh->binmode(); }
-
- elsif ( $output_file eq '-' ) { binmode STDOUT }
- }
- }
-
- # in order to check output syntax when standard output is used,
- # or when it is an object, we have to make a copy of the file
- if ( $output_file eq '-' || ref $output_file ) {
- if ( $rOpts->{'check-syntax'} ) {
-
- # Turning off syntax check when standard output is used.
- # The reason is that temporary files cause problems on
- # on many systems.
- $rOpts->{'check-syntax'} = 0;
- ${$rpending_logfile_message} .= <<EOM;
-Note: --syntax check will be skipped because standard output is used
-EOM
-
- }
- }
-
- return bless {
- _fh => $fh,
- _fh_tee => $fh_tee,
- _output_file => $output_file,
- _output_file_open => $output_file_open,
- _tee_flag => 0,
- _tee_file => $tee_file,
- _tee_file_opened => 0,
- _line_separator => $line_separator,
- _binmode => $binmode,
- }, $class;
-}
-
-sub write_line {
-
- my ( $self, $line ) = @_;
- my $fh = $self->{_fh};
-
- my $output_file_open = $self->{_output_file_open};
- chomp $line;
- $line .= $self->{_line_separator};
- $fh->print($line) if ( $self->{_output_file_open} );
-
- if ( $self->{_tee_flag} ) {
- unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
- my $fh_tee = $self->{_fh_tee};
- print $fh_tee $line;
- }
- return;
-}
-
-sub tee_on {
- my $self = shift;
- $self->{_tee_flag} = 1;
- return;
-}
-
-sub tee_off {
- my $self = shift;
- $self->{_tee_flag} = 0;
- return;
-}
-
-sub really_open_tee_file {
- my $self = shift;
- my $tee_file = $self->{_tee_file};
- my $fh_tee;
- $fh_tee = IO::File->new(">$tee_file")
- or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
- binmode $fh_tee if $self->{_binmode};
- $self->{_tee_file_opened} = 1;
- $self->{_fh_tee} = $fh_tee;
- return;
-}
-
-sub close_output_file {
- my $self = shift;
-
- # Only close physical files, not STDOUT and other objects
- my $output_file = $self->{_output_file};
- if ( $output_file ne '-' && !ref $output_file ) {
- eval { $self->{_fh}->close() } if $self->{_output_file_open};
- }
- $self->close_tee_file();
+ # This should not be called; the syntax check is deactivated
+ Die("Unexpected call for syntax check-shouldn't happen\n");
return;
}
-sub close_tee_file {
- my $self = shift;
-
- # Only close physical files, not STDOUT and other objects
- if ( $self->{_tee_file_opened} ) {
- my $tee_file = $self->{_tee_file};
- if ( $tee_file ne '-' && !ref $tee_file ) {
- eval { $self->{_fh_tee}->close() };
- $self->{_tee_file_opened} = 0;
- }
- }
- return;
-}
-
-#####################################################################
-#
-# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
-# useful for program development.
-#
-# Only one such file is created regardless of the number of input
-# files processed. This allows the results of processing many files
-# to be summarized in a single file.
-
-# Output messages go to a file named DIAGNOSTICS, where
-# they are labeled by file and line. This allows many files to be
-# scanned at once for some particular condition of interest. It was
-# particularly useful for developing guessing strategies.
-#
-# NOTE: This feature is deactivated in final releases but can be
-# reactivated for debugging by un-commenting the 'I' options flag
-#
-#####################################################################
-
-package Perl::Tidy::Diagnostics;
-
-sub new {
-
- my $class = shift;
- return bless {
- _write_diagnostics_count => 0,
- _last_diagnostic_file => "",
- _input_file => "",
- _fh => undef,
- }, $class;
-}
+1;
-sub set_input_file {
- my ( $self, $input_file ) = @_;
- $self->{_input_file} = $input_file;
- return;
-}
-
-sub write_diagnostics {
- my ( $self, $msg ) = @_;
-
- unless ( $self->{_write_diagnostics_count} ) {
- open( $self->{_fh}, ">", "DIAGNOSTICS" )
- or Perl::Tidy::Die("couldn't open DIAGNOSTICS: $!\n");
- }
-
- my $fh = $self->{_fh};
- my $last_diagnostic_file = $self->{_last_diagnostic_file};
- my $input_file = $self->{_input_file};
- if ( $last_diagnostic_file ne $input_file ) {
- $fh->print("\nFILE:$input_file\n");
- }
- $self->{_last_diagnostic_file} = $input_file;
- my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
- $fh->print("$input_line_number:\t$msg");
- $self->{_write_diagnostics_count}++;
- return;
-}
-
-#####################################################################
-#
-# The Perl::Tidy::Logger class writes the .LOG and .ERR files
-#
-#####################################################################
-
-package Perl::Tidy::Logger;
-
-sub new {
-
- my ( $class, $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude ) =
- @_;
-
- my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
-
- # remove any old error output file if we might write a new one
- unless ( $fh_warnings || ref($warning_file) ) {
- if ( -e $warning_file ) {
- unlink($warning_file)
- or Perl::Tidy::Die(
- "couldn't unlink warning file $warning_file: $!\n");
- }
- }
-
- my $logfile_gap =
- defined( $rOpts->{'logfile-gap'} )
- ? $rOpts->{'logfile-gap'}
- : 50;
- if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
-
- return bless {
- _log_file => $log_file,
- _logfile_gap => $logfile_gap,
- _rOpts => $rOpts,
- _fh_warnings => $fh_warnings,
- _last_input_line_written => 0,
- _at_end_of_file => 0,
- _use_prefix => 1,
- _block_log_output => 0,
- _line_of_tokens => undef,
- _output_line_number => undef,
- _wrote_line_information_string => 0,
- _wrote_column_headings => 0,
- _warning_file => $warning_file,
- _warning_count => 0,
- _complaint_count => 0,
- _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
- _saw_brace_error => 0,
- _saw_extrude => $saw_extrude,
- _output_array => [],
- }, $class;
-}
-
-sub get_warning_count {
- my $self = shift;
- return $self->{_warning_count};
-}
-
-sub get_use_prefix {
- my $self = shift;
- return $self->{_use_prefix};
-}
-
-sub block_log_output {
- my $self = shift;
- $self->{_block_log_output} = 1;
- return;
-}
-
-sub unblock_log_output {
- my $self = shift;
- $self->{_block_log_output} = 0;
- return;
-}
-
-sub interrupt_logfile {
- my $self = shift;
- $self->{_use_prefix} = 0;
- $self->warning("\n");
- $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
- return;
-}
-
-sub resume_logfile {
- my $self = shift;
- $self->write_logfile_entry( '#' x 60 . "\n" );
- $self->{_use_prefix} = 1;
- return;
-}
-
-sub we_are_at_the_last_line {
- my $self = shift;
- unless ( $self->{_wrote_line_information_string} ) {
- $self->write_logfile_entry("Last line\n\n");
- }
- $self->{_at_end_of_file} = 1;
- return;
-}
-
-# record some stuff in case we go down in flames
-sub black_box {
- my ( $self, $line_of_tokens, $output_line_number ) = @_;
- my $input_line = $line_of_tokens->{_line_text};
- my $input_line_number = $line_of_tokens->{_line_number};
-
- # save line information in case we have to write a logfile message
- $self->{_line_of_tokens} = $line_of_tokens;
- $self->{_output_line_number} = $output_line_number;
- $self->{_wrote_line_information_string} = 0;
-
- my $last_input_line_written = $self->{_last_input_line_written};
- my $rOpts = $self->{_rOpts};
- if (
- (
- ( $input_line_number - $last_input_line_written ) >=
- $self->{_logfile_gap}
- )
- || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
- )
- {
- my $structural_indentation_level = $line_of_tokens->{_level_0};
- $structural_indentation_level = 0
- if ( $structural_indentation_level < 0 );
- $self->{_last_input_line_written} = $input_line_number;
- ( my $out_str = $input_line ) =~ s/^\s*//;
- chomp $out_str;
-
- $out_str = ( '.' x $structural_indentation_level ) . $out_str;
-
- if ( length($out_str) > 35 ) {
- $out_str = substr( $out_str, 0, 35 ) . " ....";
- }
- $self->logfile_output( "", "$out_str\n" );
- }
- return;
-}
-
-sub write_logfile_entry {
-
- my ( $self, @msg ) = @_;
-
- # add leading >>> to avoid confusing error messages and code
- $self->logfile_output( ">>>", "@msg" );
- return;
-}
-
-sub write_column_headings {
- my $self = shift;
-
- $self->{_wrote_column_headings} = 1;
- my $routput_array = $self->{_output_array};
- push @{$routput_array}, <<EOM;
-The nesting depths in the table below are at the start of the lines.
-The indicated output line numbers are not always exact.
-ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
-
-in:out indent c b nesting code + messages; (messages begin with >>>)
-lines levels i k (code begins with one '.' per indent level)
------- ----- - - -------- -------------------------------------------
-EOM
- return;
-}
-
-sub make_line_information_string {
-
- # make columns of information when a logfile message needs to go out
- my $self = shift;
- my $line_of_tokens = $self->{_line_of_tokens};
- my $input_line_number = $line_of_tokens->{_line_number};
- my $line_information_string = "";
- if ($input_line_number) {
-
- my $output_line_number = $self->{_output_line_number};
- my $brace_depth = $line_of_tokens->{_curly_brace_depth};
- my $paren_depth = $line_of_tokens->{_paren_depth};
- my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
- my $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
- ##my $rtoken_array = $line_of_tokens->{_rtoken_array};
-
- my $structural_indentation_level = $line_of_tokens->{_level_0};
-
- $self->write_column_headings() unless $self->{_wrote_column_headings};
-
- # keep logfile columns aligned for scripts up to 999 lines;
- # for longer scripts it doesn't really matter
- my $extra_space = "";
- $extra_space .=
- ( $input_line_number < 10 ) ? " "
- : ( $input_line_number < 100 ) ? " "
- : "";
- $extra_space .=
- ( $output_line_number < 10 ) ? " "
- : ( $output_line_number < 100 ) ? " "
- : "";
-
- # there are 2 possible nesting strings:
- # the original which looks like this: (0 [1 {2
- # the new one, which looks like this: {{[
- # the new one is easier to read, and shows the order, but
- # could be arbitrarily long, so we use it unless it is too long
- my $nesting_string =
- "($paren_depth [$square_bracket_depth {$brace_depth";
- my $nesting_string_new = $line_of_tokens->{_nesting_tokens_0};
- my $ci_level = $line_of_tokens->{_ci_level_0};
- if ( $ci_level > 9 ) { $ci_level = '*' }
- my $bk = ( $line_of_tokens->{_nesting_blocks_0} =~ /1$/ ) ? '1' : '0';
-
- if ( length($nesting_string_new) <= 8 ) {
- $nesting_string =
- $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
- }
- $line_information_string =
-"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
- }
- return $line_information_string;
-}
-
-sub logfile_output {
- my ( $self, $prompt, $msg ) = @_;
- return if ( $self->{_block_log_output} );
-
- my $routput_array = $self->{_output_array};
- if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
- push @{$routput_array}, "$msg";
- }
- else {
- my $line_information_string = $self->make_line_information_string();
- $self->{_wrote_line_information_string} = 1;
-
- if ($line_information_string) {
- push @{$routput_array}, "$line_information_string $prompt$msg";
- }
- else {
- push @{$routput_array}, "$msg";
- }
- }
- return;
-}
-
-sub get_saw_brace_error {
- my $self = shift;
- return $self->{_saw_brace_error};
-}
-
-sub increment_brace_error {
- my $self = shift;
- $self->{_saw_brace_error}++;
- return;
-}
-
-sub brace_warning {
- my ( $self, $msg ) = @_;
-
- #use constant BRACE_WARNING_LIMIT => 10;
- my $BRACE_WARNING_LIMIT = 10;
- my $saw_brace_error = $self->{_saw_brace_error};
-
- if ( $saw_brace_error < $BRACE_WARNING_LIMIT ) {
- $self->warning($msg);
- }
- $saw_brace_error++;
- $self->{_saw_brace_error} = $saw_brace_error;
-
- if ( $saw_brace_error == $BRACE_WARNING_LIMIT ) {
- $self->warning("No further warnings of this type will be given\n");
- }
- return;
-}
-
-sub complain {
-
- # handle non-critical warning messages based on input flag
- my ( $self, $msg ) = @_;
- my $rOpts = $self->{_rOpts};
-
- # these appear in .ERR output only if -w flag is used
- if ( $rOpts->{'warning-output'} ) {
- $self->warning($msg);
- }
-
- # otherwise, they go to the .LOG file
- else {
- $self->{_complaint_count}++;
- $self->write_logfile_entry($msg);
- }
- return;
-}
-
-sub warning {
-
- # report errors to .ERR file (or stdout)
- my ( $self, $msg ) = @_;
-
- #use constant WARNING_LIMIT => 50;
- my $WARNING_LIMIT = 50;
-
- my $rOpts = $self->{_rOpts};
- unless ( $rOpts->{'quiet'} ) {
-
- my $warning_count = $self->{_warning_count};
- my $fh_warnings = $self->{_fh_warnings};
- if ( !$fh_warnings ) {
- my $warning_file = $self->{_warning_file};
- ( $fh_warnings, my $filename ) =
- Perl::Tidy::streamhandle( $warning_file, 'w' );
- $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
- Perl::Tidy::Warn "## Please see file $filename\n"
- unless ref($warning_file);
- $self->{_fh_warnings} = $fh_warnings;
- $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
- }
-
- if ( $warning_count < $WARNING_LIMIT ) {
- if ( $self->get_use_prefix() > 0 ) {
- my $input_line_number =
- Perl::Tidy::Tokenizer::get_input_line_number();
- if ( !defined($input_line_number) ) { $input_line_number = -1 }
- $fh_warnings->print("$input_line_number:\t$msg");
- $self->write_logfile_entry("WARNING: $msg");
- }
- else {
- $fh_warnings->print($msg);
- $self->write_logfile_entry($msg);
- }
- }
- $warning_count++;
- $self->{_warning_count} = $warning_count;
-
- if ( $warning_count == $WARNING_LIMIT ) {
- $fh_warnings->print("No further warnings will be given\n");
- }
- }
- return;
-}
-
-# programming bug codes:
-# -1 = no bug
-# 0 = maybe, not sure.
-# 1 = definitely
-sub report_possible_bug {
- my $self = shift;
- my $saw_code_bug = $self->{_saw_code_bug};
- $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
- return;
-}
-
-sub report_definite_bug {
- my $self = shift;
- $self->{_saw_code_bug} = 1;
- return;
-}
-
-sub ask_user_for_bug_report {
-
- my ( $self, $infile_syntax_ok, $formatter ) = @_;
- my $saw_code_bug = $self->{_saw_code_bug};
- if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
- $self->warning(<<EOM);
-
-You may have encountered a code bug in perltidy. If you think so, and
-the problem is not listed in the BUGS file at
-http://perltidy.sourceforge.net, please report it so that it can be
-corrected. Include the smallest possible script which has the problem,
-along with the .LOG file. See the manual pages for contact information.
-Thank you!
-EOM
-
- }
- elsif ( $saw_code_bug == 1 ) {
- if ( $self->{_saw_extrude} ) {
- $self->warning(<<EOM);
-
-You may have encountered a bug in perltidy. However, since you are using the
--extrude option, the problem may be with perl or one of its modules, which have
-occasional problems with this type of file. If you believe that the
-problem is with perltidy, and the problem is not listed in the BUGS file at
-http://perltidy.sourceforge.net, please report it so that it can be corrected.
-Include the smallest possible script which has the problem, along with the .LOG
-file. See the manual pages for contact information.
-Thank you!
-EOM
- }
- else {
- $self->warning(<<EOM);
-
-Oops, you seem to have encountered a bug in perltidy. Please check the
-BUGS file at http://perltidy.sourceforge.net. If the problem is not
-listed there, please report it so that it can be corrected. Include the
-smallest possible script which produces this message, along with the
-.LOG file if appropriate. See the manual pages for contact information.
-Your efforts are appreciated.
-Thank you!
-EOM
- my $added_semicolon_count = 0;
- eval {
- $added_semicolon_count =
- $formatter->get_added_semicolon_count();
- };
- if ( $added_semicolon_count > 0 ) {
- $self->warning(<<EOM);
-
-The log file shows that perltidy added $added_semicolon_count semicolons.
-Please rerun with -nasc to see if that is the cause of the syntax error. Even
-if that is the problem, please report it so that it can be fixed.
-EOM
-
- }
- }
- }
- return;
-}
-
-sub finish {
-
- # called after all formatting to summarize errors
- my ( $self, $infile_syntax_ok, $formatter ) = @_;
-
- my $rOpts = $self->{_rOpts};
- my $warning_count = $self->{_warning_count};
- my $saw_code_bug = $self->{_saw_code_bug};
-
- my $save_logfile =
- ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
- || $saw_code_bug == 1
- || $rOpts->{'logfile'};
- my $log_file = $self->{_log_file};
- if ($warning_count) {
- if ($save_logfile) {
- $self->block_log_output(); # avoid echoing this to the logfile
- $self->warning(
- "The logfile $log_file may contain useful information\n");
- $self->unblock_log_output();
- }
-
- if ( $self->{_complaint_count} > 0 ) {
- $self->warning(
-"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
- );
- }
-
- if ( $self->{_saw_brace_error}
- && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
- {
- $self->warning("To save a full .LOG file rerun with -g\n");
- }
- }
- $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
-
- if ($save_logfile) {
- my $log_file = $self->{_log_file};
- my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
- if ($fh) {
- my $routput_array = $self->{_output_array};
- foreach ( @{$routput_array} ) { $fh->print($_) }
- if ( $log_file ne '-' && !ref $log_file ) {
- eval { $fh->close() };
- }
- }
- }
- return;
-}
-
-#####################################################################
-#
-# The Perl::Tidy::DevNull class supplies a dummy print method
-#
-#####################################################################
-
-package Perl::Tidy::DevNull;
-
-# constructor: returns an empty blessed hash; takes no other arguments
-sub new {
-    my ($class) = @_;
-    my $self = {};
-    return bless $self, $class;
-}
-
-# output is discarded
-sub print {
-    return;
-}
-
-# nothing to close
-sub close {
-    return;
-}
-
-#####################################################################
-#
-# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
-#
-#####################################################################
-
-package Perl::Tidy::HtmlWriter;
-
-# fileparse() from File::Basename is used by 'new' and 'make_frame'
-use File::Basename;
-
-# class variables
-# (package globals shared by the subs of this package)
-use vars qw{
- %html_color
- %html_bold
- %html_italic
- %token_short_names
- %short_to_long_names
- $rOpts
- $css_filename
- $css_linkname
- $missing_html_entities
-};
-
-# replace unsafe characters with HTML entity representation if HTML::Entities
-# is available
-# ($missing_html_entities holds the load error text, empty on success;
-# check_options also sets it when html-entities is switched off)
-{ eval "use HTML::Entities"; $missing_html_entities = $@; }
-
-sub new {
-
- # Constructor: open the html output stream and set up the in-memory
- # streams which collect the table of contents, the <pre> code sections
- # and (with pod2html) the pod text.
- # Returns the new object, or nothing if the html file cannot be opened.
- # Reads the package variable $rOpts and may switch off the 'pod2html'
- # and 'frames' options in it.
- my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
- $html_src_extension )
- = @_;
-
- my $html_file_opened = 0;
- my $html_fh;
- ( $html_fh, my $html_filename ) =
- Perl::Tidy::streamhandle( $html_file, 'w' );
- unless ($html_fh) {
- Perl::Tidy::Warn("can't open $html_file: $!\n");
- return;
- }
- $html_file_opened = 1;
-
- # use a placeholder name when the input is not a named file
- if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
- $input_file = "NONAME";
- }
-
- # write the table of contents to a string
- my $toc_string;
- my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
-
- my $html_pre_fh;
- my @pre_string_stack;
- if ( $rOpts->{'html-pre-only'} ) {
-
- # pre section goes directly to the output stream
- $html_pre_fh = $html_fh;
- $html_pre_fh->print( <<"PRE_END");
-<pre>
-PRE_END
- }
- else {
-
- # pre section go out to a temporary string
- # (a reference to it is kept on the stack stored in the object)
- my $pre_string;
- $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
- push @pre_string_stack, \$pre_string;
- }
-
- # pod text gets diverted if the 'pod2html' is used
- my $html_pod_fh;
- my $pod_string;
- if ( $rOpts->{'pod2html'} ) {
-
- # pod2html is switched off when only the <pre> section is wanted
- if ( $rOpts->{'html-pre-only'} ) {
- undef $rOpts->{'pod2html'};
- }
- else {
-
- # Pod::Html is loaded only on demand; fall back if it is missing
- eval "use Pod::Html";
- if ($@) {
- Perl::Tidy::Warn
-"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
- undef $rOpts->{'pod2html'};
- }
- else {
- $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
- }
- }
- }
-
- # frames need an output extension to build the two extra file names
- my $toc_filename;
- my $src_filename;
- if ( $rOpts->{'frames'} ) {
- unless ($extension) {
- Perl::Tidy::Warn
-"cannot use frames without a specified output extension; ignoring -frm\n";
- undef $rOpts->{'frames'};
- }
- else {
- $toc_filename = $input_file . $html_toc_extension . $extension;
- $src_filename = $input_file . $html_src_extension . $extension;
- }
- }
-
- # ----------------------------------------------------------
- # Output is now directed as follows:
- # html_toc_fh <-- table of contents items
- # html_pre_fh <-- the <pre> section of formatted code, except:
- # html_pod_fh <-- pod goes here with the pod2html option
- # ----------------------------------------------------------
-
- # the title defaults to the base name of the input file
- my $title = $rOpts->{'title'};
- unless ($title) {
- ( $title, my $path ) = fileparse($input_file);
- }
- my $toc_item_count = 0;
- my $in_toc_package = "";
- my $last_level = 0;
- return bless {
- _input_file => $input_file, # name of input file
- _title => $title, # title, unescaped
- _html_file => $html_file, # name of .html output file
- _toc_filename => $toc_filename, # for frames option
- _src_filename => $src_filename, # for frames option
- _html_file_opened => $html_file_opened, # a flag
- _html_fh => $html_fh, # the output stream
- _html_pre_fh => $html_pre_fh, # pre section goes here
- _rpre_string_stack => \@pre_string_stack, # stack of pre sections
- _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
- _rpod_string => \$pod_string, # string holding pod
- _pod_cut_count => 0, # how many =cut's?
- _html_toc_fh => $html_toc_fh, # fh for table of contents
- _rtoc_string => \$toc_string, # string holding toc
- _rtoc_item_count => \$toc_item_count, # how many toc items
- _rin_toc_package => \$in_toc_package, # package name
- _rtoc_name_count => {}, # hash to track unique names
- _rpackage_stack => [], # stack to check for package
- # name changes
- _rlast_level => \$last_level, # brace indentation level
- }, $class;
-}
-
-sub add_toc_item {
-
- # Add an item to the html table of contents.
- # This is called even if no table of contents is written,
- # because we still want to put the anchors in the <pre> text.
- # We are given an anchor name and its type; types are:
- # 'package', 'sub', '__END__', '__DATA__', 'EOF'
- # There must be an 'EOF' call at the end to wrap things up.
- # Writes to the toc stream and the <pre> stream; returns nothing.
- my ( $self, $name, $type ) = @_;
- my $html_toc_fh = $self->{_html_toc_fh};
- my $html_pre_fh = $self->{_html_pre_fh};
- my $rtoc_name_count = $self->{_rtoc_name_count};
- my $rtoc_item_count = $self->{_rtoc_item_count};
- my $rlast_level = $self->{_rlast_level};
- my $rin_toc_package = $self->{_rin_toc_package};
- my $rpackage_stack = $self->{_rpackage_stack};
-
- # packages contain sublists of subs, so to avoid errors all package
- # items are written and finished with the following routines
-
- # close the currently open package sublist, if there is one
- my $end_package_list = sub {
- if ( ${$rin_toc_package} ) {
- $html_toc_fh->print("</ul>\n</li>\n");
- ${$rin_toc_package} = "";
- }
- };
-
- # open a sublist for $package, closing any sublist still open
- my $start_package_list = sub {
- my ( $unique_name, $package ) = @_;
- if ( ${$rin_toc_package} ) { $end_package_list->() }
- $html_toc_fh->print(<<EOM);
-<li><a href=\"#$unique_name\">package $package</a>
-<ul>
-EOM
- ${$rin_toc_package} = $package;
- };
-
- # start the table of contents on the first item
- unless ( ${$rtoc_item_count} ) {
-
- # but just quit if we hit EOF without any other entries
- # in this case, there will be no toc
- return if ( $type eq 'EOF' );
- $html_toc_fh->print( <<"TOC_END");
-<!-- BEGIN CODE INDEX --><a name="code-index"></a>
-<ul>
-TOC_END
- }
- ${$rtoc_item_count}++;
-
- # make a unique anchor name for this location:
- # - packages get a 'package-' prefix
- # - subs use their names
- my $unique_name = $name;
- if ( $type eq 'package' ) { $unique_name = "package-$name" }
-
- # append '-1', '-2', etc if necessary to make unique; this will
- # be unique because subs and packages cannot have a '-'
- # (names are counted case-insensitively; the first use gets no suffix)
- if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
- $unique_name .= "-$count";
- }
-
- # - all names get terminal '-' if pod2html is used, to avoid
- # conflicts with anchor names created by pod2html
- if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
-
- # start/stop lists of subs
- if ( $type eq 'sub' ) {
-
- # the package is looked up at the current brace level; default 'main'
- my $package = $rpackage_stack->[ ${$rlast_level} ];
- unless ($package) { $package = 'main' }
-
- # if we're already in a package/sub list, be sure its the right
- # package or else close it
- if ( ${$rin_toc_package} && ${$rin_toc_package} ne $package ) {
- $end_package_list->();
- }
-
- # start a package/sub list if necessary
- unless ( ${$rin_toc_package} ) {
- $start_package_list->( $unique_name, $package );
- }
- }
-
- # now write an entry in the toc for this item
- if ( $type eq 'package' ) {
- $start_package_list->( $unique_name, $name );
- }
- elsif ( $type eq 'sub' ) {
- $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
- }
- else {
-
- # any other type ends an open package sublist before its own entry
- $end_package_list->();
- $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
- }
-
- # write the anchor in the <pre> section
- $html_pre_fh->print("<a name=\"$unique_name\"></a>");
-
- # end the table of contents, if any, on the end of file
- if ( $type eq 'EOF' ) {
- $html_toc_fh->print( <<"TOC_END");
-</ul>
-<!-- END CODE INDEX -->
-TOC_END
- }
- return;
-}
-
-BEGIN {
-
- # Fill the two package lookup tables used for html markup:
- # %short_to_long_names and %token_short_names.
-
- # This is the official list of tokens which may be identified by the
- # user. Long names are used as getopt keys. Short names are
- # convenient short abbreviations for specifying input. Short names
- # somewhat resemble token type characters, but are often different
- # because they may only be alphanumeric, to allow command line
- # input. Also, note that because of case insensitivity of html,
- # this table must be in a single case only (I've chosen to use all
- # lower case).
- # When adding NEW_TOKENS: update this hash table
- # short names => long names
- %short_to_long_names = (
- 'n' => 'numeric',
- 'p' => 'paren',
- 'q' => 'quote',
- 's' => 'structure',
- 'c' => 'comment',
- 'v' => 'v-string',
- 'cm' => 'comma',
- 'w' => 'bareword',
- 'co' => 'colon',
- 'pu' => 'punctuation',
- 'i' => 'identifier',
- 'j' => 'label',
- 'h' => 'here-doc-target',
- 'hh' => 'here-doc-text',
- 'k' => 'keyword',
- 'sc' => 'semicolon',
- 'm' => 'subroutine',
- 'pd' => 'pod-text',
- );
-
- # Now we have to map actual token types into one of the above short
- # names; any token types not mapped will get 'punctuation'
- # properties.
-
- # The values of this hash table correspond to the keys of the
- # previous hash table.
- # The keys of this hash table are token types and can be seen
- # by running with --dump-token-types (-dtt).
-
- # When adding NEW_TOKENS: update this hash table
- # $type => $short_name
- %token_short_names = (
- '#' => 'c',
- 'n' => 'n',
- 'v' => 'v',
- 'k' => 'k',
- 'F' => 'k',
- 'Q' => 'q',
- 'q' => 'q',
- 'J' => 'j',
- 'j' => 'j',
- 'h' => 'h',
- 'H' => 'hh',
- 'w' => 'w',
- ',' => 'cm',
- '=>' => 'cm',
- ';' => 'sc',
- ':' => 'co',
- 'f' => 'sc',
- '(' => 'p',
- ')' => 'p',
- 'M' => 'm',
- 'P' => 'pd',
- 'A' => 'co',
- );
-
- # These token types will all be called identifiers for now
- # FIXME: could separate user defined modules as separate type
- # (hash slice assignment: every listed type maps to 'i')
- my @identifier = qw" i t U C Y Z G :: CORE::";
- @token_short_names{@identifier} = ('i') x scalar(@identifier);
-
- # These token types will be called 'structure'
- my @structure = qw" { } ";
- @token_short_names{@structure} = ('s') x scalar(@structure);
-
- # OLD NOTES: save for reference
- # Any of these could be added later if it would be useful.
- # For now, they will by default become punctuation
- # my @list = qw" L R [ ] ";
- # @token_long_names{@list} = ('non-structure') x scalar(@list);
- #
- # my @list = qw"
- # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
- # ";
- # @token_long_names{@list} = ('math') x scalar(@list);
- #
- # my @list = qw" & &= ~ ~= ^ ^= | |= ";
- # @token_long_names{@list} = ('bit') x scalar(@list);
- #
- # my @list = qw" == != < > <= <=> ";
- # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
- #
- # my @list = qw" && || ! &&= ||= //= ";
- # @token_long_names{@list} = ('logical') x scalar(@list);
- #
- # my @list = qw" . .= =~ !~ x x= ";
- # @token_long_names{@list} = ('string-operators') x scalar(@list);
- #
- # # Incomplete..
- # my @list = qw" .. -> <> ... \ ? ";
- # @token_long_names{@list} = ('misc-operators') x scalar(@list);
-
-}
-
-sub make_getopt_long_names {
-
-    # Append the Getopt::Long specifications of all html-related options
-    # to the list referenced by $rgetopt_names.
-    my ( $class, $rgetopt_names ) = @_;
-
-    # color, italic and bold options for each token class
-    foreach my $name ( values %short_to_long_names ) {
-        push @{$rgetopt_names},
-          "html-color-$name=s",
-          "html-italic-$name!",
-          "html-bold-$name!";
-    }
-
-    # all other html options
-    push @{$rgetopt_names}, qw(
-      html-color-background=s
-      html-linked-style-sheet=s
-      nohtml-style-sheets
-      html-pre-only
-      html-line-numbers
-      html-entities!
-      stylesheet
-      html-table-of-contents!
-      pod2html!
-      frames!
-      html-toc-extension=s
-      html-src-extension=s
-    );
-
-    # Pod::Html parameters:
-    push @{$rgetopt_names}, qw(
-      backlink=s
-      cachedir=s
-      htmlroot=s
-      libpods=s
-      podpath=s
-      podroot=s
-      title=s
-    );
-
-    # Pod::Html parameters with leading 'pod' which will be removed
-    # before the call to Pod::Html
-    push @{$rgetopt_names}, qw(
-      podquiet!
-      podverbose!
-      podrecurse!
-      podflush
-      podheader!
-      podindex!
-    );
-    return;
-}
-
-sub make_abbreviated_names {
-
-    # Add the short forms of the html options to the expansion table
-    # referenced by $rexpansion.  Each value is a one-element list, e.g.
-    #      'hcc'    => [qw(html-color-comment)],
-    #      'hck'    => [qw(html-color-keyword)],
-    my ( $class, $rexpansion ) = @_;
-
-    # abbreviations for color/bold/italic properties of each token class
-    foreach my $short_name ( keys %short_to_long_names ) {
-        my $long_name = $short_to_long_names{$short_name};
-        $rexpansion->{"hc$short_name"}  = ["html-color-$long_name"];
-        $rexpansion->{"hb$short_name"}  = ["html-bold-$long_name"];
-        $rexpansion->{"hi$short_name"}  = ["html-italic-$long_name"];
-        $rexpansion->{"nhb$short_name"} = ["nohtml-bold-$long_name"];
-        $rexpansion->{"nhi$short_name"} = ["nohtml-italic-$long_name"];
-    }
-
-    # abbreviations for all other html options
-    my @other_options = (
-        [ 'hcbg',  'html-color-background' ],
-        [ 'pre',   'html-pre-only' ],
-        [ 'toc',   'html-table-of-contents' ],
-        [ 'ntoc',  'nohtml-table-of-contents' ],
-        [ 'nnn',   'html-line-numbers' ],
-        [ 'hent',  'html-entities' ],
-        [ 'nhent', 'nohtml-entities' ],
-        [ 'css',   'html-linked-style-sheet' ],
-        [ 'nss',   'nohtml-style-sheets' ],
-        [ 'ss',    'stylesheet' ],
-        [ 'pod',   'pod2html' ],
-        [ 'npod',  'nopod2html' ],
-        [ 'frm',   'frames' ],
-        [ 'nfrm',  'noframes' ],
-        [ 'text',  'html-toc-extension' ],
-        [ 'sext',  'html-src-extension' ],
-    );
-    foreach my $pair (@other_options) {
-        my ( $abbrev, $option ) = @{$pair};
-        $rexpansion->{$abbrev} = [$option];
-    }
-    return;
-}
-
-sub check_options {
-
-    # This will be called once after options have been parsed.
-    # It fills in default html colors and bold/italic flags, builds the
-    # per-token lookup tables, and handles the style sheet options.
-    #   $class    - unused
-    #   $ropts_in - reference to the hash of parsed options; it is stored
-    #               in the package variable $rOpts and updated in place
-    # Exits after writing the style sheet if 'stylesheet' was given.
-    my ( $class, $ropts_in ) = @_;
-
-    # The helper routines called below read and write the package
-    # variable $rOpts, so the options must be stored there.  Keeping them
-    # only in a lexical of the same name would hide the package variable
-    # and the defaults would never reach the caller's hash.
-    $rOpts = $ropts_in;
-
-    # X11 color names for default settings that seemed to look ok
-    # (these color names are only used for programming clarity; the hex
-    # numbers are actually written)
-    use constant ForestGreen   => "#228B22";
-    use constant SaddleBrown   => "#8B4513";
-    use constant magenta4      => "#8B008B";
-    use constant IndianRed3    => "#CD5555";
-    use constant DeepSkyBlue4  => "#00688B";
-    use constant MediumOrchid3 => "#B452CD";
-    use constant black         => "#000000";
-    use constant white         => "#FFFFFF";
-    use constant red           => "#FF0000";
-
-    # set default color, bold, italic properties
-    # anything not listed here will be given the default (punctuation) color --
-    # these types currently not listed and get default: ws pu s sc cm co p
-    # When adding NEW_TOKENS: add an entry here if you don't want defaults
-
-    # set_default_properties( $short_name, default_color, bold?, italic? );
-    set_default_properties( 'c',  ForestGreen,   0, 0 );
-    set_default_properties( 'pd', ForestGreen,   0, 1 );
-    set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
-    set_default_properties( 'q',  IndianRed3,    0, 0 );
-    set_default_properties( 'hh', IndianRed3,    0, 1 );
-    set_default_properties( 'h',  IndianRed3,    1, 0 );
-    set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
-    set_default_properties( 'w',  black,         0, 0 );
-    set_default_properties( 'n',  MediumOrchid3, 0, 0 );
-    set_default_properties( 'v',  MediumOrchid3, 0, 0 );
-    set_default_properties( 'j',  IndianRed3,    1, 0 );
-    set_default_properties( 'm',  red,           1, 0 );
-
-    set_default_color( 'html-color-background',  white );
-    set_default_color( 'html-color-punctuation', black );
-
-    # setup property lookup tables for tokens based on their short names
-    # every token type has a short name, and will use these tables
-    # to do the html markup
-    foreach my $short_name ( keys %short_to_long_names ) {
-        my $long_name = $short_to_long_names{$short_name};
-        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
-        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
-        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
-    }
-
-    # write style sheet to STDOUT and exit if requested
-    if ( defined( $rOpts->{'stylesheet'} ) ) {
-        write_style_sheet_file('-');
-        Perl::Tidy::Exit(0);
-    }
-
-    # make sure user gives a file name after -css
-    if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
-        $css_linkname = $rOpts->{'html-linked-style-sheet'};
-        if ( $css_linkname =~ /^-/ ) {
-            Perl::Tidy::Die("You must specify a valid filename after -css\n");
-        }
-    }
-
-    # check for conflict
-    if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
-        $rOpts->{'nohtml-style-sheets'} = 0;
-
-        # report through Perl::Tidy::Warn like the rest of this package
-        Perl::Tidy::Warn(
-            "You can't specify both -css and -nss; -nss ignored\n");
-    }
-
-    # write a style sheet file if necessary
-    if ($css_linkname) {
-
-        # if the selected filename exists, don't write, because user may
-        # have done some work by hand to create it; use backup name instead
-        # Also, this will avoid a potential disaster in which the user
-        # forgets to specify the style sheet, like this:
-        #    perltidy -html -css myfile1.pl myfile2.pl
-        # This would cause myfile1.pl to parsed as the style sheet by GetOpts
-        my $css_file = $css_linkname;
-        unless ( -e $css_file ) {
-            write_style_sheet_file($css_file);
-        }
-    }
-
-    # entity escaping is also switched off when html-entities is not set
-    $missing_html_entities = 1 unless $rOpts->{'html-entities'};
-    return;
-}
-
-sub write_style_sheet_file {
-
-    # Write the default style sheet to the named file.
-    # The name is opened in the "> name" string form; check_options
-    # passes '-' to send the style sheet to STDOUT.
-    # Dies through Perl::Tidy::Die if the file cannot be opened.
-    my ($css_filename) = @_;
-
-    my $fh = IO::File->new("> $css_filename");
-    if ( !$fh ) {
-        Perl::Tidy::Die("can't open $css_filename: $!\n");
-    }
-
-    write_style_sheet_data($fh);
-    eval { $fh->close };
-    return;
-}
-
-sub write_style_sheet_data {
-
-    # Write the style sheet text to the open file handle $fh:
-    # first the body/pre rules, then one rule per token class,
-    # sorted by short name.
-    my ($fh) = @_;
-
-    my $bg_color   = $rOpts->{'html-color-background'};
-    my $text_color = $rOpts->{'html-color-punctuation'};
-
-    # pre-bgcolor is new, and may not be defined
-    my $pre_bg_color = $rOpts->{'html-pre-color-background'} || $bg_color;
-
-    $fh->print(<<"EOM");
-/* default style sheet generated by perltidy */
-body {background: $bg_color; color: $text_color}
-pre { color: $text_color;
- background: $pre_bg_color;
- font-family: courier;
- }
-
-EOM
-
-    foreach my $short_name ( sort keys %short_to_long_names ) {
-        my $long_name = $short_to_long_names{$short_name};
-
-        # one-letter names get an extra space, for alignment
-        my $selector = '.' . $short_name;
-        $selector .= ' ' if length($short_name) == 1;
-
-        # classes without their own color use the text color
-        my $color = $html_color{$short_name};
-        $color = $text_color unless defined($color);
-
-        my $rule = "$selector \{ color: $color;";
-        $rule .= " font-weight:bold;"  if $html_bold{$short_name};
-        $rule .= " font-style:italic;" if $html_italic{$short_name};
-        $rule .= "} /* $long_name */\n";
-        $fh->print($rule);
-    }
-    return;
-}
-
-sub set_default_color {
-
-    # Make sure that options hash entry $rOpts->{$key} holds a color:
-    # a true value already stored there is kept, otherwise $color is
-    # used.  Either way the value is passed through check_RGB.
-    my ( $key, $color ) = @_;
-    my $chosen = $rOpts->{$key} ? $rOpts->{$key} : $color;
-    $rOpts->{$key} = check_RGB($chosen);
-    return;
-}
-
-sub check_RGB {
-
-    # A color given as exactly 6 hex digits gets a leading '#';
-    # anything else is returned unchanged, on the assumption that it
-    # is a valid ascii color name.
-    my ($color) = @_;
-    my $is_bare_hex = $color =~ /^[0-9a-fA-F]{6,6}$/;
-    return $is_bare_hex ? "#$color" : $color;
-}
-
-sub set_default_properties {
-
-    # Give the token class with this short name a default color and
-    # default bold and italic flags.  Bold and italic values which are
-    # already defined in $rOpts are left as they are.
-    my ( $short_name, $color, $bold, $italic ) = @_;
-
-    my $long_name = $short_to_long_names{$short_name};
-    set_default_color( "html-color-$long_name", $color );
-
-    my $bold_key = "html-bold-$long_name";
-    $rOpts->{$bold_key} = $bold unless defined $rOpts->{$bold_key};
-
-    my $italic_key = "html-italic-$long_name";
-    $rOpts->{$italic_key} = $italic unless defined $rOpts->{$italic_key};
-    return;
-}
-
-sub pod_to_html {
-
-    # Use Pod::Html to process the pod and make the page,
-    # then merge the perltidy code sections into it.
-    #
-    # Given:
-    #   $pod_string        - the pod text handed to pod2html
-    #   $css_string        - css text or link, written just ahead of the
-    #                        <body> line
-    #   $toc_string        - perltidy code index, written after the pod index
-    #   $rpre_string_stack - ref to array of refs to the <pre> section
-    #                        strings; sections are shifted off as they are
-    #                        written
-    # Returns 1 if success, 0 otherwise.
-    my ( $self, $pod_string, $css_string, $toc_string, $rpre_string_stack ) =
-      @_;
-    my $title        = $self->{_title};
-    my $success_flag = 0;
-
-    # don't try to use pod2html if no pod
-    unless ($pod_string) {
-        return $success_flag;
-    }
-
-    # Pod::Html requires a real temporary filename
-    my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
-    unless ($fh_tmp) {
-        Perl::Tidy::Warn(
-            "unable to open temporary file $tmpfile; cannot use pod2html\n");
-        return $success_flag;
-    }
-
-    #------------------------------------------------------------------
-    # Warning: a temporary file is open; we have to clean up if
-    # things go bad.  From here on all returns should be by going to
-    # RETURN so that the temporary file gets unlinked.
-    #------------------------------------------------------------------
-
-    # write the pod text to the temporary file
-    $fh_tmp->print($pod_string);
-    $fh_tmp->close();
-
-    # Hand off the pod to pod2html.
-    # Note that we can use the same temporary filename for input and output
-    # because of the way pod2html works.
-    {
-
-        my @args;
-        push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
-
-        # Flags with string args:
-        # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
-        # "podpath=s", "podroot=s"
-        # Note: -css=s is handled by perltidy itself
-        foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot))
-        {
-            if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
-        }
-
-        # Toggle switches; these have extra leading 'pod'
-        # "header!", "index!", "recurse!", "quiet!", "verbose!"
-        foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
-            my $kwd = $kw;    # allows us to strip 'pod'
-            if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
-            elsif ( defined( $rOpts->{$kw} ) ) {
-                $kwd =~ s/^pod//;
-                push @args, "--no$kwd";
-            }
-        }
-
-        # "flush",
-        my $kw = 'podflush';
-        if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
-
-        # Must clean up if pod2html dies (it can);
-        # Be careful not to overwrite callers __DIE__ routine
-        local $SIG{__DIE__} = sub {
-            unlink $tmpfile if -e $tmpfile;
-            Perl::Tidy::Die( $_[0] );
-        };
-
-        pod2html(@args);
-    }
-    $fh_tmp = IO::File->new( $tmpfile, 'r' );
-    unless ($fh_tmp) {
-
-        # this error shouldn't happen ... we just used this filename
-        Perl::Tidy::Warn(
-            "unable to open temporary file $tmpfile; cannot use pod2html\n");
-        goto RETURN;
-    }
-
-    # NOTE: $html_fh is deliberately fetched only after the check above,
-    # so that on the early 'goto RETURN' path the output stream is not
-    # closed by the close at RETURN.
-    my $html_fh = $self->{_html_fh};
-    my @toc;
-    my $in_toc   = "";    # '', 'INDEX' (old pod2html) or 'UL' (new pod2html)
-    my $ul_level = 0;
-    my $no_print;
-
-    # This routine will write the html selectively and store the toc
-    my $html_print = sub {
-        foreach (@_) {
-            $html_fh->print($_) unless ($no_print);
-            if ($in_toc) { push @toc, $_ }
-        }
-    };
-
-    # loop over lines of html output from pod2html and merge in
-    # the necessary perltidy html sections
-    my ( $saw_body, $saw_index, $saw_body_end );
-    while ( my $line = $fh_tmp->getline() ) {
-
-        if ( $line =~ /^\s*<html>\s*$/i ) {
-            my $date = localtime;
-            $html_print->("<!-- Generated by perltidy on $date -->\n");
-            $html_print->($line);
-        }
-
-        # Copy the perltidy css, if any, after <body> tag
-        elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
-            $saw_body = 1;
-            $html_print->($css_string) if $css_string;
-            $html_print->($line);
-
-            # add a top anchor and heading
-            $html_print->("<a name=\"-top-\"></a>\n");
-            $title = escape_html($title);
-            $html_print->("<h1>$title</h1>\n");
-        }
-
-        # check for start of index, old pod2html
-        # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
-        #    <!-- INDEX BEGIN -->
-        #    <ul>
-        #     ...
-        #    </ul>
-        #    <!-- INDEX END -->
-        #
-        elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
-            $in_toc = 'INDEX';
-
-            # when frames are used, an extra table of contents in the
-            # contents panel is confusing, so don't print it
-            $no_print = $rOpts->{'frames'}
-              || !$rOpts->{'html-table-of-contents'};
-            $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
-            $html_print->($line);
-        }
-
-        # check for start of index, new pod2html
-        # After Pod::Html VERSION 1.15_02 it is delimited as:
-        # <ul id="index">
-        # ...
-        # </ul>
-        elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
-            $in_toc   = 'UL';
-            $ul_level = 1;
-
-            # when frames are used, an extra table of contents in the
-            # contents panel is confusing, so don't print it
-            $no_print = $rOpts->{'frames'}
-              || !$rOpts->{'html-table-of-contents'};
-            $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
-            $html_print->($line);
-        }
-
-        # Check for end of index, old pod2html
-        elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
-            $saw_index = 1;
-            $html_print->($line);
-
-            # Copy the perltidy toc, if any, after the Pod::Html toc.
-            # $in_toc is still set here, so these lines are also stored
-            # in @toc by $html_print.
-            if ($toc_string) {
-                $html_print->("<hr />\n") if $rOpts->{'frames'};
-                $html_print->("<h2>Code Index:</h2>\n");
-                my @code_toc = map { $_ .= "\n" } split /\n/, $toc_string;
-                $html_print->(@code_toc);
-            }
-            $in_toc   = "";
-            $no_print = 0;
-        }
-
-        # must track <ul> depth level for new pod2html
-        elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
-            $ul_level++;
-            $html_print->($line);
-        }
-
-        # Check for end of index, for new pod2html
-        elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
-            $ul_level--;
-            $html_print->($line);
-
-            # Copy the perltidy toc, if any, after the Pod::Html toc
-            if ( $ul_level <= 0 ) {
-                $saw_index = 1;
-                if ($toc_string) {
-                    $html_print->("<hr />\n") if $rOpts->{'frames'};
-                    $html_print->("<h2>Code Index:</h2>\n");
-                    my @code_toc = map { $_ .= "\n" } split /\n/, $toc_string;
-                    $html_print->(@code_toc);
-                }
-                $in_toc   = "";
-                $ul_level = 0;
-                $no_print = 0;
-            }
-        }
-
-        # Copy one perltidy section after each marker
-        elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
-            $line = $2;
-            $html_print->($1) if $1;
-
-            # Intermingle code and pod sections if we saw multiple =cut's.
-            if ( $self->{_pod_cut_count} > 1 ) {
-                my $rpre_string = shift( @{$rpre_string_stack} );
-                if ( ${$rpre_string} ) {
-                    $html_print->('<pre>');
-                    $html_print->( ${$rpre_string} );
-                    $html_print->('</pre>');
-                }
-                else {
-
-                    # shouldn't happen: we stored a string before writing
-                    # each marker.
-                    Perl::Tidy::Warn(
-"Problem merging html stream with pod2html; order may be wrong\n"
-                    );
-                }
-                $html_print->($line);
-            }
-
-            # If didn't see multiple =cut lines, we'll put the pod out first
-            # and then the code, because it's less confusing.
-            else {
-
-                # since we are not intermixing code and pod, we don't need
-                # or want any <hr> lines which separated pod and code
-                $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
-            }
-        }
-
-        # Copy any remaining code section before the </body> tag
-        elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
-            $saw_body_end = 1;
-            if ( @{$rpre_string_stack} ) {
-                unless ( $self->{_pod_cut_count} > 1 ) {
-                    $html_print->('<hr />');
-                }
-                while ( my $rpre_string = shift( @{$rpre_string_stack} ) ) {
-                    $html_print->('<pre>');
-                    $html_print->( ${$rpre_string} );
-                    $html_print->('</pre>');
-                }
-            }
-            $html_print->($line);
-        }
-        else {
-            $html_print->($line);
-        }
-    }
-
-    # Done reading: release the temporary file before it is unlinked
-    # below.  An open handle can make the unlink fail on some systems.
-    $fh_tmp->close();
-
-    $success_flag = 1;
-    unless ($saw_body) {
-        Perl::Tidy::Warn("Did not see <body> in pod2html output\n");
-        $success_flag = 0;
-    }
-    unless ($saw_body_end) {
-        Perl::Tidy::Warn("Did not see </body> in pod2html output\n");
-        $success_flag = 0;
-    }
-    unless ($saw_index) {
-        Perl::Tidy::Warn("Did not find INDEX END in pod2html output\n");
-        $success_flag = 0;
-    }
-
-  RETURN:
-    eval { $html_fh->close() };
-
-    # note that we have to unlink tmpfile before making frames
-    # because the tmpfile may be one of the names used for frames
-    if ( -e $tmpfile ) {
-        unless ( unlink($tmpfile) ) {
-            Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n");
-            $success_flag = 0;
-        }
-    }
-
-    if ( $success_flag && $rOpts->{'frames'} ) {
-        $self->make_frame( \@toc );
-    }
-    return $success_flag;
-}
-
-sub make_frame {
-
-    # Make a frame with table of contents in the left panel
-    # and the text in the right panel.
-    # On entry:
-    #  $self->{_html_file} names the no-frames html output
-    #  $rtoc is a reference to an array with the table of contents
-    #
-    # Three html files result:
-    #  1. the table of contents
-    #  2. the contents (source code) itself
-    #  3. the frame which contains them
-    my ( $self, $rtoc ) = @_;
-    my ( $html_filename, $toc_filename, $src_filename ) =
-      @{$self}{qw(_html_file _toc_filename _src_filename)};
-    my $title = escape_html( $self->{_title} );
-
-    # FUTURE input parameter:
-    my $top_basename = "";
-
-    # basenames are used for the relative links
-    my ($toc_basename) = fileparse($toc_filename);
-    my ($src_basename) = fileparse($src_filename);
-    my $src_frame_name = 'SRC';
-
-    # 1. Make the table of contents panel, with appropriate changes
-    # to the anchor names
-    write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
-        $src_frame_name );
-
-    # 2. The current .html filename is renamed to be the contents panel
-    unless ( rename( $html_filename, $src_filename ) ) {
-        Perl::Tidy::Die(
-            "Cannot rename $html_filename to $src_filename:$!\n");
-    }
-
-    # 3. Then use the original html filename for the frame
-    write_frame_html(
-        $title,        $html_filename, $top_basename,
-        $toc_basename, $src_basename,  $src_frame_name
-    );
-    return;
-}
-
-sub write_toc_html {
-
-    # Write a separate html table of contents file for frames.
-    # The lines in @{$rtoc} are changed in place by change_anchor_names
-    # so that their links point into $src_basename and open in the
-    # frame named $src_frame_name.  Returns nothing.
-    my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
-
-    my $fh = IO::File->new( $toc_filename, 'w' );
-    unless ($fh) {
-        Perl::Tidy::Die("Cannot open $toc_filename:$!\n");
-    }
-
-    # page header, with the title linked to the top of the source panel
-    $fh->print(<<EOM);
-<html>
-<head>
-<title>$title</title>
-</head>
-<body>
-<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
-EOM
-
-    # the table of contents itself, with retargeted anchors
-    change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
-    my $toc_text = join "", @{$rtoc};
-    $fh->print($toc_text);
-
-    # page trailer
-    $fh->print(<<EOM);
-</body>
-</html>
-EOM
-
-    return;
-}
-
-sub write_frame_html {
-
-    # Write the html file which defines the frame layout.
-    #   $title          - page title, written as given
-    #   $frame_filename - name of the file to write
-    #   $top_basename   - optional master index file; if true it gets a
-    #                     panel of its own above the table of contents
-    #   $toc_basename   - file shown in the table of contents panel
-    #   $src_basename   - file shown in the source panel
-    #   $src_frame_name - frame name given to the source panel
-    # Dies through Perl::Tidy::Die if the file cannot be opened.
-    my (
-        $title,        $frame_filename, $top_basename,
-        $toc_basename, $src_basename,   $src_frame_name
-    ) = @_;
-
-    # the message names the file which is actually being opened
-    my $fh = IO::File->new( $frame_filename, 'w' )
-      or Perl::Tidy::Die("Cannot open $frame_filename:$!\n");
-
-    $fh->print(<<EOM);
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
-<?xml version="1.0" encoding="iso-8859-1" ?>
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-<title>$title</title>
-</head>
-EOM
-
-    # two left panels, one right, if master index file
-    if ($top_basename) {
-        $fh->print(<<EOM);
-<frameset cols="20%,80%">
-<frameset rows="30%,70%">
-<frame src = "$top_basename" />
-<frame src = "$toc_basename" />
-</frameset>
-EOM
-    }
-
-    # one left panels, one right, if no master index file
-    else {
-        $fh->print(<<EOM);
-<frameset cols="20%,*">
-<frame src = "$toc_basename" />
-EOM
-    }
-    $fh->print(<<EOM);
-<frame src = "$src_basename" name = "$src_frame_name" />
-<noframes>
-<body>
-<p>If you see this message, you are using a non-frame-capable web client.</p>
-<p>This document contains:</p>
-<ul>
-<li><a href="$toc_basename">A table of contents</a></li>
-<li><a href="$src_basename">The source code</a></li>
-</ul>
-</body>
-</noframes>
-</frameset>
-</html>
-EOM
-
-    # close explicitly so the frame file is complete on return
-    $fh->close();
-    return;
-}
-
-sub change_anchor_names {
-
-    # Add a filename and target to the anchors in the given lines.
-    # Matching lines in @{$rlines} are rewritten in place.
-    # Returns the first rewritten href, or undef if no line matched.
-    my ( $rlines, $filename, $target ) = @_;
-    my $first_anchor;
-
-    # We're looking for lines like this:
-    #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
-    # The text before '<a' is $1, the anchor name after '#' is $4 and
-    # the rest of the line after the closing '>' of the tag is $5.
-    foreach my $line ( @{$rlines} ) {
-        next
-          unless $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i;
-        my ( $before, $anchor, $after ) = ( $1, $4, $5 );
-
-        my $href = $filename . '#' . $anchor;
-        $line = qq{$before<a href="$href" target="$target">$after\n};
-        $first_anchor = $href unless $first_anchor;
-    }
-    return $first_anchor;
-}
-
-sub close_html_file {
-
- # Finish the html output for the current file and close the html
- # file handle. Does nothing unless an html file has been opened.
- # The output is completed along one of the three paths marked below;
- # if the 'frames' option is set a frame file is also made at the end.
- my $self = shift;
- return unless $self->{_html_file_opened};
-
- my $html_fh = $self->{_html_fh};
- my $rtoc_string = $self->{_rtoc_string};
-
- # There are 3 basic paths to html output...
-
- # ---------------------------------
- # Path 1: finish up if in -pre mode
- # ---------------------------------
- if ( $rOpts->{'html-pre-only'} ) {
- $html_fh->print( <<"PRE_END");
-</pre>
-PRE_END
- eval { $html_fh->close() };
- return;
- }
-
- # Finish the index
- $self->add_toc_item( 'EOF', 'EOF' );
-
- my $rpre_string_stack = $self->{_rpre_string_stack};
-
- # Patch to darken the <pre> background color in case of pod2html and
- # interleaved code/documentation. Otherwise, the distinction
- # between code and documentation is blurred.
- if ( $rOpts->{pod2html}
- && $self->{_pod_cut_count} >= 1
- && $rOpts->{'html-color-background'} eq '#FFFFFF' )
- {
- $rOpts->{'html-pre-color-background'} = '#F0F0F0';
- }
-
- # put the css or its link into a string, if used
- my $css_string;
- my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
-
- # use css linked to another file
- if ( $rOpts->{'html-linked-style-sheet'} ) {
- $fh_css->print(
- qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />));
- }
-
- # use css embedded in this file
- elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
- $fh_css->print( <<'ENDCSS');
-<style type="text/css">
-<!--
-ENDCSS
- write_style_sheet_data($fh_css);
- $fh_css->print( <<"ENDCSS");
--->
-</style>
-ENDCSS
- }
-
- # -----------------------------------------------------------
- # path 2: use pod2html if requested
- # If we fail for some reason, continue on to path 3
- # -----------------------------------------------------------
- if ( $rOpts->{'pod2html'} ) {
- my $rpod_string = $self->{_rpod_string};
-
- # a true return value from pod_to_html ends this routine here
- $self->pod_to_html(
- ${$rpod_string}, $css_string,
- ${$rtoc_string}, $rpre_string_stack
- ) && return;
- }
-
- # --------------------------------------------------
- # path 3: write code in html, with pod only in italics
- # --------------------------------------------------
- my $input_file = $self->{_input_file};
- my $title = escape_html($input_file);
- my $date = localtime;
- $html_fh->print( <<"HTML_START");
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<!-- Generated by perltidy on $date -->
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-<title>$title</title>
-HTML_START
-
- # output the css, if used
- if ($css_string) {
- $html_fh->print($css_string);
- $html_fh->print( <<"ENDCSS");
-</head>
-<body>
-ENDCSS
- }
-
- # with no css, the page colors are set as attributes of the body tag
- else {
-
- $html_fh->print( <<"HTML_START");
-</head>
-<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
-HTML_START
- }
-
- $html_fh->print("<a name=\"-top-\"></a>\n");
- $html_fh->print( <<"EOM");
-<h1>$title</h1>
-EOM
-
- # copy the table of contents
- # (with frames it is not written here; see the make_frame call below)
- if ( ${$rtoc_string}
- && !$rOpts->{'frames'}
- && $rOpts->{'html-table-of-contents'} )
- {
- $html_fh->print( ${$rtoc_string} );
- }
-
- # copy the pre section(s)
- my $fname_comment = $input_file;
- $fname_comment =~ s/--+/-/g; # protect HTML comment tags
- $html_fh->print( <<"END_PRE");
-<hr />
-<!-- contents of filename: $fname_comment -->
-<pre>
-END_PRE
-
- foreach my $rpre_string ( @{$rpre_string_stack} ) {
- $html_fh->print( ${$rpre_string} );
- }
-
- # and finish the html page
- $html_fh->print( <<"HTML_END");
-</pre>
-</body>
-</html>
-HTML_END
- eval { $html_fh->close() }; # could be object without close method
-
- if ( $rOpts->{'frames'} ) {
-
- # break the toc into lines, putting back the newline which
- # split removed from each one
- my @toc = map { $_ .= "\n" } split /\n/, ${$rtoc_string};
- $self->make_frame( \@toc );
- }
- return;
-}
-
-sub markup_tokens {
-
- # Mark up the tokens of one line of code for html output.
- # Given:
- # $rtokens = ref to array of token text
- # $rtoken_type = ref to array of token types
- # $rlevels = ref to array of token levels
- # Returns a ref to an array of marked-up token strings. This array
- # can be longer than the input because a 'sub name' or 'package name'
- # token is split into a keyword part and a name part.
- # Side effects: updates the package stack and the last level stored in
- # $self, and adds table of contents items for subs and packages.
- my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_;
- my ( @colored_tokens, $string, $type, $token, $level );
- my $rlast_level = $self->{_rlast_level};
- my $rpackage_stack = $self->{_rpackage_stack};
-
- for ( my $j = 0 ; $j < @{$rtoken_type} ; $j++ ) {
- $type = $rtoken_type->[$j];
- $token = $rtokens->[$j];
- $level = $rlevels->[$j];
- $level = 0 if ( $level < 0 );
-
- #-------------------------------------------------------
- # Update the package stack. The package stack is needed to keep
- # the toc correct because some packages may be declared within
- # blocks and go out of scope when we leave the block.
- #-------------------------------------------------------
- if ( $level > ${$rlast_level} ) {
-
- # going deeper: the new level inherits the enclosing package
- unless ( $rpackage_stack->[ $level - 1 ] ) {
- $rpackage_stack->[ $level - 1 ] = 'main';
- }
- $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
- }
- elsif ( $level < ${$rlast_level} ) {
- my $package = $rpackage_stack->[$level];
- unless ($package) { $package = 'main' }
-
- # if we change packages due to a nesting change, we
- # have to make an entry in the toc
- if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
- $self->add_toc_item( $package, 'package' );
- }
- }
- ${$rlast_level} = $level;
-
- #-------------------------------------------------------
- # Intercept a sub name here; split it
- # into keyword 'sub' and sub name; and add an
- # entry in the toc
- #-------------------------------------------------------
- if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
- $token = $self->markup_html_element( $1, 'k' );
- push @colored_tokens, $token;
- $token = $2;
- $type = 'M';
-
- # but don't include sub declarations in the toc;
- # these will have leading token types 'i;'
- my $signature = join "", @{$rtoken_type};
- unless ( $signature =~ /^i;/ ) {
- my $subname = $token;
- $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
- $self->add_toc_item( $subname, 'sub' );
- }
- }
-
- #-------------------------------------------------------
- # Intercept a package name here; split it
- # into keyword 'package' and name; add to the toc,
- # and update the package stack
- #-------------------------------------------------------
- if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
- $token = $self->markup_html_element( $1, 'k' );
- push @colored_tokens, $token;
- $token = $2;
- $type = 'i';
- $self->add_toc_item( "$token", 'package' );
- $rpackage_stack->[$level] = $token;
- }
-
- # mark up the token itself, or the name part which remains if the
- # token was split above
- $token = $self->markup_html_element( $token, $type );
- push @colored_tokens, $token;
- }
- return ( \@colored_tokens );
-}
-
-sub markup_html_element {
-
-    # Return the html markup for a single token of the given type.
-    # Blank tokens are returned unchanged.  Other tokens are html-escaped
-    # and then wrapped either in a <span> naming the style class (when
-    # style sheets are in use) or in <font>, <i> and <b> tags.
-    my ( $self, $token, $type ) = @_;
-
-    # nothing to do for a blank token or a blank line
-    return $token if $type eq 'b' || $token =~ /^\s*$/;
-
-    my $html = escape_html($token);
-
-    # the short abbreviation for this token type; punctuation is default
-    my $class = $token_short_names{$type};
-    $class = "pu" unless defined($class);
-
-    # with style sheets: everything but punctuation gets a class span
-    unless ( $rOpts->{'nohtml-style-sheets'} ) {
-        return $html if $class eq 'pu';
-        return qq(<span class="$class">) . $html . "</span>";
-    }
-
-    # without style sheets: color is given explicitly unless it is the
-    # same as the punctuation color, which is the text color of the page
-    my $color = $html_color{$class};
-    if ( $color && $color ne $rOpts->{'html-color-punctuation'} ) {
-        $html = qq(<font color="$color">) . $html . "</font>";
-    }
-    $html = "<i>$html</i>" if $html_italic{$class};
-    $html = "<b>$html</b>" if $html_bold{$class};
-    return $html;
-}
-
-sub escape_html {
-
-    # Return a copy of $token in which characters that are special in
-    # html have been replaced by character entities.
-    #
-    # If $missing_html_entities is set we do a minimal job ourselves:
-    # '&', '<', '>' and '"' are replaced by their named entities.  The
-    # ampersand must be handled first so that the ampersands of the
-    # entities added afterwards are not escaped a second time.
-    # Otherwise HTML::Entities::encode_entities is called for its
-    # in-place effect on $token (its return value is not used here).
-    my $token = shift;
-    if ($missing_html_entities) {
-        $token =~ s/\&/&amp;/g;
-        $token =~ s/\</&lt;/g;
-        $token =~ s/\>/&gt;/g;
-        $token =~ s/\"/&quot;/g;
-    }
-    else {
-        HTML::Entities::encode_entities($token);
-    }
-    return $token;
-}
-
-sub finish_formatting {
-
-    # Called after the last line has been written; completes and closes
-    # the html output.
-    my ($self) = @_;
-    $self->close_html_file();
-    return;
-}
-
-sub write_line {
-
- # Mark up one input line and write it to the current <pre> stream.
- # $line_of_tokens is a hash ref; the keys read here are _line_type,
- # _line_text and _line_number, plus _rtoken_type, _rtokens and
- # _rlevels for a line of type 'CODE'.
- # Does nothing unless an html file has been opened.
- # If the pod2html option is set, pod lines go to the pod stream instead
- # and nothing is written to the <pre> stream for them.
- my ( $self, $line_of_tokens ) = @_;
- return unless $self->{_html_file_opened};
- my $html_pre_fh = $self->{_html_pre_fh};
- my $line_type = $line_of_tokens->{_line_type};
- my $input_line = $line_of_tokens->{_line_text};
- my $line_number = $line_of_tokens->{_line_number};
- chomp $input_line;
-
- # markup line of code..
- my $html_line;
- if ( $line_type eq 'CODE' ) {
- my $rtoken_type = $line_of_tokens->{_rtoken_type};
- my $rtokens = $line_of_tokens->{_rtokens};
- my $rlevels = $line_of_tokens->{_rlevels};
-
- # start with the leading whitespace of the input line
- if ( $input_line =~ /(^\s*)/ ) {
- $html_line = $1;
- }
- else {
- $html_line = "";
- }
- my ($rcolored_tokens) =
- $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
- $html_line .= join '', @{$rcolored_tokens};
- }
-
- # markup line of non-code..
- else {
-
- # the whole line is marked up as a single token whose type
- # character is chosen according to the line type
- my $line_character;
- if ( $line_type eq 'HERE' ) { $line_character = 'H' }
- elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
- elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
- elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
- elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
- elsif ( $line_type eq 'END_START' ) {
- $line_character = 'k';
- $self->add_toc_item( '__END__', '__END__' );
- }
- elsif ( $line_type eq 'DATA_START' ) {
- $line_character = 'k';
- $self->add_toc_item( '__DATA__', '__DATA__' );
- }
- elsif ( $line_type =~ /^POD/ ) {
- $line_character = 'P';
- if ( $rOpts->{'pod2html'} ) {
- my $html_pod_fh = $self->{_html_pod_fh};
- if ( $line_type eq 'POD_START' ) {
-
- my $rpre_string_stack = $self->{_rpre_string_stack};
- my $rpre_string = $rpre_string_stack->[-1];
-
- # if we have written any non-blank lines to the
- # current pre section, start writing to a new output
- # string
- if ( ${$rpre_string} =~ /\S/ ) {
- my $pre_string;
- $html_pre_fh =
- Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
- $self->{_html_pre_fh} = $html_pre_fh;
- push @{$rpre_string_stack}, \$pre_string;
-
- # leave a marker in the pod stream so we know
- # where to put the pre section we just
- # finished.
- my $for_html = '=for html'; # don't confuse pod utils
- $html_pod_fh->print(<<EOM);
-
-$for_html
-<!-- pERLTIDY sECTION -->
-
-EOM
- }
-
- # otherwise, just clear the current string and start
- # over
- else {
- ${$rpre_string} = "";
- $html_pod_fh->print("\n");
- }
- }
- $html_pod_fh->print( $input_line . "\n" );
- if ( $line_type eq 'POD_END' ) {
- $self->{_pod_cut_count}++;
- $html_pod_fh->print("\n");
- }
-
- # the pod line went to the pod stream only
- return;
- }
- }
- else { $line_character = 'Q' }
- $html_line = $self->markup_html_element( $input_line, $line_character );
- }
-
- # add the line number if requested
- if ( $rOpts->{'html-line-numbers'} ) {
- my $extra_space =
- ( $line_number < 10 ) ? " "
- : ( $line_number < 100 ) ? " "
- : ( $line_number < 1000 ) ? " "
- : "";
- $html_line = $extra_space . $line_number . " " . $html_line;
- }
-
- # write the line
- $html_pre_fh->print("$html_line\n");
- return;
-}
-
-#####################################################################
-#
-# The Perl::Tidy::Formatter package adds indentation, whitespace, and
-# line breaks to the token stream
-#
-# WARNING: This is not a real class for speed reasons. Only one
-# Formatter may be used.
-#
-#####################################################################
-
-package Perl::Tidy::Formatter;
-
-BEGIN {
-
-    # Compile-time switches for formatter debugging.
-    # Caution: these debug flags produce a lot of output
-    # They should all be 0 except when debugging small scripts
-    use constant {
-        FORMATTER_DEBUG_FLAG_RECOMBINE   => 0,
-        FORMATTER_DEBUG_FLAG_BOND_TABLES => 0,
-        FORMATTER_DEBUG_FLAG_BOND        => 0,
-        FORMATTER_DEBUG_FLAG_BREAK       => 0,
-        FORMATTER_DEBUG_FLAG_CI          => 0,
-        FORMATTER_DEBUG_FLAG_FLUSH       => 0,
-        FORMATTER_DEBUG_FLAG_FORCE       => 0,
-        FORMATTER_DEBUG_FLAG_LIST        => 0,
-        FORMATTER_DEBUG_FLAG_NOBREAK     => 0,
-        FORMATTER_DEBUG_FLAG_OUTPUT      => 0,
-        FORMATTER_DEBUG_FLAG_SPARSE      => 0,
-        FORMATTER_DEBUG_FLAG_STORE       => 0,
-        FORMATTER_DEBUG_FLAG_UNDOBP      => 0,
-        FORMATTER_DEBUG_FLAG_WHITE       => 0,
-    };
-
-    # Announce on STDOUT each flag which has been switched on
-    my $announce = sub {
-        my ($key) = @_;
-        print STDOUT "FORMATTER_DEBUGGING with key $key\n";
-    };
-
-    $announce->('RECOMBINE')   if FORMATTER_DEBUG_FLAG_RECOMBINE;
-    $announce->('BOND_TABLES') if FORMATTER_DEBUG_FLAG_BOND_TABLES;
-    $announce->('BOND')        if FORMATTER_DEBUG_FLAG_BOND;
-    $announce->('BREAK')       if FORMATTER_DEBUG_FLAG_BREAK;
-    $announce->('CI')          if FORMATTER_DEBUG_FLAG_CI;
-    $announce->('FLUSH')       if FORMATTER_DEBUG_FLAG_FLUSH;
-    $announce->('FORCE')       if FORMATTER_DEBUG_FLAG_FORCE;
-    $announce->('LIST')        if FORMATTER_DEBUG_FLAG_LIST;
-    $announce->('NOBREAK')     if FORMATTER_DEBUG_FLAG_NOBREAK;
-    $announce->('OUTPUT')      if FORMATTER_DEBUG_FLAG_OUTPUT;
-    $announce->('SPARSE')      if FORMATTER_DEBUG_FLAG_SPARSE;
-    $announce->('STORE')       if FORMATTER_DEBUG_FLAG_STORE;
-    $announce->('UNDOBP')      if FORMATTER_DEBUG_FLAG_UNDOBP;
-    $announce->('WHITE')       if FORMATTER_DEBUG_FLAG_WHITE;
-}
-
-use Carp;
-use vars qw{
-
- @gnu_stack
- $max_gnu_stack_index
- $gnu_position_predictor
- $line_start_index_to_go
- $last_indentation_written
- $last_unadjusted_indentation
- $last_leading_token
- $last_output_short_opening_token
- $peak_batch_size
-
- $saw_VERSION_in_this_file
- $saw_END_or_DATA_
-
- @gnu_item_list
- $max_gnu_item_index
- $gnu_sequence_number
- $last_output_indentation
- %last_gnu_equals
- %gnu_comma_count
- %gnu_arrow_count
-
- @block_type_to_go
- @type_sequence_to_go
- @container_environment_to_go
- @bond_strength_to_go
- @forced_breakpoint_to_go
- @token_lengths_to_go
- @summed_lengths_to_go
- @levels_to_go
- @leading_spaces_to_go
- @reduced_spaces_to_go
- @matching_token_to_go
- @mate_index_to_go
- @ci_levels_to_go
- @nesting_depth_to_go
- @nobreak_to_go
- @old_breakpoint_to_go
- @tokens_to_go
- @rtoken_vars_to_go
- @K_to_go
- @types_to_go
- @inext_to_go
- @iprev_to_go
-
- %saved_opening_indentation
-
- $max_index_to_go
- $comma_count_in_batch
- $last_nonblank_index_to_go
- $last_nonblank_type_to_go
- $last_nonblank_token_to_go
- $last_last_nonblank_index_to_go
- $last_last_nonblank_type_to_go
- $last_last_nonblank_token_to_go
- @nonblank_lines_at_depth
- $starting_in_quote
- $ending_in_quote
- @whitespace_level_stack
- $whitespace_last_level
-
- $format_skipping_pattern_begin
- $format_skipping_pattern_end
-
- $forced_breakpoint_count
- $forced_breakpoint_undo_count
- @forced_breakpoint_undo_stack
- %postponed_breakpoint
-
- $tabbing
- $embedded_tab_count
- $first_embedded_tab_at
- $last_embedded_tab_at
- $deleted_semicolon_count
- $first_deleted_semicolon_at
- $last_deleted_semicolon_at
- $added_semicolon_count
- $first_added_semicolon_at
- $last_added_semicolon_at
- $first_tabbing_disagreement
- $last_tabbing_disagreement
- $in_tabbing_disagreement
- $tabbing_disagreement_count
- $input_line_tabbing
-
- $last_line_leading_type
- $last_line_leading_level
- $last_last_line_leading_level
-
- %block_leading_text
- %block_opening_line_number
- $csc_new_statement_ok
- $csc_last_label
- %csc_block_label
- $accumulating_text_for_block
- $leading_block_text
- $rleading_block_if_elsif_text
- $leading_block_text_level
- $leading_block_text_length_exceeded
- $leading_block_text_line_length
- $leading_block_text_line_number
- $closing_side_comment_prefix_pattern
- $closing_side_comment_list_pattern
-
- $blank_lines_after_opening_block_pattern
- $blank_lines_before_closing_block_pattern
-
- $last_nonblank_token
- $last_nonblank_type
- $last_last_nonblank_token
- $last_last_nonblank_type
- $last_nonblank_block_type
- $last_output_level
- %is_do_follower
- %is_if_brace_follower
- %space_after_keyword
- $rbrace_follower
- $looking_for_else
- %is_last_next_redo_return
- %is_other_brace_follower
- %is_else_brace_follower
- %is_anon_sub_brace_follower
- %is_anon_sub_1_brace_follower
- %is_sort_map_grep
- %is_sort_map_grep_eval
- %is_sort_map_grep_eval_do
- %is_block_without_semicolon
- %is_if_unless
- %is_and_or
- %is_assignment
- %is_chain_operator
- %is_if_unless_and_or_last_next_redo_return
- %ok_to_add_semicolon_for_block_type
-
- @has_broken_sublist
- @dont_align
- @want_comma_break
-
- $is_static_block_comment
- $index_start_one_line_block
- $semicolons_before_block_self_destruct
- $index_max_forced_break
- $input_line_number
- $diagnostics_object
- $vertical_aligner_object
- $logger_object
- $file_writer_object
- $formatter_self
- @ci_stack
- %want_break_before
- %outdent_keyword
- $static_block_comment_pattern
- $static_side_comment_pattern
- %opening_vertical_tightness
- %closing_vertical_tightness
- %closing_token_indentation
- $some_closing_token_indentation
-
- %opening_token_right
- %stack_opening_token
- %stack_closing_token
-
- $block_brace_vertical_tightness_pattern
-
- $rOpts_add_newlines
- $rOpts_add_whitespace
- $rOpts_block_brace_tightness
- $rOpts_block_brace_vertical_tightness
- $rOpts_brace_left_and_indent
- $rOpts_comma_arrow_breakpoints
- $rOpts_break_at_old_keyword_breakpoints
- $rOpts_break_at_old_comma_breakpoints
- $rOpts_break_at_old_logical_breakpoints
- $rOpts_break_at_old_ternary_breakpoints
- $rOpts_break_at_old_attribute_breakpoints
- $rOpts_closing_side_comment_else_flag
- $rOpts_closing_side_comment_maximum_text
- $rOpts_continuation_indentation
- $rOpts_cuddled_else
- $rOpts_delete_old_whitespace
- $rOpts_fuzzy_line_length
- $rOpts_indent_columns
- $rOpts_line_up_parentheses
- $rOpts_maximum_fields_per_table
- $rOpts_maximum_line_length
- $rOpts_variable_maximum_line_length
- $rOpts_short_concatenation_item_length
- $rOpts_keep_old_blank_lines
- $rOpts_ignore_old_breakpoints
- $rOpts_format_skipping
- $rOpts_space_function_paren
- $rOpts_space_keyword_paren
- $rOpts_keep_interior_semicolons
- $rOpts_ignore_side_comment_lengths
- $rOpts_stack_closing_block_brace
- $rOpts_space_backslash_quote
- $rOpts_whitespace_cycle
-
- %is_opening_type
- %is_closing_type
- %is_keyword_returning_list
- %tightness
- %matching_token
- $rOpts
- %right_bond_strength
- %left_bond_strength
- %binary_ws_rules
- %want_left_space
- %want_right_space
- %is_digraph
- %is_trigraph
- $bli_pattern
- $bli_list_string
- %is_closing_type
- %is_opening_type
- %is_closing_token
- %is_opening_token
-
- %weld_len_left_closing
- %weld_len_right_closing
- %weld_len_left_opening
- %weld_len_right_opening
-
- $rcuddled_block_types
-
- $SUB_PATTERN
- $ASUB_PATTERN
-
- $NVARS
-
-};
-
-BEGIN {
-
- # Define the index constants for the token variable arrays and fill
- # the lookup hashes and patterns which are fixed at compile time.
-
- # Array index names for token vars
- # NOTE(review): 'use constant' is evaluated at compile time, so
- # presumably before the assignment 'my $i = 0' has been executed;
- # confirm that the indexes still come out as 0,1,2,...
- my $i = 0;
- use constant {
- _BLOCK_TYPE_ => $i++,
- _CI_LEVEL_ => $i++,
- _CONTAINER_ENVIRONMENT_ => $i++,
- _CONTAINER_TYPE_ => $i++,
- _CUMULATIVE_LENGTH_ => $i++,
- _LINE_INDEX_ => $i++,
- _KNEXT_SEQ_ITEM_ => $i++,
- _LEVEL_ => $i++,
- _LEVEL_TRUE_ => $i++,
- _SLEVEL_ => $i++,
- _TOKEN_ => $i++,
- _TYPE_ => $i++,
- _TYPE_SEQUENCE_ => $i++,
- };
-
- # number of variables per token; _TYPE_SEQUENCE_ must remain the last
- # index defined above for this to be correct
- $NVARS = 1 + _TYPE_SEQUENCE_;
-
- # default list of block types for which -bli would apply
- $bli_list_string = 'if else elsif unless while for foreach do : sub';
-
- # @q is a scratch list; each hash below gets a value of 1 for every
- # word in the list assigned just before it
- my @q;
-
- # two-character operators ('..' is listed twice, which is harmless)
- @q = qw(
- .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x=
- );
- @is_digraph{@q} = (1) x scalar(@q);
-
- # three-character operators
- @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
- @is_trigraph{@q} = (1) x scalar(@q);
-
- # assignment operators
- @q = qw(
- = **= += *= &= <<= &&=
- -= /= |= >>= ||= //=
- .= %= ^=
- x=
- );
- @is_assignment{@q} = (1) x scalar(@q);
-
- @q = qw(
- grep
- keys
- map
- reverse
- sort
- split
- );
- @is_keyword_returning_list{@q} = (1) x scalar(@q);
-
- # NOTE(review): the first word 'is' is not a Perl keyword; possibly
- # a typing error for 'if' - confirm before removing
- @q = qw(is if unless and or err last next redo return);
- @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
-
- @q = qw(last next redo return);
- @is_last_next_redo_return{@q} = (1) x scalar(@q);
-
- @q = qw(sort map grep);
- @is_sort_map_grep{@q} = (1) x scalar(@q);
-
- @q = qw(sort map grep eval);
- @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
-
- @q = qw(sort map grep eval do);
- @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
-
- @q = qw(if unless);
- @is_if_unless{@q} = (1) x scalar(@q);
-
- @q = qw(and or err);
- @is_and_or{@q} = (1) x scalar(@q);
-
- # Identify certain operators which often occur in chains.
- # Note: the minus (-) causes a side effect of padding of the first line in
- # something like this (by sub set_logical_padding):
- # Checkbutton => 'Transmission checked',
- # -variable => \$TRANS
- # This usually improves appearance so it seems ok.
- @q = qw(&& || and or : ? . + - * /);
- @is_chain_operator{@q} = (1) x scalar(@q);
-
- # We can remove semicolons after blocks preceded by these keywords
- @q =
- qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
- unless while until for foreach given when default);
- @is_block_without_semicolon{@q} = (1) x scalar(@q);
-
- # We will allow semicolons to be added within these block types
- # as well as sub and package blocks.
- # NOTES:
- # 1. Note that these keywords are omitted:
- # switch case given when default sort map grep
- # 2. It is also ok to add for sub and package blocks and a labeled block
- # 3. But not okay for other perltidy types including:
- # { } ; G t
- # 4. Test files: blktype.t, blktype1.t, semicolon.t
- @q =
- qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
- unless do while until eval for foreach );
- @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
-
- # 'L' is token for opening { at hash key
- @q = qw" L { ( [ ";
- @is_opening_type{@q} = (1) x scalar(@q);
-
- # 'R' is token for closing } at hash key
- @q = qw" R } ) ] ";
- @is_closing_type{@q} = (1) x scalar(@q);
-
- @q = qw" { ( [ ";
- @is_opening_token{@q} = (1) x scalar(@q);
-
- @q = qw" } ) ] ";
- @is_closing_token{@q} = (1) x scalar(@q);
-
- # Patterns for standardizing matches to block types for regular subs and
- # anonymous subs. Examples
- # 'sub process' is a named sub
- # 'sub ::m' is a named sub
- # 'sub' is an anonymous sub
- # 'sub:' is a label, not a sub
- # 'substr' is a keyword
- $SUB_PATTERN = '^sub\s+(::|\w)';
- $ASUB_PATTERN = '^sub$';
-}
-
-# Whitespace codes: a space is wanted, is optional, or is not wanted
-use constant {
-    WS_YES      => 1,
-    WS_OPTIONAL => 0,
-    WS_NO       => -1,
-};
-
-# Token bond strengths, from strongest to weakest
-use constant {
-    NO_BREAK    => 10000,
-    VERY_STRONG => 100,
-    STRONG      => 2.1,
-    NOMINAL     => 1.1,
-    WEAK        => 0.8,
-    VERY_WEAK   => 0.55,
-};
-
-use constant {
-
-    # value for testing indexes in output array
-    UNDEFINED_INDEX => -1,
-
-    # Maximum number of little messages; probably need not be changed.
-    MAX_NAG_MESSAGES => 6,
-
-    # increment between sequence numbers for each type
-    # For example, ?: pairs might have numbers 7,11,15,...
-    TYPE_SEQUENCE_INCREMENT => 4,
-};
-
-{
-
-    # Instance counter, private to the three routines in this block
-    my $instance_total = 0;
-
-    # return the current count
-    sub get_count {
-        return $instance_total;
-    }
-
-    # add one to the count and return the new count
-    sub _increment_count {
-        $instance_total += 1;
-        return $instance_total;
-    }
-
-    # subtract one from the count and return the new count
-    sub _decrement_count {
-        $instance_total -= 1;
-        return $instance_total;
-    }
-}
-
-sub trim {
-
-    # Return a copy of the given string with any leading and trailing
-    # whitespace removed
-    my ($text) = @_;
-    for ($text) {
-        s/\s+$//;
-        s/^\s+//;
-    }
-    return $text;
-}
-
-sub max {
-
-    # Return the numerically largest of the given values; the result
-    # is undefined if no values are given
-    my ( $largest, @others ) = @_;
-    for my $candidate (@others) {
-        $largest = $candidate if $largest < $candidate;
-    }
-    return $largest;
-}
-
-sub min {
-
-    # Return the numerically smallest of the given values; the result
-    # is undefined if no values are given
-    my ( $smallest, @others ) = @_;
-    for my $candidate (@others) {
-        $smallest = $candidate if $smallest > $candidate;
-    }
-    return $smallest;
-}
-
-sub split_words {
-
-    # Given a string of words separated by whitespace, return the list
-    # of words.  Nothing is returned for a false string, which includes
-    # the empty string and the string '0'.
-    my ($text) = @_;
-    return if !$text;
-
-    # remove whitespace at the ends so that split cannot give an empty
-    # leading field
-    for ($text) {
-        s/\s+$//;
-        s/^\s+//;
-    }
-    return split /\s+/, $text;
-}
-
-sub check_keys {
-    my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
-
-    # Check the keys of a hash:
-    # $rtest  = ref to hash to test
-    # $rvalid = ref to hash with valid keys
-
-    # $msg = a message to write in case of error
-    # $exact_match defines the type of check:
-    #  = false: test hash must not have unknown key
-    #  = true:  test hash must have exactly same keys as known hash
-    #
-    # Returns nothing if the check passes.  Otherwise a report is passed
-    # to Perl::Tidy::Die.
-    my @unknown_keys =
-      grep { !exists $rvalid->{$_} } keys %{$rtest};
-    my @missing_keys =
-      grep { !exists $rtest->{$_} } keys %{$rvalid};
-    my $error = @unknown_keys;
-    if ($exact_match) { $error ||= @missing_keys }
-    if ($error) {
-
-        # the key lists are written as '(key1)(key2)...'
-        local $" = ')(';
-
-        # sort all of the lists so that the report does not depend on
-        # hash ordering
-        my @expected_keys = sort keys %{$rvalid};
-        @unknown_keys = sort @unknown_keys;
-        @missing_keys = sort @missing_keys;
-        Perl::Tidy::Die(<<EOM);
-------------------------------------------------------------------------
-Program error detected checking hash keys
-Message is: '$msg'
-Expected keys: (@expected_keys)
-Unknown key(s): (@unknown_keys)
-Missing key(s): (@missing_keys)
-------------------------------------------------------------------------
-EOM
-    }
-    return;
-}
-
-# interface to Perl::Tidy::Logger routines
-sub warning {
-
-    # Pass a warning message to the logger, if there is one
-    my ($msg) = @_;
-    $logger_object->warning($msg) if $logger_object;
-    return;
-}
-
-sub complain {
-
-    # Pass a complaint to the logger, if there is one
-    my ($msg) = @_;
-    return unless $logger_object;
-    $logger_object->complain($msg);
-    return;
-}
-
-sub write_logfile_entry {
-
-    # Pass a copy of the given text to the logger, if there is one
-    my @entries = @_;
-    $logger_object->write_logfile_entry(@entries) if $logger_object;
-    return;
-}
-
-sub black_box {
-
-    # Pass a copy of the arguments to the black_box method of the
-    # logger, if there is a logger
-    my @items = @_;
-    return unless $logger_object;
-    $logger_object->black_box(@items);
-    return;
-}
-
-sub report_definite_bug {
-
-    # Call the report_definite_bug method of the logger, if there is
-    # a logger
-    $logger_object->report_definite_bug() if $logger_object;
-    return;
-}
-
-sub get_saw_brace_error {
-
-    # Return the value which the logger's get_saw_brace_error method
-    # gives.  Returns nothing if there is no logger.
-    if ($logger_object) {
-        return $logger_object->get_saw_brace_error();
-    }
-    return;
-}
-
-sub we_are_at_the_last_line {
-
-    # Call the we_are_at_the_last_line method of the logger, if there
-    # is a logger
-    return unless $logger_object;
-    $logger_object->we_are_at_the_last_line();
-    return;
-}
-
-# interface to Perl::Tidy::Diagnostics routine
-sub write_diagnostics {
-
-    # Pass a message to the diagnostics object, if there is one
-    my ($msg) = @_;
-    $diagnostics_object->write_diagnostics($msg) if $diagnostics_object;
-    return;
-}
-
-sub get_added_semicolon_count {
-
-    # Return the package variable which holds the count of added
-    # semicolons; the object itself is not consulted
-    my ($self) = @_;
-    return $added_semicolon_count;
-}
-
-sub DESTROY {
-
-    # An object is going away, so count one instance fewer
-    my ($self) = @_;
-    $self->_decrement_count();
-    return;
-}
-
-sub get_output_line_number {
-
- # Return whatever the vertical aligner object reports as its output
- # line number. The vertical aligner object must already exist.
- return $vertical_aligner_object->get_output_line_number();
-}
-
-sub new {
-
- # Create the formatter object.
- # The arguments after the class are key => value pairs; the keys
- # used are sink_object, logger_object and diagnostics_object.
- # Most of the formatter state is held in package variables, which are
- # all reset here. For that reason only one object may exist at a
- # time; an attempt to make a second one ends in a call to confess.
- # Returns the new blessed hash ref.
- my $class = shift;
-
- # we are given an object with a write_line() method to take lines
- my %defaults = (
- sink_object => undef,
- diagnostics_object => undef,
- logger_object => undef,
- );
- my %args = ( %defaults, @_ );
-
- $logger_object = $args{logger_object};
- $diagnostics_object = $args{diagnostics_object};
-
- # wrap the sink object in a Perl::Tidy::FileWriter object, which is
- # also given to the vertical aligner below
- my $sink_object = $args{sink_object};
- $file_writer_object =
- Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
-
- # initialize the leading whitespace stack to negative levels
- # so that we can never run off the end of the stack
- $peak_batch_size = 0; # flag to determine if we have output code
- $gnu_position_predictor = 0; # where the current token is predicted to be
- $max_gnu_stack_index = 0;
- $max_gnu_item_index = -1;
- $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
- @gnu_item_list = ();
- $last_output_indentation = 0;
- $last_indentation_written = 0;
- $last_unadjusted_indentation = 0;
- $last_leading_token = "";
- $last_output_short_opening_token = 0;
-
- # if the 'pass-version-line' option is off we start out as if a
- # version line has already been seen
- $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
- $saw_END_or_DATA_ = 0;
-
- # the arrays which hold the current batch of tokens
- @block_type_to_go = ();
- @type_sequence_to_go = ();
- @container_environment_to_go = ();
- @bond_strength_to_go = ();
- @forced_breakpoint_to_go = ();
- @summed_lengths_to_go = (); # line length to start of ith token
- @token_lengths_to_go = ();
- @levels_to_go = ();
- @matching_token_to_go = ();
- @mate_index_to_go = ();
- @ci_levels_to_go = ();
- @nesting_depth_to_go = (0);
- @nobreak_to_go = ();
- @old_breakpoint_to_go = ();
- @tokens_to_go = ();
- @rtoken_vars_to_go = ();
- @K_to_go = ();
- @types_to_go = ();
- @leading_spaces_to_go = ();
- @reduced_spaces_to_go = ();
- @inext_to_go = ();
- @iprev_to_go = ();
-
- @whitespace_level_stack = ();
- $whitespace_last_level = -1;
-
- @dont_align = ();
- @has_broken_sublist = ();
- @want_comma_break = ();
-
- @ci_stack = ("");
- $first_tabbing_disagreement = 0;
- $last_tabbing_disagreement = 0;
- $tabbing_disagreement_count = 0;
- $in_tabbing_disagreement = 0;
- $input_line_tabbing = undef;
-
- $last_last_line_leading_level = 0;
- $last_line_leading_level = 0;
- $last_line_leading_type = '#';
-
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_last_nonblank_token = ';';
- $last_last_nonblank_type = ';';
- $last_nonblank_block_type = "";
- $last_output_level = 0;
- $looking_for_else = 0;
- $embedded_tab_count = 0;
- $first_embedded_tab_at = 0;
- $last_embedded_tab_at = 0;
- $deleted_semicolon_count = 0;
- $first_deleted_semicolon_at = 0;
- $last_deleted_semicolon_at = 0;
- $added_semicolon_count = 0;
- $first_added_semicolon_at = 0;
- $last_added_semicolon_at = 0;
- $is_static_block_comment = 0;
- %postponed_breakpoint = ();
-
- # variables for adding side comments
- %block_leading_text = ();
- %block_opening_line_number = ();
- $csc_new_statement_ok = 1;
- %csc_block_label = ();
-
- %saved_opening_indentation = ();
-
- reset_block_text_accumulator();
-
- prepare_for_new_input_lines();
-
- $vertical_aligner_object =
- Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
- $logger_object, $diagnostics_object );
-
- # make a note in the log file of the type of indentation in use
- if ( $rOpts->{'entab-leading-whitespace'} ) {
- write_logfile_entry(
-"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
- );
- }
- elsif ( $rOpts->{'tabs'} ) {
- write_logfile_entry("Indentation will be with a tab character\n");
- }
- else {
- write_logfile_entry(
- "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
- }
-
- # This hash holds the main data structures for formatting
- # All hash keys must be defined here.
- $formatter_self = {
- rlines => [], # = ref to array of lines of the file
- rLL => [], # = ref to array with all tokens
- # in the file. LL originally meant
- # 'Linked List'. Linked lists were a
- # bad idea but LL is easy to type.
- Klimit => undef, # = maximum K index for rLL. This is
- # needed to catch any autovivification
- # problems.
- rnested_pairs => [], # for welding decisions
- K_opening_container => {}, # for quickly traversing structure
- K_closing_container => {}, # for quickly traversing structure
- K_opening_ternary => {}, # for quickly traversing structure
- K_closing_ternary => {}, # for quickly traversing structure
- rK_phantom_semicolons =>
- undef, # for undoing phantom semicolons if iterating
- rpaired_to_inner_container => {},
- rbreak_container => {}, # prevent one-line blocks
- rvalid_self_keys => [], # for checking
- };
-
- # save the list of keys just defined; sub check_self_hash compares
- # the keys of the object with this list
- my @valid_keys = keys %{$formatter_self};
- $formatter_self->{rvalid_self_keys} = \@valid_keys;
-
- bless $formatter_self, $class;
-
- # Safety check..this is not a class yet
- if ( _increment_count() > 1 ) {
- confess
-"Attempt to create more than 1 object in $class, which is not a true class yet\n";
- }
- return $formatter_self;
-}
-
-sub Fault {
-    my ($msg) = @_;
-
-    # This routine is for errors which really should not happen unless
-    # a recent program change has introduced a bug.  It passes a report
-    # to Perl::Tidy::Die which shows the message and where in the call
-    # stack the problem was detected.
-
-    # only some of the items returned by caller are needed
-    my $line0 = ( caller(0) )[2];
-    my ( $filename1, $line1, $subroutine1 ) = ( caller(1) )[ 1 .. 3 ];
-    my $subroutine2 = ( caller(2) )[3];
-
-    Perl::Tidy::Die(<<EOM);
-==============================================================================
-Fault detected at line $line0 of sub '$subroutine1'
-in file '$filename1'
-which was called from line $line1 of sub '$subroutine2'
-Message: '$msg'
-This is probably an error introduced by a recent programming change.
-==============================================================================
-EOM
-}
-
-sub check_self_hash {
-
-    # Check that the keys of the object are exactly the keys which
-    # are listed in its 'rvalid_self_keys' entry
-    my ($self) = @_;
-    my %valid_self_hash = map { $_ => 1 } @{ $self->{rvalid_self_keys} };
-    check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
-    return;
-}
-
-sub check_token_array {
-    my $self = shift;
-
-    # Check for errors in the array of tokens, $self->{rLL}.
-    # Uses package variable $NVARS
-    # Each token must be an array of exactly $NVARS variables and must
-    # have a defined token and type.  Fault is called for the first
-    # token found which breaks one of these rules.
-    $self->check_self_hash();
-    my $rLL = $self->{rLL};
-    for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
-        my $nvars = @{ $rLL->[$KK] };
-        if ( $nvars != $NVARS ) {
-            my $type = $rLL->[$KK]->[_TYPE_];
-            $type = '*' unless defined($type);
-            Fault(
-"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
-            );
-        }
-        foreach my $var ( _TOKEN_, _TYPE_ ) {
-            if ( !defined( $rLL->[$KK]->[$var] ) ) {
-                my $iline = $rLL->[$KK]->[_LINE_INDEX_];
-                Fault("Undefined variable $var for K=$KK, line=$iline\n");
-            }
-        }
-    }
-
-    # the return must come after the loop so that every token is checked
-    return;
-}
-
-sub set_rLL_max_index {
-    my $self = shift;
-
-    # Set the limit of the rLL array, assuming that it is correct.
-    # This should only be called by routines after they make changes
-    # to tokenization
-    # The limit is stored in $self->{Klimit} and is also returned.  It is
-    # the index of the last token, or undef if there are no tokens.
-    my $rLL = $self->{rLL};
-    if ( !defined($rLL) ) {
-
-        # Shouldn't happen because rLL was initialized to be an array ref
-        Fault("Undefined Memory rLL");
-    }
-    my $num    = @{$rLL};
-    my $Klimit = $num > 0 ? $num - 1 : undef;
-    $self->{Klimit} = $Klimit;
-    return ($Klimit);
-}
-
-sub get_rLL_max_index {
- my $self = shift;
-
- # the memory location $rLL and number of tokens should be obtained
- # from this routine so that any autovivication can be immediately caught.
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- if ( !defined($rLL) ) {
-
- # Shouldn't happen because rLL was initialized to be an array ref
- Fault("Undefined Memory rLL");
- }
- my $num = @{$rLL};
- if ( $num == 0 && defined($Klimit)
- || $num > 0 && !defined($Klimit)
- || $num > 0 && $Klimit != $num - 1 )
- {
-
- # Possible autovivification problem...
- if ( !defined($Klimit) ) { $Klimit = '*' }
- Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
- }
- return ($Klimit);
-}
-
-sub prepare_for_new_input_lines {
-
- # Remember the largest batch size processed. This is needed
- # by the pad routine to avoid padding the first nonblank token
- if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
- $peak_batch_size = $max_index_to_go;
- }
-
- $gnu_sequence_number++; # increment output batch counter
- %last_gnu_equals = ();
- %gnu_comma_count = ();
- %gnu_arrow_count = ();
- $line_start_index_to_go = 0;
- $max_gnu_item_index = UNDEFINED_INDEX;
- $index_max_forced_break = UNDEFINED_INDEX;
- $max_index_to_go = UNDEFINED_INDEX;
- $last_nonblank_index_to_go = UNDEFINED_INDEX;
- $last_nonblank_type_to_go = '';
- $last_nonblank_token_to_go = '';
- $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
- $last_last_nonblank_type_to_go = '';
- $last_last_nonblank_token_to_go = '';
- $forced_breakpoint_count = 0;
- $forced_breakpoint_undo_count = 0;
- $rbrace_follower = undef;
- $summed_lengths_to_go[0] = 0;
- $comma_count_in_batch = 0;
- $starting_in_quote = 0;
-
- destroy_one_line_block();
- return;
-}
-
-sub break_lines {
-
- # Loop over old lines to set new line break points
-
- my $self = shift;
- my $rlines = $self->{rlines};
-
- # Flag to prevent blank lines when POD occurs in a format skipping sect.
- my $in_format_skipping_section;
-
- my $line_type = "";
- foreach my $line_of_tokens ( @{$rlines} ) {
-
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- my $input_line = $line_of_tokens->{_line_text};
-
- # _line_type codes are:
- # SYSTEM - system-specific code before hash-bang line
- # CODE - line of perl code (including comments)
- # POD_START - line starting pod, such as '=head'
- # POD - pod documentation text
- # POD_END - last line of pod section, '=cut'
- # HERE - text of here-document
- # HERE_END - last line of here-doc (target word)
- # FORMAT - format section
- # FORMAT_END - last line of format section, '.'
- # DATA_START - __DATA__ line
- # DATA - unidentified text following __DATA__
- # END_START - __END__ line
- # END - unidentified text following __END__
- # ERROR - we are in big trouble, probably not a perl script
-
- # put a blank line after an =cut which comes before __END__ and __DATA__
- # (required by podchecker)
- if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
- $self->want_blank_line();
- }
- }
-
- # handle line of code..
- if ( $line_type eq 'CODE' ) {
-
- my $CODE_type = $line_of_tokens->{_code_type};
- $in_format_skipping_section = $CODE_type eq 'FS';
-
- # Handle blank lines
- if ( $CODE_type eq 'BL' ) {
-
- # If keep-old-blank-lines is zero, we delete all
- # old blank lines and let the blank line rules generate any
- # needed blanks.
- if ($rOpts_keep_old_blank_lines) {
- $self->flush();
- $file_writer_object->write_blank_code_line(
- $rOpts_keep_old_blank_lines == 2 );
- $last_line_leading_type = 'b';
- }
- next;
- }
- else {
-
- # let logger see all non-blank lines of code
- my $output_line_number = get_output_line_number();
- ##$vertical_aligner_object->get_output_line_number();
- black_box( $line_of_tokens, $output_line_number );
- }
-
- # Handle Format Skipping (FS) and Verbatim (VB) Lines
- if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
- $self->write_unindented_line("$input_line");
- $file_writer_object->reset_consecutive_blank_lines();
- next;
- }
-
- # Handle all other lines of code
- $self->print_line_of_tokens($line_of_tokens);
- }
-
- # handle line of non-code..
- else {
-
- # set special flags
- my $skip_line = 0;
- my $tee_line = 0;
- if ( $line_type =~ /^POD/ ) {
-
- # Pod docs should have a preceding blank line. But stay
- # out of __END__ and __DATA__ sections, because
- # the user may be using this section for any purpose whatsoever
- if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
- if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
- if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
- if ( !$skip_line
- && !$in_format_skipping_section
- && $line_type eq 'POD_START'
- && !$saw_END_or_DATA_ )
- {
- $self->want_blank_line();
- }
- }
-
- # leave the blank counters in a predictable state
- # after __END__ or __DATA__
- elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $saw_END_or_DATA_ = 1;
- }
-
- # write unindented non-code line
- if ( !$skip_line ) {
- if ($tee_line) { $file_writer_object->tee_on() }
- $self->write_unindented_line($input_line);
- if ($tee_line) { $file_writer_object->tee_off() }
- }
- }
- }
- return;
-}
-
-{ ## Beginning of routine to check line hashes
-
- my %valid_line_hash;
-
- BEGIN {
-
- # These keys are defined for each line in the formatter
- # Each line must have exactly these quantities
- my @valid_line_keys = qw(
- _curly_brace_depth
- _ending_in_quote
- _guessed_indentation_level
- _line_number
- _line_text
- _line_type
- _paren_depth
- _quote_character
- _rK_range
- _square_bracket_depth
- _starting_in_quote
- _ended_in_blank_token
- _code_type
-
- _ci_level_0
- _level_0
- _nesting_blocks_0
- _nesting_tokens_0
- );
-
- @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
- }
-
- sub check_line_hashes {
- my $self = shift;
- $self->check_self_hash();
- my $rlines = $self->{rlines};
- foreach my $rline ( @{$rlines} ) {
- my $iline = $rline->{_line_number};
- my $line_type = $rline->{_line_type};
- check_keys( $rline, \%valid_line_hash,
- "Checkpoint: line number =$iline, line_type=$line_type", 1 );
- }
- return;
- }
-
-} ## End check line hashes
-
-sub write_line {
-
- # We are caching tokenized lines as they arrive and converting them to the
- # format needed for the final formatting.
- my ( $self, $line_of_tokens_old ) = @_;
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- my $rlines_new = $self->{rlines};
-
- my $Kfirst;
- my $line_of_tokens = {};
- foreach my $key (
- qw(
- _curly_brace_depth
- _ending_in_quote
- _guessed_indentation_level
- _line_number
- _line_text
- _line_type
- _paren_depth
- _quote_character
- _square_bracket_depth
- _starting_in_quote
- )
- )
- {
- $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
- }
-
- # Data needed by Logger
- $line_of_tokens->{_level_0} = 0;
- $line_of_tokens->{_ci_level_0} = 0;
- $line_of_tokens->{_nesting_blocks_0} = "";
- $line_of_tokens->{_nesting_tokens_0} = "";
-
- # Needed to avoid trimming quotes
- $line_of_tokens->{_ended_in_blank_token} = undef;
-
- my $line_type = $line_of_tokens_old->{_line_type};
- my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
- if ( $line_type eq 'CODE' ) {
-
- my $rtokens = $line_of_tokens_old->{_rtokens};
- my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
- my $rblock_type = $line_of_tokens_old->{_rblock_type};
- my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
- my $rcontainer_environment =
- $line_of_tokens_old->{_rcontainer_environment};
- my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
- my $rlevels = $line_of_tokens_old->{_rlevels};
- my $rslevels = $line_of_tokens_old->{_rslevels};
- my $rci_levels = $line_of_tokens_old->{_rci_levels};
- my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
- my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
-
- my $jmax = @{$rtokens} - 1;
- if ( $jmax >= 0 ) {
- $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
- foreach my $j ( 0 .. $jmax ) {
- my @tokary;
- @tokary[
- _TOKEN_, _TYPE_,
- _BLOCK_TYPE_, _CONTAINER_TYPE_,
- _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
- _LEVEL_, _LEVEL_TRUE_,
- _SLEVEL_, _CI_LEVEL_,
- _LINE_INDEX_,
- ]
- = (
- $rtokens->[$j], $rtoken_type->[$j],
- $rblock_type->[$j], $rcontainer_type->[$j],
- $rcontainer_environment->[$j], $rtype_sequence->[$j],
- $rlevels->[$j], $rlevels->[$j],
- $rslevels->[$j], $rci_levels->[$j],
- $input_line_no,
- );
- ##push @token_array, \@tokary;
- push @{$rLL}, \@tokary;
- }
-
- #$Klast=@{$rLL}-1;
- $Klimit = @{$rLL} - 1;
-
- # Need to remember if we can trim the input line
- $line_of_tokens->{_ended_in_blank_token} =
- $rtoken_type->[$jmax] eq 'b';
-
- $line_of_tokens->{_level_0} = $rlevels->[0];
- $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
- $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
- $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
- }
- }
-
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
- $line_of_tokens->{_code_type} = "";
- $self->{Klimit} = $Klimit;
-
- push @{$rlines_new}, $line_of_tokens;
- return;
-}
-
-BEGIN {
-
- # initialize these global hashes, which control the use of
- # whitespace around tokens:
- #
- # %binary_ws_rules
- # %want_left_space
- # %want_right_space
- # %space_after_keyword
- #
- # Many token types are identical to the tokens themselves.
- # See the tokenizer for a complete list. Here are some special types:
- # k = perl keyword
- # f = semicolon in for statement
- # m = unary minus
- # p = unary plus
- # Note that :: is excluded since it should be contained in an identifier
- # Note that '->' is excluded because it never gets space
- # parentheses and brackets are excluded since they are handled specially
- # curly braces are included but may be overridden by logic, such as
- # newline logic.
-
- # NEW_TOKENS: create a whitespace rule here. This can be as
- # simple as adding your new letter to @spaces_both_sides, for
- # example.
-
- my @q;
-
- @q = qw" L { ( [ ";
- @is_opening_type{@q} = (1) x scalar(@q);
-
- @q = qw" R } ) ] ";
- @is_closing_type{@q} = (1) x scalar(@q);
-
- my @spaces_both_sides = qw"
- + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
- .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
- &&= ||= //= <=> A k f w F n C Y U G v
- ";
-
- my @spaces_left_side = qw"
- t ! ~ m p { \ h pp mm Z j
- ";
- push( @spaces_left_side, '#' ); # avoids warning message
-
- my @spaces_right_side = qw"
- ; } ) ] R J ++ -- **=
- ";
- push( @spaces_right_side, ',' ); # avoids warning message
-
- # Note that we are in a BEGIN block here. Later in processing
- # the values of %want_left_space and %want_right_space
- # may be overridden by any user settings specified by the
- # -wls and -wrs parameters. However the binary_whitespace_rules
- # are hardwired and have priority.
- @want_left_space{@spaces_both_sides} =
- (1) x scalar(@spaces_both_sides);
- @want_right_space{@spaces_both_sides} =
- (1) x scalar(@spaces_both_sides);
- @want_left_space{@spaces_left_side} =
- (1) x scalar(@spaces_left_side);
- @want_right_space{@spaces_left_side} =
- (-1) x scalar(@spaces_left_side);
- @want_left_space{@spaces_right_side} =
- (-1) x scalar(@spaces_right_side);
- @want_right_space{@spaces_right_side} =
- (1) x scalar(@spaces_right_side);
- $want_left_space{'->'} = WS_NO;
- $want_right_space{'->'} = WS_NO;
- $want_left_space{'**'} = WS_NO;
- $want_right_space{'**'} = WS_NO;
- $want_right_space{'CORE::'} = WS_NO;
-
- # These binary_ws_rules are hardwired and have priority over the above
- # settings. It would be nice to allow adjustment by the user,
- # but it would be complicated to specify.
- #
- # hash type information must stay tightly bound
- # as in : ${xxxx}
- $binary_ws_rules{'i'}{'L'} = WS_NO;
- $binary_ws_rules{'i'}{'{'} = WS_YES;
- $binary_ws_rules{'k'}{'{'} = WS_YES;
- $binary_ws_rules{'U'}{'{'} = WS_YES;
- $binary_ws_rules{'i'}{'['} = WS_NO;
- $binary_ws_rules{'R'}{'L'} = WS_NO;
- $binary_ws_rules{'R'}{'{'} = WS_NO;
- $binary_ws_rules{'t'}{'L'} = WS_NO;
- $binary_ws_rules{'t'}{'{'} = WS_NO;
- $binary_ws_rules{'}'}{'L'} = WS_NO;
- $binary_ws_rules{'}'}{'{'} = WS_NO;
- $binary_ws_rules{'$'}{'L'} = WS_NO;
- $binary_ws_rules{'$'}{'{'} = WS_NO;
- $binary_ws_rules{'@'}{'L'} = WS_NO;
- $binary_ws_rules{'@'}{'{'} = WS_NO;
- $binary_ws_rules{'='}{'L'} = WS_YES;
- $binary_ws_rules{'J'}{'J'} = WS_YES;
-
- # the following includes ') {'
- # as in : if ( xxx ) { yyy }
- $binary_ws_rules{']'}{'L'} = WS_NO;
- $binary_ws_rules{']'}{'{'} = WS_NO;
- $binary_ws_rules{')'}{'{'} = WS_YES;
- $binary_ws_rules{')'}{'['} = WS_NO;
- $binary_ws_rules{']'}{'['} = WS_NO;
- $binary_ws_rules{']'}{'{'} = WS_NO;
- $binary_ws_rules{'}'}{'['} = WS_NO;
- $binary_ws_rules{'R'}{'['} = WS_NO;
-
- $binary_ws_rules{']'}{'++'} = WS_NO;
- $binary_ws_rules{']'}{'--'} = WS_NO;
- $binary_ws_rules{')'}{'++'} = WS_NO;
- $binary_ws_rules{')'}{'--'} = WS_NO;
-
- $binary_ws_rules{'R'}{'++'} = WS_NO;
- $binary_ws_rules{'R'}{'--'} = WS_NO;
-
- $binary_ws_rules{'i'}{'Q'} = WS_YES;
- $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
-
- # FIXME: we could to split 'i' into variables and functions
- # and have no space for functions but space for variables. For now,
- # I have a special patch in the special rules below
- $binary_ws_rules{'i'}{'('} = WS_NO;
-
- $binary_ws_rules{'w'}{'('} = WS_NO;
- $binary_ws_rules{'w'}{'{'} = WS_YES;
-} ## end BEGIN block
-
-sub set_whitespace_flags {
-
- # This routine examines each pair of nonblank tokens and
- # sets a flag indicating if white space is needed.
- #
- # $rwhitespace_flags->[$j] is a flag indicating whether a white space
- # BEFORE token $j is needed, with the following values:
- #
- # WS_NO = -1 do not want a space before token $j
- # WS_OPTIONAL= 0 optional space or $j is a whitespace
- # WS_YES = 1 want a space before token $j
- #
-
- my $self = shift;
- my $rLL = $self->{rLL};
-
- my $rwhitespace_flags = [];
-
- my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
- $token, $type, $block_type, $input_line_no );
- my $j_tight_closing_paren = -1;
-
- $token = ' ';
- $type = 'b';
- $block_type = '';
- $input_line_no = 0;
- $last_token = ' ';
- $last_type = 'b';
- $last_block_type = '';
- $last_input_line_no = 0;
-
- my $jmax = @{$rLL} - 1;
-
- my ($ws);
-
- # This is some logic moved to a sub to avoid deep nesting of if stmts
- my $ws_in_container = sub {
-
- my ($j) = @_;
- my $ws = WS_YES;
- if ( $j + 1 > $jmax ) { return (WS_NO) }
-
- # Patch to count '-foo' as single token so that
- # each of $a{-foo} and $a{foo} and $a{'foo'} do
- # not get spaces with default formatting.
- my $j_here = $j;
- ++$j_here
- if ( $token eq '-'
- && $last_token eq '{'
- && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
-
- # $j_next is where a closing token should be if
- # the container has a single token
- if ( $j_here + 1 > $jmax ) { return (WS_NO) }
- my $j_next =
- ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
- ? $j_here + 2
- : $j_here + 1;
-
- if ( $j_next > $jmax ) { return WS_NO }
- my $tok_next = $rLL->[$j_next]->[_TOKEN_];
- my $type_next = $rLL->[$j_next]->[_TYPE_];
-
- # for tightness = 1, if there is just one token
- # within the matching pair, we will keep it tight
- if (
- $tok_next eq $matching_token{$last_token}
-
- # but watch out for this: [ [ ] (misc.t)
- && $last_token ne $token
-
- # double diamond is usually spaced
- && $token ne '<<>>'
-
- )
- {
-
- # remember where to put the space for the closing paren
- $j_tight_closing_paren = $j_next;
- return (WS_NO);
- }
- return (WS_YES);
- };
-
- # main loop over all tokens to define the whitespace flags
- for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
-
- my $rtokh = $rLL->[$j];
-
- # Set a default
- $rwhitespace_flags->[$j] = WS_OPTIONAL;
-
- if ( $rtokh->[_TYPE_] eq 'b' ) {
- next;
- }
-
- # set a default value, to be changed as needed
- $ws = undef;
- $last_token = $token;
- $last_type = $type;
- $last_block_type = $block_type;
- $last_input_line_no = $input_line_no;
- $token = $rtokh->[_TOKEN_];
- $type = $rtokh->[_TYPE_];
- $block_type = $rtokh->[_BLOCK_TYPE_];
- $input_line_no = $rtokh->[_LINE_INDEX_];
-
- #---------------------------------------------------------------
- # Whitespace Rules Section 1:
- # Handle space on the inside of opening braces.
- #---------------------------------------------------------------
-
- # /^[L\{\(\[]$/
- if ( $is_opening_type{$last_type} ) {
-
- $j_tight_closing_paren = -1;
-
- # let us keep empty matched braces together: () {} []
- # except for BLOCKS
- if ( $token eq $matching_token{$last_token} ) {
- if ($block_type) {
- $ws = WS_YES;
- }
- else {
- $ws = WS_NO;
- }
- }
- else {
-
- # we're considering the right of an opening brace
- # tightness = 0 means always pad inside with space
- # tightness = 1 means pad inside if "complex"
- # tightness = 2 means never pad inside with space
-
- my $tightness;
- if ( $last_type eq '{'
- && $last_token eq '{'
- && $last_block_type )
- {
- $tightness = $rOpts_block_brace_tightness;
- }
- else { $tightness = $tightness{$last_token} }
-
- #=============================================================
- # Patch for test problem fabrice_bug.pl
- # We must always avoid spaces around a bare word beginning
- # with ^ as in:
- # my $before = ${^PREMATCH};
- # Because all of the following cause an error in perl:
- # my $before = ${ ^PREMATCH };
- # my $before = ${ ^PREMATCH};
- # my $before = ${^PREMATCH };
- # So if brace tightness flag is -bt=0 we must temporarily reset
- # to bt=1. Note that here we must set tightness=1 and not 2 so
- # that the closing space
- # is also avoided (via the $j_tight_closing_paren flag in coding)
- if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
-
- #=============================================================
-
- if ( $tightness <= 0 ) {
- $ws = WS_YES;
- }
- elsif ( $tightness > 1 ) {
- $ws = WS_NO;
- }
- else {
- $ws = $ws_in_container->($j);
- }
- }
- } # end setting space flag inside opening tokens
- my $ws_1;
- $ws_1 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
-
- #---------------------------------------------------------------
- # Whitespace Rules Section 2:
- # Handle space on inside of closing brace pairs.
- #---------------------------------------------------------------
-
- # /[\}\)\]R]/
- if ( $is_closing_type{$type} ) {
-
- if ( $j == $j_tight_closing_paren ) {
-
- $j_tight_closing_paren = -1;
- $ws = WS_NO;
- }
- else {
-
- if ( !defined($ws) ) {
-
- my $tightness;
- if ( $type eq '}' && $token eq '}' && $block_type ) {
- $tightness = $rOpts_block_brace_tightness;
- }
- else { $tightness = $tightness{$token} }
-
- $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
- }
- }
- } # end setting space flag inside closing tokens
-
- my $ws_2;
- $ws_2 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
-
- #---------------------------------------------------------------
- # Whitespace Rules Section 3:
- # Use the binary rule table.
- #---------------------------------------------------------------
- if ( !defined($ws) ) {
- $ws = $binary_ws_rules{$last_type}{$type};
- }
- my $ws_3;
- $ws_3 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
-
- #---------------------------------------------------------------
- # Whitespace Rules Section 4:
- # Handle some special cases.
- #---------------------------------------------------------------
- if ( $token eq '(' ) {
-
- # This will have to be tweaked as tokenization changes.
- # We usually want a space at '} (', for example:
- # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
- #
- # But not others:
- # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
- # At present, the above & block is marked as type L/R so this case
- # won't go through here.
- if ( $last_type eq '}' ) { $ws = WS_YES }
-
- # NOTE: some older versions of Perl had occasional problems if
- # spaces are introduced between keywords or functions and opening
- # parens. So the default is not to do this except is certain
- # cases. The current Perl seems to tolerate spaces.
-
- # Space between keyword and '('
- elsif ( $last_type eq 'k' ) {
- $ws = WS_NO
- unless ( $rOpts_space_keyword_paren
- || $space_after_keyword{$last_token} );
- }
-
- # Space between function and '('
- # -----------------------------------------------------
- # 'w' and 'i' checks for something like:
- # myfun( &myfun( ->myfun(
- # -----------------------------------------------------
- elsif (( $last_type =~ /^[wUG]$/ )
- || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
- {
- $ws = WS_NO unless ($rOpts_space_function_paren);
- }
-
- # space between something like $i and ( in
- # for $i ( 0 .. 20 ) {
- # FIXME: eventually, type 'i' needs to be split into multiple
- # token types so this can be a hardwired rule.
- elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
- $ws = WS_YES;
- }
-
- # allow constant function followed by '()' to retain no space
- elsif ($last_type eq 'C'
- && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
- {
- $ws = WS_NO;
- }
- }
-
- # patch for SWITCH/CASE: make space at ']{' optional
- # since the '{' might begin a case or when block
- elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
- $ws = WS_OPTIONAL;
- }
-
- # keep space between 'sub' and '{' for anonymous sub definition
- if ( $type eq '{' ) {
- if ( $last_token eq 'sub' ) {
- $ws = WS_YES;
- }
-
- # this is needed to avoid no space in '){'
- if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
-
- # avoid any space before the brace or bracket in something like
- # @opts{'a','b',...}
- if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
- $ws = WS_NO;
- }
- }
-
- elsif ( $type eq 'i' ) {
-
- # never a space before ->
- if ( $token =~ /^\-\>/ ) {
- $ws = WS_NO;
- }
- }
-
- # retain any space between '-' and bare word
- elsif ( $type eq 'w' || $type eq 'C' ) {
- $ws = WS_OPTIONAL if $last_type eq '-';
-
- # never a space before ->
- if ( $token =~ /^\-\>/ ) {
- $ws = WS_NO;
- }
- }
-
- # retain any space between '-' and bare word
- # example: avoid space between 'USER' and '-' here:
- # $myhash{USER-NAME}='steve';
- elsif ( $type eq 'm' || $type eq '-' ) {
- $ws = WS_OPTIONAL if ( $last_type eq 'w' );
- }
-
- # always space before side comment
- elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
-
- # always preserver whatever space was used after a possible
- # filehandle (except _) or here doc operator
- if (
- $type ne '#'
- && ( ( $last_type eq 'Z' && $last_token ne '_' )
- || $last_type eq 'h' )
- )
- {
- $ws = WS_OPTIONAL;
- }
-
- # space_backslash_quote; RT #123774
- # allow a space between a backslash and single or double quote
- # to avoid fooling html formatters
- elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
- if ($rOpts_space_backslash_quote) {
- if ( $rOpts_space_backslash_quote == 1 ) {
- $ws = WS_OPTIONAL;
- }
- elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
- else { } # shouldnt happen
- }
- else {
- $ws = WS_NO;
- }
- }
-
- my $ws_4;
- $ws_4 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
-
- #---------------------------------------------------------------
- # Whitespace Rules Section 5:
- # Apply default rules not covered above.
- #---------------------------------------------------------------
-
- # If we fall through to here, look at the pre-defined hash tables for
- # the two tokens, and:
- # if (they are equal) use the common value
- # if (either is zero or undef) use the other
- # if (either is -1) use it
- # That is,
- # left vs right
- # 1 vs 1 --> 1
- # 0 vs 0 --> 0
- # -1 vs -1 --> -1
- #
- # 0 vs -1 --> -1
- # 0 vs 1 --> 1
- # 1 vs 0 --> 1
- # -1 vs 0 --> -1
- #
- # -1 vs 1 --> -1
- # 1 vs -1 --> -1
- if ( !defined($ws) ) {
- my $wl = $want_left_space{$type};
- my $wr = $want_right_space{$last_type};
- if ( !defined($wl) ) { $wl = 0 }
- if ( !defined($wr) ) { $wr = 0 }
- $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
- }
-
- if ( !defined($ws) ) {
- $ws = 0;
- write_diagnostics(
- "WS flag is undefined for tokens $last_token $token\n");
- }
-
- # Treat newline as a whitespace. Otherwise, we might combine
- # 'Send' and '-recipients' here according to the above rules:
- # my $msg = new Fax::Send
- # -recipients => $to,
- # -data => $data;
- if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
-
- if ( ( $ws == 0 )
- && $j > 0
- && $j < $jmax
- && ( $last_type !~ /^[Zh]$/ ) )
- {
-
- # If this happens, we have a non-fatal but undesirable
- # hole in the above rules which should be patched.
- write_diagnostics(
- "WS flag is zero for tokens $last_token $token\n");
- }
-
- $rwhitespace_flags->[$j] = $ws;
-
- FORMATTER_DEBUG_FLAG_WHITE && do {
- my $str = substr( $last_token, 0, 15 );
- $str .= ' ' x ( 16 - length($str) );
- if ( !defined($ws_1) ) { $ws_1 = "*" }
- if ( !defined($ws_2) ) { $ws_2 = "*" }
- if ( !defined($ws_3) ) { $ws_3 = "*" }
- if ( !defined($ws_4) ) { $ws_4 = "*" }
- print STDOUT
-"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
- };
- } ## end main loop
-
- if ( $rOpts->{'tight-secret-operators'} ) {
- new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
- }
- return $rwhitespace_flags;
-} ## end sub set_whitespace_flags
-
-sub respace_tokens {
-
- my $self = shift;
- return if $rOpts->{'indent-only'};
-
- # This routine makes all necessary changes to the tokenization after the
- # file has been read. This consists mostly of inserting and deleting spaces
- # according to the selected parameters. In a few cases non-space characters
- # are added, deleted or modified.
-
- # The old tokens are copied one-by-one, with changes, from the old
- # linear storage array to a new array.
-
- my $rLL = $self->{rLL};
- my $Klimit_old = $self->{Klimit};
- my $rlines = $self->{rlines};
- my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
-
- my $rLL_new = []; # This is the new array
- my $KK = 0;
- my $rtoken_vars;
- my $Kmax = @{$rLL} - 1;
-
- # Set the whitespace flags, which indicate the token spacing preference.
- my $rwhitespace_flags = $self->set_whitespace_flags();
-
- # we will be setting token lengths as we go
- my $cumulative_length = 0;
-
- # We also define these hash indexes giving container token array indexes
- # as a function of the container sequence numbers. For example,
- my $K_opening_container = {}; # opening [ { or (
- my $K_closing_container = {}; # closing ] } or )
- my $K_opening_ternary = {}; # opening ? of ternary
- my $K_closing_ternary = {}; # closing : of ternary
-
- # List of new K indexes of phantom semicolons
- # This will be needed if we want to undo them for iterations
- my $rK_phantom_semicolons = [];
-
- # Temporary hashes for adding semicolons
- ##my $rKfirst_new = {};
-
- # a sub to link preceding nodes forward to a new node type
- my $link_back = sub {
- my ( $Ktop, $key ) = @_;
-
- my $Kprev = $Ktop - 1;
- while ( $Kprev >= 0
- && !defined( $rLL_new->[$Kprev]->[$key] ) )
- {
- $rLL_new->[$Kprev]->[$key] = $Ktop;
- $Kprev -= 1;
- }
- };
-
- # A sub to store each token in the new array
- # All new tokens must be stored by this sub so that it can update
- # all data structures on the fly.
- my $last_nonblank_type = ';';
- my $store_token = sub {
- my ($item) = @_;
-
- # This will be the index of this item in the new array
- my $KK_new = @{$rLL_new};
-
- # check for a sequenced item (i.e., container or ?/:)
- my $type_sequence = $item->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
-
- $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
-
- my $token = $item->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
-
- $K_opening_container->{$type_sequence} = $KK_new;
- }
- elsif ( $is_closing_token{$token} ) {
-
- $K_closing_container->{$type_sequence} = $KK_new;
- }
-
- # These are not yet used but could be useful
- else {
- if ( $token eq '?' ) {
- $K_opening_ternary->{$type_sequence} = $KK;
- }
- elsif ( $token eq ':' ) {
- $K_closing_ternary->{$type_sequence} = $KK;
- }
- else {
- # shouldn't happen
- print STDERR "Ugh: shouldn't happen\n";
- }
- }
- }
-
- # Save the length sum to just BEFORE this token
- $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
-
- # now set the length of this token
- my $token_length = length( $item->[_TOKEN_] );
-
- # and update the cumulative length
- $cumulative_length += $token_length;
-
- my $type = $item->[_TYPE_];
- if ( $type ne 'b' ) { $last_nonblank_type = $type }
-
- # and finally, add this item to the new array
- push @{$rLL_new}, $item;
- };
-
- my $add_phantom_semicolon = sub {
-
- my ($KK) = @_;
-
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
-
- # we are only adding semicolons for certain block types
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- return
- unless ( $ok_to_add_semicolon_for_block_type{$block_type}
- || $block_type =~ /^(sub|package)/
- || $block_type =~ /^\w+\:$/ );
-
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
-
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
-
- # Do not add a semicolon if...
- return
- if (
-
- # it would follow a comment (and be isolated)
- $previous_nonblank_type eq '#'
-
- # it follows a code block ( because they are not always wanted
- # there and may add clutter)
- || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
-
- # it would follow a label
- || $previous_nonblank_type eq 'J'
-
- # it would be inside a 'format' statement (and cause syntax error)
- || ( $previous_nonblank_type eq 'k'
- && $previous_nonblank_token =~ /format/ )
-
- # if it would prevent welding two containers
- || $rpaired_to_inner_container->{$type_sequence}
-
- );
-
- # We will insert an empty semicolon here as a placeholder.
- # Later, if it becomes the last token on a line, we will bring it to life.
- # The advantage of doing this is that (1) we just have to check line endings,
- # and (2) the phantom semicolon has zero width and therefore won't cause
- # needless breaks of one-line blocks.
- my $Ktop = -1;
- if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
- && $want_left_space{';'} == WS_NO )
- {
-
- # convert the blank into a semicolon..
- # be careful: we are working on the new stack top
- # on a token which has been stored.
- my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
-
- # Convert the existing blank to a semicolon
- $rLL_new->[$Ktop]->[_TOKEN_] = ''; # zero length
- $rLL_new->[$Ktop]->[_TYPE_] = ';';
- $rLL_new->[$Ktop]->[_SLEVEL_] =
- $rLL->[$KK]->[_SLEVEL_];
-
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
-
- # Then store a new blank
- $store_token->($rcopy);
- }
- else {
-
- # insert a new token
- my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
- $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
- $store_token->($rcopy);
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
- }
- };
-
- my $check_Q = sub {
-
- # Check that a quote looks okay
- # This works but needs to by sync'd with the log file output
- my ( $KK, $Kfirst ) = @_;
- my $token = $rLL->[$KK]->[_TOKEN_];
- note_embedded_tab() if ( $token =~ "\t" );
-
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
-
- my $previous_nonblank_type_2 = 'b';
- my $previous_nonblank_token_2 = "";
- my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
- $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
- }
-
- my $Kn = $self->K_next_nonblank($KK);
- my $next_nonblank_token = "";
- if ( defined($Kn) ) {
- $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
- }
-
- my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
- my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
-
- # make note of something like '$var = s/xxx/yyy/;'
- # in case it should have been '$var =~ s/xxx/yyy/;'
- if (
- $token =~ /^(s|tr|y|m|\/)/
- && $previous_nonblank_token =~ /^(=|==|!=)$/
-
- # preceded by simple scalar
- && $previous_nonblank_type_2 eq 'i'
- && $previous_nonblank_token_2 =~ /^\$/
-
- # followed by some kind of termination
- # (but give complaint if we can not see far enough ahead)
- && $next_nonblank_token =~ /^[; \)\}]$/
-
- # scalar is not declared
- && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
- )
- {
- my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
- complain(
-"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
- );
- }
- };
-
- # Main loop over all lines of the file
- my $last_K_out;
- my $CODE_type = "";
- my $line_type = "";
- foreach my $line_of_tokens ( @{$rlines} ) {
-
- $input_line_number = $line_of_tokens->{_line_number};
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $last_CODE_type = $CODE_type;
- $CODE_type = $line_of_tokens->{_code_type};
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- next unless defined($Kfirst);
-
- # Check for correct sequence of token indexes...
- # An error here means that sub write_line() did not correctly
- # package the tokenized lines as it received them.
- if ( defined($last_K_out) ) {
- if ( $Kfirst != $last_K_out + 1 ) {
- Fault(
- "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
- );
- }
- }
- else {
- if ( $Kfirst != 0 ) {
- Fault("Program Bug: first K is $Kfirst but should be 0");
- }
- }
- $last_K_out = $Klast;
-
- # Handle special lines of code
- if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
-
- # CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'FS' = Format Skipping - line goes out verbatim, no blanks
- # 'IO' = Indent Only - only indentation may be changed
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'HSC'=Hanging Side Comment - fix this hanging side comment
- # 'BC'=Block Comment - an ordinary full line comment
- # 'SBC'=Static Block Comment - a block comment which does not get
- # indented
- # 'SBCX'=Static Block Comment Without Leading Space
- # 'DEL'=Delete this line
- # 'VER'=VERSION statement
- # '' or (undefined) - no restructions
-
- # For a hanging side comment we insert an empty quote before
- # the comment so that it becomes a normal side comment and
- # will be aligned by the vertical aligner
- if ( $CODE_type eq 'HSC' ) {
-
- # Safety Check: This must be a line with one token (a comment)
- my $rtoken_vars = $rLL->[$Kfirst];
- if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
-
- # Note that even if the flag 'noadd-whitespace' is set, we will
- # make an exception here and allow a blank to be inserted to push the comment
- # to the right. We can think of this as an adjustment of indentation
- # rather than whitespace between tokens. This will also prevent the hanging
- # side comment from getting converted to a block comment if whitespace
- # gets deleted, as for example with the -extrude and -mangle options.
- my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
- $store_token->($rcopy);
- $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- $store_token->($rtoken_vars);
- next;
- }
- else {
-
- # This line was mis-marked by sub scan_comment
- Fault(
- "Program bug. A hanging side comment has been mismarked"
- );
- }
- }
-
- # Copy tokens unchanged
- foreach my $KK ( $Kfirst .. $Klast ) {
- $store_token->( $rLL->[$KK] );
- }
- next;
- }
-
- # Handle normal line..
-
- # Insert any essential whitespace between lines
- # if last line was normal CODE
- my $type_next = $rLL->[$Kfirst]->[_TYPE_];
- my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- if ( $last_line_type eq 'CODE'
- && $type_next ne 'b'
- && defined($Kp) )
- {
- my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
- my $type_p = $rLL_new->[$Kp]->[_TYPE_];
-
- my ( $token_pp, $type_pp );
- my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
-
- if (
- is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
- )
- )
- {
-
- # Copy this first token as blank, but use previous line number
- my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
- $store_token->($rcopy);
- }
- }
-
- # loop to copy all tokens on this line, with any changes
- my $type_sequence;
- for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
- $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- my $last_type_sequence = $type_sequence;
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
-
- # Handle a blank space ...
- if ( $type eq 'b' ) {
-
- # Delete it if not wanted by whitespace rules
- # or we are deleting all whitespace
- # Note that whitespace flag is a flag indicating whether a
- # white space BEFORE the token is needed
- next if ( $KK >= $Kmax ); # skip terminal blank
- my $Knext = $KK + 1;
- my $ws = $rwhitespace_flags->[$Knext];
- if ( $ws == -1
- || $rOpts_delete_old_whitespace )
- {
-
- # FIXME: maybe switch to using _new
- my $Kp = $self->K_previous_nonblank($KK);
- next unless defined($Kp);
- my $token_p = $rLL->[$Kp]->[_TOKEN_];
- my $type_p = $rLL->[$Kp]->[_TYPE_];
-
- my ( $token_pp, $type_pp );
-
- #my $Kpp = $K_previous_nonblank->($Kp);
- my $Kpp = $self->K_previous_nonblank($Kp);
- if ( defined($Kpp) ) {
- $token_pp = $rLL->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
- my $token_next = $rLL->[$Knext]->[_TOKEN_];
- my $type_next = $rLL->[$Knext]->[_TYPE_];
-
- my $do_not_delete = is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
- );
-
- next unless ($do_not_delete);
- }
-
- # make it just one character if allowed
- if ($rOpts_add_whitespace) {
- $rtoken_vars->[_TOKEN_] = ' ';
- }
- $store_token->($rtoken_vars);
- next;
- }
-
- # Handle a nonblank token...
-
- # Modify certain tokens here for whitespace
- # The following is not yet done, but could be:
- # sub (x x x)
- if ( $type =~ /^[wit]$/ ) {
-
- # Examples:
- # change '$ var' to '$var' etc
- # '-> new' to '->new'
- if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
- $token =~ s/\s*//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
-
- # Split identifiers with leading arrows, inserting blanks if
- # necessary. It is easier and safer here than in the
- # tokenizer. For example '->new' becomes two tokens, '->' and
- # 'new' with a possible blank between.
- #
- # Note: there is a related patch in sub set_whitespace_flags
- if ( $token =~ /^\-\>(.*)$/ && $1 ) {
- my $token_save = $1;
- my $type_save = $type;
-
- # store a blank to left of arrow if necessary
- my $Kprev = $self->K_previous_nonblank($KK);
- if ( defined($Kprev)
- && $rLL->[$Kprev]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace
- && $want_left_space{'->'} == WS_YES )
- {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- }
-
- # then store the arrow
- my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
- $store_token->($rcopy);
-
- # then reset the current token to be the remainder,
- # and reset the whitespace flag according to the arrow
- $token = $rtoken_vars->[_TOKEN_] = $token_save;
- $type = $rtoken_vars->[_TYPE_] = $type_save;
- $store_token->($rtoken_vars);
- next;
- }
-
- if ( $token =~ /$SUB_PATTERN/ ) {
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
-
- # trim identifiers of trailing blanks which can occur
- # under some unusual circumstances, such as if the
- # identifier 'witch' has trailing blanks on input here:
- #
- # sub
- # witch
- # () # prototype may be on new line ...
- # ...
- if ( $type eq 'i' ) {
- $token =~ s/\s+$//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
- }
-
- # change 'LABEL :' to 'LABEL:'
- elsif ( $type eq 'J' ) {
- $token =~ s/\s+//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
-
- # patch to add space to something like "x10"
- # This avoids having to split this token in the pre-tokenizer
- elsif ( $type eq 'n' ) {
- if ( $token =~ /^x\d+/ ) {
- $token =~ s/x/x /;
- $rtoken_vars->[_TOKEN_] = $token;
- }
- }
-
- # check a quote for problems
- elsif ( $type eq 'Q' ) {
-
- # This is ready to go but is commented out because there is
- # still identical logic in sub break_lines.
- # $check_Q->($KK, $Kfirst);
- }
-
- # trim blanks from right of qw quotes
- # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
- elsif ( $type eq 'q' ) {
- $token =~ s/\s*$//;
- $rtoken_vars->[_TOKEN_] = $token;
- note_embedded_tab() if ( $token =~ "\t" );
- }
-
- elsif ($type_sequence) {
-
- # if ( $is_opening_token{$token} ) {
- # }
-
- if ( $is_closing_token{$token} ) {
-
- # Insert a tentative missing semicolon if the next token is
- # a closing block brace
- if (
- $type eq '}'
- && $token eq '}'
-
- # not preceded by a ';'
- && $last_nonblank_type ne ';'
-
- # and this is not a VERSION stmt (is all one line, we are not
- # inserting semicolons on one-line blocks)
- && $CODE_type ne 'VER'
-
- # and we are allowed to add semicolons
- && $rOpts->{'add-semicolons'}
- )
- {
- $add_phantom_semicolon->($KK);
- }
- }
- }
-
- # Insert any needed whitespace
- if ( @{$rLL_new}
- && $rLL_new->[-1]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace )
- {
- my $ws = $rwhitespace_flags->[$KK];
- if ( $ws == 1 ) {
-
- my $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
- $store_token->($rcopy);
- }
- }
- $store_token->($rtoken_vars);
- } # End token loop
- } # End line loop
-
- # Reset memory to be the new array
- $self->{rLL} = $rLL_new;
- $self->set_rLL_max_index();
- $self->{K_opening_container} = $K_opening_container;
- $self->{K_closing_container} = $K_closing_container;
- $self->{K_opening_ternary} = $K_opening_ternary;
- $self->{K_closing_ternary} = $K_closing_ternary;
- $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
-
- # make sure the new array looks okay
- $self->check_token_array();
-
- # reset the token limits of each line
- $self->resync_lines_and_tokens();
-
- return;
-}
-
-{ # scan_comments
-
- my $Last_line_had_side_comment;
- my $In_format_skipping_section;
- my $Saw_VERSION_in_this_file;
-
- sub scan_comments {
-
-     # Classify every line of type 'CODE' in the file.  Each such line is
-     # handed to get_CODE_type() and the flag which comes back is stored
-     # on the line itself under the key '_code_type'.  Lines of any other
-     # type are passed over untouched.
-     my $self   = shift;
-     my $rlines = $self->{rlines};
-
-     # Start with a clean slate: these variables are shared with
-     # get_CODE_type(), which updates them as it moves from line to line.
-     (
-         $Last_line_had_side_comment,
-         $In_format_skipping_section,
-         $Saw_VERSION_in_this_file
-     ) = ();
-
-     foreach my $rline ( @{$rlines} ) {
-         if ( $rline->{_line_type} eq 'CODE' ) {
-             $rline->{_code_type} = $self->get_CODE_type($rline);
-         }
-     }
-     return;
- }
-
- sub get_CODE_type {
-
- # Return the CODE_type flag for one line of code.
- #
- # Input:  $line_of_tokens - hash ref for the line; the keys read here are
- #         _line_number, _rK_range, _line_text, _starting_in_quote,
- #         _ending_in_quote and _guessed_indentation_level.
- # Output: one of the CODE_type strings listed below.
- #
- # Side effects visible in this sub:
- #  - sets the global $input_line_number (used for error messages)
- #  - updates the shared variables $Last_line_had_side_comment,
- #    $In_format_skipping_section and $Saw_VERSION_in_this_file
- #  - trims trailing whitespace from the token of a block comment
- #  - writes log file entries when a format skipping section is entered
- #    or exited and when a VERSION line is passed
- #
- # The order of the tests matters: each early return also decides what
- # value $Last_line_had_side_comment carries to the next line.
- my ( $self, $line_of_tokens ) = @_;
-
- # We are looking at a line of code and setting a flag to
- # describe any special processing that it requires
-
- # Possible CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'FS' = Format Skipping - line is in (or delimits) a format skipping
- # section
- # 'IO' = Indent Only - line goes out unchanged except for indentation
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'HSC'=Hanging Side Comment - fix this hanging side comment
- # 'BC'=Block Comment - an ordinary full line comment
- # 'SBC'=Static Block Comment - a block comment which does not get
- # indented
- # 'SBCX'=Static Block Comment Without Leading Space
- # 'DEL'=Delete this line
- # 'VER'=VERSION statement
- # '' or (undefined) - no restrictions
-
- my $rLL = $self->{rLL};
-
- # NOTE(review): $Klimit is not referenced again in this sub
- my $Klimit = $self->{Klimit};
-
- # Default flag: 'IO' if only indenting, else 'NIN' if newlines may not
- # be added, else the empty string
- my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
- my $no_internal_newlines = 1 - $rOpts_add_newlines;
- if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
-
- # extract what we need for this line..
-
- # Global value for error messages:
- $input_line_number = $line_of_tokens->{_line_number};
-
- # $jmax is the index of the last token relative to the first token of
- # the line; it stays at -1 if the line has no token range
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- my $jmax = -1;
- if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
- my $input_line = $line_of_tokens->{_line_text};
-
- # NOTE(review): of the next group, only $in_continued_quote and
- # $input_line are used by live code below; $guessed_indentation_level
- # appears only inside the disabled =pod section
- my $in_continued_quote = my $starting_in_quote =
- $line_of_tokens->{_starting_in_quote};
- my $in_quote = $line_of_tokens->{_ending_in_quote};
- my $ending_in_quote = $in_quote;
- my $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
-
- my $is_static_block_comment = 0;
-
- # Handle a continued quote..
- if ($in_continued_quote) {
-
- # A line which is entirely a quote or pattern must go out
- # verbatim. Note: the \n is contained in $input_line.
- if ( $jmax <= 0 ) {
- if ( ( $input_line =~ "\t" ) ) {
- note_embedded_tab();
- }
- $Last_line_had_side_comment = 0;
- return 'VB';
- }
- }
-
- # A block comment is a line whose single token has type '#'
- my $is_block_comment =
- ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
-
- # Write line verbatim if we are in a formatting skip section
- if ($In_format_skipping_section) {
- $Last_line_had_side_comment = 0;
-
- # Note: extra space appended to comment simplifies pattern matching
- if ( $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_end/o )
- {
- $In_format_skipping_section = 0;
- write_logfile_entry("Exiting formatting skip section\n");
- }
-
- # the closing marker line itself is also returned as 'FS'
- return 'FS';
- }
-
- # See if we are entering a formatting skip section
- if ( $rOpts_format_skipping
- && $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_begin/o )
- {
- $In_format_skipping_section = 1;
- write_logfile_entry("Entering formatting skip section\n");
- $Last_line_had_side_comment = 0;
- return 'FS';
- }
-
- # ignore trailing blank tokens (they will get deleted later)
- if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
- $jmax--;
- }
-
- # Handle a blank line..
- if ( $jmax < 0 ) {
- $Last_line_had_side_comment = 0;
- return 'BL';
- }
-
- # see if this is a static block comment (starts with ## by default)
- my $is_static_block_comment_without_leading_space = 0;
- if ( $is_block_comment
- && $rOpts->{'static-block-comments'}
- && $input_line =~ /$static_block_comment_pattern/o )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space =
- substr( $input_line, 0, 1 ) eq '#';
- }
-
- # Check for comments which are line directives
- # Treat exactly as static block comments without leading space
- # reference: perlsyn, near end, section Plain Old Comments (Not!)
- # example: '# line 42 "new_filename.plx"'
- if (
- $is_block_comment
- && $input_line =~ /^\# \s*
- line \s+ (\d+) \s*
- (?:\s("?)([^"]+)\2)? \s*
- $/x
- )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space = 1;
- }
-
- # look for hanging side comment
- if (
- $is_block_comment
- && $Last_line_had_side_comment # last line had side comment
- && $input_line =~ /^\s/ # there is some leading space
- && !$is_static_block_comment # do not make static comment hanging
- && $rOpts->{'hanging-side-comments'} # user is allowing
- # hanging side comments
- # like this
- )
- {
-
- # keep the flag set so that a following comment can also hang
- $Last_line_had_side_comment = 1;
- return 'HSC';
- }
-
- # remember if this line has a side comment
- $Last_line_had_side_comment =
- ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
-
- # Handle a block (full-line) comment..
- if ($is_block_comment) {
-
- # a deleted comment is returned before any trimming is done
- if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
-
- # TRIM COMMENTS -- This could be turned off as a option
- $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
-
- if ($is_static_block_comment_without_leading_space) {
- return 'SBCX';
- }
- elsif ($is_static_block_comment) {
- return 'SBC';
- }
- else {
- return 'BC';
- }
- }
-
-=pod
- # NOTE: This does not work yet. Version in print-line-of-tokens
- # is Still used until fixed
-
- # compare input/output indentation except for continuation lines
- # (because they have an unknown amount of initial blank space)
- # and lines which are quotes (because they may have been outdented)
- # Note: this test is placed here because we know the continuation flag
- # at this point, which allows us to avoid non-meaningful checks.
- my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_];
- compare_indentation_levels( $guessed_indentation_level,
- $structural_indentation_level )
- unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0
- || $guessed_indentation_level == 0
- && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' );
-=cut
-
- # Patch needed for MakeMaker. Do not break a statement
- # in which $VERSION may be calculated. See MakeMaker.pm;
- # this is based on the coding in it.
- # The first line of a file that matches this will be eval'd:
- # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- # Examples:
- # *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
- # We will pass such a line straight through without breaking
- # it unless -npvl is used.
- # NOTE(review): no test of a -npvl option is visible in this sub;
- # presumably it is applied elsewhere - confirm.
-
- # Patch for problem reported in RT #81866, where files
- # had been flattened into a single line and couldn't be
- # tidied without -npvl. There are two parts to this patch:
- # First, it is not done for a really long line (80 tokens for now).
- # Second, we will only allow up to one semicolon
- # before the VERSION. We need to allow at least one semicolon
- # for statements like this:
- # require Exporter; our $VERSION = $Exporter::VERSION;
- # where both statements must be on a single line for MakeMaker
-
- # NOTE(review): $is_VERSION_statement is set here but never read
- my $is_VERSION_statement = 0;
-
- # Only the first matching line of a file is marked 'VER'
- if ( !$Saw_VERSION_in_this_file
- && $jmax < 80
- && $input_line =~
- /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
- {
- $Saw_VERSION_in_this_file = 1;
- write_logfile_entry("passing VERSION line; -npvl deactivates\n");
- $CODE_type = 'VER';
- }
- return $CODE_type;
- }
-}
-
-sub find_nested_pairs {
-
-    # Scan all tokens and collect the pairs of containers which are
-    # "closely nested", meaning that both their opening tokens and their
-    # closing tokens are adjacent in the token stream.
-    #
-    # Results are stored in $self:
-    #   {rnested_pairs} = ref to array of [ $inner_seqno, $outer_seqno ],
-    #     pushed in the order in which the closing tokens are seen
-    #   {rpaired_to_inner_container}->{$outer_seqno} = $inner_seqno
-    #     We need these later to decide if we can insert a missing
-    #     semicolon.
-    # Nothing is stored if the token list is undefined or empty.
-    my $self = shift;
-
-    my $rLL = $self->{rLL};
-    return unless ( defined($rLL) && @{$rLL} );
-
-    # The array of pairs of nested containers
-    my @nested_pairs;
-
-    # Hash of sequence numbers: outer container => inner container
-    my $rpaired_to_inner_container = {};
-
-    # This local hash remembers if an outer container has an opening
-    # token closely followed by the opening token of an inner container:
-    #   The key is the outer sequence number
-    #   The value is the token variables of the inner opening token
-    my %has_close_following_opening;
-
-    # Names of calling routines can either be marked as 'i' or 'w',
-    # and they may invoke a sub call with an '->'.  We will consider
-    # any consecutive string of such types as a single unit when making
-    # weld decisions.  We also allow a leading !
-    my %is_name_type = map { $_ => 1 } ( 'i', 'w', 'U', '->', '!' );
-
-    my $is_name = sub {
-        my $type = shift;
-        return $type && $is_name_type{$type};
-    };
-
-    # The most recent container token, and the nonblank count when seen
-    my $last_container;
-    my $last_count;
-
-    # The most recent nonblank token of any kind
-    my $last_nonblank_token_vars;
-
-    my $nonblank_token_count = 0;
-
-    # loop over all tokens
-    foreach my $rtoken_vars ( @{$rLL} ) {
-
-        my $type = $rtoken_vars->[_TYPE_];
-
-        next if ( $type eq 'b' );
-
-        # long identifier-like items are counted as a single item.
-        # The 'defined' test avoids dereferencing (and autovivifying) the
-        # previous-token variable before the first nonblank token is seen.
-        $nonblank_token_count++
-          unless ( $is_name->($type)
-            && defined($last_nonblank_token_vars)
-            && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
-
-        my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
-        if ($type_sequence) {
-
-            my $token = $rtoken_vars->[_TOKEN_];
-
-            if ( $is_opening_token{$token} ) {
-
-                # following previous opening token ...
-                if ( $last_container
-                    && $is_opening_token{ $last_container->[_TOKEN_] } )
-                {
-
-                    # adjacent to this one; one intervening item (such
-                    # as a function name) is allowed after a paren
-                    my $tok_diff = $nonblank_token_count - $last_count;
-
-                    if ( $tok_diff == 1
-                        || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
-                    {
-
-                        # remember this pair...
-                        my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
-                        $has_close_following_opening{$outer_seqno} =
-                          $rtoken_vars;
-                    }
-                }
-            }
-
-            elsif ( $is_closing_token{$token} ) {
-
-                # if the corresponding opening token had an adjacent
-                # opening token, and the previous container token is the
-                # closing token of that same inner container ...
-                my $rinner_opening =
-                  $has_close_following_opening{$type_sequence};
-                if (   $rinner_opening
-                    && $is_closing_token{ $last_container->[_TOKEN_] }
-                    && $rinner_opening->[_TYPE_SEQUENCE_] ==
-                    $last_container->[_TYPE_SEQUENCE_] )
-                {
-
-                    # The closing weld tokens must be adjacent
-                    # NOTE: so intermediate commas and semicolons
-                    # can currently block a weld.  This is something
-                    # that could be fixed in the future by including
-                    # a flag to delete un-necessary commas and semicolons.
-                    my $tok_diff = $nonblank_token_count - $last_count;
-
-                    if ( $tok_diff == 1 ) {
-
-                        # This is a closely nested pair ..
-                        my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
-                        my $outer_seqno = $type_sequence;
-                        $rpaired_to_inner_container->{$outer_seqno} =
-                          $inner_seqno;
-
-                        push @nested_pairs, [ $inner_seqno, $outer_seqno ];
-                    }
-                }
-            }
-
-            $last_container = $rtoken_vars;
-            $last_count     = $nonblank_token_count;
-        }
-        $last_nonblank_token_vars = $rtoken_vars;
-    }
-    $self->{rnested_pairs}              = \@nested_pairs;
-    $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
-    return;
-}
-
-sub dump_tokens {
-
-    # a debug routine, not normally used
-    #
-    # Print the message $msg, the number of tokens, and then a table with
-    # one row per token (index K, token text, token type) to STDERR.
-    # A missing message or a missing token list is tolerated so that this
-    # routine can be called at any point while debugging.
-    my ( $self, $msg ) = @_;
-    $msg = "" unless ( defined($msg) );
-    my $rLL = $self->{rLL};
-    $rLL = [] unless ( defined($rLL) );
-    my $nvars = @{$rLL};
-    print STDERR "$msg\n";
-    print STDERR "ntokens=$nvars\n";
-    print STDERR "K\t_TOKEN_\t_TYPE_\n";
-    my $K = 0;
-    foreach my $item ( @{$rLL} ) {
-        print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
-        $K++;
-    }
-    return;
-}
-
-sub K_next_nonblank {
-
-    # Find the next nonblank token after index $KK.
-    #   $KK  = starting index; the search begins at $KK+1
-    #   $rLL = optional token array; defaults to $self->{rLL}
-    # Returns the index of the first following token whose type is not
-    # 'b'.  Returns nothing if $KK is undefined or negative, or if only
-    # blanks (or no tokens at all) follow.
-    my ( $self, $KK, $rLL ) = @_;
-
-    if ( !defined($KK) || $KK < 0 ) { return }
-    if ( !defined($rLL) ) { $rLL = $self->{rLL} }
-
-    my $Num = @{$rLL};
-    for ( my $Knnb = $KK + 1 ; $Knnb < $Num ; $Knnb++ ) {
-
-        # a hole in the token array indicates a program error
-        Fault("Undefined entry for k=$Knnb")
-          unless ( defined( $rLL->[$Knnb] ) );
-
-        return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
-    }
-    return;
-}
-
-sub K_previous_nonblank {
-
-    # return index of previous nonblank token before item K
-    #   $KK  = starting index; the search begins at $KK-1.
-    #          Call with $KK=undef to start search at the top of the array
-    #   $rLL = optional token array; defaults to $self->{rLL}
-    # Returns nothing if there is no nonblank token before $KK.
-    my ( $self, $KK, $rLL ) = @_;
-    $rLL = $self->{rLL} unless ( defined($rLL) );
-    my $Num = @{$rLL};
-    if ( !defined($KK) ) { $KK = $Num }
-    elsif ( $KK > $Num ) {
-
-        # The caller should make the first call with $KK=undef to
-        # avoid this error.  The message names this sub so that the
-        # diagnostic can be traced back to it.
-        Fault(
-"Program Bug: K_previous_nonblank called with K=$KK which exceeds $Num"
-        );
-    }
-
-    # walk backwards until a token which is not a blank is found
-    for ( my $Kpnb = $KK - 1 ; $Kpnb >= 0 ; $Kpnb-- ) {
-        return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
-    }
-    return;
-}
-
-sub weld_containers {
-
-    # Driver for the welding operations (-wn and -cb).
-    my $self = shift;
-
-    # The weld length hashes are needed later for checking line lengths,
-    # so they are always emptied first, even if no welding is done.
-    # TODO: These should eventually be stored in $self rather than be
-    # package vars
-    foreach my $rhash (
-        \%weld_len_left_closing, \%weld_len_right_closing,
-        \%weld_len_left_opening, \%weld_len_right_opening
-      )
-    {
-        %{$rhash} = ();
-    }
-
-    # No welding if we only indent or may not add newlines
-    if ( $rOpts->{'indent-only'} || !$rOpts_add_newlines ) { return }
-
-    # ORDER MATTERS for the two calls below: weld_nested_containers()
-    # has to run first.  It is the more complex of the two and could
-    # overwrite %weld_len_... values written by weld_cuddled_blocks(),
-    # whereas weld_cuddled_blocks() is much simpler and will not
-    # overwrite the values written by weld_nested_containers().  Since
-    # weld_nested_containers() changes the _LEVEL_ values,
-    # weld_cuddled_blocks() must use the _TRUE_LEVEL_ values instead.
-    #
-    # A good test case for checking that cuddling and welding both work
-    # and do not interfere with each other:
-    #
-    #   perltidy -wn -cb -cbl='if-elsif-else'
-    #
-    #   if ($BOLD_MATH) { (
-    #       $labels, $comment,
-    #       join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
-    #   ) } else { (
-    #       &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
-    #       $after
-    #   ) }
-
-    if ( $rOpts->{'weld-nested-containers'} ) {
-        $self->weld_nested_containers();
-    }
-
-    if ( $rOpts->{'cuddled-blocks'} ) {
-        $self->weld_cuddled_blocks();
-    }
-
-    return;
-}
-
-sub weld_cuddled_blocks {
-    my $self = shift;
-
-    # This routine implements the -cb flag by finding the appropriate
-    # closing and opening block braces and welding them together.
-    #
-    # Results are left in two places:
-    #   $weld_len_right_closing{$seqno} = length from the closing brace of
-    #     container $seqno through the word which is cuddled to it
-    #   $rbreak_container->{$seqno} = 1 for containers which must be broken
-    # Nothing is done if the token list is undefined or empty.
-
-    my $rLL = $self->{rLL};
-    return unless ( defined($rLL) && @{$rLL} );
-    my $rbreak_container = $self->{rbreak_container};
-
-    my $K_opening_container = $self->{K_opening_container};
-    my $K_closing_container = $self->{K_closing_container};
-
-    my $is_broken_block = sub {
-
-        # a block is broken if the input line numbers of the braces differ
-        # (or if it has already been marked to be broken);
-        # we can only cuddle between broken blocks
-        my ($seqno) = @_;
-        my $K_opening = $K_opening_container->{$seqno};
-        return unless ( defined($K_opening) );
-        my $K_closing = $K_closing_container->{$seqno};
-        return unless ( defined($K_closing) );
-        return $rbreak_container->{$seqno}
-          || $rLL->[$K_closing]->[_LINE_INDEX_] !=
-          $rLL->[$K_opening]->[_LINE_INDEX_];
-    };
-
-    # A stack to remember open chains at all levels:
-    # $in_chain[$level] = [$chain_type, $type_sequence];
-    my @in_chain;
-    my $CBO = $rOpts->{'cuddled-break-option'};
-
-    # loop over structure items to find cuddled pairs
-    my $level = 0;
-    my $KK    = 0;
-    while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
-        my $rtoken_vars   = $rLL->[$KK];
-        my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
-        if ( !$type_sequence ) {
-            Fault("sequence = $type_sequence not defined");
-        }
-
-        # We use the original levels because they get changed by sub
-        # 'weld_nested_containers'. So if this were to be called before that
-        # routine, the levels would be wrong and things would go bad.
-        my $last_level = $level;
-        $level = $rtoken_vars->[_LEVEL_TRUE_];
-
-        # any chain at a level which we just left or just entered is over
-        if    ( $level < $last_level ) { $in_chain[$last_level] = undef }
-        elsif ( $level > $last_level ) { $in_chain[$level]      = undef }
-
-        # We are only looking at code blocks
-        my $token = $rtoken_vars->[_TOKEN_];
-        my $type  = $rtoken_vars->[_TYPE_];
-        next unless ( $type eq $token );
-
-        if ( $token eq '{' ) {
-
-            my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
-            if ( !$block_type ) {
-
-                # patch for unrecognized block types which may not be labeled:
-                # use the previous nonblank, non-comment token instead
-                my $Kp = $self->K_previous_nonblank($KK);
-                while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
-                    $Kp = $self->K_previous_nonblank($Kp);
-                }
-                next unless $Kp;
-                $block_type = $rLL->[$Kp]->[_TOKEN_];
-            }
-            if ( $in_chain[$level] ) {
-
-                # we are in a chain and are at an opening block brace.
-                # See if we are welding this opening brace with the previous
-                # block brace.  Get their identification numbers:
-                my $closing_seqno = $in_chain[$level]->[1];
-                my $opening_seqno = $type_sequence;
-
-                # The preceding block must be on multiple lines so that its
-                # closing brace will start a new line.  With break option 2
-                # an intact preceding block is marked to be broken instead.
-                if ( !$is_broken_block->($closing_seqno) ) {
-                    next unless ( $CBO == 2 );
-                    $rbreak_container->{$closing_seqno} = 1;
-                }
-
-                # we will let the trailing block be either broken or intact
-                ## && $is_broken_block->($opening_seqno);
-
-                # We can weld the closing brace to its following word ..
-                my $Ko  = $K_closing_container->{$closing_seqno};
-                my $Kon = $self->K_next_nonblank($Ko);
-
-                # ..unless it is a comment
-                if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
-                    my $dlen =
-                      $rLL->[ $Kon + 1 ]->[_CUMULATIVE_LENGTH_] -
-                      $rLL->[$Ko]->[_CUMULATIVE_LENGTH_];
-                    $weld_len_right_closing{$closing_seqno} = $dlen;
-
-                    # Set flag that we want to break the next container
-                    # so that the cuddled line is balanced.
-                    $rbreak_container->{$opening_seqno} = 1
-                      if ($CBO);
-                }
-
-            }
-            else {
-
-                # We are not in a chain. Start a new chain, using the block
-                # type as the chain type if it is a starting block type and
-                # the catch-all type '*' otherwise.
-                my $chain_type =
-                  $rcuddled_block_types->{$block_type} ? $block_type : '*';
-                $in_chain[$level] = [ $chain_type, $type_sequence ];
-            }
-        }
-        elsif ( $token eq '}' ) {
-            if ( $in_chain[$level] ) {
-
-                # We are in a chain at a closing brace.  See if this chain
-                # continues..
-                my $Knn = $self->K_next_nonblank($KK);
-
-                # skip past comments
-                while ( $Knn && $rLL->[$Knn]->[_TYPE_] eq '#' ) {
-                    $Knn = $self->K_next_nonblank($Knn);
-                }
-                next unless $Knn;
-
-                my $chain_type          = $in_chain[$level]->[0];
-                my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
-                if (
-                    $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
-                  )
-                {
-
-                    # Note that we do not weld yet because we must wait until
-                    # we are sure that an opening brace for this follows.
-                    $in_chain[$level]->[1] = $type_sequence;
-                }
-                else { $in_chain[$level] = undef }
-            }
-        }
-    }
-
-    return;
-}
-
-sub weld_nested_containers {
- my $self = shift;
-
- # This routine implements the -wn flag by "welding together"
- # the nested closing and opening tokens which were previously
- # identified by sub 'find_nested_pairs'. "welding" simply
- # involves setting certain hash values which will be checked
- # later during formatting.
-
- my $rLL = $self->{rLL};
- my $Klimit = $self->get_rLL_max_index();
- my $rnested_pairs = $self->{rnested_pairs};
- my $rlines = $self->{rlines};
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
-
- # Return unless there are nested pairs to weld
- return unless defined($rnested_pairs) && @{$rnested_pairs};
-
- # This array will hold the sequence numbers of the tokens to be welded.
- my @welds;
-
- # Variables needed for estimating line lengths
- my $starting_indent;
- my $starting_lentot;
-
- # A tolerance to the length for length estimates. In some rare cases
- # this can avoid problems where a final weld slightly exceeds the
- # line length and gets broken in a bad spot.
- my $length_tol = 1;
-
- my $excess_length_to = sub {
- my ($rtoken_hash) = @_;
-
- # Estimate the length from the line start to a given token
- my $length = $rtoken_hash->[_CUMULATIVE_LENGTH_] - $starting_lentot;
-
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return ($excess_length);
- };
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
-
- # Abbreviations:
- # _oo=outer opening, i.e. first of { {
- # _io=inner opening, i.e. second of { {
- # _oc=outer closing, i.e. second of } {
- # _ic=inner closing, i.e. first of } }
-
- my $previous_pair;
-
- # We are working from outermost to innermost pairs so that
- # level changes will be complete when we arrive at the inner pairs.
-
- while ( my $item = pop( @{$rnested_pairs} ) ) {
- my ( $inner_seqno, $outer_seqno ) = @{$item};
-
- my $Kouter_opening = $K_opening_container->{$outer_seqno};
- my $Kinner_opening = $K_opening_container->{$inner_seqno};
- my $Kouter_closing = $K_closing_container->{$outer_seqno};
- my $Kinner_closing = $K_closing_container->{$inner_seqno};
-
- my $outer_opening = $rLL->[$Kouter_opening];
- my $inner_opening = $rLL->[$Kinner_opening];
- my $outer_closing = $rLL->[$Kouter_closing];
- my $inner_closing = $rLL->[$Kinner_closing];
-
- my $iline_oo = $outer_opening->[_LINE_INDEX_];
- my $iline_io = $inner_opening->[_LINE_INDEX_];
-
- # Set flag saying if this pair starts a new weld
- my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
-
- # Set flag saying if this pair is adjacent to the previous nesting pair
- # (even if previous pair was rejected as a weld)
- my $touch_previous_pair =
- defined($previous_pair) && $outer_seqno == $previous_pair->[0];
- $previous_pair = $item;
-
- # Set a flag if we should not weld. It sometimes looks best not to weld
- # when the opening and closing tokens are very close. However, there
- # is a danger that we will create a "blinker", which oscillates between
- # two semi-stable states, if we do not weld. So the rules for
- # not welding have to be carefully defined and tested.
- my $do_not_weld;
- if ( !$touch_previous_pair ) {
-
- # If this pair is not adjacent to the previous pair (skipped or
- # not), then measure lengths from the start of line of oo
-
- my $rK_range = $rlines->[$iline_oo]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- $starting_lentot = $rLL->[$Kfirst]->[_CUMULATIVE_LENGTH_];
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$Kfirst]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
-
- # DO-NOT-WELD RULE 1:
- # Do not weld something that looks like the start of a two-line
- # function call, like this:
- # $trans->add_transformation(
- # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
- # We will look for a semicolon after the closing paren.
-
- # We want to weld something complex, like this though
- # my $compass = uc( opposite_direction( line_to_canvas_direction(
- # @{ $coords[0] }, @{ $coords[1] } ) ) );
- # Otherwise we will get a 'blinker'
-
- my $iline_oc = $outer_closing->[_LINE_INDEX_];
- if ( $iline_oc <= $iline_oo + 1 ) {
-
- # Look for following semicolon...
- my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
- my $next_nonblank_type =
- defined($Knext_nonblank)
- ? $rLL->[$Knext_nonblank]->[_TYPE_]
- : 'b';
- if ( $next_nonblank_type eq ';' ) {
-
- # Then do not weld if no other containers between inner
- # opening and closing.
- my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
- if ( $Knext_seq_item == $Kinner_closing ) {
- $do_not_weld ||= 1;
- }
- }
- }
- }
-
- my $iline_ic = $inner_closing->[_LINE_INDEX_];
-
- # DO-NOT-WELD RULE 2:
- # Do not weld an opening paren to an inner one line brace block
- # We will just use old line numbers for this test and require
- # iterations if necessary for convergence
-
- # For example, otherwise we could cause the opening paren
- # in the following example to separate from the caller name
- # as here:
-
- # $_[0]->code_handler
- # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
-
- # Here is another example where we do not want to weld:
- # $wrapped->add_around_modifier(
- # sub { push @tracelog => 'around 1'; $_[0]->(); } );
-
- # If the one line sub block gets broken due to length or by the
- # user, then we can weld. The result will then be:
- # $wrapped->add_around_modifier( sub {
- # push @tracelog => 'around 1';
- # $_[0]->();
- # } );
-
- if ( $iline_ic == $iline_io ) {
-
- my $token_oo = $outer_opening->[_TOKEN_];
- my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
- my $token_io = $inner_opening->[_TOKEN_];
- $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
- }
-
- # DO-NOT-WELD RULE 3:
- # Do not weld if this makes our line too long
- $do_not_weld ||= $excess_length_to->($inner_opening) > 0;
-
- if ($do_not_weld) {
-
- # After neglecting a pair, we start measuring from start of point io
- $starting_lentot = $inner_opening->[_CUMULATIVE_LENGTH_];
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $inner_opening->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
-
- # Normally, a broken pair should not decrease indentation of
- # intermediate tokens:
- ## if ( $last_pair_broken ) { next }
- # However, for long strings of welded tokens, such as '{{{{{{...'
- # we will allow broken pairs to also remove indentation.
- # This will keep very long strings of opening and closing
- # braces from marching off to the right. We will do this if the
- # number of tokens in a weld before the broken weld is 4 or more.
- # This rule will mainly be needed for test scripts, since typical
- # welds have fewer than about 4 welded tokens.
- if ( !@welds || @{ $welds[-1] } < 4 ) { next }
- }
-
- # otherwise start new weld ...
- elsif ($starting_new_weld) {
- push @welds, $item;
- }
-
- # ... or extend current weld
- else {
- unshift @{ $welds[-1] }, $inner_seqno;
- }
-
- ########################################################################
- # After welding, reduce the indentation level if all intermediate tokens
- ########################################################################
-
- my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
- if ( $dlevel != 0 ) {
- my $Kstart = $Kinner_opening;
- my $Kstop = $Kinner_closing;
- for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
- $rLL->[$KK]->[_LEVEL_] += $dlevel;
- }
- }
- }
-
- #####################################################
- # Define weld lengths needed later to set line breaks
- #####################################################
- foreach my $item (@welds) {
-
- # sweep from inner to outer
-
- my $inner_seqno;
- my $len_close = 0;
- my $len_open = 0;
- foreach my $outer_seqno ( @{$item} ) {
- if ($inner_seqno) {
-
- my $dlen_opening =
- $length_to_opening_seqno->($inner_seqno) -
- $length_to_opening_seqno->($outer_seqno);
-
- my $dlen_closing =
- $length_to_closing_seqno->($outer_seqno) -
- $length_to_closing_seqno->($inner_seqno);
-
- $len_open += $dlen_opening;
- $len_close += $dlen_closing;
-
- }
-
- $weld_len_left_closing{$outer_seqno} = $len_close;
- $weld_len_right_opening{$outer_seqno} = $len_open;
-
- $inner_seqno = $outer_seqno;
- }
-
- # sweep from outer to inner
- foreach my $seqno ( reverse @{$item} ) {
- $weld_len_right_closing{$seqno} =
- $len_close - $weld_len_left_closing{$seqno};
- $weld_len_left_opening{$seqno} =
- $len_open - $weld_len_right_opening{$seqno};
- }
- }
-
- #####################################
- # DEBUG
- #####################################
- if (0) {
- my $count = 0;
- local $" = ')(';
- foreach my $weld (@welds) {
- print "\nWeld number $count has seq: (@{$weld})\n";
- foreach my $seq ( @{$weld} ) {
- print <<EOM;
- seq=$seq
- left_opening=$weld_len_left_opening{$seq};
- right_opening=$weld_len_right_opening{$seq};
- left_closing=$weld_len_left_closing{$seq};
- right_closing=$weld_len_right_closing{$seq};
-EOM
- }
-
- $count++;
- }
- }
- return;
-}
-
-sub weld_len_left {
-
- my ( $seqno, $type_or_tok ) = @_;
-
- # Given the sequence number of a token, and the token or its type,
- # return the length of any weld to its left
-
- my $weld_len;
- if ($seqno) {
- if ( $is_closing_type{$type_or_tok} ) {
- $weld_len = $weld_len_left_closing{$seqno};
- }
- elsif ( $is_opening_type{$type_or_tok} ) {
- $weld_len = $weld_len_left_opening{$seqno};
- }
- }
- if ( !defined($weld_len) ) { $weld_len = 0 }
- return $weld_len;
-}
-
-sub weld_len_right {
-
- my ( $seqno, $type_or_tok ) = @_;
-
- # Given the sequence number of a token, and the token or its type,
- # return the length of any weld to its right
-
- my $weld_len;
- if ($seqno) {
- if ( $is_closing_type{$type_or_tok} ) {
- $weld_len = $weld_len_right_closing{$seqno};
- }
- elsif ( $is_opening_type{$type_or_tok} ) {
- $weld_len = $weld_len_right_opening{$seqno};
- }
- }
- if ( !defined($weld_len) ) { $weld_len = 0 }
- return $weld_len;
-}
-
-sub weld_len_left_to_go {
- my ($i) = @_;
-
- # Given the index of a token in the 'to_go' array
- # return the length of any weld to its left
- return if ( $i < 0 );
- my $weld_len =
- weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
- return $weld_len;
-}
-
-sub weld_len_right_to_go {
- my ($i) = @_;
-
- # Given the index of a token in the 'to_go' array
- # return the length of any weld to its right
- return if ( $i < 0 );
- if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
- my $weld_len =
- weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
- return $weld_len;
-}
-
-sub link_sequence_items {
-
- # This has been merged into 'respace_tokens' but retained for reference
- my $self = shift;
- my $rlines = $self->{rlines};
- my $rLL = $self->{rLL};
-
- # We walk the token list and make links to the next sequence item.
- # We also define these hashes to container tokens using sequence number as
- # the key:
- my $K_opening_container = {}; # opening [ { or (
- my $K_closing_container = {}; # closing ] } or )
- my $K_opening_ternary = {}; # opening ? of ternary
- my $K_closing_ternary = {}; # closing : of ternary
-
- # sub to link preceding nodes forward to a new node type
- my $link_back = sub {
- my ( $Ktop, $key ) = @_;
-
- my $Kprev = $Ktop - 1;
- while ( $Kprev >= 0
- && !defined( $rLL->[$Kprev]->[$key] ) )
- {
- $rLL->[$Kprev]->[$key] = $Ktop;
- $Kprev -= 1;
- }
- };
-
- for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
-
- $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
-
- my $type = $rLL->[$KK]->[_TYPE_];
-
- next if ( $type eq 'b' );
-
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
-
- $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
-
- my $token = $rLL->[$KK]->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
-
- $K_opening_container->{$type_sequence} = $KK;
- }
- elsif ( $is_closing_token{$token} ) {
-
- $K_closing_container->{$type_sequence} = $KK;
- }
-
- # These are not yet used but could be useful
- else {
- if ( $token eq '?' ) {
- $K_opening_ternary->{$type_sequence} = $KK;
- }
- elsif ( $token eq ':' ) {
- $K_closing_ternary->{$type_sequence} = $KK;
- }
- else {
- Fault(<<EOM);
-Unknown sequenced token type '$type'. Expecting one of '{[(?:)]}'
-EOM
- }
- }
- }
- }
-
- $self->{K_opening_container} = $K_opening_container;
- $self->{K_closing_container} = $K_closing_container;
- $self->{K_opening_ternary} = $K_opening_ternary;
- $self->{K_closing_ternary} = $K_closing_ternary;
- return;
-}
-
-sub sum_token_lengths {
- my $self = shift;
-
- # This has been merged into 'respace_tokens' but retained for reference
- my $rLL = $self->{rLL};
- my $cumulative_length = 0;
- for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
-
- # Save the length sum to just BEFORE this token
- $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
-
- # now set the length of this token
- my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
-
- $cumulative_length += $token_length;
- }
- return;
-}
-
-sub resync_lines_and_tokens {
-
- my $self = shift;
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- my $rlines = $self->{rlines};
-
- # Re-construct the arrays of tokens associated with the original input lines
- # since they have probably changed due to inserting and deleting blanks
- # and a few other tokens.
-
- my $Kmax = -1;
-
- # This is the next token and its line index:
- my $Knext = 0;
- my $inext;
- if ( defined($rLL) && @{$rLL} ) {
- $Kmax = @{$rLL} - 1;
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
-
- my $get_inext = sub {
- if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
- else {
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
- return $inext;
- };
-
- # Remember the most recently output token index
- my $Klast_out;
-
- my $iline = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $iline++;
- my $line_type = $line_of_tokens->{_line_type};
- if ( $line_type eq 'CODE' ) {
-
- my @K_array;
- my $rK_range;
- $inext = $get_inext->();
- while ( defined($inext) && $inext <= $iline ) {
- push @{K_array}, $Knext;
- $Knext += 1;
- $inext = $get_inext->();
- }
-
- # Delete any terminal blank token
- if (@K_array) {
- if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
- pop @K_array;
- }
- }
-
- # Define the range of K indexes for the line:
- # $Kfirst = index of first token on line
- # $Klast_out = index of last token on line
- my ( $Kfirst, $Klast );
- if (@K_array) {
- $Kfirst = $K_array[0];
- $Klast = $K_array[-1];
- $Klast_out = $Klast;
- }
-
- # It is only safe to trim the actual line text if the input
- # line had a terminal blank token. Otherwise, we may be
- # in a quote.
- if ( $line_of_tokens->{_ended_in_blank_token} ) {
- $line_of_tokens->{_line_text} =~ s/\s+$//;
- }
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
- }
- }
-
- # There shouldn't be any nodes beyond the last one unless we start
- # allowing 'link_after' calls
- if ( defined($inext) ) {
-
- Fault("unexpected tokens at end of file when reconstructing lines");
- }
-
- return;
-}
-
-sub dump_verbatim {
- my $self = shift;
- my $rlines = $self->{rlines};
- foreach my $line ( @{$rlines} ) {
- my $input_line = $line->{_line_text};
- $self->write_unindented_line($input_line);
- }
- return;
-}
-
-sub finish_formatting {
-
- my ( $self, $severe_error ) = @_;
-
- # The file has been tokenized and is ready to be formatted.
- # All of the relevant data is stored in $self, ready to go.
-
- # output file verbatim if severe error or no formatting requested
- if ( $severe_error || $rOpts->{notidy} ) {
- $self->dump_verbatim();
- $self->wrapup();
- return;
- }
-
- # Make a pass through the lines, looking at lines of CODE and identifying
- # special processing needs, such format skipping sections marked by
- # special comments
- $self->scan_comments();
-
- # Find nested pairs of container tokens for any welding. This information
- # is also needed for adding semicolons, so it is split apart from the
- # welding step.
- $self->find_nested_pairs();
-
- # Make sure everything looks good
- $self->check_line_hashes();
-
- # Future: Place to Begin future Iteration Loop
- # foreach my $it_count(1..$maxit) {
-
- # Future: We must reset some things after the first iteration.
- # This includes:
- # - resetting levels if there was any welding
- # - resetting any phantom semicolons
- # - dealing with any line numbering issues so we can relate final lines
- # line numbers with input line numbers.
- #
- # If ($it_count>1) {
- # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
- # Renumber lines
- # }
-
- # Make a pass through all tokens, adding or deleting any whitespace as
- # required. Also make any other changes, such as adding semicolons.
- # All token changes must be made here so that the token data structure
- # remains fixed for the rest of this iteration.
- $self->respace_tokens();
-
- # Implement any welding needed for the -wn or -cb options
- $self->weld_containers();
-
- # Finishes formatting and write the result to the line sink.
- # Eventually this call should just change the 'rlines' data according to the
- # new line breaks and then return so that we can do an internal iteration
- # before continuing with the next stages of formatting.
- $self->break_lines();
-
- ############################################################
- # A possible future decomposition of 'break_lines()' follows.
- # Benefits:
- # - allow perltidy to do an internal iteration which eliminates
- # many unnecessary steps, such as re-parsing and vertical alignment.
- # This will allow iterations to be automatic.
- # - consolidate all length calculations to allow utf8 alignment
- ############################################################
-
- # Future: Check for convergence of beginning tokens on CODE lines
-
- # Future: End of Iteration Loop
-
- # Future: add_padding($rargs);
-
- # Future: add_closing_side_comments($rargs);
-
- # Future: vertical_alignment($rargs);
-
- # Future: output results
-
- # A final routine to tie up any loose ends
- $self->wrapup();
- return;
-}
-
-sub create_one_line_block {
- ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
- @_;
- return;
-}
-
-sub destroy_one_line_block {
- $index_start_one_line_block = UNDEFINED_INDEX;
- $semicolons_before_block_self_destruct = 0;
- return;
-}
-
-sub leading_spaces_to_go {
-
- # return the number of indentation spaces for a token in the output stream;
- # these were previously stored by 'set_leading_whitespace'.
-
- my $ii = shift;
- if ( $ii < 0 ) { $ii = 0 }
- return get_spaces( $leading_spaces_to_go[$ii] );
-
-}
-
-sub get_spaces {
-
- # return the number of leading spaces associated with an indentation
- # variable $indentation is either a constant number of spaces or an object
- # with a get_spaces method.
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_spaces() : $indentation;
-}
-
-sub get_recoverable_spaces {
-
- # return the number of spaces (+ means shift right, - means shift left)
- # that we would like to shift a group of lines with the same indentation
- # to get them to line up with their opening parens
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
-}
-
-sub get_available_spaces_to_go {
-
- my $ii = shift;
- my $item = $leading_spaces_to_go[$ii];
-
- # return the number of available leading spaces associated with an
- # indentation variable. $indentation is either a constant number of
- # spaces or an object with a get_available_spaces method.
- return ref($item) ? $item->get_available_spaces() : 0;
-}
-
-sub new_lp_indentation_item {
-
- # this is an interface to the IndentationItem class
- my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
-
- # A negative level implies not to store the item in the item_list
- my $index = 0;
- if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
-
- my $item = Perl::Tidy::IndentationItem->new(
- $spaces, $level,
- $ci_level, $available_spaces,
- $index, $gnu_sequence_number,
- $align_paren, $max_gnu_stack_index,
- $line_start_index_to_go,
- );
-
- if ( $level >= 0 ) {
- $gnu_item_list[$max_gnu_item_index] = $item;
- }
-
- return $item;
-}
-
-sub set_leading_whitespace {
-
- # This routine defines leading whitespace
- # given: the level and continuation_level of a token,
- # define: space count of leading string which would apply if it
- # were the first token of a new line.
-
- my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
-
- # Adjust levels if necessary to recycle whitespace:
- # given $level_abs, the absolute level
- # define $level, a possibly reduced level for whitespace
- my $level = $level_abs;
- if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
- if ( $level_abs < $whitespace_last_level ) {
- pop(@whitespace_level_stack);
- }
- if ( !@whitespace_level_stack ) {
- push @whitespace_level_stack, $level_abs;
- }
- elsif ( $level_abs > $whitespace_last_level ) {
- $level = $whitespace_level_stack[-1] +
- ( $level_abs - $whitespace_last_level );
-
- if (
- # 1 Try to break at a block brace
- (
- $level > $rOpts_whitespace_cycle
- && $last_nonblank_type eq '{'
- && $last_nonblank_token eq '{'
- )
-
- # 2 Then either a brace or bracket
- || ( $level > $rOpts_whitespace_cycle + 1
- && $last_nonblank_token =~ /^[\{\[]$/ )
-
- # 3 Then a paren too
- || $level > $rOpts_whitespace_cycle + 2
- )
- {
- $level = 1;
- }
- push @whitespace_level_stack, $level;
- }
- $level = $whitespace_level_stack[-1];
- }
- $whitespace_last_level = $level_abs;
-
- # modify for -bli, which adds one continuation indentation for
- # opening braces
- if ( $rOpts_brace_left_and_indent
- && $max_index_to_go == 0
- && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
- {
- $ci_level++;
- }
-
- # patch to avoid trouble when input file has negative indentation.
- # other logic should catch this error.
- if ( $level < 0 ) { $level = 0 }
-
- #-------------------------------------------
- # handle the standard indentation scheme
- #-------------------------------------------
- unless ($rOpts_line_up_parentheses) {
- my $space_count =
- $ci_level * $rOpts_continuation_indentation +
- $level * $rOpts_indent_columns;
- my $ci_spaces =
- ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
-
- if ($in_continued_quote) {
- $space_count = 0;
- $ci_spaces = 0;
- }
- $leading_spaces_to_go[$max_index_to_go] = $space_count;
- $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
- return;
- }
-
- #-------------------------------------------------------------
- # handle case of -lp indentation..
- #-------------------------------------------------------------
-
- # The continued_quote flag means that this is the first token of a
- # line, and it is the continuation of some kind of multi-line quote
- # or pattern. It requires special treatment because it must have no
- # added leading whitespace. So we create a special indentation item
- # which is not in the stack.
- if ($in_continued_quote) {
- my $space_count = 0;
- my $available_space = 0;
- $level = -1; # flag to prevent storing in item_list
- $leading_spaces_to_go[$max_index_to_go] =
- $reduced_spaces_to_go[$max_index_to_go] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, 0 );
- return;
- }
-
- # get the top state from the stack
- my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
- my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
- my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
-
- my $type = $types_to_go[$max_index_to_go];
- my $token = $tokens_to_go[$max_index_to_go];
- my $total_depth = $nesting_depth_to_go[$max_index_to_go];
-
- if ( $type eq '{' || $type eq '(' ) {
-
- $gnu_comma_count{ $total_depth + 1 } = 0;
- $gnu_arrow_count{ $total_depth + 1 } = 0;
-
- # If we come to an opening token after an '=' token of some type,
- # see if it would be helpful to 'break' after the '=' to save space
- my $last_equals = $last_gnu_equals{$total_depth};
- if ( $last_equals && $last_equals > $line_start_index_to_go ) {
-
- # find the position if we break at the '='
- my $i_test = $last_equals;
- if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
-
- # TESTING
- ##my $too_close = ($i_test==$max_index_to_go-1);
-
- my $test_position = total_line_length( $i_test, $max_index_to_go );
- my $mll = maximum_line_length($i_test);
-
- if (
-
- # the equals is not just before an open paren (testing)
- ##!$too_close &&
-
- # if we are beyond the midpoint
- $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
-
- # or we are beyond the 1/4 point and there was an old
- # break at the equals
- || (
- $gnu_position_predictor >
- $mll - $rOpts_maximum_line_length * 3 / 4
- && (
- $old_breakpoint_to_go[$last_equals]
- || ( $last_equals > 0
- && $old_breakpoint_to_go[ $last_equals - 1 ] )
- || ( $last_equals > 1
- && $types_to_go[ $last_equals - 1 ] eq 'b'
- && $old_breakpoint_to_go[ $last_equals - 2 ] )
- )
- )
- )
- {
-
- # then make the switch -- note that we do not set a real
- # breakpoint here because we may not really need one; sub
- # scan_list will do that if necessary
- $line_start_index_to_go = $i_test + 1;
- $gnu_position_predictor = $test_position;
- }
- }
- }
-
- my $halfway =
- maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
-
- # Check for decreasing depth ..
- # Note that one token may have both decreasing and then increasing
- # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
- # in this example we would first go back to (1,0) then up to (2,0)
- # in a single call.
- if ( $level < $current_level || $ci_level < $current_ci_level ) {
-
- # loop to find the first entry at or completely below this level
- my ( $lev, $ci_lev );
- while (1) {
- if ($max_gnu_stack_index) {
-
- # save index of token which closes this level
- $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
-
- # Undo any extra indentation if we saw no commas
- my $available_spaces =
- $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
-
- my $comma_count = 0;
- my $arrow_count = 0;
- if ( $type eq '}' || $type eq ')' ) {
- $comma_count = $gnu_comma_count{$total_depth};
- $arrow_count = $gnu_arrow_count{$total_depth};
- $comma_count = 0 unless $comma_count;
- $arrow_count = 0 unless $arrow_count;
- }
- $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
- $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
-
- if ( $available_spaces > 0 ) {
-
- if ( $comma_count <= 0 || $arrow_count > 0 ) {
-
- my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
- my $seqno =
- $gnu_stack[$max_gnu_stack_index]
- ->get_sequence_number();
-
- # Be sure this item was created in this batch. This
- # should be true because we delete any available
- # space from open items at the end of each batch.
- if ( $gnu_sequence_number != $seqno
- || $i > $max_gnu_item_index )
- {
- warning(
-"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
- );
- report_definite_bug();
- }
-
- else {
- if ( $arrow_count == 0 ) {
- $gnu_item_list[$i]
- ->permanently_decrease_available_spaces(
- $available_spaces);
- }
- else {
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces(
- $available_spaces);
- }
- foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
- $gnu_item_list[$j]
- ->decrease_SPACES($available_spaces);
- }
- }
- }
- }
-
- # go down one level
- --$max_gnu_stack_index;
- $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
- $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
-
- # stop when we reach a level at or below the current level
- if ( $lev <= $level && $ci_lev <= $ci_level ) {
- $space_count =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
- $current_level = $lev;
- $current_ci_level = $ci_lev;
- last;
- }
- }
-
- # reached bottom of stack .. should never happen because
- # only negative levels can get here, and $level was forced
- # to be positive above.
- else {
- warning(
-"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
- );
- report_definite_bug();
- last;
- }
- }
- }
-
- # handle increasing depth
- if ( $level > $current_level || $ci_level > $current_ci_level ) {
-
- # Compute the standard incremental whitespace. This will be
- # the minimum incremental whitespace that will be used. This
- # choice results in a smooth transition between the gnu-style
- # and the standard style.
- my $standard_increment =
- ( $level - $current_level ) * $rOpts_indent_columns +
- ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
-
- # Now we have to define how much extra incremental space
- # ("$available_space") we want. This extra space will be
- # reduced as necessary when long lines are encountered or when
- # it becomes clear that we do not have a good list.
- my $available_space = 0;
- my $align_paren = 0;
- my $excess = 0;
-
- # initialization on empty stack..
- if ( $max_gnu_stack_index == 0 ) {
- $space_count = $level * $rOpts_indent_columns;
- }
-
- # if this is a BLOCK, add the standard increment
- elsif ($last_nonblank_block_type) {
- $space_count += $standard_increment;
- }
-
- # if last nonblank token was not structural indentation,
- # just use standard increment
- elsif ( $last_nonblank_type ne '{' ) {
- $space_count += $standard_increment;
- }
-
- # otherwise use the space to the first non-blank level change token
- else {
-
- $space_count = $gnu_position_predictor;
-
- my $min_gnu_indentation =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
-
- $available_space = $space_count - $min_gnu_indentation;
- if ( $available_space >= $standard_increment ) {
- $min_gnu_indentation += $standard_increment;
- }
- elsif ( $available_space > 1 ) {
- $min_gnu_indentation += $available_space + 1;
- }
- elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
- if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
- $min_gnu_indentation += 2;
- }
- else {
- $min_gnu_indentation += 1;
- }
- }
- else {
- $min_gnu_indentation += $standard_increment;
- }
- $available_space = $space_count - $min_gnu_indentation;
-
- if ( $available_space < 0 ) {
- $space_count = $min_gnu_indentation;
- $available_space = 0;
- }
- $align_paren = 1;
- }
-
- # update state, but not on a blank token
- if ( $types_to_go[$max_index_to_go] ne 'b' ) {
-
- $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
-
- ++$max_gnu_stack_index;
- $gnu_stack[$max_gnu_stack_index] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, $align_paren );
-
- # If the opening paren is beyond the half-line length, then
- # we will use the minimum (standard) indentation. This will
- # help avoid problems associated with running out of space
- # near the end of a line. As a result, in deeply nested
- # lists, there will be some indentations which are limited
- # to this minimum standard indentation. But the most deeply
- # nested container will still probably be able to shift its
- # parameters to the right for proper alignment, so in most
- # cases this will not be noticeable.
- if ( $available_space > 0 && $space_count > $halfway ) {
- $gnu_stack[$max_gnu_stack_index]
- ->tentatively_decrease_available_spaces($available_space);
- }
- }
- }
-
- # Count commas and look for non-list characters. Once we see a
- # non-list character, we give up and don't look for any more commas.
- if ( $type eq '=>' ) {
- $gnu_arrow_count{$total_depth}++;
-
- # tentatively treating '=>' like '=' for estimating breaks
- # TODO: this could use some experimentation
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
-
- elsif ( $type eq ',' ) {
- $gnu_comma_count{$total_depth}++;
- }
-
- elsif ( $is_assignment{$type} ) {
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
-
- # this token might start a new line
- # if this is a non-blank..
- if ( $type ne 'b' ) {
-
- # and if ..
- if (
-
- # this is the first nonblank token of the line
- $max_index_to_go == 1 && $types_to_go[0] eq 'b'
-
- # or previous character was one of these:
- || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
-
- # or previous character was opening and this does not close it
- || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
- || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
-
- # or this token is one of these:
- || $type =~ /^([\.]|\|\||\&\&)$/
-
- # or this is a closing structure
- || ( $last_nonblank_type_to_go eq '}'
- && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
-
- # or previous token was keyword 'return'
- || ( $last_nonblank_type_to_go eq 'k'
- && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
-
- # or starting a new line at certain keywords is fine
- || ( $type eq 'k'
- && $is_if_unless_and_or_last_next_redo_return{$token} )
-
- # or this is after an assignment after a closing structure
- || (
- $is_assignment{$last_nonblank_type_to_go}
- && (
- $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
-
- # and it is significantly to the right
- || $gnu_position_predictor > $halfway
- )
- )
- )
- {
- check_for_long_gnu_style_lines();
- $line_start_index_to_go = $max_index_to_go;
-
- # back up 1 token if we want to break before that type
- # otherwise, we may strand tokens like '?' or ':' on a line
- if ( $line_start_index_to_go > 0 ) {
- if ( $last_nonblank_type_to_go eq 'k' ) {
-
- if ( $want_break_before{$last_nonblank_token_to_go} ) {
- $line_start_index_to_go--;
- }
- }
- elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
- $line_start_index_to_go--;
- }
- }
- }
- }
-
- # remember the predicted position of this token on the output line
- if ( $max_index_to_go > $line_start_index_to_go ) {
- $gnu_position_predictor =
- total_line_length( $line_start_index_to_go, $max_index_to_go );
- }
- else {
- $gnu_position_predictor =
- $space_count + $token_lengths_to_go[$max_index_to_go];
- }
-
- # store the indentation object for this token
- # this allows us to manipulate the leading whitespace
- # (in case we have to reduce indentation to fit a line) without
- # having to change any token values
- $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
- $reduced_spaces_to_go[$max_index_to_go] =
- ( $max_gnu_stack_index > 0 && $ci_level )
- ? $gnu_stack[ $max_gnu_stack_index - 1 ]
- : $gnu_stack[$max_gnu_stack_index];
- return;
-}
-
-sub check_for_long_gnu_style_lines {
-
- # look at the current estimated maximum line length, and
- # remove some whitespace if it exceeds the desired maximum
-
- # this is only for the '-lp' style
- return unless ($rOpts_line_up_parentheses);
-
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
-
- # see if we have exceeded the maximum desired line length
- # keep 2 extra free because they are needed in some cases
- # (result of trial-and-error testing)
- my $spaces_needed =
- $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
-
- return if ( $spaces_needed <= 0 );
-
- # We are over the limit, so try to remove a requested number of
- # spaces from leading whitespace. We are only allowed to remove
- # from whitespace items created on this batch, since others have
- # already been used and cannot be undone.
- my @candidates = ();
- my $i;
-
- # loop over all whitespace items created for the current batch
- for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
- my $item = $gnu_item_list[$i];
-
- # item must still be open to be a candidate (otherwise it
- # cannot influence the current token)
- next if ( $item->get_closed() >= 0 );
-
- my $available_spaces = $item->get_available_spaces();
-
- if ( $available_spaces > 0 ) {
- push( @candidates, [ $i, $available_spaces ] );
- }
- }
-
- return unless (@candidates);
-
- # sort by available whitespace so that we can remove whitespace
- # from the maximum available first
- @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
-
- # keep removing whitespace until we are done or have no more
- foreach my $candidate (@candidates) {
- my ( $i, $available_spaces ) = @{$candidate};
- my $deleted_spaces =
- ( $available_spaces > $spaces_needed )
- ? $spaces_needed
- : $available_spaces;
-
- # remove the incremental space from this item
- $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
-
- my $i_debug = $i;
-
- # update the leading whitespace of this item and all items
- # that came after it
- for ( ; $i <= $max_gnu_item_index ; $i++ ) {
-
- my $old_spaces = $gnu_item_list[$i]->get_spaces();
- if ( $old_spaces >= $deleted_spaces ) {
- $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
- }
-
- # shouldn't happen except for code bug:
- else {
- my $level = $gnu_item_list[$i_debug]->get_level();
- my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
- my $old_level = $gnu_item_list[$i]->get_level();
- my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
- warning(
-"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
- );
- report_definite_bug();
- }
- }
- $gnu_position_predictor -= $deleted_spaces;
- $spaces_needed -= $deleted_spaces;
- last unless ( $spaces_needed > 0 );
- }
- return;
-}
-
-sub finish_lp_batch {
-
- # This routine is called once after each output stream batch is
- # finished to undo indentation for all incomplete -lp
- # indentation levels. It is too risky to leave a level open,
- # because then we can't backtrack in case of a long line to follow.
- # This means that comments and blank lines will disrupt this
- # indentation style. But the vertical aligner may be able to
- # get the space back if there are side comments.
-
- # this is only for the 'lp' style
- return unless ($rOpts_line_up_parentheses);
-
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
-
- # loop over all whitespace items created for the current batch
- foreach my $i ( 0 .. $max_gnu_item_index ) {
- my $item = $gnu_item_list[$i];
-
- # only look for open items
- next if ( $item->get_closed() >= 0 );
-
- # Tentatively remove all of the available space
- # (The vertical aligner will try to get it back later)
- my $available_spaces = $item->get_available_spaces();
- if ( $available_spaces > 0 ) {
-
- # delete incremental space for this item
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces($available_spaces);
-
- # Reduce the total indentation space of any nodes that follow
- # Note that any such nodes must necessarily be dependents
- # of this node.
- foreach ( $i + 1 .. $max_gnu_item_index ) {
- $gnu_item_list[$_]->decrease_SPACES($available_spaces);
- }
- }
- }
- return;
-}
-
-sub reduce_lp_indentation {
-
- # reduce the leading whitespace at token $i if possible by $spaces_needed
- # (a large value of $spaces_needed will remove all excess space)
- # NOTE: to be called from scan_list only for a sequence of tokens
- # contained between opening and closing parens/braces/brackets
-
- my ( $i, $spaces_wanted ) = @_;
- my $deleted_spaces = 0;
-
- my $item = $leading_spaces_to_go[$i];
- my $available_spaces = $item->get_available_spaces();
-
- if (
- $available_spaces > 0
- && ( ( $spaces_wanted <= $available_spaces )
- || !$item->get_have_child() )
- )
- {
-
- # we'll remove these spaces, but mark them as recoverable
- $deleted_spaces =
- $item->tentatively_decrease_available_spaces($spaces_wanted);
- }
-
- return $deleted_spaces;
-}
-
-sub token_sequence_length {
-
- # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
- # returns 0 if $ibeg > $iend (shouldn't happen)
- my ( $ibeg, $iend ) = @_;
- return 0 if ( $iend < 0 || $ibeg > $iend );
- return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
- return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
-}
-
-sub total_line_length {
-
- # return length of a line of tokens ($ibeg .. $iend)
- my ( $ibeg, $iend ) = @_;
- return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
-}
-
-sub maximum_line_length_for_level {
-
- # return maximum line length for line starting with a given level
- my $maximum_line_length = $rOpts_maximum_line_length;
-
- # Modify if -vmll option is selected
- if ($rOpts_variable_maximum_line_length) {
- my $level = shift;
- if ( $level < 0 ) { $level = 0 }
- $maximum_line_length += $level * $rOpts_indent_columns;
- }
- return $maximum_line_length;
-}
-
-sub maximum_line_length {
-
- # return maximum line length for line starting with the token at given index
- my $ii = shift;
- return maximum_line_length_for_level( $levels_to_go[$ii] );
-}
-
-sub excess_line_length {
-
- # return number of characters by which a line of tokens ($ibeg..$iend)
- # exceeds the allowable line length.
- my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
-
- # Include left and right weld lengths unless requested not to
- my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend);
- my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
-
- return total_line_length( $ibeg, $iend ) + $wl + $wr -
- maximum_line_length($ibeg);
-}
-
-sub wrapup {
-
- # flush buffer and write any informative messages
- my $self = shift;
-
- $self->flush();
- $file_writer_object->decrement_output_line_number()
- ; # fix up line number since it was incremented
- we_are_at_the_last_line();
- if ( $added_semicolon_count > 0 ) {
- my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
- my $what =
- ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
- write_logfile_entry("$added_semicolon_count $what added:\n");
- write_logfile_entry(
- " $first at input line $first_added_semicolon_at\n");
-
- if ( $added_semicolon_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_added_semicolon_at\n");
- }
- write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
- write_logfile_entry("\n");
- }
-
- if ( $deleted_semicolon_count > 0 ) {
- my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
- my $what =
- ( $deleted_semicolon_count > 1 )
- ? "semicolons were"
- : "semicolon was";
- write_logfile_entry(
- "$deleted_semicolon_count unnecessary $what deleted:\n");
- write_logfile_entry(
- " $first at input line $first_deleted_semicolon_at\n");
-
- if ( $deleted_semicolon_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_deleted_semicolon_at\n");
- }
- write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
- write_logfile_entry("\n");
- }
-
- if ( $embedded_tab_count > 0 ) {
- my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
- my $what =
- ( $embedded_tab_count > 1 )
- ? "quotes or patterns"
- : "quote or pattern";
- write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
- write_logfile_entry(
-"This means the display of this script could vary with device or software\n"
- );
- write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
-
- if ( $embedded_tab_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_embedded_tab_at\n");
- }
- write_logfile_entry("\n");
- }
-
- if ($first_tabbing_disagreement) {
- write_logfile_entry(
-"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
- );
- }
-
- if ($in_tabbing_disagreement) {
- write_logfile_entry(
-"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
- );
- }
- else {
-
- if ($last_tabbing_disagreement) {
-
- write_logfile_entry(
-"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
- );
- }
- else {
- write_logfile_entry("No indentation disagreement seen\n");
- }
- }
- if ($first_tabbing_disagreement) {
- write_logfile_entry(
-"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
- );
- }
- write_logfile_entry("\n");
-
- $vertical_aligner_object->report_anything_unusual();
-
- $file_writer_object->report_line_length_errors();
-
- return;
-}
-
-sub check_options {
-
- # This routine is called to check the Opts hash after it is defined
- $rOpts = shift;
-
- make_static_block_comment_pattern();
- make_static_side_comment_pattern();
- make_closing_side_comment_prefix();
- make_closing_side_comment_list_pattern();
- $format_skipping_pattern_begin =
- make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
- $format_skipping_pattern_end =
- make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
-
- # If closing side comments ARE selected, then we can safely
- # delete old closing side comments unless closing side comment
- # warnings are requested. This is a good idea because it will
- # eliminate any old csc's which fall below the line count threshold.
- # We cannot do this if warnings are turned on, though, because we
- # might delete some text which has been added. So that must
- # be handled when comments are created.
- if ( $rOpts->{'closing-side-comments'} ) {
- if ( !$rOpts->{'closing-side-comment-warnings'} ) {
- $rOpts->{'delete-closing-side-comments'} = 1;
- }
- }
-
- # If closing side comments ARE NOT selected, but warnings ARE
- # selected and we ARE DELETING csc's, then we will pretend to be
- # adding with a huge interval. This will force the comments to be
- # generated for comparison with the old comments, but not added.
- elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
- if ( $rOpts->{'delete-closing-side-comments'} ) {
- $rOpts->{'delete-closing-side-comments'} = 0;
- $rOpts->{'closing-side-comments'} = 1;
- $rOpts->{'closing-side-comment-interval'} = 100000000;
- }
- }
-
- make_bli_pattern();
- make_block_brace_vertical_tightness_pattern();
- make_blank_line_pattern();
-
- prepare_cuddled_block_types();
- if ( $rOpts->{'dump-cuddled-block-list'} ) {
- dump_cuddled_block_list(*STDOUT);
- Perl::Tidy::Exit 0;
- }
-
- if ( $rOpts->{'line-up-parentheses'} ) {
-
- if ( $rOpts->{'indent-only'}
- || !$rOpts->{'add-newlines'}
- || !$rOpts->{'delete-old-newlines'} )
- {
- Perl::Tidy::Warn <<EOM;
------------------------------------------------------------------------
-Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
-
-The -lp indentation logic requires that perltidy be able to coordinate
-arbitrarily large numbers of line breakpoints. This isn't possible
-with these flags. Sometimes an acceptable workaround is to use -wocb=3
------------------------------------------------------------------------
-EOM
- $rOpts->{'line-up-parentheses'} = 0;
- }
- }
-
- # At present, tabs are not compatible with the line-up-parentheses style
- # (it would be possible to entab the total leading whitespace
- # just prior to writing the line, if desired).
- if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
- Perl::Tidy::Warn <<EOM;
-Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
-
- # Likewise, tabs are not compatible with outdenting..
- if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
- Perl::Tidy::Warn <<EOM;
-Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
-
- if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
- Perl::Tidy::Warn <<EOM;
-Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
-
- if ( !$rOpts->{'space-for-semicolon'} ) {
- $want_left_space{'f'} = -1;
- }
-
- if ( $rOpts->{'space-terminal-semicolon'} ) {
- $want_left_space{';'} = 1;
- }
-
- # implement outdenting preferences for keywords
- %outdent_keyword = ();
- unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
- @_ = qw(next last redo goto return); # defaults
- }
-
- # FUTURE: if not a keyword, assume that it is an identifier
- foreach (@_) {
- if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
- $outdent_keyword{$_} = 1;
- }
- else {
- Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
- }
- }
-
- # implement user whitespace preferences
- if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
- @want_left_space{@_} = (1) x scalar(@_);
- }
-
- if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
- @want_right_space{@_} = (1) x scalar(@_);
- }
-
- if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
- @want_left_space{@_} = (-1) x scalar(@_);
- }
-
- if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
- @want_right_space{@_} = (-1) x scalar(@_);
- }
- if ( $rOpts->{'dump-want-left-space'} ) {
- dump_want_left_space(*STDOUT);
- Perl::Tidy::Exit 0;
- }
-
- if ( $rOpts->{'dump-want-right-space'} ) {
- dump_want_right_space(*STDOUT);
- Perl::Tidy::Exit 0;
- }
-
- # default keywords for which space is introduced before an opening paren
- # (at present, including them messes up vertical alignment)
- @_ = qw(my local our and or err eq ne if else elsif until
- unless while for foreach return switch case given when catch);
- @space_after_keyword{@_} = (1) x scalar(@_);
-
- # first remove any or all of these if desired
- if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
-
- # -nsak='*' selects all the above keywords
- if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
- @space_after_keyword{@_} = (0) x scalar(@_);
- }
-
- # then allow user to add to these defaults
- if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
- @space_after_keyword{@_} = (1) x scalar(@_);
- }
-
- # implement user break preferences
- my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- . : ? && || and or err xor
- );
-
- my $break_after = sub {
- foreach my $tok (@_) {
- if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
- }
- }
- };
-
- my $break_before = sub {
- foreach my $tok (@_) {
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
- }
- }
- };
-
- $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
- $break_before->(@all_operators)
- if ( $rOpts->{'break-before-all-operators'} );
-
- $break_after->( split_words( $rOpts->{'want-break-after'} ) );
- $break_before->( split_words( $rOpts->{'want-break-before'} ) );
-
- # make note if breaks are before certain key types
- %want_break_before = ();
- foreach my $tok ( @all_operators, ',' ) {
- $want_break_before{$tok} =
- $left_bond_strength{$tok} < $right_bond_strength{$tok};
- }
-
- # Coordinate ?/: breaks, which must be similar
- if ( !$want_break_before{':'} ) {
- $want_break_before{'?'} = $want_break_before{':'};
- $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
- $left_bond_strength{'?'} = NO_BREAK;
- }
-
- # Define here tokens which may follow the closing brace of a do statement
- # on the same line, as in:
- # } while ( $something);
- @_ = qw(until while unless if ; : );
- push @_, ',';
- @is_do_follower{@_} = (1) x scalar(@_);
-
- # These tokens may follow the closing brace of an if or elsif block.
- # In other words, for cuddled else we want code to look like:
- # } elsif ( $something) {
- # } else {
- if ( $rOpts->{'cuddled-else'} ) {
- @_ = qw(else elsif);
- @is_if_brace_follower{@_} = (1) x scalar(@_);
- }
- else {
- %is_if_brace_follower = ();
- }
-
- # nothing can follow the closing curly of an else { } block:
- %is_else_brace_follower = ();
-
- # what can follow a multi-line anonymous sub definition closing curly:
- @_ = qw# ; : => or and && || ~~ !~~ ) #;
- push @_, ',';
- @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
-
- # what can follow a one-line anonymous sub closing curly:
- # one-line anonymous subs also have ']' here...
- # see tk3.t and PP.pm
- @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
- push @_, ',';
- @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
-
- # What can follow a closing curly of a block
- # which is not an if/elsif/else/do/sort/map/grep/eval/sub
- # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
- @_ = qw# ; : => or and && || ) #;
- push @_, ',';
-
- # allow cuddled continue if cuddled else is specified
- if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
-
- @is_other_brace_follower{@_} = (1) x scalar(@_);
-
- $right_bond_strength{'{'} = WEAK;
- $left_bond_strength{'{'} = VERY_STRONG;
-
- # make -l=0 equal to -l=infinite
- if ( !$rOpts->{'maximum-line-length'} ) {
- $rOpts->{'maximum-line-length'} = 1000000;
- }
-
- # make -lbl=0 equal to -lbl=infinite
- if ( !$rOpts->{'long-block-line-count'} ) {
- $rOpts->{'long-block-line-count'} = 1000000;
- }
-
- my $enc = $rOpts->{'character-encoding'};
- if ( $enc && $enc !~ /^(none|utf8)$/i ) {
- Perl::Tidy::Die <<EOM;
-Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
-EOM
- }
-
- my $ole = $rOpts->{'output-line-ending'};
- if ($ole) {
- my %endings = (
- dos => "\015\012",
- win => "\015\012",
- mac => "\015",
- unix => "\012",
- );
-
- # Patch for RT #99514, a memoization issue.
- # Normally, the user enters one of 'dos', 'win', etc, and we change the
- # value in the options parameter to be the corresponding line ending
- # character. But, if we are using memoization, on later passes through
- # here the option parameter will already have the desired ending
- # character rather than the keyword 'dos', 'win', etc. So
- # we must check to see if conversion has already been done and, if so,
- # bypass the conversion step.
- my %endings_inverted = (
- "\015\012" => 'dos',
- "\015\012" => 'win',
- "\015" => 'mac',
- "\012" => 'unix',
- );
-
- if ( defined( $endings_inverted{$ole} ) ) {
-
- # we already have valid line ending, nothing more to do
- }
- else {
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join " ", keys %endings;
- Perl::Tidy::Die <<EOM;
-Unrecognized line ending '$ole'; expecting one of: $str
-EOM
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
- $rOpts->{'preserve-line-endings'} = undef;
- }
- }
- }
-
- # hashes used to simplify setting whitespace
- %tightness = (
- '{' => $rOpts->{'brace-tightness'},
- '}' => $rOpts->{'brace-tightness'},
- '(' => $rOpts->{'paren-tightness'},
- ')' => $rOpts->{'paren-tightness'},
- '[' => $rOpts->{'square-bracket-tightness'},
- ']' => $rOpts->{'square-bracket-tightness'},
- );
- %matching_token = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '?' => ':',
- );
-
- # frequently used parameters
- $rOpts_add_newlines = $rOpts->{'add-newlines'};
- $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
- $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
- $rOpts_block_brace_vertical_tightness =
- $rOpts->{'block-brace-vertical-tightness'};
- $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
- $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
- $rOpts_break_at_old_ternary_breakpoints =
- $rOpts->{'break-at-old-ternary-breakpoints'};
- $rOpts_break_at_old_attribute_breakpoints =
- $rOpts->{'break-at-old-attribute-breakpoints'};
- $rOpts_break_at_old_comma_breakpoints =
- $rOpts->{'break-at-old-comma-breakpoints'};
- $rOpts_break_at_old_keyword_breakpoints =
- $rOpts->{'break-at-old-keyword-breakpoints'};
- $rOpts_break_at_old_logical_breakpoints =
- $rOpts->{'break-at-old-logical-breakpoints'};
- $rOpts_closing_side_comment_else_flag =
- $rOpts->{'closing-side-comment-else-flag'};
- $rOpts_closing_side_comment_maximum_text =
- $rOpts->{'closing-side-comment-maximum-text'};
- $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
- $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
- $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
- $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
- $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
-
- $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
- $rOpts_short_concatenation_item_length =
- $rOpts->{'short-concatenation-item-length'};
-
- $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
- $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
- $rOpts_format_skipping = $rOpts->{'format-skipping'};
- $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
- $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
- $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
- $rOpts_ignore_side_comment_lengths =
- $rOpts->{'ignore-side-comment-lengths'};
-
- # Note that both opening and closing tokens can access the opening
- # and closing flags of their container types.
- %opening_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness'},
- '{' => $rOpts->{'brace-vertical-tightness'},
- '[' => $rOpts->{'square-bracket-vertical-tightness'},
- ')' => $rOpts->{'paren-vertical-tightness'},
- '}' => $rOpts->{'brace-vertical-tightness'},
- ']' => $rOpts->{'square-bracket-vertical-tightness'},
- );
-
- %closing_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness-closing'},
- '{' => $rOpts->{'brace-vertical-tightness-closing'},
- '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- ')' => $rOpts->{'paren-vertical-tightness-closing'},
- '}' => $rOpts->{'brace-vertical-tightness-closing'},
- ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- );
-
- # assume flag for '>' same as ')' for closing qw quotes
- %closing_token_indentation = (
- ')' => $rOpts->{'closing-paren-indentation'},
- '}' => $rOpts->{'closing-brace-indentation'},
- ']' => $rOpts->{'closing-square-bracket-indentation'},
- '>' => $rOpts->{'closing-paren-indentation'},
- );
-
- # flag indicating if any closing tokens are indented
- $some_closing_token_indentation =
- $rOpts->{'closing-paren-indentation'}
- || $rOpts->{'closing-brace-indentation'}
- || $rOpts->{'closing-square-bracket-indentation'}
- || $rOpts->{'indent-closing-brace'};
-
- %opening_token_right = (
- '(' => $rOpts->{'opening-paren-right'},
- '{' => $rOpts->{'opening-hash-brace-right'},
- '[' => $rOpts->{'opening-square-bracket-right'},
- );
-
- %stack_opening_token = (
- '(' => $rOpts->{'stack-opening-paren'},
- '{' => $rOpts->{'stack-opening-hash-brace'},
- '[' => $rOpts->{'stack-opening-square-bracket'},
- );
-
- %stack_closing_token = (
- ')' => $rOpts->{'stack-closing-paren'},
- '}' => $rOpts->{'stack-closing-hash-brace'},
- ']' => $rOpts->{'stack-closing-square-bracket'},
- );
- $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
- $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
- return;
-}
-
-sub bad_pattern {
-
- # See if a pattern will compile. We have to use a string eval here,
- # but it should be safe because the pattern has been constructed
- # by this program.
- my ($pattern) = @_;
- eval "'##'=~/$pattern/";
- return $@;
-}
-
-sub prepare_cuddled_block_types {
-
- my $cuddled_string = $rOpts->{'cuddled-block-list'};
- $cuddled_string = "try-catch-finally" unless defined($cuddled_string);
-
- # we have a cuddled string of the form
- # 'try-catch-finally'
-
- # we want to prepare a hash of the form
-
- # $rcuddled_block_types = {
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
-
- # use -dcbl to dump this hash
-
- # Multiple such strings are input as a space or comma separated list
-
- # If we get two lists with the same leading type, such as
- # -cbl = "-try-catch-finally -try-catch-otherwise"
- # then they will get merged as follows:
- # $rcuddled_block_types = {
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 2,
- # 'otherwise' => 1,
- # },
- # };
- # This will allow either type of chain to be followed.
-
- $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
- my @cuddled_strings = split /\s+/, $cuddled_string;
-
- $rcuddled_block_types = {};
-
- # process each dash-separated string...
- my $string_count = 0;
- foreach my $string (@cuddled_strings) {
- next unless $string;
- my @words = split /-+/, $string; # allow multiple dashes
-
- # we could look for and report possible errors here...
- next unless ( @words && @words > 0 );
- my $start = shift @words;
-
- # allow either '-continue' or *-continue' for arbitrary starting type
- $start = '*' unless $start;
-
- # always make an entry for the leading word. If none follow, this
- # will still prevent a wildcard from matching this word.
- if ( !defined( $rcuddled_block_types->{$start} ) ) {
- $rcuddled_block_types->{$start} = {};
- }
-
- # The count gives the original word order in case we ever want it.
- $string_count++;
- my $word_count = 0;
- foreach my $word (@words) {
- next unless $word;
- $word_count++;
- $rcuddled_block_types->{$start}->{$word} =
- 1; #"$string_count.$word_count";
- }
- }
-
- return;
-}
-
-sub dump_cuddled_block_list {
- my ($fh) = @_;
-
- # Here is the format of the cuddled block type hash
- # which controls this routine
- # my $rcuddled_block_types = {
- # 'if' => {
- # 'else' => 1,
- # 'elsif' => 1
- # },
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
- #The numerical values are string.word,
- #where string = string number and word = word number in that string
-
- my $cuddled_string = $rOpts->{'cuddled-block-list'};
- $cuddled_string = '' unless $cuddled_string;
- $fh->print(<<EOM);
-------------------------------------------------------------------------
-Hash of cuddled block types created from
- -cbl='$cuddled_string'
-------------------------------------------------------------------------
-EOM
-
- use Data::Dumper;
- $fh->print( Dumper($rcuddled_block_types) );
-
- $fh->print(<<EOM);
-------------------------------------------------------------------------
-EOM
- return;
-}
-
-sub make_static_block_comment_pattern {
-
- # create the pattern used to identify static block comments
- $static_block_comment_pattern = '^\s*##';
-
- # allow the user to change it
- if ( $rOpts->{'static-block-comment-prefix'} ) {
- my $prefix = $rOpts->{'static-block-comment-prefix'};
- $prefix =~ s/^\s*//;
- my $pattern = $prefix;
-
- # user may give leading caret to force matching left comments only
- if ( $prefix !~ /^\^#/ ) {
- if ( $prefix !~ /^#/ ) {
- Perl::Tidy::Die
-"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
- }
- $pattern = '^\s*' . $prefix;
- }
- if ( bad_pattern($pattern) ) {
- Perl::Tidy::Die
-"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
- }
- $static_block_comment_pattern = $pattern;
- }
- return;
-}
-
-sub make_format_skipping_pattern {
- my ( $opt_name, $default ) = @_;
- my $param = $rOpts->{$opt_name};
- unless ($param) { $param = $default }
- $param =~ s/^\s*//;
- if ( $param !~ /^#/ ) {
- Perl::Tidy::Die
- "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
- }
- my $pattern = '^' . $param . '\s';
- if ( bad_pattern($pattern) ) {
- Perl::Tidy::Die
-"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
- }
- return $pattern;
-}
-
-sub make_closing_side_comment_list_pattern {
-
- # turn any input list into a regex for recognizing selected block types
- $closing_side_comment_list_pattern = '^\w+';
- if ( defined( $rOpts->{'closing-side-comment-list'} )
- && $rOpts->{'closing-side-comment-list'} )
- {
- $closing_side_comment_list_pattern =
- make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
- }
- return;
-}
-
-sub make_bli_pattern {
-
- if ( defined( $rOpts->{'brace-left-and-indent-list'} )
- && $rOpts->{'brace-left-and-indent-list'} )
- {
- $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
- }
-
- $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
- return;
-}
-
-sub make_block_brace_vertical_tightness_pattern {
-
- # turn any input list into a regex for recognizing selected block types
- $block_brace_vertical_tightness_pattern =
- '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
- if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
- && $rOpts->{'block-brace-vertical-tightness-list'} )
- {
- $block_brace_vertical_tightness_pattern =
- make_block_pattern( '-bbvtl',
- $rOpts->{'block-brace-vertical-tightness-list'} );
- }
- return;
-}
-
-sub make_blank_line_pattern {
-
- $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
- my $key = 'blank-lines-before-closing-block-list';
- if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
- $blank_lines_before_closing_block_pattern =
- make_block_pattern( '-blbcl', $rOpts->{$key} );
- }
-
- $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
- $key = 'blank-lines-after-opening-block-list';
- if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
- $blank_lines_after_opening_block_pattern =
- make_block_pattern( '-blaol', $rOpts->{$key} );
- }
- return;
-}
-
-sub make_block_pattern {
-
- # given a string of block-type keywords, return a regex to match them
- # The only tricky part is that labels are indicated with a single ':'
- # and the 'sub' token text may have additional text after it (name of
- # sub).
- #
- # Example:
- #
- # input string: "if else elsif unless while for foreach do : sub";
- # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
-
- # Minor Update:
- #
- # To distinguish between anonymous subs and named subs, use 'sub' to
- # indicate a named sub, and 'asub' to indicate an anonymous sub
-
- my ( $abbrev, $string ) = @_;
- my @list = split_words($string);
- my @words = ();
- my %seen;
- for my $i (@list) {
- if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
- next if $seen{$i};
- $seen{$i} = 1;
- if ( $i eq 'sub' ) {
- }
- elsif ( $i eq 'asub' ) {
- }
- elsif ( $i eq ';' ) {
- push @words, ';';
- }
- elsif ( $i eq '{' ) {
- push @words, '\{';
- }
- elsif ( $i eq ':' ) {
- push @words, '\w+:';
- }
- elsif ( $i =~ /^\w/ ) {
- push @words, $i;
- }
- else {
- Perl::Tidy::Warn
- "unrecognized block type $i after $abbrev, ignoring\n";
- }
- }
- my $pattern = '(' . join( '|', @words ) . ')$';
- my $sub_patterns = "";
- if ( $seen{'sub'} ) {
- $sub_patterns .= '|' . $SUB_PATTERN;
- }
- if ( $seen{'asub'} ) {
- $sub_patterns .= '|' . $ASUB_PATTERN;
- }
- if ($sub_patterns) {
- $pattern = '(' . $pattern . $sub_patterns . ')';
- }
- $pattern = '^' . $pattern;
- return $pattern;
-}
-
-sub make_static_side_comment_pattern {
-
- # create the pattern used to identify static side comments
- $static_side_comment_pattern = '^##';
-
- # allow the user to change it
- if ( $rOpts->{'static-side-comment-prefix'} ) {
- my $prefix = $rOpts->{'static-side-comment-prefix'};
- $prefix =~ s/^\s*//;
- my $pattern = '^' . $prefix;
- if ( bad_pattern($pattern) ) {
- Perl::Tidy::Die
-"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
- }
- $static_side_comment_pattern = $pattern;
- }
- return;
-}
-
-sub make_closing_side_comment_prefix {
-
- # Be sure we have a valid closing side comment prefix
- my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
- my $csc_prefix_pattern;
- if ( !defined($csc_prefix) ) {
- $csc_prefix = '## end';
- $csc_prefix_pattern = '^##\s+end';
- }
- else {
- my $test_csc_prefix = $csc_prefix;
- if ( $test_csc_prefix !~ /^#/ ) {
- $test_csc_prefix = '#' . $test_csc_prefix;
- }
-
- # make a regex to recognize the prefix
- my $test_csc_prefix_pattern = $test_csc_prefix;
-
- # escape any special characters
- $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
-
- $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
-
- # allow exact number of intermediate spaces to vary
- $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
-
- # make sure we have a good pattern
- # if we fail this we probably have an error in escaping
- # characters.
-
- if ( bad_pattern($test_csc_prefix_pattern) ) {
-
- # shouldn't happen..must have screwed up escaping, above
- report_definite_bug();
- Perl::Tidy::Warn
-"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
-
- # just warn and keep going with defaults
- Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
- Perl::Tidy::Warn
- "Using default -cscp instead; please check output\n";
- }
- else {
- $csc_prefix = $test_csc_prefix;
- $csc_prefix_pattern = $test_csc_prefix_pattern;
- }
- }
- $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
- $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
- return;
-}
-
-sub dump_want_left_space {
- my $fh = shift;
- local $" = "\n";
- print $fh <<EOM;
-These values are the main control of whitespace to the left of a token type;
-They may be altered with the -wls parameter.
-For a list of token types, use perltidy --dump-token-types (-dtt)
- 1 means the token wants a space to its left
--1 means the token does not want a space to its left
-------------------------------------------------------------------------
-EOM
- foreach my $key ( sort keys %want_left_space ) {
- print $fh "$key\t$want_left_space{$key}\n";
- }
- return;
-}
-
-sub dump_want_right_space {
- my $fh = shift;
- local $" = "\n";
- print $fh <<EOM;
-These values are the main control of whitespace to the right of a token type;
-They may be altered with the -wrs parameter.
-For a list of token types, use perltidy --dump-token-types (-dtt)
- 1 means the token wants a space to its right
--1 means the token does not want a space to its right
-------------------------------------------------------------------------
-EOM
- foreach my $key ( sort keys %want_right_space ) {
- print $fh "$key\t$want_right_space{$key}\n";
- }
- return;
-}
-
-{ # begin is_essential_whitespace
-
- my %is_sort_grep_map;
- my %is_for_foreach;
-
- BEGIN {
-
- my @q;
- @q = qw(sort grep map);
- @is_sort_grep_map{@q} = (1) x scalar(@q);
-
- @q = qw(for foreach);
- @is_for_foreach{@q} = (1) x scalar(@q);
-
- }
-
- sub is_essential_whitespace {
-
- # Essential whitespace means whitespace which cannot be safely deleted
- # without risking the introduction of a syntax error.
- # We are given three tokens and their types:
- # ($tokenl, $typel) is the token to the left of the space in question
- # ($tokenr, $typer) is the token to the right of the space in question
- # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
- #
- # This is a slow routine but is not needed too often except when -mangle
- # is used.
- #
- # Note: This routine should almost never need to be changed. It is
- # for avoiding syntax problems rather than for formatting.
- my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
-
- my $result =
-
- # never combine two bare words or numbers
- # examples: and ::ok(1)
- # return ::spw(...)
- # for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- # $input eq"quit" to make $inputeq"quit"
- # my $size=-s::SINK if $file; <==OK but we won't do it
- # don't join something like: for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
- && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
-
- # do not combine a number with a concatenation dot
- # example: pom.caputo:
- # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
- || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
- || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
-
- # do not join a minus with a bare word, because you might form
- # a file test operator. Example from Complex.pm:
- # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
- || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
-
- # and something like this could become ambiguous without space
- # after the '-':
- # use constant III=>1;
- # $a = $b - III;
- # and even this:
- # $a = - III;
- || ( ( $tokenl eq '-' )
- && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
-
- # '= -' should not become =- or you will get a warning
- # about reversed -=
- # || ($tokenr eq '-')
-
- # keep a space between a quote and a bareword to prevent the
- # bareword from becoming a quote modifier.
- || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
-
- # keep a space between a token ending in '$' and any word;
- # this caused trouble: "die @$ if $@"
- || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
- && ( $tokenr =~ /^[a-zA-Z_]/ ) )
-
- # perl is very fussy about spaces before <<
- || ( $tokenr =~ /^\<\</ )
-
- # avoid combining tokens to create new meanings. Example:
- # $a+ +$b must not become $a++$b
- || ( $is_digraph{ $tokenl . $tokenr } )
- || ( $is_trigraph{ $tokenl . $tokenr } )
-
- # another example: do not combine these two &'s:
- # allow_options & &OPT_EXECCGI
- || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
-
- # don't combine $$ or $# with any alphanumeric
- # (testfile mangle.t with --mangle)
- || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
-
- # retain any space after possible filehandle
- # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
- || ( $typel eq 'Z' )
-
- # Perl is sensitive to whitespace after the + here:
- # $b = xvals $a + 0.1 * yvals $a;
- || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
-
- # keep paren separate in 'use Foo::Bar ()'
- || ( $tokenr eq '('
- && $typel eq 'w'
- && $typell eq 'k'
- && $tokenll eq 'use' )
-
- # keep any space between filehandle and paren:
- # file mangle.t with --mangle:
- || ( $typel eq 'Y' && $tokenr eq '(' )
-
- # retain any space after here doc operator ( hereerr.t)
- || ( $typel eq 'h' )
-
- # be careful with a space around ++ and --, to avoid ambiguity as to
- # which token it applies
- || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
- || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
-
- # need space after foreach my; for example, this will fail in
- # older versions of Perl:
- # foreach my$ft(@filetypes)...
- || (
- $tokenl eq 'my'
-
- # /^(for|foreach)$/
- && $is_for_foreach{$tokenll}
- && $tokenr =~ /^\$/
- )
-
- # must have space between grep and left paren; "grep(" will fail
- || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
-
- # don't stick numbers next to left parens, as in:
- #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
- || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
-
- # We must be sure that a space between a ? and a quoted string
- # remains if the space before the ? remains. [Loca.pm, lockarea]
- # ie,
- # $b=join $comma ? ',' : ':', @_; # ok
- # $b=join $comma?',' : ':', @_; # ok!
- # $b=join $comma ?',' : ':', @_; # error!
- # Not really required:
- ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
-
- # do not remove space between an '&' and a bare word because
- # it may turn into a function evaluation, like here
- # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
- # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
- || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
-
- # space stacked labels (TODO: check if really necessary)
- || ( $typel eq 'J' && $typer eq 'J' )
-
- ; # the value of this long logic sequence is the result we want
-##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
- return $result;
- }
-}
-
-{
- my %secret_operators;
- my %is_leading_secret_token;
-
- BEGIN {
-
- # token lists for perl secret operators as compiled by Philippe Bruhat
- # at: https://metacpan.org/module/perlsecret
- %secret_operators = (
- 'Goatse' => [qw#= ( ) =#], #=( )=
- 'Venus1' => [qw#0 +#], # 0+
- 'Venus2' => [qw#+ 0#], # +0
- 'Enterprise' => [qw#) x ! !#], # ()x!!
- 'Kite1' => [qw#~ ~ <>#], # ~~<>
- 'Kite2' => [qw#~~ <>#], # ~~<>
- 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
- 'Bang bang ' => [qw#! !#], # !!
- );
-
- # The following operators and constants are not included because they
- # are normally kept tight by perltidy:
- # ~~ <~>
- #
-
- # Make a lookup table indexed by the first token of each operator:
- # first token => [list, list, ...]
- foreach my $value ( values(%secret_operators) ) {
- my $tok = $value->[0];
- push @{ $is_leading_secret_token{$tok} }, $value;
- }
- }
-
- sub new_secret_operator_whitespace {
-
- my ( $rlong_array, $rwhitespace_flags ) = @_;
-
- # Loop over all tokens in this line
- my ( $token, $type );
- my $jmax = @{$rlong_array} - 1;
- foreach my $j ( 0 .. $jmax ) {
-
- $token = $rlong_array->[$j]->[_TOKEN_];
- $type = $rlong_array->[$j]->[_TYPE_];
-
- # Skip unless this token might start a secret operator
- next if ( $type eq 'b' );
- next unless ( $is_leading_secret_token{$token} );
-
- # Loop over all secret operators with this leading token
- foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
- my $jend = $j - 1;
- foreach my $tok ( @{$rpattern} ) {
- $jend++;
- $jend++
-
- if ( $jend <= $jmax
- && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
- if ( $jend > $jmax
- || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
- {
- $jend = undef;
- last;
- }
- }
-
- if ($jend) {
-
- # set flags to prevent spaces within this operator
- foreach my $jj ( $j + 1 .. $jend ) {
- $rwhitespace_flags->[$jj] = WS_NO;
- }
- $j = $jend;
- last;
- }
- } ## End Loop over all operators
- } ## End loop over all tokens
- return;
- } # End sub
-}
-
-{ # begin print_line_of_tokens
-
- my $rinput_token_array; # Current working array
- my $rinput_K_array; # Future working array
-
- my $in_quote;
- my $guessed_indentation_level;
-
- # This should be a return variable from extract_token
- # These local token variables are stored by store_token_to_go:
- my $rtoken_vars;
- my $Ktoken_vars;
- my $block_type;
- my $ci_level;
- my $container_environment;
- my $container_type;
- my $in_continued_quote;
- my $level;
- my $no_internal_newlines;
- my $slevel;
- my $token;
- my $type;
- my $type_sequence;
-
- # routine to pull the jth token from the line of tokens
- sub extract_token {
- my ( $self, $j ) = @_;
-
- my $rLL = $self->{rLL};
- $Ktoken_vars = $rinput_K_array->[$j];
- if ( !defined($Ktoken_vars) ) {
-
- # Shouldn't happen: an error here would be due to a recent program change
- Fault("undefined index K for j=$j");
- }
- $rtoken_vars = $rLL->[$Ktoken_vars];
-
- if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
-
- # Shouldn't happen: an error here would be due to a recent program change
- Fault(<<EOM);
- j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
-EOM
- }
-
- #########################################################
- # these are now redundant and can eventually be eliminated
-
- $token = $rtoken_vars->[_TOKEN_];
- $type = $rtoken_vars->[_TYPE_];
- $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- $container_type = $rtoken_vars->[_CONTAINER_TYPE_];
- $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- $level = $rtoken_vars->[_LEVEL_];
- $slevel = $rtoken_vars->[_SLEVEL_];
- $ci_level = $rtoken_vars->[_CI_LEVEL_];
- #########################################################
-
- return;
- }
-
- {
- my @saved_token;
-
- sub save_current_token {
-
- @saved_token = (
- $block_type, $ci_level,
- $container_environment, $container_type,
- $in_continued_quote, $level,
- $no_internal_newlines, $slevel,
- $token, $type,
- $type_sequence, $rtoken_vars,
- $Ktoken_vars,
- );
- return;
- }
-
- sub restore_current_token {
- (
- $block_type, $ci_level,
- $container_environment, $container_type,
- $in_continued_quote, $level,
- $no_internal_newlines, $slevel,
- $token, $type,
- $type_sequence, $rtoken_vars,
- $Ktoken_vars,
- ) = @saved_token;
- return;
- }
- }
-
- sub token_length {
-
- # Returns the length of a token, given:
- # $token=text of the token
- # $type = type
- # $not_first_token = should be TRUE if this is not the first token of
- # the line. It might the index of this token in an array. It is
- # used to test for a side comment vs a block comment.
- # Note: Eventually this should be the only routine determining the
- # length of a token in this package.
- my ( $token, $type, $not_first_token ) = @_;
- my $token_length = length($token);
-
- # We mark lengths of side comments as just 1 if we are
- # ignoring their lengths when setting line breaks.
- $token_length = 1
- if ( $rOpts_ignore_side_comment_lengths
- && $not_first_token
- && $type eq '#' );
- return $token_length;
- }
-
- sub rtoken_length {
-
- # return length of ith token in @{$rtokens}
- my ($i) = @_;
- return token_length( $rinput_token_array->[$i]->[_TOKEN_],
- $rinput_token_array->[$i]->[_TYPE_], $i );
- }
-
- # Routine to place the current token into the output stream.
- # Called once per output token.
- sub store_token_to_go {
-
- my ( $self, $side_comment_follows ) = @_;
-
- my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
-
- ++$max_index_to_go;
- $K_to_go[$max_index_to_go] = $Ktoken_vars;
- $rtoken_vars_to_go[$max_index_to_go] = $rtoken_vars;
- $tokens_to_go[$max_index_to_go] = $token;
- $types_to_go[$max_index_to_go] = $type;
- $nobreak_to_go[$max_index_to_go] = $flag;
- $old_breakpoint_to_go[$max_index_to_go] = 0;
- $forced_breakpoint_to_go[$max_index_to_go] = 0;
- $block_type_to_go[$max_index_to_go] = $block_type;
- $type_sequence_to_go[$max_index_to_go] = $type_sequence;
- $container_environment_to_go[$max_index_to_go] = $container_environment;
- $ci_levels_to_go[$max_index_to_go] = $ci_level;
- $mate_index_to_go[$max_index_to_go] = -1;
- $matching_token_to_go[$max_index_to_go] = '';
- $bond_strength_to_go[$max_index_to_go] = 0;
-
- # Note: negative levels are currently retained as a diagnostic so that
- # the 'final indentation level' is correctly reported for bad scripts.
- # But this means that every use of $level as an index must be checked.
- # If this becomes too much of a problem, we might give up and just clip
- # them at zero.
- ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
- $levels_to_go[$max_index_to_go] = $level;
- $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
-
- # link the non-blank tokens
- my $iprev = $max_index_to_go - 1;
- $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
- $iprev_to_go[$max_index_to_go] = $iprev;
- $inext_to_go[$iprev] = $max_index_to_go
- if ( $iprev >= 0 && $type ne 'b' );
- $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
-
- $token_lengths_to_go[$max_index_to_go] =
- token_length( $token, $type, $max_index_to_go );
-
- # We keep a running sum of token lengths from the start of this batch:
- # summed_lengths_to_go[$i] = total length to just before token $i
- # summed_lengths_to_go[$i+1] = total length to just after token $i
- $summed_lengths_to_go[ $max_index_to_go + 1 ] =
- $summed_lengths_to_go[$max_index_to_go] +
- $token_lengths_to_go[$max_index_to_go];
-
- # Define the indentation that this token would have if it started
- # a new line. We have to do this now because we need to know this
- # when considering one-line blocks.
- set_leading_whitespace( $level, $ci_level, $in_continued_quote );
-
- # remember previous nonblank tokens seen
- if ( $type ne 'b' ) {
- $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
- $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
- $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
- $last_nonblank_index_to_go = $max_index_to_go;
- $last_nonblank_type_to_go = $type;
- $last_nonblank_token_to_go = $token;
- if ( $type eq ',' ) {
- $comma_count_in_batch++;
- }
- }
-
- FORMATTER_DEBUG_FLAG_STORE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
- };
- return;
- }
-
- sub insert_new_token_to_go {
-
- # insert a new token into the output stream. use same level as
- # previous token; assumes a character at max_index_to_go.
- my $self = shift;
- my @args = @_;
- save_current_token();
- ( $token, $type, $slevel, $no_internal_newlines ) = @args;
-
- if ( $max_index_to_go == UNDEFINED_INDEX ) {
- warning("code bug: bad call to insert_new_token_to_go\n");
- }
- $level = $levels_to_go[$max_index_to_go];
-
- # FIXME: it seems to be necessary to use the next, rather than
- # previous, value of this variable when creating a new blank (align.t)
- #my $slevel = $nesting_depth_to_go[$max_index_to_go];
- $ci_level = $ci_levels_to_go[$max_index_to_go];
- $container_environment = $container_environment_to_go[$max_index_to_go];
- $in_continued_quote = 0;
- $block_type = "";
- $type_sequence = "";
- $self->store_token_to_go();
- restore_current_token();
- return;
- }
-
- sub copy_hash {
- my ($rold_token_hash) = @_;
- my %new_token_hash =
- map { $_, $rold_token_hash->{$_} } keys %{$rold_token_hash};
- return \%new_token_hash;
- }
-
- sub copy_array {
- my ($rold) = @_;
- my @new = map { $_ } @{$rold};
- return \@new;
- }
-
- sub copy_token_as_type {
- my ( $rold_token, $type, $token ) = @_;
- if ( $type eq 'b' ) {
- $token = " " unless defined($token);
- }
- elsif ( $type eq 'q' ) {
- $token = '' unless defined($token);
- }
- elsif ( $type eq '->' ) {
- $token = '->' unless defined($token);
- }
- elsif ( $type eq ';' ) {
- $token = ';' unless defined($token);
- }
- else {
- Fault(
-"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
- );
- }
- my $rnew_token = copy_array($rold_token);
- $rnew_token->[_TYPE_] = $type;
- $rnew_token->[_TOKEN_] = $token;
- $rnew_token->[_BLOCK_TYPE_] = '';
- $rnew_token->[_CONTAINER_TYPE_] = '';
- $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
- $rnew_token->[_TYPE_SEQUENCE_] = '';
- return $rnew_token;
- }
-
- sub boolean_equals {
- my ( $val1, $val2 ) = @_;
- return ( $val1 && $val2 || !$val1 && !$val2 );
- }
-
- sub print_line_of_tokens {
-
- my ( $self, $line_of_tokens ) = @_;
-
- # This routine is called once per input line to process all of
- # the tokens on that line. This is the first stage of
- # beautification.
- #
- # Full-line comments and blank lines may be processed immediately.
- #
- # For normal lines of code, the tokens are stored one-by-one,
- # via calls to 'sub store_token_to_go', until a known line break
- # point is reached. Then, the batch of collected tokens is
- # passed along to 'sub output_line_to_go' for further
- # processing. This routine decides if there should be
- # whitespace between each pair of non-white tokens, so later
- # routines only need to decide on any additional line breaks.
- # Any whitespace is initially a single space character. Later,
- # the vertical aligner may expand that to be multiple space
- # characters if necessary for alignment.
-
- $input_line_number = $line_of_tokens->{_line_number};
- my $input_line = $line_of_tokens->{_line_text};
- my $CODE_type = $line_of_tokens->{_code_type};
-
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $K_first, $K_last ) = @{$rK_range};
-
- my $rLL = $self->{rLL};
- my $rbreak_container = $self->{rbreak_container};
-
- if ( !defined($K_first) ) {
-
- # Unexpected blank line..
- # Calling routine was supposed to handle this
- Perl::Tidy::Warn(
-"Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring"
- );
- return;
- }
-
- $no_internal_newlines = 1 - $rOpts_add_newlines;
- my $is_comment =
- ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
- my $is_static_block_comment_without_leading_space =
- $CODE_type eq 'SBCX';
- $is_static_block_comment =
- $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
- my $is_hanging_side_comment = $CODE_type eq 'HSC';
- my $is_VERSION_statement = $CODE_type eq 'VER';
- if ($is_VERSION_statement) {
- $saw_VERSION_in_this_file = 1;
- $no_internal_newlines = 1;
- }
-
- # Add interline blank if any
- my $last_old_nonblank_type = "b";
- my $first_new_nonblank_type = "b";
- my $first_new_nonblank_token = " ";
- if ( $max_index_to_go >= 0 ) {
- $last_old_nonblank_type = $types_to_go[$max_index_to_go];
- $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_];
- $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
- if ( !$is_comment
- && $types_to_go[$max_index_to_go] ne 'b'
- && $K_first > 0
- && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
- {
- $K_first -= 1;
- }
- }
-
- # Copy the tokens into local arrays
- $rinput_token_array = [];
- $rinput_K_array = [];
- $rinput_K_array = [ ( $K_first .. $K_last ) ];
- $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
- my $jmax = @{$rinput_K_array} - 1;
-
- $in_continued_quote = $starting_in_quote =
- $line_of_tokens->{_starting_in_quote};
- $in_quote = $line_of_tokens->{_ending_in_quote};
- $ending_in_quote = $in_quote;
- $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
-
- my $j_next;
- my $next_nonblank_token;
- my $next_nonblank_token_type;
-
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
-
- ######################################
- # Handle a block (full-line) comment..
- ######################################
- if ($is_comment) {
-
- if ( $rOpts->{'delete-block-comments'} ) { return }
-
- if ( $rOpts->{'tee-block-comments'} ) {
- $file_writer_object->tee_on();
- }
-
- destroy_one_line_block();
- $self->output_line_to_go();
-
- # output a blank line before block comments
- if (
- # unless we follow a blank or comment line
- $last_line_leading_type !~ /^[#b]$/
-
- # only if allowed
- && $rOpts->{'blanks-before-comments'}
-
- # if this is NOT an empty comment line
- && $rinput_token_array->[0]->[_TOKEN_] ne '#'
-
- # not after a short line ending in an opening token
- # because we already have space above this comment.
- # Note that the first comment in this if block, after
- # the 'if (', does not get a blank line because of this.
- && !$last_output_short_opening_token
-
- # never before static block comments
- && !$is_static_block_comment
- )
- {
- $self->flush(); # switching to new output stream
- $file_writer_object->write_blank_code_line();
- $last_line_leading_type = 'b';
- }
-
- # TRIM COMMENTS -- This could be turned off as a option
- $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end
-
- if (
- $rOpts->{'indent-block-comments'}
- && ( !$rOpts->{'indent-spaced-block-comments'}
- || $input_line =~ /^\s+/ )
- && !$is_static_block_comment_without_leading_space
- )
- {
- $self->extract_token(0);
- $self->store_token_to_go();
- $self->output_line_to_go();
- }
- else {
- $self->flush(); # switching to new output stream
- $file_writer_object->write_code_line(
- $rinput_token_array->[0]->[_TOKEN_] . "\n" );
- $last_line_leading_type = '#';
- }
- if ( $rOpts->{'tee-block-comments'} ) {
- $file_writer_object->tee_off();
- }
- return;
- }
-
- # TODO: Move to sub scan_comments
- # compare input/output indentation except for continuation lines
- # (because they have an unknown amount of initial blank space)
- # and lines which are quotes (because they may have been outdented)
- # Note: this test is placed here because we know the continuation flag
- # at this point, which allows us to avoid non-meaningful checks.
- my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
- compare_indentation_levels( $guessed_indentation_level,
- $structural_indentation_level )
- unless ( $is_hanging_side_comment
- || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
- || $guessed_indentation_level == 0
- && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
-
- ##########################
- # Handle indentation-only
- ##########################
-
- # NOTE: In previous versions we sent all qw lines out immediately here.
- # No longer doing this: also write a line which is entirely a 'qw' list
- # to allow stacking of opening and closing tokens. Note that interior
- # qw lines will still go out at the end of this routine.
- ##if ( $rOpts->{'indent-only'} ) {
- if ( $CODE_type eq 'IO' ) {
- $self->flush();
- my $line = $input_line;
-
- # delete side comments if requested with -io, but
- # we will not allow deleting of closing side comments with -io
- # because the coding would be more complex
- if ( $rOpts->{'delete-side-comments'}
- && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
- {
-
- $line = "";
- foreach my $jj ( 0 .. $jmax - 1 ) {
- $line .= $rinput_token_array->[$jj]->[_TOKEN_];
- }
- }
- $line = trim($line);
-
- $self->extract_token(0);
- $token = $line;
- $type = 'q';
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
- $self->store_token_to_go();
- $self->output_line_to_go();
- return;
- }
-
- ############################
- # Handle all other lines ...
- ############################
-
- #######################################################
- # FIXME: this should become unnecessary
- # making $j+2 valid simplifies coding
- my $rnew_blank =
- copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
- push @{$rinput_token_array}, $rnew_blank;
- push @{$rinput_token_array}, $rnew_blank;
- #######################################################
-
- # If we just saw the end of an elsif block, write nag message
- # if we do not see another elseif or an else.
- if ($looking_for_else) {
-
- unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
- write_logfile_entry("(No else block)\n");
- }
- $looking_for_else = 0;
- }
-
- # This is a good place to kill incomplete one-line blocks
- if (
- (
- ( $semicolons_before_block_self_destruct == 0 )
- && ( $max_index_to_go >= 0 )
- && ( $last_old_nonblank_type eq ';' )
- && ( $first_new_nonblank_token ne '}' )
- )
-
- # Patch for RT #98902. Honor request to break at old commas.
- || ( $rOpts_break_at_old_comma_breakpoints
- && $max_index_to_go >= 0
- && $last_old_nonblank_type eq ',' )
- )
- {
- $forced_breakpoint_to_go[$max_index_to_go] = 1
- if ($rOpts_break_at_old_comma_breakpoints);
- destroy_one_line_block();
- $self->output_line_to_go();
- }
-
- # loop to process the tokens one-by-one
- $type = 'b';
- $token = "";
-
- # We do not want a leading blank if the previous batch just got output
- my $jmin = 0;
- if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
- $jmin = 1;
- }
-
- foreach my $j ( $jmin .. $jmax ) {
-
- # pull out the local values for this token
- $self->extract_token($j);
-
- if ( $type eq '#' ) {
-
- # trim trailing whitespace
- # (there is no option at present to prevent this)
- $token =~ s/\s*$//;
-
- if (
- $rOpts->{'delete-side-comments'}
-
- # delete closing side comments if necessary
- || ( $rOpts->{'delete-closing-side-comments'}
- && $token =~ /$closing_side_comment_prefix_pattern/o
- && $last_nonblank_block_type =~
- /$closing_side_comment_list_pattern/o )
- )
- {
- if ( $types_to_go[$max_index_to_go] eq 'b' ) {
- unstore_token_to_go();
- }
- last;
- }
- }
-
- # If we are continuing after seeing a right curly brace, flush
- # buffer unless we see what we are looking for, as in
- # } else ...
- if ( $rbrace_follower && $type ne 'b' ) {
-
- unless ( $rbrace_follower->{$token} ) {
- $self->output_line_to_go();
- }
- $rbrace_follower = undef;
- }
-
- $j_next =
- ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
- ? $j + 2
- : $j + 1;
- $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
- $next_nonblank_token_type =
- $rinput_token_array->[$j_next]->[_TYPE_];
-
- ######################
- # MAYBE MOVE ELSEWHERE?
- ######################
- if ( $type eq 'Q' ) {
- note_embedded_tab() if ( $token =~ "\t" );
-
- # make note of something like '$var = s/xxx/yyy/;'
- # in case it should have been '$var =~ s/xxx/yyy/;'
- if (
- $token =~ /^(s|tr|y|m|\/)/
- && $last_nonblank_token =~ /^(=|==|!=)$/
-
- # preceded by simple scalar
- && $last_last_nonblank_type eq 'i'
- && $last_last_nonblank_token =~ /^\$/
-
- # followed by some kind of termination
- # (but give complaint if we can's see far enough ahead)
- && $next_nonblank_token =~ /^[; \)\}]$/
-
- # scalar is not declared
- && !(
- $types_to_go[0] eq 'k'
- && $tokens_to_go[0] =~ /^(my|our|local)$/
- )
- )
- {
- my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
- complain(
-"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
- );
- }
- }
-
- # Do not allow breaks which would promote a side comment to a
- # block comment. In order to allow a break before an opening
- # or closing BLOCK, followed by a side comment, those sections
- # of code will handle this flag separately.
- my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
- my $is_opening_BLOCK =
- ( $type eq '{'
- && $token eq '{'
- && $block_type
- && $block_type ne 't' );
- my $is_closing_BLOCK =
- ( $type eq '}'
- && $token eq '}'
- && $block_type
- && $block_type ne 't' );
-
- if ( $side_comment_follows
- && !$is_opening_BLOCK
- && !$is_closing_BLOCK )
- {
- $no_internal_newlines = 1;
- }
-
- # We're only going to handle breaking for code BLOCKS at this
- # (top) level. Other indentation breaks will be handled by
- # sub scan_list, which is better suited to dealing with them.
- if ($is_opening_BLOCK) {
-
- # Tentatively output this token. This is required before
- # calling starting_one_line_block. We may have to unstore
- # it, though, if we have to break before it.
- $self->store_token_to_go($side_comment_follows);
-
- # Look ahead to see if we might form a one-line block..
- my $too_long = 0;
-
- # But obey any flag set for cuddled blocks
- if ( $rbreak_container->{$type_sequence} ) {
- destroy_one_line_block();
- }
- else {
- $too_long =
- starting_one_line_block( $j, $jmax, $level, $slevel,
- $ci_level, $rinput_token_array );
- }
- clear_breakpoint_undo_stack();
-
- # to simplify the logic below, set a flag to indicate if
- # this opening brace is far from the keyword which introduces it
- my $keyword_on_same_line = 1;
- if ( ( $max_index_to_go >= 0 )
- && ( $last_nonblank_type eq ')' ) )
- {
- if ( $block_type =~ /^(if|else|elsif)$/
- && ( $tokens_to_go[0] eq '}' )
- && $rOpts_cuddled_else )
- {
- $keyword_on_same_line = 1;
- }
- elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
- {
- $keyword_on_same_line = 0;
- }
- }
-
- # decide if user requested break before '{'
- my $want_break =
-
- # use -bl flag if not a sub block of any type
- $block_type !~ /^sub\b/
- ? $rOpts->{'opening-brace-on-new-line'}
-
- # use -sbl flag for a named sub block
- : $block_type !~ /$ASUB_PATTERN/
- ? $rOpts->{'opening-sub-brace-on-new-line'}
-
- # use -asbl flag for an anonymous sub block
- : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
-
- # Do not break if this token is welded to the left
- if ( weld_len_left( $type_sequence, $token ) ) {
- $want_break = 0;
- }
-
- # Break before an opening '{' ...
- if (
-
- # if requested
- $want_break
-
- # and we were unable to start looking for a block,
- && $index_start_one_line_block == UNDEFINED_INDEX
-
- # or if it will not be on same line as its keyword, so that
- # it will be outdented (eval.t, overload.t), and the user
- # has not insisted on keeping it on the right
- || ( !$keyword_on_same_line
- && !$rOpts->{'opening-brace-always-on-right'} )
-
- )
- {
-
- # but only if allowed
- unless ($no_internal_newlines) {
-
- # since we already stored this token, we must unstore it
- $self->unstore_token_to_go();
-
- # then output the line
- $self->output_line_to_go();
-
- # and now store this token at the start of a new line
- $self->store_token_to_go($side_comment_follows);
- }
- }
-
- # Now update for side comment
- if ($side_comment_follows) { $no_internal_newlines = 1 }
-
- # now output this line
- unless ($no_internal_newlines) {
- $self->output_line_to_go();
- }
- }
-
- elsif ($is_closing_BLOCK) {
-
- # If there is a pending one-line block ..
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
-
- # we have to terminate it if..
- if (
-
- # it is too long (final length may be different from
- # initial estimate). note: must allow 1 space for this
- # token
- excess_line_length( $index_start_one_line_block,
- $max_index_to_go ) >= 0
-
- # or if it has too many semicolons
- || ( $semicolons_before_block_self_destruct == 0
- && $last_nonblank_type ne ';' )
- )
- {
- destroy_one_line_block();
- }
- }
-
- # put a break before this closing curly brace if appropriate
- unless ( $no_internal_newlines
- || $index_start_one_line_block != UNDEFINED_INDEX )
- {
-
- # write out everything before this closing curly brace
- $self->output_line_to_go();
- }
-
- # Now update for side comment
- if ($side_comment_follows) { $no_internal_newlines = 1 }
-
- # store the closing curly brace
- $self->store_token_to_go();
-
- # ok, we just stored a closing curly brace. Often, but
- # not always, we want to end the line immediately.
- # So now we have to check for special cases.
-
- # if this '}' successfully ends a one-line block..
- my $is_one_line_block = 0;
- my $keep_going = 0;
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
-
- # Remember the type of token just before the
- # opening brace. It would be more general to use
- # a stack, but this will work for one-line blocks.
- $is_one_line_block =
- $types_to_go[$index_start_one_line_block];
-
- # we have to actually make it by removing tentative
- # breaks that were set within it
- undo_forced_breakpoint_stack(0);
- set_nobreaks( $index_start_one_line_block,
- $max_index_to_go - 1 );
-
- # then re-initialize for the next one-line block
- destroy_one_line_block();
-
- # then decide if we want to break after the '}' ..
- # We will keep going to allow certain brace followers as in:
- # do { $ifclosed = 1; last } unless $losing;
- #
- # But make a line break if the curly ends a
- # significant block:
- if (
- (
- $is_block_without_semicolon{$block_type}
-
- # Follow users break point for
- # one line block types U & G, such as a 'try' block
- || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
- )
-
- # if needless semicolon follows we handle it later
- && $next_nonblank_token ne ';'
- )
- {
- $self->output_line_to_go()
- unless ($no_internal_newlines);
- }
- }
-
- # set string indicating what we need to look for brace follower
- # tokens
- if ( $block_type eq 'do' ) {
- $rbrace_follower = \%is_do_follower;
- }
- elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
- $rbrace_follower = \%is_if_brace_follower;
- }
- elsif ( $block_type eq 'else' ) {
- $rbrace_follower = \%is_else_brace_follower;
- }
-
- # added eval for borris.t
- elsif ($is_sort_map_grep_eval{$block_type}
- || $is_one_line_block eq 'G' )
- {
- $rbrace_follower = undef;
- $keep_going = 1;
- }
-
- # anonymous sub
- elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
-
- if ($is_one_line_block) {
- $rbrace_follower = \%is_anon_sub_1_brace_follower;
- }
- else {
- $rbrace_follower = \%is_anon_sub_brace_follower;
- }
- }
-
- # None of the above: specify what can follow a closing
- # brace of a block which is not an
- # if/elsif/else/do/sort/map/grep/eval
- # Testfiles:
- # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
- else {
- $rbrace_follower = \%is_other_brace_follower;
- }
-
- # See if an elsif block is followed by another elsif or else;
- # complain if not.
- if ( $block_type eq 'elsif' ) {
-
- if ( $next_nonblank_token_type eq 'b' ) { # end of line?
- $looking_for_else = 1; # ok, check on next line
- }
- else {
-
- unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
- write_logfile_entry("No else block :(\n");
- }
- }
- }
-
- # keep going after certain block types (map,sort,grep,eval)
- # added eval for borris.t
- if ($keep_going) {
-
- # keep going
- }
-
- # if no more tokens, postpone decision until re-entering
- elsif ( ( $next_nonblank_token_type eq 'b' )
- && $rOpts_add_newlines )
- {
- unless ($rbrace_follower) {
- $self->output_line_to_go()
- unless ($no_internal_newlines);
- }
- }
-
- elsif ($rbrace_follower) {
-
- unless ( $rbrace_follower->{$next_nonblank_token} ) {
- $self->output_line_to_go()
- unless ($no_internal_newlines);
- }
- $rbrace_follower = undef;
- }
-
- else {
- $self->output_line_to_go() unless ($no_internal_newlines);
- }
-
- } # end treatment of closing block token
-
- # handle semicolon
- elsif ( $type eq ';' ) {
-
- # kill one-line blocks with too many semicolons
- $semicolons_before_block_self_destruct--;
- if (
- ( $semicolons_before_block_self_destruct < 0 )
- || ( $semicolons_before_block_self_destruct == 0
- && $next_nonblank_token_type !~ /^[b\}]$/ )
- )
- {
- destroy_one_line_block();
- }
-
- # Remove unnecessary semicolons, but not after bare
- # blocks, where it could be unsafe if the brace is
- # mistokenized.
- if (
- (
- $last_nonblank_token eq '}'
- && (
- $is_block_without_semicolon{
- $last_nonblank_block_type}
- || $last_nonblank_block_type =~ /$SUB_PATTERN/
- || $last_nonblank_block_type =~ /^\w+:$/ )
- )
- || $last_nonblank_type eq ';'
- )
- {
-
- if (
- $rOpts->{'delete-semicolons'}
-
- # don't delete ; before a # because it would promote it
- # to a block comment
- && ( $next_nonblank_token_type ne '#' )
- )
- {
- note_deleted_semicolon();
- $self->output_line_to_go()
- unless ( $no_internal_newlines
- || $index_start_one_line_block != UNDEFINED_INDEX );
- next;
- }
- else {
- write_logfile_entry("Extra ';'\n");
- }
- }
- $self->store_token_to_go();
-
- $self->output_line_to_go()
- unless ( $no_internal_newlines
- || ( $rOpts_keep_interior_semicolons && $j < $jmax )
- || ( $next_nonblank_token eq '}' ) );
-
- }
-
- # handle here_doc target string
- elsif ( $type eq 'h' ) {
-
- # no newlines after seeing here-target
- $no_internal_newlines = 1;
- destroy_one_line_block();
- $self->store_token_to_go();
- }
-
- # handle all other token types
- else {
-
- $self->store_token_to_go();
- }
-
- # remember two previous nonblank OUTPUT tokens
- if ( $type ne '#' && $type ne 'b' ) {
- $last_last_nonblank_token = $last_nonblank_token;
- $last_last_nonblank_type = $last_nonblank_type;
- $last_nonblank_token = $token;
- $last_nonblank_type = $type;
- $last_nonblank_block_type = $block_type;
- }
-
- # unset the continued-quote flag since it only applies to the
- # first token, and we want to resume normal formatting if
- # there are additional tokens on the line
- $in_continued_quote = 0;
-
- } # end of loop over all tokens in this 'line_of_tokens'
-
- # we have to flush ..
- if (
-
- # if there is a side comment
- ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
-
- # if this line ends in a quote
- # NOTE: This is critically important for ensuring that quoted lines
- # do not get processed by things like -sot and -sct
- || $in_quote
-
- # if this is a VERSION statement
- || $is_VERSION_statement
-
- # to keep a label at the end of a line
- || $type eq 'J'
-
- # if we are instructed to keep all old line breaks
- || !$rOpts->{'delete-old-newlines'}
- )
- {
- destroy_one_line_block();
- $self->output_line_to_go();
- }
-
- # mark old line breakpoints in current output stream
- if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
- my $jobp = $max_index_to_go;
- if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
- {
- $jobp--;
- }
- $old_breakpoint_to_go[$jobp] = 1;
- }
- return;
- } ## end sub print_line_of_tokens
-} ## end block print_line_of_tokens
-
-# sub output_line_to_go sends one logical line of tokens on down the
-# pipeline to the VerticalAligner package, breaking the line into continuation
-# lines as necessary. The line of tokens is ready to go in the "to_go"
-# arrays.
-#
-# Called as a method; it takes no arguments besides $self and always
-# returns nothing (bare return). It returns early, without writing
-# anything, when the last stored token is welded to the right, or when a
-# one-line block is still pending (in which case only a tentative forced
-# breakpoint is set at the last token).
-sub output_line_to_go {
-
- my $self = shift;
-
- # debug stuff; this routine can be called from many points
- FORMATTER_DEBUG_FLAG_OUTPUT && do {
- my ( $a, $b, $c ) = caller;
- write_diagnostics(
-"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
- );
- my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
- write_diagnostics("$output_str\n");
- };
-
- # Do not end line in a weld
- # TODO: Move this fix into the routine?
- #my $jnb = $max_index_to_go;
- #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- }
- return if ( weld_len_right_to_go($max_index_to_go) );
-
- # just set a tentative breakpoint if we might be in a one-line block
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- set_forced_breakpoint($max_index_to_go);
- return;
- }
-
- # The closing side comment text (if any) is only written at the very
- # end of this routine, after the batch itself has been sent on.
- my $cscw_block_comment;
- $cscw_block_comment = $self->add_closing_side_comment()
- if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
-
- my $comma_arrow_count_contained = match_opening_and_closing_tokens();
-
- # tell the -lp option we are outputting a batch so it can close
- # any unfinished items in its stack
- finish_lp_batch();
-
- # If this line ends in a code block brace, set breaks at any
- # previous closing code block braces to breakup a chain of code
- # blocks on one line. This is very rare but can happen for
- # user-defined subs. For example we might be looking at this:
- # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
- my $saw_good_break = 0; # flag to force breaks even if short line
- if (
-
- # looking for opening or closing block brace
- $block_type_to_go[$max_index_to_go]
-
- # but not one of these which are never duplicated on a line:
- # until|while|for|if|elsif|else
- && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
- )
- {
- my $lev = $nesting_depth_to_go[$max_index_to_go];
-
- # Walk backwards from the end and
- # set break at any closing block braces at the same level.
- # But quit if we are not in a chain of blocks.
- for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
- last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
- next if ( $levels_to_go[$i] > $lev ); # skip past higher level
-
- if ( $block_type_to_go[$i] ) {
- if ( $tokens_to_go[$i] eq '}' ) {
- set_forced_breakpoint($i);
- $saw_good_break = 1;
- }
- }
-
- # quit if we see anything besides words, function, blanks
- # at this level
- elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
- }
- }
-
- # $imin .. $imax is the range of tokens to be written, after trimming
- # at most one blank token from each end
- my $imin = 0;
- my $imax = $max_index_to_go;
-
- # trim any blank tokens
- if ( $max_index_to_go >= 0 ) {
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- }
-
- # anything left to write?
- if ( $imin <= $imax ) {
-
- # add a blank line before certain key types but not after a comment
- if ( $last_line_leading_type !~ /^[#]/ ) {
- my $want_blank = 0;
- my $leading_token = $tokens_to_go[$imin];
- my $leading_type = $types_to_go[$imin];
-
- # blank lines before subs except declarations and one-liners
- # MCONVERSION LOCATION - for sub tokenization change
- if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) !~ /^[\;\}]$/
- );
- }
-
- # break before all package declarations
- # MCONVERSION LOCATION - for tokenizaton change
- elsif ($leading_token =~ /^(package\s)/
- && $leading_type eq 'i' )
- {
- $want_blank = $rOpts->{'blank-lines-before-packages'};
- }
-
- # break before certain key blocks except one-liners
- # (this starts a new if/elsif chain; the tests above require
- # type 'i' and the tests below require type 'k')
- if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) ne '}'
- );
- }
-
- # Break before certain block types if we haven't had a
- # break at this level for a while. This is the
- # difficult decision..
- elsif ($leading_type eq 'k'
- && $last_line_leading_type ne 'b'
- && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
- {
- my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
- if ( !defined($lc) ) { $lc = 0 }
-
- $want_blank =
- $rOpts->{'blanks-before-blocks'}
- && $lc >= $rOpts->{'long-block-line-count'}
- && $file_writer_object->get_consecutive_nonblank_lines() >=
- $rOpts->{'long-block-line-count'}
- && (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) ne '}'
- );
- }
-
- # Check for blank lines wanted before a closing brace
- # (only ever raises the count chosen above, never lowers it)
- if ( $leading_token eq '}' ) {
- if ( $rOpts->{'blank-lines-before-closing-block'}
- && $block_type_to_go[$imin]
- && $block_type_to_go[$imin] =~
- /$blank_lines_before_closing_block_pattern/ )
- {
- my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
- if ( $nblanks > $want_blank ) {
- $want_blank = $nblanks;
- }
- }
- }
-
- if ($want_blank) {
-
- # future: send blank line down normal path to VerticalAligner
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->require_blank_code_lines($want_blank);
- }
- }
-
- # update blank line variables and count number of consecutive
- # non-blank, non-comment lines at this level
- $last_last_line_leading_level = $last_line_leading_level;
- $last_line_leading_level = $levels_to_go[$imin];
- if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
- $last_line_leading_type = $types_to_go[$imin];
- if ( $last_line_leading_level == $last_last_line_leading_level
- && $last_line_leading_type ne 'b'
- && $last_line_leading_type ne '#'
- && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
- {
- $nonblank_lines_at_depth[$last_line_leading_level]++;
- }
- else {
- $nonblank_lines_at_depth[$last_line_leading_level] = 1;
- }
-
- FORMATTER_DEBUG_FLAG_FLUSH && do {
- my ( $package, $file, $line ) = caller;
- print STDOUT
-"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
- };
-
- # add a couple of extra terminal blank tokens
- pad_array_to_go();
-
- # set all forced breakpoints for good list formatting
- my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
-
- # number of input lines spanned by the tokens of this batch
- my $old_line_count_in_batch =
- $rtoken_vars_to_go[$max_index_to_go]->[_LINE_INDEX_] -
- $rtoken_vars_to_go[0]->[_LINE_INDEX_] + 1;
-
- if (
- $is_long_line
- || $old_line_count_in_batch > 1
-
- # must always call scan_list() with unbalanced batches because it
- # is maintaining some stacks
- || is_unbalanced_batch()
-
- # call scan_list if we might want to break at commas
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
-
- # call scan_list if user may want to break open some one-line
- # hash references
- || ( $comma_arrow_count_contained
- && $rOpts_comma_arrow_breakpoints != 3 )
- )
- {
- ## This caused problems in one version of perl for unknown reasons:
- ## $saw_good_break ||= scan_list();
- my $sgb = scan_list();
- $saw_good_break ||= $sgb;
- }
-
- # let $ri_first and $ri_last be references to lists of
- # first and last tokens of line fragments to output..
- my ( $ri_first, $ri_last );
-
- # write a single line if..
- if (
-
- # we aren't allowed to add any newlines
- !$rOpts_add_newlines
-
- # or, we don't already have an interior breakpoint
- # and we didn't see a good breakpoint
- || (
- !$forced_breakpoint_count
- && !$saw_good_break
-
- # and this line is 'short'
- && !$is_long_line
- )
- )
- {
- @{$ri_first} = ($imin);
- @{$ri_last} = ($imax);
- }
-
- # otherwise use multiple lines
- else {
-
- ( $ri_first, $ri_last, my $colon_count ) =
- set_continuation_breaks($saw_good_break);
-
- break_all_chain_tokens( $ri_first, $ri_last );
-
- break_equals( $ri_first, $ri_last );
-
- # now we do a correction step to clean this up a bit
- # (The only time we would not do this is for debugging)
- if ( $rOpts->{'recombine'} ) {
- ( $ri_first, $ri_last ) =
- recombine_breakpoints( $ri_first, $ri_last );
- }
-
- insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
- }
-
- # do corrector step if -lp option is used
- my $do_not_pad = 0;
- if ($rOpts_line_up_parentheses) {
- $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
- }
- $self->unmask_phantom_semicolons( $ri_first, $ri_last );
- $self->send_lines_to_vertical_aligner( $ri_first, $ri_last,
- $do_not_pad );
-
- # Insert any requested blank lines after an opening brace. We have to
- # skip back before any side comment to find the terminal token
- my $iterm;
- for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
- next if $types_to_go[$iterm] eq '#';
- next if $types_to_go[$iterm] eq 'b';
- last;
- }
-
- # write requested number of blank lines after an opening block brace
- if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
- if ( $rOpts->{'blank-lines-after-opening-block'}
- && $block_type_to_go[$iterm]
- && $block_type_to_go[$iterm] =~
- /$blank_lines_after_opening_block_pattern/ )
- {
- my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->require_blank_code_lines($nblanks);
- }
- }
- }
-
- # This is called whether or not anything was written above; presumably
- # it resets the batch state for the next input -- TODO confirm
- prepare_for_new_input_lines();
-
- # output any new -cscw block comment
- if ($cscw_block_comment) {
- $self->flush();
- $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
- }
- return;
-}
-
-sub note_added_semicolon {
-
-    # Record that a semicolon was added at input line $line_number:
-    # remember the location of the most recent addition (and of the very
-    # first one), bump the running count, and note it in the logfile.
-    my $line_number = shift;
-
-    # only the first addition sets the 'first' location
-    my $is_first_addition = ( $added_semicolon_count == 0 );
-
-    $last_added_semicolon_at  = $line_number;
-    $first_added_semicolon_at = $line_number if ($is_first_addition);
-    $added_semicolon_count++;
-
-    write_logfile_entry("Added ';' here\n");
-    return;
-}
-
-sub note_deleted_semicolon {
-
-    # Record that a semicolon was deleted at the current input line
-    # ($input_line_number): remember the location of the most recent
-    # deletion (and of the very first one), bump the running count, and
-    # note it in the logfile.
-    my $is_first_deletion = ( $deleted_semicolon_count == 0 );
-
-    $last_deleted_semicolon_at = $input_line_number;
-    if ($is_first_deletion) {
-        $first_deleted_semicolon_at = $input_line_number;
-    }
-    $deleted_semicolon_count++;
-
-    write_logfile_entry("Deleted unnecessary ';'\n");
-    return;
-}
-
-sub note_embedded_tab {
-
-    # Record that an embedded tab was seen at the current input line
-    # ($input_line_number).  The first and most recent locations are
-    # remembered, and a logfile entry is written for the first
-    # MAX_NAG_MESSAGES occurrences only.
-    $last_embedded_tab_at = $input_line_number;
-
-    # keep the first location once it has been set to a true value
-    $first_embedded_tab_at ||= $last_embedded_tab_at;
-
-    $embedded_tab_count++;
-    write_logfile_entry("Embedded tabs in quote or pattern\n")
-      if ( $embedded_tab_count <= MAX_NAG_MESSAGES );
-    return;
-}
-
-sub starting_one_line_block {
-
-    # After seeing an opening curly brace, look for the closing brace
-    # and see if the entire block will fit on a line.  This routine is
-    # not always right because it uses the old whitespace, so a check
-    # is made later (at the closing brace) to make sure we really
-    # have a one-line block.  We have to do this preliminary check,
-    # though, because otherwise we would always break at a semicolon
-    # within a one-line block if the block contains multiple statements.
-    #
-    # Parameters:
-    #   $j            - index in $rtoken_array of the opening brace
-    #   $jmax         - index of the last token to scan
-    #   $level        - level supplied by the caller; compared against the
-    #                   level of the candidate start token
-    #   $slevel       - not used here
-    #   $ci_level     - not used here
-    #   $rtoken_array - reference to the array of token records, each
-    #                   indexed with _TYPE_, _TOKEN_ and _BLOCK_TYPE_
-    #
-    # Return value:
-    #   1 = distance from start of block to opening brace exceeds line
-    #       length, or the block type is not one handled here
-    #   0 = otherwise
-    #
-    # Side effect: any pending one-line block is destroyed, and a new one
-    # is created with create_one_line_block() if the block qualifies.
-
-    my ( $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
-
-    my $jmax_check = @{$rtoken_array};
-    if ( $jmax_check < $jmax ) {
-        print STDERR "jmax=$jmax > $jmax_check\n";
-    }
-
-    # Index of the last token actually present; used below to avoid
-    # reading (and thereby autovivifying) elements past the end.
-    my $i_last = $#{$rtoken_array};
-
-    # kill any current block - we can only go 1 deep
-    destroy_one_line_block();
-
-    my $i_start = 0;
-
-    # shouldn't happen: there must have been a prior call to
-    # store_token_to_go to put the opening brace in the output stream
-    if ( $max_index_to_go < 0 ) {
-        warning("program bug: store_token_to_go called incorrectly\n");
-        report_definite_bug();
-    }
-    else {
-
-        # cannot use one-line blocks with cuddled else/elsif lines
-        if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
-            return 0;
-        }
-    }
-
-    my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
-
-    # find the starting keyword for this block (such as 'if', 'else', ...)
-
-    if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
-        $i_start = $max_index_to_go;
-    }
-
-    # the previous nonblank token should start these block types
-    elsif (( $last_last_nonblank_token_to_go eq $block_type )
-        || ( $block_type =~ /^sub\b/ )
-        || $block_type =~ /\(\)/ )
-    {
-        $i_start = $last_last_nonblank_index_to_go;
-
-        # For signatures and extended syntax ...
-        # If this brace follows a parenthesized list, we should look back to
-        # find the keyword before the opening paren because otherwise we might
-        # form a one line block which stays intact, and cause the parenthesized
-        # expression to break open. That looks bad.  However, actually
-        # searching for the opening paren is slow and tedious.
-        # The actual keyword is often at the start of a line, but might not be.
-        # For example, we might have an anonymous sub with signature list
-        # following a =>.  It is safe to mark the start anywhere before the
-        # opening paren, so we just go back to the previous break (or start of
-        # the line) if that is before the opening paren.  The minor downside is
-        # that we may very occasionally break open a block unnecessarily.
-        if ( $tokens_to_go[$i_start] eq ')' ) {
-            $i_start = $index_max_forced_break + 1;
-            if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
-            my $lev = $levels_to_go[$i_start];
-            if ( $lev > $level ) { return 0 }
-        }
-    }
-
-    elsif ( $last_last_nonblank_token_to_go eq ')' ) {
-
-        # For something like "if (xxx) {", the keyword "if" will be
-        # just after the most recent break. This will be 0 unless
-        # we have just killed a one-line block and are starting another.
-        # (doif.t)
-        # Note: cannot use inext_index_to_go[] here because that array
-        # is still being constructed.
-        $i_start = $index_max_forced_break + 1;
-        if ( $types_to_go[$i_start] eq 'b' ) {
-            $i_start++;
-        }
-
-        # Patch to avoid breaking short blocks defined with extended_syntax:
-        # Strip off any trailing () which was added in the parser to mark
-        # the opening keyword.  For example, in the following
-        #    create( TypeFoo $e) {$bubba}
-        # the blocktype would be marked as create()
-        my $stripped_block_type = $block_type;
-        $stripped_block_type =~ s/\(\)$//;
-
-        unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
-            return 0;
-        }
-    }
-
-    # patch for SWITCH/CASE to retain one-line case/when blocks
-    elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
-
-        # Note: cannot use inext_index_to_go[] here because that array
-        # is still being constructed.
-        $i_start = $index_max_forced_break + 1;
-        if ( $types_to_go[$i_start] eq 'b' ) {
-            $i_start++;
-        }
-        unless ( $tokens_to_go[$i_start] eq $block_type ) {
-            return 0;
-        }
-    }
-
-    else {
-        return 1;
-    }
-
-    my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
-
-    # see if length is too long to even start
-    if ( $pos > maximum_line_length($i_start) ) {
-        return 1;
-    }
-
-    foreach my $i ( $j + 1 .. $jmax ) {
-
-        # old whitespace could be arbitrarily large, so don't use it
-        if   ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
-        else                                          { $pos += rtoken_length($i) }
-
-        # Return false result if we exceed the maximum line length,
-        if ( $pos > maximum_line_length($i_start) ) {
-            return 0;
-        }
-
-        # or encounter another opening brace before finding the closing brace.
-        elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
-            && $rtoken_array->[$i]->[_TYPE_] eq '{'
-            && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
-        {
-            return 0;
-        }
-
-        # if we find our closing brace..
-        elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
-            && $rtoken_array->[$i]->[_TYPE_] eq '}'
-            && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
-        {
-
-            # Be sure any trailing comment also fits on the line.  Find the
-            # next nonblank token, but never index past the last token:
-            # the closing brace may be the final token, and dereferencing
-            # a nonexistent element would autovivify it in the caller's
-            # array and compare an undefined value.
-            my $i_nonblank    = $i + 1;
-            my $skipped_blank = 0;
-            if (   $i_nonblank <= $i_last
-                && $rtoken_array->[$i_nonblank]->[_TYPE_] eq 'b' )
-            {
-                $i_nonblank++;
-                $skipped_blank = 1;
-            }
-
-            # Patch for one-line sort/map/grep/eval blocks with side comments:
-            # We will ignore the side comment length for sort/map/grep/eval
-            # because this can lead to statements which change every time
-            # perltidy is run.  Here is an example from Denis Moskowitz which
-            # oscillates between these two states without this patch:
-
-## --------
-## grep { $_->foo ne 'bar' }    # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-##   @baz;
-##
-## grep {
-##     $_->foo ne 'bar'
-##   }    # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-##   @baz;
-## --------
-
-            # When the first line is input it gets broken apart by the main
-            # line break logic in sub print_line_of_tokens.
-            # When the second line is input it gets recombined by
-            # print_line_of_tokens and passed to the output routines.  The
-            # output routines (set_continuation_breaks) do not break it apart
-            # because the bond strengths are set to the highest possible value
-            # for grep/map/eval/sort blocks, so the first version gets output.
-            # It would be possible to fix this by changing bond strengths,
-            # but they are high to prevent errors in older versions of perl.
-
-            if (   $i_nonblank <= $i_last
-                && $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
-                && !$is_sort_map_grep{$block_type} )
-            {
-
-                $pos += rtoken_length($i_nonblank);
-
-                # source whitespace could be anything, assume
-                # at least one space before the hash on output
-                if ($skipped_blank) { $pos += 1 }
-
-                if ( $pos >= maximum_line_length($i_start) ) {
-                    return 0;
-                }
-            }
-
-            # ok, it's a one-line block
-            create_one_line_block( $i_start, 20 );
-            return 0;
-        }
-
-        # otherwise just keep going for other characters
-    }
-
-    # Allow certain types of new one-line blocks to form by joining
-    # input lines.  These can be safely done, but for other block types,
-    # we keep old one-line blocks but do not form new ones. It is not
-    # always a good idea to make as many one-line blocks as possible,
-    # so other types are not done.  The user can always use -mangle.
-    if ( $is_sort_map_grep_eval{$block_type} ) {
-        create_one_line_block( $i_start, 1 );
-    }
-    return 0;
-}
-
-sub unstore_token_to_go {
-
-    # Drop the most recently stored token from the output stream by
-    # stepping $max_index_to_go back by one.  When no token would remain
-    # the index becomes UNDEFINED_INDEX.
-    my ($self) = @_;
-
-    $max_index_to_go =
-        $max_index_to_go > 0
-      ? $max_index_to_go - 1
-      : UNDEFINED_INDEX;
-    return;
-}
-
-sub want_blank_line {
-
-    # Flush this object, then pass the want_blank_line request on to
-    # the file writer.
-    my ($self) = @_;
-
-    $self->flush();
-    $file_writer_object->want_blank_line();
-
-    return;
-}
-
-sub write_unindented_line {
-
-    # Flush this object, then hand $line to the file writer's
-    # write_line method.
-    my $self = shift;
-    my $line = shift;
-
-    $self->flush();
-    $file_writer_object->write_line($line);
-
-    return;
-}
-
-sub undo_ci {
-
- # Undo continuation indentation in certain sequences
- # For example, we can undo continuation indentation in sort/map/grep chains
- # my $dat1 = pack( "n*",
- # map { $_, $lookup->{$_} }
- # sort { $a <=> $b }
- # grep { $lookup->{$_} ne $default } keys %$lookup );
- # To align the map/sort/grep keywords like this:
- # my $dat1 = pack( "n*",
- # map { $_, $lookup->{$_} }
- # sort { $a <=> $b }
- # grep { $lookup->{$_} ne $default } keys %$lookup );
- #
- # Parameters:
- # $ri_first - reference to list of the first token index of each line
- # $ri_last - reference to list of the last token index of each line
- # Returns nothing. For the lines of a completed chain it zeroes
- # @ci_levels_to_go and copies @reduced_spaces_to_go into
- # @leading_spaces_to_go at each line's first token.
- my ( $ri_first, $ri_last ) = @_;
-
- # $line_1 and $line_2 are the first and last line numbers of the chain
- # being tracked; $lev_last is the level of the previous line's first token
- my ( $line_1, $line_2, $lev_last );
- my $this_line_is_semicolon_terminated;
- my $max_line = @{$ri_first} - 1;
-
- # looking at each line of this batch..
- # We are looking at leading tokens and looking for a sequence
- # all at the same level and higher level than enclosing lines.
- foreach my $line ( 0 .. $max_line ) {
-
- my $ibeg = $ri_first->[$line];
- my $lev = $levels_to_go[$ibeg];
-
- # the first line only records its level; a chain can start no
- # earlier than the second line, so a defined $line_1 is never 0
- if ( $line > 0 ) {
-
- # if we have started a chain..
- if ($line_1) {
-
- # see if it continues..
- if ( $lev == $lev_last ) {
- if ( $types_to_go[$ibeg] eq 'k'
- && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
- {
-
- # chain continues...
- # check for chain ending at end of a statement
- if ( $line == $max_line ) {
-
- # see if this line ends a statement
- my $iend = $ri_last->[$line];
- $this_line_is_semicolon_terminated =
- $types_to_go[$iend] eq ';'
-
- # with possible side comment
- || ( $types_to_go[$iend] eq '#'
- && $iend - $ibeg >= 2
- && $types_to_go[ $iend - 2 ] eq ';'
- && $types_to_go[ $iend - 1 ] eq 'b' );
- }
- $line_2 = $line if ($this_line_is_semicolon_terminated);
- }
- else {
-
- # kill chain
- $line_1 = undef;
- }
- }
- elsif ( $lev < $lev_last ) {
-
- # chain ends with previous line
- $line_2 = $line - 1;
- }
- elsif ( $lev > $lev_last ) {
-
- # kill chain
- $line_1 = undef;
- }
-
- # undo the continuation indentation if a chain ends
- # NOTE(review): $line_2 is never reset to undef once it has
- # been set, so a later chain in the same batch may see the
- # value left by an earlier one -- confirm this is intended
- if ( defined($line_2) && defined($line_1) ) {
- my $continuation_line_count = $line_2 - $line_1 + 1;
-
- # the count guard applies only to this first assignment
- @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
- (0) x ($continuation_line_count)
- if ( $continuation_line_count >= 0 );
- @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
- = @reduced_spaces_to_go[ @{$ri_first}
- [ $line_1 .. $line_2 ] ];
- $line_1 = undef;
- }
- }
-
- # not in a chain yet..
- else {
-
- # look for start of a new sort/map/grep chain
- if ( $lev > $lev_last ) {
- if ( $types_to_go[$ibeg] eq 'k'
- && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
- {
- $line_1 = $line;
- }
- }
- }
- }
- $lev_last = $lev;
- }
- return;
-}
-
-sub undo_lp_ci {
-
-    # If there is a single, long parameter within parens, like this:
-    #
-    #  $self->command( "/msg "
-    #        . $infoline->chan
-    #        . " You said $1, but did you know that it's square was "
-    #        . $1 * $1 . " ?" );
-    #
-    # we can remove the continuation indentation of the 2nd and higher lines
-    # to achieve this effect, which is more pleasing:
-    #
-    #  $self->command("/msg "
-    #                 . $infoline->chan
-    #                 . " You said $1, but did you know that it's square was "
-    #                 . $1 * $1 . " ?");
-    #
-    # Parameters:
-    #   $line_open     - number of the line after which the scan starts
-    #   $i_start       - token index whose level and continuation level are
-    #                    the reference values for the following lines
-    #   $closing_index - token index at which the scan stops (presumably
-    #                    the container's closing token)
-    #   $ri_first      - reference to list of first token index of each line
-    #   $ri_last       - reference to list of last token index of each line
-    #
-    # Returns nothing.  Nothing is changed unless every scanned line is at
-    # the reference level with exactly one extra level of continuation
-    # indentation.
-
-    my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
-    my $max_line = @{$ri_first} - 1;
-
-    # must be multiple lines
-    return unless $max_line > $line_open;
-
-    my $lev_start     = $levels_to_go[$i_start];
-    my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
-
-    # see if all additional lines in this container have continuation
-    # indentation
-    my $n;
-    my $line_1 = 1 + $line_open;
-    for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
-        my $ibeg = $ri_first->[$n];
-        my $iend = $ri_last->[$n];
-
-        # a line starting with the closing token is not included;
-        # token indexes are numbers, so compare them numerically
-        if ( $ibeg == $closing_index ) { $n--; last }
-        return if ( $lev_start != $levels_to_go[$ibeg] );
-        return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
-        last   if ( $closing_index <= $iend );
-    }
-
-    # If the closing token was not seen, the loop leaves $n one past the
-    # last line.  Clamp it so the slices below never use a nonexistent
-    # line (an undefined index would be treated as token 0).
-    if ( $n > $max_line ) { $n = $max_line }
-
-    # we can reduce the indentation of all continuation lines
-    my $continuation_line_count = $n - $line_open;
-    @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
-      (0) x ($continuation_line_count);
-    @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
-      @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
-    return;
-}
-
-sub pad_token {
-
-    # Change the padding in front of token number $ipad by $pad_spaces.
-    # A positive value prepends that many spaces to the token; the value
-    # -1 empties a token which is exactly one space.  Any other request
-    # is ignored.  The stored token length and the summed lengths from
-    # this token onward are adjusted to match.
-    my ( $ipad, $pad_spaces ) = @_;
-
-    my $is_single_space = $tokens_to_go[$ipad] eq ' ';
-
-    # shouldn't happen: nothing we know how to do
-    return
-      unless ( $pad_spaces > 0 || ( $pad_spaces == -1 && $is_single_space ) );
-
-    $tokens_to_go[$ipad] =
-        $pad_spaces > 0
-      ? ' ' x $pad_spaces . $tokens_to_go[$ipad]
-      : "";
-
-    $token_lengths_to_go[$ipad] += $pad_spaces;
-
-    # summed lengths are stored one position past each token
-    foreach my $i_sum ( $ipad + 1 .. $max_index_to_go + 1 ) {
-        $summed_lengths_to_go[$i_sum] += $pad_spaces;
-    }
-    return;
-}
-
-{
- my %is_math_op;    # set for + - * / ; only the disabled '##?' patch below refers to it
-
- BEGIN {
-
- my @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
- }
-
- sub set_logical_padding {
- # Args: $ri_first, $ri_last = refs to lists of each line's first and last token index.
- # Look at a batch of lines and see if extra padding can improve the
- # alignment when there are certain leading operators. Here is an
- # example, in which some extra space is introduced before
- # '( $Year' to make it line up with the subsequent lines:
- #
- # if ( ( $Year < 1601 )
- # || ( $Year > 2899 )
- # || ( $EndYear < 1601 )
- # || ( $EndYear > 2899 ) )
- # {
- # &Error_OutOfRange;
- # }
- # Padding is applied by calling pad_token(); no useful value is returned.
- my ( $ri_first, $ri_last ) = @_;
- my $max_line = @{$ri_first} - 1;
-
- # FIXME: move these declarations below
- my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
- $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
- # ($ibegm, $iendm and $has_leading_op hold values saved from the previous line by the continue block)
- # looking at each line of this batch..
- foreach my $line ( 0 .. $max_line - 1 ) {
-
- # see if the next line begins with a logical operator
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
- $ibeg_next = $ri_first->[ $line + 1 ];
- $tok_next = $tokens_to_go[$ibeg_next];
- $type_next = $types_to_go[$ibeg_next];
-
- $has_leading_op_next = ( $tok_next =~ /^\w/ )
- ? $is_chain_operator{$tok_next} # word-like tokens: and, or
- : $is_chain_operator{$type_next}; # other tokens, by type: + - * / : ? && ||
-
- next unless ($has_leading_op_next);
-
- # next line must not be at lesser depth
- next
- if ( $nesting_depth_to_go[$ibeg] >
- $nesting_depth_to_go[$ibeg_next] );
-
- # identify the token in this line to be padded on the left
- $ipad = undef;
-
- # handle lines at same depth...
- if ( $nesting_depth_to_go[$ibeg] ==
- $nesting_depth_to_go[$ibeg_next] )
- {
-
- # if this is not first line of the batch ...
- if ( $line > 0 ) {
-
- # and we have leading operator..
- next if $has_leading_op;
-
- # Introduce padding if..
- # 1. the previous line is at lesser depth, or
- # 2. the previous line ends in an assignment
- # 3. the previous line ends in a 'return'
- # 4. the previous line ends in a comma
- # Example 1: previous line at lesser depth
- # if ( ( $Year < 1601 ) # <- we are here but
- # || ( $Year > 2899 ) # list has not yet
- # || ( $EndYear < 1601 ) # collapsed vertically
- # || ( $EndYear > 2899 ) )
- # {
- #
- # Example 2: previous line ending in assignment:
- # $leapyear =
- # $year % 4 ? 0 # <- We are here
- # : $year % 100 ? 1
- # : $year % 400 ? 0
- # : 1;
- #
- # Example 3: previous line ending in comma:
- # push @expr,
- # /test/ ? undef
- # : eval($_) ? 1
- # : eval($_) ? 1
- # : 0;
-
- # be sure levels agree (do not indent after an indented 'if')
- next
- if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
-
- # allow padding on first line after a comma but only if:
- # (1) this is line 2 and
- # (2) there are more than three lines and
- # (3) lines 3 and 4 have the same leading operator
- # These rules try to prevent padding within a long
- # comma-separated list.
- my $ok_comma;
- if ( $types_to_go[$iendm] eq ','
- && $line == 1
- && $max_line > 2 )
- {
- my $ibeg_next_next = $ri_first->[ $line + 2 ];
- my $tok_next_next = $tokens_to_go[$ibeg_next_next];
- $ok_comma = $tok_next_next eq $tok_next;
- }
-
- next
- unless (
- $is_assignment{ $types_to_go[$iendm] }
- || $ok_comma
- || ( $nesting_depth_to_go[$ibegm] <
- $nesting_depth_to_go[$ibeg] )
- || ( $types_to_go[$iendm] eq 'k'
- && $tokens_to_go[$iendm] eq 'return' )
- );
-
- # we will add padding before the first token
- $ipad = $ibeg;
- }
-
- # for first line of the batch..
- else {
-
- # WARNING: Never indent if first line is starting in a
- # continued quote, which would change the quote.
- next if $starting_in_quote;
-
- # if this is text after closing '}'
- # then look for an interior token to pad
- if ( $types_to_go[$ibeg] eq '}' ) {
- # nothing to do here: $ipad stays undefined, so the interior token search below will run
- }
-
- # otherwise, we might pad if it looks really good
- else {
-
- # we might pad token $ibeg, so be sure that it
- # is at the same depth as the next line.
- next
- if ( $nesting_depth_to_go[$ibeg] !=
- $nesting_depth_to_go[$ibeg_next] );
-
- # We can pad on line 1 of a statement if at least 3
- # lines will be aligned. Otherwise, it
- # can look very confusing.
-
- # We have to be careful not to pad if there are too few
- # lines. The current rule is:
- # (1) in general we require at least 3 consecutive lines
- # with the same leading chain operator token,
- # (2) but an exception is that we only require two lines
- # with leading colons if there are no more lines. For example,
- # the first $i in the following snippet would get padding
- # by the second rule:
- #
- # $i == 1 ? ( "First", "Color" )
- # : $i == 2 ? ( "Then", "Rarity" )
- # : ( "Then", "Name" );
-
- if ( $max_line > 1 ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $tokens_differ;
-
- # never indent line 1 of a '.' series because
- # previous line is most likely at same level.
- # TODO: we should also look at the leading_spaces
- # of the last output line and skip if it is same
- # as this line.
- next if ( $leading_token eq '.' );
-
- my $count = 1;
- foreach my $l ( 2 .. 3 ) {
- last if ( $line + $l > $max_line );
- my $ibeg_next_next = $ri_first->[ $line + $l ];
- if ( $tokens_to_go[$ibeg_next_next] ne
- $leading_token )
- {
- $tokens_differ = 1;
- last;
- }
- $count++;
- }
- next if ($tokens_differ);
- next if ( $count < 3 && $leading_token ne ':' );
- $ipad = $ibeg;
- }
- else {
- next;
- }
- }
- }
- }
-
- # find interior token to pad if necessary
- if ( !defined($ipad) ) {
-
- for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
-
- # find any unclosed container
- next
- unless ( $type_sequence_to_go[$i]
- && $mate_index_to_go[$i] > $iend );
-
- # find next nonblank token to pad
- $ipad = $inext_to_go[$i];
- last if ( $ipad > $iend );
- }
- last unless $ipad;    # NOTE(review): this leaves the loop over lines, not just this line
- }
-
- # We cannot pad the first leading token of a file because
- # it could cause a bug in which the starting indentation
- # level is guessed incorrectly each time the code is run
- # through perltidy, thus causing the code to march off to
- # the right. For example, the following snippet would have
- # this problem:
-
-## ov_method mycan( $package, '(""' ), $package
-## or ov_method mycan( $package, '(0+' ), $package
-## or ov_method mycan( $package, '(bool' ), $package
-## or ov_method mycan( $package, '(nomethod' ), $package;
-
- # If this snippet is within a block this won't happen
- # unless the user just processes the snippet alone within
- # an editor. In that case either the user will see and
- # fix the problem or it will be corrected next time the
- # entire file is processed with perltidy.
- ##next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
- next if ( $ipad == 0 && $peak_batch_size <= 1 );
-
-## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
-## IT DID MORE HARM THAN GOOD
-## ceil(
-## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
-## / $upem
-## ),
-##? # do not put leading padding for just 2 lines of math
-##? if ( $ipad == $ibeg
-##? && $line > 0
-##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
-##? && $is_math_op{$type_next}
-##? && $line + 2 <= $max_line )
-##? {
-##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
-##? my $type_next_next = $types_to_go[$ibeg_next_next];
-##? next if !$is_math_op{$type_next_next};
-##? }
-
- # next line must not be at greater depth
- my $iend_next = $ri_last->[ $line + 1 ];
- next
- if ( $nesting_depth_to_go[ $iend_next + 1 ] >
- $nesting_depth_to_go[$ipad] );
-
- # lines must be somewhat similar to be padded..
- my $inext_next = $inext_to_go[$ibeg_next];
- my $type = $types_to_go[$ipad];
- my $type_next = $types_to_go[ $ipad + 1 ];    # NOTE(review): shadows the outer $type_next; not read again below
-
- # see if there are multiple continuation lines
- my $logical_continuation_lines = 1;
- if ( $line + 2 <= $max_line ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $ibeg_next_next = $ri_first->[ $line + 2 ];
- if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
- && $nesting_depth_to_go[$ibeg_next] eq
- $nesting_depth_to_go[$ibeg_next_next] )
- {
- $logical_continuation_lines++;
- }
- }
-
- # see if leading types match
- my $types_match = $types_to_go[$inext_next] eq $type;
- my $matches_without_bang;
-
- # if first line has leading ! then compare the following token
- if ( !$types_match && $type eq '!' ) {
- $types_match = $matches_without_bang =
- $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
- }
-
- if (
-
- # either we have multiple continuation lines to follow
- # and we are not padding the first token
- ( $logical_continuation_lines > 1 && $ipad > 0 )
-
- # or..
- || (
-
- # types must match
- $types_match
-
- # and keywords must match if keyword
- && !(
- $type eq 'k'
- && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
- )
- )
- )
- {
-
- #----------------------begin special checks--------------
- #
- # SPECIAL CHECK 1:
- # A check is needed before we can make the pad.
- # If we are in a list with some long items, we want each
- # item to stand out. So in the following example, the
- # first line beginning with '$casefold->' would look good
- # padded to align with the next line, but then it
- # would be indented more than the last line, so we
- # won't do it.
- #
- # ok(
- # $casefold->{code} eq '0041'
- # && $casefold->{status} eq 'C'
- # && $casefold->{mapping} eq '0061',
- # 'casefold 0x41'
- # );
- #
- # Note:
- # It would be faster, and almost as good, to use a comma
- # count, and not pad if comma_count > 1 and the previous
- # line did not end with a comma.
- #
- my $ok_to_pad = 1;
-
- my $ibg = $ri_first->[ $line + 1 ];
- my $depth = $nesting_depth_to_go[ $ibg + 1 ];
-
- # just use simplified formula for leading spaces to avoid
- # needless sub calls
- my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
-
- # look at each line beyond the next ..
- my $l = $line + 1;
- foreach my $ltest ( $line + 2 .. $max_line ) {
- $l = $ltest;
- my $ibg = $ri_first->[$l];
-
- # quit looking at the end of this container
- last
- if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
- || ( $nesting_depth_to_go[$ibg] < $depth );
-
- # cannot do the pad if a later line would be
- # outdented more
- if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
- $ok_to_pad = 0;
- last;
- }
- }
-
- # don't pad if we end in a broken list
- if ( $l == $max_line ) {
- my $i2 = $ri_last->[$l];
- if ( $types_to_go[$i2] eq '#' ) {
- my $i1 = $ri_first->[$l];
- next
- if (
- terminal_type( \@types_to_go, \@block_type_to_go,
- $i1, $i2 ) eq ','
- );
- }
- }
-
- # SPECIAL CHECK 2:
- # a minus may introduce a quoted variable, and we will
- # add the pad only if this line begins with a bare word,
- # such as for the word 'Button' here:
- # [
- # Button => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- #
- # On the other hand, if 'Button' is quoted, it looks best
- # not to pad:
- # [
- # 'Button' => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- if ( $types_to_go[$ibeg_next] eq 'm' ) {
- $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
- }
-
- next unless $ok_to_pad;
-
- #----------------------end special check---------------
- # the pad is the difference between the lengths of the text ahead of the two tokens to be aligned
- my $length_1 = total_line_length( $ibeg, $ipad - 1 );
- my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
- $pad_spaces = $length_2 - $length_1;
-
- # If the first line has a leading ! and the second does
- # not, then remove one space to try to align the next
- # leading characters, which are often the same. For example:
- # if ( !$ts
- # || $ts == $self->Holder
- # || $self->Holder->Type eq "Arena" )
- #
- # This usually helps readability, but if there are subsequent
- # ! operators things will still get messed up. For example:
- #
- # if ( !exists $Net::DNS::typesbyname{$qtype}
- # && exists $Net::DNS::classesbyname{$qtype}
- # && !exists $Net::DNS::classesbyname{$qclass}
- # && exists $Net::DNS::typesbyname{$qclass} )
- # We can't fix that.
- if ($matches_without_bang) { $pad_spaces-- }
-
- # make sure this won't change if -lp is used
- my $indentation_1 = $leading_spaces_to_go[$ibeg];
- if ( ref($indentation_1) ) {
- if ( $indentation_1->get_recoverable_spaces() == 0 ) {
- my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
- unless ( $indentation_2->get_recoverable_spaces() == 0 )
- {
- $pad_spaces = 0;
- }
- }
- }
-
- # we might be able to handle a pad of -1 by removing a blank
- # token
- if ( $pad_spaces < 0 ) {
-
- if ( $pad_spaces == -1 ) {
- if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
- {
- pad_token( $ipad - 1, $pad_spaces );
- }
- }
- $pad_spaces = 0;
- }
-
- # now apply any padding for alignment
- if ( $ipad >= 0 && $pad_spaces ) {
- # but only if the padded line still fits within the maximum line length
- my $length_t = total_line_length( $ibeg, $iend );
- if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
- {
- pad_token( $ipad, $pad_spaces );
- }
- }
- }
- }
- continue {    # runs after every line, including those skipped with 'next'
- $iendm = $iend;
- $ibegm = $ibeg;
- $has_leading_op = $has_leading_op_next;
- } # end of loop over lines
- return;
- }
-}
-
-sub correct_lp_indentation {
- # Args: $ri_first, $ri_last = refs to lists of each line's first and last token index.
- # When the -lp option is used, we need to make a last pass through
- # each line to correct the indentation positions in case they differ
- # from the predictions. This is necessary because perltidy uses a
- # predictor/corrector method for aligning with opening parens. The
- # predictor is usually good, but sometimes stumbles. The corrector
- # tries to patch things up once the actual opening paren locations
- # are known.
- my ( $ri_first, $ri_last ) = @_;
- my $do_not_pad = 0;
- # Returns the flag $do_not_pad (0 or 1).
- # Note on flag '$do_not_pad':
- # We want to avoid a situation like this, where the aligner inserts
- # whitespace before the '=' to align it with a previous '=', because
- # otherwise the parens might become mis-aligned in a situation like
- # this, where the '=' has become aligned with the previous line,
- # pushing the opening '(' forward beyond where we want it.
- #
- # $mkFloor::currentRoom = '';
- # $mkFloor::c_entry = $c->Entry(
- # -width => '10',
- # -relief => 'sunken',
- # ...
- # );
- #
- # We leave it to the aligner to decide how to do this.
-
- # first remove continuation indentation if appropriate
- my $max_line = @{$ri_first} - 1;
-
- # looking at each line of this batch..
- my ( $ibeg, $iend );
- foreach my $line ( 0 .. $max_line ) {
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
-
- # looking at each token in this output line..
- foreach my $i ( $ibeg .. $iend ) {
-
- # NOTE(review): an older comment here spoke of padding done in a
- # "continue block", but this loop has none. Corrections are applied to
- # the indentation objects below or recorded as recoverable spaces.
-
- # looking for next unvisited indentation item
- my $indentation = $leading_spaces_to_go[$i];
- if ( !$indentation->get_marked() ) {
- $indentation->set_marked(1);
-
- # looking for indentation item for which we are aligning
- # with parens, braces, and brackets
- next unless ( $indentation->get_align_paren() );
-
- # skip closed container on this line
- if ( $i > $ibeg ) {
- my $im = max( $ibeg, $iprev_to_go[$i] );
- if ( $type_sequence_to_go[$im]
- && $mate_index_to_go[$im] <= $iend )
- {
- next;
- }
- }
- # the flag is set when such an item starts the second line of the batch
- if ( $line == 1 && $i == $ibeg ) {
- $do_not_pad = 1;
- }
-
- # Ok, let's see what the error is and try to fix it
- my $actual_pos;
- my $predicted_pos = $indentation->get_spaces();
- if ( $i > $ibeg ) {
-
- # token is mid-line - use length to previous token
- $actual_pos = total_line_length( $ibeg, $i - 1 );
-
- # for mid-line token, we must check to see if all
- # additional lines have continuation indentation,
- # and remove it if so. Otherwise, we do not get
- # good alignment.
- my $closing_index = $indentation->get_closed();
- if ( $closing_index > $iend ) {
- my $ibeg_next = $ri_first->[ $line + 1 ];
- if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
- undo_lp_ci( $line, $i, $closing_index, $ri_first,
- $ri_last );
- }
- }
- }
- elsif ( $line > 0 ) {
-
- # handle case where token starts a new line;
- # use length of previous line
- my $ibegm = $ri_first->[ $line - 1 ];
- my $iendm = $ri_last->[ $line - 1 ];
- $actual_pos = total_line_length( $ibegm, $iendm );
-
- # follow -pt style
- ++$actual_pos
- if ( $types_to_go[ $iendm + 1 ] eq 'b' );
- }
- else {
-
- # token is first character of first line of batch
- $actual_pos = $predicted_pos;
- }
- # positive means the actual position is to the right of the predicted one
- my $move_right = $actual_pos - $predicted_pos;
-
- # done if no error to correct (gnu2.t)
- if ( $move_right == 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
-
- # if we have not seen closure for this indentation in
- # this batch, we can only pass on a request to the
- # vertical aligner
- my $closing_index = $indentation->get_closed();
-
- if ( $closing_index < 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
-
- # If necessary, look ahead to see if there is really any
- # leading whitespace dependent on this whitespace, and
- # also find the longest line using this whitespace.
- # Since it is always safe to move left if there are no
- # dependents, we only need to do this if we may have
- # dependent nodes or need to move right.
-
- my $right_margin = 0;
- my $have_child = $indentation->get_have_child();
- # %saw_indentation collects each distinct indentation object seen (keyed by its stringified reference)
- my %saw_indentation;
- my $line_count = 1;
- $saw_indentation{$indentation} = $indentation;
-
- if ( $have_child || $move_right > 0 ) {
- $have_child = 0;
- my $max_length = 0;
- if ( $i == $ibeg ) {
- $max_length = total_line_length( $ibeg, $iend );
- }
-
- # look ahead at the rest of the lines of this batch..
- foreach my $line_t ( $line + 1 .. $max_line ) {
- my $ibeg_t = $ri_first->[$line_t];
- my $iend_t = $ri_last->[$line_t];
- last if ( $closing_index <= $ibeg_t );
-
- # remember all different indentation objects
- my $indentation_t = $leading_spaces_to_go[$ibeg_t];
- $saw_indentation{$indentation_t} = $indentation_t;
- $line_count++;
-
- # remember longest line in the group
- my $length_t = total_line_length( $ibeg_t, $iend_t );
- if ( $length_t > $max_length ) {
- $max_length = $length_t;
- }
- }
- $right_margin = maximum_line_length($ibeg) - $max_length;
- if ( $right_margin < 0 ) { $right_margin = 0 }
- }
-
- my $first_line_comma_count =
- grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
- my $comma_count = $indentation->get_comma_count();
- my $arrow_count = $indentation->get_arrow_count();
-
- # This is a simple approximate test for vertical alignment:
- # if we broke just after an opening paren, brace, bracket,
- # and there are 2 or more commas in the first line,
- # and there are no '=>'s,
- # then we are probably vertically aligned. We could set
- # an exact flag in sub scan_list, but this is good
- # enough.
- my $indentation_count = keys %saw_indentation;
- my $is_vertically_aligned =
- ( $i == $ibeg
- && $first_line_comma_count > 1
- && $indentation_count == 1
- && ( $arrow_count == 0 || $arrow_count == $line_count ) );
-
- # Make the move if possible ..
- if (
-
- # we can always move left
- $move_right < 0
-
- # but we should only move right if we are sure it will
- # not spoil vertical alignment
- || ( $comma_count == 0 )
- || ( $comma_count > 0 && !$is_vertically_aligned )
- )
- {
- my $move =
- ( $move_right <= $right_margin )
- ? $move_right
- : $right_margin;
- # apply the same move to every indentation object seen in this group
- foreach ( keys %saw_indentation ) {
- $saw_indentation{$_}
- ->permanently_decrease_available_spaces( -$move );
- }
- }
-
- # Otherwise, record what we want and the vertical aligner
- # will try to recover it.
- else {
- $indentation->set_recoverable_spaces($move_right);
- }
- }
- }
- }
- return $do_not_pad;
-}
-
-# flush is called to output any tokens in the pipeline, so that
-# an alternate source of lines can be written in the correct order.
-# It is a method (called on $self) and returns nothing.
-sub flush {
- my $self = shift;
- destroy_one_line_block();    # called before the batch is written
- $self->output_line_to_go();    # write out the batch of tokens collected so far
- Perl::Tidy::VerticalAligner::flush();    # then flush the vertical aligner as well
- return;
-}
-
-sub reset_block_text_accumulator {
-
-    # Stop accumulating -csc text.  Text which was being collected for an
-    # 'if' or 'elsif' is first saved, so that it can be appended after a
-    # later 'else'.
-    push @{$rleading_block_if_elsif_text}, $leading_block_text
-      if ( $accumulating_text_for_block
-        && $accumulating_text_for_block =~ /^(if|elsif)$/ );
-
-    # return all of the accumulator variables to their idle values
-    ( $accumulating_text_for_block, $leading_block_text ) = ( "", "" );
-    (
-        $leading_block_text_level,
-        $leading_block_text_length_exceeded,
-        $leading_block_text_line_number,
-        $leading_block_text_line_length
-    ) = ( 0, 0, 0, 0 );
-    return;
-}
-
-sub set_block_text_accumulator {
-
-    # Begin accumulating -csc text for the block keyword which is token
-    # number $i of the current batch.
-    my ($i) = @_;
-    my $keyword = $tokens_to_go[$i];
-    $accumulating_text_for_block = $keyword;
-
-    # any keyword which does not start with 'els' gets a fresh list of
-    # saved if/elsif text
-    $rleading_block_if_elsif_text = [] unless ( $keyword =~ /^els/ );
-
-    $leading_block_text                 = "";
-    $leading_block_text_length_exceeded = 0;
-    $leading_block_text_level           = $levels_to_go[$i];
-    $leading_block_text_line_number     = get_output_line_number();
-
-    # this will contain the column number of the last character
-    # of the closing side comment
-    my $indentation_width = $leading_block_text_level * $rOpts_indent_columns;
-    my $leading_width =
-      length($csc_last_label) +
-      length($keyword) +
-      length( $rOpts->{'closing-side-comment-prefix'} );
-    $leading_block_text_line_length =
-      $leading_width + $indentation_width + 3;
-    return;
-}
-
-sub accumulate_block_text {
- my $i = shift;
- # $i is the index of a token in the current batch; nothing useful is returned.
- # accumulate leading text for -csc, ignoring any side comments
- if ( $accumulating_text_for_block
- && !$leading_block_text_length_exceeded
- && $types_to_go[$i] ne '#' )
- {
- # length this token would add; one more for token 0, matching the space prepended below
- my $added_length = $token_lengths_to_go[$i];
- $added_length += 1 if $i == 0;
- my $new_line_length = $leading_block_text_line_length + $added_length;
-
- # we can add this text if we don't exceed some limits..
- if (
-
- # we must not have already exceeded the text length limit
- length($leading_block_text) <
- $rOpts_closing_side_comment_maximum_text
-
- # and either:
- # the new total line length must be below the line length limit
- # or the new length must be below the text length limit
- # (ie, we may allow one token to exceed the text length limit)
- && (
- $new_line_length <
- maximum_line_length_for_level($leading_block_text_level)
-
- || length($leading_block_text) + $added_length <
- $rOpts_closing_side_comment_maximum_text
- )
-
- # UNLESS: we are adding a closing paren before the brace we seek.
- # This is an attempt to avoid situations where the ... to be
- # added are longer than the omitted right paren, as in:
-
- # foreach my $item (@a_rather_long_variable_name_here) {
- # &whatever;
- # } ## end foreach my $item (@a_rather_long_variable_name_here...
- # (the brace we seek is looked for at index $i+1 or $i+2, if within this batch)
- || (
- $tokens_to_go[$i] eq ')'
- && (
- (
- $i + 1 <= $max_index_to_go
- && $block_type_to_go[ $i + 1 ] eq
- $accumulating_text_for_block
- )
- || ( $i + 2 <= $max_index_to_go
- && $block_type_to_go[ $i + 2 ] eq
- $accumulating_text_for_block )
- )
- )
- )
- {
-
- # add an extra space at each newline
- if ( $i == 0 ) { $leading_block_text .= ' ' }
-
- # add the token text
- $leading_block_text .= $tokens_to_go[$i];
- $leading_block_text_line_length = $new_line_length;
- }
-
- # show that text was truncated if necessary
- elsif ( $types_to_go[$i] ne 'b' ) {
- $leading_block_text_length_exceeded = 1;    # stops any further accumulation
- $leading_block_text .= '...';
- }
- }
- return;
-}
-
-{
- my %is_if_elsif_else_unless_while_until_for_foreach;
-
- BEGIN {
-
- # These block types may have text between the keyword and opening
- # curly. Note: 'else' does not, but must be included to allow trailing
- # if/elsif text to be appended.
- # patch for SWITCH/CASE: added 'case' and 'when'
- my @q =
- qw(if elsif else unless while until for foreach case when catch);
- @is_if_elsif_else_unless_while_until_for_foreach{@q} =
- (1) x scalar(@q);
- }
-
- sub accumulate_csc_text {
- # Takes no arguments; works on the current batch of tokens.
- # called once per output buffer when -csc is used. Accumulates
- # the text placed after certain closing block braces.
- # Defines the following for this buffer, and returns them in this order:
- # ( $terminal_type, $i_terminal, $i_block_leading_text, $block_leading_text, $block_line_count, $block_label )
- my $block_leading_text = ""; # the leading text of the last '}'
- my $rblock_leading_if_elsif_text;
- my $i_block_leading_text =
- -1; # index of token owning block_leading_text
- my $block_line_count = 100; # how many lines the block spans
- my $terminal_type = 'b'; # type of last nonblank token
- my $i_terminal = 0; # index of last nonblank token
- my $terminal_block_type = "";
-
- # update most recent statement label
- $csc_last_label = "" unless ($csc_last_label);
- if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
- my $block_label = $csc_last_label;
-
- # Loop over all tokens of this batch
- for my $i ( 0 .. $max_index_to_go ) {
- my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
- my $token = $tokens_to_go[$i];
-
- # remember last nonblank token type
- if ( $type ne '#' && $type ne 'b' ) {
- $terminal_type = $type;
- $terminal_block_type = $block_type;
- $i_terminal = $i;
- }
-
- my $type_sequence = $type_sequence_to_go[$i];
- if ( $block_type && $type_sequence ) {
-
- if ( $token eq '}' ) {
-
- # restore any leading text saved when we entered this block
- if ( defined( $block_leading_text{$type_sequence} ) ) {
- ( $block_leading_text, $rblock_leading_if_elsif_text )
- = @{ $block_leading_text{$type_sequence} };
- $i_block_leading_text = $i;
- delete $block_leading_text{$type_sequence};
- $rleading_block_if_elsif_text =
- $rblock_leading_if_elsif_text;
- }
- # likewise restore the label saved when this block was opened
- if ( defined( $csc_block_label{$type_sequence} ) ) {
- $block_label = $csc_block_label{$type_sequence};
- delete $csc_block_label{$type_sequence};
- }
-
- # if we run into a '}' then we probably started accumulating
- # at something like a trailing 'if' clause..no harm done.
- if ( $accumulating_text_for_block
- && $levels_to_go[$i] <= $leading_block_text_level )
- {
- my $lev = $levels_to_go[$i];    # NOTE(review): $lev is never read
- reset_block_text_accumulator();
- }
-
- if ( defined( $block_opening_line_number{$type_sequence} ) )
- {
- my $output_line_number = get_output_line_number();
- ##$vertical_aligner_object->get_output_line_number();
- $block_line_count =
- $output_line_number -
- $block_opening_line_number{$type_sequence} + 1;
- delete $block_opening_line_number{$type_sequence};
- }
- else {
-
- # Error: block opening line undefined for this line..
- # This shouldn't be possible, but it is not a
- # significant problem.
- }
- }
-
- elsif ( $token eq '{' ) {
- # remember the output line number on which this block opens
- my $line_number = get_output_line_number();
- ##$vertical_aligner_object->get_output_line_number();
- $block_opening_line_number{$type_sequence} = $line_number;
-
- # set a label for this block, except for
- # a bare block which already has the label
- # A label can only be used on the next {
- if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
- $csc_block_label{$type_sequence} = $csc_last_label;
- $csc_last_label = "";
-
- if ( $accumulating_text_for_block
- && $levels_to_go[$i] == $leading_block_text_level )
- {
-
- if ( $accumulating_text_for_block eq $block_type ) {
-
- # save any leading text before we enter this block
- $block_leading_text{$type_sequence} = [
- $leading_block_text,
- $rleading_block_if_elsif_text
- ];
- $block_opening_line_number{$type_sequence} =
- $leading_block_text_line_number;
- reset_block_text_accumulator();
- }
- else {
-
- # shouldn't happen, but not a serious error.
- # We were accumulating -csc text for block type
- # $accumulating_text_for_block and unexpectedly
- # encountered a '{' for block type $block_type.
- }
- }
- }
- }
- # start accumulating at a keyword of interest, if a new statement may begin here
- if ( $type eq 'k'
- && $csc_new_statement_ok
- && $is_if_elsif_else_unless_while_until_for_foreach{$token}
- && $token =~ /$closing_side_comment_list_pattern/o )
- {
- set_block_text_accumulator($i);
- }
- else {
-
- # note: ignoring type 'q' because of tricks being played
- # with 'q' for hanging side comments
- if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
- $csc_new_statement_ok =
- ( $block_type || $type eq 'J' || $type eq ';' );
- }
- if ( $type eq ';'
- && $accumulating_text_for_block
- && $levels_to_go[$i] == $leading_block_text_level )
- {
- reset_block_text_accumulator();
- }
- else {
- accumulate_block_text($i);
- }
- }
- }
-
- # Treat an 'else' block specially by adding preceding 'if' and
- # 'elsif' text. Otherwise, the 'end else' is not helpful,
- # especially for cuddled-else formatting.
- if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
- $block_leading_text =
- make_else_csc_text( $i_terminal, $terminal_block_type,
- $block_leading_text, $rblock_leading_if_elsif_text );
- }
-
- # if this line ends in a label then remember it for the next pass
- $csc_last_label = "";
- if ( $terminal_type eq 'J' ) {
- $csc_last_label = $tokens_to_go[$i_terminal];
- }
-
- return ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count, $block_label );
- }
-}
-
-sub make_else_csc_text {
-
-    # Build the extra -csc text for an 'else', and optionally an 'elsif',
-    # as selected by $rOpts_closing_side_comment_else_flag:
-    #
-    #  = 0 add 'if' text to trailing else
-    #  = 1 same as 0 plus:
-    #      add 'if' to 'elsif's if can fit in line length
-    #      add last 'elsif' to trailing else if can fit in one line
-    #  = 2 same as 1 but do not check if exceed line length
-    #
-    # Arguments:
-    #  $i_terminal         = index of the last nonblank token of the line
-    #  $block_type         = block type of that token ('else' or 'elsif')
-    #  $block_leading_text = the text collected so far for this block
-    #  $rif_elsif_text     = a reference to a list of all previous closing
-    #                        side comments created for this if block
-    #
-    # Returns the text, extended where the flag and line length allow.
-    my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
-
-    # an 'elsif' gets nothing extra when the flag is 0
-    return $block_leading_text
-      if ( $block_type eq 'elsif'
-        && $rOpts_closing_side_comment_else_flag == 0 );
-
-    # nothing can be added if no if/elsif text has been saved
-    my $saved_count = scalar @{$rif_elsif_text};
-    return $block_leading_text unless ($saved_count);
-
-    my $is_else = ( $block_type eq 'else' );
-    my $if_text = '[ if' . $rif_elsif_text->[0];
-
-    # always show the leading 'if' text on 'else'
-    my $base_text =
-      $is_else ? $block_leading_text . $if_text : $block_leading_text;
-
-    # see if that's all
-    return $base_text if ( $rOpts_closing_side_comment_else_flag == 0 );
-
-    # tentatively append one more item: the last 'elsif' for an 'else',
-    # otherwise the leading 'if'
-    my $extended_text;
-    if ($is_else) {
-        my $last_elsif_text = "";
-        if ( $saved_count > 1 ) {
-            $last_elsif_text = ' [elsif' . $rif_elsif_text->[-1];
-            $last_elsif_text = ' [...' . $last_elsif_text
-              if ( $saved_count > 2 );
-        }
-        $extended_text = $base_text . $last_elsif_text;
-    }
-    else {
-        $extended_text = $base_text . ' ' . $if_text;
-    }
-
-    # all done if no length checks requested
-    return $extended_text if ( $rOpts_closing_side_comment_else_flag == 2 );
-
-    # otherwise keep the extra item only if the line length is not exceeded
-    my $length =
-      length($extended_text) +
-      length($block_type) +
-      length( $rOpts->{'closing-side-comment-prefix'} ) +
-      $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
-    my $max_length = maximum_line_length_for_level($leading_block_text_level);
-    return ( $length > $max_length ) ? $base_text : $extended_text;
-}
-
-{    # sub balance_csc_text
-
-    # each structural character, opening or closing, mapped to its mate
-    my %matching_char;
-
-    BEGIN {
-        my @pairs = ( [ '{', '}' ], [ '(', ')' ], [ '[', ']' ] );
-        foreach my $pair (@pairs) {
-            my ( $opening, $closing ) = @{$pair};
-            $matching_char{$opening} = $closing;
-            $matching_char{$closing} = $opening;
-        }
-    }
-
-    sub balance_csc_text {
-
-        # Append characters to balance a closing side comment so that editors
-        # such as vim can correctly jump through code.
-        # Simple Example:
-        #  input  = ## end foreach my $foo ( sort { $b ...
-        #  output = ## end foreach my $foo ( sort { $b ...})
-
-        # NOTE: Structures within quoted text are not filtered out, because
-        # the bounce algorithms in text editors do not necessarily do this
-        # either (a version of vim was checked and did not do this).
-
-        # Some complex examples which will cause trouble for some editors:
-        #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
-        #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
-        #  if ( $1 eq '{' ) {
-        # test file test1/braces.pl has many such examples.
-
-        my ($csc) = @_;
-        my $balanced = $csc;
-
-        # Examine the original characters one-by-one, RIGHT to LEFT, and
-        # build the balancing ending, LEFT to RIGHT, using the end of the
-        # result string as a stack.
-        foreach my $char ( reverse split //, $csc ) {
-
-            # ignore everything except structural characters
-            my $mate = $matching_char{$char};
-            next unless ($mate);
-
-            # pop the most recently appended character; if it equals this
-            # character the two cancel and it stays removed ..
-            my $top = chop($balanced);
-            next if ( $top eq $char );
-
-            # .. otherwise push it back, followed by the mate
-            $balanced .= $top . $mate;
-        }
-
-        # return the balanced string
-        return $balanced;
-    }
-}
-
-sub add_closing_side_comment {
-
-    # Add (or check and update) a closing side comment after a closing
-    # block brace at the end of the current batch, if -csc is in use.
-    #
-    # Returns: $cscw_block_comment - text of a block comment preserving an
-    #   old side comment which was replaced under -cscw; undefined if
-    #   nothing was replaced.
-    #
-    # Side effects: may change, append or remove tokens at the end of the
-    # batch of '_to_go' tokens.
-
-    my $self = shift;
-
-    my $cscw_block_comment;
-
-    #---------------------------------------------------------------
-    # Step 1: loop through all tokens of this line to accumulate
-    # the text needed to create the closing side comments. Also see
-    # how the line ends.
-    #---------------------------------------------------------------
-
-    my ( $terminal_type, $i_terminal, $i_block_leading_text,
-        $block_leading_text, $block_line_count, $block_label )
-      = accumulate_csc_text();
-
-    #---------------------------------------------------------------
-    # Step 2: make the closing side comment if this ends a block
-    #---------------------------------------------------------------
-    my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
-
-    # if this line might end in a block closure..
-    if (
-        $terminal_type eq '}'
-
-        # ..and either
-        && (
-
-            # the block is long enough
-            ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
-
-            # or there is an existing comment to check
-            || (   $have_side_comment
-                && $rOpts->{'closing-side-comment-warnings'} )
-        )
-
-        # .. and if this is one of the types of interest
-        && $block_type_to_go[$i_terminal] =~
-        /$closing_side_comment_list_pattern/o
-
-        # .. but not an anonymous sub
-        # These are not normally of interest, and their closing braces are
-        # often followed by commas or semicolons anyway.  This also avoids
-        # possible erratic output due to line numbering inconsistencies
-        # in the cases where their closing braces terminate a line.
-        && $block_type_to_go[$i_terminal] ne 'sub'
-
-        # ..and the corresponding opening brace must not be in this batch
-        # (because we do not need to tag one-line blocks, although this
-        # should also be caught with a positive -csci value)
-        && $mate_index_to_go[$i_terminal] < 0
-
-        # ..and either
-        && (
-
-            # this is the last token (line doesn't have a side comment)
-            !$have_side_comment
-
-            # or the old side comment is a closing side comment
-            || $tokens_to_go[$max_index_to_go] =~
-            /$closing_side_comment_prefix_pattern/o
-        )
-      )
-    {
-
-        # then make the closing side comment text
-        if ($block_label) { $block_label .= " " }
-        my $token =
-"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
-
-        # append any extra descriptive text collected above
-        if ( $i_block_leading_text == $i_terminal ) {
-            $token .= $block_leading_text;
-        }
-
-        $token = balance_csc_text($token)
-          if $rOpts->{'closing-side-comments-balanced'};
-
-        $token =~ s/\s*$//;    # trim any trailing whitespace
-
-        # handle case of existing closing side comment
-        if ($have_side_comment) {
-
-            # warn if requested and tokens differ significantly
-            if ( $rOpts->{'closing-side-comment-warnings'} ) {
-                my $old_csc = $tokens_to_go[$max_index_to_go];
-                my $new_csc = $token;
-                $new_csc =~ s/\s+//g;            # trim all whitespace
-                $old_csc =~ s/\s+//g;            # trim all whitespace
-                $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
-                $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
-
-                # Trim trailing '...' and remember if the new text had it.
-                # The result of the substitution itself is used as the
-                # flag; reading $1 afterwards would be unsafe because a
-                # failed match leaves $1 holding whatever an earlier
-                # successful match set.
-                my $new_trailing_dots = ( $new_csc =~ s/(\.\.\.)$// );
-                $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
-
-                # Patch to handle multiple closing side comments at
-                # else and elsif's.  These have become too complicated
-                # to check, so if we see an indication of
-                # '[ if' or '[ # elsif', then assume they were made
-                # by perltidy.
-                if ( $block_type_to_go[$i_terminal] eq 'else' ) {
-                    if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
-                }
-                elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
-                    if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
-                }
-
-                # if old comment is contained in new comment,
-                # only compare the common part.
-                if ( length($new_csc) > length($old_csc) ) {
-                    $new_csc = substr( $new_csc, 0, length($old_csc) );
-                }
-
-                # if the new comment is shorter and has been limited,
-                # only compare the common part.
-                if ( length($new_csc) < length($old_csc)
-                    && $new_trailing_dots )
-                {
-                    $old_csc = substr( $old_csc, 0, length($new_csc) );
-                }
-
-                # any remaining difference?
-                if ( $new_csc ne $old_csc ) {
-
-                    # just leave the old comment if we are below the threshold
-                    # for creating side comments
-                    if ( $block_line_count <
-                        $rOpts->{'closing-side-comment-interval'} )
-                    {
-                        $token = undef;
-                    }
-
-                    # otherwise we'll make a note of it
-                    else {
-
-                        warning(
-"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
-                        );
-
-                        # save the old side comment in a new trailing block
-                        # comment, stamped with today's date
-                        my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
-                        $year  += 1900;
-                        $month += 1;
-                        $cscw_block_comment =
-"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
-                    }
-                }
-                else {
-
-                    # No differences.. we can safely delete old comment if we
-                    # are below the threshold
-                    if ( $block_line_count <
-                        $rOpts->{'closing-side-comment-interval'} )
-                    {
-                        $token = undef;
-
-                        # remove the side comment, then any blank before it
-                        $self->unstore_token_to_go()
-                          if ( $types_to_go[$max_index_to_go] eq '#' );
-                        $self->unstore_token_to_go()
-                          if ( $types_to_go[$max_index_to_go] eq 'b' );
-                    }
-                }
-            }
-
-            # switch to the new csc (unless we deleted it!)
-            $tokens_to_go[$max_index_to_go] = $token if $token;
-        }
-
-        # handle case of NO existing closing side comment
-        else {
-
-            # Remove any existing blank and add another below.
-            # This is a tricky point. A side comment needs to have the same
-            # level as the preceding closing brace or else the line will not
-            # get the right indentation. So even if we have a blank, we are
-            # going to replace it.
-            # (Called as a method, like the other calls in this sub.)
-            if ( $types_to_go[$max_index_to_go] eq 'b' ) {
-                $self->unstore_token_to_go();
-            }
-
-            # insert the new side comment into the output token stream,
-            # using the nesting depth of the token which is now last
-            my $type                 = '#';
-            my $slevel               = $nesting_depth_to_go[$max_index_to_go];
-            my $no_internal_newlines = 0;
-
-            # insert a blank token
-            $self->insert_new_token_to_go( ' ', 'b', $slevel,
-                $no_internal_newlines );
-
-            # then the side comment
-            $self->insert_new_token_to_go( $token, $type, $slevel,
-                $no_internal_newlines );
-        }
-    }
-    return $cscw_block_comment;
-}
-
-sub previous_nonblank_token {
-
-    # Return the text of the token before index $i in the current batch,
-    # stepping over at most one blank token.  Returns an empty string if
-    # there is no such token.
-    my ($i) = @_;
-
-    my $i_prev = $i - 1;
-    return "" if ( $i_prev < 0 );
-
-    # step over a single blank
-    $i_prev-- if ( $types_to_go[$i_prev] eq 'b' );
-    return "" if ( $i_prev < 0 );
-
-    my $text = $tokens_to_go[$i_prev];
-    return $text if ( $text ne '->' );
-
-    # prepend any sub name to an isolated -> to avoid unwanted alignments
-    # [test case is test8/penco.pl]
-    my $i_before_arrow = $i_prev - 1;
-    if ( $i_before_arrow >= 0 && $types_to_go[$i_before_arrow] ne 'b' ) {
-        $text = $tokens_to_go[$i_before_arrow] . $text;
-    }
-    return $text;
-}
-
-sub send_lines_to_vertical_aligner {
-
-    # Prepare each output line of the current batch and pass it to the
-    # vertical aligner.
-    #
-    # Given:
-    #   $ri_first   - ref to list of the first token index of each line
-    #   $ri_last    - ref to list of the last token index of each line
-    #   $do_not_pad - flag passed to the aligner with the first line only;
-    #                 it is cleared after the first line is sent
-    #
-    # Side effects: updates $last_output_short_opening_token and saves the
-    # indentation of lines with unmatched opening tokens.
-
-    my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_;
-
-    my $rindentation_list = [0];    # ref to indentations for each line
-
-    # define the array @matching_token_to_go for the output tokens
-    # which will be non-blank for each special token (such as =>)
-    # for which alignment is required.
-    set_vertical_alignment_markers( $ri_first, $ri_last );
-
-    # flush if necessary to avoid unwanted alignment
-    my $must_flush = 0;
-    if ( @{$ri_first} > 1 ) {
-
-        # flush before a long if statement
-        if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
-            $must_flush = 1;
-        }
-    }
-    if ($must_flush) {
-        Perl::Tidy::VerticalAligner::flush();
-    }
-
-    undo_ci( $ri_first, $ri_last );
-
-    set_logical_padding( $ri_first, $ri_last );
-
-    # loop to prepare each line for shipment
-    my $n_last_line = @{$ri_first} - 1;
-    my $in_comma_list;
-    for my $n ( 0 .. $n_last_line ) {
-        my $ibeg = $ri_first->[$n];
-        my $iend = $ri_last->[$n];
-
-        my ( $rtokens, $rfields, $rpatterns ) =
-          make_alignment_patterns( $ibeg, $iend );
-
-        # Set flag to show how much level changes between this line
-        # and the next line, if we have it.
-        my $ljump = 0;
-        if ( $n < $n_last_line ) {
-            my $ibegp = $ri_first->[ $n + 1 ];
-            $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
-        }
-
-        my ( $indentation, $lev, $level_end, $terminal_type,
-            $is_semicolon_terminated, $is_outdented_line )
-          = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
-            $ri_first, $ri_last, $rindentation_list, $ljump );
-
-        # we will allow outdenting of long lines..
-        my $outdent_long_lines = (
-
-            # which are long quotes, if allowed
-            ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
-
-            # which are long block comments, if allowed
-              || (
-                   $types_to_go[$ibeg] eq '#'
-                && $rOpts->{'outdent-long-comments'}
-
-                # but not if this is a static block comment
-                && !$is_static_block_comment
-              )
-        );
-
-        my $level_jump =
-          $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
-
-        my $rvertical_tightness_flags =
-          set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
-            $ri_first, $ri_last );
-
-        # flush an outdented line to avoid any unwanted vertical alignment
-        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
-        # Set a flag at the final ':' of a ternary chain to request
-        # vertical alignment of the final term.  Here is a
-        # slightly complex example:
-        #
-        # $self->{_text} = (
-        #    !$section        ? ''
-        #   : $type eq 'item' ? "the $section entry"
-        #   :                   "the section on $section"
-        # )
-        # . (
-        #   $page
-        #   ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
-        #   : ' elsewhere in this document'
-        # );
-        #
-        my $is_terminal_ternary = 0;
-        if (   $tokens_to_go[$ibeg] eq ':'
-            || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' )
-        {
-            my $last_leading_type = ":";
-            if ( $n > 0 ) {
-                my $iprev = $ri_first->[ $n - 1 ];
-                $last_leading_type = $types_to_go[$iprev];
-            }
-
-            # if this line is not the end of the statement, look at how
-            # the next line begins
-            if (   $terminal_type ne ';'
-                && $n_last_line > $n
-                && $level_end == $lev )
-            {
-                my $inext = $ri_first->[ $n + 1 ];
-                $level_end     = $levels_to_go[$inext];
-                $terminal_type = $types_to_go[$inext];
-            }
-
-            $is_terminal_ternary = $last_leading_type eq ':'
-              && ( ( $terminal_type eq ';' && $level_end <= $lev )
-                || ( $terminal_type ne ':' && $level_end < $lev ) )
-
-              # the terminal term must not contain any ternary terms, as in
-              # my $ECHO = (
-              #       $Is_MSWin32 ? ".\\echo$$"
-              #     : $Is_MacOS   ? ":echo$$"
-              #     : ( $Is_NetWare ? "echo$$" : "./echo$$" )
-              # );
-              && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
-        }
-
-        # send this new line down the pipe
-        my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
-        Perl::Tidy::VerticalAligner::valign_input(
-            $lev,
-            $level_end,
-            $indentation,
-            $rfields,
-            $rtokens,
-            $rpatterns,
-            $forced_breakpoint || $in_comma_list,
-            $outdent_long_lines,
-            $is_terminal_ternary,
-            $is_semicolon_terminated,
-            $do_not_pad,
-            $rvertical_tightness_flags,
-            $level_jump,
-        );
-
-        # remember, for the next line, if this line ended at a forced
-        # break after a comma
-        $in_comma_list = $tokens_to_go[$iend] eq ',' && $forced_breakpoint;
-
-        # flush an outdented line to avoid any unwanted vertical alignment
-        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
-        $do_not_pad = 0;
-
-        # Set flag indicating if this line ends in an opening
-        # token and is very short, so that a blank line is not
-        # needed if the subsequent line is a comment.
-        # Examples of what we are looking for:
-        #   {
-        #   && (
-        #   BEGIN {
-        #   default {
-        #   sub {
-        $last_output_short_opening_token
-
-          # line ends in opening token
-          = $types_to_go[$iend] =~ /^[\{\(\[L]$/
-
-          # and either
-          && (
-            # line has either single opening token
-            $iend == $ibeg
-
-            # or is a single token followed by opening token.
-            # Note that sub identifiers have blanks like 'sub doit'
-            || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
-          )
-
-          # and limit total to 10 character widths
-          && token_sequence_length( $ibeg, $iend ) <= 10;
-
-    }    # end of loop to output each line
-
-    # remember indentation of lines containing opening containers for
-    # later use by sub set_adjusted_indentation
-    save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
-    return;
-}
-
-{    # begin make_alignment_patterns
-
-    my %block_type_map;
-    my %keyword_map;
-
-    BEGIN {
-
-        # map related block names into a common name to
-        # allow alignment
-        %block_type_map = (
-            'unless'  => 'if',
-            'else'    => 'if',
-            'elsif'   => 'if',
-            'when'    => 'if',
-            'default' => 'if',
-            'case'    => 'if',
-            'sort'    => 'map',
-            'grep'    => 'map',
-        );
-
-        # map certain keywords to the same 'if' class to align
-        # long if/elsif sequences. [elsif.pl]
-        %keyword_map = (
-            'unless'  => 'if',
-            'else'    => 'if',
-            'elsif'   => 'if',
-            'when'    => 'given',
-            'default' => 'given',
-            'case'    => 'switch',
-
-            # treat an 'undef' similar to numbers and quotes
-            'undef' => 'Q',
-        );
-    }
-
-    sub make_alignment_patterns {
-
-        # Here we do some important preliminary work for the
-        # vertical aligner.  We create three arrays for one
-        # output line. These arrays contain strings that can
-        # be tested by the vertical aligner to see if
-        # consecutive lines can be aligned vertically.
-        #
-        # The three arrays are indexed on the vertical
-        # alignment fields and are:
-        # @tokens - a list of any vertical alignment tokens for this line.
-        #   These are tokens, such as '=' '&&' '#' etc which
-        #   we might want to align vertically.  These are
-        #   decorated with various information such as
-        #   nesting depth to prevent unwanted vertical
-        #   alignment matches.
-        # @fields - the actual text of the line between the vertical alignment
-        #   tokens.
-        # @patterns - a modified list of token types, one for each alignment
-        #   field.  These should normally each match before alignment is
-        #   allowed, even when the alignment tokens match.
-        #
-        # Given:   $ibeg, $iend - first and last token index of the line
-        # Returns: ( \@tokens, \@fields, \@patterns )
-        my ( $ibeg, $iend ) = @_;
-        my @tokens   = ();
-        my @fields   = ();
-        my @patterns = ();
-        my $i_start  = $ibeg;
-
-        # depth and names of the parenthesized containers which are
-        # balanced on this line
-        my $depth          = 0;
-        my @container_name = ("");
-
-        my $j = 0;    # field index
-
-        $patterns[0] = "";
-        for my $i ( $ibeg .. $iend ) {
-
-            # Keep track of containers balanced on this line only.
-            # These are used below to prevent unwanted cross-line alignments.
-            # Unbalanced containers already avoid aligning across
-            # container boundaries.
-            if ( $tokens_to_go[$i] eq '(' ) {
-
-                # if container is balanced on this line...
-                my $i_mate = $mate_index_to_go[$i];
-                if ( $i_mate > $i && $i_mate <= $iend ) {
-                    $depth++;
-
-                    # Append the previous token name to make the container name
-                    # more unique.  This name will also be given to any commas
-                    # within this container, and it helps avoid undesirable
-                    # alignments of different types of containers.
-                    my $name = previous_nonblank_token($i);
-                    $name =~ s/^->//;
-                    $container_name[$depth] = "+" . $name;
-
-                    # Make the container name even more unique if necessary.
-                    # If we are not vertically aligning this opening paren,
-                    # append a character count to avoid bad alignment because
-                    # it usually looks bad to align commas within containers
-                    # for which the opening parens do not align.  Here
-                    # is an example very BAD alignment of commas (because
-                    # the atan2 functions are not all aligned):
-                    #    $XY =
-                    #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
-                    #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
-                    #      $X * atan2( $X,            1 ) -
-                    #      $Y * atan2( $Y,            1 );
-                    #
-                    # On the other hand, it is usually okay to align commas if
-                    # opening parens align, such as:
-                    #    glVertex3d( $cx + $s * $xs, $cy,            $z );
-                    #    glVertex3d( $cx,            $cy + $s * $ys, $z );
-                    #    glVertex3d( $cx - $s * $xs, $cy,            $z );
-                    #    glVertex3d( $cx,            $cy - $s * $ys, $z );
-                    #
-                    # To distinguish between these situations, we will
-                    # append the length of the line from the previous matching
-                    # token, or beginning of line, to the function name.  This
-                    # will allow the vertical aligner to reject undesirable
-                    # matches.
-
-                    # if we are not aligning on this paren...
-                    if ( $matching_token_to_go[$i] eq '' ) {
-
-                        # Sum length from previous alignment, or start of line.
-                        my $len =
-                          ( $i_start == $ibeg )
-                          ? total_line_length( $i_start, $i - 1 )
-                          : token_sequence_length( $i_start, $i - 1 );
-
-                        # tack length onto the container name to make unique
-                        $container_name[$depth] .= "-" . $len;
-                    }
-                }
-            }
-            elsif ( $tokens_to_go[$i] eq ')' ) {
-                $depth-- if $depth > 0;
-            }
-
-            # if we find a new synchronization token, we are done with
-            # a field
-            if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
-
-                my $tok = my $raw_tok = $matching_token_to_go[$i];
-
-                # make separators in different nesting depths unique
-                # by appending the nesting depth digit.
-                if ( $raw_tok ne '#' ) {
-                    $tok .= "$nesting_depth_to_go[$i]";
-                }
-
-                # also decorate commas with any container name to avoid
-                # unwanted cross-line alignments.
-                if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
-                    if ( $container_name[$depth] ) {
-                        $tok .= $container_name[$depth];
-                    }
-                }
-
-                # Patch to avoid aligning leading and trailing if, unless.
-                # Mark trailing if, unless statements with container names.
-                # This makes them different from leading if, unless which
-                # are not so marked at present.  If we ever need to name
-                # them too, we could use ci to distinguish them.
-                # Example problem to avoid:
-                #    return ( 2, "DBERROR" )
-                #      if ( $retval == 2 );
-                #    if   ( scalar @_ ) {
-                #        my ( $a, $b, $c, $d, $e, $f ) = @_;
-                #    }
-                if ( $raw_tok eq '(' ) {
-                    my $ci = $ci_levels_to_go[$ibeg];
-                    if (   $container_name[$depth] =~ /^\+(if|unless)/
-                        && $ci )
-                    {
-                        $tok .= $container_name[$depth];
-                    }
-                }
-
-                # Decorate block braces with block types to avoid
-                # unwanted alignments such as the following:
-                # foreach ( @{$routput_array} ) { $fh->print($_) }
-                # eval                          { $fh->close() };
-                if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
-                    my $block_type = $block_type_to_go[$i];
-
-                    # map certain related block types to allow
-                    # else blocks to align
-                    $block_type = $block_type_map{$block_type}
-                      if ( defined( $block_type_map{$block_type} ) );
-
-                    # remove sub names to allow one-line sub braces to align
-                    # regardless of name
-                    if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
-
-                    # allow all control-type blocks to align
-                    if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
-
-                    $tok .= $block_type;
-                }
-
-                # concatenate the text of the consecutive tokens to form
-                # the field
-                push( @fields,
-                    join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
-
-                # store the alignment token for this field
-                push( @tokens, $tok );
-
-                # get ready for the next batch
-                $i_start = $i;
-                $j++;
-                $patterns[$j] = "";
-            }
-
-            # continue accumulating tokens
-            # handle non-keywords..
-            if ( $types_to_go[$i] ne 'k' ) {
-                my $type = $types_to_go[$i];
-
-                # Mark most things before arrows as a quote to
-                # get them to line up. Testfile: mixed.pl.
-                if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
-                    my $next_type = $types_to_go[ $i + 1 ];
-                    my $i_next_nonblank =
-                      ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
-
-                    if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
-                        $type = 'Q';
-
-                        # Patch to ignore leading minus before words,
-                        # by changing pattern 'mQ' into just 'Q',
-                        # so that we can align things like this:
-                        #  Button   => "Print letter \"~$_\"",
-                        #  -command => [ sub { print "$_[0]\n" }, $_ ],
-                        if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
-                    }
-                }
-
-                # patch to make numbers and quotes align
-                if ( $type eq 'n' ) { $type = 'Q' }
-
-                # patch to ignore any ! in patterns
-                if ( $type eq '!' ) { $type = '' }
-
-                $patterns[$j] .= $type;
-            }
-
-            # for keywords we have to use the actual text
-            else {
-
-                my $tok = $tokens_to_go[$i];
-
-                # but map certain keywords to a common string to allow
-                # alignment.
-                $tok = $keyword_map{$tok}
-                  if ( defined( $keyword_map{$tok} ) );
-                $patterns[$j] .= $tok;
-            }
-        }
-
-        # done with this line .. join text of tokens to make the last field
-        push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
-        return ( \@tokens, \@fields, \@patterns );
-    }
-
-}    # end make_alignment_patterns
-
-{    # begin unmatched_indexes
-
-    # Closure holding the bookkeeping for unbalanced containers of the
-    # current batch.  These variables are shared by the subs below and are
-    # reset by each call to sub match_opening_and_closing_tokens.
-    my @unmatched_opening_indexes_in_this_batch;
-    my @unmatched_closing_indexes_in_this_batch;
-    my %comma_arrow_count;
-
-    sub is_unbalanced_batch {
-
-        # Return the total number of unmatched opening and closing tokens
-        # in this batch (zero if the batch is balanced).
-        my $num_opening = @unmatched_opening_indexes_in_this_batch;
-        my $num_closing = @unmatched_closing_indexes_in_this_batch;
-        return $num_opening + $num_closing;
-    }
-
-    sub comma_arrow_count {
-
-        # Return the number of '=>' tokens counted for the container with
-        # the given sequence number; undefined if none were counted.
-        my ($seqno) = @_;
-        return $comma_arrow_count{$seqno};
-    }
-
-    sub match_opening_and_closing_tokens {
-
-        # Pair up the opening and closing braces, parens, brackets and
-        # ternary tokens of this batch, recording the index of each mate
-        # in @mate_index_to_go.  This has to be done after all tokens are
-        # stored because unstoring of tokens would otherwise cause trouble.
-        #
-        # Returns the total '=>' count of the containers which were
-        # matched within this batch.
-
-        @unmatched_opening_indexes_in_this_batch = ();
-        @unmatched_closing_indexes_in_this_batch = ();
-        %comma_arrow_count                       = ();
-        my $contained_total = 0;
-
-        foreach my $i ( 0 .. $max_index_to_go ) {
-            my $token = $tokens_to_go[$i];
-
-            # Tokens without a sequence number: only a '=>' is of interest;
-            # it is credited to the innermost container still open.
-            if ( !$type_sequence_to_go[$i] ) {
-                next if ( $token ne '=>' );
-                next unless (@unmatched_opening_indexes_in_this_batch);
-                my $i_container = $unmatched_opening_indexes_in_this_batch[-1];
-                $comma_arrow_count{ $type_sequence_to_go[$i_container] }++;
-                next;
-            }
-
-            # an opening token goes onto the stack
-            if ( $token =~ /^[\(\[\{\?]$/ ) {
-                push @unmatched_opening_indexes_in_this_batch, $i;
-                next;
-            }
-
-            # anything else with a sequence number must be a closing token
-            next unless ( $token =~ /^[\)\]\}\:]$/ );
-
-            my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
-
-            # nothing is open: this closing token has no mate in the batch
-            if ( !defined($i_mate) || $i_mate < 0 ) {
-                push @unmatched_closing_indexes_in_this_batch, $i;
-                next;
-            }
-
-            my $seqno = $type_sequence_to_go[$i];
-            if ( $type_sequence_to_go[$i_mate] == $seqno ) {
-
-                # a matched pair
-                $mate_index_to_go[$i]      = $i_mate;
-                $mate_index_to_go[$i_mate] = $i;
-                if ( $comma_arrow_count{$seqno} ) {
-                    $contained_total += $comma_arrow_count{$seqno};
-                }
-            }
-            else {
-
-                # not a pair: restore the opening token and note that
-                # this closing token is unmatched
-                push @unmatched_opening_indexes_in_this_batch, $i_mate;
-                push @unmatched_closing_indexes_in_this_batch, $i;
-            }
-        }
-        return $contained_total;
-    }
-
-    sub save_opening_indentation {
-
-        # This should be called after each batch of tokens is output. It
-        # saves indentations of lines of all unmatched opening tokens.
-        # These will be used by sub get_opening_indentation.
-
-        my ( $ri_first, $ri_last, $rindentation_list ) = @_;
-
-        # Saved indentations belonging to the unmatched closing tokens of
-        # this batch are no longer needed because we will never encounter
-        # those tokens again.  Delete them to keep the hash size down.
-        foreach my $i_closing (@unmatched_closing_indexes_in_this_batch) {
-            delete $saved_opening_indentation{ $type_sequence_to_go[$i_closing]
-            };
-        }
-
-        # Indentations of the unmatched opening tokens of this batch must
-        # be saved because we may need them in a subsequent batch.
-        foreach my $i_opening (@unmatched_opening_indexes_in_this_batch) {
-            my @opening_info =
-              lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
-                $rindentation_list );
-            $saved_opening_indentation{ $type_sequence_to_go[$i_opening] } =
-              [@opening_info];
-        }
-        return;
-    }
-}    # end unmatched_indexes
-
-sub get_opening_indentation {
-
-    # Get the indentation of the line which output the opening token
-    # corresponding to a given closing token in the current output batch.
-    #
-    # given:
-    # $i_closing - index in this line of a closing token ')' '}' or ']'
-    #
-    # $ri_first - reference to list of the first index $i for each output
-    #               line in this batch
-    # $ri_last - reference to list of the last index $i for each output line
-    #              in this batch
-    # $rindentation_list - reference to a list containing the indentation
-    #            used for each line.
-    #
-    # return a list of four values:
-    #   $indent     - the indentation of the line which contained the
-    #                 opening token which matches the token at $i_closing
-    #   $offset     - the offset (number of columns) of that opening token
-    #                 from the start of its line
-    #   $is_leading - true if the opening token was the first token of
-    #                 its line
-    #   $exists     - true if the opening token was found; otherwise the
-    #                 first three values are all returned as 0
-    #
-    my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
-
-    my ( $indent, $offset, $is_leading );
-
-    # Case 1: the opening token is in the current batch .. look it up
-    my $i_opening = $mate_index_to_go[$i_closing];
-    if ( $i_opening >= 0 ) {
-        ( $indent, $offset, $is_leading ) =
-          lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
-            $rindentation_list );
-        return ( $indent, $offset, $is_leading, 1 );
-    }
-
-    # Case 2: it should have been stored in the hash by a previous batch
-    my $seqno = $type_sequence_to_go[$i_closing];
-    if ( $seqno && $saved_opening_indentation{$seqno} ) {
-        ( $indent, $offset, $is_leading ) =
-          @{ $saved_opening_indentation{$seqno} };
-        return ( $indent, $offset, $is_leading, 1 );
-    }
-
-    # Case 3: not found.  Either there is no sequence number, so this must
-    # be an unbalanced container, or nothing was saved for the sequence
-    # number, which is some kind of serious error (example is badfile.t).
-    return ( 0, 0, 0, 0 );
-}
-
-sub lookup_opening_indentation {
-
-    # Get the indentation of the line in the current output batch
-    # which output a selected opening token.
-    #
-    # given:
-    #   $i_opening - index of an opening token in the current output batch
-    #                whose line indentation we need
-    #   $ri_first - reference to list of the first index $i for each output
-    #               line in this batch
-    #   $ri_last - reference to list of the last index $i for each output line
-    #              in this batch
-    #   $rindentation_list - reference to a list containing the indentation
-    #            used for each line.  (NOTE: the first slot in
-    #            this list is the last returned line number, and this is
-    #            followed by the list of indentations).
-    #
-    # return a list of three values:
-    #   - the indentation of the line which contained token $i_opening
-    #   - the offset (number of columns) of the token from the start of
-    #     the line
-    #   - a flag which is true if the token is the first token of the line
-
-    my ( $i_opening, $ri_first, $ri_last, $rindentation_list ) = @_;
-
-    # start the search at the line found by the previous lookup ..
-    my $line_index = $rindentation_list->[0];
-
-    # .. unless the token lies before that line
-    if ( $i_opening < $ri_first->[$line_index] ) { $line_index = 0 }
-
-    # error - token index is out of bounds - shouldn't happen
-    if ( $i_opening > $ri_last->[-1] ) {
-        warning(
-"non-fatal program bug in lookup_opening_indentation - index out of range\n"
-        );
-        report_definite_bug();
-        $line_index = $#{$ri_last};
-    }
-
-    # otherwise step forward to the line containing the token
-    else {
-        while ( $i_opening > $ri_last->[$line_index] ) {
-            $line_index++;
-        }
-    }
-
-    # save line number to start looking next call
-    $rindentation_list->[0] = $line_index;
-
-    my $i_line_start = $ri_first->[$line_index];
-    my $is_leading   = ( $i_line_start == $i_opening );
-    my $offset       = token_sequence_length( $i_line_start, $i_opening ) - 1;
-    return ( $rindentation_list->[ $line_index + 1 ], $offset, $is_leading );
-}
-
-{
- my %is_if_elsif_else_unless_while_until_for_foreach;
-
- BEGIN {
-
- # These block types may have text between the keyword and opening
- # curly. Note: 'else' does not, but must be included to allow trailing
- # if/elsif text to be appended.
- # patch for SWITCH/CASE: added 'case' and 'when'
- my @q = qw(if elsif else unless while until for foreach case when);
- @is_if_elsif_else_unless_while_until_for_foreach{@q} =
- (1) x scalar(@q);
- }
-
- sub set_adjusted_indentation {
-
- # This routine has the final say regarding the actual indentation of
- # a line. It starts with the basic indentation which has been
- # defined for the leading token, and then takes into account any
- # options that the user has set regarding special indenting and
- # outdenting.
-
- my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
- $rindentation_list, $level_jump )
- = @_;
-
- # we need to know the last token of this line
- my ( $terminal_type, $i_terminal ) =
- terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
-
- my $is_outdented_line = 0;
-
- my $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
-
- # NOTE: A future improvement would be to make it semicolon terminated
- # even if it does not have a semicolon but is followed by a closing
- # block brace. This would undo ci even for something like the
- # following, in which the final paren does not have a semicolon because
- # it is a possible weld location:
-
- # if ($BOLD_MATH) {
- # (
- # $labels, $comment,
- # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
- # )
- # }
- #
-
- # MOJO: Set a flag if this lines begins with ')->'
- my $leading_paren_arrow = (
- $types_to_go[$ibeg] eq '}'
- && $tokens_to_go[$ibeg] eq ')'
- && (
- ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
- || ( $ibeg < $i_terminal - 1
- && $types_to_go[ $ibeg + 1 ] eq 'b'
- && $types_to_go[ $ibeg + 2 ] eq '->' )
- )
- );
-
- ##########################################################
- # Section 1: set a flag and a default indentation
- #
- # Most lines are indented according to the initial token.
- # But it is common to outdent to the level just after the
- # terminal token in certain cases...
- # adjust_indentation flag:
- # 0 - do not adjust
- # 1 - outdent
- # 2 - vertically align with opening token
- # 3 - indent
- ##########################################################
- my $adjust_indentation = 0;
- my $default_adjust_indentation = $adjust_indentation;
-
- my (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- );
-
- # if we are at a closing token of some type..
- if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
-
- # get the indentation of the line containing the corresponding
- # opening token
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
-
- # First set the default behavior:
- if (
-
- # default behavior is to outdent closing lines
- # of the form: "); }; ]; )->xxx;"
- $is_semicolon_terminated
-
- # and 'cuddled parens' of the form: ")->pack("
- # Bug fix for RT #123749]: the types here were
- # incorrectly '(' and ')'. Corrected to be '{' and '}'
- || (
- $terminal_type eq '{'
- && $types_to_go[$ibeg] eq '}'
- && ( $nesting_depth_to_go[$iend] + 1 ==
- $nesting_depth_to_go[$ibeg] )
- )
-
- # remove continuation indentation for any line like
- # } ... {
- # or without ending '{' and unbalanced, such as
- # such as '}->{$operator}'
- || (
- $types_to_go[$ibeg] eq '}'
-
- && ( $types_to_go[$iend] eq '{'
- || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
- )
-
- # and when the next line is at a lower indentation level
- # PATCH: and only if the style allows undoing continuation
- # for all closing token types. We should really wait until
- # the indentation of the next line is known and then make
- # a decision, but that would require another pass.
- || ( $level_jump < 0 && !$some_closing_token_indentation )
-
- # Patch for -wn=2, multiple welded closing tokens
- || ( $i_terminal > $ibeg
- && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
-
- )
- {
- $adjust_indentation = 1;
- }
-
- # outdent something like '),'
- if (
- $terminal_type eq ','
-
- # Removed this constraint for -wn
- # OLD: allow just one character before the comma
- # && $i_terminal == $ibeg + 1
-
- # require LIST environment; otherwise, we may outdent too much -
- # this can happen in calls without parentheses (overload.t);
- && $container_environment_to_go[$i_terminal] eq 'LIST'
- )
- {
- $adjust_indentation = 1;
- }
-
- # undo continuation indentation of a terminal closing token if
- # it is the last token before a level decrease. This will allow
- # a closing token to line up with its opening counterpart, and
- # avoids a indentation jump larger than 1 level.
- if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
- && $i_terminal == $ibeg )
- {
- my $ci = $ci_levels_to_go[$ibeg];
- my $lev = $levels_to_go[$ibeg];
- my $next_type = $types_to_go[ $ibeg + 1 ];
- my $i_next_nonblank =
- ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
- if ( $i_next_nonblank <= $max_index_to_go
- && $levels_to_go[$i_next_nonblank] < $lev )
- {
- $adjust_indentation = 1;
- }
-
- # Patch for RT #96101, in which closing brace of anonymous subs
- # was not outdented. We should look ahead and see if there is
- # a level decrease at the next token (i.e., a closing token),
- # but right now we do not have that information. For now
- # we see if we are in a list, and this works well.
- # See test files 'sub*.t' for good test cases.
- if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
- && $container_environment_to_go[$i_terminal] eq 'LIST'
- && !$rOpts->{'indent-closing-brace'} )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
- }
- }
-
- # YVES patch 1 of 2:
- # Undo ci of line with leading closing eval brace,
- # but not beyond the indention of the line with
- # the opening brace.
- if ( $block_type_to_go[$ibeg] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'} )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
- }
-
- $default_adjust_indentation = $adjust_indentation;
-
- # Now modify default behavior according to user request:
- # handle option to indent non-blocks of the form ); }; ];
- # But don't do special indentation to something like ')->pack('
- if ( !$block_type_to_go[$ibeg] ) {
- my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
- if ( $cti == 1 ) {
- if ( $i_terminal <= $ibeg + 1
- || $is_semicolon_terminated )
- {
- $adjust_indentation = 2;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 2 ) {
- if ($is_semicolon_terminated) {
- $adjust_indentation = 3;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 3 ) {
- $adjust_indentation = 3;
- }
- }
-
- # handle option to indent blocks
- else {
- if (
- $rOpts->{'indent-closing-brace'}
- && (
- $i_terminal == $ibeg # isolated terminal '}'
- || $is_semicolon_terminated
- )
- ) # } xxxx ;
- {
- $adjust_indentation = 3;
- }
- }
- }
-
- # if at ');', '};', '>;', and '];' of a terminal qw quote
- elsif ($rpatterns->[0] =~ /^qb*;$/
- && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
- {
- if ( $closing_token_indentation{$1} == 0 ) {
- $adjust_indentation = 1;
- }
- else {
- $adjust_indentation = 3;
- }
- }
-
- # if line begins with a ':', align it with any
- # previous line leading with corresponding ?
- elsif ( $types_to_go[$ibeg] eq ':' ) {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- if ($is_leading) { $adjust_indentation = 2; }
- }
-
- ##########################################################
- # Section 2: set indentation according to flag set above
- #
- # Select the indentation object to define leading
- # whitespace. If we are outdenting something like '} } );'
- # then we want to use one level below the last token
- # ($i_terminal) in order to get it to fully outdent through
- # all levels.
- ##########################################################
- my $indentation;
- my $lev;
- my $level_end = $levels_to_go[$iend];
-
- if ( $adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- $lev = $levels_to_go[$ibeg];
- }
- elsif ( $adjust_indentation == 1 ) {
-
- # Change the indentation to be that of a different token on the line
- # Previously, the indentation of the terminal token was used:
- # OLD CODING:
- # $indentation = $reduced_spaces_to_go[$i_terminal];
- # $lev = $levels_to_go[$i_terminal];
-
- # Generalization for MOJO:
- # Use the lowest level indentation of the tokens on the line.
- # For example, here we can use the indentation of the ending ';':
- # } until ($selection > 0 and $selection < 10); # ok to use ';'
- # But this will not outdent if we use the terminal indentation:
- # )->then( sub { # use indentation of the ->, not the {
- # Warning: reduced_spaces_to_go[] may be a reference, do not
- # do numerical checks with it
-
- my $i_ind = $ibeg;
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- while ( $i_ind < $i_terminal ) {
- $i_ind++;
- if ( $levels_to_go[$i_ind] < $lev ) {
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- }
- }
- }
-
- # handle indented closing token which aligns with opening token
- elsif ( $adjust_indentation == 2 ) {
-
- # handle option to align closing token with opening token
- $lev = $levels_to_go[$ibeg];
-
- # calculate spaces needed to align with opening token
- my $space_count =
- get_spaces($opening_indentation) + $opening_offset;
-
- # Indent less than the previous line.
- #
- # Problem: For -lp we don't exactly know what it was if there
- # were recoverable spaces sent to the aligner. A good solution
- # would be to force a flush of the vertical alignment buffer, so
- # that we would know. For now, this rule is used for -lp:
- #
- # When the last line did not start with a closing token we will
- # be optimistic that the aligner will recover everything wanted.
- #
- # This rule will prevent us from breaking a hierarchy of closing
- # tokens, and in a worst case will leave a closing paren too far
- # indented, but this is better than frequently leaving it not
- # indented enough.
- my $last_spaces = get_spaces($last_indentation_written);
- if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
- $last_spaces +=
- get_recoverable_spaces($last_indentation_written);
- }
-
- # reset the indentation to the new space count if it works
- # only options are all or none: nothing in-between looks good
- $lev = $levels_to_go[$ibeg];
- if ( $space_count < $last_spaces ) {
- if ($rOpts_line_up_parentheses) {
- my $lev = $levels_to_go[$ibeg];
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
- }
- else {
- $indentation = $space_count;
- }
- }
-
- # revert to default if it doesn't work
- else {
- $space_count = leading_spaces_to_go($ibeg);
- if ( $default_adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- }
- elsif ( $default_adjust_indentation == 1 ) {
- $indentation = $reduced_spaces_to_go[$i_terminal];
- $lev = $levels_to_go[$i_terminal];
- }
- }
- }
-
- # Full indentaion of closing tokens (-icb and -icp or -cti=2)
- else {
-
- # handle -icb (indented closing code block braces)
- # Updated method for indented block braces: indent one full level if
- # there is no continuation indentation. This will occur for major
- # structures such as sub, if, else, but not for things like map
- # blocks.
- #
- # Note: only code blocks without continuation indentation are
- # handled here (if, else, unless, ..). In the following snippet,
- # the terminal brace of the sort block will have continuation
- # indentation as shown so it will not be handled by the coding
- # here. We would have to undo the continuation indentation to do
- # this, but it probably looks ok as is. This is a possible future
- # update for semicolon terminated lines.
- #
- # if ($sortby eq 'date' or $sortby eq 'size') {
- # @files = sort {
- # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
- # or $a cmp $b
- # } @files;
- # }
- #
- if ( $block_type_to_go[$ibeg]
- && $ci_levels_to_go[$i_terminal] == 0 )
- {
- my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
- $indentation = $spaces + $rOpts_indent_columns;
-
- # NOTE: for -lp we could create a new indentation object, but
- # there is probably no need to do it
- }
-
- # handle -icp and any -icb block braces which fall through above
- # test such as the 'sort' block mentioned above.
- else {
-
- # There are currently two ways to handle -icp...
- # One way is to use the indentation of the previous line:
- # $indentation = $last_indentation_written;
-
- # The other way is to use the indentation that the previous line
- # would have had if it hadn't been adjusted:
- $indentation = $last_unadjusted_indentation;
-
- # Current method: use the minimum of the two. This avoids
- # inconsistent indentation.
- if ( get_spaces($last_indentation_written) <
- get_spaces($indentation) )
- {
- $indentation = $last_indentation_written;
- }
- }
-
- # use previous indentation but use own level
- # to cause list to be flushed properly
- $lev = $levels_to_go[$ibeg];
- }
-
- # remember indentation except for multi-line quotes, which get
- # no indentation
- unless ( $ibeg == 0 && $starting_in_quote ) {
- $last_indentation_written = $indentation;
- $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
- $last_leading_token = $tokens_to_go[$ibeg];
- }
-
- # be sure lines with leading closing tokens are not outdented more
- # than the line which contained the corresponding opening token.
-
- #############################################################
- # updated per bug report in alex_bug.pl: we must not
- # mess with the indentation of closing logical braces so
- # we must treat something like '} else {' as if it were
- # an isolated brace my $is_isolated_block_brace = (
- # $iend == $ibeg ) && $block_type_to_go[$ibeg];
- #############################################################
- my $is_isolated_block_brace = $block_type_to_go[$ibeg]
- && ( $iend == $ibeg
- || $is_if_elsif_else_unless_while_until_for_foreach{
- $block_type_to_go[$ibeg]
- } );
-
- # only do this for a ':; which is aligned with its leading '?'
- my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
-
- if (
- defined($opening_indentation)
- && !$leading_paren_arrow # MOJO
- && !$is_isolated_block_brace
- && !$is_unaligned_colon
- )
- {
- if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
- $indentation = $opening_indentation;
- }
- }
-
- # remember the indentation of each line of this batch
- push @{$rindentation_list}, $indentation;
-
- # outdent lines with certain leading tokens...
- if (
-
- # must be first word of this batch
- $ibeg == 0
-
- # and ...
- && (
-
- # certain leading keywords if requested
- (
- $rOpts->{'outdent-keywords'}
- && $types_to_go[$ibeg] eq 'k'
- && $outdent_keyword{ $tokens_to_go[$ibeg] }
- )
-
- # or labels if requested
- || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
-
- # or static block comments if requested
- || ( $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-static-block-comments'}
- && $is_static_block_comment )
- )
- )
-
- {
- my $space_count = leading_spaces_to_go($ibeg);
- if ( $space_count > 0 ) {
- $space_count -= $rOpts_continuation_indentation;
- $is_outdented_line = 1;
- if ( $space_count < 0 ) { $space_count = 0 }
-
- # do not promote a spaced static block comment to non-spaced;
- # this is not normally necessary but could be for some
- # unusual user inputs (such as -ci = -i)
- if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
- $space_count = 1;
- }
-
- if ($rOpts_line_up_parentheses) {
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
- }
- else {
- $indentation = $space_count;
- }
- }
- }
-
- return ( $indentation, $lev, $level_end, $terminal_type,
- $is_semicolon_terminated, $is_outdented_line );
- }
-}
-
-sub set_vertical_tightness_flags {
-
- my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
-
- # Define vertical tightness controls for the nth line of a batch.
- # We create an array of parameters which tell the vertical aligner
- # if we should combine this line with the next line to achieve the
- # desired vertical tightness. The array of parameters contains:
- #
- # [0] type: 1=opening non-block 2=closing non-block
- # 3=opening block brace 4=closing block brace
- #
- # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
- # if closing: spaces of padding to use
- # [2] sequence number of container
- # [3] valid flag: do not append if this flag is false. Will be
- # true if appropriate -vt flag is set. Otherwise, Will be
- # made true only for 2 line container in parens with -lp
- #
- # These flags are used by sub set_leading_whitespace in
- # the vertical aligner
-
- my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1:
- # Handle Lines 1 .. n-1 but not the last line
- # For non-BLOCK tokens, we will need to examine the next line
- # too, so we won't consider the last line.
- #--------------------------------------------------------------
- if ( $n < $n_last_line ) {
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1a:
- # Look for Type 1, last token of this line is a non-block opening token
- #--------------------------------------------------------------
- my $ibeg_next = $ri_first->[ $n + 1 ];
- my $token_end = $tokens_to_go[$iend];
- my $iend_next = $ri_last->[ $n + 1 ];
- if (
- $type_sequence_to_go[$iend]
- && !$block_type_to_go[$iend]
- && $is_opening_token{$token_end}
- && (
- $opening_vertical_tightness{$token_end} > 0
-
- # allow 2-line method call to be closed up
- || ( $rOpts_line_up_parentheses
- && $token_end eq '('
- && $iend > $ibeg
- && $types_to_go[ $iend - 1 ] ne 'b' )
- )
- )
- {
-
- # avoid multiple jumps in nesting depth in one line if
- # requested
- my $ovt = $opening_vertical_tightness{$token_end};
- my $iend_next = $ri_last->[ $n + 1 ];
- unless (
- $ovt < 2
- && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
- $nesting_depth_to_go[$ibeg_next] )
- )
- {
-
- # If -vt flag has not been set, mark this as invalid
- # and aligner will validate it if it sees the closing paren
- # within 2 lines.
- my $valid_flag = $ovt;
- @{$rvertical_tightness_flags} =
- ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
- }
- }
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1b:
- # Look for Type 2, first token of next line is a non-block closing
- # token .. and be sure this line does not have a side comment
- #--------------------------------------------------------------
- my $token_next = $tokens_to_go[$ibeg_next];
- if ( $type_sequence_to_go[$ibeg_next]
- && !$block_type_to_go[$ibeg_next]
- && $is_closing_token{$token_next}
- && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
- {
- my $ovt = $opening_vertical_tightness{$token_next};
- my $cvt = $closing_vertical_tightness{$token_next};
- if (
-
- # never append a trailing line like )->pack(
- # because it will throw off later alignment
- (
- $nesting_depth_to_go[$ibeg_next] ==
- $nesting_depth_to_go[ $iend_next + 1 ] + 1
- )
- && (
- $cvt == 2
- || (
- $container_environment_to_go[$ibeg_next] ne 'LIST'
- && (
- $cvt == 1
-
- # allow closing up 2-line method calls
- || ( $rOpts_line_up_parentheses
- && $token_next eq ')' )
- )
- )
- )
- )
- {
-
- # decide which trailing closing tokens to append..
- my $ok = 0;
- if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
- else {
- my $str = join( '',
- @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
-
- # append closing token if followed by comment or ';'
- if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
- }
-
- if ($ok) {
- my $valid_flag = $cvt;
- @{$rvertical_tightness_flags} = (
- 2,
- $tightness{$token_next} == 2 ? 0 : 1,
- $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
- }
- }
- }
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1c:
- # Implement the Opening Token Right flag (Type 2)..
- # If requested, move an isolated trailing opening token to the end of
- # the previous line which ended in a comma. We could do this
- # in sub recombine_breakpoints but that would cause problems
- # with -lp formatting. The problem is that indentation will
- # quickly move far to the right in nested expressions. By
- # doing it after indentation has been set, we avoid changes
- # to the indentation. Actual movement of the token takes place
- # in sub valign_output_step_B.
- #--------------------------------------------------------------
- if (
- $opening_token_right{ $tokens_to_go[$ibeg_next] }
-
- # previous line is not opening
- # (use -sot to combine with it)
- && !$is_opening_token{$token_end}
-
- # previous line ended in one of these
- # (add other cases if necessary; '=>' and '.' are not necessary
- && !$block_type_to_go[$ibeg_next]
-
- # this is a line with just an opening token
- && ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 2
- && $types_to_go[$iend_next] eq '#' )
-
- # looks bad if we align vertically with the wrong container
- && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
- )
- {
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
- }
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1d:
- # Stacking of opening and closing tokens (Type 2)
- #--------------------------------------------------------------
- my $stackable;
- my $token_beg_next = $tokens_to_go[$ibeg_next];
-
- # patch to make something like 'qw(' behave like an opening paren
- # (aran.t)
- if ( $types_to_go[$ibeg_next] eq 'q' ) {
- if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
- $token_beg_next = $1;
- }
- }
-
- if ( $is_closing_token{$token_end}
- && $is_closing_token{$token_beg_next} )
- {
- $stackable = $stack_closing_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
- }
- elsif ($is_opening_token{$token_end}
- && $is_opening_token{$token_beg_next} )
- {
- $stackable = $stack_opening_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
- }
-
- if ($stackable) {
-
- my $is_semicolon_terminated;
- if ( $n + 1 == $n_last_line ) {
- my ( $terminal_type, $i_terminal ) = terminal_type(
- \@types_to_go, \@block_type_to_go,
- $ibeg_next, $iend_next
- );
- $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend_next] <
- $nesting_depth_to_go[$ibeg_next];
- }
-
- # this must be a line with just an opening token
- # or end in a semicolon
- if (
- $is_semicolon_terminated
- || ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 2
- && $types_to_go[$iend_next] eq '#' )
- )
- {
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
- }
- }
- }
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 2:
- # Handle type 3, opening block braces on last line of the batch
- # Check for a last line with isolated opening BLOCK curly
- #--------------------------------------------------------------
- elsif ($rOpts_block_brace_vertical_tightness
- && $ibeg eq $iend
- && $types_to_go[$iend] eq '{'
- && $block_type_to_go[$iend] =~
- /$block_brace_vertical_tightness_pattern/o )
- {
- @{$rvertical_tightness_flags} =
- ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
- }
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 3:
- # Handle type 4, a closing block brace on the last line of the batch Check
- # for a last line with isolated closing BLOCK curly
- #--------------------------------------------------------------
- elsif ($rOpts_stack_closing_block_brace
- && $ibeg eq $iend
- && $block_type_to_go[$iend]
- && $types_to_go[$iend] eq '}' )
- {
- my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
- @{$rvertical_tightness_flags} =
- ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
- }
-
- # pack in the sequence numbers of the ends of this line
- $rvertical_tightness_flags->[4] = get_seqno($ibeg);
- $rvertical_tightness_flags->[5] = get_seqno($iend);
- return $rvertical_tightness_flags;
-}
-
-sub get_seqno {
-
- # get opening and closing sequence numbers of a token for the vertical
- # aligner. Assign qw quotes a value to allow qw opening and closing tokens
- # to be treated somewhat like opening and closing tokens for stacking
- # tokens by the vertical aligner.
- my ($ii) = @_;
- my $seqno = $type_sequence_to_go[$ii];
- if ( $types_to_go[$ii] eq 'q' ) {
- my $SEQ_QW = -1;
- if ( $ii > 0 ) {
- $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
- }
- else {
- if ( !$ending_in_quote ) {
- $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
- }
- }
- }
- return ($seqno);
-}
-
-{
- my %is_vertical_alignment_type;
- my %is_vertical_alignment_keyword;
- my %is_terminal_alignment_type;
-
- BEGIN {
-
- my @q;
-
- # Removed =~ from list to improve chances of alignment
- # Removed // from list to improve chances of alignment (RT# 119588)
- @q = qw#
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- { ? : => && || ~~ !~~
- #;
- @is_vertical_alignment_type{@q} = (1) x scalar(@q);
-
- # only align these at end of line
- @q = qw(&& ||);
- @is_terminal_alignment_type{@q} = (1) x scalar(@q);
-
- # eq and ne were removed from this list to improve alignment chances
- @q = qw(if unless and or err for foreach while until);
- @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
- }
-
- sub set_vertical_alignment_markers {
-
- # This routine takes the first step toward vertical alignment of the
- # lines of output text. It looks for certain tokens which can serve as
- # vertical alignment markers (such as an '=').
- #
- # Method: We look at each token $i in this output batch and set
- # $matching_token_to_go[$i] equal to those tokens at which we would
- # accept vertical alignment.
-
- my ( $ri_first, $ri_last ) = @_;
-
- # nothing to do if we aren't allowed to change whitespace
- if ( !$rOpts_add_whitespace ) {
- for my $i ( 0 .. $max_index_to_go ) {
- $matching_token_to_go[$i] = '';
- }
- return;
- }
-
- # remember the index of last nonblank token before any sidecomment
- my $i_terminal = $max_index_to_go;
- if ( $types_to_go[$i_terminal] eq '#' ) {
- if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
- if ( $i_terminal > 0 ) { --$i_terminal }
- }
- }
-
- # look at each line of this batch..
- my $last_vertical_alignment_before_index;
- my $vert_last_nonblank_type;
- my $vert_last_nonblank_token;
- my $vert_last_nonblank_block_type;
- my $max_line = @{$ri_first} - 1;
-
- foreach my $line ( 0 .. $max_line ) {
- my $ibeg = $ri_first->[$line];
- my $iend = $ri_last->[$line];
- $last_vertical_alignment_before_index = -1;
- $vert_last_nonblank_type = '';
- $vert_last_nonblank_token = '';
- $vert_last_nonblank_block_type = '';
-
- # look at each token in this output line..
- foreach my $i ( $ibeg .. $iend ) {
- my $alignment_type = '';
- my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
- my $token = $tokens_to_go[$i];
-
- # check for flag indicating that we should not align
- # this token
- if ( $matching_token_to_go[$i] ) {
- $matching_token_to_go[$i] = '';
- next;
- }
-
- #--------------------------------------------------------
- # First see if we want to align BEFORE this token
- #--------------------------------------------------------
-
- # The first possible token that we can align before
- # is index 2 because: 1) it doesn't normally make sense to
- # align before the first token and 2) the second
- # token must be a blank if we are to align before
- # the third
- if ( $i < $ibeg + 2 ) { }
-
- # must follow a blank token
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
-
- # align a side comment --
- elsif ( $type eq '#' ) {
-
- unless (
-
- # it is a static side comment
- (
- $rOpts->{'static-side-comments'}
- && $token =~ /$static_side_comment_pattern/o
- )
-
- # or a closing side comment
- || ( $vert_last_nonblank_block_type
- && $token =~
- /$closing_side_comment_prefix_pattern/o )
- )
- {
- $alignment_type = $type;
- } ## Example of a static side comment
- }
-
- # otherwise, do not align two in a row to create a
- # blank field
- elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
-
- # align before one of these keywords
- # (within a line, since $i>1)
- elsif ( $type eq 'k' ) {
-
- # /^(if|unless|and|or|eq|ne)$/
- if ( $is_vertical_alignment_keyword{$token} ) {
- $alignment_type = $token;
- }
- }
-
- # align before one of these types..
- # Note: add '.' after new vertical aligner is operational
- elsif ( $is_vertical_alignment_type{$type} ) {
- $alignment_type = $token;
-
- # Do not align a terminal token. Although it might
- # occasionally look ok to do this, this has been found to be
- # a good general rule. The main problems are:
- # (1) that the terminal token (such as an = or :) might get
- # moved far to the right where it is hard to see because
- # nothing follows it, and
- # (2) doing so may prevent other good alignments.
- # Current exceptions are && and ||
- if ( $i == $iend || $i >= $i_terminal ) {
- $alignment_type = ""
- unless ( $is_terminal_alignment_type{$type} );
- }
-
- # Do not align leading ': (' or '. ('. This would prevent
- # alignment in something like the following:
- # $extra_space .=
- # ( $input_line_number < 10 ) ? " "
- # : ( $input_line_number < 100 ) ? " "
- # : "";
- # or
- # $code =
- # ( $case_matters ? $accessor : " lc($accessor) " )
- # . ( $yesno ? " eq " : " ne " )
- if ( $i == $ibeg + 2
- && $types_to_go[$ibeg] =~ /^[\.\:]$/
- && $types_to_go[ $i - 1 ] eq 'b' )
- {
- $alignment_type = "";
- }
-
- # For a paren after keyword, only align something like this:
- # if ( $a ) { &a }
- # elsif ( $b ) { &b }
- if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
- $alignment_type = ""
- unless $vert_last_nonblank_token =~
- /^(if|unless|elsif)$/;
- }
-
- # be sure the alignment tokens are unique
- # This didn't work well: reason not determined
- # if ($token ne $type) {$alignment_type .= $type}
- }
-
- # NOTE: This is deactivated because it causes the previous
- # if/elsif alignment to fail
- #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
- #{ $alignment_type = $type; }
-
- if ($alignment_type) {
- $last_vertical_alignment_before_index = $i;
- }
-
- #--------------------------------------------------------
- # Next see if we want to align AFTER the previous nonblank
- #--------------------------------------------------------
-
- # We want to line up ',' and interior ';' tokens, with the added
- # space AFTER these tokens. (Note: interior ';' is included
- # because it may occur in short blocks).
- if (
-
- # we haven't already set it
- !$alignment_type
-
- # and its not the first token of the line
- && ( $i > $ibeg )
-
- # and it follows a blank
- && $types_to_go[ $i - 1 ] eq 'b'
-
- # and previous token IS one of these:
- && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
-
- # and it's NOT one of these
- && ( $type !~ /^[b\#\)\]\}]$/ )
-
- # then go ahead and align
- )
-
- {
- $alignment_type = $vert_last_nonblank_type;
- }
-
- #--------------------------------------------------------
- # then store the value
- #--------------------------------------------------------
- $matching_token_to_go[$i] = $alignment_type;
- if ( $type ne 'b' ) {
- $vert_last_nonblank_type = $type;
- $vert_last_nonblank_token = $token;
- $vert_last_nonblank_block_type = $block_type;
- }
- }
- }
- return;
- }
-}
-
-sub terminal_type {
-
- # returns type of last token on this line (terminal token), as follows:
- # returns # for a full-line comment
- # returns ' ' for a blank line
- # otherwise returns final token type
-
- my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
-
- # check for full-line comment..
- if ( $rtype->[$ibeg] eq '#' ) {
- return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg];
- }
- else {
-
- # start at end and walk backwards..
- for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
-
- # skip past any side comment and blanks
- next if ( $rtype->[$i] eq 'b' );
- next if ( $rtype->[$i] eq '#' );
-
- # found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/grep/map because it is not
- # necessarily the end of the line. (terminal.t)
- my $terminal_type = $rtype->[$i];
- if (
- $terminal_type eq '}'
- && ( !$rblock_type->[$i]
- || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) )
- )
- {
- $terminal_type = 'b';
- }
- return wantarray ? ( $terminal_type, $i ) : $terminal_type;
- }
-
- # empty line
- return wantarray ? ( ' ', $ibeg ) : ' ';
- }
-}
-
-{ # set_bond_strengths
-
- my %is_good_keyword_breakpoint;
- my %is_lt_gt_le_ge;
-
- my %binary_bond_strength;
- my %nobreak_lhs;
- my %nobreak_rhs;
-
- my @bias_tokens;
- my $delta_bias;
-
- sub bias_table_key {
- my ( $type, $token ) = @_;
- my $bias_table_key = $type;
- if ( $type eq 'k' ) {
- $bias_table_key = $token;
- if ( $token eq 'err' ) { $bias_table_key = 'or' }
- }
- return $bias_table_key;
- }
-
- sub set_bond_strengths {
-
- BEGIN {
-
- my @q;
- @q = qw(if unless while until for foreach);
- @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
-
- @q = qw(lt gt le ge);
- @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
- #
- # The decision about where to break a line depends upon a "bond
- # strength" between tokens. The LOWER the bond strength, the MORE
- # likely a break. A bond strength may be any value but to simplify
- # things there are several pre-defined strength levels:
-
- # NO_BREAK => 10000;
- # VERY_STRONG => 100;
- # STRONG => 2.1;
- # NOMINAL => 1.1;
- # WEAK => 0.8;
- # VERY_WEAK => 0.55;
-
- # The strength values are based on trial-and-error, and need to be
- # tweaked occasionally to get desired results. Some comments:
- #
- # 1. Only relative strengths are important. small differences
- # in strengths can make big formatting differences.
- # 2. Each indentation level adds one unit of bond strength.
- # 3. A value of NO_BREAK makes an unbreakable bond
- # 4. A value of VERY_WEAK is the strength of a ','
- # 5. Values below NOMINAL are considered ok break points.
- # 6. Values above NOMINAL are considered poor break points.
- #
- # The bond strengths should roughly follow precedence order where
- # possible. If you make changes, please check the results very
- # carefully on a variety of scripts. Testing with the -extrude
- # options is particularly helpful in exercising all of the rules.
-
- # Wherever possible, bond strengths are defined in the following
- # tables. There are two main stages to setting bond strengths and
- # two types of tables:
- #
- # The first stage involves looking at each token individually and
- # defining left and right bond strengths, according to if we want
- # to break to the left or right side, and how good a break point it
- # is. For example tokens like =, ||, && make good break points and
- # will have low strengths, but one might want to break on either
- # side to put them at the end of one line or beginning of the next.
- #
- # The second stage involves looking at certain pairs of tokens and
- # defining a bond strength for that particular pair. This second
- # stage has priority.
-
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 1.
- # Set left and right bond strengths of individual tokens.
- #---------------------------------------------------------------
-
- # NOTE: NO_BREAK's set in this section first are HINTS which will
- # probably not be honored. Essential NO_BREAKS's should be set in
- # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
- # of this subroutine.
-
- # Note that we are setting defaults in this section. The user
- # cannot change bond strengths but can cause the left and right
- # bond strengths of any token type to be swapped through the use of
- # the -wba and -wbb flags. In this way the user can determine if a
- # breakpoint token should appear at the end of one line or the
- # beginning of the next line.
-
- # The hash keys in this section are token types, plus the text of
- # certain keywords like 'or', 'and'.
-
- # no break around possible filehandle
- $left_bond_strength{'Z'} = NO_BREAK;
- $right_bond_strength{'Z'} = NO_BREAK;
-
- # never put a bare word on a new line:
- # example print (STDERR, "bla"); will fail with break after (
- $left_bond_strength{'w'} = NO_BREAK;
-
- # blanks always have infinite strength to force breaks after
- # real tokens
- $right_bond_strength{'b'} = NO_BREAK;
-
- # try not to break on exponentation
- @q = qw" ** .. ... <=> ";
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
-
- # The comma-arrow has very low precedence but not a good break point
- $left_bond_strength{'=>'} = NO_BREAK;
- $right_bond_strength{'=>'} = NOMINAL;
-
- # ok to break after label
- $left_bond_strength{'J'} = NO_BREAK;
- $right_bond_strength{'J'} = NOMINAL;
- $left_bond_strength{'j'} = STRONG;
- $right_bond_strength{'j'} = STRONG;
- $left_bond_strength{'A'} = STRONG;
- $right_bond_strength{'A'} = STRONG;
-
- $left_bond_strength{'->'} = STRONG;
- $right_bond_strength{'->'} = VERY_STRONG;
-
- $left_bond_strength{'CORE::'} = NOMINAL;
- $right_bond_strength{'CORE::'} = NO_BREAK;
-
- # breaking AFTER modulus operator is ok:
- @q = qw" % ";
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
-
- # Break AFTER math operators * and /
- @q = qw" * / x ";
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
-
- # Break AFTER weakest math operators + and -
- # Make them weaker than * but a bit stronger than '.'
- @q = qw" + - ";
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
-
- # breaking BEFORE these is just ok:
- @q = qw" >> << ";
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
- @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
-
- # breaking before the string concatenation operator seems best
- # because it can be hard to see at the end of a line
- $right_bond_strength{'.'} = STRONG;
- $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
-
- @q = qw"} ] ) R";
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
-
- # make these a little weaker than nominal so that they get
- # favored for end-of-line characters
- @q = qw"!= == =~ !~ ~~ !~~";
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
-
- # break AFTER these
- @q = qw" < > | & >= <=";
- @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
-
- # breaking either before or after a quote is ok
- # but bias for breaking before a quote
- $left_bond_strength{'Q'} = NOMINAL;
- $right_bond_strength{'Q'} = NOMINAL + 0.02;
- $left_bond_strength{'q'} = NOMINAL;
- $right_bond_strength{'q'} = NOMINAL;
-
- # starting a line with a keyword is usually ok
- $left_bond_strength{'k'} = NOMINAL;
-
- # we usually want to bond a keyword strongly to what immediately
- # follows, rather than leaving it stranded at the end of a line
- $right_bond_strength{'k'} = STRONG;
-
- $left_bond_strength{'G'} = NOMINAL;
- $right_bond_strength{'G'} = STRONG;
-
- # assignment operators
- @q = qw(
- = **= += *= &= <<= &&=
- -= /= |= >>= ||= //=
- .= %= ^=
- x=
- );
-
- # Default is to break AFTER various assignment operators
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
-
- # Default is to break BEFORE '&&' and '||' and '//'
- # set strength of '||' to same as '=' so that chains like
- # $a = $b || $c || $d will break before the first '||'
- $right_bond_strength{'||'} = NOMINAL;
- $left_bond_strength{'||'} = $right_bond_strength{'='};
-
- # same thing for '//'
- $right_bond_strength{'//'} = NOMINAL;
- $left_bond_strength{'//'} = $right_bond_strength{'='};
-
- # set strength of && a little higher than ||
- $right_bond_strength{'&&'} = NOMINAL;
- $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
-
- $left_bond_strength{';'} = VERY_STRONG;
- $right_bond_strength{';'} = VERY_WEAK;
- $left_bond_strength{'f'} = VERY_STRONG;
-
- # make right strength of for ';' a little less than '='
- # to make for contents break after the ';' to avoid this:
- # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
- # $number_of_fields )
- # and make it weaker than ',' and 'and' too
- $right_bond_strength{'f'} = VERY_WEAK - 0.03;
-
- # The strengths of ?/: should be somewhere between
- # an '=' and a quote (NOMINAL),
- # make strength of ':' slightly less than '?' to help
- # break long chains of ? : after the colons
- $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
- $right_bond_strength{':'} = NO_BREAK;
- $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
- $right_bond_strength{'?'} = NO_BREAK;
-
- $left_bond_strength{','} = VERY_STRONG;
- $right_bond_strength{','} = VERY_WEAK;
-
- # remaining digraphs and trigraphs not defined above
- @q = qw( :: <> ++ --);
- @left_bond_strength{@q} = (WEAK) x scalar(@q);
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
-
- # Set bond strengths of certain keywords
- # make 'or', 'err', 'and' slightly weaker than a ','
- $left_bond_strength{'and'} = VERY_WEAK - 0.01;
- $left_bond_strength{'or'} = VERY_WEAK - 0.02;
- $left_bond_strength{'err'} = VERY_WEAK - 0.02;
- $left_bond_strength{'xor'} = NOMINAL;
- $right_bond_strength{'and'} = NOMINAL;
- $right_bond_strength{'or'} = NOMINAL;
- $right_bond_strength{'err'} = NOMINAL;
- $right_bond_strength{'xor'} = STRONG;
-
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 2.
- # Set binary rules for bond strengths between certain token types.
- #---------------------------------------------------------------
-
- # We have a little problem making tables which apply to the
- # container tokens. Here is a list of container tokens and
- # their types:
- #
- # type tokens // meaning
- # { {, [, ( // indent
- # } }, ], ) // outdent
- # [ [ // left non-structural [ (enclosing an array index)
- # ] ] // right non-structural square bracket
- # ( ( // left non-structural paren
- # ) ) // right non-structural paren
- # L { // left non-structural curly brace (enclosing a key)
- # R } // right non-structural curly brace
- #
- # Some rules apply to token types and some to just the token
- # itself. We solve the problem by combining type and token into a
- # new hash key for the container types.
- #
- # If a rule applies to a token 'type' then we need to make rules
- # for each of these 'type.token' combinations:
- # Type Type.Token
- # { {{, {[, {(
- # [ [[
- # ( ((
- # L L{
- # } }}, }], })
- # ] ]]
- # ) ))
- # R R}
- #
- # If a rule applies to a token then we need to make rules for
- # these 'type.token' combinations:
- # Token Type.Token
- # { {{, L{
- # [ {[, [[
- # ( {(, ((
- # } }}, R}
- # ] }], ]]
- # ) }), ))
-
- # allow long lines before final { in an if statement, as in:
- # if (..........
- # ..........)
- # {
- #
- # Otherwise, the line before the { tends to be too short.
-
- $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
- $binary_bond_strength{'(('}{'{{'} = NOMINAL;
-
- # break on something like '} (', but keep this stronger than a ','
- # example is in 'howe.pl'
- $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
- $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
-
- # keep matrix and hash indices together
- # but make them a little below STRONG to allow breaking open
- # something like {'some-word'}{'some-very-long-word'} at the }{
- # (bracebrk.t)
- $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
-
- # increase strength to the point where a break in the following
- # will be after the opening paren rather than at the arrow:
- # $a->$b($c);
- $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
-
- $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
-
- $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
-
- #---------------------------------------------------------------
- # Binary NO_BREAK rules
- #---------------------------------------------------------------
-
- # use strict requires that bare word and => not be separated
- $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
- $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
-
- # Never break between a bareword and a following paren because
- # perl may give an error. For example, if a break is placed
- # between 'to_filehandle' and its '(' the following line will
- # give a syntax error [Carp.pm]: my( $no) =fileno(
- # to_filehandle( $in)) ;
- $binary_bond_strength{'C'}{'(('} = NO_BREAK;
- $binary_bond_strength{'C'}{'{('} = NO_BREAK;
- $binary_bond_strength{'U'}{'(('} = NO_BREAK;
- $binary_bond_strength{'U'}{'{('} = NO_BREAK;
-
- # use strict requires that bare word within braces not start new
- # line
- $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
-
- $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
-
- # use strict requires that bare word and => not be separated
- $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
-
- # use strict does not allow separating type info from trailing { }
- # testfile is readmail.pl
- $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
- $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
-
- # As a defensive measure, do not break between a '(' and a
- # filehandle. In some cases, this can cause an error. For
- # example, the following program works:
- # my $msg="hi!\n";
- # print
- # ( STDOUT
- # $msg
- # );
- #
- # But this program fails:
- # my $msg="hi!\n";
- # print
- # (
- # STDOUT
- # $msg
- # );
- #
- # This is normally only a problem with the 'extrude' option
- $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
- $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
-
- # never break between sub name and opening paren
- $binary_bond_strength{'w'}{'(('} = NO_BREAK;
- $binary_bond_strength{'w'}{'{('} = NO_BREAK;
-
- # keep '}' together with ';'
- $binary_bond_strength{'}}'}{';'} = NO_BREAK;
-
- # Breaking before a ++ can cause perl to guess wrong. For
- # example the following line will cause a syntax error
- # with -extrude if we break between '$i' and '++' [fixstyle2]
- # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
- $nobreak_lhs{'++'} = NO_BREAK;
-
- # Do not break before a possible file handle
- $nobreak_lhs{'Z'} = NO_BREAK;
-
- # use strict hates bare words on any new line. For
- # example, a break before the underscore here provokes the
- # wrath of use strict:
- # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
- $nobreak_rhs{'F'} = NO_BREAK;
- $nobreak_rhs{'CORE::'} = NO_BREAK;
-
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 3.
- # Define tables and values for applying a small bias to the above
- # values.
- #---------------------------------------------------------------
- # Adding a small 'bias' to strengths is a simple way to make a line
- # break at the first of a sequence of identical terms. For
- # example, to force long string of conditional operators to break
- # with each line ending in a ':', we can add a small number to the
- # bond strength of each ':' (colon.t)
- @bias_tokens = qw( : && || f and or . ); # tokens which get bias
- $delta_bias = 0.0001; # a very small strength level
-
- } ## end BEGIN
-
- # patch-its always ok to break at end of line
- $nobreak_to_go[$max_index_to_go] = 0;
-
- # we start a new set of bias values for each line
- my %bias;
- @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
- my $code_bias = -.01; # bias for closing block braces
-
- my $type = 'b';
- my $token = ' ';
- my $last_type;
- my $last_nonblank_type = $type;
- my $last_nonblank_token = $token;
- my $list_str = $left_bond_strength{'?'};
-
- my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
- $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
- );
-
- # main loop to compute bond strengths between each pair of tokens
- foreach my $i ( 0 .. $max_index_to_go ) {
- $last_type = $type;
- if ( $type ne 'b' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- }
- $type = $types_to_go[$i];
-
- # strength on both sides of a blank is the same
- if ( $type eq 'b' && $last_type ne 'b' ) {
- $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
- next;
- }
-
- $token = $tokens_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $i_next = $i + 1;
- $next_type = $types_to_go[$i_next];
- $next_token = $tokens_to_go[$i_next];
- $total_nesting_depth = $nesting_depth_to_go[$i_next];
- $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
-
- # We are computing the strength of the bond between the current
- # token and the NEXT token.
-
- #---------------------------------------------------------------
- # Bond Strength Section 1:
- # First Approximation.
- # Use minimum of individual left and right tabulated bond
- # strengths.
- #---------------------------------------------------------------
- my $bsr = $right_bond_strength{$type};
- my $bsl = $left_bond_strength{$next_nonblank_type};
-
- # define right bond strengths of certain keywords
- if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
- $bsr = $right_bond_strength{$token};
- }
- elsif ( $token eq 'ne' or $token eq 'eq' ) {
- $bsr = NOMINAL;
- }
-
- # set terminal bond strength to the nominal value
- # this will cause good preceding breaks to be retained
- if ( $i_next_nonblank > $max_index_to_go ) {
- $bsl = NOMINAL;
- }
-
- # define right bond strengths of certain keywords
- if ( $next_nonblank_type eq 'k'
- && defined( $left_bond_strength{$next_nonblank_token} ) )
- {
- $bsl = $left_bond_strength{$next_nonblank_token};
- }
- elsif ($next_nonblank_token eq 'ne'
- or $next_nonblank_token eq 'eq' )
- {
- $bsl = NOMINAL;
- }
- elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
- $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
- }
-
- # Use the minimum of the left and right strengths. Note: it might
- # seem that we would want to keep a NO_BREAK if either token has
- # this value. This didn't work, for example because in an arrow
- # list, it prevents the comma from separating from the following
- # bare word (which is probably quoted by its arrow). So necessary
- # NO_BREAK's have to be handled as special cases in the final
- # section.
- if ( !defined($bsr) ) { $bsr = VERY_STRONG }
- if ( !defined($bsl) ) { $bsl = VERY_STRONG }
- my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
- my $bond_str_1 = $bond_str;
-
- #---------------------------------------------------------------
- # Bond Strength Section 2:
- # Apply hardwired rules..
- #---------------------------------------------------------------
-
- # Patch to put terminal or clauses on a new line: Weaken the bond
- # at an || followed by die or similar keyword to make the terminal
- # or clause fall on a new line, like this:
- #
- # my $class = shift
- # || die "Cannot add broadcast: No class identifier found";
- #
- # Otherwise the break will be at the previous '=' since the || and
- # = have the same starting strength and the or is biased, like
- # this:
- #
- # my $class =
- # shift || die "Cannot add broadcast: No class identifier found";
- #
- # In any case if the user places a break at either the = or the ||
- # it should remain there.
- if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
- if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
- if ( $want_break_before{$token} && $i > 0 ) {
- $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
- }
- else {
- $bond_str -= $delta_bias;
- }
- }
- }
-
- # good to break after end of code blocks
- if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
-
- $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
- $code_bias += $delta_bias;
- }
-
- if ( $type eq 'k' ) {
-
- # allow certain control keywords to stand out
- if ( $next_nonblank_type eq 'k'
- && $is_last_next_redo_return{$token} )
- {
- $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
- }
-
- # Don't break after keyword my. This is a quick fix for a
- # rare problem with perl. An example is this line from file
- # Container.pm:
-
- # foreach my $question( Debian::DebConf::ConfigDb::gettree(
- # $this->{'question'} ) )
-
- if ( $token eq 'my' ) {
- $bond_str = NO_BREAK;
- }
-
- }
-
- # good to break before 'if', 'unless', etc
- if ( $is_if_brace_follower{$next_nonblank_token} ) {
- $bond_str = VERY_WEAK;
- }
-
- if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
-
- # FIXME: needs more testing
- if ( $is_keyword_returning_list{$next_nonblank_token} ) {
- $bond_str = $list_str if ( $bond_str > $list_str );
- }
-
- # keywords like 'unless', 'if', etc, within statements
- # make good breaks
- if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
- $bond_str = VERY_WEAK / 1.05;
- }
- }
-
- # try not to break before a comma-arrow
- elsif ( $next_nonblank_type eq '=>' ) {
- if ( $bond_str < STRONG ) { $bond_str = STRONG }
- }
-
- #---------------------------------------------------------------
- # Additional hardwired NOBREAK rules
- #---------------------------------------------------------------
-
- # map1.t -- correct for a quirk in perl
- if ( $token eq '('
- && $next_nonblank_type eq 'i'
- && $last_nonblank_type eq 'k'
- && $is_sort_map_grep{$last_nonblank_token} )
-
- # /^(sort|map|grep)$/ )
- {
- $bond_str = NO_BREAK;
- }
-
- # extrude.t: do not break before paren at:
- # -l pid_filename(
- if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
- $bond_str = NO_BREAK;
- }
-
- # in older version of perl, use strict can cause problems with
- # breaks before bare words following opening parens. For example,
- # this will fail under older versions if a break is made between
- # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
- # command"); close MAIL;
- if ( $type eq '{' ) {
-
- if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
-
- # but it's fine to break if the word is followed by a '=>'
- # or if it is obviously a sub call
- my $i_next_next_nonblank = $i_next_nonblank + 1;
- my $next_next_type = $types_to_go[$i_next_next_nonblank];
- if ( $next_next_type eq 'b'
- && $i_next_nonblank < $max_index_to_go )
- {
- $i_next_next_nonblank++;
- $next_next_type = $types_to_go[$i_next_next_nonblank];
- }
-
- # We'll check for an old breakpoint and keep a leading
- # bareword if it was that way in the input file.
- # Presumably it was ok that way. For example, the
- # following would remain unchanged:
- #
- # @months = (
- # January, February, March, April,
- # May, June, July, August,
- # September, October, November, December,
- # );
- #
- # This should be sufficient:
- if (
- !$old_breakpoint_to_go[$i]
- && ( $next_next_type eq ','
- || $next_next_type eq '}' )
- )
- {
- $bond_str = NO_BREAK;
- }
- }
- }
-
- # Do not break between a possible filehandle and a ? or / and do
- # not introduce a break after it if there is no blank
- # (extrude.t)
- elsif ( $type eq 'Z' ) {
-
- # don't break..
- if (
-
- # if there is no blank and we do not want one. Examples:
- # print $x++ # do not break after $x
- # print HTML"HELLO" # break ok after HTML
- (
- $next_type ne 'b'
- && defined( $want_left_space{$next_type} )
- && $want_left_space{$next_type} == WS_NO
- )
-
- # or we might be followed by the start of a quote
- || $next_nonblank_type =~ /^[\/\?]$/
- )
- {
- $bond_str = NO_BREAK;
- }
- }
-
- # Breaking before a ? before a quote can cause trouble if
- # they are not separated by a blank.
- # Example: a syntax error occurs if you break before the ? here
- # my$logic=join$all?' && ':' || ',@regexps;
- # From: Professional_Perl_Programming_Code/multifind.pl
- if ( $next_nonblank_type eq '?' ) {
- $bond_str = NO_BREAK
- if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
- }
-
- # Breaking before a . followed by a number
- # can cause trouble if there is no intervening space
- # Example: a syntax error occurs if you break before the .2 here
- # $str .= pack($endian.2, ensurrogate($ord));
- # From: perl58/Unicode.pm
- elsif ( $next_nonblank_type eq '.' ) {
- $bond_str = NO_BREAK
- if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
- }
-
- # patch to put cuddled elses back together when on multiple
- # lines, as in: } \n else \n { \n
- if ($rOpts_cuddled_else) {
-
- if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
- || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
- {
- $bond_str = NO_BREAK;
- }
- }
- my $bond_str_2 = $bond_str;
-
- #---------------------------------------------------------------
- # End of hardwired rules
- #---------------------------------------------------------------
-
- #---------------------------------------------------------------
- # Bond Strength Section 3:
- # Apply table rules. These have priority over the above
- # hardwired rules.
- #---------------------------------------------------------------
-
- my $tabulated_bond_str;
- my $ltype = $type;
- my $rtype = $next_nonblank_type;
- if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
- if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
- $rtype = $next_nonblank_type . $next_nonblank_token;
- }
-
- if ( $binary_bond_strength{$ltype}{$rtype} ) {
- $bond_str = $binary_bond_strength{$ltype}{$rtype};
- $tabulated_bond_str = $bond_str;
- }
-
- if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
- $bond_str = NO_BREAK;
- $tabulated_bond_str = $bond_str;
- }
- my $bond_str_3 = $bond_str;
-
- # If the hardwired rules conflict with the tabulated bond
- # strength then there is an inconsistency that should be fixed
- FORMATTER_DEBUG_FLAG_BOND_TABLES
- && $tabulated_bond_str
- && $bond_str_1
- && $bond_str_1 != $bond_str_2
- && $bond_str_2 != $tabulated_bond_str
- && do {
- print STDERR
-"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
- };
-
- #-----------------------------------------------------------------
- # Bond Strength Section 4:
- # Modify strengths of certain tokens which often occur in sequence
- # by adding a small bias to each one in turn so that the breaks
- # occur from left to right.
- #
- # Note that we only changing strengths by small amounts here,
- # and usually increasing, so we should not be altering any NO_BREAKs.
- # Other routines which check for NO_BREAKs will use a tolerance
- # of one to avoid any problem.
- #-----------------------------------------------------------------
-
- # The bias tables use special keys
- my $left_key = bias_table_key( $type, $token );
- my $right_key =
- bias_table_key( $next_nonblank_type, $next_nonblank_token );
-
- # add any bias set by sub scan_list at old comma break points.
- if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
-
- # bias left token
- elsif ( defined( $bias{$left_key} ) ) {
- if ( !$want_break_before{$left_key} ) {
- $bias{$left_key} += $delta_bias;
- $bond_str += $bias{$left_key};
- }
- }
-
- # bias right token
- if ( defined( $bias{$right_key} ) ) {
- if ( $want_break_before{$right_key} ) {
-
- # for leading '.' align all but 'short' quotes; the idea
- # is to not place something like "\n" on a single line.
- if ( $right_key eq '.' ) {
- unless (
- $last_nonblank_type eq '.'
- && (
- length($token) <=
- $rOpts_short_concatenation_item_length )
- && ( !$is_closing_token{$token} )
- )
- {
- $bias{$right_key} += $delta_bias;
- }
- }
- else {
- $bias{$right_key} += $delta_bias;
- }
- $bond_str += $bias{$right_key};
- }
- }
- my $bond_str_4 = $bond_str;
-
- #---------------------------------------------------------------
- # Bond Strength Section 5:
- # Fifth Approximation.
- # Take nesting depth into account by adding the nesting depth
- # to the bond strength.
- #---------------------------------------------------------------
- my $strength;
-
- if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
- if ( $total_nesting_depth > 0 ) {
- $strength = $bond_str + $total_nesting_depth;
- }
- else {
- $strength = $bond_str;
- }
- }
- else {
- $strength = NO_BREAK;
- }
-
- #---------------------------------------------------------------
- # Bond Strength Section 6:
- # Sixth Approximation. Welds.
- #---------------------------------------------------------------
-
- # Do not allow a break within welds,
- if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
-
- # But encourage breaking after opening welded tokens
- elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
- $strength -= 1;
- }
-
-## # TESTING: weaken before first weld closing token
-## # This did not help
-## elsif ($i_next_nonblank <= $max_index_to_go
-## && weld_len_right_to_go($i_next_nonblank)
-## && $next_nonblank_token =~ /^[\}\]\)]$/ )
-## {
-## $strength -= 0.9;
-## }
-
- # always break after side comment
- if ( $type eq '#' ) { $strength = 0 }
-
- $bond_strength_to_go[$i] = $strength;
-
- FORMATTER_DEBUG_FLAG_BOND && do {
- my $str = substr( $token, 0, 15 );
- $str .= ' ' x ( 16 - length($str) );
- print STDOUT
-"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
- };
- } ## end main loop
- return;
- } ## end sub set_bond_strengths
-}
-
-sub pad_array_to_go {
-
- # to simplify coding in scan_list and set_bond_strengths, it helps
- # to create some extra blank tokens at the end of the arrays
- $tokens_to_go[ $max_index_to_go + 1 ] = '';
- $tokens_to_go[ $max_index_to_go + 2 ] = '';
- $types_to_go[ $max_index_to_go + 1 ] = 'b';
- $types_to_go[ $max_index_to_go + 2 ] = 'b';
- $nesting_depth_to_go[ $max_index_to_go + 1 ] =
- $nesting_depth_to_go[$max_index_to_go];
-
- # /^[R\}\)\]]$/
- if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
- if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
-
- # shouldn't happen:
- unless ( get_saw_brace_error() ) {
- warning(
-"Program bug in scan_list: hit nesting error which should have been caught\n"
- );
- report_definite_bug();
- }
- }
- else {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
- }
- }
-
- # /^[L\{\(\[]$/
- elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
- }
- return;
-}
-
-{ # begin scan_list
-
- my (
- $block_type, $current_depth,
- $depth, $i,
- $i_last_nonblank_token, $last_colon_sequence_number,
- $last_nonblank_token, $last_nonblank_type,
- $last_nonblank_block_type, $last_old_breakpoint_count,
- $minimum_depth, $next_nonblank_block_type,
- $next_nonblank_token, $next_nonblank_type,
- $old_breakpoint_count, $starting_breakpoint_count,
- $starting_depth, $token,
- $type, $type_sequence,
- );
-
- my (
- @breakpoint_stack, @breakpoint_undo_stack,
- @comma_index, @container_type,
- @identifier_count_stack, @index_before_arrow,
- @interrupted_list, @item_count_stack,
- @last_comma_index, @last_dot_index,
- @last_nonblank_type, @old_breakpoint_count_stack,
- @opening_structure_index_stack, @rfor_semicolon_list,
- @has_old_logical_breakpoints, @rand_or_list,
- @i_equals,
- );
-
- # routine to define essential variables when we go 'up' to
- # a new depth
- sub check_for_new_minimum_depth {
- my $depth = shift;
- if ( $depth < $minimum_depth ) {
-
- $minimum_depth = $depth;
-
- # these arrays need not retain values between calls
- $breakpoint_stack[$depth] = $starting_breakpoint_count;
- $container_type[$depth] = "";
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 1;
- $item_count_stack[$depth] = 0;
- $last_nonblank_type[$depth] = "";
- $opening_structure_index_stack[$depth] = -1;
-
- $breakpoint_undo_stack[$depth] = undef;
- $comma_index[$depth] = undef;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $old_breakpoint_count_stack[$depth] = undef;
- $has_old_logical_breakpoints[$depth] = 0;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
-
- # these arrays must retain values between calls
- if ( !defined( $has_broken_sublist[$depth] ) ) {
- $dont_align[$depth] = 0;
- $has_broken_sublist[$depth] = 0;
- $want_comma_break[$depth] = 0;
- }
- }
- return;
- }
-
- # routine to decide which commas to break at within a container;
- # returns:
- # $bp_count = number of comma breakpoints set
- # $do_not_break_apart = a flag indicating if container need not
- # be broken open
- sub set_comma_breakpoints {
-
- my $dd = shift;
- my $bp_count = 0;
- my $do_not_break_apart = 0;
-
- # anything to do?
- if ( $item_count_stack[$dd] ) {
-
- # handle commas not in containers...
- if ( $dont_align[$dd] ) {
- do_uncontained_comma_breaks($dd);
- }
-
- # handle commas within containers...
- else {
- my $fbc = $forced_breakpoint_count;
-
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
-
- set_comma_breakpoints_do(
- $dd,
- $opening_structure_index_stack[$dd],
- $i,
- $item_count_stack[$dd],
- $identifier_count_stack[$dd],
- $comma_index[$dd],
- $next_nonblank_type,
- $container_type[$dd],
- $interrupted_list[$dd],
- \$do_not_break_apart,
- $must_break_open,
- );
- $bp_count = $forced_breakpoint_count - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
- }
- }
- return ( $bp_count, $do_not_break_apart );
- }
-
- sub do_uncontained_comma_breaks {
-
- # Handle commas not in containers...
- # This is a catch-all routine for commas that we
- # don't know what to do with because the don't fall
- # within containers. We will bias the bond strength
- # to break at commas which ended lines in the input
- # file. This usually works better than just trying
- # to put as many items on a line as possible. A
- # downside is that if the input file is garbage it
- # won't work very well. However, the user can always
- # prevent following the old breakpoints with the
- # -iob flag.
- my $dd = shift;
- my $bias = -.01;
- my $old_comma_break_count = 0;
- foreach my $ii ( @{ $comma_index[$dd] } ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $old_comma_break_count++;
- $bond_strength_to_go[$ii] = $bias;
-
- # reduce bias magnitude to force breaks in order
- $bias *= 0.99;
- }
- }
-
- # Also put a break before the first comma if
- # (1) there was a break there in the input, and
- # (2) there was exactly one old break before the first comma break
- # (3) OLD: there are multiple old comma breaks
- # (3) NEW: there are one or more old comma breaks (see return example)
- #
- # For example, we will follow the user and break after
- # 'print' in this snippet:
- # print
- # "conformability (Not the same dimension)\n",
- # "\t", $have, " is ", text_unit($hu), "\n",
- # "\t", $want, " is ", text_unit($wu), "\n",
- # ;
- #
- # Another example, just one comma, where we will break after
- # the return:
- # return
- # $x * cos($a) - $y * sin($a),
- # $x * sin($a) + $y * cos($a);
-
- # Breaking a print statement:
- # print SAVEOUT
- # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
- # ( $? & 128 ) ? " -- core dumped" : "", "\n";
- #
- # But we will not force a break after the opening paren here
- # (causes a blinker):
- # $heap->{stream}->set_output_filter(
- # poe::filter::reference->new('myotherfreezer') ),
- # ;
- #
- my $i_first_comma = $comma_index[$dd]->[0];
- if ( $old_breakpoint_to_go[$i_first_comma] ) {
- my $level_comma = $levels_to_go[$i_first_comma];
- my $ibreak = -1;
- my $obp_count = 0;
- for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $obp_count++;
- last if ( $obp_count > 1 );
- $ibreak = $ii
- if ( $levels_to_go[$ii] == $level_comma );
- }
- }
-
- # Changed rule from multiple old commas to just one here:
- if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
- {
- # Do not to break before an opening token because
- # it can lead to "blinkers".
- my $ibreakm = $ibreak;
- $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
- if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
- {
- set_forced_breakpoint($ibreak);
- }
- }
- }
- return;
- }
-
- my %is_logical_container;
-
- BEGIN {
- my @q = qw# if elsif unless while and or err not && | || ? : ! #;
- @is_logical_container{@q} = (1) x scalar(@q);
- }
-
- sub set_for_semicolon_breakpoints {
- my $dd = shift;
- foreach ( @{ $rfor_semicolon_list[$dd] } ) {
- set_forced_breakpoint($_);
- }
- return;
- }
-
- sub set_logical_breakpoints {
- my $dd = shift;
- if (
- $item_count_stack[$dd] == 0
- && $is_logical_container{ $container_type[$dd] }
-
- || $has_old_logical_breakpoints[$dd]
- )
- {
-
- # Look for breaks in this order:
- # 0 1 2 3
- # or and || &&
- foreach my $i ( 0 .. 3 ) {
- if ( $rand_or_list[$dd][$i] ) {
- foreach ( @{ $rand_or_list[$dd][$i] } ) {
- set_forced_breakpoint($_);
- }
-
- # break at any 'if' and 'unless' too
- foreach ( @{ $rand_or_list[$dd][4] } ) {
- set_forced_breakpoint($_);
- }
- $rand_or_list[$dd] = [];
- last;
- }
- }
- }
- return;
- }
-
- sub is_unbreakable_container {
-
- # never break a container of one of these types
- # because bad things can happen (map1.t)
- my $dd = shift;
- return $is_sort_map_grep{ $container_type[$dd] };
- }
-
- sub scan_list {
-
- # This routine is responsible for setting line breaks for all lists,
- # so that hierarchical structure can be displayed and so that list
- # items can be vertically aligned. The output of this routine is
- # stored in the array @forced_breakpoint_to_go, which is used to set
- # final breakpoints.
-
- $starting_depth = $nesting_depth_to_go[0];
-
- $block_type = ' ';
- $current_depth = $starting_depth;
- $i = -1;
- $last_colon_sequence_number = -1;
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_nonblank_block_type = ' ';
- $last_old_breakpoint_count = 0;
- $minimum_depth = $current_depth + 1; # forces update in check below
- $old_breakpoint_count = 0;
- $starting_breakpoint_count = $forced_breakpoint_count;
- $token = ';';
- $type = ';';
- $type_sequence = '';
-
- my $total_depth_variation = 0;
- my $i_old_assignment_break;
- my $depth_last = $starting_depth;
-
- check_for_new_minimum_depth($current_depth);
-
- my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
- my $want_previous_breakpoint = -1;
-
- my $saw_good_breakpoint;
- my $i_line_end = -1;
- my $i_line_start = -1;
-
- # loop over all tokens in this batch
- while ( ++$i <= $max_index_to_go ) {
- if ( $type ne 'b' ) {
- $i_last_nonblank_token = $i - 1;
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- $last_nonblank_block_type = $block_type;
- } ## end if ( $type ne 'b' )
- $type = $types_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $token = $tokens_to_go[$i];
- $type_sequence = $type_sequence_to_go[$i];
- my $next_type = $types_to_go[ $i + 1 ];
- my $next_token = $tokens_to_go[ $i + 1 ];
- my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
-
- # set break if flag was set
- if ( $want_previous_breakpoint >= 0 ) {
- set_forced_breakpoint($want_previous_breakpoint);
- $want_previous_breakpoint = -1;
- }
-
- $last_old_breakpoint_count = $old_breakpoint_count;
- if ( $old_breakpoint_to_go[$i] ) {
- $i_line_end = $i;
- $i_line_start = $i_next_nonblank;
-
- $old_breakpoint_count++;
-
- # Break before certain keywords if user broke there and
- # this is a 'safe' break point. The idea is to retain
- # any preferred breaks for sequential list operations,
- # like a schwartzian transform.
- if ($rOpts_break_at_old_keyword_breakpoints) {
- if (
- $next_nonblank_type eq 'k'
- && $is_keyword_returning_list{$next_nonblank_token}
- && ( $type =~ /^[=\)\]\}Riw]$/
- || $type eq 'k'
- && $is_keyword_returning_list{$token} )
- )
- {
-
- # we actually have to set this break next time through
- # the loop because if we are at a closing token (such
- # as '}') which forms a one-line block, this break might
- # get undone.
- $want_previous_breakpoint = $i;
- } ## end if ( $next_nonblank_type...)
- } ## end if ($rOpts_break_at_old_keyword_breakpoints)
-
- # Break before attributes if user broke there
- if ($rOpts_break_at_old_attribute_breakpoints) {
- if ( $next_nonblank_type eq 'A' ) {
- $want_previous_breakpoint = $i;
- }
- }
-
- # remember an = break as possible good break point
- if ( $is_assignment{$type} ) {
- $i_old_assignment_break = $i;
- }
- elsif ( $is_assignment{$next_nonblank_type} ) {
- $i_old_assignment_break = $i_next_nonblank;
- }
- } ## end if ( $old_breakpoint_to_go...)
-
- next if ( $type eq 'b' );
- $depth = $nesting_depth_to_go[ $i + 1 ];
-
- $total_depth_variation += abs( $depth - $depth_last );
- $depth_last = $depth;
-
- # safety check - be sure we always break after a comment
- # Shouldn't happen .. an error here probably means that the
- # nobreak flag did not get turned off correctly during
- # formatting.
- if ( $type eq '#' ) {
- if ( $i != $max_index_to_go ) {
- warning(
-"Non-fatal program bug: backup logic needed to break after a comment\n"
- );
- report_definite_bug();
- $nobreak_to_go[$i] = 0;
- set_forced_breakpoint($i);
- } ## end if ( $i != $max_index_to_go)
- } ## end if ( $type eq '#' )
-
- # Force breakpoints at certain tokens in long lines.
- # Note that such breakpoints will be undone later if these tokens
- # are fully contained within parens on a line.
- if (
-
- # break before a keyword within a line
- $type eq 'k'
- && $i > 0
-
- # if one of these keywords:
- && $token =~ /^(if|unless|while|until|for)$/
-
- # but do not break at something like '1 while'
- && ( $last_nonblank_type ne 'n' || $i > 2 )
-
- # and let keywords follow a closing 'do' brace
- && $last_nonblank_block_type ne 'do'
-
- && (
- $is_long_line
-
- # or container is broken (by side-comment, etc)
- || ( $next_nonblank_token eq '('
- && $mate_index_to_go[$i_next_nonblank] < $i )
- )
- )
- {
- set_forced_breakpoint( $i - 1 );
- } ## end if ( $type eq 'k' && $i...)
-
- # remember locations of '||' and '&&' for possible breaks if we
- # decide this is a long logical expression.
- if ( $type eq '||' ) {
- push @{ $rand_or_list[$depth][2] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end if ( $type eq '||' )
- elsif ( $type eq '&&' ) {
- push @{ $rand_or_list[$depth][3] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '&&' )
- elsif ( $type eq 'f' ) {
- push @{ $rfor_semicolon_list[$depth] }, $i;
- }
- elsif ( $type eq 'k' ) {
- if ( $token eq 'and' ) {
- push @{ $rand_or_list[$depth][1] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end if ( $token eq 'and' )
-
- # break immediately at 'or's which are probably not in a logical
- # block -- but we will break in logical breaks below so that
- # they do not add to the forced_breakpoint_count
- elsif ( $token eq 'or' ) {
- push @{ $rand_or_list[$depth][0] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- if ( $is_logical_container{ $container_type[$depth] } ) {
- }
- else {
- if ($is_long_line) { set_forced_breakpoint($i) }
- elsif ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- $saw_good_breakpoint = 1;
- }
- } ## end else [ if ( $is_logical_container...)]
- } ## end elsif ( $token eq 'or' )
- elsif ( $token eq 'if' || $token eq 'unless' ) {
- push @{ $rand_or_list[$depth][4] }, $i;
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- set_forced_breakpoint($i);
- }
- } ## end elsif ( $token eq 'if' ||...)
- } ## end elsif ( $type eq 'k' )
- elsif ( $is_assignment{$type} ) {
- $i_equals[$depth] = $i;
- }
-
- if ($type_sequence) {
-
- # handle any postponed closing breakpoints
- if ( $token =~ /^[\)\]\}\:]$/ ) {
- if ( $type eq ':' ) {
- $last_colon_sequence_number = $type_sequence;
-
- # retain break at a ':' line break
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_ternary_breakpoints )
- {
-
- set_forced_breakpoint($i);
-
- # break at previous '='
- if ( $i_equals[$depth] > 0 ) {
- set_forced_breakpoint( $i_equals[$depth] );
- $i_equals[$depth] = -1;
- }
- } ## end if ( ( $i == $i_line_start...))
- } ## end if ( $type eq ':' )
- if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
- my $inc = ( $type eq ':' ) ? 0 : 1;
- set_forced_breakpoint( $i - $inc );
- delete $postponed_breakpoint{$type_sequence};
- }
- } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
-
- # set breaks at ?/: if they will get separated (and are
- # not a ?/: chain), or if the '?' is at the end of the
- # line
- elsif ( $token eq '?' ) {
- my $i_colon = $mate_index_to_go[$i];
- if (
- $i_colon <= 0 # the ':' is not in this batch
- || $i == 0 # this '?' is the first token of the line
- || $i ==
- $max_index_to_go # or this '?' is the last token
- )
- {
-
- # don't break at a '?' if preceded by ':' on
- # this line of previous ?/: pair on this line.
- # This is an attempt to preserve a chain of ?/:
- # expressions (elsif2.t). And don't break if
- # this has a side comment.
- set_forced_breakpoint($i)
- unless (
- $type_sequence == (
- $last_colon_sequence_number +
- TYPE_SEQUENCE_INCREMENT
- )
- || $tokens_to_go[$max_index_to_go] eq '#'
- );
- set_closing_breakpoint($i);
- } ## end if ( $i_colon <= 0 ||...)
- } ## end elsif ( $token eq '?' )
- } ## end if ($type_sequence)
-
-#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
-
- #------------------------------------------------------------
- # Handle Increasing Depth..
- #
- # prepare for a new list when depth increases
- # token $i is a '(','{', or '['
- #------------------------------------------------------------
- if ( $depth > $current_depth ) {
-
- $breakpoint_stack[$depth] = $forced_breakpoint_count;
- $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
- $has_broken_sublist[$depth] = 0;
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 0;
- $item_count_stack[$depth] = 0;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $last_nonblank_type[$depth] = $last_nonblank_type;
- $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
- $opening_structure_index_stack[$depth] = $i;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
- $want_comma_break[$depth] = 0;
- $container_type[$depth] =
- ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
- ? $last_nonblank_token
- : "";
- $has_old_logical_breakpoints[$depth] = 0;
-
- # if line ends here then signal closing token to break
- if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
- {
- set_closing_breakpoint($i);
- }
-
- # Not all lists of values should be vertically aligned..
- $dont_align[$depth] =
-
- # code BLOCKS are handled at a higher level
- ( $block_type ne "" )
-
- # certain paren lists
- || ( $type eq '(' ) && (
-
- # it does not usually look good to align a list of
- # identifiers in a parameter list, as in:
- # my($var1, $var2, ...)
- # (This test should probably be refined, for now I'm just
- # testing for any keyword)
- ( $last_nonblank_type eq 'k' )
-
- # a trailing '(' usually indicates a non-list
- || ( $next_nonblank_type eq '(' )
- );
-
- # patch to outdent opening brace of long if/for/..
- # statements (like this one). See similar coding in
- # set_continuation breaks. We have also catch it here for
- # short line fragments which otherwise will not go through
- # set_continuation_breaks.
- if (
- $block_type
-
- # if we have the ')' but not its '(' in this batch..
- && ( $last_nonblank_token eq ')' )
- && $mate_index_to_go[$i_last_nonblank_token] < 0
-
- # and user wants brace to left
- && !$rOpts->{'opening-brace-always-on-right'}
-
- && ( $type eq '{' ) # should be true
- && ( $token eq '{' ) # should be true
- )
- {
- set_forced_breakpoint( $i - 1 );
- } ## end if ( $block_type && ( ...))
- } ## end if ( $depth > $current_depth)
-
- #------------------------------------------------------------
- # Handle Decreasing Depth..
- #
- # finish off any old list when depth decreases
- # token $i is a ')','}', or ']'
- #------------------------------------------------------------
- elsif ( $depth < $current_depth ) {
-
- check_for_new_minimum_depth($depth);
-
- # force all outer logical containers to break after we see on
- # old breakpoint
- $has_old_logical_breakpoints[$depth] ||=
- $has_old_logical_breakpoints[$current_depth];
-
- # Patch to break between ') {' if the paren list is broken.
- # There is similar logic in set_continuation_breaks for
- # non-broken lists.
- if ( $token eq ')'
- && $next_nonblank_block_type
- && $interrupted_list[$current_depth]
- && $next_nonblank_type eq '{'
- && !$rOpts->{'opening-brace-always-on-right'} )
- {
- set_forced_breakpoint($i);
- } ## end if ( $token eq ')' && ...
-
-#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
-
- # set breaks at commas if necessary
- my ( $bp_count, $do_not_break_apart ) =
- set_comma_breakpoints($current_depth);
-
- my $i_opening = $opening_structure_index_stack[$current_depth];
- my $saw_opening_structure = ( $i_opening >= 0 );
-
- # this term is long if we had to break at interior commas..
- my $is_long_term = $bp_count > 0;
-
- # If this is a short container with one or more comma arrows,
- # then we will mark it as a long term to open it if requested.
- # $rOpts_comma_arrow_breakpoints =
- # 0 - open only if comma precedes closing brace
- # 1 - stable: except for one line blocks
- # 2 - try to form 1 line blocks
- # 3 - ignore =>
- # 4 - always open up if vt=0
- # 5 - stable: even for one line blocks if vt=0
- if ( !$is_long_term
- && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
- && $index_before_arrow[ $depth + 1 ] > 0
- && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
- )
- {
- $is_long_term = $rOpts_comma_arrow_breakpoints == 4
- || ( $rOpts_comma_arrow_breakpoints == 0
- && $last_nonblank_token eq ',' )
- || ( $rOpts_comma_arrow_breakpoints == 5
- && $old_breakpoint_to_go[$i_opening] );
- } ## end if ( !$is_long_term &&...)
-
- # mark term as long if the length between opening and closing
- # parens exceeds allowed line length
- if ( !$is_long_term && $saw_opening_structure ) {
- my $i_opening_minus = find_token_starting_list($i_opening);
-
- # Note: we have to allow for one extra space after a
- # closing token so that we do not strand a comma or
- # semicolon, hence the '>=' here (oneline.t)
- # Note: we ignore left weld lengths here for best results
- $is_long_term =
- excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
- } ## end if ( !$is_long_term &&...)
-
- # We've set breaks after all comma-arrows. Now we have to
- # undo them if this can be a one-line block
- # (the only breakpoints set will be due to comma-arrows)
- if (
-
- # user doesn't require breaking after all comma-arrows
- ( $rOpts_comma_arrow_breakpoints != 0 )
- && ( $rOpts_comma_arrow_breakpoints != 4 )
-
- # and if the opening structure is in this batch
- && $saw_opening_structure
-
- # and either on the same old line
- && (
- $old_breakpoint_count_stack[$current_depth] ==
- $last_old_breakpoint_count
-
- # or user wants to form long blocks with arrows
- || $rOpts_comma_arrow_breakpoints == 2
- )
-
- # and we made some breakpoints between the opening and closing
- && ( $breakpoint_undo_stack[$current_depth] <
- $forced_breakpoint_undo_count )
-
- # and this block is short enough to fit on one line
- # Note: use < because need 1 more space for possible comma
- && !$is_long_term
-
- )
- {
- undo_forced_breakpoint_stack(
- $breakpoint_undo_stack[$current_depth] );
- } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
-
- # now see if we have any comma breakpoints left
- my $has_comma_breakpoints =
- ( $breakpoint_stack[$current_depth] !=
- $forced_breakpoint_count );
-
- # update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] =
- $has_broken_sublist[$depth]
- || $has_broken_sublist[$current_depth]
- || $is_long_term
- || $has_comma_breakpoints;
-
-# Having come to the closing ')', '}', or ']', now we have to decide if we
-# should 'open up' the structure by placing breaks at the opening and
-# closing containers. This is a tricky decision. Here are some of the
-# basic considerations:
-#
-# -If this is a BLOCK container, then any breakpoints will have already
-# been set (and according to user preferences), so we need do nothing here.
-#
-# -If we have a comma-separated list for which we can align the list items,
-# then we need to do so because otherwise the vertical aligner cannot
-# currently do the alignment.
-#
-# -If this container does itself contain a container which has been broken
-# open, then it should be broken open to properly show the structure.
-#
-# -If there is nothing to align, and no other reason to break apart,
-# then do not do it.
-#
-# We will not break open the parens of a long but 'simple' logical expression.
-# For example:
-#
-# This is an example of a simple logical expression and its formatting:
-#
-# if ( $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4 )
-#
-# Most people would prefer this than the 'spacey' version:
-#
-# if (
-# $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4
-# )
-#
-# To illustrate the rules for breaking logical expressions, consider:
-#
-# FULLY DENSE:
-# if ( $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc ))
-#
-# This is on the verge of being difficult to read. The current default is to
-# open it up like this:
-#
-# DEFAULT:
-# if (
-# $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc )
-# )
-#
-# This is a compromise which tries to avoid being too dense and to spacey.
-# A more spaced version would be:
-#
-# SPACEY:
-# if (
-# $opt_excl
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-# )
-#
-# Some people might prefer the spacey version -- an option could be added. The
-# innermost expression contains a long block '( exists $ids_... ')'.
-#
-# Here is how the logic goes: We will force a break at the 'or' that the
-# innermost expression contains, but we will not break apart its opening and
-# closing containers because (1) it contains no multi-line sub-containers itself,
-# and (2) there is no alignment to be gained by breaking it open like this
-#
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-#
-# (although this looks perfectly ok and might be good for long expressions). The
-# outer 'if' container, though, contains a broken sub-container, so it will be
-# broken open to avoid too much density. Also, since it contains no 'or's, there
-# will be a forced break at its 'and'.
-
- # set some flags telling something about this container..
- my $is_simple_logical_expression = 0;
- if ( $item_count_stack[$current_depth] == 0
- && $saw_opening_structure
- && $tokens_to_go[$i_opening] eq '('
- && $is_logical_container{ $container_type[$current_depth] }
- )
- {
-
- # This seems to be a simple logical expression with
- # no existing breakpoints. Set a flag to prevent
- # opening it up.
- if ( !$has_comma_breakpoints ) {
- $is_simple_logical_expression = 1;
- }
-
- # This seems to be a simple logical expression with
- # breakpoints (broken sublists, for example). Break
- # at all 'or's and '||'s.
- else {
- set_logical_breakpoints($current_depth);
- }
- } ## end if ( $item_count_stack...)
-
- if ( $is_long_term
- && @{ $rfor_semicolon_list[$current_depth] } )
- {
- set_for_semicolon_breakpoints($current_depth);
-
- # open up a long 'for' or 'foreach' container to allow
- # leading term alignment unless -lp is used.
- $has_comma_breakpoints = 1
- unless $rOpts_line_up_parentheses;
- } ## end if ( $is_long_term && ...)
-
- if (
-
- # breaks for code BLOCKS are handled at a higher level
- !$block_type
-
- # we do not need to break at the top level of an 'if'
- # type expression
- && !$is_simple_logical_expression
-
- ## modification to keep ': (' containers vertically tight;
- ## but probably better to let user set -vt=1 to avoid
- ## inconsistency with other paren types
- ## && ($container_type[$current_depth] ne ':')
-
- # otherwise, we require one of these reasons for breaking:
- && (
-
- # - this term has forced line breaks
- $has_comma_breakpoints
-
- # - the opening container is separated from this batch
- # for some reason (comment, blank line, code block)
- # - this is a non-paren container spanning multiple lines
- || !$saw_opening_structure
-
- # - this is a long block contained in another breakable
- # container
- || ( $is_long_term
- && $container_environment_to_go[$i_opening] ne
- 'BLOCK' )
- )
- )
- {
-
- # For -lp option, we must put a breakpoint before
- # the token which has been identified as starting
- # this indentation level. This is necessary for
- # proper alignment.
- if ( $rOpts_line_up_parentheses && $saw_opening_structure )
- {
- my $item = $leading_spaces_to_go[ $i_opening + 1 ];
- if ( $i_opening + 1 < $max_index_to_go
- && $types_to_go[ $i_opening + 1 ] eq 'b' )
- {
- $item = $leading_spaces_to_go[ $i_opening + 2 ];
- }
- if ( defined($item) ) {
- my $i_start_2 = $item->get_starting_index();
- if (
- defined($i_start_2)
-
- # we are breaking after an opening brace, paren,
- # so don't break before it too
- && $i_start_2 ne $i_opening
- )
- {
-
- # Only break for breakpoints at the same
- # indentation level as the opening paren
- my $test1 = $nesting_depth_to_go[$i_opening];
- my $test2 = $nesting_depth_to_go[$i_start_2];
- if ( $test2 == $test1 ) {
- set_forced_breakpoint( $i_start_2 - 1 );
- }
- } ## end if ( defined($i_start_2...))
- } ## end if ( defined($item) )
- } ## end if ( $rOpts_line_up_parentheses...)
-
- # break after opening structure.
- # note: break before closing structure will be automatic
- if ( $minimum_depth <= $current_depth ) {
-
- set_forced_breakpoint($i_opening)
- unless ( $do_not_break_apart
- || is_unbreakable_container($current_depth) );
-
- # break at ',' of lower depth level before opening token
- if ( $last_comma_index[$depth] ) {
- set_forced_breakpoint( $last_comma_index[$depth] );
- }
-
- # break at '.' of lower depth level before opening token
- if ( $last_dot_index[$depth] ) {
- set_forced_breakpoint( $last_dot_index[$depth] );
- }
-
- # break before opening structure if preceded by another
- # closing structure and a comma. This is normally
- # done by the previous closing brace, but not
- # if it was a one-line block.
- if ( $i_opening > 2 ) {
- my $i_prev =
- ( $types_to_go[ $i_opening - 1 ] eq 'b' )
- ? $i_opening - 2
- : $i_opening - 1;
-
- if ( $types_to_go[$i_prev] eq ','
- && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
- {
- set_forced_breakpoint($i_prev);
- }
-
- # also break before something like ':(' or '?('
- # if appropriate.
- elsif (
- $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
- {
- my $token_prev = $tokens_to_go[$i_prev];
- if ( $want_break_before{$token_prev} ) {
- set_forced_breakpoint($i_prev);
- }
- } ## end elsif ( $types_to_go[$i_prev...])
- } ## end if ( $i_opening > 2 )
- } ## end if ( $minimum_depth <=...)
-
- # break after comma following closing structure
- if ( $next_type eq ',' ) {
- set_forced_breakpoint( $i + 1 );
- }
-
- # break before an '=' following closing structure
- if (
- $is_assignment{$next_nonblank_type}
- && ( $breakpoint_stack[$current_depth] !=
- $forced_breakpoint_count )
- )
- {
- set_forced_breakpoint($i);
- } ## end if ( $is_assignment{$next_nonblank_type...})
-
- # break at any comma before the opening structure Added
- # for -lp, but seems to be good in general. It isn't
- # obvious how far back to look; the '5' below seems to
- # work well and will catch the comma in something like
- # push @list, myfunc( $param, $param, ..
-
- my $icomma = $last_comma_index[$depth];
- if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
- unless ( $forced_breakpoint_to_go[$icomma] ) {
- set_forced_breakpoint($icomma);
- }
- }
- } # end logic to open up a container
-
- # Break open a logical container open if it was already open
- elsif ($is_simple_logical_expression
- && $has_old_logical_breakpoints[$current_depth] )
- {
- set_logical_breakpoints($current_depth);
- }
-
- # Handle long container which does not get opened up
- elsif ($is_long_term) {
-
- # must set fake breakpoint to alert outer containers that
- # they are complex
- set_fake_breakpoint();
- } ## end elsif ($is_long_term)
-
- } ## end elsif ( $depth < $current_depth)
-
- #------------------------------------------------------------
- # Handle this token
- #------------------------------------------------------------
-
- $current_depth = $depth;
-
- # handle comma-arrow
- if ( $type eq '=>' ) {
- next if ( $last_nonblank_type eq '=>' );
- next if $rOpts_break_at_old_comma_breakpoints;
- next if $rOpts_comma_arrow_breakpoints == 3;
- $want_comma_break[$depth] = 1;
- $index_before_arrow[$depth] = $i_last_nonblank_token;
- next;
- } ## end if ( $type eq '=>' )
-
- elsif ( $type eq '.' ) {
- $last_dot_index[$depth] = $i;
- }
-
- # Turn off alignment if we are sure that this is not a list
- # environment. To be safe, we will do this if we see certain
- # non-list tokens, such as ';', and also the environment is
- # not a list. Note that '=' could be in any of the = operators
- # (lextest.t). We can't just use the reported environment
- # because it can be incorrect in some cases.
- elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
- && $container_environment_to_go[$i] ne 'LIST' )
- {
- $dont_align[$depth] = 1;
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
-
- # now just handle any commas
- next unless ( $type eq ',' );
-
- $last_dot_index[$depth] = undef;
- $last_comma_index[$depth] = $i;
-
- # break here if this comma follows a '=>'
- # but not if there is a side comment after the comma
- if ( $want_comma_break[$depth] ) {
-
- if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
- if ($rOpts_comma_arrow_breakpoints) {
- $want_comma_break[$depth] = 0;
- ##$index_before_arrow[$depth] = -1;
- next;
- }
- }
-
- set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
-
- # break before the previous token if it looks safe
- # Example of something that we will not try to break before:
- # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
- # Also we don't want to break at a binary operator (like +):
- # $c->createOval(
- # $x + $R, $y +
- # $R => $x - $R,
- # $y - $R, -fill => 'black',
- # );
- my $ibreak = $index_before_arrow[$depth] - 1;
- if ( $ibreak > 0
- && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
- {
- if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
-
- # don't break pointer calls, such as the following:
- # File::Spec->curdir => 1,
- # (This is tokenized as adjacent 'w' tokens)
- ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
-
- # And don't break before a comma, as in the following:
- # ( LONGER_THAN,=> 1,
- # EIGHTY_CHARACTERS,=> 2,
- # CAUSES_FORMATTING,=> 3,
- # LIKE_THIS,=> 4,
- # );
- # This example is for -tso but should be general rule
- if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
- && $tokens_to_go[ $ibreak + 1 ] ne ',' )
- {
- set_forced_breakpoint($ibreak);
- }
- } ## end if ( $types_to_go[$ibreak...])
- } ## end if ( $ibreak > 0 && $tokens_to_go...)
-
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
-
- # handle list which mixes '=>'s and ','s:
- # treat any list items so far as an interrupted list
- $interrupted_list[$depth] = 1;
- next;
- } ## end if ( $want_comma_break...)
-
- # break after all commas above starting depth
- if ( $depth < $starting_depth && !$dont_align[$depth] ) {
- set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
- next;
- }
-
- # add this comma to the list..
- my $item_count = $item_count_stack[$depth];
- if ( $item_count == 0 ) {
-
- # but do not form a list with no opening structure
- # for example:
-
- # open INFILE_COPY, ">$input_file_copy"
- # or die ("very long message");
-
- if ( ( $opening_structure_index_stack[$depth] < 0 )
- && $container_environment_to_go[$i] eq 'BLOCK' )
- {
- $dont_align[$depth] = 1;
- }
- } ## end if ( $item_count == 0 )
-
- $comma_index[$depth][$item_count] = $i;
- ++$item_count_stack[$depth];
- if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
- $identifier_count_stack[$depth]++;
- }
- } ## end while ( ++$i <= $max_index_to_go)
-
- #-------------------------------------------
- # end of loop over all tokens in this batch
- #-------------------------------------------
-
- # set breaks for any unfinished lists ..
- for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
-
- $interrupted_list[$dd] = 1;
- $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
- set_comma_breakpoints($dd);
- set_logical_breakpoints($dd)
- if ( $has_old_logical_breakpoints[$dd] );
- set_for_semicolon_breakpoints($dd);
-
- # break open container...
- my $i_opening = $opening_structure_index_stack[$dd];
- set_forced_breakpoint($i_opening)
- unless (
- is_unbreakable_container($dd)
-
- # Avoid a break which would place an isolated ' or "
- # on a line
- || ( $type eq 'Q'
- && $i_opening >= $max_index_to_go - 2
- && $token =~ /^['"]$/ )
- );
- } ## end for ( my $dd = $current_depth...)
-
- # Return a flag indicating if the input file had some good breakpoints.
- # This flag will be used to force a break in a line shorter than the
- # allowed line length.
- if ( $has_old_logical_breakpoints[$current_depth] ) {
- $saw_good_breakpoint = 1;
- }
-
- # A complex line with one break at an = has a good breakpoint.
- # This is not complex ($total_depth_variation=0):
- # $res1
- # = 10;
- #
- # This is complex ($total_depth_variation=6):
- # $res2 =
- # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
- elsif ($i_old_assignment_break
- && $total_depth_variation > 4
- && $old_breakpoint_count == 1 )
- {
- $saw_good_breakpoint = 1;
- } ## end elsif ( $i_old_assignment_break...)
-
- return $saw_good_breakpoint;
- } ## end sub scan_list
-} # end scan_list
-
-sub find_token_starting_list {
-
- # When testing to see if a block will fit on one line, some
- # previous token(s) may also need to be on the line; particularly
- # if this is a sub call. So we will look back at least one
- # token. NOTE: This isn't perfect, but not critical, because
- # if we mis-identify a block, it will be wrapped and therefore
- # fixed the next time it is formatted.
- my $i_opening_paren = shift;
- my $i_opening_minus = $i_opening_paren;
- my $im1 = $i_opening_paren - 1;
- my $im2 = $i_opening_paren - 2;
- my $im3 = $i_opening_paren - 3;
- my $typem1 = $types_to_go[$im1];
- my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
- if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
- $i_opening_minus = $i_opening_paren;
- }
- elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
- $i_opening_minus = $im1 if $im1 >= 0;
-
- # walk back to improve length estimate
- for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
- last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
- $i_opening_minus = $j;
- }
- if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
- }
- elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
- elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
- $i_opening_minus = $im2;
- }
- return $i_opening_minus;
-}
-
-{ # begin set_comma_breakpoints_do
-
- my %is_keyword_with_special_leading_term;
-
- BEGIN {
-
- # These keywords have prototypes which allow a special leading item
- # followed by a list
- my @q =
- qw(formline grep kill map printf sprintf push chmod join pack unshift);
- @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
- }
-
- sub set_comma_breakpoints_do {
-
- # Given a list with some commas, set breakpoints at some of the
- # commas, if necessary, to make it easy to read. This list is
- # an example:
- my (
- $depth, $i_opening_paren, $i_closing_paren,
- $item_count, $identifier_count, $rcomma_index,
- $next_nonblank_type, $list_type, $interrupted,
- $rdo_not_break_apart, $must_break_open,
- ) = @_;
-
- # nothing to do if no commas seen
- return if ( $item_count < 1 );
- my $i_first_comma = $rcomma_index->[0];
- my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
- my $i_last_comma = $i_true_last_comma;
- if ( $i_last_comma >= $max_index_to_go ) {
- $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
- return if ( $item_count < 1 );
- }
-
- #---------------------------------------------------------------
- # find lengths of all items in the list to calculate page layout
- #---------------------------------------------------------------
- my $comma_count = $item_count;
- my @item_lengths;
- my @i_term_begin;
- my @i_term_end;
- my @i_term_comma;
- my $i_prev_plus;
- my @max_length = ( 0, 0 );
- my $first_term_length;
- my $i = $i_opening_paren;
- my $is_odd = 1;
-
- foreach my $j ( 0 .. $comma_count - 1 ) {
- $is_odd = 1 - $is_odd;
- $i_prev_plus = $i + 1;
- $i = $rcomma_index->[$j];
-
- my $i_term_end =
- ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
- my $i_term_begin =
- ( $types_to_go[$i_prev_plus] eq 'b' )
- ? $i_prev_plus + 1
- : $i_prev_plus;
- push @i_term_begin, $i_term_begin;
- push @i_term_end, $i_term_end;
- push @i_term_comma, $i;
-
- # note: currently adding 2 to all lengths (for comma and space)
- my $length =
- 2 + token_sequence_length( $i_term_begin, $i_term_end );
- push @item_lengths, $length;
-
- if ( $j == 0 ) {
- $first_term_length = $length;
- }
- else {
-
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
- }
- }
- }
-
- # now we have to make a distinction between the comma count and item
- # count, because the item count will be one greater than the comma
- # count if the last item is not terminated with a comma
- my $i_b =
- ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
- ? $i_last_comma + 1
- : $i_last_comma;
- my $i_e =
- ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
- ? $i_closing_paren - 2
- : $i_closing_paren - 1;
- my $i_effective_last_comma = $i_last_comma;
-
- my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
-
- if ( $last_item_length > 0 ) {
-
- # add 2 to length because other lengths include a comma and a blank
- $last_item_length += 2;
- push @item_lengths, $last_item_length;
- push @i_term_begin, $i_b + 1;
- push @i_term_end, $i_e;
- push @i_term_comma, undef;
-
- my $i_odd = $item_count % 2;
-
- if ( $last_item_length > $max_length[$i_odd] ) {
- $max_length[$i_odd] = $last_item_length;
- }
-
- $item_count++;
- $i_effective_last_comma = $i_e + 1;
-
- if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
- $identifier_count++;
- }
- }
-
- #---------------------------------------------------------------
- # End of length calculations
- #---------------------------------------------------------------
-
- #---------------------------------------------------------------
- # Compound List Rule 1:
- # Break at (almost) every comma for a list containing a broken
- # sublist. This has higher priority than the Interrupted List
- # Rule.
- #---------------------------------------------------------------
- if ( $has_broken_sublist[$depth] ) {
-
- # Break at every comma except for a comma between two
- # simple, small terms. This prevents long vertical
- # columns of, say, just 0's.
- my $small_length = 10; # 2 + actual maximum length wanted
-
- # We'll insert a break in long runs of small terms to
- # allow alignment in uniform tables.
- my $skipped_count = 0;
- my $columns = table_columns_available($i_first_comma);
- my $fields = int( $columns / $small_length );
- if ( $rOpts_maximum_fields_per_table
- && $fields > $rOpts_maximum_fields_per_table )
- {
- $fields = $rOpts_maximum_fields_per_table;
- }
- my $max_skipped_count = $fields - 1;
-
- my $is_simple_last_term = 0;
- my $is_simple_next_term = 0;
- foreach my $j ( 0 .. $item_count ) {
- $is_simple_last_term = $is_simple_next_term;
- $is_simple_next_term = 0;
- if ( $j < $item_count
- && $i_term_end[$j] == $i_term_begin[$j]
- && $item_lengths[$j] <= $small_length )
- {
- $is_simple_next_term = 1;
- }
- next if $j == 0;
- if ( $is_simple_last_term
- && $is_simple_next_term
- && $skipped_count < $max_skipped_count )
- {
- $skipped_count++;
- }
- else {
- $skipped_count = 0;
- my $i = $i_term_comma[ $j - 1 ];
- last unless defined $i;
- set_forced_breakpoint($i);
- }
- }
-
- # always break at the last comma if this list is
- # interrupted; we wouldn't want to leave a terminal '{', for
- # example.
- if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
- return;
- }
-
-#my ( $a, $b, $c ) = caller();
-#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
-#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
-#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
-
- #---------------------------------------------------------------
- # Interrupted List Rule:
- # A list is forced to use old breakpoints if it was interrupted
- # by side comments or blank lines, or requested by user.
- #---------------------------------------------------------------
- if ( $rOpts_break_at_old_comma_breakpoints
- || $interrupted
- || $i_opening_paren < 0 )
- {
- copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
- return;
- }
-
- #---------------------------------------------------------------
- # Looks like a list of items. We have to look at it and size it up.
- #---------------------------------------------------------------
-
- my $opening_token = $tokens_to_go[$i_opening_paren];
- my $opening_environment =
- $container_environment_to_go[$i_opening_paren];
-
- #-------------------------------------------------------------------
- # Return if this will fit on one line
- #-------------------------------------------------------------------
-
- my $i_opening_minus = find_token_starting_list($i_opening_paren);
- return
- unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
-
- #-------------------------------------------------------------------
- # Now we know that this block spans multiple lines; we have to set
- # at least one breakpoint -- real or fake -- as a signal to break
- # open any outer containers.
- #-------------------------------------------------------------------
- set_fake_breakpoint();
-
- # be sure we do not extend beyond the current list length
- if ( $i_effective_last_comma >= $max_index_to_go ) {
- $i_effective_last_comma = $max_index_to_go - 1;
- }
-
- # Set a flag indicating if we need to break open to keep -lp
- # items aligned. This is necessary if any of the list terms
- # exceeds the available space after the '('.
- my $need_lp_break_open = $must_break_open;
- if ( $rOpts_line_up_parentheses && !$must_break_open ) {
- my $columns_if_unbroken =
- maximum_line_length($i_opening_minus) -
- total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open =
- ( $max_length[0] > $columns_if_unbroken )
- || ( $max_length[1] > $columns_if_unbroken )
- || ( $first_term_length > $columns_if_unbroken );
- }
-
- # Specify if the list must have an even number of fields or not.
- # It is generally safest to assume an even number, because the
- # list items might be a hash list. But if we can be sure that
- # it is not a hash, then we can allow an odd number for more
- # flexibility.
- my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
-
- if ( $identifier_count >= $item_count - 1
- || $is_assignment{$next_nonblank_type}
- || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
- )
- {
- $odd_or_even = 1;
- }
-
- # do we have a long first term which should be
- # left on a line by itself?
- my $use_separate_first_term = (
- $odd_or_even == 1 # only if we can use 1 field/line
- && $item_count > 3 # need several items
- && $first_term_length >
- 2 * $max_length[0] - 2 # need long first term
- && $first_term_length >
- 2 * $max_length[1] - 2 # need long first term
- );
-
- # or do we know from the type of list that the first term should
- # be placed alone?
- if ( !$use_separate_first_term ) {
- if ( $is_keyword_with_special_leading_term{$list_type} ) {
- $use_separate_first_term = 1;
-
- # should the container be broken open?
- if ( $item_count < 3 ) {
- if ( $i_first_comma - $i_opening_paren < 4 ) {
- ${$rdo_not_break_apart} = 1;
- }
- }
- elsif ($first_term_length < 20
- && $i_first_comma - $i_opening_paren < 4 )
- {
- my $columns = table_columns_available($i_first_comma);
- if ( $first_term_length < $columns ) {
- ${$rdo_not_break_apart} = 1;
- }
- }
- }
- }
-
- # if so,
- if ($use_separate_first_term) {
-
- # ..set a break and update starting values
- $use_separate_first_term = 1;
- set_forced_breakpoint($i_first_comma);
- $i_opening_paren = $i_first_comma;
- $i_first_comma = $rcomma_index->[1];
- $item_count--;
- return if $comma_count == 1;
- shift @item_lengths;
- shift @i_term_begin;
- shift @i_term_end;
- shift @i_term_comma;
- }
-
- # if not, update the metrics to include the first term
- else {
- if ( $first_term_length > $max_length[0] ) {
- $max_length[0] = $first_term_length;
- }
- }
-
- # Field width parameters
- my $pair_width = ( $max_length[0] + $max_length[1] );
- my $max_width =
- ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
-
- # Number of free columns across the page width for laying out tables
- my $columns = table_columns_available($i_first_comma);
-
- # Estimated maximum number of fields which fit this space
- # This will be our first guess
- my $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even, $max_width,
- $pair_width );
- my $number_of_fields = $number_of_fields_max;
-
- # Find the best-looking number of fields
- # and make this our second guess if possible
- my ( $number_of_fields_best, $ri_ragged_break_list,
- $new_identifier_count )
- = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
- $max_width );
-
- if ( $number_of_fields_best != 0
- && $number_of_fields_best < $number_of_fields_max )
- {
- $number_of_fields = $number_of_fields_best;
- }
-
- # ----------------------------------------------------------------------
- # If we are crowded and the -lp option is being used, try to
- # undo some indentation
- # ----------------------------------------------------------------------
- if (
- $rOpts_line_up_parentheses
- && (
- $number_of_fields == 0
- || ( $number_of_fields == 1
- && $number_of_fields != $number_of_fields_best )
- )
- )
- {
- my $available_spaces = get_available_spaces_to_go($i_first_comma);
- if ( $available_spaces > 0 ) {
-
- my $spaces_wanted = $max_width - $columns; # for 1 field
-
- if ( $number_of_fields_best == 0 ) {
- $number_of_fields_best =
- get_maximum_fields_wanted( \@item_lengths );
- }
-
- if ( $number_of_fields_best != 1 ) {
- my $spaces_wanted_2 =
- 1 + $pair_width - $columns; # for 2 fields
- if ( $available_spaces > $spaces_wanted_2 ) {
- $spaces_wanted = $spaces_wanted_2;
- }
- }
-
- if ( $spaces_wanted > 0 ) {
- my $deleted_spaces =
- reduce_lp_indentation( $i_first_comma, $spaces_wanted );
-
- # redo the math
- if ( $deleted_spaces > 0 ) {
- $columns = table_columns_available($i_first_comma);
- $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even,
- $max_width, $pair_width );
- $number_of_fields = $number_of_fields_max;
-
- if ( $number_of_fields_best == 1
- && $number_of_fields >= 1 )
- {
- $number_of_fields = $number_of_fields_best;
- }
- }
- }
- }
- }
-
- # try for one column if two won't work
- if ( $number_of_fields <= 0 ) {
- $number_of_fields = int( $columns / $max_width );
- }
-
- # The user can place an upper bound on the number of fields,
- # which can be useful for doing maintenance on tables
- if ( $rOpts_maximum_fields_per_table
- && $number_of_fields > $rOpts_maximum_fields_per_table )
- {
- $number_of_fields = $rOpts_maximum_fields_per_table;
- }
-
- # How many columns (characters) and lines would this container take
- # if no additional whitespace were added?
- my $packed_columns = token_sequence_length( $i_opening_paren + 1,
- $i_effective_last_comma + 1 );
- if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
- my $packed_lines = 1 + int( $packed_columns / $columns );
-
- # are we an item contained in an outer list?
- my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
-
- if ( $number_of_fields <= 0 ) {
-
-# #---------------------------------------------------------------
-# # We're in trouble. We can't find a single field width that works.
-# # There is no simple answer here; we may have a single long list
-# # item, or many.
-# #---------------------------------------------------------------
-#
-# In many cases, it may be best to not force a break if there is just one
-# comma, because the standard continuation break logic will do a better
-# job without it.
-#
-# In the common case that all but one of the terms can fit
-# on a single line, it may look better not to break open the
-# containing parens. Consider, for example
-#
-# $color =
-# join ( '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; }
-# keys %colors );
-#
-# which will look like this with the container broken:
-#
-# $color = join (
-# '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
-# );
-#
-# Here is an example of this rule for a long last term:
-#
-# log_message( 0, 256, 128,
-# "Number of routes in adj-RIB-in to be considered: $peercount" );
-#
-# And here is an example with a long first term:
-#
-# $s = sprintf(
-# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
-# $r, $pu, $ps, $cu, $cs, $tt
-# )
-# if $style eq 'all';
-
- my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
- my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
- my $long_first_term =
- excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
-
- # break at every comma ...
- if (
-
- # if requested by user or is best looking
- $number_of_fields_best == 1
-
- # or if this is a sublist of a larger list
- || $in_hierarchical_list
-
- # or if multiple commas and we don't have a long first or last
- # term
- || ( $comma_count > 1
- && !( $long_last_term || $long_first_term ) )
- )
- {
- foreach ( 0 .. $comma_count - 1 ) {
- set_forced_breakpoint( $rcomma_index->[$_] );
- }
- }
- elsif ($long_last_term) {
-
- set_forced_breakpoint($i_last_comma);
- ${$rdo_not_break_apart} = 1 unless $must_break_open;
- }
- elsif ($long_first_term) {
-
- set_forced_breakpoint($i_first_comma);
- }
- else {
-
- # let breaks be defined by default bond strength logic
- }
- return;
- }
-
- # --------------------------------------------------------
- # We have a tentative field count that seems to work.
- # How many lines will this require?
- # --------------------------------------------------------
- my $formatted_lines = $item_count / ($number_of_fields);
- if ( $formatted_lines != int $formatted_lines ) {
- $formatted_lines = 1 + int $formatted_lines;
- }
-
- # So far we've been trying to fill out to the right margin. But
- # compact tables are easier to read, so let's see if we can use fewer
- # fields without increasing the number of lines.
- $number_of_fields =
- compactify_table( $item_count, $number_of_fields, $formatted_lines,
- $odd_or_even );
-
- # How many spaces across the page will we fill?
- my $columns_per_line =
- ( int $number_of_fields / 2 ) * $pair_width +
- ( $number_of_fields % 2 ) * $max_width;
-
- my $formatted_columns;
-
- if ( $number_of_fields > 1 ) {
- $formatted_columns =
- ( $pair_width * ( int( $item_count / 2 ) ) +
- ( $item_count % 2 ) * $max_width );
- }
- else {
- $formatted_columns = $max_width * $item_count;
- }
- if ( $formatted_columns < $packed_columns ) {
- $formatted_columns = $packed_columns;
- }
-
- my $unused_columns = $formatted_columns - $packed_columns;
-
- # set some empirical parameters to help decide if we should try to
- # align; high sparsity does not look good, especially with few lines
- my $sparsity = ($unused_columns) / ($formatted_columns);
- my $max_allowed_sparsity =
- ( $item_count < 3 ) ? 0.1
- : ( $packed_lines == 1 ) ? 0.15
- : ( $packed_lines == 2 ) ? 0.4
- : 0.7;
-
- # Begin check for shortcut methods, which avoid treating a list
- # as a table for relatively small parenthesized lists. These
- # are usually easier to read if not formatted as tables.
- if (
- $packed_lines <= 2 # probably can fit in 2 lines
- && $item_count < 9 # doesn't have too many items
- && $opening_environment eq 'BLOCK' # not a sub-container
- && $opening_token eq '(' # is paren list
- )
- {
-
- # Shortcut method 1: for -lp and just one comma:
- # This is a no-brainer, just break at the comma.
- if (
- $rOpts_line_up_parentheses # -lp
- && $item_count == 2 # two items, one comma
- && !$must_break_open
- )
- {
- my $i_break = $rcomma_index->[0];
- set_forced_breakpoint($i_break);
- ${$rdo_not_break_apart} = 1;
- set_non_alignment_flags( $comma_count, $rcomma_index );
- return;
-
- }
-
- # method 2 is for most small ragged lists which might look
- # best if not displayed as a table.
- if (
- ( $number_of_fields == 2 && $item_count == 3 )
- || (
- $new_identifier_count > 0 # isn't all quotes
- && $sparsity > 0.15
- ) # would be fairly spaced gaps if aligned
- )
- {
-
- my $break_count = set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
-
- # NOTE: we should really use the true break count here,
- # which can be greater if there are large terms and
- # little space, but usually this will work well enough.
- unless ($must_break_open) {
-
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
- ${$rdo_not_break_apart} = 1;
- }
- }
- set_non_alignment_flags( $comma_count, $rcomma_index );
- return;
- }
-
- } # end shortcut methods
-
- # debug stuff
-
- FORMATTER_DEBUG_FLAG_SPARSE && do {
- print STDOUT
-"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
-
- };
-
- #---------------------------------------------------------------
- # Compound List Rule 2:
- # If this list is too long for one line, and it is an item of a
- # larger list, then we must format it, regardless of sparsity
- # (ian.t). One reason that we have to do this is to trigger
- # Compound List Rule 1, above, which causes breaks at all commas of
- # all outer lists. In this way, the structure will be properly
- # displayed.
- #---------------------------------------------------------------
-
- # Decide if this list is too long for one line unless broken
- my $total_columns = table_columns_available($i_opening_paren);
- my $too_long = $packed_columns > $total_columns;
-
- # For a paren list, include the length of the token just before the
- # '(' because this is likely a sub call, and we would have to
- # include the sub name on the same line as the list. This is still
- # imprecise, but not too bad. (steve.t)
- if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
-
- $too_long = excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
- }
-
- # FIXME: For an item after a '=>', try to include the length of the
- # thing before the '=>'. This is crude and should be improved by
- # actually looking back token by token.
- if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
- my $i_opening_minus = $i_opening_paren - 4;
- if ( $i_opening_minus >= 0 ) {
- $too_long = excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
- }
- }
-
- # Always break lists contained in '[' and '{' if too long for 1 line,
- # and always break lists which are too long and part of a more complex
- # structure.
- my $must_break_open_container = $must_break_open
- || ( $too_long
- && ( $in_hierarchical_list || $opening_token ne '(' ) );
-
-#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
-
- #---------------------------------------------------------------
- # The main decision:
- # Now decide if we will align the data into aligned columns. Do not
- # attempt to align columns if this is a tiny table or it would be
- # too spaced. It seems that the more packed lines we have, the
- # sparser the list that can be allowed and still look ok.
- #---------------------------------------------------------------
-
- if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
- || ( $formatted_lines < 2 )
- || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
- )
- {
-
- #---------------------------------------------------------------
- # too sparse: would look ugly if aligned in a table;
- #---------------------------------------------------------------
-
- # use old breakpoints if this is a 'big' list
- # FIXME: goal is to improve set_ragged_breakpoints so that
- # this is not necessary.
- if ( $packed_lines > 2 && $item_count > 10 ) {
- write_logfile_entry("List sparse: using old breakpoints\n");
- copy_old_breakpoints( $i_first_comma, $i_last_comma );
- }
-
- # let the continuation logic handle it if 2 lines
- else {
-
- my $break_count = set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
-
- unless ($must_break_open_container) {
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
- ${$rdo_not_break_apart} = 1;
- }
- }
- set_non_alignment_flags( $comma_count, $rcomma_index );
- }
- return;
- }
-
- #---------------------------------------------------------------
- # go ahead and format as a table
- #---------------------------------------------------------------
- write_logfile_entry(
- "List: auto formatting with $number_of_fields fields/row\n");
-
- my $j_first_break =
- $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
-
- for (
- my $j = $j_first_break ;
- $j < $comma_count ;
- $j += $number_of_fields
- )
- {
- my $i = $rcomma_index->[$j];
- set_forced_breakpoint($i);
- }
- return;
- }
-}
-
-sub set_non_alignment_flags {
-
- # set flag which indicates that these commas should not be
- # aligned
- my ( $comma_count, $rcomma_index ) = @_;
- foreach ( 0 .. $comma_count - 1 ) {
- $matching_token_to_go[ $rcomma_index->[$_] ] = 1;
- }
- return;
-}
-
-sub study_list_complexity {
-
- # Look for complex tables which should be formatted with one term per line.
- # Returns the following:
- #
- # \@i_ragged_break_list = list of good breakpoints to avoid lines
- # which are hard to read
- # $number_of_fields_best = suggested number of fields based on
- # complexity; = 0 if any number may be used.
- #
- my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
- my $item_count = @{$ri_term_begin};
- my $complex_item_count = 0;
- my $number_of_fields_best = $rOpts_maximum_fields_per_table;
- my $i_max = @{$ritem_lengths} - 1;
- ##my @item_complexity;
-
- my $i_last_last_break = -3;
- my $i_last_break = -2;
- my @i_ragged_break_list;
-
- my $definitely_complex = 30;
- my $definitely_simple = 12;
- my $quote_count = 0;
-
- for my $i ( 0 .. $i_max ) {
- my $ib = $ri_term_begin->[$i];
- my $ie = $ri_term_end->[$i];
-
- # define complexity: start with the actual term length
- my $weighted_length = ( $ritem_lengths->[$i] - 2 );
-
- ##TBD: join types here and check for variations
- ##my $str=join "", @tokens_to_go[$ib..$ie];
-
- my $is_quote = 0;
- if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
- $is_quote = 1;
- $quote_count++;
- }
- elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
- $quote_count++;
- }
-
- if ( $ib eq $ie ) {
- if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
- $complex_item_count++;
- $weighted_length *= 2;
- }
- else {
- }
- }
- else {
- if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
- $complex_item_count++;
- $weighted_length *= 2;
- }
- if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
- $weighted_length += 4;
- }
- }
-
- # add weight for extra tokens.
- $weighted_length += 2 * ( $ie - $ib );
-
-## my $BUB = join '', @tokens_to_go[$ib..$ie];
-## print "# COMPLEXITY:$weighted_length $BUB\n";
-
-##push @item_complexity, $weighted_length;
-
- # now mark a ragged break after this item it if it is 'long and
- # complex':
- if ( $weighted_length >= $definitely_complex ) {
-
- # if we broke after the previous term
- # then break before it too
- if ( $i_last_break == $i - 1
- && $i > 1
- && $i_last_last_break != $i - 2 )
- {
-
- ## FIXME: don't strand a small term
- pop @i_ragged_break_list;
- push @i_ragged_break_list, $i - 2;
- push @i_ragged_break_list, $i - 1;
- }
-
- push @i_ragged_break_list, $i;
- $i_last_last_break = $i_last_break;
- $i_last_break = $i;
- }
-
- # don't break before a small last term -- it will
- # not look good on a line by itself.
- elsif ($i == $i_max
- && $i_last_break == $i - 1
- && $weighted_length <= $definitely_simple )
- {
- pop @i_ragged_break_list;
- }
- }
-
- my $identifier_count = $i_max + 1 - $quote_count;
-
- # Need more tuning here..
- if ( $max_width > 12
- && $complex_item_count > $item_count / 2
- && $number_of_fields_best != 2 )
- {
- $number_of_fields_best = 1;
- }
-
- return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
-}
-
-sub get_maximum_fields_wanted {
-
- # Not all tables look good with more than one field of items.
- # This routine looks at a table and decides if it should be
- # formatted with just one field or not.
- # This coding is still under development.
- my ($ritem_lengths) = @_;
-
- my $number_of_fields_best = 0;
-
- # For just a few items, we tentatively assume just 1 field.
- my $item_count = @{$ritem_lengths};
- if ( $item_count <= 5 ) {
- $number_of_fields_best = 1;
- }
-
- # For larger tables, look at it both ways and see what looks best
- else {
-
- my $is_odd = 1;
- my @max_length = ( 0, 0 );
- my @last_length_2 = ( undef, undef );
- my @first_length_2 = ( undef, undef );
- my $last_length = undef;
- my $total_variation_1 = 0;
- my $total_variation_2 = 0;
- my @total_variation_2 = ( 0, 0 );
- foreach my $j ( 0 .. $item_count - 1 ) {
-
- $is_odd = 1 - $is_odd;
- my $length = $ritem_lengths->[$j];
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
- }
-
- if ( defined($last_length) ) {
- my $dl = abs( $length - $last_length );
- $total_variation_1 += $dl;
- }
- $last_length = $length;
-
- my $ll = $last_length_2[$is_odd];
- if ( defined($ll) ) {
- my $dl = abs( $length - $ll );
- $total_variation_2[$is_odd] += $dl;
- }
- else {
- $first_length_2[$is_odd] = $length;
- }
- $last_length_2[$is_odd] = $length;
- }
- $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
-
- my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
- unless ( $total_variation_2 < $factor * $total_variation_1 ) {
- $number_of_fields_best = 1;
- }
- }
- return ($number_of_fields_best);
-}
-
-sub table_columns_available {
- my $i_first_comma = shift;
- my $columns =
- maximum_line_length($i_first_comma) -
- leading_spaces_to_go($i_first_comma);
-
- # Patch: the vertical formatter does not line up lines whose lengths
- # exactly equal the available line length because of allowances
- # that must be made for side comments. Therefore, the number of
- # available columns is reduced by 1 character.
- $columns -= 1;
- return $columns;
-}
-
-sub maximum_number_of_fields {
-
- # how many fields will fit in the available space?
- my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
- my $max_pairs = int( $columns / $pair_width );
- my $number_of_fields = $max_pairs * 2;
- if ( $odd_or_even == 1
- && $max_pairs * $pair_width + $max_width <= $columns )
- {
- $number_of_fields++;
- }
- return $number_of_fields;
-}
-
-sub compactify_table {
-
- # given a table with a certain number of fields and a certain number
- # of lines, see if reducing the number of fields will make it look
- # better.
- my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
- if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
- my $min_fields;
-
- for (
- $min_fields = $number_of_fields ;
- $min_fields >= $odd_or_even
- && $min_fields * $formatted_lines >= $item_count ;
- $min_fields -= $odd_or_even
- )
- {
- $number_of_fields = $min_fields;
- }
- }
- return $number_of_fields;
-}
-
-sub set_ragged_breakpoints {
-
- # Set breakpoints in a list that cannot be formatted nicely as a
- # table.
- my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
-
- my $break_count = 0;
- foreach ( @{$ri_ragged_break_list} ) {
- my $j = $ri_term_comma->[$_];
- if ($j) {
- set_forced_breakpoint($j);
- $break_count++;
- }
- }
- return $break_count;
-}
-
-sub copy_old_breakpoints {
- my ( $i_first_comma, $i_last_comma ) = @_;
- for my $i ( $i_first_comma .. $i_last_comma ) {
- if ( $old_breakpoint_to_go[$i] ) {
- set_forced_breakpoint($i);
- }
- }
- return;
-}
-
-sub set_nobreaks {
- my ( $i, $j ) = @_;
- if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
-
- FORMATTER_DEBUG_FLAG_NOBREAK && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
- };
-
- @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
- }
-
- # shouldn't happen; non-critical error
- else {
- FORMATTER_DEBUG_FLAG_NOBREAK && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
- "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
- };
- }
- return;
-}
-
-sub set_fake_breakpoint {
-
- # Just bump up the breakpoint count as a signal that there are breaks.
- # This is useful if we have breaks but may want to postpone deciding where
- # to make them.
- $forced_breakpoint_count++;
- return;
-}
-
-sub set_forced_breakpoint {
- my $i = shift;
-
- return unless defined $i && $i >= 0;
-
- # no breaks between welded tokens
- return if ( weld_len_right_to_go($i) );
-
- # when called with certain tokens, use bond strengths to decide
- # if we break before or after it
- my $token = $tokens_to_go[$i];
-
- if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
- if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
- }
-
- # breaks are forced before 'if' and 'unless'
- elsif ( $is_if_unless{$token} ) { $i-- }
-
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
-
- FORMATTER_DEBUG_FLAG_FORCE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
- };
-
- if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
- $forced_breakpoint_to_go[$i_nonblank] = 1;
-
- if ( $i_nonblank > $index_max_forced_break ) {
- $index_max_forced_break = $i_nonblank;
- }
- $forced_breakpoint_count++;
- $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
- $i_nonblank;
-
- # if we break at an opening container..break at the closing
- if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
- set_closing_breakpoint($i_nonblank);
- }
- }
- }
- return;
-}
-
-sub clear_breakpoint_undo_stack {
- $forced_breakpoint_undo_count = 0;
- return;
-}
-
-sub undo_forced_breakpoint_stack {
-
- my $i_start = shift;
- if ( $i_start < 0 ) {
- $i_start = 0;
- my ( $a, $b, $c ) = caller();
- warning(
-"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
- );
- }
-
- while ( $forced_breakpoint_undo_count > $i_start ) {
- my $i =
- $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- $forced_breakpoint_to_go[$i] = 0;
- $forced_breakpoint_count--;
-
- FORMATTER_DEBUG_FLAG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
- };
- }
-
- # shouldn't happen, but not a critical error
- else {
- FORMATTER_DEBUG_FLAG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
- };
- }
- }
- return;
-}
-
-{ # begin recombine_breakpoints
-
- my %is_amp_amp;
- my %is_ternary;
- my %is_math_op;
- my %is_plus_minus;
- my %is_mult_div;
-
- BEGIN {
-
- my @q;
- @q = qw( && || );
- @is_amp_amp{@q} = (1) x scalar(@q);
-
- @q = qw( ? : );
- @is_ternary{@q} = (1) x scalar(@q);
-
- @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
-
- @q = qw( + - );
- @is_plus_minus{@q} = (1) x scalar(@q);
-
- @q = qw( * / );
- @is_mult_div{@q} = (1) x scalar(@q);
- }
-
- sub DUMP_BREAKPOINTS {
-
- # Debug routine to dump current breakpoints...not normally called
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $ri_beg, $ri_end, $msg ) = @_;
- print STDERR "----Dumping breakpoints from: $msg----\n";
- for my $n ( 0 .. @{$ri_end} - 1 ) {
- my $ibeg = $ri_beg->[$n];
- my $iend = $ri_end->[$n];
- my $text = "";
- foreach my $i ( $ibeg .. $iend ) {
- $text .= $tokens_to_go[$i];
- }
- print STDERR "$n ($ibeg:$iend) $text\n";
- }
- print STDERR "----\n";
- return;
- }
-
- sub unmask_phantom_semicolons {
-
- my ( $self, $ri_beg, $ri_end ) = @_;
-
- # Walk down the lines of this batch and unmask any invisible line-ending
- # semicolons. They were placed by sub respace_tokens but we only now
- # know if we actually need them.
-
- my $nmax = @{$ri_end} - 1;
- foreach my $n ( 0 .. $nmax ) {
-
- my $i = $ri_end->[$n];
- if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
-
- $tokens_to_go[$i] = $rtoken_vars_to_go[$i]->[_TOKEN_] =
- $want_left_space{';'} == WS_NO ? ';' : ' ;';
- my $line_number = $rtoken_vars_to_go[$i]->[_LINE_INDEX_] + 1;
- note_added_semicolon($line_number);
- }
- }
- return;
- }
-
- sub recombine_breakpoints {
-
- # sub set_continuation_breaks is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Sometimes small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine.
- #
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $ri_beg, $ri_end ) = @_;
-
- # Make a list of all good joining tokens between the lines
- # n-1 and n.
- my @joint;
- my $nmax = @{$ri_end} - 1;
- for my $n ( 1 .. $nmax ) {
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
-
- my ( $itok, $itokp, $itokm );
-
- foreach my $itest ( $iend_1, $ibeg_2 ) {
- my $type = $types_to_go[$itest];
- if ( $is_math_op{$type}
- || $is_amp_amp{$type}
- || $is_assignment{$type}
- || $type eq ':' )
- {
- $itok = $itest;
- }
- }
- $joint[$n] = [$itok];
- }
-
- my $more_to_do = 1;
-
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = @{$ri_end};
- my $reverse = 0;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $nmax = @{$ri_end} - 1;
-
- # Safety check for infinite loop
- unless ( $nmax < $nmax_last ) {
-
- # Shouldn't happen because splice below decreases nmax on each
- # pass.
- Fault("Program bug-infinite loop in recombine breakpoints\n");
- }
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $skip_Section_3;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
-
- # loop over all remaining lines in this batch
- for my $iter ( 1 .. $nmax ) {
-
- # alternating sweep direction gives symmetric results
- # for recombining lines which exceed the line length
- # such as eval {{{{.... }}}}
- my $n;
- if ($reverse) { $n = 1 + $nmax - $iter; }
- else { $n = $iter }
-
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # -----line $n-1--- | -----line $n-----
- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
- # ^
- # |
- # We want to decide if we should remove the line break
- # between the tokens at $iend_1 and $ibeg_2
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- #
- # beginning and ending tokens of the lines we are working on
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
- my $ibeg_nmax = $ri_beg->[$nmax];
-
- # combined line cannot be too long
- my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
- next if ( $excess > 0 );
-
- my $type_iend_1 = $types_to_go[$iend_1];
- my $type_iend_2 = $types_to_go[$iend_2];
- my $type_ibeg_1 = $types_to_go[$ibeg_1];
- my $type_ibeg_2 = $types_to_go[$ibeg_2];
-
- # terminal token of line 2 if any side comment is ignored:
- my $iend_2t = $iend_2;
- my $type_iend_2t = $type_iend_2;
-
- # some beginning indexes of other lines, which may not exist
- my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
- my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
- my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
-
- my $bs_tweak = 0;
-
- #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
- # $nesting_depth_to_go[$ibeg_1] );
-
- FORMATTER_DEBUG_FLAG_RECOMBINE && do {
- print STDERR
-"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
- };
-
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
-
- # a terminal '{' should stay where it is
- next if $type_ibeg_2 eq '{';
-
- if ( $type_iend_2 eq '#'
- && $iend_2 - $ibeg_2 >= 2
- && $types_to_go[ $iend_2 - 1 ] eq 'b' )
- {
- $iend_2t = $iend_2 - 2;
- $type_iend_2t = $types_to_go[$iend_2t];
- }
-
- $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
- }
-
- #----------------------------------------------------------
- # Recombine Section 0:
- # Examine the special token joining this line pair, if any.
- # Put as many tests in this section to avoid duplicate code and
- # to make formatting independent of whether breaks are to the
- # left or right of an operator.
- #----------------------------------------------------------
-
- my ($itok) = @{ $joint[$n] };
- if ($itok) {
-
- # FIXME: Patch - may not be necessary
- my $iend_1 =
- $type_iend_1 eq 'b'
- ? $iend_1 - 1
- : $iend_1;
-
- my $iend_2 =
- $type_iend_2 eq 'b'
- ? $iend_2 - 1
- : $iend_2;
- ## END PATCH
-
- my $type = $types_to_go[$itok];
-
- if ( $type eq ':' ) {
-
- # do not join at a colon unless it disobeys the break request
- if ( $itok eq $iend_1 ) {
- next unless $want_break_before{$type};
- }
- else {
- $leading_amp_count++;
- next if $want_break_before{$type};
- }
- } ## end if ':'
-
- # handle math operators + - * /
- elsif ( $is_math_op{$type} ) {
-
- # Combine these lines if this line is a single
- # number, or if it is a short term with same
- # operator as the previous line. For example, in
- # the following code we will combine all of the
- # short terms $A, $B, $C, $D, $E, $F, together
- # instead of leaving them one per line:
- # my $time =
- # $A * $B * $C * $D * $E * $F *
- # ( 2. * $eps * $sigma * $area ) *
- # ( 1. / $tcold**3 - 1. / $thot**3 );
-
- # This can be important in math-intensive code.
-
- my $good_combo;
-
- my $itokp = min( $inext_to_go[$itok], $iend_2 );
- my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
- my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
- my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
-
- # check for a number on the right
- if ( $types_to_go[$itokp] eq 'n' ) {
-
- # ok if nothing else on right
- if ( $itokp == $iend_2 ) {
- $good_combo = 1;
- }
- else {
-
- # look one more token to right..
- # okay if math operator or some termination
- $good_combo =
- ( ( $itokpp == $iend_2 )
- && $is_math_op{ $types_to_go[$itokpp] } )
- || $types_to_go[$itokpp] =~ /^[#,;]$/;
- }
- }
-
- # check for a number on the left
- if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
-
- # okay if nothing else to left
- if ( $itokm == $ibeg_1 ) {
- $good_combo = 1;
- }
-
- # otherwise look one more token to left
- else {
-
- # okay if math operator, comma, or assignment
- $good_combo = ( $itokmm == $ibeg_1 )
- && ( $is_math_op{ $types_to_go[$itokmm] }
- || $types_to_go[$itokmm] =~ /^[,]$/
- || $is_assignment{ $types_to_go[$itokmm] }
- );
- }
- }
-
- # look for a single short token either side of the
- # operator
- if ( !$good_combo ) {
-
- # Slight adjustment factor to make results
- # independent of break before or after operator in
- # long summed lists. (An operator and a space make
- # two spaces).
- my $two = ( $itok eq $iend_1 ) ? 2 : 0;
-
- $good_combo =
-
- # numbers or id's on both sides of this joint
- $types_to_go[$itokp] =~ /^[in]$/
- && $types_to_go[$itokm] =~ /^[in]$/
-
- # one of the two lines must be short:
- && (
- (
- # no more than 2 nonblank tokens right of
- # joint
- $itokpp == $iend_2
-
- # short
- && token_sequence_length( $itokp, $iend_2 )
- < $two +
- $rOpts_short_concatenation_item_length
- )
- || (
- # no more than 2 nonblank tokens left of
- # joint
- $itokmm == $ibeg_1
-
- # short
- && token_sequence_length( $ibeg_1, $itokm )
- < 2 - $two +
- $rOpts_short_concatenation_item_length
- )
-
- )
-
- # keep pure terms; don't mix +- with */
- && !(
- $is_plus_minus{$type}
- && ( $is_mult_div{ $types_to_go[$itokmm] }
- || $is_mult_div{ $types_to_go[$itokpp] } )
- )
- && !(
- $is_mult_div{$type}
- && ( $is_plus_minus{ $types_to_go[$itokmm] }
- || $is_plus_minus{ $types_to_go[$itokpp] } )
- )
-
- ;
- }
-
- # it is also good to combine if we can reduce to 2 lines
- if ( !$good_combo ) {
-
- # index on other line where same token would be in a
- # long chain.
- my $iother =
- ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
-
- $good_combo =
- $n == 2
- && $n == $nmax
- && $types_to_go[$iother] ne $type;
- }
-
- next unless ($good_combo);
-
- } ## end math
-
- elsif ( $is_amp_amp{$type} ) {
- ##TBD
- } ## end &&, ||
-
- elsif ( $is_assignment{$type} ) {
- ##TBD
- } ## end assignment
- }
-
- #----------------------------------------------------------
- # Recombine Section 1:
- # Join welded nested containers immediately
- # use alternating sweep direction until all are welds
- # are done. This produces more symmetric opening and
- # closing joins when lines exceed line length.
- #----------------------------------------------------------
- if ( weld_len_right_to_go($iend_1)
- || weld_len_left_to_go($ibeg_2) )
- {
- $n_best = $n;
- $reverse = 1 - $reverse;
- last;
- }
- $reverse = 0;
-
- #----------------------------------------------------------
- # Recombine Section 2:
- # Examine token at $iend_1 (right end of first line of pair)
- #----------------------------------------------------------
-
- # an isolated '}' may join with a ';' terminated segment
- if ( $type_iend_1 eq '}' ) {
-
- # Check for cases where combining a semicolon terminated
- # statement with a previous isolated closing paren will
- # allow the combined line to be outdented. This is
- # generally a good move. For example, we can join up
- # the last two lines here:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # )
- # = stat($file);
- #
- # to get:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # ) = stat($file);
- #
- # which makes the parens line up.
- #
- # Another example, from Joe Matarazzo, probably looks best
- # with the 'or' clause appended to the trailing paren:
- # $self->some_method(
- # PARAM1 => 'foo',
- # PARAM2 => 'bar'
- # ) or die "Some_method didn't work";
- #
- # But we do not want to do this for something like the -lp
- # option where the paren is not outdentable because the
- # trailing clause will be far to the right.
- #
- # The logic here is synchronized with the logic in sub
- # sub set_adjusted_indentation, which actually does
- # the outdenting.
- #
- $skip_Section_3 ||= $this_line_is_semicolon_terminated
-
- # only one token on last line
- && $ibeg_1 == $iend_1
-
- # must be structural paren
- && $tokens_to_go[$iend_1] eq ')'
-
- # style must allow outdenting,
- && !$closing_token_indentation{')'}
-
- # only leading '&&', '||', and ':' if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
-
- # but leading colons probably line up with a
- # previous colon or question (count could be wrong).
- && $type_ibeg_2 ne ':'
-
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$iend_1] ==
- $nesting_depth_to_go[$iend_2] + 1 );
-
- # YVES patch 2 of 2:
- # Allow cuddled eval chains, like this:
- # eval {
- # #STUFF;
- # 1; # return true
- # } or do {
- # #handle error
- # };
- # This patch works together with a patch in
- # setting adjusted indentation (where the closing eval
- # brace is outdented if possible).
- # The problem is that an 'eval' block has continuation
- # indentation and it looks better to undo it in some
- # cases. If we do not use this patch we would get:
- # eval {
- # #STUFF;
- # 1; # return true
- # }
- # or do {
- # #handle error
- # };
- # The alternative, for uncuddled style, is to create
- # a patch in set_adjusted_indentation which undoes
- # the indentation of a leading line like 'or do {'.
- # This doesn't work well with -icb through
- if (
- $block_type_to_go[$iend_1] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'}
- && $tokens_to_go[$iend_2] eq '{'
- && (
- ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
- || ( $type_ibeg_2 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_2] } )
- || $is_if_unless{ $tokens_to_go[$ibeg_2] }
- )
- )
- {
- $skip_Section_3 ||= 1;
- }
-
- next
- unless (
- $skip_Section_3
-
- # handle '.' and '?' specially below
- || ( $type_ibeg_2 =~ /^[\.\?]$/ )
- );
- }
-
- elsif ( $type_iend_1 eq '{' ) {
-
- # YVES
- # honor breaks at opening brace
- # Added to prevent recombining something like this:
- # } || eval { package main;
- next if $forced_breakpoint_to_go[$iend_1];
- }
-
- # do not recombine lines with ending &&, ||,
- elsif ( $is_amp_amp{$type_iend_1} ) {
- next unless $want_break_before{$type_iend_1};
- }
-
- # Identify and recombine a broken ?/: chain
- elsif ( $type_iend_1 eq '?' ) {
-
- # Do not recombine different levels
- next
- if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
-
- # do not recombine unless next line ends in :
- next unless $type_iend_2 eq ':';
- }
-
- # for lines ending in a comma...
- elsif ( $type_iend_1 eq ',' ) {
-
- # Do not recombine at comma which is following the
- # input bias.
- # TODO: might be best to make a special flag
- next if ( $old_breakpoint_to_go[$iend_1] );
-
- # an isolated '},' may join with an identifier + ';'
- # this is useful for the class of a 'bless' statement (bless.t)
- if ( $type_ibeg_1 eq '}'
- && $type_ibeg_2 eq 'i' )
- {
- next
- unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
- && ( $iend_2 == ( $ibeg_2 + 1 ) )
- && $this_line_is_semicolon_terminated );
-
- # override breakpoint
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
-
- # but otherwise ..
- else {
-
- # do not recombine after a comma unless this will leave
- # just 1 more line
- next unless ( $n + 1 >= $nmax );
-
- # do not recombine if there is a change in indentation depth
- next
- if (
- $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
-
- # do not recombine a "complex expression" after a
- # comma. "complex" means no parens.
- my $saw_paren;
- foreach my $ii ( $ibeg_2 .. $iend_2 ) {
- if ( $tokens_to_go[$ii] eq '(' ) {
- $saw_paren = 1;
- last;
- }
- }
- next if $saw_paren;
- }
- }
-
- # opening paren..
- elsif ( $type_iend_1 eq '(' ) {
-
- # No longer doing this
- }
-
- elsif ( $type_iend_1 eq ')' ) {
-
- # No longer doing this
- }
-
- # keep a terminal for-semicolon
- elsif ( $type_iend_1 eq 'f' ) {
- next;
- }
-
- # if '=' at end of line ...
- elsif ( $is_assignment{$type_iend_1} ) {
-
- # keep break after = if it was in input stream
- # this helps prevent 'blinkers'
- next if $old_breakpoint_to_go[$iend_1]
-
- # don't strand an isolated '='
- && $iend_1 != $ibeg_1;
-
- my $is_short_quote =
- ( $type_ibeg_2 eq 'Q'
- && $ibeg_2 == $iend_2
- && token_sequence_length( $ibeg_2, $ibeg_2 ) <
- $rOpts_short_concatenation_item_length );
- my $is_ternary =
- ( $type_ibeg_1 eq '?'
- && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
-
- # always join an isolated '=', a short quote, or if this
- # will put ?/: at start of adjacent lines
- if ( $ibeg_1 != $iend_1
- && !$is_short_quote
- && !$is_ternary )
- {
- next
- unless (
- (
-
- # unless we can reduce this to two lines
- $nmax < $n + 2
-
- # or three lines, the last with a leading semicolon
- || ( $nmax == $n + 2
- && $types_to_go[$ibeg_nmax] eq ';' )
-
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
-
- # or the next line ends in an open paren or brace
- # and the break hasn't been forced [dima.t]
- || ( !$forced_breakpoint_to_go[$iend_1]
- && $type_iend_2 eq '{' )
- )
-
- # do not recombine if the two lines might align well
- # this is a very approximate test for this
- && ( $ibeg_3 >= 0
- && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
- );
-
- if (
-
- # Recombine if we can make two lines
- $nmax >= $n + 2
-
- # -lp users often prefer this:
- # my $title = function($env, $env, $sysarea,
- # "bubba Borrower Entry");
- # so we will recombine if -lp is used we have
- # ending comma
- && ( !$rOpts_line_up_parentheses
- || $type_iend_2 ne ',' )
- )
- {
-
- # otherwise, scan the rhs line up to last token for
- # complexity. Note that we are not counting the last
- # token in case it is an opening paren.
- my $tv = 0;
- my $depth = $nesting_depth_to_go[$ibeg_2];
- foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 1 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
-
- # ok to recombine if no level changes before last token
- if ( $tv > 0 ) {
-
- # otherwise, do not recombine if more than two
- # level changes.
- next if ( $tv > 1 );
-
- # check total complexity of the two adjacent lines
- # that will occur if we do this join
- my $istop =
- ( $n < $nmax )
- ? $ri_end->[ $n + 1 ]
- : $iend_2;
- foreach my $i ( $iend_2 .. $istop ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 2 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
-
- # do not recombine if total is more than 2 level changes
- next if ( $tv > 2 );
- }
- }
- }
-
- unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
- }
-
- # for keywords..
- elsif ( $type_iend_1 eq 'k' ) {
-
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
-
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
-
- # but only if followed by multiple lines
- && $n < $nmax
- );
-
- if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
- next
- unless $want_break_before{ $tokens_to_go[$iend_1] };
- }
- }
-
- #----------------------------------------------------------
- # Recombine Section 3:
- # Examine token at $ibeg_2 (left end of second line of pair)
- #----------------------------------------------------------
-
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- # Note that we are skipping the rest of this section
- # and the rest of the loop to do the join
- if ($skip_Section_3) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- $n_best = $n;
- last;
- }
-
- # handle lines with leading &&, ||
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
-
- $leading_amp_count++;
-
- # ok to recombine if it follows a ? or :
- # and is followed by an open paren..
- my $ok =
- ( $is_ternary{$type_ibeg_1}
- && $tokens_to_go[$iend_2] eq '(' )
-
- # or is followed by a ? or : at same depth
- #
- # We are looking for something like this. We can
- # recombine the && line with the line above to make the
- # structure more clear:
- # return
- # exists $G->{Attr}->{V}
- # && exists $G->{Attr}->{V}->{$u}
- # ? %{ $G->{Attr}->{V}->{$u} }
- # : ();
- #
- # We should probably leave something like this alone:
- # return
- # exists $G->{Attr}->{E}
- # && exists $G->{Attr}->{E}->{$u}
- # && exists $G->{Attr}->{E}->{$u}->{$v}
- # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
- # : ();
- # so that we either have all of the &&'s (or ||'s)
- # on one line, as in the first example, or break at
- # each one as in the second example. However, it
- # sometimes makes things worse to check for this because
- # it prevents multiple recombinations. So this is not done.
- || ( $ibeg_3 >= 0
- && $is_ternary{ $types_to_go[$ibeg_3] }
- && $nesting_depth_to_go[$ibeg_3] ==
- $nesting_depth_to_go[$ibeg_2] );
-
- next if !$ok && $want_break_before{$type_ibeg_2};
- $forced_breakpoint_to_go[$iend_1] = 0;
-
- # tweak the bond strength to give this joint priority
- # over ? and :
- $bs_tweak = 0.25;
- }
-
- # Identify and recombine a broken ?/: chain
- elsif ( $type_ibeg_2 eq '?' ) {
-
- # Do not recombine different levels
- my $lev = $levels_to_go[$ibeg_2];
- next if ( $lev ne $levels_to_go[$ibeg_1] );
-
- # Do not recombine a '?' if either next line or
- # previous line does not start with a ':'. The reasons
- # are that (1) no alignment of the ? will be possible
- # and (2) the expression is somewhat complex, so the
- # '?' is harder to see in the interior of the line.
- my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
- my $precedes_colon =
- $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
- next unless ( $follows_colon || $precedes_colon );
-
- # we will always combining a ? line following a : line
- if ( !$follows_colon ) {
-
- # ...otherwise recombine only if it looks like a chain.
- # we will just look at a few nearby lines to see if
- # this looks like a chain.
- my $local_count = 0;
- foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
- $local_count++
- if $ii >= 0
- && $types_to_go[$ii] eq ':'
- && $levels_to_go[$ii] == $lev;
- }
- next unless ( $local_count > 1 );
- }
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
-
- # do not recombine lines with leading '.'
- elsif ( $type_ibeg_2 eq '.' ) {
- my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
- next
- unless (
-
- # ... unless there is just one and we can reduce
- # this to two lines if we do. For example, this
- #
- #
- # $bodyA .=
- # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
- #
- # looks better than this:
- # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
- # . '$args .= $pat;'
-
- (
- $n == 2
- && $n == $nmax
- && $type_ibeg_1 ne $type_ibeg_2
- )
-
- # ... or this would strand a short quote , like this
- # . "some long quote"
- # . "\n";
-
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $iend_2 - 1
- && $token_lengths_to_go[$i_next_nonblank] <
- $rOpts_short_concatenation_item_length )
- );
- }
-
- # handle leading keyword..
- elsif ( $type_ibeg_2 eq 'k' ) {
-
- # handle leading "or"
- if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
-
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
-
- # important: only combine a very simple or
- # statement because the step below may have
- # combined a trailing 'and' with this or,
- # and we do not want to then combine
- # everything together
- && ( $iend_2 - $ibeg_2 <= 7 )
- )
- );
-
- #X: RT #81854
- $forced_breakpoint_to_go[$iend_1] = 0
- unless $old_breakpoint_to_go[$iend_1];
- }
-
- # handle leading 'and'
- elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
-
- # Decide if we will combine a single terminal 'and'
- # after an 'if' or 'unless'.
-
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
-
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
- || $tokens_to_go[$ibeg_1] eq 'or' )
- )
- );
- }
-
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
-
- # FIXME: This is still experimental..may not be too useful
- next
- unless (
- $this_line_is_semicolon_terminated
-
- # previous line begins with 'and' or 'or'
- && $type_ibeg_1 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_1] }
-
- );
- }
-
- # handle all other leading keywords
- else {
-
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{$type_iend_1} ) {
- next
- if ( ( $type_iend_1 ne 'k' )
- && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
- }
- }
- }
-
- # similar treatment of && and || as above for 'and' and 'or':
- # NOTE: This block of code is currently bypassed because
- # of a previous block but is retained for possible future use.
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
-
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
-
- next
- unless (
- $this_line_is_semicolon_terminated
-
- # previous line begins with an 'if' or 'unless' keyword
- && $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
-
- );
- }
-
- # handle line with leading = or similar
- elsif ( $is_assignment{$type_ibeg_2} ) {
- next unless ( $n == 1 || $n == $nmax );
- next if $old_breakpoint_to_go[$iend_1];
- next
- unless (
-
- # unless we can reduce this to two lines
- $nmax == 2
-
- # or three lines, the last with a leading semicolon
- || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
-
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
-
- # or this is a short line ending in ;
- || ( $n == $nmax && $this_line_is_semicolon_terminated )
- );
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
-
- #----------------------------------------------------------
- # Recombine Section 4:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
-
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
-
- my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
-
- # Require a few extra spaces before recombining lines if we are
- # at an old breakpoint unless this is a simple list or terminal
- # line. The goal is to avoid oscillating between two
- # quasi-stable end states. For example this snippet caused
- # problems:
-## my $this =
-## bless {
-## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
-## },
-## $type;
- next
- if ( $old_breakpoint_to_go[$iend_1]
- && !$this_line_is_semicolon_terminated
- && $n < $nmax
- && $excess + 4 > 0
- && $type_iend_2 ne ',' );
-
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $ri_beg->[ $n + 1 ];
- next
- if (
- $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
- && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
-
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $iend_1 - $ibeg_1 <= 2
- && $type_ibeg_1 eq 'k'
- && $tokens_to_go[$ibeg_1] eq 'if'
- && $tokens_to_go[$iend_1] ne '('
- )
- );
- }
-
- # honor no-break's
- next if ( $bs >= NO_BREAK - 1 );
-
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
-
- if ( $bs > $bs_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- }
- }
-
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @{$ri_beg}, $n_best, 1;
- splice @{$ri_end}, $n_best - 1, 1;
- splice @joint, $n_best, 1;
-
- # keep going if we are still making progress
- $more_to_do++;
- }
- }
- return ( $ri_beg, $ri_end );
- }
-} # end recombine_breakpoints
-
-sub break_all_chain_tokens {
-
- # scan the current breakpoints looking for breaks at certain "chain
- # operators" (. : && || + etc) which often occur repeatedly in a long
- # statement. If we see a break at any one, break at all similar tokens
- # within the same container.
- #
- my ( $ri_left, $ri_right ) = @_;
-
- my %saw_chain_type;
- my %left_chain_type;
- my %right_chain_type;
- my %interior_chain_type;
- my $nmax = @{$ri_right} - 1;
-
- # scan the left and right end tokens of all lines
- my $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- $typel = '+' if ( $typel eq '-' ); # treat + and - the same
- $typer = '+' if ( $typer eq '-' );
- $typel = '*' if ( $typel eq '/' ); # treat * and / the same
- $typer = '*' if ( $typer eq '/' );
- my $tokenl = $tokens_to_go[$il];
- my $tokenr = $tokens_to_go[$ir];
-
- if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
- next if ( $typel eq '?' );
- push @{ $left_chain_type{$typel} }, $il;
- $saw_chain_type{$typel} = 1;
- $count++;
- }
- if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
- next if ( $typer eq '?' );
- push @{ $right_chain_type{$typer} }, $ir;
- $saw_chain_type{$typer} = 1;
- $count++;
- }
- }
- return unless $count;
-
- # now look for any interior tokens of the same types
- $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir - 1 ) {
- my $type = $types_to_go[$i];
- $type = '+' if ( $type eq '-' );
- $type = '*' if ( $type eq '/' );
- if ( $saw_chain_type{$type} ) {
- push @{ $interior_chain_type{$type} }, $i;
- $count++;
- }
- }
- }
- return unless $count;
-
- # now make a list of all new break points
- my @insert_list;
-
- # loop over all chain types
- foreach my $type ( keys %saw_chain_type ) {
-
- # quit if just ONE continuation line with leading . For example--
- # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
- # . $contents;
- last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
-
- # loop over all interior chain tokens
- foreach my $itest ( @{ $interior_chain_type{$type} } ) {
-
- # loop over all left end tokens of same type
- if ( $left_chain_type{$type} ) {
- next if $nobreak_to_go[ $itest - 1 ];
- foreach my $i ( @{ $left_chain_type{$type} } ) {
- next unless in_same_container( $i, $itest );
- push @insert_list, $itest - 1;
-
- # Break at matching ? if this : is at a different level.
- # For example, the ? before $THRf_DEAD in the following
- # should get a break if its : gets a break.
- #
- # my $flags =
- # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
- # : ( $_ & 4 ) ? $THRf_R_DETACHED
- # : $THRf_R_JOINABLE;
- if ( $type eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question > 0 ) {
- push @insert_list, $i_question - 1;
- }
- }
- last;
- }
- }
-
- # loop over all right end tokens of same type
- if ( $right_chain_type{$type} ) {
- next if $nobreak_to_go[$itest];
- foreach my $i ( @{ $right_chain_type{$type} } ) {
- next unless in_same_container( $i, $itest );
- push @insert_list, $itest;
-
- # break at matching ? if this : is at a different level
- if ( $type eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
- }
- }
- last;
- }
- }
- }
- }
-
- # insert any new break points
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-}
-
-sub break_equals {
-
- # Look for assignment operators that could use a breakpoint.
- # For example, in the following snippet
- #
- # $HOME = $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # we could break at the = to get this, which is a little nicer:
- # $HOME =
- # $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # The logic here follows the logic in set_logical_padding, which
- # will add the padding in the second line to improve alignment.
- #
- my ( $ri_left, $ri_right ) = @_;
- my $nmax = @{$ri_right} - 1;
- return unless ( $nmax >= 2 );
-
- # scan the left ends of first two lines
- my $tokbeg = "";
- my $depth_beg;
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $typel = $types_to_go[$il];
- my $tokenl = $tokens_to_go[$il];
-
- my $has_leading_op = ( $tokenl =~ /^\w/ )
- ? $is_chain_operator{$tokenl} # + - * / : ? && ||
- : $is_chain_operator{$typel}; # and, or
- return unless ($has_leading_op);
- if ( $n > 1 ) {
- return
- unless ( $tokenl eq $tokbeg
- && $nesting_depth_to_go[$il] eq $depth_beg );
- }
- $tokbeg = $tokenl;
- $depth_beg = $nesting_depth_to_go[$il];
- }
-
- # now look for any interior tokens of the same types
- my $il = $ri_left->[0];
- my $ir = $ri_right->[0];
-
- # now make a list of all new break points
- my @insert_list;
- for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
- my $type = $types_to_go[$i];
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- if ( $want_break_before{$type} ) {
- push @insert_list, $i - 1;
- }
- else {
- push @insert_list, $i;
- }
- }
- }
-
- # Break after a 'return' followed by a chain of operators
- # return ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- # To give:
- # return
- # ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- my $i = 0;
- if ( $types_to_go[$i] eq 'k'
- && $tokens_to_go[$i] eq 'return'
- && $ir > $il
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- push @insert_list, $i;
- }
-
- return unless (@insert_list);
-
- # One final check...
- # scan second and third lines and be sure there are no assignments
- # we want to avoid breaking at an = to make something like this:
- # unless ( $icon =
- # $html_icons{"$type-$state"}
- # or $icon = $html_icons{$type}
- # or $icon = $html_icons{$state} )
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir ) {
- my $type = $types_to_go[$i];
- return
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg );
- }
- }
-
- # ok, insert any new break point
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-}
-
-sub insert_final_breaks {
-
- my ( $ri_left, $ri_right ) = @_;
-
- my $nmax = @{$ri_right} - 1;
-
- # scan the left and right end tokens of all lines
- my $count = 0;
- my $i_first_colon = -1;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- return if ( $typel eq '?' );
- return if ( $typer eq '?' );
- if ( $typel eq ':' ) { $i_first_colon = $il; last; }
- elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
- }
-
- # For long ternary chains,
- # if the first : we see has its # ? is in the interior
- # of a preceding line, then see if there are any good
- # breakpoints before the ?.
- if ( $i_first_colon > 0 ) {
- my $i_question = $mate_index_to_go[$i_first_colon];
- if ( $i_question > 0 ) {
- my @insert_list;
- for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
- my $token = $tokens_to_go[$ii];
- my $type = $types_to_go[$ii];
-
- # For now, a good break is either a comma or a 'return'.
- if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
- && in_same_container( $ii, $i_question ) )
- {
- push @insert_list, $ii;
- last;
- }
- }
-
- # insert any new break points
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- }
- }
- return;
-}
-
-sub in_same_container {
-
- # check to see if tokens at i1 and i2 are in the
- # same container, and not separated by a comma, ? or :
- my ( $i1, $i2 ) = @_;
- my $type = $types_to_go[$i1];
- my $depth = $nesting_depth_to_go[$i1];
- return unless ( $nesting_depth_to_go[$i2] == $depth );
- if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
-
- ###########################################################
- # This is potentially a very slow routine and not critical.
- # For safety just give up for large differences.
- # See test file 'infinite_loop.txt'
- # TODO: replace this loop with a data structure
- ###########################################################
- return if ( $i2 - $i1 > 200 );
-
- foreach my $i ( $i1 + 1 .. $i2 - 1 ) {
- next if ( $nesting_depth_to_go[$i] > $depth );
- return if ( $nesting_depth_to_go[$i] < $depth );
-
- my $tok = $tokens_to_go[$i];
- $tok = ',' if $tok eq '=>'; # treat => same as ,
-
- # Example: we would not want to break at any of these .'s
- # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
- if ( $type ne ':' ) {
- return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
- }
- else {
- return if ( $tok =~ /^[\,]$/ );
- }
- }
- return 1;
-}
-
-sub set_continuation_breaks {
-
- # Define an array of indexes for inserting newline characters to
- # keep the line lengths below the maximum desired length. There is
- # an implied break after the last token, so it need not be included.
-
- # Method:
- # This routine is part of series of routines which adjust line
- # lengths. It is only called if a statement is longer than the
- # maximum line length, or if a preliminary scanning located
- # desirable break points. Sub scan_list has already looked at
- # these tokens and set breakpoints (in array
- # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
- # after commas, after opening parens, and before closing parens).
- # This routine will honor these breakpoints and also add additional
- # breakpoints as necessary to keep the line length below the maximum
- # requested. It bases its decision on where the 'bond strength' is
- # lowest.
-
- # Output: returns references to the arrays:
- # @i_first
- # @i_last
- # which contain the indexes $i of the first and last tokens on each
- # line.
-
- # In addition, the array:
- # $forced_breakpoint_to_go[$i]
- # may be updated to be =1 for any index $i after which there must be
- # a break. This signals later routines not to undo the breakpoint.
-
- my $saw_good_break = shift;
- my @i_first = (); # the first index to output
- my @i_last = (); # the last index to output
- my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
- if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
-
- set_bond_strengths();
-
- my $imin = 0;
- my $imax = $max_index_to_go;
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_begin = $imin; # index for starting next iteration
-
- my $leading_spaces = leading_spaces_to_go($imin);
- my $line_count = 0;
- my $last_break_strength = NO_BREAK;
- my $i_last_break = -1;
- my $max_bias = 0.001;
- my $tiny_bias = 0.0001;
- my $leading_alignment_token = "";
- my $leading_alignment_type = "";
-
- # see if any ?/:'s are in order
- my $colons_in_order = 1;
- my $last_tok = "";
- my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
- my $colon_count = @colon_list;
- foreach (@colon_list) {
- if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
- $last_tok = $_;
- }
-
- # This is a sufficient but not necessary condition for colon chain
- my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
-
- #-------------------------------------------------------
- # BEGINNING of main loop to set continuation breakpoints
- # Keep iterating until we reach the end
- #-------------------------------------------------------
- while ( $i_begin <= $imax ) {
- my $lowest_strength = NO_BREAK;
- my $starting_sum = $summed_lengths_to_go[$i_begin];
- my $i_lowest = -1;
- my $i_test = -1;
- my $lowest_next_token = '';
- my $lowest_next_type = 'b';
- my $i_lowest_next_nonblank = -1;
-
- #-------------------------------------------------------
- # BEGINNING of inner loop to find the best next breakpoint
- #-------------------------------------------------------
- for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
- my $type = $types_to_go[$i_test];
- my $token = $tokens_to_go[$i_test];
- my $next_type = $types_to_go[ $i_test + 1 ];
- my $next_token = $tokens_to_go[ $i_test + 1 ];
- my $i_next_nonblank = $inext_to_go[$i_test];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- my $strength = $bond_strength_to_go[$i_test];
- my $maximum_line_length = maximum_line_length($i_begin);
-
- # use old breaks as a tie-breaker. For example to
- # prevent blinkers with -pbp in this code:
-
-##@keywords{
-## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
-## = ();
-
- # At the same time try to prevent a leading * in this code
- # with the default formatting:
- #
-## return
-## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
-## * ( $x**( $a - 1 ) )
-## * ( ( 1 - $x )**( $b - 1 ) );
-
- # reduce strength a bit to break ties at an old breakpoint ...
- if (
- $old_breakpoint_to_go[$i_test]
-
- # which is a 'good' breakpoint, meaning ...
- # we don't want to break before it
- && !$want_break_before{$type}
-
- # and either we want to break before the next token
- # or the next token is not short (i.e. not a '*', '/' etc.)
- && $i_next_nonblank <= $imax
- && ( $want_break_before{$next_nonblank_type}
- || $token_lengths_to_go[$i_next_nonblank] > 2
- || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
- )
- {
- $strength -= $tiny_bias;
- }
-
- # otherwise increase strength a bit if this token would be at the
- # maximum line length. This is necessary to avoid blinking
- # in the above example when the -iob flag is added.
- else {
- my $len =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum;
- if ( $len >= $maximum_line_length ) {
- $strength += $tiny_bias;
- }
- }
-
- my $must_break = 0;
-
- # Force an immediate break at certain operators
- # with lower level than the start of the line,
- # unless we've already seen a better break.
- #
- ##############################################
- # Note on an issue with a preceding ?
- ##############################################
- # We don't include a ? in the above list, but there may
- # be a break at a previous ? if the line is long.
- # Because of this we do not want to force a break if
- # there is a previous ? on this line. For now the best way
- # to do this is to not break if we have seen a lower strength
- # point, which is probably a ?.
- #
- # Example of unwanted breaks we are avoiding at a '.' following a ?
- # from pod2html using perltidy -gnu:
- # )
- # ? "\n<A NAME=\""
- # . $value
- # . "\">\n$text</A>\n"
- # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
- if (
- (
- $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
- || ( $next_nonblank_type eq 'k'
- && $next_nonblank_token =~ /^(and|or)$/ )
- )
- && ( $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_next_nonblank] )
- && ( $strength <= $lowest_strength )
- )
- {
- set_forced_breakpoint($i_next_nonblank);
- }
-
- if (
-
- # Try to put a break where requested by scan_list
- $forced_breakpoint_to_go[$i_test]
-
- # break between ) { in a continued line so that the '{' can
- # be outdented
- # See similar logic in scan_list which catches instances
- # where a line is just something like ') {'. We have to
- # be careful because the corresponding block keyword might
- # not be on the first line, such as 'for' here:
- #
- # eval {
- # for ("a") {
- # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
- # }
- # };
- #
- || (
- $line_count
- && ( $token eq ')' )
- && ( $next_nonblank_type eq '{' )
- && ($next_nonblank_block_type)
- && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
-
- # RT #104427: Dont break before opening sub brace because
- # sub block breaks handled at higher level, unless
- # it looks like the preceeding list is long and broken
- && !(
- $next_nonblank_block_type =~ /^sub\b/
- && ( $nesting_depth_to_go[$i_begin] ==
- $nesting_depth_to_go[$i_next_nonblank] )
- )
-
- && !$rOpts->{'opening-brace-always-on-right'}
- )
-
- # There is an implied forced break at a terminal opening brace
- || ( ( $type eq '{' ) && ( $i_test == $imax ) )
- )
- {
-
- # Forced breakpoints must sometimes be overridden, for example
- # because of a side comment causing a NO_BREAK. It is easier
- # to catch this here than when they are set.
- if ( $strength < NO_BREAK - 1 ) {
- $strength = $lowest_strength - $tiny_bias;
- $must_break = 1;
- }
- }
-
- # quit if a break here would put a good terminal token on
- # the next line and we already have a possible break
- if (
- !$must_break
- && ( $next_nonblank_type =~ /^[\;\,]$/ )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
- $starting_sum
- ) > $maximum_line_length
- )
- )
- {
- last if ( $i_lowest >= 0 );
- }
-
- # Avoid a break which would strand a single punctuation
- # token. For example, we do not want to strand a leading
- # '.' which is followed by a long quoted string.
- # But note that we do want to do this with -extrude (l=1)
- # so please test any changes to this code on -extrude.
- if (
- !$must_break
- && ( $i_test == $i_begin )
- && ( $i_test < $imax )
- && ( $token eq $type )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum
- ) < $maximum_line_length
- )
- )
- {
- $i_test = min( $imax, $inext_to_go[$i_test] );
- redo;
- }
-
- if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
- {
-
- # break at previous best break if it would have produced
- # a leading alignment of certain common tokens, and it
- # is different from the latest candidate break
- last
- if ($leading_alignment_type);
-
- # Force at least one breakpoint if old code had good
- # break It is only called if a breakpoint is required or
- # desired. This will probably need some adjustments
- # over time. A goal is to try to be sure that, if a new
- # side comment is introduced into formatted text, then
- # the same breakpoints will occur. scbreak.t
- last
- if (
- $i_test == $imax # we are at the end
- && !$forced_breakpoint_count #
- && $saw_good_break # old line had good break
- && $type =~ /^[#;\{]$/ # and this line ends in
- # ';' or side comment
- && $i_last_break < 0 # and we haven't made a break
- && $i_lowest >= 0 # and we saw a possible break
- && $i_lowest < $imax - 1 # (but not just before this ;)
- && $strength - $lowest_strength < 0.5 * WEAK # and it's good
- );
-
- # Do not skip past an important break point in a short final
- # segment. For example, without this check we would miss the
- # break at the final / in the following code:
- #
- # $depth_stop =
- # ( $tau * $mass_pellet * $q_0 *
- # ( 1. - exp( -$t_stop / $tau ) ) -
- # 4. * $pi * $factor * $k_ice *
- # ( $t_melt - $t_ice ) *
- # $r_pellet *
- # $t_stop ) /
- # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
- #
- if ( $line_count > 2
- && $i_lowest < $i_test
- && $i_test > $imax - 2
- && $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_lowest]
- && $lowest_strength < $last_break_strength - .5 * WEAK )
- {
- # Make this break for math operators for now
- my $ir = $inext_to_go[$i_lowest];
- my $il = $iprev_to_go[$ir];
- last
- if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
- || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
- }
-
- # Update the minimum bond strength location
- $lowest_strength = $strength;
- $i_lowest = $i_test;
- $lowest_next_token = $next_nonblank_token;
- $lowest_next_type = $next_nonblank_type;
- $i_lowest_next_nonblank = $i_next_nonblank;
- last if $must_break;
-
- # set flags to remember if a break here will produce a
- # leading alignment of certain common tokens
- if ( $line_count > 0
- && $i_test < $imax
- && ( $lowest_strength - $last_break_strength <= $max_bias )
- )
- {
- my $i_last_end = $iprev_to_go[$i_begin];
- my $tok_beg = $tokens_to_go[$i_begin];
- my $type_beg = $types_to_go[$i_begin];
- if (
-
- # check for leading alignment of certain tokens
- (
- $tok_beg eq $next_nonblank_token
- && $is_chain_operator{$tok_beg}
- && ( $type_beg eq 'k'
- || $type_beg eq $tok_beg )
- && $nesting_depth_to_go[$i_begin] >=
- $nesting_depth_to_go[$i_next_nonblank]
- )
-
- || ( $tokens_to_go[$i_last_end] eq $token
- && $is_chain_operator{$token}
- && ( $type eq 'k' || $type eq $token )
- && $nesting_depth_to_go[$i_last_end] >=
- $nesting_depth_to_go[$i_test] )
- )
- {
- $leading_alignment_token = $next_nonblank_token;
- $leading_alignment_type = $next_nonblank_type;
- }
- }
- }
-
- my $too_long = ( $i_test >= $imax );
- if ( !$too_long ) {
- my $next_length =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 2 ] -
- $starting_sum;
- $too_long = $next_length > $maximum_line_length;
-
- # To prevent blinkers we will avoid leaving a token exactly at
- # the line length limit unless it is the last token or one of
- # several "good" types.
- #
- # The following code was a blinker with -pbp before this
- # modification:
-## $last_nonblank_token eq '('
-## && $is_indirect_object_taker{ $paren_type
-## [$paren_depth] }
- # The issue causing the problem is that if the
- # term [$paren_depth] gets broken across a line then
- # the whitespace routine doesn't see both opening and closing
- # brackets and will format like '[ $paren_depth ]'. This
- # leads to an oscillation in length depending if we break
- # before the closing bracket or not.
- if ( !$too_long
- && $i_test + 1 < $imax
- && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
- {
- $too_long = $next_length >= $maximum_line_length;
- }
- }
-
- FORMATTER_DEBUG_FLAG_BREAK
- && do {
- my $ltok = $token;
- my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
- my $i_testp2 = $i_test + 2;
- if ( $i_testp2 > $max_index_to_go + 1 ) {
- $i_testp2 = $max_index_to_go + 1;
- }
- if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
- if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
- print STDOUT
-"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
- };
-
- # allow one extra terminal token after exceeding line length
- # if it would strand this token.
- if ( $rOpts_fuzzy_line_length
- && $too_long
- && $i_lowest == $i_test
- && $token_lengths_to_go[$i_test] > 1
- && $next_nonblank_type =~ /^[\;\,]$/ )
- {
- $too_long = 0;
- }
-
- last
- if (
- ( $i_test == $imax ) # we're done if no more tokens,
- || (
- ( $i_lowest >= 0 ) # or no more space and we have a break
- && $too_long
- )
- );
- }
-
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint
- # Now decide exactly where to put the breakpoint
- #-------------------------------------------------------
-
- # it's always ok to break at imax if no other break was found
- if ( $i_lowest < 0 ) { $i_lowest = $imax }
-
- # semi-final index calculation
- my $i_next_nonblank = $inext_to_go[$i_lowest];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
-
- #-------------------------------------------------------
- # ?/: rule 1 : if a break here will separate a '?' on this
- # line from its closing ':', then break at the '?' instead.
- #-------------------------------------------------------
- foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
- next unless ( $tokens_to_go[$i] eq '?' );
-
- # do not break if probable sequence of ?/: statements
- next if ($is_colon_chain);
-
- # do not break if statement is broken by side comment
- next
- if (
- $tokens_to_go[$max_index_to_go] eq '#'
- && terminal_type( \@types_to_go, \@block_type_to_go, 0,
- $max_index_to_go ) !~ /^[\;\}]$/
- );
-
- # no break needed if matching : is also on the line
- next
- if ( $mate_index_to_go[$i] >= 0
- && $mate_index_to_go[$i] <= $i_next_nonblank );
-
- $i_lowest = $i;
- if ( $want_break_before{'?'} ) { $i_lowest-- }
- last;
- }
-
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint:
- # Break the line after the token with index i=$i_lowest
- #-------------------------------------------------------
-
- # final index calculation
- $i_next_nonblank = $inext_to_go[$i_lowest];
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
-
- FORMATTER_DEBUG_FLAG_BREAK
- && print STDOUT
- "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
-
- #-------------------------------------------------------
- # ?/: rule 2 : if we break at a '?', then break at its ':'
- #
- # Note: this rule is also in sub scan_list to handle a break
- # at the start and end of a line (in case breaks are dictated
- # by side comments).
- #-------------------------------------------------------
- if ( $next_nonblank_type eq '?' ) {
- set_closing_breakpoint($i_next_nonblank);
- }
- elsif ( $types_to_go[$i_lowest] eq '?' ) {
- set_closing_breakpoint($i_lowest);
- }
-
- #-------------------------------------------------------
- # ?/: rule 3 : if we break at a ':' then we save
- # its location for further work below. We may need to go
- # back and break at its '?'.
- #-------------------------------------------------------
- if ( $next_nonblank_type eq ':' ) {
- push @i_colon_breaks, $i_next_nonblank;
- }
- elsif ( $types_to_go[$i_lowest] eq ':' ) {
- push @i_colon_breaks, $i_lowest;
- }
-
- # here we should set breaks for all '?'/':' pairs which are
- # separated by this line
-
- $line_count++;
-
- # save this line segment, after trimming blanks at the ends
- push( @i_first,
- ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
- push( @i_last,
- ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
-
- # set a forced breakpoint at a container opening, if necessary, to
- # signal a break at a closing container. Excepting '(' for now.
- if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
- && !$forced_breakpoint_to_go[$i_lowest] )
- {
- set_closing_breakpoint($i_lowest);
- }
-
- # get ready to go again
- $i_begin = $i_lowest + 1;
- $last_break_strength = $lowest_strength;
- $i_last_break = $i_lowest;
- $leading_alignment_token = "";
- $leading_alignment_type = "";
- $lowest_next_token = '';
- $lowest_next_type = 'b';
-
- if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
- $i_begin++;
- }
-
- # update indentation size
- if ( $i_begin <= $imax ) {
- $leading_spaces = leading_spaces_to_go($i_begin);
- }
- }
-
- #-------------------------------------------------------
- # END of main loop to set continuation breakpoints
- # Now go back and make any necessary corrections
- #-------------------------------------------------------
-
- #-------------------------------------------------------
- # ?/: rule 4 -- if we broke at a ':', then break at
- # corresponding '?' unless this is a chain of ?: expressions
- #-------------------------------------------------------
- if (@i_colon_breaks) {
-
- # using a simple method for deciding if we are in a ?/: chain --
- # this is a chain if it has multiple ?/: pairs all in order;
- # otherwise not.
- # Note that if line starts in a ':' we count that above as a break
- my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
-
- unless ($is_chain) {
- my @insert_list = ();
- foreach (@i_colon_breaks) {
- my $i_question = $mate_index_to_go[$_];
- if ( $i_question >= 0 ) {
- if ( $want_break_before{'?'} ) {
- $i_question = $iprev_to_go[$i_question];
- }
-
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
- }
- }
- insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
- }
- }
- }
- return ( \@i_first, \@i_last, $colon_count );
-}
-
-sub insert_additional_breaks {
-
- # this routine will add line breaks at requested locations after
- # sub set_continuation_breaks has made preliminary breaks.
-
- my ( $ri_break_list, $ri_first, $ri_last ) = @_;
- my $i_f;
- my $i_l;
- my $line_number = 0;
- foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
-
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
- while ( $i_break_left >= $i_l ) {
- $line_number++;
-
- # shouldn't happen unless caller passes bad indexes
- if ( $line_number >= @{$ri_last} ) {
- warning(
-"Non-fatal program bug: couldn't set break at $i_break_left\n"
- );
- report_definite_bug();
- return;
- }
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
- }
-
- # Do not leave a blank at the end of a line; back up if necessary
- if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
-
- my $i_break_right = $inext_to_go[$i_break_left];
- if ( $i_break_left >= $i_f
- && $i_break_left < $i_l
- && $i_break_right > $i_f
- && $i_break_right <= $i_l )
- {
- splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
- splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
- }
- }
- return;
-}
-
-sub set_closing_breakpoint {
-
- # set a breakpoint at a matching closing token
- # at present, this is only used to break at a ':' which matches a '?'
- my $i_break = shift;
-
- if ( $mate_index_to_go[$i_break] >= 0 ) {
-
- # CAUTION: infinite recursion possible here:
- # set_closing_breakpoint calls set_forced_breakpoint, and
- # set_forced_breakpoint call set_closing_breakpoint
- # ( test files attrib.t, BasicLyx.pm.html).
- # Don't reduce the '2' in the statement below
- if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
-
- # break before } ] and ), but sub set_forced_breakpoint will decide
- # to break before or after a ? and :
- my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
- set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
- }
- }
- else {
- my $type_sequence = $type_sequence_to_go[$i_break];
- if ($type_sequence) {
- my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
- $postponed_breakpoint{$type_sequence} = 1;
- }
- }
- return;
-}
-
-sub compare_indentation_levels {
-
- # check to see if output line tabbing agrees with input line
- # this can be very useful for debugging a script which has an extra
- # or missing brace
- my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
- if ( $guessed_indentation_level ne $structural_indentation_level ) {
- $last_tabbing_disagreement = $input_line_number;
-
- if ($in_tabbing_disagreement) {
- }
- else {
- $tabbing_disagreement_count++;
-
- if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
- );
- }
- $in_tabbing_disagreement = $input_line_number;
- $first_tabbing_disagreement = $in_tabbing_disagreement
- unless ($first_tabbing_disagreement);
- }
- }
- else {
-
- if ($in_tabbing_disagreement) {
-
- if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"End indentation disagreement from input line $in_tabbing_disagreement\n"
- );
-
- if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
- write_logfile_entry(
- "No further tabbing disagreements will be noted\n");
- }
- }
- $in_tabbing_disagreement = 0;
- }
- }
- return;
-}
-
-#####################################################################
-#
-# the Perl::Tidy::IndentationItem class supplies items which contain
-# how much whitespace should be used at the start of a line
-#
-#####################################################################
-
-package Perl::Tidy::IndentationItem;
-
-sub new {
-
- # Create an 'indentation_item' which describes one level of leading
- # whitespace when the '-lp' indentation is used.
- my (
- $class, $spaces, $level,
- $ci_level, $available_spaces, $index,
- $gnu_sequence_number, $align_paren, $stack_depth,
- $starting_index,
- ) = @_;
-
- my $closed = -1;
- my $arrow_count = 0;
- my $comma_count = 0;
- my $have_child = 0;
- my $want_right_spaces = 0;
- my $marked = 0;
-
- # DEFINITIONS:
- # spaces => # total leading white spaces
- # level => # the indentation 'level'
- # ci_level => # the 'continuation level'
- # available_spaces => # how many left spaces available
- # # for this level
- # closed => # index where we saw closing '}'
- # comma_count => # how many commas at this level?
- # sequence_number => # output batch number
- # index => # index in output batch list
- # have_child => # any dependents?
- # recoverable_spaces => # how many spaces to the right
- # # we would like to move to get
- # # alignment (negative if left)
- # align_paren => # do we want to try to align
- # # with an opening structure?
- # marked => # if visited by corrector logic
- # stack_depth => # indentation nesting depth
- # starting_index => # first token index of this level
- # arrow_count => # how many =>'s
-
- return bless {
- _spaces => $spaces,
- _level => $level,
- _ci_level => $ci_level,
- _available_spaces => $available_spaces,
- _closed => $closed,
- _comma_count => $comma_count,
- _sequence_number => $gnu_sequence_number,
- _index => $index,
- _have_child => $have_child,
- _recoverable_spaces => $want_right_spaces,
- _align_paren => $align_paren,
- _marked => $marked,
- _stack_depth => $stack_depth,
- _starting_index => $starting_index,
- _arrow_count => $arrow_count,
- }, $class;
-}
-
-sub permanently_decrease_available_spaces {
-
- # make a permanent reduction in the available indentation spaces
- # at one indentation item. NOTE: if there are child nodes, their
- # total SPACES must be reduced by the caller.
-
- my ( $item, $spaces_needed ) = @_;
- my $available_spaces = $item->get_available_spaces();
- my $deleted_spaces =
- ( $available_spaces > $spaces_needed )
- ? $spaces_needed
- : $available_spaces;
- $item->decrease_available_spaces($deleted_spaces);
- $item->decrease_SPACES($deleted_spaces);
- $item->set_recoverable_spaces(0);
-
- return $deleted_spaces;
-}
-
-sub tentatively_decrease_available_spaces {
-
- # We are asked to tentatively delete $spaces_needed of indentation
- # for a indentation item. We may want to undo this later. NOTE: if
- # there are child nodes, their total SPACES must be reduced by the
- # caller.
- my ( $item, $spaces_needed ) = @_;
- my $available_spaces = $item->get_available_spaces();
- my $deleted_spaces =
- ( $available_spaces > $spaces_needed )
- ? $spaces_needed
- : $available_spaces;
- $item->decrease_available_spaces($deleted_spaces);
- $item->decrease_SPACES($deleted_spaces);
- $item->increase_recoverable_spaces($deleted_spaces);
- return $deleted_spaces;
-}
-
-sub get_stack_depth {
- my $self = shift;
- return $self->{_stack_depth};
-}
-
-sub get_spaces {
- my $self = shift;
- return $self->{_spaces};
-}
-
-sub get_marked {
- my $self = shift;
- return $self->{_marked};
-}
-
-sub set_marked {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_marked} = $value;
- }
- return $self->{_marked};
-}
-
-sub get_available_spaces {
- my $self = shift;
- return $self->{_available_spaces};
-}
-
-sub decrease_SPACES {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_spaces} -= $value;
- }
- return $self->{_spaces};
-}
-
-sub decrease_available_spaces {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_available_spaces} -= $value;
- }
- return $self->{_available_spaces};
-}
-
-sub get_align_paren {
- my $self = shift;
- return $self->{_align_paren};
-}
-
-sub get_recoverable_spaces {
- my $self = shift;
- return $self->{_recoverable_spaces};
-}
-
-sub set_recoverable_spaces {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_recoverable_spaces} = $value;
- }
- return $self->{_recoverable_spaces};
-}
-
-sub increase_recoverable_spaces {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_recoverable_spaces} += $value;
- }
- return $self->{_recoverable_spaces};
-}
-
-sub get_ci_level {
- my $self = shift;
- return $self->{_ci_level};
-}
-
-sub get_level {
- my $self = shift;
- return $self->{_level};
-}
-
-sub get_sequence_number {
- my $self = shift;
- return $self->{_sequence_number};
-}
-
-sub get_index {
- my $self = shift;
- return $self->{_index};
-}
-
-sub get_starting_index {
- my $self = shift;
- return $self->{_starting_index};
-}
-
-sub set_have_child {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_have_child} = $value;
- }
- return $self->{_have_child};
-}
-
-sub get_have_child {
- my $self = shift;
- return $self->{_have_child};
-}
-
-sub set_arrow_count {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_arrow_count} = $value;
- }
- return $self->{_arrow_count};
-}
-
-sub get_arrow_count {
- my $self = shift;
- return $self->{_arrow_count};
-}
-
-sub set_comma_count {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_comma_count} = $value;
- }
- return $self->{_comma_count};
-}
-
-sub get_comma_count {
- my $self = shift;
- return $self->{_comma_count};
-}
-
-sub set_closed {
- my ( $self, $value ) = @_;
- if ( defined($value) ) {
- $self->{_closed} = $value;
- }
- return $self->{_closed};
-}
-
-sub get_closed {
- my $self = shift;
- return $self->{_closed};
-}
-
-#####################################################################
-#
-# the Perl::Tidy::VerticalAligner::Line class supplies an object to
-# contain a single output line
-#
-#####################################################################
-
-package Perl::Tidy::VerticalAligner::Line;
-
-{
-
- use strict;
- ##use Carp;
-
- my %default_data = (
- jmax => undef,
- jmax_original_line => undef,
- rtokens => undef,
- rfields => undef,
- rpatterns => undef,
- indentation => undef,
- leading_space_count => undef,
- outdent_long_lines => undef,
- list_type => undef,
- is_hanging_side_comment => undef,
- ralignments => [],
- maximum_line_length => undef,
- rvertical_tightness_flags => undef
- );
- {
-
- # methods to count object population
- my $_count = 0;
- sub get_count { return $_count; }
- sub _increment_count { return ++$_count }
- sub _decrement_count { return --$_count }
- }
-
- # Constructor may be called as a class method
- sub new {
- my ( $caller, %arg ) = @_;
- my $caller_is_obj = ref($caller);
- my $class = $caller_is_obj || $caller;
- ##no strict "refs";
- my $self = bless {}, $class;
-
- $self->{_ralignments} = [];
-
- foreach my $key ( keys %default_data ) {
- my $_key = '_' . $key;
-
- # Caller keys do not have an underscore
- if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} }
- elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} }
- else { $self->{$_key} = $default_data{$_key} }
- }
-
- $self->_increment_count();
- return $self;
- }
-
- sub DESTROY {
- my $self = shift;
- $self->_decrement_count();
- return;
- }
-
- sub get_jmax { my $self = shift; return $self->{_jmax} }
-
- sub get_jmax_original_line {
- my $self = shift;
- return $self->{_jmax_original_line};
- }
- sub get_rtokens { my $self = shift; return $self->{_rtokens} }
- sub get_rfields { my $self = shift; return $self->{_rfields} }
- sub get_rpatterns { my $self = shift; return $self->{_rpatterns} }
- sub get_indentation { my $self = shift; return $self->{_indentation} }
-
- sub get_leading_space_count {
- my $self = shift;
- return $self->{_leading_space_count};
- }
-
- sub get_outdent_long_lines {
- my $self = shift;
- return $self->{_outdent_long_lines};
- }
- sub get_list_type { my $self = shift; return $self->{_list_type} }
-
- sub get_is_hanging_side_comment {
- my $self = shift;
- return $self->{_is_hanging_side_comment};
- }
-
- sub get_rvertical_tightness_flags {
- my $self = shift;
- return $self->{_rvertical_tightness_flags};
- }
-
- sub set_column {
- ## FIXME: does caller ever supply $val??
- my ( $self, $j, $val ) = @_;
- return $self->{_ralignments}->[$j]->set_column($val);
- }
-
- sub get_alignment {
- my ( $self, $j ) = @_;
- return $self->{_ralignments}->[$j];
- }
- sub get_alignments { my $self = shift; return @{ $self->{_ralignments} } }
-
- sub get_column {
- my ( $self, $j ) = @_;
- return $self->{_ralignments}->[$j]->get_column();
- }
-
- sub get_starting_column {
- my ( $self, $j ) = @_;
- return $self->{_ralignments}->[$j]->get_starting_column();
- }
-
- sub increment_column {
- my ( $self, $k, $pad ) = @_;
- $self->{_ralignments}->[$k]->increment_column($pad);
- return;
- }
-
- sub set_alignments {
- my $self = shift;
- @{ $self->{_ralignments} } = @_;
- return;
- }
-
- sub current_field_width {
- my ( $self, $j ) = @_;
- if ( $j == 0 ) {
- return $self->get_column($j);
- }
- else {
- return $self->get_column($j) - $self->get_column( $j - 1 );
- }
- }
-
- sub field_width_growth {
- my ( $self, $j ) = @_;
- return $self->get_column($j) - $self->get_starting_column($j);
- }
-
- sub starting_field_width {
- my ( $self, $j ) = @_;
- if ( $j == 0 ) {
- return $self->get_starting_column($j);
- }
- else {
- return $self->get_starting_column($j) -
- $self->get_starting_column( $j - 1 );
- }
- }
-
- sub increase_field_width {
-
- my ( $self, $j, $pad ) = @_;
- my $jmax = $self->get_jmax();
- for my $k ( $j .. $jmax ) {
- $self->increment_column( $k, $pad );
- }
- return;
- }
-
- sub get_available_space_on_right {
- my $self = shift;
- my $jmax = $self->get_jmax();
- return $self->{_maximum_line_length} - $self->get_column($jmax);
- }
-
- sub set_jmax { my ( $self, $val ) = @_; $self->{_jmax} = $val; return }
-
- sub set_jmax_original_line {
- my ( $self, $val ) = @_;
- $self->{_jmax_original_line} = $val;
- return;
- }
-
- sub set_rtokens {
- my ( $self, $val ) = @_;
- $self->{_rtokens} = $val;
- return;
- }
-
- sub set_rfields {
- my ( $self, $val ) = @_;
- $self->{_rfields} = $val;
- return;
- }
-
- sub set_rpatterns {
- my ( $self, $val ) = @_;
- $self->{_rpatterns} = $val;
- return;
- }
-
- sub set_indentation {
- my ( $self, $val ) = @_;
- $self->{_indentation} = $val;
- return;
- }
-
- sub set_leading_space_count {
- my ( $self, $val ) = @_;
- $self->{_leading_space_count} = $val;
- return;
- }
-
- sub set_outdent_long_lines {
- my ( $self, $val ) = @_;
- $self->{_outdent_long_lines} = $val;
- return;
- }
-
- sub set_list_type {
- my ( $self, $val ) = @_;
- $self->{_list_type} = $val;
- return;
- }
-
- sub set_is_hanging_side_comment {
- my ( $self, $val ) = @_;
- $self->{_is_hanging_side_comment} = $val;
- return;
- }
-
- sub set_alignment {
- my ( $self, $j, $val ) = @_;
- $self->{_ralignments}->[$j] = $val;
- return;
- }
-
-}
-
-#####################################################################
-#
-# the Perl::Tidy::VerticalAligner::Alignment class holds information
-# on a single column being aligned
-#
-#####################################################################
-package Perl::Tidy::VerticalAligner::Alignment;
-
-{
-
- use strict;
-
- #use Carp;
-
- # _column # the current column number
- # _starting_column # column number when created
- # _matching_token # what token we are matching
- # _starting_line # the line index of creation
- # _ending_line
- # the most recent line to use it
- # _saved_column
- # _serial_number # unique number for this alignment
-
- my %default_data = (
- column => undef,
- starting_column => undef,
- matching_token => undef,
- starting_line => undef,
- ending_line => undef,
- saved_column => undef,
- serial_number => undef,
- );
-
- # class population count
- {
- my $_count = 0;
- sub get_count { return $_count }
- sub _increment_count { return ++$_count }
- sub _decrement_count { return --$_count }
- }
-
- # constructor
- sub new {
- my ( $caller, %arg ) = @_;
- my $caller_is_obj = ref($caller);
- my $class = $caller_is_obj || $caller;
- ##no strict "refs";
- my $self = bless {}, $class;
-
- foreach my $key ( keys %default_data ) {
- my $_key = '_' . $key;
- if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} }
- elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} }
- else { $self->{$_key} = $default_data{$_key} }
- }
- $self->_increment_count();
- return $self;
- }
-
- sub DESTROY {
- my $self = shift;
- $self->_decrement_count();
- return;
- }
-
- sub get_column { my $self = shift; return $self->{_column} }
-
- sub get_starting_column {
- my $self = shift;
- return $self->{_starting_column};
- }
- sub get_matching_token { my $self = shift; return $self->{_matching_token} }
- sub get_starting_line { my $self = shift; return $self->{_starting_line} }
- sub get_ending_line { my $self = shift; return $self->{_ending_line} }
- sub get_serial_number { my $self = shift; return $self->{_serial_number} }
-
- sub set_column { my ( $self, $val ) = @_; $self->{_column} = $val; return }
-
- sub set_starting_column {
- my ( $self, $val ) = @_;
- $self->{_starting_column} = $val;
- return;
- }
-
- sub set_matching_token {
- my ( $self, $val ) = @_;
- $self->{_matching_token} = $val;
- return;
- }
-
- sub set_starting_line {
- my ( $self, $val ) = @_;
- $self->{_starting_line} = $val;
- return;
- }
-
- sub set_ending_line {
- my ( $self, $val ) = @_;
- $self->{_ending_line} = $val;
- return;
- }
-
- sub increment_column {
- my ( $self, $val ) = @_;
- $self->{_column} += $val;
- return;
- }
-
- sub save_column {
- my $self = shift;
- $self->{_saved_column} = $self->{_column};
- return;
- }
-
- sub restore_column {
- my $self = shift;
- $self->{_column} = $self->{_saved_column};
- return;
- }
-}
-
-package Perl::Tidy::VerticalAligner;
-
-# The Perl::Tidy::VerticalAligner package collects output lines and
-# attempts to line up certain common tokens, such as => and #, which are
-# identified by the calling routine.
-#
-# There are two main routines: valign_input and flush. Append acts as a
-# storage buffer, collecting lines into a group which can be vertically
-# aligned. When alignment is no longer possible or desirable, it dumps
-# the group to flush.
-#
-# valign_input -----> flush
-#
-# collects writes
-# vertical one
-# groups group
-
-BEGIN {
-
- # Caution: these debug flags produce a lot of output
- # They should all be 0 except when debugging small scripts
-
- use constant VALIGN_DEBUG_FLAG_APPEND => 0;
- use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
- use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
- use constant VALIGN_DEBUG_FLAG_TABS => 0;
-
- my $debug_warning = sub {
- print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
- return;
- };
-
- VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
- VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
- VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
- VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
-
-}
-
-use vars qw(
- $vertical_aligner_self
- $current_line
- $maximum_alignment_index
- $ralignment_list
- $maximum_jmax_seen
- $minimum_jmax_seen
- $previous_minimum_jmax_seen
- $previous_maximum_jmax_seen
- $maximum_line_index
- $group_level
- $group_type
- $group_maximum_gap
- $marginal_match
- $last_level_written
- $last_leading_space_count
- $extra_indent_ok
- $zero_count
- @group_lines
- $last_comment_column
- $last_side_comment_line_number
- $last_side_comment_length
- $last_side_comment_level
- $outdented_line_count
- $first_outdented_line_at
- $last_outdented_line_at
- $diagnostics_object
- $logger_object
- $file_writer_object
- @side_comment_history
- $comment_leading_space_count
- $is_matching_terminal_line
- $consecutive_block_comments
-
- $cached_line_text
- $cached_line_type
- $cached_line_flag
- $cached_seqno
- $cached_line_valid
- $cached_line_leading_space_count
- $cached_seqno_string
-
- $valign_buffer_filling
- @valign_buffer
-
- $seqno_string
- $last_nonblank_seqno_string
-
- $rOpts
-
- $rOpts_maximum_line_length
- $rOpts_variable_maximum_line_length
- $rOpts_continuation_indentation
- $rOpts_indent_columns
- $rOpts_tabs
- $rOpts_entab_leading_whitespace
- $rOpts_valign
-
- $rOpts_fixed_position_side_comment
- $rOpts_minimum_space_to_comment
-
-);
-
-sub initialize {
-
- (
- my $class, $rOpts, $file_writer_object, $logger_object,
- $diagnostics_object
- ) = @_;
-
- # variables describing the entire space group:
- $ralignment_list = [];
- $group_level = 0;
- $last_level_written = -1;
- $extra_indent_ok = 0; # can we move all lines to the right?
- $last_side_comment_length = 0;
- $maximum_jmax_seen = 0;
- $minimum_jmax_seen = 0;
- $previous_minimum_jmax_seen = 0;
- $previous_maximum_jmax_seen = 0;
-
- # variables describing each line of the group
- @group_lines = (); # list of all lines in group
-
- $outdented_line_count = 0;
- $first_outdented_line_at = 0;
- $last_outdented_line_at = 0;
- $last_side_comment_line_number = 0;
- $last_side_comment_level = -1;
- $is_matching_terminal_line = 0;
-
- # most recent 3 side comments; [ line number, column ]
- $side_comment_history[0] = [ -300, 0 ];
- $side_comment_history[1] = [ -200, 0 ];
- $side_comment_history[2] = [ -100, 0 ];
-
- # valign_output_step_B cache:
- $cached_line_text = "";
- $cached_line_type = 0;
- $cached_line_flag = 0;
- $cached_seqno = 0;
- $cached_line_valid = 0;
- $cached_line_leading_space_count = 0;
- $cached_seqno_string = "";
-
- # string of sequence numbers joined together
- $seqno_string = "";
- $last_nonblank_seqno_string = "";
-
- # frequently used parameters
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_tabs = $rOpts->{'tabs'};
- $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
- $rOpts_fixed_position_side_comment =
- $rOpts->{'fixed-position-side-comment'};
- $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
- $rOpts_valign = $rOpts->{'valign'};
-
- $consecutive_block_comments = 0;
- forget_side_comment();
-
- initialize_for_new_group();
-
- $vertical_aligner_self = {};
- bless $vertical_aligner_self, $class;
- return $vertical_aligner_self;
-}
-
-sub initialize_for_new_group {
- $maximum_line_index = -1; # lines in the current group
- $maximum_alignment_index = -1; # alignments in current group
- $zero_count = 0; # count consecutive lines without tokens
- $current_line = undef; # line being matched for alignment
- $group_maximum_gap = 0; # largest gap introduced
- $group_type = "";
- $marginal_match = 0;
- $comment_leading_space_count = 0;
- $last_leading_space_count = 0;
- return;
-}
-
-# interface to Perl::Tidy::Diagnostics routines
-sub write_diagnostics {
- my $msg = shift;
- if ($diagnostics_object) {
- $diagnostics_object->write_diagnostics($msg);
- }
- return;
-}
-
-# interface to Perl::Tidy::Logger routines
-sub warning {
- my ($msg) = @_;
- if ($logger_object) {
- $logger_object->warning($msg);
- }
- return;
-}
-
-sub write_logfile_entry {
- my ($msg) = @_;
- if ($logger_object) {
- $logger_object->write_logfile_entry($msg);
- }
- return;
-}
-
-sub report_definite_bug {
- if ($logger_object) {
- $logger_object->report_definite_bug();
- }
- return;
-}
-
-sub get_spaces {
-
- # return the number of leading spaces associated with an indentation
- # variable $indentation is either a constant number of spaces or an
- # object with a get_spaces method.
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_spaces() : $indentation;
-}
-
-sub get_recoverable_spaces {
-
- # return the number of spaces (+ means shift right, - means shift left)
- # that we would like to shift a group of lines with the same indentation
- # to get them to line up with their opening parens
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
-}
-
-sub get_stack_depth {
-
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_stack_depth() : 0;
-}
-
-sub make_alignment {
- my ( $col, $token ) = @_;
-
- # make one new alignment at column $col which aligns token $token
- ++$maximum_alignment_index;
- my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
- column => $col,
- starting_column => $col,
- matching_token => $token,
- starting_line => $maximum_line_index,
- ending_line => $maximum_line_index,
- serial_number => $maximum_alignment_index,
- );
- $ralignment_list->[$maximum_alignment_index] = $alignment;
- return $alignment;
-}
-
-sub dump_alignments {
- print STDOUT
-"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
- for my $i ( 0 .. $maximum_alignment_index ) {
- my $column = $ralignment_list->[$i]->get_column();
- my $starting_column = $ralignment_list->[$i]->get_starting_column();
- my $matching_token = $ralignment_list->[$i]->get_matching_token();
- my $starting_line = $ralignment_list->[$i]->get_starting_line();
- my $ending_line = $ralignment_list->[$i]->get_ending_line();
- print STDOUT
-"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
- }
- return;
-}
-
-sub save_alignment_columns {
- for my $i ( 0 .. $maximum_alignment_index ) {
- $ralignment_list->[$i]->save_column();
- }
- return;
-}
-
-sub restore_alignment_columns {
- for my $i ( 0 .. $maximum_alignment_index ) {
- $ralignment_list->[$i]->restore_column();
- }
- return;
-}
-
-sub forget_side_comment {
- $last_comment_column = 0;
- return;
-}
-
-sub maximum_line_length_for_level {
-
- # return maximum line length for line starting with a given level
- my $maximum_line_length = $rOpts_maximum_line_length;
- if ($rOpts_variable_maximum_line_length) {
- my $level = shift;
- if ( $level < 0 ) { $level = 0 }
- $maximum_line_length += $level * $rOpts_indent_columns;
- }
- return $maximum_line_length;
-}
-
-sub valign_input {
-
- # Place one line in the current vertical group.
- #
- # The input parameters are:
- # $level = indentation level of this line
- # $rfields = reference to array of fields
- # $rpatterns = reference to array of patterns, one per field
- # $rtokens = reference to array of tokens starting fields 1,2,..
- #
- # Here is an example of what this package does. In this example,
- # we are trying to line up both the '=>' and the '#'.
- #
- # '18' => 'grave', # \`
- # '19' => 'acute', # `'
- # '20' => 'caron', # \v
- # <-tabs-><f1-><--field 2 ---><-f3->
- # | | | |
- # | | | |
- # col1 col2 col3 col4
- #
- # The calling routine has already broken the entire line into 3 fields as
- # indicated. (So the work of identifying promising common tokens has
- # already been done).
- #
- # In this example, there will be 2 tokens being matched: '=>' and '#'.
- # They are the leading parts of fields 2 and 3, but we do need to know
- # what they are so that we can dump a group of lines when these tokens
- # change.
- #
- # The fields contain the actual characters of each field. The patterns
- # are like the fields, but they contain mainly token types instead
- # of tokens, so they have fewer characters. They are used to be
- # sure we are matching fields of similar type.
- #
- # In this example, there will be 4 column indexes being adjusted. The
- # first one is always at zero. The interior columns are at the start of
- # the matching tokens, and the last one tracks the maximum line length.
- #
- # Each time a new line comes in, it joins the current vertical
- # group if possible. Otherwise it causes the current group to be dumped
- # and a new group is started.
- #
- # For each new group member, the column locations are increased, as
- # necessary, to make room for the new fields. When the group is finally
- # output, these column numbers are used to compute the amount of spaces of
- # padding needed for each field.
- #
- # Programming note: the fields are assumed not to have any tab characters.
- # Tabs have been previously removed except for tabs in quoted strings and
- # side comments. Tabs in these fields can mess up the column counting.
- # The log file warns the user if there are any such tabs.
-
- my (
- $level, $level_end,
- $indentation, $rfields,
- $rtokens, $rpatterns,
- $is_forced_break, $outdent_long_lines,
- $is_terminal_ternary, $is_terminal_statement,
- $do_not_pad, $rvertical_tightness_flags,
- $level_jump,
- ) = @_;
-
- # number of fields is $jmax
- # number of tokens between fields is $jmax-1
- my $jmax = $#{$rfields};
-
- my $leading_space_count = get_spaces($indentation);
-
- # set outdented flag to be sure we either align within statements or
- # across statement boundaries, but not both.
- my $is_outdented = $last_leading_space_count > $leading_space_count;
- $last_leading_space_count = $leading_space_count;
-
- # Patch: undo for hanging side comment
- my $is_hanging_side_comment =
- ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
- $is_outdented = 0 if $is_hanging_side_comment;
-
- # Forget side comment alignment after seeing 2 or more block comments
- my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
- if ($is_block_comment) {
- $consecutive_block_comments++;
- }
- else {
- if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
- $consecutive_block_comments = 0;
- }
-
- VALIGN_DEBUG_FLAG_APPEND0 && do {
- print STDOUT
-"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
- };
-
- # Validate cached line if necessary: If we can produce a container
- # with just 2 lines total by combining an existing cached opening
- # token with the closing token to follow, then we will mark both
- # cached flags as valid.
- if ($rvertical_tightness_flags) {
- if ( $maximum_line_index <= 0
- && $cached_line_type
- && $cached_seqno
- && $rvertical_tightness_flags->[2]
- && $rvertical_tightness_flags->[2] == $cached_seqno )
- {
- $rvertical_tightness_flags->[3] ||= 1;
- $cached_line_valid ||= 1;
- }
- }
-
- # do not join an opening block brace with an unbalanced line
- # unless requested with a flag value of 2
- if ( $cached_line_type == 3
- && $maximum_line_index < 0
- && $cached_line_flag < 2
- && $level_jump != 0 )
- {
- $cached_line_valid = 0;
- }
-
- # patch until new aligner is finished
- if ($do_not_pad) { my_flush() }
-
- # shouldn't happen:
- if ( $level < 0 ) { $level = 0 }
-
- # do not align code across indentation level changes
- # or if vertical alignment is turned off for debugging
- if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
-
- # we are allowed to shift a group of lines to the right if its
- # level is greater than the previous and next group
- $extra_indent_ok =
- ( $level < $group_level && $last_level_written < $group_level );
-
- my_flush();
-
- # If we know that this line will get flushed out by itself because
- # of level changes, we can leave the extra_indent_ok flag set.
- # That way, if we get an external flush call, we will still be
- # able to do some -lp alignment if necessary.
- $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
-
- $group_level = $level;
-
- # wait until after the above flush to get the leading space
- # count because it may have been changed if the -icp flag is in
- # effect
- $leading_space_count = get_spaces($indentation);
-
- }
-
- # --------------------------------------------------------------------
- # Patch to collect outdentable block COMMENTS
- # --------------------------------------------------------------------
- my $is_blank_line = "";
- if ( $group_type eq 'COMMENT' ) {
- if (
- (
- $is_block_comment
- && $outdent_long_lines
- && $leading_space_count == $comment_leading_space_count
- )
- || $is_blank_line
- )
- {
- $group_lines[ ++$maximum_line_index ] = $rfields->[0];
- return;
- }
- else {
- my_flush();
- }
- }
-
- # --------------------------------------------------------------------
- # add dummy fields for terminal ternary
- # --------------------------------------------------------------------
- my $j_terminal_match;
- if ( $is_terminal_ternary && $current_line ) {
- $j_terminal_match =
- fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
- $jmax = @{$rfields} - 1;
- }
-
- # --------------------------------------------------------------------
- # add dummy fields for else statement
- # --------------------------------------------------------------------
- if ( $rfields->[0] =~ /^else\s*$/
- && $current_line
- && $level_jump == 0 )
- {
- $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
- $jmax = @{$rfields} - 1;
- }
-
- # --------------------------------------------------------------------
- # Step 1. Handle simple line of code with no fields to match.
- # --------------------------------------------------------------------
- if ( $jmax <= 0 ) {
- $zero_count++;
-
- if ( $maximum_line_index >= 0
- && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) )
- {
-
- # flush the current group if it has some aligned columns..
- if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
-
- # flush current group if we are just collecting side comments..
- elsif (
-
- # ...and we haven't seen a comment lately
- ( $zero_count > 3 )
-
- # ..or if this new line doesn't fit to the left of the comments
- || ( ( $leading_space_count + length( $rfields->[0] ) ) >
- $group_lines[0]->get_column(0) )
- )
- {
- my_flush();
- }
- }
-
- # patch to start new COMMENT group if this comment may be outdented
- if ( $is_block_comment
- && $outdent_long_lines
- && $maximum_line_index < 0 )
- {
- $group_type = 'COMMENT';
- $comment_leading_space_count = $leading_space_count;
- $group_lines[ ++$maximum_line_index ] = $rfields->[0];
- return;
- }
-
- # just write this line directly if no current group, no side comment,
- # and no space recovery is needed.
- if ( $maximum_line_index < 0 && !get_recoverable_spaces($indentation) )
- {
- valign_output_step_B( $leading_space_count, $rfields->[0], 0,
- $outdent_long_lines, $rvertical_tightness_flags, $level );
- return;
- }
- }
- else {
- $zero_count = 0;
- }
-
- # programming check: (shouldn't happen)
- # an error here implies an incorrect call was made
- if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
- warning(
-"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
- );
- report_definite_bug();
- }
-
- # --------------------------------------------------------------------
- # create an object to hold this line
- # --------------------------------------------------------------------
- my $new_line = new Perl::Tidy::VerticalAligner::Line(
- jmax => $jmax,
- jmax_original_line => $jmax,
- rtokens => $rtokens,
- rfields => $rfields,
- rpatterns => $rpatterns,
- indentation => $indentation,
- leading_space_count => $leading_space_count,
- outdent_long_lines => $outdent_long_lines,
- list_type => "",
- is_hanging_side_comment => $is_hanging_side_comment,
- maximum_line_length => maximum_line_length_for_level($level),
- rvertical_tightness_flags => $rvertical_tightness_flags,
- );
-
- # Initialize a global flag saying if the last line of the group should
- # match end of group and also terminate the group. There should be no
- # returns between here and where the flag is handled at the bottom.
- my $col_matching_terminal = 0;
- if ( defined($j_terminal_match) ) {
-
- # remember the column of the terminal ? or { to match with
- $col_matching_terminal = $current_line->get_column($j_terminal_match);
-
- # set global flag for sub decide_if_aligned
- $is_matching_terminal_line = 1;
- }
-
- # --------------------------------------------------------------------
- # It simplifies things to create a zero length side comment
- # if none exists.
- # --------------------------------------------------------------------
- make_side_comment( $new_line, $level_end );
-
- # --------------------------------------------------------------------
- # Decide if this is a simple list of items.
- # There are 3 list types: none, comma, comma-arrow.
- # We use this below to be less restrictive in deciding what to align.
- # --------------------------------------------------------------------
- if ($is_forced_break) {
- decide_if_list($new_line);
- }
-
- if ($current_line) {
-
- # --------------------------------------------------------------------
- # Allow hanging side comment to join current group, if any
- # This will help keep side comments aligned, because otherwise we
- # will have to start a new group, making alignment less likely.
- # --------------------------------------------------------------------
- join_hanging_comment( $new_line, $current_line )
- if $is_hanging_side_comment;
-
- # --------------------------------------------------------------------
- # If there is just one previous line, and it has more fields
- # than the new line, try to join fields together to get a match with
- # the new line. At the present time, only a single leading '=' is
- # allowed to be compressed out. This is useful in rare cases where
- # a table is forced to use old breakpoints because of side comments,
- # and the table starts out something like this:
- # my %MonthChars = ('0', 'Jan', # side comment
- # '1', 'Feb',
- # '2', 'Mar',
- # Eliminating the '=' field will allow the remaining fields to line up.
- # This situation does not occur if there are no side comments
- # because scan_list would put a break after the opening '('.
- # --------------------------------------------------------------------
- eliminate_old_fields( $new_line, $current_line );
-
- # --------------------------------------------------------------------
- # If the new line has more fields than the current group,
- # see if we can match the first fields and combine the remaining
- # fields of the new line.
- # --------------------------------------------------------------------
- eliminate_new_fields( $new_line, $current_line );
-
- # --------------------------------------------------------------------
- # Flush previous group unless all common tokens and patterns match..
- # --------------------------------------------------------------------
- check_match( $new_line, $current_line );
-
- # --------------------------------------------------------------------
- # See if there is space for this line in the current group (if any)
- # --------------------------------------------------------------------
- if ($current_line) {
- check_fit( $new_line, $current_line );
- }
- }
-
- # --------------------------------------------------------------------
- # Append this line to the current group (or start new group)
- # --------------------------------------------------------------------
- add_to_group($new_line);
-
- # Future update to allow this to vary:
- $current_line = $new_line if ( $maximum_line_index == 0 );
-
- # output this group if it ends in a terminal else or ternary line
- if ( defined($j_terminal_match) ) {
-
- # if there is only one line in the group (maybe due to failure to match
- # perfectly with previous lines), then align the ? or { of this
- # terminal line with the previous one unless that would make the line
- # too long
- if ( $maximum_line_index == 0 ) {
- my $col_now = $current_line->get_column($j_terminal_match);
- my $pad = $col_matching_terminal - $col_now;
- my $padding_available =
- $current_line->get_available_space_on_right();
- if ( $pad > 0 && $pad <= $padding_available ) {
- $current_line->increase_field_width( $j_terminal_match, $pad );
- }
- }
- my_flush();
- $is_matching_terminal_line = 0;
- }
-
- # --------------------------------------------------------------------
- # Step 8. Some old debugging stuff
- # --------------------------------------------------------------------
- VALIGN_DEBUG_FLAG_APPEND && do {
- print STDOUT "APPEND fields:";
- dump_array( @{$rfields} );
- print STDOUT "APPEND tokens:";
- dump_array( @{$rtokens} );
- print STDOUT "APPEND patterns:";
- dump_array( @{$rpatterns} );
- dump_alignments();
- };
-
- return;
-}
-
-sub join_hanging_comment {
-
-    # Convert a line that holds nothing but a side comment into a "hanging
-    # side comment" by padding it out to the field count of the previous
-    # line.
-    #   arg 1: the candidate line; its field, token and pattern arrays are
-    #          rewritten in place on success
-    #   arg 2: the previous line of the group
-    # Returns 1 if the conversion was made, otherwise 0.
-    my ( $line, $old_line ) = @_;
-
-    # the candidate must consist of exactly two fields ...
-    my $jmax = $line->get_jmax();
-    if ( $jmax != 1 ) { return 0 }
-
-    # ... the second of which is a comment ...
-    my $rtokens = $line->get_rtokens();
-    if ( $rtokens->[0] ne '#' ) { return 0 }
-
-    # ... and the first of which holds nothing but whitespace
-    my $rfields = $line->get_rfields();
-    if ( $rfields->[0] !~ /^\s*$/ ) { return 0 }
-
-    # the previous line must have more fields than the candidate
-    my $jmax_old = $old_line->get_jmax();
-    if ( $jmax_old <= $jmax ) { return 0 }
-
-    my $rpatterns = $line->get_rpatterns();
-    $line->set_is_hanging_side_comment(1);
-    $line->set_jmax($jmax_old);
-
-    # move the comment, with its token and pattern, out to the last slot
-    my $jlast = $jmax_old - 1;
-    $rfields->[$jmax_old] = $rfields->[1];
-    $rtokens->[$jlast]    = $rtokens->[0];
-    $rpatterns->[$jlast]  = $rpatterns->[0];
-
-    # Fill every slot in between.  The filler fields are a single blank
-    # (the original code notes that anything else caused a glitch); the
-    # filler tokens and patterns are empty strings.
-    @{$rfields}[ 1 .. $jlast ]       = (" ") x $jlast;
-    @{$rtokens}[ 0 .. $jlast - 1 ]   = ("") x $jlast;
-    @{$rpatterns}[ 0 .. $jlast - 1 ] = ("") x $jlast;
-
-    return 1;
-}
-
-sub eliminate_old_fields {
- # Try to merge fields of the single previous line ($old_line) so that it can align with $new_line; on success $old_line is modified in place.
- my $new_line = shift;
- my $jmax = $new_line->get_jmax();
- if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } # running max/min are updated even if we return early below
- if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
-
- # there must be one previous line
- return unless ( $maximum_line_index == 0 );
-
- my $old_line = shift;
- my $maximum_field_index = $old_line->get_jmax();
-
- ###############################################
- # Moved below to allow new coding for => matches
- # return unless $maximum_field_index > $jmax;
- ###############################################
-
- # Identify specific cases where field elimination is allowed:
- # case=1: both lines have comma-separated lists, and the first
- # line has an equals
- # case=2: both lines have leading equals
-
- # case 1 is the default
- my $case = 1;
-
- # See if case 2: both lines have leading '='
- # We'll require similar leading patterns in this case
- my $old_rtokens = $old_line->get_rtokens();
- my $rtokens = $new_line->get_rtokens();
- my $rpatterns = $new_line->get_rpatterns();
- my $old_rpatterns = $old_line->get_rpatterns();
- if ( $rtokens->[0] =~ /^=>?\d*$/
- && $old_rtokens->[0] eq $rtokens->[0]
- && $old_rpatterns->[0] eq $rpatterns->[0] )
- {
- $case = 2;
- }
-
- # not too many fewer fields in new line for case 1
- return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
-
- # case 1 must have side comment
- my $old_rfields = $old_line->get_rfields();
- return
- if ( $case == 1
- && length( $old_rfields->[$maximum_field_index] ) == 0 );
-
- my $rfields = $new_line->get_rfields();
-
- my $hid_equals = 0;
-
- my @new_alignments = ();
- my @new_fields = ();
- my @new_matching_patterns = ();
- my @new_matching_tokens = ();
-
- my $j = 0;
- my $current_field = '';
- my $current_pattern = '';
-
- # loop over all old tokens, accumulating old fields until an old token matches the next new token
- my $in_match = 0;
- foreach my $k ( 0 .. $maximum_field_index - 1 ) {
- $current_field .= $old_rfields->[$k];
- $current_pattern .= $old_rpatterns->[$k];
- last if ( $j > $jmax - 1 );
-
- if ( $old_rtokens->[$k] eq $rtokens->[$j] ) {
- $in_match = 1;
- $new_fields[$j] = $current_field;
- $new_matching_patterns[$j] = $current_pattern;
- $current_field = '';
- $current_pattern = '';
- $new_matching_tokens[$j] = $old_rtokens->[$k];
- $new_alignments[$j] = $old_line->get_alignment($k);
- $j++;
- }
- else {
-
- if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) {
- last if ( $case == 2 ); # avoid problems with stuff
- # like: $a=$b=$c=$d;
- $hid_equals = 1;
- }
- last
- if ( $in_match && $case == 1 )
- ; # disallow gaps in matching field types in case 1
- }
- }
-
- # Modify the current state if we are successful.
- # We must exactly reach the ends of the new list for success, and the old
- # pattern must have more fields. Here is an example where the first and
- # second lines have the same number, and we should not align:
- # my @a = map chr, 0 .. 255;
- # my @b = grep /\W/, @a;
- # my @c = grep /[^\w]/, @a;
-
- # Otherwise, we would get all of the commas aligned, which doesn't work as
- # well:
- # my @a = map chr, 0 .. 255;
- # my @b = grep /\W/, @a;
- # my @c = grep /[^\w]/, @a;
-
- if ( ( $j == $jmax )
- && ( $current_field eq '' )
- && ( $case != 1 || $hid_equals )
- && ( $maximum_field_index > $jmax ) )
- {
- my $k = $maximum_field_index;
- $current_field .= $old_rfields->[$k];
- $current_pattern .= $old_rpatterns->[$k];
- $new_fields[$j] = $current_field;
- $new_matching_patterns[$j] = $current_pattern;
-
- $new_alignments[$j] = $old_line->get_alignment($k);
- $maximum_field_index = $j;
-
- $old_line->set_alignments(@new_alignments);
- $old_line->set_jmax($jmax);
- $old_line->set_rtokens( \@new_matching_tokens );
- $old_line->set_rfields( \@new_fields );
- $old_line->set_rpatterns( \@{$rpatterns} ); # NOTE(review): installs the NEW line's pattern array (same reference); @new_matching_patterns is built above but never used - confirm intent
- }
-
- # Dumb Down starting match if necessary:
- #
- # Consider the following two lines:
- #
- # {
- # $a => 20 > 3 ? 1 : 0,
- # $xyz => 5,
- # }
-
-# We would like to get alignment regardless of the order of the two lines.
-# If the lines come in this order, then we will simplify the patterns of the first line
-# here in sub eliminate_old_fields (the elsif branch below).
-# If the lines come in reverse order, then we achieve this with eliminate_new_fields.
-
- # This update is currently restricted to leading '=>' matches. Although we
- # could do this for both '=' and '=>', overall the results for '=' come out
- # better without this step because this step can eliminate some other good
- # matches. For example, with the '=' we get:
-
-# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
-# my @dsf = map "$_\x{FFFE}Fred", @disilva;
-# my @dsj = map "$_\x{FFFE}John", @disilva;
-# my @dsJ = map "$_ John", @disilva;
-
- # without including '=' we get:
-
-# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
-# my @dsf = map "$_\x{FFFE}Fred", @disilva;
-# my @dsj = map "$_\x{FFFE}John", @disilva;
-# my @dsJ = map "$_ John", @disilva;
- elsif (
- $case == 2
-
- && @new_matching_tokens == 1
- ##&& $new_matching_tokens[0] =~ /^=/ # see note above
- && $new_matching_tokens[0] =~ /^=>/
- && $maximum_field_index > 2
- )
- {
- my $jmaxm = $jmax - 1; # NOTE(review): not used below
- my $kmaxm = $maximum_field_index - 1;
- my $have_side_comment = $old_rtokens->[$kmaxm] eq '#'; # NOTE(review): not used below
-
- # We need to reduce the group pattern to be just two tokens,
- # the leading equality or => and the final side comment
-
- my $mid_field = join "",
- @{$old_rfields}[ 1 .. $maximum_field_index - 1 ];
- my $mid_patterns = join "",
- @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ];
- my @new_alignments = ( # NOTE(review): shadows the outer @new_alignments and is not passed to set_alignments in this branch
- $old_line->get_alignment(0),
- $old_line->get_alignment( $maximum_field_index - 1 )
- );
- my @new_tokens =
- ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] );
- my @new_fields = (
- $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index]
- );
- my @new_patterns = (
- $old_rpatterns->[0], $mid_patterns,
- $old_rpatterns->[$maximum_field_index]
- );
-
- $maximum_field_index = 2;
- $old_line->set_jmax($maximum_field_index);
- $old_line->set_rtokens( \@new_tokens );
- $old_line->set_rfields( \@new_fields );
- $old_line->set_rpatterns( \@new_patterns );
- # restart the group with the simplified old line as its only member
- initialize_for_new_group();
- add_to_group($old_line);
- $current_line = $old_line;
- }
- return;
-}
-
-# Make sure that a line ends with a side comment field, appending an
-# empty one if the line does not already have a side comment
-sub make_side_comment {
-    my ( $new_line, $level_end ) = @_;
-    my $jmax    = $new_line->get_jmax();
-    my $rtokens = $new_line->get_rtokens();
-
-    # a side comment exists if the last alignment token is a '#'
-    my $has_side_comment = $jmax != 0 && $rtokens->[ $jmax - 1 ] eq '#';
-
-    if ($has_side_comment) {
-
-        my $line_number = $vertical_aligner_self->get_output_line_number();
-        my $rfields     = $new_line->get_rfields();
-
-        # Forget the old side comment location if it is more than 12
-        # output lines back ...
-        my $forget = $line_number - $last_side_comment_line_number > 12;
-
-        # ... or if the block level has dropped and this line starts with
-        # a closing brace
-        $forget ||=
-          ( $level_end < $last_side_comment_level && $rfields->[0] =~ /^}/ );
-
-        forget_side_comment() if ($forget);
-
-        # remember where this side comment was seen
-        $last_side_comment_line_number = $line_number;
-        $last_side_comment_level       = $level_end;
-        return;
-    }
-
-    # No side comment yet: append a '#' token followed by an empty field
-    # with pattern '#', and record the new field count.
-    my $rfields   = $new_line->get_rfields();
-    my $rpatterns = $new_line->get_rpatterns();
-    $rtokens->[$jmax] = '#';
-    $jmax++;
-    $rfields->[$jmax]   = '';
-    $rpatterns->[$jmax] = '#';
-    $new_line->set_jmax($jmax);
-    $new_line->set_jmax_original_line($jmax);
-    return;
-}
-
-sub decide_if_list {
-
-    # Decide if a line is a list and, if the leading token qualifies,
-    # record the result with set_list_type.
-    #
-    # A list is a line in which all of the field separators are commas or
-    # comma-arrows (except for the trailing #).  Separator tokens look like
-    # ',3' or '=>2', where the trailing digit is the nesting depth.  Opening
-    # braces are also accepted after the first token so that nested list
-    # items are possible.
-    #
-    # The list type stored is the first token if all tokens qualify, or an
-    # empty string if a later token does not.  Nothing is stored if the
-    # first token is not a comma or comma-arrow.
-    my ($line) = @_;
-
-    my $rtokens     = $line->get_rtokens();
-    my $first_token = $rtokens->[0];
-
-    # the leading token decides whether this can be a list at all
-    return unless ( $first_token =~ /^(\,|=>)/ );
-
-    my $list_type = $first_token;
-    my $jmax      = $line->get_jmax();
-
-    # examine the remaining separators, up to index $jmax - 2
-    foreach my $j ( 1 .. $jmax - 2 ) {
-        next if ( $rtokens->[$j] =~ /^(\,|=>|\{)/ );
-        $list_type = "";
-        last;
-    }
-
-    $line->set_list_type($list_type);
-    return;
-}
-
-sub eliminate_new_fields {
-
-    # If the new line has more fields than the old line, and the leading
-    # tokens and patterns of the two lines agree, fold the extra fields of
-    # the new line into a single field so that the field counts match.
-    # The arrays of the new line are modified in place, and its jmax is
-    # (re)set before a normal return.
-    my ( $new_line, $old_line ) = @_;
-    if ( $maximum_line_index < 0 ) { return }
-
-    my $jmax        = $new_line->get_jmax();
-    my $old_rtokens = $old_line->get_rtokens();
-    my $rtokens     = $new_line->get_rtokens();
-
-    # do both lines lead off with the same '=' or '=>' token?
-    my $is_assignment = ( $rtokens->[0] =~ /^=>?\d*$/
-          && ( $old_rtokens->[0] eq $rtokens->[0] ) );
-
-    # unless this is an assignment, the variation must be monotonic
-    if ( !$is_assignment && $previous_maximum_jmax_seen > $jmax ) { return }
-
-    # the new line must have more fields than the old line
-    my $jmax_old = $old_line->get_jmax();
-    if ( $jmax_old >= $jmax ) { return }
-
-    if ( !$is_assignment ) {
-
-        # only if monotonic
-        if ( $old_line->get_jmax_original_line() != $minimum_jmax_seen ) {
-            return;
-        }
-
-        # never combine fields of a comma list
-        if ( $jmax_old <= 1 )                      { return }
-        if ( $new_line->get_list_type() =~ /^,/ ) { return }
-    }
-
-    my $rfields       = $new_line->get_rfields();
-    my $rpatterns     = $new_line->get_rpatterns();
-    my $old_rpatterns = $old_line->get_rpatterns();
-
-    # index of the field which will absorb the extra fields
-    my $jlast = $jmax_old - 1;
-
-    # compare all OLD tokens and patterns, except for the comment
-    my $all_agree = 1;
-    foreach my $k ( 0 .. $jlast - 1 ) {
-        next
-          if ( $old_rtokens->[$k] eq $rtokens->[$k]
-            && $old_rpatterns->[$k] eq $rpatterns->[$k] );
-        $all_agree = 0;
-        last;
-    }
-
-    # no agreement: leave the new line as it is
-    if ( !$all_agree ) {
-        $new_line->set_jmax($jmax);
-        return;
-    }
-
-    # the leading tokens agree, so combine the extra new fields
-    foreach my $k ( $jmax_old .. $jmax - 1 ) {
-        $rfields->[$jlast]   .= $rfields->[$k];
-        $rpatterns->[$jlast] .= $rpatterns->[$k];
-        $rfields->[$k]   = "";
-        $rpatterns->[$k] = "";
-    }
-
-    # the combined field is now followed directly by the side comment
-    $rtokens->[$jlast]      = '#';
-    $rfields->[$jmax_old]   = $rfields->[$jmax];
-    $rpatterns->[$jmax_old] = $rpatterns->[$jmax];
-
-    $new_line->set_jmax($jmax_old);
-    return;
-}
-
-sub fix_terminal_ternary {
-
- # Add empty fields as necessary to align a ternary term
- # like this:
- #
- # my $leapyear =
- # $year % 4 ? 0
- # : $year % 100 ? 1
- # : $year % 400 ? 0
- # : 1;
- #
- # returns the index of the matching '?' token in the previous line, or nothing if the line was left unchanged
- # args are refs to the field, token and pattern arrays of the new line; they are updated only on success
- my ( $rfields, $rtokens, $rpatterns ) = @_;
-
- my $jmax = @{$rfields} - 1;
- my $old_line = $group_lines[$maximum_line_index];
- my $rfields_old = $old_line->get_rfields();
-
- my $rpatterns_old = $old_line->get_rpatterns();
- my $rtokens_old = $old_line->get_rtokens();
- my $maximum_field_index = $old_line->get_jmax();
-
- # look for the question mark after the :
- my ($jquestion);
- my $depth_question;
- my $pad = "";
- foreach my $j ( 0 .. $maximum_field_index - 1 ) {
- my $tok = $rtokens_old->[$j];
- if ( $tok =~ /^\?(\d+)$/ ) {
- $depth_question = $1;
-
- # depth must be correct
- next unless ( $depth_question eq $group_level );
-
- $jquestion = $j;
- if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
- $pad = " " x length($1);
- }
- else {
- return; # shouldn't happen
- }
- last;
- }
- }
- return unless ( defined($jquestion) ); # shouldn't happen
-
- # Now splice the tokens and patterns of the previous line
- # into the else line to ensure a match. Add empty fields
- # as necessary.
- my $jadd = $jquestion;
-
- # Work on copies of the actual arrays in case we have
- # to return due to an error
- my @fields = @{$rfields};
- my @patterns = @{$rpatterns};
- my @tokens = @{$rtokens};
-
- VALIGN_DEBUG_FLAG_TERNARY && do {
- local $" = '><';
- print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
- print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
- print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
- print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
- print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
- print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
- };
-
- # handle cases of leading colon on this line
- if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
-
- my ( $colon, $therest ) = ( $1, $2 );
-
- # Handle sub-case of first field with leading colon plus additional code
- # This is the usual situation as at the '1' below:
- # ...
- # : $year % 400 ? 0
- # : 1;
- if ($therest) {
-
- # Split the first field after the leading colon and insert padding.
- # Note that this padding will remain even if the terminal value goes
- # out on a separate line. This does not seem to look too bad, so no
- # mechanism has been included to undo it.
- my $field1 = shift @fields; # remove the original first field; $field1 itself is not used below
- unshift @fields, ( $colon, $pad . $therest );
-
- # change the leading pattern from : to ?
- return unless ( $patterns[0] =~ s/^\:/?/ );
-
- # install leading tokens and patterns of existing line
- unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
- unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
-
- # insert appropriate number of empty fields
- splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
- }
-
- # handle sub-case of first field just equal to leading colon.
- # This can happen for example in the example below where
- # the leading '(' would create a new alignment token
- # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
- # : ( $mname = $name . '->' );
- else {
-
- return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
-
- # prepend a leading ? onto the second pattern
- $patterns[1] = "?b" . $patterns[1];
-
- # pad the second field
- $fields[1] = $pad . $fields[1];
-
- # install leading tokens and patterns of existing line, replacing
- # leading token and inserting appropriate number of empty fields
- splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
- splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
- splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
- }
- }
-
- # Handle case of no leading colon on this line. This will
- # be the case when -wba=':' is used. For example,
- # $year % 400 ? 0 :
- # 1;
- else {
-
- # install leading tokens and patterns of existing line
- $patterns[0] = '?' . 'b' . $patterns[0];
- unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
- unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
-
- # insert appropriate number of empty fields
- $jadd = $jquestion + 1;
- $fields[0] = $pad . $fields[0];
- splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
- }
-
- VALIGN_DEBUG_FLAG_TERNARY && do {
- local $" = '><';
- print STDOUT "MODIFIED TOKENS=<@tokens>\n";
- print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
- print STDOUT "MODIFIED FIELDS=<@fields>\n";
- };
-
- # all ok .. update the arrays
- @{$rfields} = @fields;
- @{$rtokens} = @tokens;
- @{$rpatterns} = @patterns;
-
- # force a flush after this line
- return $jquestion;
-}
-
-sub fix_terminal_else {
-
-    # Add empty fields as necessary to align a balanced terminal
-    # else block to a previous if/elsif/unless block,
-    # like this:
-    #
-    #    if   ( 1 || $x ) { print "ok 13\n"; }
-    #    else             { print "not ok 13\n"; }
-    #
-    # The arguments are references to the field, token and pattern arrays
-    # of the else line.  They are modified in place on success and left
-    # untouched otherwise.
-    #
-    # Returns the index, within the previous line, of the opening block
-    # brace which the brace of the else block has been matched with.
-    # Returns nothing if the line was left unchanged.
-    my ( $rfields, $rtokens, $rpatterns ) = @_;
-    my $jmax = @{$rfields} - 1;
-    return unless ( $jmax > 0 );
-
-    # check for balanced else block following if/elsif/unless
-    my $rfields_old = $current_line->get_rfields();
-
-    # TBD: add handling for 'case'
-    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
-
-    # look for the opening brace after the else, and extract the depth;
-    # without a brace this is probably: "else  # side_comment"
-    my $tok_brace = $rtokens->[0];
-    return unless ( $tok_brace =~ /^\{(\d+)/ );
-    my $depth_brace = $1;
-
-    my $rpatterns_old       = $current_line->get_rpatterns();
-    my $rtokens_old         = $current_line->get_rtokens();
-    my $maximum_field_index = $current_line->get_jmax();
-
-    # be sure the previous if/elsif is followed by an opening paren
-    my $jparen    = 0;
-    my $tok_paren = '(' . $depth_brace;
-    return
-      unless ( $rtokens_old->[$jparen] eq $tok_paren );    # shouldn't happen
-
-    # Now find the opening block brace of the previous line
-    my $jbrace;
-    foreach my $j ( 1 .. $maximum_field_index - 1 ) {
-        if ( $rtokens_old->[$j] eq $tok_brace ) {
-            $jbrace = $j;
-            last;
-        }
-    }
-    return unless ( defined($jbrace) );    # shouldn't happen
-
-    # Now splice the tokens and patterns of the previous line
-    # into the else line to ensure a match.  Add empty fields
-    # as necessary.
-    my $jadd = $jbrace - $jparen;
-    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
-    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
-    splice( @{$rfields},   1, 0, ('') x $jadd );
-
-    # Force a flush after this line.  An earlier version tested here for a
-    # previous line starting with 'case', but that test could never succeed
-    # because we only get this far if the previous line starts with
-    # if/elsif/unless.  It should be restored if 'case' handling is added.
-    return $jbrace;
-}
-
-{ # sub check_match
- my %is_good_alignment; # tokens for which a marginal match is accepted more readily; filled in BEGIN
-
- BEGIN {
-
- # Vertically aligning on certain "good" tokens is usually okay
- # so we can be less restrictive in marginal cases.
- my @q = qw( { ? => = );
- push @q, (',');
- @is_good_alignment{@q} = (1) x scalar(@q);
- }
-
- sub check_match {
-
- # See if the current line matches the current vertical alignment group.
- # If not, flush the current group.
- my ( $new_line, $old_line ) = @_;
- # Returns nothing in either case; a mismatch is handled at label NO_MATCH by calling my_flush().
- # uses global variables:
- # $previous_minimum_jmax_seen
- # $maximum_jmax_seen
- # $maximum_line_index
- # $marginal_match
- my $jmax = $new_line->get_jmax();
- my $maximum_field_index = $old_line->get_jmax();
-
- # flush if this line has too many fields
- if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
-
- # flush if adding this line would make a non-monotonic field count
- if (
- ( $maximum_field_index > $jmax ) # this has too few fields
- && (
- ( $previous_minimum_jmax_seen <
- $jmax ) # and wouldn't be monotonic
- || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
- )
- )
- {
- goto NO_MATCH;
- }
-
- # otherwise see if this line matches the current group
- my $jmax_original_line = $new_line->get_jmax_original_line();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $list_type = $new_line->get_list_type();
-
- my $group_list_type = $old_line->get_list_type();
- my $old_rpatterns = $old_line->get_rpatterns();
- my $old_rtokens = $old_line->get_rtokens();
- # $jlimit is the index of the last token compared in the loops below
- my $jlimit = $jmax - 1;
- if ( $maximum_field_index > $jmax ) {
- $jlimit = $jmax_original_line;
- --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
- }
-
- # handle comma-separated lists ..
- if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
- for my $j ( 0 .. $jlimit ) {
- my $old_tok = $old_rtokens->[$j];
- next unless $old_tok;
- my $new_tok = $rtokens->[$j];
- next unless $new_tok;
-
- # lists always match ...
- # unless they would align any '=>'s with ','s
- goto NO_MATCH
- if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
- || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
- }
- }
-
- # do detailed check for everything else except hanging side comments
- elsif ( !$is_hanging_side_comment ) {
-
- my $leading_space_count = $new_line->get_leading_space_count();
-
- my $max_pad = 0;
- my $min_pad = 0;
- my $saw_good_alignment;
-
- for my $j ( 0 .. $jlimit ) {
-
- my $old_tok = $old_rtokens->[$j];
- my $new_tok = $rtokens->[$j];
-
- # Note on encoding used for alignment tokens:
- # -------------------------------------------
- # Tokens are "decorated" with information which can help
- # prevent unwanted alignments. Consider for example the
- # following two lines:
- # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
- # local ( $i, $f ) = &'bdiv( $xn, $xd );
- # There are three alignment tokens in each line, a comma,
- # an =, and a comma. In the first line these three tokens
- # are encoded as:
- # ,4+local-18 =3 ,4+split-7
- # and in the second line they are encoded as
- # ,4+local-18 =3 ,4+&'bdiv-8
- # Tokens always at least have token name and nesting
- # depth. So in this example the ='s are at depth 3 and
- # the ,'s are at depth 4. This prevents aligning tokens
- # of different depths. Commas contain additional
- # information, as follows:
- # , {depth} + {container name} - {spaces to opening paren}
- # This allows us to reject matching the rightmost commas
- # in the above two lines, since they are for different
- # function calls. This encoding is done in
- # 'sub send_lines_to_vertical_aligner'.
-
- # Pick off actual token.
- # Everything up to the first digit is the actual token.
- my $alignment_token = $new_tok;
- if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
-
- # see if the decorated tokens match
- my $tokens_match = $new_tok eq $old_tok
-
- # Exception for matching terminal : of ternary statement..
- # consider containers prefixed by ? and : a match
- || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
-
- # No match if the alignment tokens differ...
- if ( !$tokens_match ) {
-
- # ...Unless this is a side comment
- if (
- $j == $jlimit
-
- # and there is either at least one alignment token
- # or this is a single item following a list. This
- # latter rule is required for 'December' to join
- # the following list:
- # my (@months) = (
- # '', 'January', 'February', 'March',
- # 'April', 'May', 'June', 'July',
- # 'August', 'September', 'October', 'November',
- # 'December'
- # );
- # If it doesn't then the -lp formatting will fail.
- && ( $j > 0 || $old_tok =~ /^,/ )
- )
- {
- $marginal_match = 1
- if ( $marginal_match == 0
- && $maximum_line_index == 0 );
- last;
- }
-
- goto NO_MATCH;
- }
-
- # Calculate amount of padding required to fit this in.
- # $pad is the number of spaces by which we must increase
- # the current field to squeeze in this field.
- my $pad =
- length( $rfields->[$j] ) - $old_line->current_field_width($j);
- if ( $j == 0 ) { $pad += $leading_space_count; }
-
- # remember max pads to limit marginal cases
- if ( $alignment_token ne '#' ) {
- if ( $pad > $max_pad ) { $max_pad = $pad }
- if ( $pad < $min_pad ) { $min_pad = $pad }
- }
- if ( $is_good_alignment{$alignment_token} ) {
- $saw_good_alignment = 1;
- }
-
- # If patterns don't match, we have to be careful...
- if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) {
-
- # flag this as a marginal match since patterns differ
- $marginal_match = 1
- if ( $marginal_match == 0 && $maximum_line_index == 0 );
-
- # We have to be very careful about aligning commas
- # when the patterns don't match, because it can be
- # worse to create an alignment where none is needed
- # than to omit one. Here's an example where the ','s
- # are not in named containers. The first line below
- # should not match the next two:
- # ( $a, $b ) = ( $b, $r );
- # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
- # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
- if ( $alignment_token eq ',' ) {
-
- # do not align commas unless they are in named containers
- goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
- }
-
- # do not align parens unless patterns match;
- # large ugly spaces can occur in math expressions.
- elsif ( $alignment_token eq '(' ) {
-
- # But we can allow a match if the parens don't
- # require any padding.
- if ( $pad != 0 ) { goto NO_MATCH }
- }
-
- # Handle an '=' alignment with different patterns to
- # the left.
- elsif ( $alignment_token eq '=' ) {
-
- # It is best to be a little restrictive when
- # aligning '=' tokens. Here is an example of
- # two lines that we will not align:
- # my $variable=6;
- # $bb=4;
- # The problem is that one is a 'my' declaration,
- # and the other isn't, so they're not very similar.
- # We will filter these out by comparing the first
- # letter of the pattern. This is crude, but works
- # well enough.
- if (
- substr( $old_rpatterns->[$j], 0, 1 ) ne
- substr( $rpatterns->[$j], 0, 1 ) )
- {
- goto NO_MATCH;
- }
-
- # If we pass that test, we'll call it a marginal match.
- # Here is an example of a marginal match:
- # $done{$$op} = 1;
- # $op = compile_bblock($op);
- # The left tokens are both identifiers, but
- # one accesses a hash and the other doesn't.
- # We'll let this be a tentative match and undo
- # it later if we don't find more than 2 lines
- # in the group.
- elsif ( $maximum_line_index == 0 ) {
- $marginal_match =
- 2; # =2 prevents being undone below
- }
- }
- }
-
- # Don't let line with fewer fields increase column widths
- # ( align3.t )
- if ( $maximum_field_index > $jmax ) {
-
- # Exception: suspend this rule to allow last lines to join
- if ( $pad > 0 ) { goto NO_MATCH; }
- }
- } ## end for my $j ( 0 .. $jlimit)
-
- # Turn off the "marginal match" flag in some cases...
- # A "marginal match" occurs when the alignment tokens agree
- # but there are differences in the other tokens (patterns).
- # If we leave the marginal match flag set, then the rule is that we
- # will align only if there are more than two lines in the group.
- # We will turn off the flag if we almost have a match
- # and either we have seen a good alignment token or we
- # just need a small pad (2 spaces) to fit. These rules are
- # the result of experimentation. Tokens which misaligned by just
- # one or two characters are annoying. On the other hand,
- # large gaps to less important alignment tokens are also annoying.
- if ( $marginal_match == 1
- && $jmax == $maximum_field_index
- && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
- )
- {
- $marginal_match = 0;
- }
- ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
- }
-
- # We have a match (even if marginal).
- # If the current line has fewer fields than the current group
- # but otherwise matches, copy the remaining group fields to
- # make it a perfect match.
- if ( $maximum_field_index > $jmax ) {
-
- ##########################################################
- # FIXME: The previous version had a bug which made side comments
- # become regular fields, so for now the program does not allow a
- # line with side comment to match. This should eventually be done.
- # The best test file for experimenting is 'lista.t'
- ##########################################################
-
- my $comment = $rfields->[$jmax];
- goto NO_MATCH if ($comment);
-
- # Corrected loop
- for my $jj ( $jlimit .. $maximum_field_index ) {
- $rtokens->[$jj] = $old_rtokens->[$jj];
- $rfields->[ $jj + 1 ] = '';
- $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ];
- }
-
-## THESE DO NOT GIVE CORRECT RESULTS
-## $rfields->[$jmax] = $comment;
-## $new_line->set_jmax($jmax);
-
- }
- return;
-
- NO_MATCH:
- ##print "no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$old_rtokens->[0]\n";
- my_flush();
- return;
- }
-}
-
-sub check_fit {
- # See if $new_line fits into the current group by widening fields of $old_line as needed; if a field needs more padding than is available, restore the saved columns and flush.
- my ( $new_line, $old_line ) = @_;
- return unless ( $maximum_line_index >= 0 );
-
- my $jmax = $new_line->get_jmax();
- my $leading_space_count = $new_line->get_leading_space_count();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
-
- my $group_list_type = $group_lines[0]->get_list_type();
- # NOTE(review): $is_hanging_side_comment, $rtokens, $rpatterns, $group_list_type and $padding_so_far are assigned in this sub but never read
- my $padding_so_far = 0;
- my $padding_available = $old_line->get_available_space_on_right();
-
- # save current columns in case this doesn't work
- save_alignment_columns();
-
- my $maximum_field_index = $old_line->get_jmax();
- for my $j ( 0 .. $jmax ) {
- # $pad is the extra width field $j of the old line would need to hold this field
- my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j);
-
- if ( $j == 0 ) {
- $pad += $leading_space_count;
- }
-
- # remember largest gap of the group, excluding gap to side comment
- if ( $pad < 0
- && $group_maximum_gap < -$pad
- && $j > 0
- && $j < $jmax - 1 )
- {
- $group_maximum_gap = -$pad;
- }
-
- next if $pad < 0;
-
- ## OLD NOTES:
- ## This patch helps sometimes, but it doesn't check to see if
- ## the line is too long even without the side comment. It needs
- ## to be reworked.
- ##don't let a long token with no trailing side comment push
- ##side comments out, or end a group. (sidecmt1.t)
- ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0);
-
- # BEGIN PATCH for keith1.txt.
- # If the group began matching multiple tokens but later this got
- # reduced to a fewer number of matching tokens, then the fields
- # of the later lines will still have to fit into their corresponding
- # fields. So a large later field will "push" the other fields to
- # the right, including previous side comments, and if there is no room
- # then there is no match.
- # For example, look at the last line in the following snippet:
-
- # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false;
- # my $env = ($b_prod_db) ? "prd" : "val";
- # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL";
- # my $task = $OPT{t};
- # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash";
-
- # The long term will push the '?' to the right to fit in, and in this
- # case there is not enough room so it will not match the equals unless
- # we do something special.
-
- # Usually it looks good to keep an initial alignment of '=' going, and
- # we can do this if the long term can fit in the space taken up by the
- # remaining fields (the ? : fields here).
-
- # Allowing any matching token for now, but it could be restricted
- # to an '='-like token if necessary.
-
- if (
- $pad > $padding_available
- && $jmax == 2 # matching one thing (plus #)
- && $j == $jmax - 1 # at last field
- && $maximum_line_index > 0 # more than 1 line in group now
- && $jmax < $maximum_field_index # other lines have more fields
- && length( $rfields->[$jmax] ) == 0 # no side comment
-
- # Uncomment to match only equals (but this does not seem necessary)
- # && $rtokens->[0] =~ /^=\d/ # matching an equals
- )
- {
- my $extra_padding = 0;
- foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) {
- $extra_padding += $old_line->current_field_width($jj);
- }
-
- next if ( $pad <= $padding_available + $extra_padding );
- }
-
- # END PATCH for keith1.pl
-
- # This line will need space; let's see if we want to accept it..
- if (
-
- # not if this won't fit
- ( $pad > $padding_available )
-
- # previously, there were upper bounds placed on padding here
- # (maximum_whitespace_columns), but they were not really helpful
-
- )
- {
-
- # revert to starting state then flush; things didn't work out
- restore_alignment_columns();
- my_flush();
- last;
- }
-
- # patch to avoid excessive gaps in previous lines,
- # due to a line of fewer fields.
- # return join( ".",
- # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
- # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
- next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
-
- # looks ok, squeeze this field in
- $old_line->increase_field_width( $j, $pad );
- $padding_available -= $pad;
-
- # remember largest gap of the group, excluding gap to side comment
- if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
- $group_maximum_gap = $pad;
- }
- }
- return;
-}
-
-sub add_to_group {
-
- # The current line either starts a new alignment group or is
- # accepted into the current alignment group.
- my $new_line = shift;
- $group_lines[ ++$maximum_line_index ] = $new_line;
-
- # initialize field lengths if starting new group
- if ( $maximum_line_index == 0 ) {
-
- my $jmax = $new_line->get_jmax();
- my $rfields = $new_line->get_rfields();
- my $rtokens = $new_line->get_rtokens();
- my $col = $new_line->get_leading_space_count();
-
- for my $j ( 0 .. $jmax ) {
- $col += length( $rfields->[$j] );
-
- # create initial alignments for the new group
- my $token = "";
- if ( $j < $jmax ) { $token = $rtokens->[$j] }
- my $alignment = make_alignment( $col, $token );
- $new_line->set_alignment( $j, $alignment );
- }
-
- $maximum_jmax_seen = $jmax;
- $minimum_jmax_seen = $jmax;
- }
-
- # use previous alignments otherwise
- else {
- my @new_alignments =
- $group_lines[ $maximum_line_index - 1 ]->get_alignments();
- $new_line->set_alignments(@new_alignments);
- }
-
- # remember group jmax extremes for next call to valign_input
- $previous_minimum_jmax_seen = $minimum_jmax_seen;
- $previous_maximum_jmax_seen = $maximum_jmax_seen;
- return;
-}
-
-sub dump_array {
-
- # debug routine to dump array contents
- local $" = ')(';
- print STDOUT "(@_)\n";
- return;
-}
-
-# flush() sends the current Perl::Tidy::VerticalAligner group down the
-# pipeline to Perl::Tidy::FileWriter.
-
-# This is the external flush, which also empties the buffer and cache
-sub flush {
-
- # the buffer must be emptied first, then any cached text
- dump_valign_buffer();
-
- if ( $maximum_line_index < 0 ) {
- if ($cached_line_type) {
- $seqno_string = $cached_seqno_string;
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
- $cached_line_type = 0;
- $cached_line_text = "";
- $cached_seqno_string = "";
- }
- }
- else {
- my_flush();
- }
- return;
-}
-
-sub reduce_valign_buffer_indentation {
-
- my ($diff) = @_;
- if ( $valign_buffer_filling && $diff ) {
- my $max_valign_buffer = @valign_buffer;
- foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
- my ( $line, $leading_space_count, $level ) =
- @{ $valign_buffer[$i] };
- my $ws = substr( $line, 0, $diff );
- if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
- $line = substr( $line, $diff );
- }
- if ( $leading_space_count >= $diff ) {
- $leading_space_count -= $diff;
- $level = level_change( $leading_space_count, $diff, $level );
- }
- $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
- }
- }
- return;
-}
-
-sub level_change {
-
- # compute decrease in level when we remove $diff spaces from the
- # leading spaces
- my ( $leading_space_count, $diff, $level ) = @_;
- if ($rOpts_indent_columns) {
- my $olev =
- int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
- my $nlev = int( $leading_space_count / $rOpts_indent_columns );
- $level -= ( $olev - $nlev );
- if ( $level < 0 ) { $level = 0 }
- }
- return $level;
-}
-
-sub dump_valign_buffer {
- if (@valign_buffer) {
- foreach (@valign_buffer) {
- valign_output_step_D( @{$_} );
- }
- @valign_buffer = ();
- }
- $valign_buffer_filling = "";
- return;
-}
-
-# This is the internal flush, which leaves the cache intact
-sub my_flush {
-
- return if ( $maximum_line_index < 0 );
-
- # handle a group of comment lines
- if ( $group_type eq 'COMMENT' ) {
-
- VALIGN_DEBUG_FLAG_APPEND0 && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
-
- };
- my $leading_space_count = $comment_leading_space_count;
- my $leading_string = get_leading_string($leading_space_count);
-
- # zero leading space count if any lines are too long
- my $max_excess = 0;
- for my $i ( 0 .. $maximum_line_index ) {
- my $str = $group_lines[$i];
- my $excess =
- length($str) +
- $leading_space_count -
- maximum_line_length_for_level($group_level);
- if ( $excess > $max_excess ) {
- $max_excess = $excess;
- }
- }
-
- if ( $max_excess > 0 ) {
- $leading_space_count -= $max_excess;
- if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
- $last_outdented_line_at =
- $file_writer_object->get_output_line_number();
- unless ($outdented_line_count) {
- $first_outdented_line_at = $last_outdented_line_at;
- }
- $outdented_line_count += ( $maximum_line_index + 1 );
- }
-
- # write the group of lines
- my $outdent_long_lines = 0;
- for my $i ( 0 .. $maximum_line_index ) {
- valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
- $outdent_long_lines, "", $group_level );
- }
- }
-
- # handle a group of code lines
- else {
-
- VALIGN_DEBUG_FLAG_APPEND0 && do {
- my $group_list_type = $group_lines[0]->get_list_type();
- my ( $a, $b, $c ) = caller();
- my $maximum_field_index = $group_lines[0]->get_jmax();
- print STDOUT
-"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
-
- };
-
- # some small groups are best left unaligned
- my $do_not_align = decide_if_aligned();
-
- # optimize side comment location
- $do_not_align = adjust_side_comment($do_not_align);
-
- # recover spaces for -lp option if possible
- my $extra_leading_spaces = get_extra_leading_spaces();
-
- # all lines of this group have the same basic leading spacing
- my $group_leader_length = $group_lines[0]->get_leading_space_count();
-
- # add extra leading spaces if helpful
- # NOTE: Use zero; this did not work well
- my $min_ci_gap = 0;
-
- # loop to output all lines
- for my $i ( 0 .. $maximum_line_index ) {
- my $line = $group_lines[$i];
- valign_output_step_A( $line, $min_ci_gap, $do_not_align,
- $group_leader_length, $extra_leading_spaces );
- }
- }
- initialize_for_new_group();
- return;
-}
-
-sub decide_if_aligned {
-
- # Do not try to align two lines which are not really similar
- return unless $maximum_line_index == 1;
- return if ($is_matching_terminal_line);
-
- my $group_list_type = $group_lines[0]->get_list_type();
-
- my $do_not_align = (
-
- # always align lists
- !$group_list_type
-
- && (
-
- # don't align if it was just a marginal match
- $marginal_match
-
- # don't align two lines with big gap
- || $group_maximum_gap > 12
-
- # or lines with differing number of alignment tokens
- # TODO: this could be improved. It occasionally rejects
- # good matches.
- || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
- )
- );
-
- # But try to convert them into a simple comment group if the first line
- # a has side comment
- my $rfields = $group_lines[0]->get_rfields();
- my $maximum_field_index = $group_lines[0]->get_jmax();
- if ( $do_not_align
- && ( $maximum_line_index > 0 )
- && ( length( $rfields->[$maximum_field_index] ) > 0 ) )
- {
- combine_fields();
- $do_not_align = 0;
- }
- return $do_not_align;
-}
-
-sub adjust_side_comment {
-
- my $do_not_align = shift;
-
- # let's see if we can move the side comment field out a little
- # to improve readability (the last field is always a side comment field)
- my $have_side_comment = 0;
- my $first_side_comment_line = -1;
- my $maximum_field_index = $group_lines[0]->get_jmax();
- for my $i ( 0 .. $maximum_line_index ) {
- my $line = $group_lines[$i];
-
- if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
- $have_side_comment = 1;
- $first_side_comment_line = $i;
- last;
- }
- }
-
- my $kmax = $maximum_field_index + 1;
-
- if ($have_side_comment) {
-
- my $line = $group_lines[0];
-
- # the maximum space without exceeding the line length:
- my $avail = $line->get_available_space_on_right();
-
- # try to use the previous comment column
- my $side_comment_column = $line->get_column( $kmax - 2 );
- my $move = $last_comment_column - $side_comment_column;
-
-## my $sc_line0 = $side_comment_history[0]->[0];
-## my $sc_col0 = $side_comment_history[0]->[1];
-## my $sc_line1 = $side_comment_history[1]->[0];
-## my $sc_col1 = $side_comment_history[1]->[1];
-## my $sc_line2 = $side_comment_history[2]->[0];
-## my $sc_col2 = $side_comment_history[2]->[1];
-##
-## # FUTURE UPDATES:
-## # Be sure to ignore 'do not align' and '} # end comments'
-## # Find first $move > 0 and $move <= $avail as follows:
-## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
-## # 2. try sc_col2 if (line-sc_line2) < 12
-## # 3. try min possible space, plus up to 8,
-## # 4. try min possible space
-
- if ( $kmax > 0 && !$do_not_align ) {
-
- # but if this doesn't work, give up and use the minimum space
- if ( $move > $avail ) {
- $move = $rOpts_minimum_space_to_comment - 1;
- }
-
- # but we want some minimum space to the comment
- my $min_move = $rOpts_minimum_space_to_comment - 1;
- if ( $move >= 0
- && $last_side_comment_length > 0
- && ( $first_side_comment_line == 0 )
- && $group_level == $last_level_written )
- {
- $min_move = 0;
- }
-
- if ( $move < $min_move ) {
- $move = $min_move;
- }
-
- # previously, an upper bound was placed on $move here,
- # (maximum_space_to_comment), but it was not helpful
-
- # don't exceed the available space
- if ( $move > $avail ) { $move = $avail }
-
- # we can only increase space, never decrease
- if ( $move > 0 ) {
- $line->increase_field_width( $maximum_field_index - 1, $move );
- }
-
- # remember this column for the next group
- $last_comment_column = $line->get_column( $kmax - 2 );
- }
- else {
-
- # try to at least line up the existing side comment location
- if ( $kmax > 0 && $move > 0 && $move < $avail ) {
- $line->increase_field_width( $maximum_field_index - 1, $move );
- $do_not_align = 0;
- }
-
- # reset side comment column if we can't align
- else {
- forget_side_comment();
- }
- }
- }
- return $do_not_align;
-}
-
-sub valign_output_step_A {
-
- ###############################################################
- # This is Step A in writing vertically aligned lines.
- # The line is prepared according to the alignments which have
- # been found and shipped to the next step.
- ###############################################################
-
- my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
- $extra_leading_spaces )
- = @_;
- my $rfields = $line->get_rfields();
- my $leading_space_count = $line->get_leading_space_count();
- my $outdent_long_lines = $line->get_outdent_long_lines();
- my $maximum_field_index = $line->get_jmax();
- my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
-
- # add any extra spaces
- if ( $leading_space_count > $group_leader_length ) {
- $leading_space_count += $min_ci_gap;
- }
-
- my $str = $rfields->[0];
-
- # loop to concatenate all fields of this line and needed padding
- my $total_pad_count = 0;
- for my $j ( 1 .. $maximum_field_index ) {
-
- # skip zero-length side comments
- last
- if (
- ( $j == $maximum_field_index )
- && ( !defined( $rfields->[$j] )
- || ( length( $rfields->[$j] ) == 0 ) )
- );
-
- # compute spaces of padding before this field
- my $col = $line->get_column( $j - 1 );
- my $pad = $col - ( length($str) + $leading_space_count );
-
- if ($do_not_align) {
- $pad =
- ( $j < $maximum_field_index )
- ? 0
- : $rOpts_minimum_space_to_comment - 1;
- }
-
- # if the -fpsc flag is set, move the side comment to the selected
- # column if and only if it is possible, ignoring constraints on
- # line length and minimum space to comment
- if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
- {
- my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
- if ( $newpad >= 0 ) { $pad = $newpad; }
- }
-
- # accumulate the padding
- if ( $pad > 0 ) { $total_pad_count += $pad; }
-
- # add this field
- if ( !defined $rfields->[$j] ) {
- write_diagnostics("UNDEFined field at j=$j\n");
- }
-
- # only add padding when we have a finite field;
- # this avoids extra terminal spaces if we have empty fields
- if ( length( $rfields->[$j] ) > 0 ) {
- $str .= ' ' x $total_pad_count;
- $total_pad_count = 0;
- $str .= $rfields->[$j];
- }
- else {
- $total_pad_count = 0;
- }
-
- # update side comment history buffer
- if ( $j == $maximum_field_index ) {
- my $lineno = $file_writer_object->get_output_line_number();
- shift @side_comment_history;
- push @side_comment_history, [ $lineno, $col ];
- }
- }
-
- my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) );
-
- # ship this line off
- valign_output_step_B( $leading_space_count + $extra_leading_spaces,
- $str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags, $group_level );
- return;
-}
-
-sub get_extra_leading_spaces {
-
- #----------------------------------------------------------
- # Define any extra indentation space (for the -lp option).
- # Here is why:
- # If a list has side comments, sub scan_list must dump the
- # list before it sees everything. When this happens, it sets
- # the indentation to the standard scheme, but notes how
- # many spaces it would have liked to use. We may be able
- # to recover that space here in the event that all of the
- # lines of a list are back together again.
- #----------------------------------------------------------
-
- my $extra_leading_spaces = 0;
- if ($extra_indent_ok) {
- my $object = $group_lines[0]->get_indentation();
- if ( ref($object) ) {
- my $extra_indentation_spaces_wanted =
- get_recoverable_spaces($object);
-
- # all indentation objects must be the same
- for my $i ( 1 .. $maximum_line_index ) {
- if ( $object != $group_lines[$i]->get_indentation() ) {
- $extra_indentation_spaces_wanted = 0;
- last;
- }
- }
-
- if ($extra_indentation_spaces_wanted) {
-
- # the maximum space without exceeding the line length:
- my $avail = $group_lines[0]->get_available_space_on_right();
- $extra_leading_spaces =
- ( $avail > $extra_indentation_spaces_wanted )
- ? $extra_indentation_spaces_wanted
- : $avail;
-
- # update the indentation object because with -icp the terminal
- # ');' will use the same adjustment.
- $object->permanently_decrease_available_spaces(
- -$extra_leading_spaces );
- }
- }
- }
- return $extra_leading_spaces;
-}
-
-sub combine_fields {
-
- # combine all fields except for the comment field ( sidecmt.t )
- # Uses global variables:
- # @group_lines
- # $maximum_line_index
- my $maximum_field_index = $group_lines[0]->get_jmax();
- foreach my $j ( 0 .. $maximum_line_index ) {
- my $line = $group_lines[$j];
- my $rfields = $line->get_rfields();
- foreach ( 1 .. $maximum_field_index - 1 ) {
- $rfields->[0] .= $rfields->[$_];
- }
- $rfields->[1] = $rfields->[$maximum_field_index];
-
- $line->set_jmax(1);
- $line->set_column( 0, 0 );
- $line->set_column( 1, 0 );
-
- }
- $maximum_field_index = 1;
-
- for my $j ( 0 .. $maximum_line_index ) {
- my $line = $group_lines[$j];
- my $rfields = $line->get_rfields();
- for my $k ( 0 .. $maximum_field_index ) {
- my $pad = length( $rfields->[$k] ) - $line->current_field_width($k);
- if ( $k == 0 ) {
- $pad += $group_lines[$j]->get_leading_space_count();
- }
-
- if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
-
- }
- }
- return;
-}
-
-sub get_output_line_number {
-
- # the output line number reported to a caller is the number of items
- # written plus the number of items in the buffer
- my $self = shift;
- return 1 + $maximum_line_index +
- $file_writer_object->get_output_line_number();
-}
-
-sub valign_output_step_B {
-
- ###############################################################
- # This is Step B in writing vertically aligned lines.
- # Vertical tightness is applied according to preset flags.
- # In particular this routine handles stacking of opening
- # and closing tokens.
- ###############################################################
-
- my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags, $level )
- = @_;
-
- # handle outdenting of long lines:
- if ($outdent_long_lines) {
- my $excess =
- length($str) -
- $side_comment_length +
- $leading_space_count -
- maximum_line_length_for_level($level);
- if ( $excess > 0 ) {
- $leading_space_count = 0;
- $last_outdented_line_at =
- $file_writer_object->get_output_line_number();
-
- unless ($outdented_line_count) {
- $first_outdented_line_at = $last_outdented_line_at;
- }
- $outdented_line_count++;
- }
- }
-
- # Make preliminary leading whitespace. It could get changed
- # later by entabbing, so we have to keep track of any changes
- # to the leading_space_count from here on.
- my $leading_string =
- $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
-
- # Unpack any recombination data; it was packed by
- # sub send_lines_to_vertical_aligner. Contents:
- #
- # [0] type: 1=opening non-block 2=closing non-block
- # 3=opening block brace 4=closing block brace
- # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
- # if closing: spaces of padding to use
- # [2] sequence number of container
- # [3] valid flag: do not append if this flag is false
- #
- my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
- $seqno_end );
- if ($rvertical_tightness_flags) {
- (
- $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
- $seqno_end
- ) = @{$rvertical_tightness_flags};
- }
-
- $seqno_string = $seqno_end;
-
- # handle any cached line ..
- # either append this line to it or write it out
- if ( length($cached_line_text) ) {
-
- # Dump an invalid cached line
- if ( !$cached_line_valid ) {
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
- }
-
- # Handle cached line ending in OPENING tokens
- elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
-
- my $gap = $leading_space_count - length($cached_line_text);
-
- # handle option of just one tight opening per line:
- if ( $cached_line_flag == 1 ) {
- if ( defined($open_or_close) && $open_or_close == 1 ) {
- $gap = -1;
- }
- }
-
- if ( $gap >= 0 && defined($seqno_beg) ) {
- $leading_string = $cached_line_text . ' ' x $gap;
- $leading_space_count = $cached_line_leading_space_count;
- $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
- $level = $last_level_written;
- }
- else {
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
- }
- }
-
- # Handle cached line ending in CLOSING tokens
- else {
- my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
- if (
-
- # The new line must start with container
- $seqno_beg
-
- # The container combination must be okay..
- && (
-
- # okay to combine like types
- ( $open_or_close == $cached_line_type )
-
- # closing block brace may append to non-block
- || ( $cached_line_type == 2 && $open_or_close == 4 )
-
- # something like ');'
- || ( !$open_or_close && $cached_line_type == 2 )
-
- )
-
- # The combined line must fit
- && (
- length($test_line) <=
- maximum_line_length_for_level($last_level_written) )
- )
- {
-
- $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
-
- # Patch to outdent closing tokens ending # in ');'
- # If we are joining a line like ');' to a previous stacked
- # set of closing tokens, then decide if we may outdent the
- # combined stack to the indentation of the ');'. Since we
- # should not normally outdent any of the other tokens more than
- # the indentation of the lines that contained them, we will
- # only do this if all of the corresponding opening
- # tokens were on the same line. This can happen with
- # -sot and -sct. For example, it is ok here:
- # __PACKAGE__->load_components( qw(
- # PK::Auto
- # Core
- # ));
- #
- # But, for example, we do not outdent in this example because
- # that would put the closing sub brace out farther than the
- # opening sub brace:
- #
- # perltidy -sot -sct
- # $c->Tk::bind(
- # '<Control-f>' => sub {
- # my ($c) = @_;
- # my $e = $c->XEvent;
- # itemsUnderArea $c;
- # } );
- #
- if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
-
- # The way to tell this is if the stacked sequence numbers
- # of this output line are the reverse of the stacked
- # sequence numbers of the previous non-blank line of
- # sequence numbers. So we can join if the previous
- # nonblank string of tokens is the mirror image. For
- # example if stack )}] is 13:8:6 then we are looking for a
- # leading stack like [{( which is 6:8:13 We only need to
- # check the two ends, because the intermediate tokens must
- # fall in order. Note on speed: having to split on colons
- # and eliminate multiple colons might appear to be slow,
- # but it's not an issue because we almost never come
- # through here. In a typical file we don't.
- $seqno_string =~ s/^:+//;
- $last_nonblank_seqno_string =~ s/^:+//;
- $seqno_string =~ s/:+/:/g;
- $last_nonblank_seqno_string =~ s/:+/:/g;
-
- # how many spaces can we outdent?
- my $diff =
- $cached_line_leading_space_count - $leading_space_count;
- if ( $diff > 0
- && length($seqno_string)
- && length($last_nonblank_seqno_string) ==
- length($seqno_string) )
- {
- my @seqno_last =
- ( split ':', $last_nonblank_seqno_string );
- my @seqno_now = ( split ':', $seqno_string );
- if ( $seqno_now[-1] == $seqno_last[0]
- && $seqno_now[0] == $seqno_last[-1] )
- {
-
- # OK to outdent ..
- # for absolute safety, be sure we only remove
- # whitespace
- my $ws = substr( $test_line, 0, $diff );
- if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
-
- $test_line = substr( $test_line, $diff );
- $cached_line_leading_space_count -= $diff;
- $last_level_written =
- level_change(
- $cached_line_leading_space_count,
- $diff, $last_level_written );
- reduce_valign_buffer_indentation($diff);
- }
-
- # shouldn't happen, but not critical:
- ##else {
- ## ERROR transferring indentation here
- ##}
- }
- }
- }
-
- $str = $test_line;
- $leading_string = "";
- $leading_space_count = $cached_line_leading_space_count;
- $level = $last_level_written;
- }
- else {
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
- }
- }
- }
- $cached_line_type = 0;
- $cached_line_text = "";
-
- # make the line to be written
- my $line = $leading_string . $str;
-
- # write or cache this line
- if ( !$open_or_close || $side_comment_length > 0 ) {
- valign_output_step_C( $line, $leading_space_count, $level );
- }
- else {
- $cached_line_text = $line;
- $cached_line_type = $open_or_close;
- $cached_line_flag = $tightness_flag;
- $cached_seqno = $seqno;
- $cached_line_valid = $valid;
- $cached_line_leading_space_count = $leading_space_count;
- $cached_seqno_string = $seqno_string;
- }
-
- $last_level_written = $level;
- $last_side_comment_length = $side_comment_length;
- $extra_indent_ok = 0;
- return;
-}
-
-sub valign_output_step_C {
-
- ###############################################################
- # This is Step C in writing vertically aligned lines.
- # Lines are either stored in a buffer or passed along to the next step.
- # The reason for storing lines is that we may later want to reduce their
- # indentation when -sot and -sct are both used.
- ###############################################################
- my @args = @_;
-
- # Dump any saved lines if we see a line with an unbalanced opening or
- # closing token.
- dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
-
- # Either store or write this line
- if ($valign_buffer_filling) {
- push @valign_buffer, [@args];
- }
- else {
- valign_output_step_D(@args);
- }
-
- # For lines starting or ending with opening or closing tokens..
- if ($seqno_string) {
- $last_nonblank_seqno_string = $seqno_string;
-
- # Start storing lines when we see a line with multiple stacked opening
- # tokens.
- # patch for RT #94354, requested by Colin Williams
- if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
- {
-
- # This test is efficient but a little subtle: The first test says
- # that we have multiple sequence numbers and hence multiple opening
- # or closing tokens in this line. The second part of the test
- # rejects stacked closing and ternary tokens. So if we get here
- # then we should have stacked unbalanced opening tokens.
-
- # Here is a complex example:
-
- # Foo($Bar[0], { # (side comment)
- # baz => 1,
- # });
-
- # The first line has sequence 6::4. It does not begin with
- # a closing token or ternary, so it passes the test and must be
- # stacked opening tokens.
-
- # The last line has sequence 4:6 but is a stack of closing tokens,
- # so it gets rejected.
-
- # Note that the sequence number of an opening token for a qw quote
- # is a negative number and will be rejected.
- # For example, for the following line:
- # skip_symbols([qw(
- # $seqno_string='10:5:-1'. It would be okay to accept it but
- # I decided not to do this after testing.
-
- $valign_buffer_filling = $seqno_string;
-
- }
- }
- return;
-}
-
-sub valign_output_step_D {
-
- ###############################################################
- # This is Step D in writing vertically aligned lines.
- # Write one vertically aligned line of code to the output object.
- ###############################################################
-
- my ( $line, $leading_space_count, $level ) = @_;
-
- # The line is currently correct if there is no tabbing (recommended!)
- # We may have to lop off some leading spaces and replace with tabs.
- if ( $leading_space_count > 0 ) {
-
- # Nothing to do if no tabs
- if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
- || $rOpts_indent_columns <= 0 )
- {
-
- # nothing to do
- }
-
- # Handle entab option
- elsif ($rOpts_entab_leading_whitespace) {
- my $space_count =
- $leading_space_count % $rOpts_entab_leading_whitespace;
- my $tab_count =
- int( $leading_space_count / $rOpts_entab_leading_whitespace );
- my $leading_string = "\t" x $tab_count . ' ' x $space_count;
- if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
- substr( $line, 0, $leading_space_count ) = $leading_string;
- }
- else {
-
- # shouldn't happen - program error counting whitespace
- # - skip entabbing
- VALIGN_DEBUG_FLAG_TABS
- && warning(
-"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
- );
- }
- }
-
- # Handle option of one tab per level
- else {
- my $leading_string = ( "\t" x $level );
- my $space_count =
- $leading_space_count - $level * $rOpts_indent_columns;
-
- # shouldn't happen:
- if ( $space_count < 0 ) {
-
- # But it could be an outdented comment
- if ( $line !~ /^\s*#/ ) {
- VALIGN_DEBUG_FLAG_TABS
- && warning(
-"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
- );
- }
- $leading_string = ( ' ' x $leading_space_count );
- }
- else {
- $leading_string .= ( ' ' x $space_count );
- }
- if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
- substr( $line, 0, $leading_space_count ) = $leading_string;
- }
- else {
-
- # shouldn't happen - program error counting whitespace
- # we'll skip entabbing
- VALIGN_DEBUG_FLAG_TABS
- && warning(
-"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
- );
- }
- }
- }
- $file_writer_object->write_code_line( $line . "\n" );
- return;
-}
-
-{ # begin get_leading_string
-
- my @leading_string_cache;
-
- sub get_leading_string {
-
- # define the leading whitespace string for this line..
- my $leading_whitespace_count = shift;
-
- # Handle case of zero whitespace, which includes multi-line quotes
- # (which may have a finite level; this prevents tab problems)
- if ( $leading_whitespace_count <= 0 ) {
- return "";
- }
-
- # look for previous result
- elsif ( $leading_string_cache[$leading_whitespace_count] ) {
- return $leading_string_cache[$leading_whitespace_count];
- }
-
- # must compute a string for this number of spaces
- my $leading_string;
-
- # Handle simple case of no tabs
- if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
- || $rOpts_indent_columns <= 0 )
- {
- $leading_string = ( ' ' x $leading_whitespace_count );
- }
-
- # Handle entab option
- elsif ($rOpts_entab_leading_whitespace) {
- my $space_count =
- $leading_whitespace_count % $rOpts_entab_leading_whitespace;
- my $tab_count = int(
- $leading_whitespace_count / $rOpts_entab_leading_whitespace );
- $leading_string = "\t" x $tab_count . ' ' x $space_count;
- }
-
- # Handle option of one tab per level
- else {
- $leading_string = ( "\t" x $group_level );
- my $space_count =
- $leading_whitespace_count - $group_level * $rOpts_indent_columns;
-
- # shouldn't happen:
- if ( $space_count < 0 ) {
- VALIGN_DEBUG_FLAG_TABS
- && warning(
-"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
- );
-
- # -- skip entabbing
- $leading_string = ( ' ' x $leading_whitespace_count );
- }
- else {
- $leading_string .= ( ' ' x $space_count );
- }
- }
- $leading_string_cache[$leading_whitespace_count] = $leading_string;
- return $leading_string;
- }
-} # end get_leading_string
-
-sub report_anything_unusual {
- my $self = shift;
- if ( $outdented_line_count > 0 ) {
- write_logfile_entry(
- "$outdented_line_count long lines were outdented:\n");
- write_logfile_entry(
- " First at output line $first_outdented_line_at\n");
-
- if ( $outdented_line_count > 1 ) {
- write_logfile_entry(
- " Last at output line $last_outdented_line_at\n");
- }
- write_logfile_entry(
- " use -noll to prevent outdenting, -l=n to increase line length\n"
- );
- write_logfile_entry("\n");
- }
- return;
-}
-
-#####################################################################
-#
-# the Perl::Tidy::FileWriter class writes the output file
-#
-#####################################################################
-
-package Perl::Tidy::FileWriter;
-
-# Maximum number of little messages; probably need not be changed.
-use constant MAX_NAG_MESSAGES => 6;
-
-sub write_logfile_entry {
- my ( $self, $msg ) = @_;
- my $logger_object = $self->{_logger_object};
- if ($logger_object) {
- $logger_object->write_logfile_entry($msg);
- }
- return;
-}
-
-sub new {
- my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_;
-
- return bless {
- _line_sink_object => $line_sink_object,
- _logger_object => $logger_object,
- _rOpts => $rOpts,
- _output_line_number => 1,
- _consecutive_blank_lines => 0,
- _consecutive_nonblank_lines => 0,
- _first_line_length_error => 0,
- _max_line_length_error => 0,
- _last_line_length_error => 0,
- _first_line_length_error_at => 0,
- _max_line_length_error_at => 0,
- _last_line_length_error_at => 0,
- _line_length_error_count => 0,
- _max_output_line_length => 0,
- _max_output_line_length_at => 0,
- }, $class;
-}
-
-sub tee_on {
- my $self = shift;
- $self->{_line_sink_object}->tee_on();
- return;
-}
-
-sub tee_off {
- my $self = shift;
- $self->{_line_sink_object}->tee_off();
- return;
-}
-
-sub get_output_line_number {
- my $self = shift;
- return $self->{_output_line_number};
-}
-
-sub decrement_output_line_number {
- my $self = shift;
- $self->{_output_line_number}--;
- return;
-}
-
-sub get_consecutive_nonblank_lines {
- my $self = shift;
- return $self->{_consecutive_nonblank_lines};
-}
-
-sub reset_consecutive_blank_lines {
- my $self = shift;
- $self->{_consecutive_blank_lines} = 0;
- return;
-}
-
-sub want_blank_line {
- my $self = shift;
- unless ( $self->{_consecutive_blank_lines} ) {
- $self->write_blank_code_line();
- }
- return;
-}
-
-sub require_blank_code_lines {
-
- # write out the requested number of blanks regardless of the value of -mbl
- # unless -mbl=0. This allows extra blank lines to be written for subs and
- # packages even with the default -mbl=1
- my ( $self, $count ) = @_;
- my $need = $count - $self->{_consecutive_blank_lines};
- my $rOpts = $self->{_rOpts};
- my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
- foreach my $i ( 0 .. $need - 1 ) {
- $self->write_blank_code_line($forced);
- }
- return;
-}
-
-sub write_blank_code_line {
- my $self = shift;
- my $forced = shift;
- my $rOpts = $self->{_rOpts};
- return
- if (!$forced
- && $self->{_consecutive_blank_lines} >=
- $rOpts->{'maximum-consecutive-blank-lines'} );
- $self->{_consecutive_blank_lines}++;
- $self->{_consecutive_nonblank_lines} = 0;
- $self->write_line("\n");
- return;
-}
-
-sub write_code_line {
- my $self = shift;
- my $a = shift;
-
- if ( $a =~ /^\s*$/ ) {
- my $rOpts = $self->{_rOpts};
- return
- if ( $self->{_consecutive_blank_lines} >=
- $rOpts->{'maximum-consecutive-blank-lines'} );
- $self->{_consecutive_blank_lines}++;
- $self->{_consecutive_nonblank_lines} = 0;
- }
- else {
- $self->{_consecutive_blank_lines} = 0;
- $self->{_consecutive_nonblank_lines}++;
- }
- $self->write_line($a);
- return;
-}
-
-sub write_line {
- my ( $self, $a ) = @_;
-
- # TODO: go through and see if the test is necessary here
- if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
-
- $self->{_line_sink_object}->write_line($a);
-
- # This calculation of excess line length ignores any internal tabs
- my $rOpts = $self->{_rOpts};
- my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
- if ( $a =~ /^\t+/g ) {
- $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
- }
-
- # Note that we just incremented output line number to future value
- # so we must subtract 1 for current line number
- if ( length($a) > 1 + $self->{_max_output_line_length} ) {
- $self->{_max_output_line_length} = length($a) - 1;
- $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
- }
-
- if ( $exceed > 0 ) {
- my $output_line_number = $self->{_output_line_number};
- $self->{_last_line_length_error} = $exceed;
- $self->{_last_line_length_error_at} = $output_line_number - 1;
- if ( $self->{_line_length_error_count} == 0 ) {
- $self->{_first_line_length_error} = $exceed;
- $self->{_first_line_length_error_at} = $output_line_number - 1;
- }
-
- if (
- $self->{_last_line_length_error} > $self->{_max_line_length_error} )
- {
- $self->{_max_line_length_error} = $exceed;
- $self->{_max_line_length_error_at} = $output_line_number - 1;
- }
-
- if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
- $self->write_logfile_entry(
- "Line length exceeded by $exceed characters\n");
- }
- $self->{_line_length_error_count}++;
- }
- return;
-}
-
-sub report_line_length_errors {
- my $self = shift;
- my $rOpts = $self->{_rOpts};
- my $line_length_error_count = $self->{_line_length_error_count};
- if ( $line_length_error_count == 0 ) {
- $self->write_logfile_entry(
- "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
- my $max_output_line_length = $self->{_max_output_line_length};
- my $max_output_line_length_at = $self->{_max_output_line_length_at};
- $self->write_logfile_entry(
-" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
- );
-
- }
- else {
-
- my $word = ( $line_length_error_count > 1 ) ? "s" : "";
- $self->write_logfile_entry(
-"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
- );
-
- $word = ( $line_length_error_count > 1 ) ? "First" : "";
- my $first_line_length_error = $self->{_first_line_length_error};
- my $first_line_length_error_at = $self->{_first_line_length_error_at};
- $self->write_logfile_entry(
-" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
- );
-
- if ( $line_length_error_count > 1 ) {
- my $max_line_length_error = $self->{_max_line_length_error};
- my $max_line_length_error_at = $self->{_max_line_length_error_at};
- my $last_line_length_error = $self->{_last_line_length_error};
- my $last_line_length_error_at = $self->{_last_line_length_error_at};
- $self->write_logfile_entry(
-" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
- );
- $self->write_logfile_entry(
-" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
- );
- }
- }
- return;
-}
-
-#####################################################################
-#
-# The Perl::Tidy::Debugger class shows line tokenization
-#
-#####################################################################
-
-package Perl::Tidy::Debugger;
-
-sub new {
-
- my ( $class, $filename ) = @_;
-
- return bless {
- _debug_file => $filename,
- _debug_file_opened => 0,
- _fh => undef,
- }, $class;
-}
-
-sub really_open_debug_file {
-
- my $self = shift;
- my $debug_file = $self->{_debug_file};
- my $fh;
- unless ( $fh = IO::File->new("> $debug_file") ) {
- Perl::Tidy::Warn("can't open $debug_file: $!\n");
- }
- $self->{_debug_file_opened} = 1;
- $self->{_fh} = $fh;
- print $fh
- "Use -dump-token-types (-dtt) to get a list of token type codes\n";
- return;
-}
-
-sub close_debug_file {
-
- my $self = shift;
- my $fh = $self->{_fh};
- if ( $self->{_debug_file_opened} ) {
- eval { $self->{_fh}->close() };
- }
- return;
-}
-
-sub write_debug_entry {
-
- # This is a debug dump routine which may be modified as necessary
- # to dump tokens on a line-by-line basis. The output will be written
- # to the .DEBUG file when the -D flag is entered.
- my ( $self, $line_of_tokens ) = @_;
-
- my $input_line = $line_of_tokens->{_line_text};
-
- my $rtoken_type = $line_of_tokens->{_rtoken_type};
- my $rtokens = $line_of_tokens->{_rtokens};
- my $rlevels = $line_of_tokens->{_rlevels};
- my $rslevels = $line_of_tokens->{_rslevels};
- my $rblock_type = $line_of_tokens->{_rblock_type};
-
- my $input_line_number = $line_of_tokens->{_line_number};
- my $line_type = $line_of_tokens->{_line_type};
- ##my $rtoken_array = $line_of_tokens->{_token_array};
-
- my ( $j, $num );
-
- my $token_str = "$input_line_number: ";
- my $reconstructed_original = "$input_line_number: ";
- my $block_str = "$input_line_number: ";
-
- #$token_str .= "$line_type: ";
- #$reconstructed_original .= "$line_type: ";
-
- my $pattern = "";
- my @next_char = ( '"', '"' );
- my $i_next = 0;
- unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
- my $fh = $self->{_fh};
-
- # FIXME: could convert to use of token_array instead
- foreach my $j ( 0 .. @{$rtoken_type} - 1 ) {
-
- # testing patterns
- if ( $rtoken_type->[$j] eq 'k' ) {
- $pattern .= $rtokens->[$j];
- }
- else {
- $pattern .= $rtoken_type->[$j];
- }
- $reconstructed_original .= $rtokens->[$j];
- $block_str .= "($rblock_type->[$j])";
- $num = length( $rtokens->[$j] );
- my $type_str = $rtoken_type->[$j];
-
- # be sure there are no blank tokens (shouldn't happen)
- # This can only happen if a programming error has been made
- # because all valid tokens are non-blank
- if ( $type_str eq ' ' ) {
- print $fh "BLANK TOKEN on the next line\n";
- $type_str = $next_char[$i_next];
- $i_next = 1 - $i_next;
- }
-
- if ( length($type_str) == 1 ) {
- $type_str = $type_str x $num;
- }
- $token_str .= $type_str;
- }
-
- # Write what you want here ...
- # print $fh "$input_line\n";
- # print $fh "$pattern\n";
- print $fh "$reconstructed_original\n";
- print $fh "$token_str\n";
-
- #print $fh "$block_str\n";
- return;
-}
-
-#####################################################################
-#
-# The Perl::Tidy::LineBuffer class supplies a 'get_line()'
-# method for returning the next line to be parsed, as well as a
-# 'peek_ahead()' method
-#
-# The input parameter is an object with a 'get_line()' method
-# which returns the next line to be parsed
-#
-#####################################################################
-
-package Perl::Tidy::LineBuffer;
-
-sub new {
-
- my ( $class, $line_source_object ) = @_;
-
- return bless {
- _line_source_object => $line_source_object,
- _rlookahead_buffer => [],
- }, $class;
-}
-
-sub peek_ahead {
- my ( $self, $buffer_index ) = @_;
- my $line = undef;
- my $line_source_object = $self->{_line_source_object};
- my $rlookahead_buffer = $self->{_rlookahead_buffer};
- if ( $buffer_index < scalar( @{$rlookahead_buffer} ) ) {
- $line = $rlookahead_buffer->[$buffer_index];
- }
- else {
- $line = $line_source_object->get_line();
- push( @{$rlookahead_buffer}, $line );
- }
- return $line;
-}
-
-sub get_line {
- my $self = shift;
- my $line = undef;
- my $line_source_object = $self->{_line_source_object};
- my $rlookahead_buffer = $self->{_rlookahead_buffer};
-
- if ( scalar( @{$rlookahead_buffer} ) ) {
- $line = shift @{$rlookahead_buffer};
- }
- else {
- $line = $line_source_object->get_line();
- }
- return $line;
-}
-
-########################################################################
-#
-# the Perl::Tidy::Tokenizer package is essentially a filter which
-# reads lines of perl source code from a source object and provides
-# corresponding tokenized lines through its get_line() method. Lines
-# flow from the source_object to the caller like this:
-#
-# source_object --> LineBuffer_object --> Tokenizer --> calling routine
-# get_line() get_line() get_line() line_of_tokens
-#
-# The source object can be any object with a get_line() method which
-# supplies one line (a character string) perl call.
-# The LineBuffer object is created by the Tokenizer.
-# The Tokenizer returns a reference to a data structure 'line_of_tokens'
-# containing one tokenized line for each call to its get_line() method.
-#
-# WARNING: This is not a real class yet. Only one tokenizer my be used.
-#
-########################################################################
-
-package Perl::Tidy::Tokenizer;
-
-BEGIN {
-
- # Caution: these debug flags produce a lot of output
- # They should all be 0 except when debugging small scripts
-
- use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
- use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
- use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
- use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
- use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
-
- my $debug_warning = sub {
- print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
- };
-
- TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
- TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
- TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
- TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
- TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
-
-}
-
-use Carp;
-
-# PACKAGE VARIABLES for processing an entire FILE.
-use vars qw{
- $tokenizer_self
-
- $last_nonblank_token
- $last_nonblank_type
- $last_nonblank_block_type
- $statement_type
- $in_attribute_list
- $current_package
- $context
-
- %is_constant
- %is_user_function
- %user_function_prototype
- %is_block_function
- %is_block_list_function
- %saw_function_definition
-
- $brace_depth
- $paren_depth
- $square_bracket_depth
-
- @current_depth
- @total_depth
- $total_depth
- @nesting_sequence_number
- @current_sequence_number
- @paren_type
- @paren_semicolon_count
- @paren_structural_type
- @brace_type
- @brace_structural_type
- @brace_context
- @brace_package
- @square_bracket_type
- @square_bracket_structural_type
- @depth_array
- @nested_ternary_flag
- @nested_statement_type
- @starting_line_of_current_depth
-};
-
-# GLOBAL CONSTANTS for routines in this package
-use vars qw{
- %is_indirect_object_taker
- %is_block_operator
- %expecting_operator_token
- %expecting_operator_types
- %expecting_term_types
- %expecting_term_token
- %is_digraph
- %is_file_test_operator
- %is_trigraph
- %is_tetragraph
- %is_valid_token_type
- %is_keyword
- %is_code_block_token
- %really_want_term
- @opening_brace_names
- @closing_brace_names
- %is_keyword_taking_list
- %is_q_qq_qw_qx_qr_s_y_tr_m
-};
-
-# possible values of operator_expected()
-use constant TERM => -1;
-use constant UNKNOWN => 0;
-use constant OPERATOR => 1;
-
-# possible values of context
-use constant SCALAR_CONTEXT => -1;
-use constant UNKNOWN_CONTEXT => 0;
-use constant LIST_CONTEXT => 1;
-
-# Maximum number of little messages; probably need not be changed.
-use constant MAX_NAG_MESSAGES => 6;
-
-{
-
- # methods to count instances
- my $_count = 0;
- sub get_count { return $_count; }
- sub _increment_count { return ++$_count }
- sub _decrement_count { return --$_count }
-}
-
-sub DESTROY {
- my $self = shift;
- $self->_decrement_count();
- return;
-}
-
-sub new {
-
- my $class = shift;
-
- # Note: 'tabs' and 'indent_columns' are temporary and should be
- # removed asap
- my %defaults = (
- source_object => undef,
- debugger_object => undef,
- diagnostics_object => undef,
- logger_object => undef,
- starting_level => undef,
- indent_columns => 4,
- tabsize => 8,
- look_for_hash_bang => 0,
- trim_qw => 1,
- look_for_autoloader => 1,
- look_for_selfloader => 1,
- starting_line_number => 1,
- extended_syntax => 0,
- );
- my %args = ( %defaults, @_ );
-
- # we are given an object with a get_line() method to supply source lines
- my $source_object = $args{source_object};
-
- # we create another object with a get_line() and peek_ahead() method
- my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
-
- # Tokenizer state data is as follows:
- # _rhere_target_list reference to list of here-doc targets
- # _here_doc_target the target string for a here document
- # _here_quote_character the type of here-doc quoting (" ' ` or none)
- # to determine if interpolation is done
- # _quote_target character we seek if chasing a quote
- # _line_start_quote line where we started looking for a long quote
- # _in_here_doc flag indicating if we are in a here-doc
- # _in_pod flag set if we are in pod documentation
- # _in_error flag set if we saw severe error (binary in script)
- # _in_data flag set if we are in __DATA__ section
- # _in_end flag set if we are in __END__ section
- # _in_format flag set if we are in a format description
- # _in_attribute_list flag telling if we are looking for attributes
- # _in_quote flag telling if we are chasing a quote
- # _starting_level indentation level of first line
- # _line_buffer_object object with get_line() method to supply source code
- # _diagnostics_object place to write debugging information
- # _unexpected_error_count error count used to limit output
- # _lower_case_labels_at line numbers where lower case labels seen
- # _hit_bug program bug detected
- $tokenizer_self = {
- _rhere_target_list => [],
- _in_here_doc => 0,
- _here_doc_target => "",
- _here_quote_character => "",
- _in_data => 0,
- _in_end => 0,
- _in_format => 0,
- _in_error => 0,
- _in_pod => 0,
- _in_attribute_list => 0,
- _in_quote => 0,
- _quote_target => "",
- _line_start_quote => -1,
- _starting_level => $args{starting_level},
- _know_starting_level => defined( $args{starting_level} ),
- _tabsize => $args{tabsize},
- _indent_columns => $args{indent_columns},
- _look_for_hash_bang => $args{look_for_hash_bang},
- _trim_qw => $args{trim_qw},
- _continuation_indentation => $args{continuation_indentation},
- _outdent_labels => $args{outdent_labels},
- _last_line_number => $args{starting_line_number} - 1,
- _saw_perl_dash_P => 0,
- _saw_perl_dash_w => 0,
- _saw_use_strict => 0,
- _saw_v_string => 0,
- _hit_bug => 0,
- _look_for_autoloader => $args{look_for_autoloader},
- _look_for_selfloader => $args{look_for_selfloader},
- _saw_autoloader => 0,
- _saw_selfloader => 0,
- _saw_hash_bang => 0,
- _saw_end => 0,
- _saw_data => 0,
- _saw_negative_indentation => 0,
- _started_tokenizing => 0,
- _line_buffer_object => $line_buffer_object,
- _debugger_object => $args{debugger_object},
- _diagnostics_object => $args{diagnostics_object},
- _logger_object => $args{logger_object},
- _unexpected_error_count => 0,
- _started_looking_for_here_target_at => 0,
- _nearly_matched_here_target_at => undef,
- _line_text => "",
- _rlower_case_labels_at => undef,
- _extended_syntax => $args{extended_syntax},
- };
-
- prepare_for_a_new_file();
- find_starting_indentation_level();
-
- bless $tokenizer_self, $class;
-
- # This is not a full class yet, so die if an attempt is made to
- # create more than one object.
-
- if ( _increment_count() > 1 ) {
- confess
-"Attempt to create more than 1 object in $class, which is not a true class yet\n";
- }
-
- return $tokenizer_self;
-
-}
-
-# interface to Perl::Tidy::Logger routines
-sub warning {
- my $msg = shift;
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- $logger_object->warning($msg);
- }
- return;
-}
-
-sub complain {
- my $msg = shift;
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- $logger_object->complain($msg);
- }
- return;
-}
-
-sub write_logfile_entry {
- my $msg = shift;
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- $logger_object->write_logfile_entry($msg);
- }
- return;
-}
-
-sub interrupt_logfile {
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- $logger_object->interrupt_logfile();
- }
- return;
-}
-
-sub resume_logfile {
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- $logger_object->resume_logfile();
- }
- return;
-}
-
-sub increment_brace_error {
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- $logger_object->increment_brace_error();
- }
- return;
-}
-
-sub report_definite_bug {
- $tokenizer_self->{_hit_bug} = 1;
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- $logger_object->report_definite_bug();
- }
- return;
-}
-
-sub brace_warning {
- my $msg = shift;
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- $logger_object->brace_warning($msg);
- }
- return;
-}
-
-sub get_saw_brace_error {
- my $logger_object = $tokenizer_self->{_logger_object};
- if ($logger_object) {
- return $logger_object->get_saw_brace_error();
- }
- else {
- return 0;
- }
-}
-
-# interface to Perl::Tidy::Diagnostics routines
-sub write_diagnostics {
- my $msg = shift;
- if ( $tokenizer_self->{_diagnostics_object} ) {
- $tokenizer_self->{_diagnostics_object}->write_diagnostics($msg);
- }
- return;
-}
-
-sub report_tokenization_errors {
-
- my $self = shift;
- my $severe_error = $self->{_in_error};
-
- my $level = get_indentation_level();
- if ( $level != $tokenizer_self->{_starting_level} ) {
- warning("final indentation level: $level\n");
- }
-
- check_final_nesting_depths();
-
- if ( $tokenizer_self->{_look_for_hash_bang}
- && !$tokenizer_self->{_saw_hash_bang} )
- {
- warning(
- "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
- }
-
- if ( $tokenizer_self->{_in_format} ) {
- warning("hit EOF while in format description\n");
- }
-
- if ( $tokenizer_self->{_in_pod} ) {
-
- # Just write log entry if this is after __END__ or __DATA__
- # because this happens to often, and it is not likely to be
- # a parsing error.
- if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
- write_logfile_entry(
-"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
- );
- }
-
- else {
- complain(
-"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
- );
- }
-
- }
-
- if ( $tokenizer_self->{_in_here_doc} ) {
- $severe_error = 1;
- my $here_doc_target = $tokenizer_self->{_here_doc_target};
- my $started_looking_for_here_target_at =
- $tokenizer_self->{_started_looking_for_here_target_at};
- if ($here_doc_target) {
- warning(
-"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
- );
- }
- else {
- warning(
-"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
- );
- }
- my $nearly_matched_here_target_at =
- $tokenizer_self->{_nearly_matched_here_target_at};
- if ($nearly_matched_here_target_at) {
- warning(
-"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
- );
- }
- }
-
- if ( $tokenizer_self->{_in_quote} ) {
- $severe_error = 1;
- my $line_start_quote = $tokenizer_self->{_line_start_quote};
- my $quote_target = $tokenizer_self->{_quote_target};
- my $what =
- ( $tokenizer_self->{_in_attribute_list} )
- ? "attribute list"
- : "quote/pattern";
- warning(
-"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
- );
- }
-
- if ( $tokenizer_self->{_hit_bug} ) {
- $severe_error = 1;
- }
-
- my $logger_object = $tokenizer_self->{_logger_object};
-
-# TODO: eventually may want to activate this to cause file to be output verbatim
- if (0) {
-
- # Set the severe error for a fairly high warning count because
- # some of the warnings do not harm formatting, such as duplicate
- # sub names.
- my $warning_count = $logger_object->{_warning_count};
- if ( $warning_count > 50 ) {
- $severe_error = 1;
- }
-
- # Brace errors are significant, so set the severe error flag at
- # a low number.
- my $saw_brace_error = $logger_object->{_saw_brace_error};
- if ( $saw_brace_error > 2 ) {
- $severe_error = 1;
- }
- }
-
- unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
- if ( $] < 5.006 ) {
- write_logfile_entry("Suggest including '-w parameter'\n");
- }
- else {
- write_logfile_entry("Suggest including 'use warnings;'\n");
- }
- }
-
- if ( $tokenizer_self->{_saw_perl_dash_P} ) {
- write_logfile_entry("Use of -P parameter for defines is discouraged\n");
- }
-
- unless ( $tokenizer_self->{_saw_use_strict} ) {
- write_logfile_entry("Suggest including 'use strict;'\n");
- }
-
- # it is suggested that labels have at least one upper case character
- # for legibility and to avoid code breakage as new keywords are introduced
- if ( $tokenizer_self->{_rlower_case_labels_at} ) {
- my @lower_case_labels_at =
- @{ $tokenizer_self->{_rlower_case_labels_at} };
- write_logfile_entry(
- "Suggest using upper case characters in label(s)\n");
- local $" = ')(';
- write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
- }
- return $severe_error;
-}
-
-sub report_v_string {
-
- # warn if this version can't handle v-strings
- my $tok = shift;
- unless ( $tokenizer_self->{_saw_v_string} ) {
- $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
- }
- if ( $] < 5.006 ) {
- warning(
-"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
- );
- }
- return;
-}
-
-sub get_input_line_number {
- return $tokenizer_self->{_last_line_number};
-}
-
-# returns the next tokenized line
-sub get_line {
-
- my $self = shift;
-
- # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
- # $square_bracket_depth, $paren_depth
-
- my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
- $tokenizer_self->{_line_text} = $input_line;
-
- return unless ($input_line);
-
- my $input_line_number = ++$tokenizer_self->{_last_line_number};
-
- # Find and remove what characters terminate this line, including any
- # control r
- my $input_line_separator = "";
- if ( chomp($input_line) ) { $input_line_separator = $/ }
-
- # TODO: what other characters should be included here?
- if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
- $input_line_separator = $2 . $input_line_separator;
- }
-
- # for backwards compatibility we keep the line text terminated with
- # a newline character
- $input_line .= "\n";
- $tokenizer_self->{_line_text} = $input_line; # update
-
- # create a data structure describing this line which will be
- # returned to the caller.
-
- # _line_type codes are:
- # SYSTEM - system-specific code before hash-bang line
- # CODE - line of perl code (including comments)
- # POD_START - line starting pod, such as '=head'
- # POD - pod documentation text
- # POD_END - last line of pod section, '=cut'
- # HERE - text of here-document
- # HERE_END - last line of here-doc (target word)
- # FORMAT - format section
- # FORMAT_END - last line of format section, '.'
- # DATA_START - __DATA__ line
- # DATA - unidentified text following __DATA__
- # END_START - __END__ line
- # END - unidentified text following __END__
- # ERROR - we are in big trouble, probably not a perl script
-
- # Other variables:
- # _curly_brace_depth - depth of curly braces at start of line
- # _square_bracket_depth - depth of square brackets at start of line
- # _paren_depth - depth of parens at start of line
- # _starting_in_quote - this line continues a multi-line quote
- # (so don't trim leading blanks!)
- # _ending_in_quote - this line ends in a multi-line quote
- # (so don't trim trailing blanks!)
- my $line_of_tokens = {
- _line_type => 'EOF',
- _line_text => $input_line,
- _line_number => $input_line_number,
- _rtoken_type => undef,
- _rtokens => undef,
- _rlevels => undef,
- _rslevels => undef,
- _rblock_type => undef,
- _rcontainer_type => undef,
- _rcontainer_environment => undef,
- _rtype_sequence => undef,
- _rnesting_tokens => undef,
- _rci_levels => undef,
- _rnesting_blocks => undef,
- _guessed_indentation_level => 0,
- _starting_in_quote => 0, # to be set by subroutine
- _ending_in_quote => 0,
- _curly_brace_depth => $brace_depth,
- _square_bracket_depth => $square_bracket_depth,
- _paren_depth => $paren_depth,
- _quote_character => '',
- };
-
- # must print line unchanged if we are in a here document
- if ( $tokenizer_self->{_in_here_doc} ) {
-
- $line_of_tokens->{_line_type} = 'HERE';
- my $here_doc_target = $tokenizer_self->{_here_doc_target};
- my $here_quote_character = $tokenizer_self->{_here_quote_character};
- my $candidate_target = $input_line;
- chomp $candidate_target;
-
- # Handle <<~ targets, which are indicated here by a leading space on
- # the here quote character
- if ( $here_quote_character =~ /^\s/ ) {
- $candidate_target =~ s/^\s*//;
- }
- if ( $candidate_target eq $here_doc_target ) {
- $tokenizer_self->{_nearly_matched_here_target_at} = undef;
- $line_of_tokens->{_line_type} = 'HERE_END';
- write_logfile_entry("Exiting HERE document $here_doc_target\n");
-
- my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
- if ( @{$rhere_target_list} ) { # there can be multiple here targets
- ( $here_doc_target, $here_quote_character ) =
- @{ shift @{$rhere_target_list} };
- $tokenizer_self->{_here_doc_target} = $here_doc_target;
- $tokenizer_self->{_here_quote_character} =
- $here_quote_character;
- write_logfile_entry(
- "Entering HERE document $here_doc_target\n");
- $tokenizer_self->{_nearly_matched_here_target_at} = undef;
- $tokenizer_self->{_started_looking_for_here_target_at} =
- $input_line_number;
- }
- else {
- $tokenizer_self->{_in_here_doc} = 0;
- $tokenizer_self->{_here_doc_target} = "";
- $tokenizer_self->{_here_quote_character} = "";
- }
- }
-
- # check for error of extra whitespace
- # note for PERL6: leading whitespace is allowed
- else {
- $candidate_target =~ s/\s*$//;
- $candidate_target =~ s/^\s*//;
- if ( $candidate_target eq $here_doc_target ) {
- $tokenizer_self->{_nearly_matched_here_target_at} =
- $input_line_number;
- }
- }
- return $line_of_tokens;
- }
-
- # must print line unchanged if we are in a format section
- elsif ( $tokenizer_self->{_in_format} ) {
-
- if ( $input_line =~ /^\.[\s#]*$/ ) {
- write_logfile_entry("Exiting format section\n");
- $tokenizer_self->{_in_format} = 0;
- $line_of_tokens->{_line_type} = 'FORMAT_END';
- }
- else {
- $line_of_tokens->{_line_type} = 'FORMAT';
- }
- return $line_of_tokens;
- }
-
- # must print line unchanged if we are in pod documentation
- elsif ( $tokenizer_self->{_in_pod} ) {
-
- $line_of_tokens->{_line_type} = 'POD';
- if ( $input_line =~ /^=cut/ ) {
- $line_of_tokens->{_line_type} = 'POD_END';
- write_logfile_entry("Exiting POD section\n");
- $tokenizer_self->{_in_pod} = 0;
- }
- if ( $input_line =~ /^\#\!.*perl\b/ ) {
- warning(
- "Hash-bang in pod can cause older versions of perl to fail! \n"
- );
- }
-
- return $line_of_tokens;
- }
-
- # must print line unchanged if we have seen a severe error (i.e., we
- # are seeing illegal tokens and cannot continue. Syntax errors do
- # not pass this route). Calling routine can decide what to do, but
- # the default can be to just pass all lines as if they were after __END__
- elsif ( $tokenizer_self->{_in_error} ) {
- $line_of_tokens->{_line_type} = 'ERROR';
- return $line_of_tokens;
- }
-
- # print line unchanged if we are __DATA__ section
- elsif ( $tokenizer_self->{_in_data} ) {
-
- # ...but look for POD
- # Note that the _in_data and _in_end flags remain set
- # so that we return to that state after seeing the
- # end of a pod section
- if ( $input_line =~ /^=(?!cut)/ ) {
- $line_of_tokens->{_line_type} = 'POD_START';
- write_logfile_entry("Entering POD section\n");
- $tokenizer_self->{_in_pod} = 1;
- return $line_of_tokens;
- }
- else {
- $line_of_tokens->{_line_type} = 'DATA';
- return $line_of_tokens;
- }
- }
-
- # print line unchanged if we are in __END__ section
- elsif ( $tokenizer_self->{_in_end} ) {
-
- # ...but look for POD
- # Note that the _in_data and _in_end flags remain set
- # so that we return to that state after seeing the
- # end of a pod section
- if ( $input_line =~ /^=(?!cut)/ ) {
- $line_of_tokens->{_line_type} = 'POD_START';
- write_logfile_entry("Entering POD section\n");
- $tokenizer_self->{_in_pod} = 1;
- return $line_of_tokens;
- }
- else {
- $line_of_tokens->{_line_type} = 'END';
- return $line_of_tokens;
- }
- }
-
- # check for a hash-bang line if we haven't seen one
- if ( !$tokenizer_self->{_saw_hash_bang} ) {
- if ( $input_line =~ /^\#\!.*perl\b/ ) {
- $tokenizer_self->{_saw_hash_bang} = $input_line_number;
-
- # check for -w and -P flags
- if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
- $tokenizer_self->{_saw_perl_dash_P} = 1;
- }
-
- if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
- $tokenizer_self->{_saw_perl_dash_w} = 1;
- }
-
- if (
- ( $input_line_number > 1 )
-
- # leave any hash bang in a BEGIN block alone
- # i.e. see 'debugger-duck_type.t'
- && !(
- $last_nonblank_block_type
- && $last_nonblank_block_type eq 'BEGIN'
- )
- && ( !$tokenizer_self->{_look_for_hash_bang} )
- )
- {
-
- # this is helpful for VMS systems; we may have accidentally
- # tokenized some DCL commands
- if ( $tokenizer_self->{_started_tokenizing} ) {
- warning(
-"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
- );
- }
- else {
- complain("Useless hash-bang after line 1\n");
- }
- }
-
- # Report the leading hash-bang as a system line
- # This will prevent -dac from deleting it
- else {
- $line_of_tokens->{_line_type} = 'SYSTEM';
- return $line_of_tokens;
- }
- }
- }
-
- # wait for a hash-bang before parsing if the user invoked us with -x
- if ( $tokenizer_self->{_look_for_hash_bang}
- && !$tokenizer_self->{_saw_hash_bang} )
- {
- $line_of_tokens->{_line_type} = 'SYSTEM';
- return $line_of_tokens;
- }
-
- # a first line of the form ': #' will be marked as SYSTEM
- # since lines of this form may be used by tcsh
- if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
- $line_of_tokens->{_line_type} = 'SYSTEM';
- return $line_of_tokens;
- }
-
- # now we know that it is ok to tokenize the line...
- # the line tokenizer will modify any of these private variables:
- # _rhere_target_list
- # _in_data
- # _in_end
- # _in_format
- # _in_error
- # _in_pod
- # _in_quote
- my $ending_in_quote_last = $tokenizer_self->{_in_quote};
- tokenize_this_line($line_of_tokens);
-
- # Now finish defining the return structure and return it
- $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
-
- # handle severe error (binary data in script)
- if ( $tokenizer_self->{_in_error} ) {
- $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
- warning("Giving up after error\n");
- $line_of_tokens->{_line_type} = 'ERROR';
- reset_indentation_level(0); # avoid error messages
- return $line_of_tokens;
- }
-
- # handle start of pod documentation
- if ( $tokenizer_self->{_in_pod} ) {
-
- # This gets tricky..above a __DATA__ or __END__ section, perl
- # accepts '=cut' as the start of pod section. But afterwards,
- # only pod utilities see it and they may ignore an =cut without
- # leading =head. In any case, this isn't good.
- if ( $input_line =~ /^=cut\b/ ) {
- if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
- complain("=cut while not in pod ignored\n");
- $tokenizer_self->{_in_pod} = 0;
- $line_of_tokens->{_line_type} = 'POD_END';
- }
- else {
- $line_of_tokens->{_line_type} = 'POD_START';
- complain(
-"=cut starts a pod section .. this can fool pod utilities.\n"
- );
- write_logfile_entry("Entering POD section\n");
- }
- }
-
- else {
- $line_of_tokens->{_line_type} = 'POD_START';
- write_logfile_entry("Entering POD section\n");
- }
-
- return $line_of_tokens;
- }
-
- # update indentation levels for log messages
- if ( $input_line !~ /^\s*$/ ) {
- my $rlevels = $line_of_tokens->{_rlevels};
- $line_of_tokens->{_guessed_indentation_level} =
- guess_old_indentation_level($input_line);
- }
-
- # see if this line contains here doc targets
- my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
- if ( @{$rhere_target_list} ) {
-
- my ( $here_doc_target, $here_quote_character ) =
- @{ shift @{$rhere_target_list} };
- $tokenizer_self->{_in_here_doc} = 1;
- $tokenizer_self->{_here_doc_target} = $here_doc_target;
- $tokenizer_self->{_here_quote_character} = $here_quote_character;
- write_logfile_entry("Entering HERE document $here_doc_target\n");
- $tokenizer_self->{_started_looking_for_here_target_at} =
- $input_line_number;
- }
-
- # NOTE: __END__ and __DATA__ statements are written unformatted
- # because they can theoretically contain additional characters
- # which are not tokenized (and cannot be read with <DATA> either!).
- if ( $tokenizer_self->{_in_data} ) {
- $line_of_tokens->{_line_type} = 'DATA_START';
- write_logfile_entry("Starting __DATA__ section\n");
- $tokenizer_self->{_saw_data} = 1;
-
- # keep parsing after __DATA__ if use SelfLoader was seen
- if ( $tokenizer_self->{_saw_selfloader} ) {
- $tokenizer_self->{_in_data} = 0;
- write_logfile_entry(
- "SelfLoader seen, continuing; -nlsl deactivates\n");
- }
-
- return $line_of_tokens;
- }
-
- elsif ( $tokenizer_self->{_in_end} ) {
- $line_of_tokens->{_line_type} = 'END_START';
- write_logfile_entry("Starting __END__ section\n");
- $tokenizer_self->{_saw_end} = 1;
-
- # keep parsing after __END__ if use AutoLoader was seen
- if ( $tokenizer_self->{_saw_autoloader} ) {
- $tokenizer_self->{_in_end} = 0;
- write_logfile_entry(
- "AutoLoader seen, continuing; -nlal deactivates\n");
- }
- return $line_of_tokens;
- }
-
- # now, finally, we know that this line is type 'CODE'
- $line_of_tokens->{_line_type} = 'CODE';
-
- # remember if we have seen any real code
- if ( !$tokenizer_self->{_started_tokenizing}
- && $input_line !~ /^\s*$/
- && $input_line !~ /^\s*#/ )
- {
- $tokenizer_self->{_started_tokenizing} = 1;
- }
-
- if ( $tokenizer_self->{_debugger_object} ) {
- $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
- }
-
- # Note: if keyword 'format' occurs in this line code, it is still CODE
- # (keyword 'format' need not start a line)
- if ( $tokenizer_self->{_in_format} ) {
- write_logfile_entry("Entering format section\n");
- }
-
- if ( $tokenizer_self->{_in_quote}
- and ( $tokenizer_self->{_line_start_quote} < 0 ) )
- {
-
- #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
- if (
- ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
- {
- $tokenizer_self->{_line_start_quote} = $input_line_number;
- write_logfile_entry(
- "Start multi-line quote or pattern ending in $quote_target\n");
- }
- }
- elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
- && !$tokenizer_self->{_in_quote} )
- {
- $tokenizer_self->{_line_start_quote} = -1;
- write_logfile_entry("End of multi-line quote or pattern\n");
- }
-
- # we are returning a line of CODE
- return $line_of_tokens;
-}
-
-sub find_starting_indentation_level {
-
-    # Determine the indentation level of the first line of the script being
-    # formatted, store it in $tokenizer_self->{_starting_level}, and reset
-    # the tokenizer's level counters to it.  The level is usually zero for
-    # an entire file, but it may be non-zero when a local block of code is
-    # formatted (from within an editor, for example).  A level given by the
-    # user (with -sil=n) is used as is; otherwise we have to guess.
-    #
-    # USES GLOBAL VARIABLES: $tokenizer_self
-    my $starting_level = 0;
-
-    if ( $tokenizer_self->{_know_starting_level} ) {
-
-        # the level was given as a parameter
-        $starting_level = $tokenizer_self->{_starting_level};
-    }
-    elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
-
-        # a hash-bang line is expected, so the level must be zero
-        $tokenizer_self->{_know_starting_level} = 1;
-    }
-    else {
-
-        # Peek ahead in the input until we find either a leading hash-bang
-        # or the first piece of code.
-        my $i = 0;
-      LINE:
-        while ( my $text =
-            $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
-        {
-
-            # a '#!' on the very first line leaves the level at zero
-            last LINE if ( $i == 1 && $text =~ /^\#\!/ );
-
-            # comments and blank lines tell us nothing about indentation
-            next LINE if ( $text =~ /^\s*#/ || $text =~ /^\s*$/ );
-
-            $starting_level = guess_old_indentation_level($text);
-            last LINE;
-        }
-        write_logfile_entry(
-            "Line $i implies starting-indentation-level = $starting_level\n");
-    }
-
-    $tokenizer_self->{_starting_level} = $starting_level;
-    reset_indentation_level($starting_level);
-    return;
-}
-
-sub guess_old_indentation_level {
-    my ($line) = @_;
-
-    # Guess the indentation level of one input line and return it.
-    #
-    # For the first line of code the result defines the starting indentation
-    # level, which is mainly non-zero when perltidy is applied within an
-    # editor to a local block of code.
-    #
-    # In general this cannot be done reliably: we do not know what a tab
-    # meant in the old script or how many spaces made up one level there
-    # (it may have been formatted with -i=7 -et=3, for example).  The goal
-    # is only to be stable, so that a block which is repeatedly run through
-    # perltidy within an editor stays at the same level.
-    #
-    # USES GLOBAL VARIABLES: $tokenizer_self
-
-    # Add up the width of leading tabs and spaces, plus a correction for any
-    # statement label.
-    my $spaces = 0;
-    if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
-        my ( $tabs, $blanks, $label ) = ( $1, $2, $3 );
-
-        # Leading tabs are expanded with the tab scheme of this run, if any,
-        # so that the code remains stable when editing.
-        $spaces += length($tabs) * $tokenizer_self->{_tabsize} if ($tabs);
-
-        $spaces += length($blanks) if ($blanks);
-
-        # correct for outdented labels
-        $spaces += $tokenizer_self->{_continuation_indentation}
-          if ( $label && $tokenizer_self->{'_outdent_labels'} );
-    }
-
-    # Convert to a level using the value of -i for this run.  If -i=0 is
-    # used (which is possible) the result does not matter, so we just guess
-    # that the old run used 4 spaces per level.
-    my $indent_columns = $tokenizer_self->{_indent_columns} || 4;
-    return int( $spaces / $indent_columns );
-}
-
-# This is a currently unused debug routine
-sub dump_functions {
-
-    # Print to STDOUT, package by package, the names recorded in
-    # %is_user_function (tagged 'block_list' or 'block' when they also
-    # appear in %is_block_list_function or %is_block_function) followed by
-    # the names recorded in %is_constant.
-    my $fh = *STDOUT;
-
-    for my $pkg ( keys %is_user_function ) {
-        print {$fh} "\nnon-constant subs in package $pkg\n";
-
-        for my $sub ( keys %{ $is_user_function{$pkg} } ) {
-
-            # 'block' is tested last, so it wins if both flags are set
-            my $msg = "";
-            $msg = 'block_list' if ( $is_block_list_function{$pkg}{$sub} );
-            $msg = 'block'      if ( $is_block_function{$pkg}{$sub} );
-            print {$fh} "$sub $msg\n";
-        }
-    }
-
-    for my $pkg ( keys %is_constant ) {
-        print {$fh} "\nconstants and constant subs in package $pkg\n";
-
-        for my $sub ( keys %{ $is_constant{$pkg} } ) {
-            print {$fh} "$sub\n";
-        }
-    }
-    return;
-}
-
-sub ones_count {
-
-    # Return the number of '1' characters in a string of 1's and 0's.
-    # Example: ones_count("010101010101") returns 6
-    my ($bits) = @_;
-
-    # tr with an empty replacement list only counts the matching characters
-    return ( $bits =~ tr/1// );
-}
-
-sub prepare_for_a_new_file {
-
-    # Reset the tokenizer's package-level state before a new file is
-    # tokenized: the remembered previous tokens, the statement and package
-    # context, the tables of user-defined functions and constants, and the
-    # container depth counters.  The lexical tokenizer state is reset at the
-    # end by calling initialize_tokenizer_state().
-    #
-    # Takes no arguments and returns nothing.
-
-    # previous tokens needed to determine what to expect next
-    $last_nonblank_token      = ';';    # the only possible starting state which
-    $last_nonblank_type       = ';';    # will make a leading brace a code block
-    $last_nonblank_block_type = '';
-
-    # scalars for remembering statement types across multiple lines
-    $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
-    $in_attribute_list = 0;
-
-    # scalars for remembering where we are in the file
-    $current_package = "main";
-    $context         = UNKNOWN_CONTEXT;
-
-    # hashes used to remember function information
-    %is_constant             = ();      # user-defined constants
-    %is_user_function        = ();      # user-defined functions
-    %user_function_prototype = ();      # their prototypes
-    %is_block_function       = ();
-    %is_block_list_function  = ();
-    %saw_function_definition = ();
-
-    # variables used to track depths of various containers
-    # and report nesting errors
-    $paren_depth          = 0;
-    $brace_depth          = 0;
-    $square_bracket_depth = 0;
-    @current_depth[ 0 .. $#closing_brace_names ] =
-      (0) x scalar @closing_brace_names;
-    $total_depth = 0;
-    @total_depth = ();
-    @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
-      ( 0 .. $#closing_brace_names );
-    @current_sequence_number = ();
-
-    # Seed the depth-0 entry of each per-container stack.  Every stack is
-    # indexed by its own depth counter: the original code indexed
-    # @paren_structural_type by $brace_depth and @brace_package by
-    # $paren_depth, which only worked because both counters are zero here.
-    $paren_type[$paren_depth]            = '';
-    $paren_semicolon_count[$paren_depth] = 0;
-    $paren_structural_type[$paren_depth] = '';
-    $brace_type[$brace_depth] = ';';    # identify opening brace as code block
-    $brace_structural_type[$brace_depth]                   = '';
-    $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
-    $brace_package[$brace_depth]                           = $current_package;
-    $square_bracket_type[$square_bracket_depth]            = '';
-    $square_bracket_structural_type[$square_bracket_depth] = '';
-
-    initialize_tokenizer_state();
-    return;
-}
-
-{ # begin tokenize_this_line
-
- # Container type codes.  In this file they are passed as the first
- # argument to increase_nesting_depth / decrease_nesting_depth and used as
- # indexes into @depth_array.
- use constant BRACE => 0;
- use constant SQUARE_BRACKET => 1;
- use constant PAREN => 2;
- use constant QUESTION_COLON => 3;
-
- # The lexical variables below are shared by the subs in this enclosing
- # block.  They are grouped as TV1..TV6, and save_tokenizer_state and
- # restore_tokenizer_state copy them group by group in the same order, so
- # any change to a list here must be mirrored in both of those subs.
-
- # TV1: scalars for processing one LINE.
- # Re-initialized on each entry to sub tokenize_this_line.
- my (
- $block_type, $container_type, $expecting,
- $i, $i_tok, $input_line,
- $input_line_number, $last_nonblank_i, $max_token_index,
- $next_tok, $next_type, $peeked_ahead,
- $prototype, $rhere_target_list, $rtoken_map,
- $rtoken_type, $rtokens, $tok,
- $type, $type_sequence, $indent_flag,
- );
-
- # TV2: refs to ARRAYS for processing one LINE
- # Re-initialized on each call.
- my $routput_token_list = []; # stack of output token indexes
- my $routput_token_type = []; # token types
- my $routput_block_type = []; # types of code block
- my $routput_container_type = []; # paren types, such as if, elsif, ..
- my $routput_type_sequence = []; # nesting sequential number
- my $routput_indent_flag = []; # indent flags (presumably one per output token)
-
- # TV3: SCALARS for quote variables. These are initialized with a
- # subroutine call and continually updated as lines are processed.
- my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
- $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
-
- # TV4: SCALARS for multi-line identifiers and
- # statements. These are initialized with a subroutine call
- # and continually updated as lines are processed.
- my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
-
- # TV5: SCALARS for tracking indentation level.
- # Initialized once and continually updated as lines are
- # processed.
- my (
- $nesting_token_string, $nesting_type_string,
- $nesting_block_string, $nesting_block_flag,
- $nesting_list_string, $nesting_list_flag,
- $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
- $in_statement_continuation, $level_in_tokenizer,
- $slevel_in_tokenizer, $rslevel_stack,
- );
-
- # TV6: SCALARS for remembering several previous
- # tokens. Initialized once and continually updated as
- # lines are processed.
- my (
- $last_nonblank_container_type, $last_nonblank_type_sequence,
- $last_last_nonblank_token, $last_last_nonblank_type,
- $last_last_nonblank_block_type, $last_last_nonblank_container_type,
- $last_last_nonblank_type_sequence, $last_nonblank_prototype,
- );
-
- # ----------------------------------------------------------------
- # beginning of tokenizer variable access and manipulation routines
- # ----------------------------------------------------------------
-
- sub initialize_tokenizer_state {
-
-     # Put the persistent tokenizer variables (groups TV3 - TV6) into their
-     # start-of-file state.  Groups TV1 and TV2 are not touched here because
-     # they are initialized on each call to the line tokenizer.
-
-     # TV3: quote variables
-     ( $in_quote, $quote_pos, $quote_depth ) = ( 0, 0, 0 );
-     $quote_type = 'Q';
-     (
-         $quote_character, $quoted_string_1,
-         $quoted_string_2, $allowed_quote_modifiers,
-     ) = ( "", "", "", "" );
-
-     # TV4: multi-line identifiers and statements
-     ( $id_scan_state, $identifier, $want_paren ) = ( '', '', "" );
-     $indented_if_level = 0;
-
-     # TV5: indentation level tracking
-     ( $nesting_token_string, $nesting_type_string ) = ( "", "" );
-     ( $nesting_block_string, $nesting_block_flag ) =
-       ( '1', 1 );    # initially in a block
-     ( $nesting_list_string, $nesting_list_flag ) =
-       ( '0', 0 );    # initially not in a list
-     ( $ci_string_in_tokenizer, $continuation_string_in_tokenizer ) =
-       ( "", "0" );
-     (
-         $in_statement_continuation, $level_in_tokenizer,
-         $slevel_in_tokenizer,
-     ) = ( 0, 0, 0 );
-     $rslevel_stack = [];
-
-     # TV6: previous tokens; the two 'last_last' token fields start out as
-     # a ';'
-     ( $last_nonblank_container_type, $last_nonblank_type_sequence ) =
-       ( '', '' );
-     ( $last_last_nonblank_token, $last_last_nonblank_type ) = ( ';', ';' );
-     (
-         $last_last_nonblank_block_type,
-         $last_last_nonblank_container_type,
-         $last_last_nonblank_type_sequence,
-     ) = ( '', '', '' );
-     $last_nonblank_prototype = "";
-     return;
- }
-
- sub save_tokenizer_state {
-
-     # Return a snapshot of all tokenizer lexical variables as a reference
-     # to a list of six array references, one per variable group TV1..TV6.
-     # The order of the groups and of the values within each group must
-     # agree with restore_tokenizer_state, which unpacks the snapshot.
-     return [
-
-         # TV1: scalars for processing one line
-         [
-             $block_type,        $container_type,    $expecting,
-             $i,                 $i_tok,             $input_line,
-             $input_line_number, $last_nonblank_i,   $max_token_index,
-             $next_tok,          $next_type,         $peeked_ahead,
-             $prototype,         $rhere_target_list, $rtoken_map,
-             $rtoken_type,       $rtokens,           $tok,
-             $type,              $type_sequence,     $indent_flag,
-         ],
-
-         # TV2: references to the per-line output arrays
-         [
-             $routput_token_list,    $routput_token_type,
-             $routput_block_type,    $routput_container_type,
-             $routput_type_sequence, $routput_indent_flag,
-         ],
-
-         # TV3: quote variables
-         [
-             $in_quote,        $quote_type,
-             $quote_character, $quote_pos,
-             $quote_depth,     $quoted_string_1,
-             $quoted_string_2, $allowed_quote_modifiers,
-         ],
-
-         # TV4: multi-line identifiers and statements
-         [ $id_scan_state, $identifier, $want_paren, $indented_if_level ],
-
-         # TV5: indentation level tracking
-         [
-             $nesting_token_string,      $nesting_type_string,
-             $nesting_block_string,      $nesting_block_flag,
-             $nesting_list_string,       $nesting_list_flag,
-             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
-             $in_statement_continuation, $level_in_tokenizer,
-             $slevel_in_tokenizer,       $rslevel_stack,
-         ],
-
-         # TV6: previous tokens
-         [
-             $last_nonblank_container_type,
-             $last_nonblank_type_sequence,
-             $last_last_nonblank_token,
-             $last_last_nonblank_type,
-             $last_last_nonblank_block_type,
-             $last_last_nonblank_container_type,
-             $last_last_nonblank_type_sequence,
-             $last_nonblank_prototype,
-         ],
-     ];
- }
-
- sub restore_tokenizer_state {
-
-     # Restore all tokenizer lexical variables from a snapshot.
-     #
-     # given:
-     #   $rstate = the reference returned by save_tokenizer_state, i.e. a
-     #     reference to a list of six array references (groups TV1..TV6)
-     # returns: nothing
-     #
-     # The variable lists below must stay in the same order as the lists in
-     # save_tokenizer_state.
-     my ($rstate) = @_;
-     my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
-     (
-         $block_type,        $container_type,    $expecting,
-         $i,                 $i_tok,             $input_line,
-         $input_line_number, $last_nonblank_i,   $max_token_index,
-         $next_tok,          $next_type,         $peeked_ahead,
-         $prototype,         $rhere_target_list, $rtoken_map,
-         $rtoken_type,       $rtokens,           $tok,
-         $type,              $type_sequence,     $indent_flag,
-     ) = @{$rTV1};
-
-     # The last slot of TV2 is $routput_indent_flag.  It used to be a second
-     # copy of $routput_type_sequence, which left $routput_indent_flag
-     # unrestored and overwrote $routput_type_sequence with the saved
-     # indent-flag array.
-     (
-         $routput_token_list,    $routput_token_type,
-         $routput_block_type,    $routput_container_type,
-         $routput_type_sequence, $routput_indent_flag,
-     ) = @{$rTV2};
-
-     (
-         $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
-         $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
-     ) = @{$rTV3};
-
-     ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
-       @{$rTV4};
-
-     (
-         $nesting_token_string,      $nesting_type_string,
-         $nesting_block_string,      $nesting_block_flag,
-         $nesting_list_string,       $nesting_list_flag,
-         $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
-         $in_statement_continuation, $level_in_tokenizer,
-         $slevel_in_tokenizer,       $rslevel_stack,
-     ) = @{$rTV5};
-
-     (
-         $last_nonblank_container_type,
-         $last_nonblank_type_sequence,
-         $last_last_nonblank_token,
-         $last_last_nonblank_type,
-         $last_last_nonblank_block_type,
-         $last_last_nonblank_container_type,
-         $last_last_nonblank_type_sequence,
-         $last_nonblank_prototype,
-     ) = @{$rTV6};
-     return;
- }
-
- sub get_indentation_level {
-
-     # Return the tokenizer's current indentation level.  While
-     # $indented_if_level is set the level is reported as one less; this is
-     # a patch to avoid reporting an error if an indented if is not
-     # terminated.
-     return $indented_if_level
-       ? $level_in_tokenizer - 1
-       : $level_in_tokenizer;
- }
-
- sub reset_indentation_level {
-
-     # Set both tokenizer level counters to the given level and push that
-     # level onto the slevel stack.
-     my ($level) = @_;
-     $slevel_in_tokenizer = $level;
-     $level_in_tokenizer  = $level;
-     push @{$rslevel_stack}, $level;
-     return;
- }
-
- sub peeked_ahead {
-
-     # Combined setter and getter for the $peeked_ahead flag: a defined
-     # argument replaces the stored value, and the (possibly updated) value
-     # is returned.
-     my ($flag) = @_;
-     $peeked_ahead = $flag if ( defined($flag) );
-     return $peeked_ahead;
- }
-
- # ------------------------------------------------------------
- # end of tokenizer variable access and manipulation routines
- # ------------------------------------------------------------
-
- # ------------------------------------------------------------
- # beginning of various scanner interface routines
- # ------------------------------------------------------------
- sub scan_replacement_text {
-
- # check for here-docs in replacement text invoked by
- # a substitution operator with executable modifier 'e'.
- #
- # given:
- # $replacement_text
- # return:
- # $rht = reference to any here-doc targets
- #
- # The return value is undefined if the text contains no '<<' or if the
- # nested scan did not end inside a here-doc. Otherwise $rht refers to a
- # list of [ target, quote character ] pairs.
- #
- # The text is scanned by a second, temporary tokenizer. Because the
- # tokenizer keeps its state in package variables and in the lexical
- # variables of the enclosing block, both sets are saved here and put
- # back before returning.
- my ($replacement_text) = @_;
-
- # quick check
- return unless ( $replacement_text =~ /<</ );
-
- write_logfile_entry("scanning replacement text for here-doc targets\n");
-
- # save the logger object for error messages
- # (it must be fetched before $tokenizer_self is localized below)
- my $logger_object = $tokenizer_self->{_logger_object};
-
- # localize all package variables
- # (their current values come back automatically when this sub returns)
- local (
- $tokenizer_self, $last_nonblank_token,
- $last_nonblank_type, $last_nonblank_block_type,
- $statement_type, $in_attribute_list,
- $current_package, $context,
- %is_constant, %is_user_function,
- %user_function_prototype, %is_block_function,
- %is_block_list_function, %saw_function_definition,
- $brace_depth, $paren_depth,
- $square_bracket_depth, @current_depth,
- @total_depth, $total_depth,
- @nesting_sequence_number, @current_sequence_number,
- @paren_type, @paren_semicolon_count,
- @paren_structural_type, @brace_type,
- @brace_structural_type, @brace_context,
- @brace_package, @square_bracket_type,
- @square_bracket_structural_type, @depth_array,
- @starting_line_of_current_depth, @nested_ternary_flag,
- @nested_statement_type,
- );
-
- # save all lexical variables
- my $rstate = save_tokenizer_state();
- _decrement_count(); # avoid error check for multiple tokenizers
-
- # make a new tokenizer
- # It reads from the replacement text, logs to the saved logger object,
- # and starts numbering at the current input line number.
- my $rOpts = {};
- my $rpending_logfile_message;
- my $source_object =
- Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
- $rpending_logfile_message );
- my $tokenizer = Perl::Tidy::Tokenizer->new(
- source_object => $source_object,
- logger_object => $logger_object,
- starting_line_number => $input_line_number,
- );
-
- # scan the replacement text
- 1 while ( $tokenizer->get_line() );
-
- # remove any here doc targets
- # NOTE(review): $tokenizer_self was localized above, so at this point it
- # is presumably the new tokenizer's object, installed by the constructor
- # -- confirm against Perl::Tidy::Tokenizer->new.
- my $rht = undef;
- if ( $tokenizer_self->{_in_here_doc} ) {
- $rht = [];
- push @{$rht},
- [
- $tokenizer_self->{_here_doc_target},
- $tokenizer_self->{_here_quote_character}
- ];
- if ( $tokenizer_self->{_rhere_target_list} ) {
- push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
- $tokenizer_self->{_rhere_target_list} = undef;
- }
- $tokenizer_self->{_in_here_doc} = undef;
- }
-
- # now it's safe to report errors
- # (the result is currently not used; see the TODO below)
- my $severe_error = $tokenizer->report_tokenization_errors();
-
- # TODO: Could propagate a severe error up
-
- # restore all tokenizer lexical variables
- restore_tokenizer_state($rstate);
-
- # return the here doc targets
- return $rht;
- }
-
- sub scan_bare_identifier {
-
-     # Interface to scan_bare_identifier_do: pass in the current line
-     # variables and store the values it returns back into $i, $tok, $type
-     # and $prototype.
-     my @updated =
-       scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
-         $rtoken_map, $max_token_index );
-     ( $i, $tok, $type, $prototype ) = @updated;
-     return;
- }
-
- sub scan_identifier {
-
-     # Interface to scan_identifier_do: pass in the current scan state and
-     # the type of the current paren container, and store the values it
-     # returns back into $i, $tok, $type, $id_scan_state and $identifier.
-     my @updated =
-       scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
-         $max_token_index, $expecting, $paren_type[$paren_depth] );
-     ( $i, $tok, $type, $id_scan_state, $identifier ) = @updated;
-     return;
- }
-
- sub scan_id {
-
-     # Interface to scan_id_do: pass in the current line variables and store
-     # the values it returns back into $i, $tok, $type and $id_scan_state.
-     my @updated =
-       scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
-         $id_scan_state, $max_token_index );
-     ( $i, $tok, $type, $id_scan_state ) = @updated;
-     return;
- }
-
- sub scan_number {
-
-     # Interface to scan_number_do: update $i and $type from the first two
-     # values it returns and hand the third one (the number) back to the
-     # caller.
-     my @result =
-       scan_number_do( $input_line, $i, $rtoken_map, $type,
-         $max_token_index );
-     ( $i, $type ) = @result[ 0, 1 ];
-     return $result[2];
- }
-
- # a sub to warn if token found where term expected
- sub error_if_expecting_TERM {
-
-     # Report the current token as unexpected and return 1 if a term is
-     # expected and the previous nonblank token type really wants one.
-     # Otherwise return nothing.
-     return unless ( $expecting == TERM );
-     return unless ( $really_want_term{$last_nonblank_type} );
-
-     report_unexpected( $tok, "term", $i_tok, $last_nonblank_i,
-         $rtoken_map, $rtoken_type, $input_line );
-     return 1;
- }
-
- # a sub to warn if token found where operator expected
- sub error_if_expecting_OPERATOR {
-
-     # given:
-     #   $thing = optional description of what was found; the current token
-     #     is used if it is not defined
-     # return:
-     #   1 if an operator was expected (the problem has been reported),
-     #   nothing otherwise
-     my ($thing) = @_;
-     return unless ( $expecting == OPERATOR );
-
-     $thing = $tok unless ( defined($thing) );
-     report_unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
-         $rtoken_map, $rtoken_type, $input_line );
-
-     # an unexpected token at the very start of a line suggests that the
-     # previous line is missing its terminator
-     if ( $i_tok == 0 ) {
-         interrupt_logfile();
-         warning("Missing ';' above?\n");
-         resume_logfile();
-     }
-     return 1;
- }
-
- # ------------------------------------------------------------
- # end scanner interfaces
- # ------------------------------------------------------------
-
- # Lookup tables of keywords; each listed word maps to 1.
- my %is_for_foreach;
- @_ = qw(for foreach);
- %is_for_foreach = map { $_ => 1 } @_;
-
- my %is_my_our;
- @_ = qw(my our);
- %is_my_our = map { $_ => 1 } @_;
-
- # These keywords may introduce blocks after parenthesized expressions,
- # in the form:
- # keyword ( .... ) { BLOCK }
- # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
- my %is_blocktype_with_paren;
- @_ = qw(
-   if elsif unless while until for foreach
-   switch case given when catch
- );
- %is_blocktype_with_paren = map { $_ => 1 } @_;
-
- # ------------------------------------------------------------
- # begin hash of code for handling most token types
- # ------------------------------------------------------------
- my $tokenization_code = {
-
- # no special code for these types yet, but syntax checks
- # could be added
-
-## '!' => undef,
-## '!=' => undef,
-## '!~' => undef,
-## '%=' => undef,
-## '&&=' => undef,
-## '&=' => undef,
-## '+=' => undef,
-## '-=' => undef,
-## '..' => undef,
-## '..' => undef,
-## '...' => undef,
-## '.=' => undef,
-## '<<=' => undef,
-## '<=' => undef,
-## '<=>' => undef,
-## '<>' => undef,
-## '=' => undef,
-## '==' => undef,
-## '=~' => undef,
-## '>=' => undef,
-## '>>' => undef,
-## '>>=' => undef,
-## '\\' => undef,
-## '^=' => undef,
-## '|=' => undef,
-## '||=' => undef,
-## '//=' => undef,
-## '~' => undef,
-## '~~' => undef,
-## '!~~' => undef,
-
- '>' => sub {
- error_if_expecting_TERM()
- if ( $expecting == TERM );
- },
- '|' => sub {
- error_if_expecting_TERM()
- if ( $expecting == TERM );
- },
- '$' => sub {
-
- # start looking for a scalar
- error_if_expecting_OPERATOR("Scalar")
- if ( $expecting == OPERATOR );
- scan_identifier();
-
- if ( $identifier eq '$^W' ) {
- $tokenizer_self->{_saw_perl_dash_w} = 1;
- }
-
- # Check for identifier in indirect object slot
- # (vorboard.pl, sort.t). Something like:
- # /^(print|printf|sort|exec|system)$/
- if (
- $is_indirect_object_taker{$last_nonblank_token}
-
- || ( ( $last_nonblank_token eq '(' )
- && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
- || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
- )
- {
- $type = 'Z';
- }
- },
- '(' => sub {
-
- ++$paren_depth;
- $paren_semicolon_count[$paren_depth] = 0;
- if ($want_paren) {
- $container_type = $want_paren;
- $want_paren = "";
- }
- elsif ( $statement_type =~ /^sub\b/ ) {
- $container_type = $statement_type;
- }
- else {
- $container_type = $last_nonblank_token;
-
- # We can check for a syntax error here of unexpected '(',
- # but this is going to get messy...
- if (
- $expecting == OPERATOR
-
- # be sure this is not a method call of the form
- # &method(...), $method->(..), &{method}(...),
- # $ref[2](list) is ok & short for $ref[2]->(list)
- # NOTE: at present, braces in something like &{ xxx }
- # are not marked as a block, we might have a method call
- && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
-
- )
- {
-
- # ref: camel 3 p 703.
- if ( $last_last_nonblank_token eq 'do' ) {
- complain(
-"do SUBROUTINE is deprecated; consider & or -> notation\n"
- );
- }
- else {
-
- # if this is an empty list, (), then it is not an
- # error; for example, we might have a constant pi and
- # invoke it with pi() or just pi;
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens,
- $max_token_index );
- if ( $next_nonblank_token ne ')' ) {
- my $hint;
- error_if_expecting_OPERATOR('(');
-
- if ( $last_nonblank_type eq 'C' ) {
- $hint =
- "$last_nonblank_token has a void prototype\n";
- }
- elsif ( $last_nonblank_type eq 'i' ) {
- if ( $i_tok > 0
- && $last_nonblank_token =~ /^\$/ )
- {
- $hint =
-"Do you mean '$last_nonblank_token->(' ?\n";
- }
- }
- if ($hint) {
- interrupt_logfile();
- warning($hint);
- resume_logfile();
- }
- } ## end if ( $next_nonblank_token...
- } ## end else [ if ( $last_last_nonblank_token...
- } ## end if ( $expecting == OPERATOR...
- }
- $paren_type[$paren_depth] = $container_type;
- ( $type_sequence, $indent_flag ) =
- increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
-
- # propagate types down through nested parens
- # for example: the second paren in 'if ((' would be structural
- # since the first is.
-
- if ( $last_nonblank_token eq '(' ) {
- $type = $last_nonblank_type;
- }
-
- # We exclude parens as structural after a ',' because it
- # causes subtle problems with continuation indentation for
- # something like this, where the first 'or' will not get
- # indented.
- #
- # assert(
- # __LINE__,
- # ( not defined $check )
- # or ref $check
- # or $check eq "new"
- # or $check eq "old",
- # );
- #
- # Likewise, we exclude parens where a statement can start
- # because of problems with continuation indentation, like
- # these:
- #
- # ($firstline =~ /^#\!.*perl/)
- # and (print $File::Find::name, "\n")
- # and (return 1);
- #
- # (ref($usage_fref) =~ /CODE/)
- # ? &$usage_fref
- # : (&blast_usage, &blast_params, &blast_general_params);
-
- else {
- $type = '{';
- }
-
- if ( $last_nonblank_type eq ')' ) {
- warning(
- "Syntax error? found token '$last_nonblank_type' then '('\n"
- );
- }
- $paren_structural_type[$paren_depth] = $type;
-
- },
- ')' => sub {
- ( $type_sequence, $indent_flag ) =
- decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
-
- if ( $paren_structural_type[$paren_depth] eq '{' ) {
- $type = '}';
- }
-
- $container_type = $paren_type[$paren_depth];
-
- # restore statement type as 'sub' at closing paren of a signature
- # so that a subsequent ':' is identified as an attribute
- if ( $container_type =~ /^sub\b/ ) {
- $statement_type = $container_type;
- }
-
- # /^(for|foreach)$/
- if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
- my $num_sc = $paren_semicolon_count[$paren_depth];
- if ( $num_sc > 0 && $num_sc != 2 ) {
- warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
- }
- }
-
- if ( $paren_depth > 0 ) { $paren_depth-- }
- },
- ',' => sub {
- if ( $last_nonblank_type eq ',' ) {
- complain("Repeated ','s \n");
- }
-
- # patch for operator_expected: note if we are in the list (use.t)
- if ( $statement_type eq 'use' ) { $statement_type = '_use' }
-## FIXME: need to move this elsewhere, perhaps check after a '('
-## elsif ($last_nonblank_token eq '(') {
-## warning("Leading ','s illegal in some versions of perl\n");
-## }
- },
- ';' => sub {
- $context = UNKNOWN_CONTEXT;
- $statement_type = '';
- $want_paren = "";
-
- # /^(for|foreach)$/
- if ( $is_for_foreach{ $paren_type[$paren_depth] } )
- { # mark ; in for loop
-
- # Be careful: we do not want a semicolon such as the
- # following to be included:
- #
- # for (sort {strcoll($a,$b);} keys %investments) {
-
- if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
- && $square_bracket_depth ==
- $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
- {
-
- $type = 'f';
- $paren_semicolon_count[$paren_depth]++;
- }
- }
-
- },
- '"' => sub {
- error_if_expecting_OPERATOR("String")
- if ( $expecting == OPERATOR );
- $in_quote = 1;
- $type = 'Q';
- $allowed_quote_modifiers = "";
- },
- "'" => sub {
- error_if_expecting_OPERATOR("String")
- if ( $expecting == OPERATOR );
- $in_quote = 1;
- $type = 'Q';
- $allowed_quote_modifiers = "";
- },
- '`' => sub {
- error_if_expecting_OPERATOR("String")
- if ( $expecting == OPERATOR );
- $in_quote = 1;
- $type = 'Q';
- $allowed_quote_modifiers = "";
- },
- '/' => sub {
- my $is_pattern;
-
- if ( $expecting == UNKNOWN ) { # indeterminate, must guess..
- my $msg;
- ( $is_pattern, $msg ) =
- guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
- $max_token_index );
-
- if ($msg) {
- write_diagnostics("DIVIDE:$msg\n");
- write_logfile_entry($msg);
- }
- }
- else { $is_pattern = ( $expecting == TERM ) }
-
- if ($is_pattern) {
- $in_quote = 1;
- $type = 'Q';
- $allowed_quote_modifiers = '[msixpodualngc]';
- }
- else { # not a pattern; check for a /= token
-
- if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
- $i++;
- $tok = '/=';
- $type = $tok;
- }
-
- #DEBUG - collecting info on what tokens follow a divide
- # for development of guessing algorithm
- #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
- # #write_diagnostics( "DIVIDE? $input_line\n" );
- #}
- }
- },
- '{' => sub {
-
- # if we just saw a ')', we will label this block with
- # its type. We need to do this to allow sub
- # code_block_type to determine if this brace starts a
- # code block or anonymous hash. (The type of a paren
- # pair is the preceding token, such as 'if', 'else',
- # etc).
- $container_type = "";
-
- # ATTRS: for a '{' following an attribute list, reset
- # things to look like we just saw the sub name
- if ( $statement_type =~ /^sub/ ) {
- $last_nonblank_token = $statement_type;
- $last_nonblank_type = 'i';
- $statement_type = "";
- }
-
- # patch for SWITCH/CASE: hide these keywords from an immediately
- # following opening brace
- elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
- && $statement_type eq $last_nonblank_token )
- {
- $last_nonblank_token = ";";
- }
-
- elsif ( $last_nonblank_token eq ')' ) {
- $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
-
- # defensive move in case of a nesting error (pbug.t)
- # in which this ')' had no previous '('
- # this nesting error will have been caught
- if ( !defined($last_nonblank_token) ) {
- $last_nonblank_token = 'if';
- }
-
- # check for syntax error here;
- unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
- if ( $tokenizer_self->{'_extended_syntax'} ) {
-
- # we append a trailing () to mark this as an unknown
- # block type. This allows perltidy to format some
- # common extensions of perl syntax.
- # This is used by sub code_block_type
- $last_nonblank_token .= '()';
- }
- else {
- my $list =
- join( ' ', sort keys %is_blocktype_with_paren );
- warning(
-"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
- );
- }
- }
- }
-
- # patch for paren-less for/foreach glitch, part 2.
- # see note below under 'qw'
- elsif ($last_nonblank_token eq 'qw'
- && $is_for_foreach{$want_paren} )
- {
- $last_nonblank_token = $want_paren;
- if ( $last_last_nonblank_token eq $want_paren ) {
- warning(
-"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
- );
-
- }
- $want_paren = "";
- }
-
- # now identify which of the three possible types of
- # curly braces we have: hash index container, anonymous
- # hash reference, or code block.
-
- # non-structural (hash index) curly brace pair
- # get marked 'L' and 'R'
- if ( is_non_structural_brace() ) {
- $type = 'L';
-
- # patch for SWITCH/CASE:
- # allow paren-less identifier after 'when'
- # if the brace is preceded by a space
- if ( $statement_type eq 'when'
- && $last_nonblank_type eq 'i'
- && $last_last_nonblank_type eq 'k'
- && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
- {
- $type = '{';
- $block_type = $statement_type;
- }
- }
-
- # code and anonymous hash have the same type, '{', but are
- # distinguished by 'block_type',
- # which will be blank for an anonymous hash
- else {
-
- $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
- $max_token_index );
-
- # patch to promote bareword type to function taking block
- if ( $block_type
- && $last_nonblank_type eq 'w'
- && $last_nonblank_i >= 0 )
- {
- if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
- $routput_token_type->[$last_nonblank_i] = 'G';
- }
- }
-
- # patch for SWITCH/CASE: if we find a stray opening block brace
- # where we might accept a 'case' or 'when' block, then take it
- if ( $statement_type eq 'case'
- || $statement_type eq 'when' )
- {
- if ( !$block_type || $block_type eq '}' ) {
- $block_type = $statement_type;
- }
- }
- }
-
- $brace_type[ ++$brace_depth ] = $block_type;
- $brace_package[$brace_depth] = $current_package;
- $brace_structural_type[$brace_depth] = $type;
- $brace_context[$brace_depth] = $context;
- ( $type_sequence, $indent_flag ) =
- increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
- },
- '}' => sub {
- $block_type = $brace_type[$brace_depth];
- if ($block_type) { $statement_type = '' }
- if ( defined( $brace_package[$brace_depth] ) ) {
- $current_package = $brace_package[$brace_depth];
- }
-
- # can happen on brace error (caught elsewhere)
- else {
- }
- ( $type_sequence, $indent_flag ) =
- decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
-
- if ( $brace_structural_type[$brace_depth] eq 'L' ) {
- $type = 'R';
- }
-
- # propagate type information for 'do' and 'eval' blocks, and also
- # for smartmatch operator. This is necessary to enable us to know
- # if an operator or term is expected next.
- if ( $is_block_operator{$block_type} ) {
- $tok = $block_type;
- }
-
- $context = $brace_context[$brace_depth];
- if ( $brace_depth > 0 ) { $brace_depth--; }
- },
- '&' => sub { # maybe sub call? start looking
-
- # We have to check for sub call unless we are sure we
- # are expecting an operator. This example from s2p
- # got mistaken as a q operator in an early version:
- # print BODY &q(<<'EOT');
- if ( $expecting != OPERATOR ) {
-
- # But only look for a sub call if we are expecting a term or
- # if there is no existing space after the &.
- # For example we probably don't want & as sub call here:
- # Fcntl::S_IRUSR & $mode;
- if ( $expecting == TERM || $next_type ne 'b' ) {
- scan_identifier();
- }
- }
- else {
- }
- },
- '<' => sub { # angle operator or less than?
-
- if ( $expecting != OPERATOR ) {
- ( $i, $type ) =
- find_angle_operator_termination( $input_line, $i, $rtoken_map,
- $expecting, $max_token_index );
-
- if ( $type eq '<' && $expecting == TERM ) {
- error_if_expecting_TERM();
- interrupt_logfile();
- warning("Unterminated <> operator?\n");
- resume_logfile();
- }
- }
- else {
- }
- },
- '?' => sub { # ?: conditional or starting pattern?
-
- my $is_pattern;
-
- if ( $expecting == UNKNOWN ) {
-
- my $msg;
- ( $is_pattern, $msg ) =
- guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
- $max_token_index );
-
- if ($msg) { write_logfile_entry($msg) }
- }
- else { $is_pattern = ( $expecting == TERM ) }
-
- if ($is_pattern) {
- $in_quote = 1;
- $type = 'Q';
- $allowed_quote_modifiers = '[msixpodualngc]';
- }
- else {
- ( $type_sequence, $indent_flag ) =
- increase_nesting_depth( QUESTION_COLON,
- $rtoken_map->[$i_tok] );
- }
- },
- '*' => sub { # typeglob, or multiply?
-
- if ( $expecting == TERM ) {
- scan_identifier();
- }
- else {
-
- if ( $rtokens->[ $i + 1 ] eq '=' ) {
- $tok = '*=';
- $type = $tok;
- $i++;
- }
- elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
- $tok = '**';
- $type = $tok;
- $i++;
- if ( $rtokens->[ $i + 1 ] eq '=' ) {
- $tok = '**=';
- $type = $tok;
- $i++;
- }
- }
- }
- },
- '.' => sub { # what kind of . ?
-
- if ( $expecting != OPERATOR ) {
- scan_number();
- if ( $type eq '.' ) {
- error_if_expecting_TERM()
- if ( $expecting == TERM );
- }
- }
- else {
- }
- },
- ':' => sub {
-
- # if this is the first nonblank character, call it a label
- # since perl seems to just swallow it
- if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
- $type = 'J';
- }
-
- # ATTRS: check for a ':' which introduces an attribute list
- # (this might eventually get its own token type)
- elsif ( $statement_type =~ /^sub\b/ ) {
- $type = 'A';
- $in_attribute_list = 1;
- }
-
- # check for scalar attribute, such as
- # my $foo : shared = 1;
- elsif ($is_my_our{$statement_type}
- && $current_depth[QUESTION_COLON] == 0 )
- {
- $type = 'A';
- $in_attribute_list = 1;
- }
-
- # otherwise, it should be part of a ?/: operator
- else {
- ( $type_sequence, $indent_flag ) =
- decrease_nesting_depth( QUESTION_COLON,
- $rtoken_map->[$i_tok] );
- if ( $last_nonblank_token eq '?' ) {
- warning("Syntax error near ? :\n");
- }
- }
- },
- '+' => sub { # what kind of plus?
-
- if ( $expecting == TERM ) {
- my $number = scan_number();
-
- # unary plus is safest assumption if not a number
- if ( !defined($number) ) { $type = 'p'; }
- }
- elsif ( $expecting == OPERATOR ) {
- }
- else {
- if ( $next_type eq 'w' ) { $type = 'p' }
- }
- },
- '@' => sub {
-
- error_if_expecting_OPERATOR("Array")
- if ( $expecting == OPERATOR );
- scan_identifier();
- },
- '%' => sub { # hash or modulo?
-
- # first guess is hash if no following blank
- if ( $expecting == UNKNOWN ) {
- if ( $next_type ne 'b' ) { $expecting = TERM }
- }
- if ( $expecting == TERM ) {
- scan_identifier();
- }
- },
- '[' => sub {
- $square_bracket_type[ ++$square_bracket_depth ] =
- $last_nonblank_token;
- ( $type_sequence, $indent_flag ) =
- increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
-
- # It may seem odd, but structural square brackets have
- # type '{' and '}'. This simplifies the indentation logic.
- if ( !is_non_structural_brace() ) {
- $type = '{';
- }
- $square_bracket_structural_type[$square_bracket_depth] = $type;
- },
- ']' => sub {
- ( $type_sequence, $indent_flag ) =
- decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
-
- if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
- {
- $type = '}';
- }
-
- # propagate type information for smartmatch operator. This is
- # necessary to enable us to know if an operator or term is expected
- # next.
- if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
- $tok = $square_bracket_type[$square_bracket_depth];
- }
-
- if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
- },
- '-' => sub { # what kind of minus?
-
- if ( ( $expecting != OPERATOR )
- && $is_file_test_operator{$next_tok} )
- {
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i + 1, $rtokens,
- $max_token_index );
-
- # check for a quoted word like "-w=>xx";
- # it is sufficient to just check for a following '='
- if ( $next_nonblank_token eq '=' ) {
- $type = 'm';
- }
- else {
- $i++;
- $tok .= $next_tok;
- $type = 'F';
- }
- }
- elsif ( $expecting == TERM ) {
- my $number = scan_number();
-
- # maybe part of bareword token? unary is safest
- if ( !defined($number) ) { $type = 'm'; }
-
- }
- elsif ( $expecting == OPERATOR ) {
- }
- else {
-
- if ( $next_type eq 'w' ) {
- $type = 'm';
- }
- }
- },
-
- '^' => sub {
-
- # check for special variables like ${^WARNING_BITS}
- if ( $expecting == TERM ) {
-
- # FIXME: this should work but will not catch errors
- # because we also have to be sure that previous token is
- # a type character ($,@,%).
- if ( $last_nonblank_token eq '{'
- && ( $next_tok =~ /^[A-Za-z_]/ ) )
- {
-
- if ( $next_tok eq 'W' ) {
- $tokenizer_self->{_saw_perl_dash_w} = 1;
- }
- $tok = $tok . $next_tok;
- $i = $i + 1;
- $type = 'w';
- }
-
- else {
- unless ( error_if_expecting_TERM() ) {
-
- # Something like this is valid but strange:
- # undef ^I;
- complain("The '^' seems unusual here\n");
- }
- }
- }
- },
-
- '::' => sub { # probably a sub call
- scan_bare_identifier();
- },
- '<<' => sub { # maybe a here-doc?
- return
- unless ( $i < $max_token_index )
- ; # here-doc not possible if end of line
-
- if ( $expecting != OPERATOR ) {
- my ( $found_target, $here_doc_target, $here_quote_character,
- $saw_error );
- (
- $found_target, $here_doc_target, $here_quote_character, $i,
- $saw_error
- )
- = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
- $max_token_index );
-
- if ($found_target) {
- push @{$rhere_target_list},
- [ $here_doc_target, $here_quote_character ];
- $type = 'h';
- if ( length($here_doc_target) > 80 ) {
- my $truncated = substr( $here_doc_target, 0, 80 );
- complain("Long here-target: '$truncated' ...\n");
- }
- elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
- complain(
- "Unconventional here-target: '$here_doc_target'\n");
- }
- }
- elsif ( $expecting == TERM ) {
- unless ($saw_error) {
-
- # shouldn't happen..
- warning("Program bug; didn't find here doc target\n");
- report_definite_bug();
- }
- }
- }
- else {
- }
- },
- '<<~' => sub { # a here-doc, new type added in v26
- return
- unless ( $i < $max_token_index )
- ; # here-doc not possible if end of line
- if ( $expecting != OPERATOR ) {
- my ( $found_target, $here_doc_target, $here_quote_character,
- $saw_error );
- (
- $found_target, $here_doc_target, $here_quote_character, $i,
- $saw_error
- )
- = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
- $max_token_index );
-
- if ($found_target) {
-
- if ( length($here_doc_target) > 80 ) {
- my $truncated = substr( $here_doc_target, 0, 80 );
- complain("Long here-target: '$truncated' ...\n");
- }
- elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
- complain(
- "Unconventional here-target: '$here_doc_target'\n");
- }
-
- # Note that we put a leading space on the here quote
- # character indicate that it may be preceded by spaces
- $here_quote_character = " " . $here_quote_character;
- push @{$rhere_target_list},
- [ $here_doc_target, $here_quote_character ];
- $type = 'h';
- }
- elsif ( $expecting == TERM ) {
- unless ($saw_error) {
-
- # shouldn't happen..
- warning("Program bug; didn't find here doc target\n");
- report_definite_bug();
- }
- }
- }
- else {
- }
- },
- '->' => sub {
-
- # if -> points to a bare word, we must scan for an identifier,
- # otherwise something like ->y would look like the y operator
- scan_identifier();
- },
-
- # type = 'pp' for pre-increment, '++' for post-increment
- '++' => sub {
- if ( $expecting == TERM ) { $type = 'pp' }
- elsif ( $expecting == UNKNOWN ) {
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens, $max_token_index );
- if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
- }
- },
-
- '=>' => sub {
- if ( $last_nonblank_type eq $tok ) {
- complain("Repeated '=>'s \n");
- }
-
- # patch for operator_expected: note if we are in the list (use.t)
- # TODO: make version numbers a new token type
- if ( $statement_type eq 'use' ) { $statement_type = '_use' }
- },
-
- # type = 'mm' for pre-decrement, '--' for post-decrement
- '--' => sub {
-
- if ( $expecting == TERM ) { $type = 'mm' }
- elsif ( $expecting == UNKNOWN ) {
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens, $max_token_index );
- if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
- }
- },
-
- '&&' => sub {
- error_if_expecting_TERM()
- if ( $expecting == TERM );
- },
-
- '||' => sub {
- error_if_expecting_TERM()
- if ( $expecting == TERM );
- },
-
- '//' => sub {
- error_if_expecting_TERM()
- if ( $expecting == TERM );
- },
- };
-
- # ------------------------------------------------------------
- # end hash of code for handling individual token types
- # ------------------------------------------------------------
-
- my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
-
- # These block types terminate statements and do not need a trailing
- # semicolon
- # patched for SWITCH/CASE/
- my %is_zero_continuation_block_type;
- @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
- if elsif else unless while until for foreach switch case given when);
- @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
-
- my %is_not_zero_continuation_block_type;
- @_ = qw(sort grep map do eval);
- @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
-
- my %is_logical_container;
- @_ = qw(if elsif unless while and or err not && ! || for foreach);
- @is_logical_container{@_} = (1) x scalar(@_);
-
- my %is_binary_type;
- @_ = qw(|| &&);
- @is_binary_type{@_} = (1) x scalar(@_);
-
- my %is_binary_keyword;
- @_ = qw(and or err eq ne cmp);
- @is_binary_keyword{@_} = (1) x scalar(@_);
-
- # 'L' is token for opening { at hash key
- my %is_opening_type;
- @_ = qw" L { ( [ ";
- @is_opening_type{@_} = (1) x scalar(@_);
-
- # 'R' is token for closing } at hash key
- my %is_closing_type;
- @_ = qw" R } ) ] ";
- @is_closing_type{@_} = (1) x scalar(@_);
-
- my %is_redo_last_next_goto;
- @_ = qw(redo last next goto);
- @is_redo_last_next_goto{@_} = (1) x scalar(@_);
-
- my %is_use_require;
- @_ = qw(use require);
- @is_use_require{@_} = (1) x scalar(@_);
-
- my %is_sub_package;
- @_ = qw(sub package);
- @is_sub_package{@_} = (1) x scalar(@_);
-
- # This hash holds the hash key in $tokenizer_self for these keywords:
- my %is_format_END_DATA = (
- 'format' => '_in_format',
- '__END__' => '_in_end',
- '__DATA__' => '_in_data',
- );
-
- # original ref: camel 3 p 147,
- # but perl may accept undocumented flags
- # perl 5.10 adds 'p' (preserve)
- # Perl version 5.22 added 'n'
- # From http://perldoc.perl.org/perlop.html we have
- # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
- # s/PATTERN/REPLACEMENT/msixpodualngcer
- # y/SEARCHLIST/REPLACEMENTLIST/cdsr
- # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
- # qr/STRING/msixpodualn
- my %quote_modifiers = (
- 's' => '[msixpodualngcer]',
- 'y' => '[cdsr]',
- 'tr' => '[cdsr]',
- 'm' => '[msixpodualngc]',
- 'qr' => '[msixpodualn]',
- 'q' => "",
- 'qq' => "",
- 'qw' => "",
- 'qx' => "",
- );
-
- # table showing how many quoted things to look for after quote operator..
- # s, y, tr have 2 (pattern and replacement)
- # others have 1 (pattern only)
- my %quote_items = (
- 's' => 2,
- 'y' => 2,
- 'tr' => 2,
- 'm' => 1,
- 'qr' => 1,
- 'q' => 1,
- 'qq' => 1,
- 'qw' => 1,
- 'qx' => 1,
- );
-
- sub tokenize_this_line {
-
- # This routine breaks a line of perl code into tokens which are of use in
- # indentation and reformatting. One of my goals has been to define tokens
- # such that a newline may be inserted between any pair of tokens without
- # changing or invalidating the program. This version comes close to this,
- # although there are necessarily a few exceptions which must be caught by
- # the formatter. Many of these involve the treatment of bare words.
- #
- # The tokens and their types are returned in arrays. See previous
- # routine for their names.
- #
- # See also the array "valid_token_types" in the BEGIN section for an
- # up-to-date list.
- #
- # To simplify things, token types are either a single character, or they
- # are identical to the tokens themselves.
- #
- # As a debugging aid, the -D flag creates a file containing a side-by-side
- # comparison of the input string and its tokenization for each line of a file.
- # This is an invaluable debugging aid.
- #
- # In addition to tokens, and some associated quantities, the tokenizer
- # also returns flags indication any special line types. These include
- # quotes, here_docs, formats.
- #
- # -----------------------------------------------------------------------
- #
- # How to add NEW_TOKENS:
- #
- # New token types will undoubtedly be needed in the future both to keep up
- # with changes in perl and to help adapt the tokenizer to other applications.
- #
- # Here are some notes on the minimal steps. I wrote these notes while
- # adding the 'v' token type for v-strings, which are things like version
- # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
- # can use your editor to search for the string "NEW_TOKENS" to find the
- # appropriate sections to change):
- #
- # *. Try to talk somebody else into doing it! If not, ..
- #
- # *. Make a backup of your current version in case things don't work out!
- #
- # *. Think of a new, unused character for the token type, and add to
- # the array @valid_token_types in the BEGIN section of this package.
- # For example, I used 'v' for v-strings.
- #
- # *. Implement coding to recognize the $type of the token in this routine.
- # This is the hardest part, and is best done by imitating or modifying
- # some of the existing coding. For example, to recognize v-strings, I
- # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
- # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
- #
- # *. Update sub operator_expected. This update is critically important but
- # the coding is trivial. Look at the comments in that routine for help.
- # For v-strings, which should behave like numbers, I just added 'v' to the
- # regex used to handle numbers and strings (types 'n' and 'Q').
- #
- # *. Implement a 'bond strength' rule in sub set_bond_strengths in
- # Perl::Tidy::Formatter for breaking lines around this token type. You can
- # skip this step and take the default at first, then adjust later to get
- # desired results. For adding type 'v', I looked at sub bond_strength and
- # saw that number type 'n' was using default strengths, so I didn't do
- # anything. I may tune it up someday if I don't like the way line
- # breaks with v-strings look.
- #
- # *. Implement a 'whitespace' rule in sub set_whitespace_flags in
- # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
- # and saw that type 'n' used spaces on both sides, so I just added 'v'
- # to the array @spaces_both_sides.
- #
- # *. Update HtmlWriter package so that users can colorize the token as
- # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
- # that package. For v-strings, I initially chose to use a default color
- # equal to the default for numbers, but it might be nice to change that
- # eventually.
- #
- # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
- #
- # *. Run lots and lots of debug tests. Start with special files designed
- # to test the new token type. Run with the -D flag to create a .DEBUG
- # file which shows the tokenization. When these work ok, test as many old
- # scripts as possible. Start with all of the '.t' files in the 'test'
- # directory of the distribution file. Compare .tdy output with previous
- # version and updated version to see the differences. Then include as
- # many more files as possible. My own technique has been to collect a huge
- # number of perl scripts (thousands!) into one directory and run perltidy
- # *, then run diff between the output of the previous version and the
- # current version.
- #
- # *. For another example, search for the smartmatch operator '~~'
- # with your editor to see where updates were made for it.
- #
- # -----------------------------------------------------------------------
-
- my $line_of_tokens = shift;
- my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
-
- # patch while coding change is underway
- # make callers private data to allow access
- # $tokenizer_self = $caller_tokenizer_self;
-
- # extract line number for use in error messages
- $input_line_number = $line_of_tokens->{_line_number};
-
- # reinitialize for multi-line quote
- $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
-
- # check for pod documentation
- if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
-
- # must not be in multi-line quote
- # and must not be in an equation
- if ( !$in_quote && ( operator_expected( 'b', '=', 'b' ) == TERM ) )
- {
- $tokenizer_self->{_in_pod} = 1;
- return;
- }
- }
-
- $input_line = $untrimmed_input_line;
-
- chomp $input_line;
-
- # trim start of this line unless we are continuing a quoted line
- # do not trim end because we might end in a quote (test: deken4.pl)
- # Perl::Tidy::Formatter will delete needless trailing blanks
- unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
- $input_line =~ s/^\s*//; # trim left end
- }
-
- # Set a flag to indicate if we might be at an __END__ or __DATA__ line
- # This will be used below to avoid quoting a bare word followed by
- # a fat comma.
- my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/;
-
- # update the copy of the line for use in error messages
- # This must be exactly what we give the pre_tokenizer
- $tokenizer_self->{_line_text} = $input_line;
-
- # re-initialize for the main loop
- $routput_token_list = []; # stack of output token indexes
- $routput_token_type = []; # token types
- $routput_block_type = []; # types of code block
- $routput_container_type = []; # paren types, such as if, elsif, ..
- $routput_type_sequence = []; # nesting sequential number
-
- $rhere_target_list = [];
-
- $tok = $last_nonblank_token;
- $type = $last_nonblank_type;
- $prototype = $last_nonblank_prototype;
- $last_nonblank_i = -1;
- $block_type = $last_nonblank_block_type;
- $container_type = $last_nonblank_container_type;
- $type_sequence = $last_nonblank_type_sequence;
- $indent_flag = 0;
- $peeked_ahead = 0;
-
- # tokenization is done in two stages..
- # stage 1 is a very simple pre-tokenization
- my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
-
- # a little optimization for a full-line comment
- if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
- $max_tokens_wanted = 1 # no use tokenizing a comment
- }
-
- # start by breaking the line into pre-tokens
- ( $rtokens, $rtoken_map, $rtoken_type ) =
- pre_tokenize( $input_line, $max_tokens_wanted );
-
- $max_token_index = scalar( @{$rtokens} ) - 1;
- push( @{$rtokens}, ' ', ' ', ' ' ); # extra whitespace simplifies logic
- push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
- push( @{$rtoken_type}, 'b', 'b', 'b' );
-
- # initialize for main loop
- foreach my $ii ( 0 .. $max_token_index + 3 ) {
- $routput_token_type->[$ii] = "";
- $routput_block_type->[$ii] = "";
- $routput_container_type->[$ii] = "";
- $routput_type_sequence->[$ii] = "";
- $routput_indent_flag->[$ii] = 0;
- }
- $i = -1;
- $i_tok = -1;
-
- # ------------------------------------------------------------
- # begin main tokenization loop
- # ------------------------------------------------------------
-
- # we are looking at each pre-token of one line and combining them
- # into tokens
- while ( ++$i <= $max_token_index ) {
-
- if ($in_quote) { # continue looking for end of a quote
- $type = $quote_type;
-
- unless ( @{$routput_token_list} )
- { # initialize if continuation line
- push( @{$routput_token_list}, $i );
- $routput_token_type->[$i] = $type;
-
- }
- $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
-
- # scan for the end of the quote or pattern
- (
- $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
- $quoted_string_1, $quoted_string_2
- )
- = do_quote(
- $i, $in_quote, $quote_character,
- $quote_pos, $quote_depth, $quoted_string_1,
- $quoted_string_2, $rtokens, $rtoken_map,
- $max_token_index
- );
-
- # all done if we didn't find it
- last if ($in_quote);
-
- # save pattern and replacement text for rescanning
- my $qs1 = $quoted_string_1;
- my $qs2 = $quoted_string_2;
-
- # re-initialize for next search
- $quote_character = '';
- $quote_pos = 0;
- $quote_type = 'Q';
- $quoted_string_1 = "";
- $quoted_string_2 = "";
- last if ( ++$i > $max_token_index );
-
- # look for any modifiers
- if ($allowed_quote_modifiers) {
-
- # check for exact quote modifiers
- if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
- my $str = $rtokens->[$i];
- my $saw_modifier_e;
- while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
- my $pos = pos($str);
- my $char = substr( $str, $pos - 1, 1 );
- $saw_modifier_e ||= ( $char eq 'e' );
- }
-
- # For an 'e' quote modifier we must scan the replacement
- # text for here-doc targets.
- if ($saw_modifier_e) {
-
- my $rht = scan_replacement_text($qs1);
-
- # Change type from 'Q' to 'h' for quotes with
- # here-doc targets so that the formatter (see sub
- # print_line_of_tokens) will not make any line
- # breaks after this point.
- if ($rht) {
- push @{$rhere_target_list}, @{$rht};
- $type = 'h';
- if ( $i_tok < 0 ) {
- my $ilast = $routput_token_list->[-1];
- $routput_token_type->[$ilast] = $type;
- }
- }
- }
-
- if ( defined( pos($str) ) ) {
-
- # matched
- if ( pos($str) == length($str) ) {
- last if ( ++$i > $max_token_index );
- }
-
- # Looks like a joined quote modifier
- # and keyword, maybe something like
- # s/xxx/yyy/gefor @k=...
- # Example is "galgen.pl". Would have to split
- # the word and insert a new token in the
- # pre-token list. This is so rare that I haven't
- # done it. Will just issue a warning citation.
-
- # This error might also be triggered if my quote
- # modifier characters are incomplete
- else {
- warning(<<EOM);
-
-Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
-Please put a space between quote modifiers and trailing keywords.
-EOM
-
- # print "token $rtokens->[$i]\n";
- # my $num = length($str) - pos($str);
- # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
- # print "continuing with new token $rtokens->[$i]\n";
-
- # skipping past this token does least damage
- last if ( ++$i > $max_token_index );
- }
- }
- else {
-
- # example file: rokicki4.pl
- # This error might also be triggered if my quote
- # modifier characters are incomplete
- write_logfile_entry(
-"Note: found word $str at quote modifier location\n"
- );
- }
- }
-
- # re-initialize
- $allowed_quote_modifiers = "";
- }
- }
-
- unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
-
- # try to catch some common errors
- if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
-
- if ( $last_nonblank_token eq 'eq' ) {
- complain("Should 'eq' be '==' here ?\n");
- }
- elsif ( $last_nonblank_token eq 'ne' ) {
- complain("Should 'ne' be '!=' here ?\n");
- }
- }
-
- $last_last_nonblank_token = $last_nonblank_token;
- $last_last_nonblank_type = $last_nonblank_type;
- $last_last_nonblank_block_type = $last_nonblank_block_type;
- $last_last_nonblank_container_type =
- $last_nonblank_container_type;
- $last_last_nonblank_type_sequence =
- $last_nonblank_type_sequence;
- $last_nonblank_token = $tok;
- $last_nonblank_type = $type;
- $last_nonblank_prototype = $prototype;
- $last_nonblank_block_type = $block_type;
- $last_nonblank_container_type = $container_type;
- $last_nonblank_type_sequence = $type_sequence;
- $last_nonblank_i = $i_tok;
- }
-
- # store previous token type
- if ( $i_tok >= 0 ) {
- $routput_token_type->[$i_tok] = $type;
- $routput_block_type->[$i_tok] = $block_type;
- $routput_container_type->[$i_tok] = $container_type;
- $routput_type_sequence->[$i_tok] = $type_sequence;
- $routput_indent_flag->[$i_tok] = $indent_flag;
- }
- my $pre_tok = $rtokens->[$i]; # get the next pre-token
- my $pre_type = $rtoken_type->[$i]; # and type
- $tok = $pre_tok;
- $type = $pre_type; # to be modified as necessary
- $block_type = ""; # blank for all tokens except code block braces
- $container_type = ""; # blank for all tokens except some parens
- $type_sequence = ""; # blank for all tokens except ?/:
- $indent_flag = 0;
- $prototype = ""; # blank for all tokens except user defined subs
- $i_tok = $i;
-
- # this pre-token will start an output token
- push( @{$routput_token_list}, $i_tok );
-
- # continue gathering identifier if necessary
- # but do not start on blanks and comments
- if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
-
- if ( $id_scan_state =~ /^(sub|package)/ ) {
- scan_id();
- }
- else {
- scan_identifier();
- }
-
- last if ($id_scan_state);
- next if ( ( $i > 0 ) || $type );
-
- # didn't find any token; start over
- $type = $pre_type;
- $tok = $pre_tok;
- }
-
- # handle whitespace tokens..
- next if ( $type eq 'b' );
- my $prev_tok = $i > 0 ? $rtokens->[ $i - 1 ] : ' ';
- my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b';
-
- # Build larger tokens where possible, since we are not in a quote.
- #
- # First try to assemble digraphs. The following tokens are
- # excluded and handled specially:
- # '/=' is excluded because the / might start a pattern.
- # 'x=' is excluded since it might be $x=, with $ on previous line
- # '**' and *= might be typeglobs of punctuation variables
- # I have allowed tokens starting with <, such as <=,
- # because I don't think these could be valid angle operators.
- # test file: storrs4.pl
- my $test_tok = $tok . $rtokens->[ $i + 1 ];
- my $combine_ok = $is_digraph{$test_tok};
-
- # check for special cases which cannot be combined
- if ($combine_ok) {
-
- # '//' must be defined_or operator if an operator is expected.
- # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
- # could be migrated here for clarity
-
- # Patch for RT#102371, misparsing a // in the following snippet:
- # state $b //= ccc();
- # The solution is to always accept the digraph (or trigraph) after
- # token type 'Z' (possible file handle). The reason is that
- # sub operator_expected gives TERM expected here, which is
- # wrong in this case.
- if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
- my $next_type = $rtokens->[ $i + 1 ];
- my $expecting =
- operator_expected( $prev_type, $tok, $next_type );
-
- # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
- $combine_ok = 0 if ( $expecting == TERM );
- }
-
- # Patch for RT #114359: Missparsing of "print $x ** 0.5;
- # Accept the digraphs '**' only after type 'Z'
- # Otherwise postpone the decision.
- if ( $test_tok eq '**' ) {
- if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
- }
- }
-
- if (
- $combine_ok
-
- && ( $test_tok ne '/=' ) # might be pattern
- && ( $test_tok ne 'x=' ) # might be $x
- && ( $test_tok ne '*=' ) # typeglob?
-
- # Moved above as part of fix for
- # RT #114359: Missparsing of "print $x ** 0.5;
- # && ( $test_tok ne '**' ) # typeglob?
- )
- {
- $tok = $test_tok;
- $i++;
-
- # Now try to assemble trigraphs. Note that all possible
- # perl trigraphs can be constructed by appending a character
- # to a digraph.
- $test_tok = $tok . $rtokens->[ $i + 1 ];
-
- if ( $is_trigraph{$test_tok} ) {
- $tok = $test_tok;
- $i++;
- }
-
- # The only current tetragraph is the double diamond operator
- # and its first three characters are not a trigraph, so
- # we do can do a special test for it
- elsif ( $test_tok eq '<<>' ) {
- $test_tok .= $rtokens->[ $i + 2 ];
- if ( $is_tetragraph{$test_tok} ) {
- $tok = $test_tok;
- $i += 2;
- }
- }
- }
-
- $type = $tok;
- $next_tok = $rtokens->[ $i + 1 ];
- $next_type = $rtoken_type->[ $i + 1 ];
-
- TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
- local $" = ')(';
- my @debug_list = (
- $last_nonblank_token, $tok,
- $next_tok, $brace_depth,
- $brace_type[$brace_depth], $paren_depth,
- $paren_type[$paren_depth]
- );
- print STDOUT "TOKENIZE:(@debug_list)\n";
- };
-
- # turn off attribute list on first non-blank, non-bareword
- if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
-
- ###############################################################
- # We have the next token, $tok.
- # Now we have to examine this token and decide what it is
- # and define its $type
- #
- # section 1: bare words
- ###############################################################
-
- if ( $pre_type eq 'w' ) {
- $expecting = operator_expected( $prev_type, $tok, $next_type );
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens, $max_token_index );
-
- # ATTRS: handle sub and variable attributes
- if ($in_attribute_list) {
-
- # treat bare word followed by open paren like qw(
- if ( $next_nonblank_token eq '(' ) {
- $in_quote = $quote_items{'q'};
- $allowed_quote_modifiers = $quote_modifiers{'q'};
- $type = 'q';
- $quote_type = 'q';
- next;
- }
-
- # handle bareword not followed by open paren
- else {
- $type = 'w';
- next;
- }
- }
-
- # quote a word followed by => operator
- # unless the word __END__ or __DATA__ and the only word on
- # the line.
- if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) {
-
- if ( $rtokens->[ $i_next + 1 ] eq '>' ) {
- if ( $is_constant{$current_package}{$tok} ) {
- $type = 'C';
- }
- elsif ( $is_user_function{$current_package}{$tok} ) {
- $type = 'U';
- $prototype =
- $user_function_prototype{$current_package}{$tok};
- }
- elsif ( $tok =~ /^v\d+$/ ) {
- $type = 'v';
- report_v_string($tok);
- }
- else { $type = 'w' }
-
- next;
- }
- }
-
- # quote a bare word within braces..like xxx->{s}; note that we
- # must be sure this is not a structural brace, to avoid
- # mistaking {s} in the following for a quoted bare word:
- # for(@[){s}bla}BLA}
- # Also treat q in something like var{-q} as a bare word, not qoute operator
- if (
- $next_nonblank_token eq '}'
- && (
- $last_nonblank_type eq 'L'
- || ( $last_nonblank_type eq 'm'
- && $last_last_nonblank_type eq 'L' )
- )
- )
- {
- $type = 'w';
- next;
- }
-
- # a bare word immediately followed by :: is not a keyword;
- # use $tok_kw when testing for keywords to avoid a mistake
- my $tok_kw = $tok;
- if ( $rtokens->[ $i + 1 ] eq ':'
- && $rtokens->[ $i + 2 ] eq ':' )
- {
- $tok_kw .= '::';
- }
-
- # handle operator x (now we know it isn't $x=)
- if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
- if ( $tok eq 'x' ) {
-
- if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
- $tok = 'x=';
- $type = $tok;
- $i++;
- }
- else {
- $type = 'x';
- }
- }
-
- # FIXME: Patch: mark something like x4 as an integer for now
- # It gets fixed downstream. This is easier than
- # splitting the pretoken.
- else {
- $type = 'n';
- }
- }
- elsif ( $tok_kw eq 'CORE::' ) {
- $type = $tok = $tok_kw;
- $i += 2;
- }
- elsif ( ( $tok eq 'strict' )
- and ( $last_nonblank_token eq 'use' ) )
- {
- $tokenizer_self->{_saw_use_strict} = 1;
- scan_bare_identifier();
- }
-
- elsif ( ( $tok eq 'warnings' )
- and ( $last_nonblank_token eq 'use' ) )
- {
- $tokenizer_self->{_saw_perl_dash_w} = 1;
-
- # scan as identifier, so that we pick up something like:
- # use warnings::register
- scan_bare_identifier();
- }
-
- elsif (
- $tok eq 'AutoLoader'
- && $tokenizer_self->{_look_for_autoloader}
- && (
- $last_nonblank_token eq 'use'
-
- # these regexes are from AutoSplit.pm, which we want
- # to mimic
- || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
- || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
- )
- )
- {
- write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
- $tokenizer_self->{_saw_autoloader} = 1;
- $tokenizer_self->{_look_for_autoloader} = 0;
- scan_bare_identifier();
- }
-
- elsif (
- $tok eq 'SelfLoader'
- && $tokenizer_self->{_look_for_selfloader}
- && ( $last_nonblank_token eq 'use'
- || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
- || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
- )
- {
- write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
- $tokenizer_self->{_saw_selfloader} = 1;
- $tokenizer_self->{_look_for_selfloader} = 0;
- scan_bare_identifier();
- }
-
- elsif ( ( $tok eq 'constant' )
- and ( $last_nonblank_token eq 'use' ) )
- {
- scan_bare_identifier();
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens,
- $max_token_index );
-
- if ($next_nonblank_token) {
-
- if ( $is_keyword{$next_nonblank_token} ) {
-
- # Assume qw is used as a quote and okay, as in:
- # use constant qw{ DEBUG 0 };
- # Not worth trying to parse for just a warning
-
- # NOTE: This warning is deactivated because recent
- # versions of perl do not complain here, but
- # the coding is retained for reference.
- if ( 0 && $next_nonblank_token ne 'qw' ) {
- warning(
-"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
- );
- }
- }
-
- # FIXME: could check for error in which next token is
- # not a word (number, punctuation, ..)
- else {
- $is_constant{$current_package}{$next_nonblank_token}
- = 1;
- }
- }
- }
-
- # various quote operators
- elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
-##NICOL PATCH
- if ( $expecting == OPERATOR ) {
-
- # Be careful not to call an error for a qw quote
- # where a parenthesized list is allowed. For example,
- # it could also be a for/foreach construct such as
- #
- # foreach my $key qw\Uno Due Tres Quadro\ {
- # print "Set $key\n";
- # }
- #
-
- # Or it could be a function call.
- # NOTE: Braces in something like &{ xxx } are not
- # marked as a block, we might have a method call.
- # &method(...), $method->(..), &{method}(...),
- # $ref[2](list) is ok & short for $ref[2]->(list)
- #
- # See notes in 'sub code_block_type' and
- # 'sub is_non_structural_brace'
-
- unless (
- $tok eq 'qw'
- && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
- || $is_for_foreach{$want_paren} )
- )
- {
- error_if_expecting_OPERATOR();
- }
- }
- $in_quote = $quote_items{$tok};
- $allowed_quote_modifiers = $quote_modifiers{$tok};
-
- # All quote types are 'Q' except possibly qw quotes.
- # qw quotes are special in that they may generally be trimmed
- # of leading and trailing whitespace. So they are given a
- # separate type, 'q', unless requested otherwise.
- $type =
- ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
- ? 'q'
- : 'Q';
- $quote_type = $type;
- }
-
- # check for a statement label
- elsif (
- ( $next_nonblank_token eq ':' )
- && ( $rtokens->[ $i_next + 1 ] ne ':' )
- && ( $i_next <= $max_token_index ) # colon on same line
- && label_ok()
- )
- {
- if ( $tok !~ /[A-Z]/ ) {
- push @{ $tokenizer_self->{_rlower_case_labels_at} },
- $input_line_number;
- }
- $type = 'J';
- $tok .= ':';
- $i = $i_next;
- next;
- }
-
- # 'sub' || 'package'
- elsif ( $is_sub_package{$tok_kw} ) {
- error_if_expecting_OPERATOR()
- if ( $expecting == OPERATOR );
- scan_id();
- }
-
- # Note on token types for format, __DATA__, __END__:
- # It simplifies things to give these type ';', so that when we
- # start rescanning we will be expecting a token of type TERM.
- # We will switch to type 'k' before outputting the tokens.
- elsif ( $is_format_END_DATA{$tok_kw} ) {
- $type = ';'; # make tokenizer look for TERM next
- $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
- last;
- }
-
- elsif ( $is_keyword{$tok_kw} ) {
- $type = 'k';
-
- # Since for and foreach may not be followed immediately
- # by an opening paren, we have to remember which keyword
- # is associated with the next '('
- if ( $is_for_foreach{$tok} ) {
- if ( new_statement_ok() ) {
- $want_paren = $tok;
- }
- }
-
- # recognize 'use' statements, which are special
- elsif ( $is_use_require{$tok} ) {
- $statement_type = $tok;
- error_if_expecting_OPERATOR()
- if ( $expecting == OPERATOR );
- }
-
- # remember my and our to check for trailing ": shared"
- elsif ( $is_my_our{$tok} ) {
- $statement_type = $tok;
- }
-
- # Check for misplaced 'elsif' and 'else', but allow isolated
- # else or elsif blocks to be formatted. This is indicated
- # by a last noblank token of ';'
- elsif ( $tok eq 'elsif' ) {
- if ( $last_nonblank_token ne ';'
- && $last_nonblank_block_type !~
- /^(if|elsif|unless)$/ )
- {
- warning(
-"expecting '$tok' to follow one of 'if|elsif|unless'\n"
- );
- }
- }
- elsif ( $tok eq 'else' ) {
-
- # patched for SWITCH/CASE
- if (
- $last_nonblank_token ne ';'
- && $last_nonblank_block_type !~
- /^(if|elsif|unless|case|when)$/
-
- # patch to avoid an unwanted error message for
- # the case of a parenless 'case' (RT 105484):
- # switch ( 1 ) { case x { 2 } else { } }
- && $statement_type !~
- /^(if|elsif|unless|case|when)$/
- )
- {
- warning(
-"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
- );
- }
- }
- elsif ( $tok eq 'continue' ) {
- if ( $last_nonblank_token ne ';'
- && $last_nonblank_block_type !~
- /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
- {
-
- # note: ';' '{' and '}' in list above
- # because continues can follow bare blocks;
- # ':' is labeled block
- #
- ############################################
- # NOTE: This check has been deactivated because
- # continue has an alternative usage for given/when
- # blocks in perl 5.10
- ## warning("'$tok' should follow a block\n");
- ############################################
- }
- }
-
- # patch for SWITCH/CASE if 'case' and 'when are
- # treated as keywords.
- elsif ( $tok eq 'when' || $tok eq 'case' ) {
- $statement_type = $tok; # next '{' is block
- }
-
- #
- # indent trailing if/unless/while/until
- # outdenting will be handled by later indentation loop
-## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
-##$opt_o = 1
-## if !(
-## $opt_b
-## || $opt_c
-## || $opt_d
-## || $opt_f
-## || $opt_i
-## || $opt_l
-## || $opt_o
-## || $opt_x
-## );
-## if ( $tok =~ /^(if|unless|while|until)$/
-## && $next_nonblank_token ne '(' )
-## {
-## $indent_flag = 1;
-## }
- }
-
- # check for inline label following
- # /^(redo|last|next|goto)$/
- elsif (( $last_nonblank_type eq 'k' )
- && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
- {
- $type = 'j';
- next;
- }
-
- # something else --
- else {
-
- scan_bare_identifier();
- if ( $type eq 'w' ) {
-
- if ( $expecting == OPERATOR ) {
-
- # don't complain about possible indirect object
- # notation.
- # For example:
- # package main;
- # sub new($) { ... }
- # $b = new A::; # calls A::new
- # $c = new A; # same thing but suspicious
- # This will call A::new but we have a 'new' in
- # main:: which looks like a constant.
- #
- if ( $last_nonblank_type eq 'C' ) {
- if ( $tok !~ /::$/ ) {
- complain(<<EOM);
-Expecting operator after '$last_nonblank_token' but found bare word '$tok'
- Maybe indirectet object notation?
-EOM
- }
- }
- else {
- error_if_expecting_OPERATOR("bareword");
- }
- }
-
- # mark bare words immediately followed by a paren as
- # functions
- $next_tok = $rtokens->[ $i + 1 ];
- if ( $next_tok eq '(' ) {
- $type = 'U';
- }
-
- # underscore after file test operator is file handle
- if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
- $type = 'Z';
- }
-
- # patch for SWITCH/CASE if 'case' and 'when are
- # not treated as keywords:
- if (
- (
- $tok eq 'case'
- && $brace_type[$brace_depth] eq 'switch'
- )
- || ( $tok eq 'when'
- && $brace_type[$brace_depth] eq 'given' )
- )
- {
- $statement_type = $tok; # next '{' is block
- $type = 'k'; # for keyword syntax coloring
- }
-
- # patch for SWITCH/CASE if switch and given not keywords
- # Switch is not a perl 5 keyword, but we will gamble
- # and mark switch followed by paren as a keyword. This
- # is only necessary to get html syntax coloring nice,
- # and does not commit this as being a switch/case.
- if ( $next_nonblank_token eq '('
- && ( $tok eq 'switch' || $tok eq 'given' ) )
- {
- $type = 'k'; # for keyword syntax coloring
- }
- }
- }
- }
-
- ###############################################################
- # section 2: strings of digits
- ###############################################################
- elsif ( $pre_type eq 'd' ) {
- $expecting = operator_expected( $prev_type, $tok, $next_type );
- error_if_expecting_OPERATOR("Number")
- if ( $expecting == OPERATOR );
- my $number = scan_number();
- if ( !defined($number) ) {
-
- # shouldn't happen - we should always get a number
- warning("non-number beginning with digit--program bug\n");
- report_definite_bug();
- }
- }
-
- ###############################################################
- # section 3: all other tokens
- ###############################################################
-
- else {
- last if ( $tok eq '#' );
- my $code = $tokenization_code->{$tok};
- if ($code) {
- $expecting =
- operator_expected( $prev_type, $tok, $next_type );
- $code->();
- redo if $in_quote;
- }
- }
- }
-
- # -----------------------------
- # end of main tokenization loop
- # -----------------------------
-
- if ( $i_tok >= 0 ) {
- $routput_token_type->[$i_tok] = $type;
- $routput_block_type->[$i_tok] = $block_type;
- $routput_container_type->[$i_tok] = $container_type;
- $routput_type_sequence->[$i_tok] = $type_sequence;
- $routput_indent_flag->[$i_tok] = $indent_flag;
- }
-
- unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
- $last_last_nonblank_token = $last_nonblank_token;
- $last_last_nonblank_type = $last_nonblank_type;
- $last_last_nonblank_block_type = $last_nonblank_block_type;
- $last_last_nonblank_container_type = $last_nonblank_container_type;
- $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
- $last_nonblank_token = $tok;
- $last_nonblank_type = $type;
- $last_nonblank_block_type = $block_type;
- $last_nonblank_container_type = $container_type;
- $last_nonblank_type_sequence = $type_sequence;
- $last_nonblank_prototype = $prototype;
- }
-
- # reset indentation level if necessary at a sub or package
- # in an attempt to recover from a nesting error
- if ( $level_in_tokenizer < 0 ) {
- if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
- reset_indentation_level(0);
- brace_warning("resetting level to 0 at $1 $2\n");
- }
- }
-
- # all done tokenizing this line ...
- # now prepare the final list of tokens and types
-
- my @token_type = (); # stack of output token types
- my @block_type = (); # stack of output code block types
- my @container_type = (); # stack of output code container types
- my @type_sequence = (); # stack of output type sequence numbers
- my @tokens = (); # output tokens
- my @levels = (); # structural brace levels of output tokens
- my @slevels = (); # secondary nesting levels of output tokens
- my @nesting_tokens = (); # string of tokens leading to this depth
- my @nesting_types = (); # string of token types leading to this depth
- my @nesting_blocks = (); # string of block types leading to this depth
- my @nesting_lists = (); # string of list types leading to this depth
- my @ci_string = (); # string needed to compute continuation indentation
- my @container_environment = (); # BLOCK or LIST
- my $container_environment = '';
- my $im = -1; # previous $i value
- my $num;
- my $ci_string_sum = ones_count($ci_string_in_tokenizer);
-
-# Computing Token Indentation
-#
-# The final section of the tokenizer forms tokens and also computes
-# parameters needed to find indentation. It is much easier to do it
-# in the tokenizer than elsewhere. Here is a brief description of how
-# indentation is computed. Perl::Tidy computes indentation as the sum
-# of 2 terms:
-#
-# (1) structural indentation, such as if/else/elsif blocks
-# (2) continuation indentation, such as long parameter call lists.
-#
-# These are occasionally called primary and secondary indentation.
-#
-# Structural indentation is introduced by tokens of type '{', although
-# the actual tokens might be '{', '(', or '['. Structural indentation
-# is of two types: BLOCK and non-BLOCK. Default structural indentation
-# is 4 characters if the standard indentation scheme is used.
-#
-# Continuation indentation is introduced whenever a line at BLOCK level
-# is broken before its termination. Default continuation indentation
-# is 2 characters in the standard indentation scheme.
-#
-# Both types of indentation may be nested arbitrarily deep and
-# interlaced. The distinction between the two is somewhat arbitrary.
-#
-# For each token, we will define two variables which would apply if
-# the current statement were broken just before that token, so that
-# that token started a new line:
-#
-# $level = the structural indentation level,
-# $ci_level = the continuation indentation level
-#
-# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
-# assuming defaults. However, in some special cases it is customary
-# to modify $ci_level from this strict value.
-#
-# The total structural indentation is easy to compute by adding and
-# subtracting 1 from a saved value as types '{' and '}' are seen. The
-# running value of this variable is $level_in_tokenizer.
-#
-# The total continuation is much more difficult to compute, and requires
-# several variables. These variables are:
-#
-# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
-# each indentation level, if there are intervening open secondary
-# structures just prior to that level.
-# $continuation_string_in_tokenizer = a string of 1's and 0's indicating
-# if the last token at that level is "continued", meaning that it
-# is not the first token of an expression.
-# $nesting_block_string = a string of 1's and 0's indicating, for each
-# indentation level, if the level is of type BLOCK or not.
-# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
-# $nesting_list_string = a string of 1's and 0's indicating, for each
-# indentation level, if it is appropriate for list formatting.
-# If so, continuation indentation is used to indent long list items.
-# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
-# @{$rslevel_stack} = a stack of total nesting depths at each
-# structural indentation level, where "total nesting depth" means
-# the nesting depth that would occur if every nesting token -- '{', '[',
-# and '(' -- , regardless of context, is used to compute a nesting
-# depth.
-
- #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
- #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
-
- my ( $ci_string_i, $level_i, $nesting_block_string_i,
- $nesting_list_string_i, $nesting_token_string_i,
- $nesting_type_string_i, );
-
- foreach my $i ( @{$routput_token_list} )
- { # scan the list of pre-tokens indexes
-
- # self-checking for valid token types
- my $type = $routput_token_type->[$i];
- my $forced_indentation_flag = $routput_indent_flag->[$i];
-
- # See if we should undo the $forced_indentation_flag.
- # Forced indentation after 'if', 'unless', 'while' and 'until'
- # expressions without trailing parens is optional and doesn't
- # always look good. It is usually okay for a trailing logical
- # expression, but if the expression is a function call, code block,
- # or some kind of list it puts in an unwanted extra indentation
- # level which is hard to remove.
- #
- # Example where extra indentation looks ok:
- # return 1
- # if $det_a < 0 and $det_b > 0
- # or $det_a > 0 and $det_b < 0;
- #
- # Example where extra indentation is not needed because
- # the eval brace also provides indentation:
- # print "not " if defined eval {
- # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
- # };
- #
- # The following rule works fairly well:
- # Undo the flag if the end of this line, or start of the next
- # line, is an opening container token or a comma.
- # This almost always works, but if not after another pass it will
- # be stable.
- if ( $forced_indentation_flag && $type eq 'k' ) {
- my $ixlast = -1;
- my $ilast = $routput_token_list->[$ixlast];
- my $toklast = $routput_token_type->[$ilast];
- if ( $toklast eq '#' ) {
- $ixlast--;
- $ilast = $routput_token_list->[$ixlast];
- $toklast = $routput_token_type->[$ilast];
- }
- if ( $toklast eq 'b' ) {
- $ixlast--;
- $ilast = $routput_token_list->[$ixlast];
- $toklast = $routput_token_type->[$ilast];
- }
- if ( $toklast =~ /^[\{,]$/ ) {
- $forced_indentation_flag = 0;
- }
- else {
- ( $toklast, my $i_next ) =
- find_next_nonblank_token( $max_token_index, $rtokens,
- $max_token_index );
- if ( $toklast =~ /^[\{,]$/ ) {
- $forced_indentation_flag = 0;
- }
- }
- }
-
- # if we are already in an indented if, see if we should outdent
- if ($indented_if_level) {
-
- # don't try to nest trailing if's - shouldn't happen
- if ( $type eq 'k' ) {
- $forced_indentation_flag = 0;
- }
-
- # check for the normal case - outdenting at next ';'
- elsif ( $type eq ';' ) {
- if ( $level_in_tokenizer == $indented_if_level ) {
- $forced_indentation_flag = -1;
- $indented_if_level = 0;
- }
- }
-
- # handle case of missing semicolon
- elsif ( $type eq '}' ) {
- if ( $level_in_tokenizer == $indented_if_level ) {
- $indented_if_level = 0;
-
- # TBD: This could be a subroutine call
- $level_in_tokenizer--;
- if ( @{$rslevel_stack} > 1 ) {
- pop( @{$rslevel_stack} );
- }
- if ( length($nesting_block_string) > 1 )
- { # true for valid script
- chop $nesting_block_string;
- chop $nesting_list_string;
- }
-
- }
- }
- }
-
- my $tok = $rtokens->[$i]; # the token, but ONLY if same as pretoken
- $level_i = $level_in_tokenizer;
-
- # This can happen by running perltidy on non-scripts
- # although it could also be bug introduced by programming change.
- # Perl silently accepts a 032 (^Z) and takes it as the end
- if ( !$is_valid_token_type{$type} ) {
- my $val = ord($type);
- warning(
- "unexpected character decimal $val ($type) in script\n");
- $tokenizer_self->{_in_error} = 1;
- }
-
- # ----------------------------------------------------------------
- # TOKEN TYPE PATCHES
- # output __END__, __DATA__, and format as type 'k' instead of ';'
- # to make html colors correct, etc.
- my $fix_type = $type;
- if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
-
- # output anonymous 'sub' as keyword
- if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
-
- # -----------------------------------------------------------------
-
- $nesting_token_string_i = $nesting_token_string;
- $nesting_type_string_i = $nesting_type_string;
- $nesting_block_string_i = $nesting_block_string;
- $nesting_list_string_i = $nesting_list_string;
-
- # set primary indentation levels based on structural braces
- # Note: these are set so that the leading braces have a HIGHER
- # level than their CONTENTS, which is convenient for indentation
- # Also, define continuation indentation for each token.
- if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
- {
-
- # use environment before updating
- $container_environment =
- $nesting_block_flag ? 'BLOCK'
- : $nesting_list_flag ? 'LIST'
- : "";
-
- # if the difference between total nesting levels is not 1,
- # there are intervening non-structural nesting types between
- # this '{' and the previous unclosed '{'
- my $intervening_secondary_structure = 0;
- if ( @{$rslevel_stack} ) {
- $intervening_secondary_structure =
- $slevel_in_tokenizer - $rslevel_stack->[-1];
- }
-
- # Continuation Indentation
- #
- # Having tried setting continuation indentation both in the formatter and
- # in the tokenizer, I can say that setting it in the tokenizer is much,
- # much easier. The formatter already has too much to do, and can't
- # make decisions on line breaks without knowing what 'ci' will be at
- # arbitrary locations.
- #
- # But a problem with setting the continuation indentation (ci) here
- # in the tokenizer is that we do not know where line breaks will actually
- # be. As a result, we don't know if we should propagate continuation
- # indentation to higher levels of structure.
- #
- # For nesting of only structural indentation, we never need to do this.
- # For example, in a long if statement, like this
- #
- # if ( !$output_block_type[$i]
- # && ($in_statement_continuation) )
- # { <--outdented
- # do_something();
- # }
- #
- # the second line has ci but we do normally give the lines within the BLOCK
- # any ci. This would be true if we had blocks nested arbitrarily deeply.
- #
- # But consider something like this, where we have created a break after
- # an opening paren on line 1, and the paren is not (currently) a
- # structural indentation token:
- #
- # my $file = $menubar->Menubutton(
- # qw/-text File -underline 0 -menuitems/ => [
- # [
- # Cascade => '~View',
- # -menuitems => [
- # ...
- #
- # The second line has ci, so it would seem reasonable to propagate it
- # down, giving the third line 1 ci + 1 indentation. This suggests the
- # following rule, which is currently used to propagating ci down: if there
- # are any non-structural opening parens (or brackets, or braces), before
- # an opening structural brace, then ci is propagated down, and otherwise
- # not. The variable $intervening_secondary_structure contains this
- # information for the current token, and the string
- # "$ci_string_in_tokenizer" is a stack of previous values of this
- # variable.
-
- # save the current states
- push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
- $level_in_tokenizer++;
-
- if ($forced_indentation_flag) {
-
- # break BEFORE '?' when there is forced indentation
- if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
- if ( $type eq 'k' ) {
- $indented_if_level = $level_in_tokenizer;
- }
-
- # do not change container environment here if we are not
- # at a real list. Adding this check prevents "blinkers"
- # often near 'unless" clauses, such as in the following
- # code:
-## next
-## unless -e (
-## $archive =
-## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
-## );
-
- $nesting_block_string .= "$nesting_block_flag";
- }
- else {
-
- if ( $routput_block_type->[$i] ) {
- $nesting_block_flag = 1;
- $nesting_block_string .= '1';
- }
- else {
- $nesting_block_flag = 0;
- $nesting_block_string .= '0';
- }
- }
-
- # we will use continuation indentation within containers
- # which are not blocks and not logical expressions
- my $bit = 0;
- if ( !$routput_block_type->[$i] ) {
-
- # propagate flag down at nested open parens
- if ( $routput_container_type->[$i] eq '(' ) {
- $bit = 1 if $nesting_list_flag;
- }
-
- # use list continuation if not a logical grouping
- # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
- else {
- $bit = 1
- unless
- $is_logical_container{ $routput_container_type->[$i]
- };
- }
- }
- $nesting_list_string .= $bit;
- $nesting_list_flag = $bit;
-
- $ci_string_in_tokenizer .=
- ( $intervening_secondary_structure != 0 ) ? '1' : '0';
- $ci_string_sum = ones_count($ci_string_in_tokenizer);
- $continuation_string_in_tokenizer .=
- ( $in_statement_continuation > 0 ) ? '1' : '0';
-
- # Sometimes we want to give an opening brace continuation indentation,
- # and sometimes not. For code blocks, we don't do it, so that the leading
- # '{' gets outdented, like this:
- #
- # if ( !$output_block_type[$i]
- # && ($in_statement_continuation) )
- # { <--outdented
- #
- # For other types, we will give them continuation indentation. For example,
- # here is how a list looks with the opening paren indented:
- #
- # @LoL =
- # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
- # [ "homer", "marge", "bart" ], );
- #
- # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
-
- my $total_ci = $ci_string_sum;
- if (
- !$routput_block_type->[$i] # patch: skip for BLOCK
- && ($in_statement_continuation)
- && !( $forced_indentation_flag && $type eq ':' )
- )
- {
- $total_ci += $in_statement_continuation
- unless ( $ci_string_in_tokenizer =~ /1$/ );
- }
-
- $ci_string_i = $total_ci;
- $in_statement_continuation = 0;
- }
-
- elsif ($type eq '}'
- || $type eq 'R'
- || $forced_indentation_flag < 0 )
- {
-
- # only a nesting error in the script would prevent popping here
- if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
-
- $level_i = --$level_in_tokenizer;
-
- # restore previous level values
- if ( length($nesting_block_string) > 1 )
- { # true for valid script
- chop $nesting_block_string;
- $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
- chop $nesting_list_string;
- $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
-
- chop $ci_string_in_tokenizer;
- $ci_string_sum = ones_count($ci_string_in_tokenizer);
-
- $in_statement_continuation =
- chop $continuation_string_in_tokenizer;
-
- # zero continuation flag at terminal BLOCK '}' which
- # ends a statement.
- if ( $routput_block_type->[$i] ) {
-
- # ...These include non-anonymous subs
- # note: could be sub ::abc { or sub 'abc
- if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
-
- # note: older versions of perl require the /gc modifier
- # here or else the \G does not work.
- if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
- {
- $in_statement_continuation = 0;
- }
- }
-
-# ...and include all block types except user subs with
-# block prototypes and these: (sort|grep|map|do|eval)
-# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
- elsif (
- $is_zero_continuation_block_type{
- $routput_block_type->[$i]
- } )
- {
- $in_statement_continuation = 0;
- }
-
- # ..but these are not terminal types:
- # /^(sort|grep|map|do|eval)$/ )
- elsif (
- $is_not_zero_continuation_block_type{
- $routput_block_type->[$i]
- } )
- {
- }
-
- # ..and a block introduced by a label
- # /^\w+\s*:$/gc ) {
- elsif ( $routput_block_type->[$i] =~ /:$/ ) {
- $in_statement_continuation = 0;
- }
-
- # user function with block prototype
- else {
- $in_statement_continuation = 0;
- }
- }
-
- # If we are in a list, then
- # we must set continuation indentation at the closing
- # paren of something like this (paren after $check):
- # assert(
- # __LINE__,
- # ( not defined $check )
- # or ref $check
- # or $check eq "new"
- # or $check eq "old",
- # );
- elsif ( $tok eq ')' ) {
- $in_statement_continuation = 1
- if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
- }
-
- elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
- }
-
- # use environment after updating
- $container_environment =
- $nesting_block_flag ? 'BLOCK'
- : $nesting_list_flag ? 'LIST'
- : "";
- $ci_string_i = $ci_string_sum + $in_statement_continuation;
- $nesting_block_string_i = $nesting_block_string;
- $nesting_list_string_i = $nesting_list_string;
- }
-
- # not a structural indentation type..
- else {
-
- $container_environment =
- $nesting_block_flag ? 'BLOCK'
- : $nesting_list_flag ? 'LIST'
- : "";
-
- # zero the continuation indentation at certain tokens so
- # that they will be at the same level as its container. For
- # commas, this simplifies the -lp indentation logic, which
- # counts commas. For ?: it makes them stand out.
- if ($nesting_list_flag) {
- if ( $type =~ /^[,\?\:]$/ ) {
- $in_statement_continuation = 0;
- }
- }
-
- # be sure binary operators get continuation indentation
- if (
- $container_environment
- && ( $type eq 'k' && $is_binary_keyword{$tok}
- || $is_binary_type{$type} )
- )
- {
- $in_statement_continuation = 1;
- }
-
- # continuation indentation is sum of any open ci from previous
- # levels plus the current level
- $ci_string_i = $ci_string_sum + $in_statement_continuation;
-
- # update continuation flag ...
- # if this isn't a blank or comment..
- if ( $type ne 'b' && $type ne '#' ) {
-
- # and we are in a BLOCK
- if ($nesting_block_flag) {
-
- # the next token after a ';' and label starts a new stmt
- if ( $type eq ';' || $type eq 'J' ) {
- $in_statement_continuation = 0;
- }
-
- # otherwise, we are continuing the current statement
- else {
- $in_statement_continuation = 1;
- }
- }
-
- # if we are not in a BLOCK..
- else {
-
- # do not use continuation indentation if not list
- # environment (could be within if/elsif clause)
- if ( !$nesting_list_flag ) {
- $in_statement_continuation = 0;
- }
-
- # otherwise, the token after a ',' starts a new term
-
- # Patch FOR RT#99961; no continuation after a ';'
- # This is needed because perltidy currently marks
- # a block preceded by a type character like % or @
- # as a non block, to simplify formatting. But these
- # are actually blocks and can have semicolons.
- # See code_block_type() and is_non_structural_brace().
- elsif ( $type eq ',' || $type eq ';' ) {
- $in_statement_continuation = 0;
- }
-
- # otherwise, we are continuing the current term
- else {
- $in_statement_continuation = 1;
- }
- }
- }
- }
-
- if ( $level_in_tokenizer < 0 ) {
- unless ( $tokenizer_self->{_saw_negative_indentation} ) {
- $tokenizer_self->{_saw_negative_indentation} = 1;
- warning("Starting negative indentation\n");
- }
- }
-
- # set secondary nesting levels based on all containment token types
- # Note: these are set so that the nesting depth is the depth
- # of the PREVIOUS TOKEN, which is convenient for setting
- # the strength of token bonds
- my $slevel_i = $slevel_in_tokenizer;
-
- # /^[L\{\(\[]$/
- if ( $is_opening_type{$type} ) {
- $slevel_in_tokenizer++;
- $nesting_token_string .= $tok;
- $nesting_type_string .= $type;
- }
-
- # /^[R\}\)\]]$/
- elsif ( $is_closing_type{$type} ) {
- $slevel_in_tokenizer--;
- my $char = chop $nesting_token_string;
-
- if ( $char ne $matching_start_token{$tok} ) {
- $nesting_token_string .= $char . $tok;
- $nesting_type_string .= $type;
- }
- else {
- chop $nesting_type_string;
- }
- }
-
- push( @block_type, $routput_block_type->[$i] );
- push( @ci_string, $ci_string_i );
- push( @container_environment, $container_environment );
- push( @container_type, $routput_container_type->[$i] );
- push( @levels, $level_i );
- push( @nesting_tokens, $nesting_token_string_i );
- push( @nesting_types, $nesting_type_string_i );
- push( @slevels, $slevel_i );
- push( @token_type, $fix_type );
- push( @type_sequence, $routput_type_sequence->[$i] );
- push( @nesting_blocks, $nesting_block_string );
- push( @nesting_lists, $nesting_list_string );
-
- # now form the previous token
- if ( $im >= 0 ) {
- $num =
- $rtoken_map->[$i] - $rtoken_map->[$im]; # how many characters
-
- if ( $num > 0 ) {
- push( @tokens,
- substr( $input_line, $rtoken_map->[$im], $num ) );
- }
- }
- $im = $i;
- }
-
- $num = length($input_line) - $rtoken_map->[$im]; # make the last token
- if ( $num > 0 ) {
- push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) );
- }
-
- $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
- $tokenizer_self->{_in_quote} = $in_quote;
- $tokenizer_self->{_quote_target} =
- $in_quote ? matching_end_token($quote_character) : "";
- $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
-
- $line_of_tokens->{_rtoken_type} = \@token_type;
- $line_of_tokens->{_rtokens} = \@tokens;
- $line_of_tokens->{_rblock_type} = \@block_type;
- $line_of_tokens->{_rcontainer_type} = \@container_type;
- $line_of_tokens->{_rcontainer_environment} = \@container_environment;
- $line_of_tokens->{_rtype_sequence} = \@type_sequence;
- $line_of_tokens->{_rlevels} = \@levels;
- $line_of_tokens->{_rslevels} = \@slevels;
- $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
- $line_of_tokens->{_rci_levels} = \@ci_string;
- $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
-
- return;
- }
-} # end tokenize_this_line
-
-#########i#############################################################
-# Tokenizer routines which assist in identifying token types
-#######################################################################
-
-sub operator_expected {
-
-    # Decide whether an operator or a term is expected at the current token.
-    #
-    # Many perl symbols have two or more meanings.  For example, '<<'
-    # can be a shift operator or a here-doc operator.  The
-    # interpretation of these symbols depends on the current state of
-    # the tokenizer, which may either be expecting a term or an
-    # operator.  For this example, a << would be a shift if an operator
-    # is expected, and a here-doc if a term is expected.  This routine
-    # is called to make this decision for any current token.
-    #
-    # Parameters:
-    #   $prev_type - type of the token before $tok ('b' is a blank)
-    #   $tok       - the current token
-    #   $next_type - type of the token after $tok ('b' is a blank)
-    #
-    # Returns one of three possible values:
-    #
-    #     OPERATOR - operator expected (or at least, not a term)
-    #     UNKNOWN  - can't tell
-    #     TERM     - a term is expected (or at least, not an operator)
-    #
-    # The decision is based on what has been seen so far.  This
-    # information is stored in the "$last_nonblank_type" and
-    # "$last_nonblank_token" variables.  For example, if the
-    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
-    # if $last_nonblank_type is 'n' (numeric), we are expecting an
-    # OPERATOR.
-    #
-    # If a UNKNOWN is returned, the calling routine must guess. A major
-    # goal of this tokenizer is to minimize the possibility of returning
-    # UNKNOWN, because a wrong guess can spoil the formatting of a
-    # script.
-    #
-    # adding NEW_TOKENS: it is critically important that this routine be
-    # updated to allow it to determine if an operator or term is to be
-    # expected after the new token.  Doing this simply involves adding
-    # the new token character to one of the regexes in this routine or
-    # to one of the hash lists that it uses, which are initialized in
-    # the BEGIN section.
-    #
-    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
-    # $statement_type
-
-    my ( $prev_type, $tok, $next_type ) = @_;
-
-    my $op_expected = UNKNOWN;
-
-    ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
-
-    # Note: function prototype is available for token type 'U' for future
-    # program development.  It contains the leading and trailing parens,
-    # and no blanks.  It might be used to eliminate token type 'C', for
-    # example (prototype = '()'). Thus:
-    # if ($last_nonblank_type eq 'U') {
-    #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
-    # }
-
-    # A possible filehandle (or object) requires some care...
-    if ( $last_nonblank_type eq 'Z' ) {
-
-        # angle.t
-        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
-            $op_expected = UNKNOWN;
-        }
-
-        # For possible file handle like "$a", Perl uses weird parsing rules.
-        # For example:
-        # print $a/2,"/hi";   - division
-        # print $a / 2,"/hi"; - division
-        # print $a/ 2,"/hi";  - division
-        # print $a /2,"/hi";  - pattern (and error)!
-        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
-            $op_expected = TERM;
-        }
-
-        # Note when an operation is being done where a
-        # filehandle might be expected, since a change in whitespace
-        # could change the interpretation of the statement.
-        # (If $tok is not one of these operators the result stays UNKNOWN.)
-        else {
-            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
-                complain("operator in print statement not recommended\n");
-                $op_expected = OPERATOR;
-            }
-        }
-    }
-
-    # Check for smartmatch operator before preceding brace or square bracket.
-    # For example, at the ? after the ] in the following expressions we are
-    # expecting an operator:
-    #
-    # qr/3/ ~~ ['1234'] ? 1 : 0;
-    # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
-    elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
-        $op_expected = OPERATOR;
-    }
-
-    # handle something after 'do' and 'eval'
-    elsif ( $is_block_operator{$last_nonblank_token} ) {
-
-        # something like $a = eval "expression";
-        #                          ^
-        if ( $last_nonblank_type eq 'k' ) {
-            $op_expected = TERM;    # expression or list mode following keyword
-        }
-
-        # something like $a = do { BLOCK } / 2;
-        # or this ? after a smartmatch anonymous hash or array reference:
-        #   qr/3/ ~~ ['1234'] ? 1 : 0;
-        #                     ^
-        else {
-            $op_expected = OPERATOR;    # block mode following }
-        }
-    }
-
-    # handle bare word..
-    elsif ( $last_nonblank_type eq 'w' ) {
-
-        # unfortunately, we can't tell what type of token to expect next
-        # after most bare words
-        $op_expected = UNKNOWN;
-    }
-
-    # operator, but not term possible after these types
-    # Note: moved ')' from type to token because parens in list context
-    # get marked as '{' '}' now.  This is a minor glitch in the following:
-    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
-    #
-    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
-        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
-    {
-        $op_expected = OPERATOR;
-
-        # in a 'use' statement, numbers and v-strings are not true
-        # numbers, so to avoid incorrect error messages, we will
-        # mark them as unknown for now (use.t)
-        # TODO: it would be much nicer to create a new token V for VERSION
-        # number in a use statement.  Then this could be a check on type V
-        # and related patches which change $statement_type for '=>'
-        # and ',' could be removed.  Further, it would clean things up to
-        # scan the 'use' statement with a separate subroutine.
-        if (   ( $statement_type eq 'use' )
-            && ( $last_nonblank_type =~ /^[nv]$/ ) )
-        {
-            $op_expected = UNKNOWN;
-        }
-
-        # expecting VERSION or {} after package NAMESPACE
-        elsif ($statement_type =~ /^package\b/
-            && $last_nonblank_token =~ /^package\b/ )
-        {
-            $op_expected = TERM;
-        }
-    }
-
-    # no operator after many keywords, such as "die", "warn", etc
-    elsif ( $expecting_term_token{$last_nonblank_token} ) {
-
-        # patch for dor.t (defined or).
-        # perl functions which may be unary operators
-        # TODO: This list is incomplete, and these should be put
-        # into a hash.
-        # The alternatives must be grouped so that both anchors apply to
-        # each of them; ungrouped, /^eof|undef|shift|pop$/ also matches
-        # any token merely containing 'shift' (such as 'unshift').
-        if (   $tok eq '/'
-            && $next_type eq '/'
-            && $last_nonblank_type eq 'k'
-            && $last_nonblank_token =~ /^(?:eof|undef|shift|pop)$/ )
-        {
-            $op_expected = OPERATOR;
-        }
-        else {
-            $op_expected = TERM;
-        }
-    }
-
-    # no operator after things like + - **  (i.e., other operators)
-    elsif ( $expecting_term_types{$last_nonblank_type} ) {
-        $op_expected = TERM;
-    }
-
-    # a few operators, like "time", have an empty prototype () and so
-    # take no parameters but produce a value to operate on
-    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
-        $op_expected = OPERATOR;
-    }
-
-    # post-increment and decrement produce values to be operated on
-    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
-        $op_expected = OPERATOR;
-    }
-
-    # no value to operate on after sub block
-    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
-
-    # a right brace here indicates the end of a simple block.
-    # all non-structural right braces have type 'R'
-    # all braces associated with block operator keywords have been given those
-    # keywords as "last_nonblank_token" and caught above.
-    # (This statement is order dependent, and must come after checking
-    # $last_nonblank_token).
-    elsif ( $last_nonblank_type eq '}' ) {
-
-        # patch for dor.t (defined or).
-        if (   $tok eq '/'
-            && $next_type eq '/'
-            && $last_nonblank_token eq ']' )
-        {
-            $op_expected = OPERATOR;
-        }
-
-        # Patch for RT #116344: misparse a ternary operator after an anonymous
-        # hash, like this:
-        #   return ref {} ? 1 : 0;
-        # The right brace should really be marked type 'R' in this case, and
-        # it is safest to return an UNKNOWN here. Expecting a TERM will
-        # cause the '?' to always be interpreted as a pattern delimiter
-        # rather than introducing a ternary operator.
-        elsif ( $tok eq '?' ) {
-            $op_expected = UNKNOWN;
-        }
-        else {
-            $op_expected = TERM;
-        }
-    }
-
-    # something else..what did I forget?
-    else {
-
-        # collecting diagnostics on unknown operator types..see what was missed
-        $op_expected = UNKNOWN;
-        write_diagnostics(
-"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
-        );
-    }
-
-    TOKENIZER_DEBUG_FLAG_EXPECT && do {
-        print STDOUT
-"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
-    };
-    return $op_expected;
-}
-
-sub new_statement_ok {
-
-    # Return true if the current token can start a new statement.
-    # That is the case wherever a label would be acceptable, and also
-    # directly after a label (type 'J').
-    # USES GLOBAL VARIABLES: $last_nonblank_type
-
-    # a label would be ok here
-    my $label_allowed = label_ok();
-    return $label_allowed if ($label_allowed);
-
-    # or we follow a label
-    return $last_nonblank_type eq 'J';
-}
-
-sub label_ok {
-
-    # Decide if a bare word followed by a colon here is a label.
-    # Returns a true value if a label is acceptable at this point.
-    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
-    # $brace_depth, @brace_type
-
-    # does it follow an opening or closing curly brace whose type is the
-    # brace itself?
-    my $follows_curly =
-      ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
-      && $last_nonblank_type eq $last_nonblank_token;
-
-    # if so, it is a label if and only if the curly encloses a code block
-    return $brace_type[$brace_depth] if ($follows_curly);
-
-    # otherwise, it is a label if and only if it follows a ';' (real or fake)
-    # or another label
-    return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
-}
-
-sub code_block_type {
-
-    # Decide if an opening brace starts a block of code, and if so what
-    # type of block it is.
-    # Must be called only when $type = $token = '{'
-    # The problem is to distinguish between the start of a block of code
-    # and the start of an anonymous hash reference.
-    #
-    # Parameters ( $i, $rtokens, $rtoken_type, $max_token_index ) are
-    # handed on unchanged to decide_if_code_block() when a look-ahead is
-    # needed.
-    #
-    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
-    # to indicate the type of code block.  (For example, 'last_nonblank_token'
-    # might be 'if' for an if block, 'else' for an else block, etc).
-    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
-    # $last_nonblank_block_type, $brace_depth, @brace_type
-
-    # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
-
-    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
-
-    # The tests below are order dependent; each one returns when it applies.
-
-    # handle case of multiple '{'s
-    if (   $last_nonblank_token eq '{'
-        && $last_nonblank_type eq $last_nonblank_token )
-    {
-
-        # opening brace where a statement may appear is probably
-        # a code block but might be an anonymous hash reference;
-        # but we cannot start a code block within an anonymous hash
-        return $brace_type[$brace_depth]
-          ? decide_if_code_block( $i, $rtokens, $rtoken_type,
-            $max_token_index )
-          : "";
-    }
-
-    # an opening brace where a statement may appear is probably
-    # a code block but might be an anonymous hash reference
-    if ( $last_nonblank_token eq ';' ) {
-        return decide_if_code_block( $i, $rtokens, $rtoken_type,
-            $max_token_index );
-    }
-
-    # handle case of '}{'
-    if (   $last_nonblank_token eq '}'
-        && $last_nonblank_type eq $last_nonblank_token )
-    {
-
-        # a } { situation ...
-        # could be hash reference after code block..(blktype1.t)
-        # otherwise must be a block if it follows a closing hash reference
-        return $last_nonblank_block_type
-          ? decide_if_code_block( $i, $rtokens, $rtoken_type,
-            $max_token_index )
-          : $last_nonblank_token;
-    }
-
-    ################################################################
-    # NOTE: braces after type characters start code blocks, but for
-    # simplicity these are not identified as such.  See also
-    # sub is_non_structural_brace.
-    ################################################################
-
-    ##    if ( $last_nonblank_type eq 't' ) {
-    ##       return $last_nonblank_token;
-    ##    }
-
-    # brace after label:
-    if ( $last_nonblank_type eq 'J' ) {
-        return $last_nonblank_token;
-    }
-
-    # otherwise, look at previous token.  This must be a code block if
-    # it follows any of these:
-    # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
-    if ( $is_code_block_token{$last_nonblank_token} ) {
-
-        # Bug Patch: Note that the opening brace after the 'if' in the
-        # following snippet is an anonymous hash ref and not a code block!
-        #   print 'hi' if { x => 1, }->{x};
-        # We can identify this situation because the last nonblank type
-        # will be a keyword (instead of a closing paren)
-        my $is_postfix_conditional = $last_nonblank_token =~ /^(if|unless)$/
-          && $last_nonblank_type eq 'k';
-        return $is_postfix_conditional ? "" : $last_nonblank_token;
-    }
-
-    # or a sub or package BLOCK
-    if ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
-        && $last_nonblank_token =~ /^(sub|package)\b/ )
-    {
-        return $last_nonblank_token;
-    }
-
-    if ( $statement_type =~ /^(sub|package)\b/ ) {
-        return $statement_type;
-    }
-
-    # user-defined subs with block parameters (like grep/map/eval)
-    if ( $last_nonblank_type eq 'G' ) {
-        return $last_nonblank_token;
-    }
-
-    # check bareword
-    if ( $last_nonblank_type eq 'w' ) {
-        return decide_if_code_block( $i, $rtokens, $rtoken_type,
-            $max_token_index );
-    }
-
-    # Patch for bug # RT #94338 reported by Daniel Trizen
-    # for-loop in a parenthesized block-map triggering an error message:
-    #    map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
-    # Check for a code block within a parenthesized function call
-    if ( $last_nonblank_token eq '(' ) {
-        my $paren_type = $paren_type[$paren_depth];
-
-        # We will mark this as a code block but use type 't' instead
-        # of the name of the containing function.  This will allow for
-        # correct parsing but will usually produce better formatting.
-        # Braces with block type 't' are not broken open automatically
-        # in the formatter as are other code block types, and this usually
-        # works best.
-        if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
-            return 't';    # (Not $paren_type)
-        }
-        return "";
-    }
-
-    # handle unknown syntax ') {'
-    # we previously appended a '()' to mark this case
-    if ( $last_nonblank_token =~ /\(\)$/ ) {
-        return $last_nonblank_token;
-    }
-
-    # anything else must be anonymous hash reference
-    return "";
-}
-
-sub decide_if_code_block {
-
-    # We are at a '{' where a statement may appear.
-    # We must decide if this brace starts an anonymous hash or a code
-    # block.
-    #
-    # Parameters:
-    #   $i               - index of the current token in @{$rtokens}
-    #   $rtokens         - reference to the array of tokens of this line
-    #   $rtoken_type     - reference to the array of their types, indexed
-    #                      like @{$rtokens}
-    #   $max_token_index - index of the last token of this line
-    #
-    # Returns "" if anonymous hash, and $last_nonblank_token otherwise.
-    #
-    # USES GLOBAL VARIABLES: $last_nonblank_token
-    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
-
-    my ( $next_nonblank_token, $i_next ) =
-      find_next_nonblank_token( $i, $rtokens, $max_token_index );
-
-    # initialize to be code BLOCK
-    my $code_block_type = $last_nonblank_token;
-
-    # Check for the common case of an empty anonymous hash reference:
-    # Maybe something like sub { { } }
-    if ( $next_nonblank_token eq '}' ) {
-        $code_block_type = "";
-    }
-
-    else {
-
-        # To guess if this '{' is an anonymous hash reference, look ahead
-        # and test as follows:
-        #
-        # it is a hash reference if next come:
-        #   - a string or digit followed by a comma or =>
-        #   - bareword followed by =>
-        # otherwise it is a code block
-        #
-        # Examples of anonymous hash ref:
-        # {'aa',};
-        # {1,2}
-        #
-        # Examples of code blocks:
-        # {1; print "hello\n", 1;}
-        # {$a,1};
-
-        # We are only going to look ahead one more (nonblank/comment) line.
-        # Strange formatting could cause a bad guess, but that's unlikely.
-        my @pre_types;
-        my @pre_tokens;
-
-        # Ignore the rest of this line if it is a side comment
-        if ( $next_nonblank_token ne '#' ) {
-            @pre_types  = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
-            @pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
-        }
-
-        # 20 is arbitrary but generous, and prevents wasting lots of
-        # time in mangled files
-        my ( $rpre_tokens, $rpre_types ) =
-          peek_ahead_for_n_nonblank_pre_tokens(20);
-        if ( defined($rpre_types) && @{$rpre_types} ) {
-            push @pre_types,  @{$rpre_types};
-            push @pre_tokens, @{$rpre_tokens};
-        }
-
-        # put a sentinel token to simplify stopping the search
-        # NOTE(review): both sentinels go onto @pre_types, presumably so
-        # that the '++$j' look-aheads below stay inside the array;
-        # nothing is appended to @pre_tokens -- confirm this is intended.
-        push @pre_types, '}';
-        push @pre_types, '}';
-
-        my $jbeg = 0;
-        $jbeg = 1 if $pre_types[0] eq 'b';
-
-        # first look for one of these
-        #  - bareword
-        #  - bareword with leading -
-        #  - digit
-        #  - quoted string
-        my $j = $jbeg;
-        if ( $pre_types[$j] =~ /^[\'\"]/ ) {
-
-            # find the closing quote; don't worry about escapes
-            my $quote_mark = $pre_types[$j];
-            foreach my $k ( $j + 1 .. $#pre_types - 1 ) {
-                if ( $pre_types[$k] eq $quote_mark ) {
-                    $j = $k + 1;
-                    last;
-                }
-            }
-        }
-        elsif ( $pre_types[$j] eq 'd' ) {
-            $j++;
-        }
-        elsif ( $pre_types[$j] eq 'w' ) {
-            $j++;
-        }
-        elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
-            $j++;
-        }
-
-        # $j has advanced only if one of the candidate hash keys was seen
-        if ( $j > $jbeg ) {
-
-            $j++ if $pre_types[$j] eq 'b';
-
-            # Patched for RT #95708
-            if (
-
-                # it is a comma which is not a pattern delimiter except for qw
-                (
-                       $pre_types[$j] eq ','
-                    && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
-                )
-
-                # or a =>
-                || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
-              )
-            {
-                $code_block_type = "";
-            }
-        }
-    }
-
-    return $code_block_type;
-}
-
-sub report_unexpected {
-
-    # Report an unexpected token type and show where it is by writing
-    # the numbered input line, an underline marking the token (and, when
-    # available, the previous nonblank token), and a message through
-    # warning().  Nothing is written once the count of such errors
-    # exceeds MAX_NAG_MESSAGES.
-    #
-    # Parameters:
-    #   $found           - description of what was found
-    #   $expecting       - description of what was expected
-    #   $i_tok           - index of the offending pretoken
-    #   $last_nonblank_i - index of the previous nonblank pretoken,
-    #                      or a negative number if there is none
-    #   $rpretoken_map   - reference to array of pretoken positions in line
-    #   $rpretoken_type  - reference to array of pretoken types
-    #   $input_line      - text of the input line
-    #
-    # USES GLOBAL VARIABLES: $tokenizer_self
-    my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
-        $rpretoken_type, $input_line )
-      = @_;
-
-    # count every call, but stop nagging beyond the limit
-    if ( ++$tokenizer_self->{_unexpected_error_count} > MAX_NAG_MESSAGES ) {
-        return;
-    }
-
-    my $message = "found $found where $expecting expected";
-    my $tok_pos = $rpretoken_map->[$i_tok];
-    interrupt_logfile();
-    my $line_number = $tokenizer_self->{_last_line_number};
-    my ( $offset, $numbered_line, $underline ) =
-      make_numbered_line( $line_number, $input_line, $tok_pos );
-
-    # mark the offending token
-    $underline = write_on_underline( $underline, $tok_pos - $offset, '^' );
-
-    my $trailer = "";
-    if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
-        my $prev_pos = $rpretoken_map->[$last_nonblank_i];
-
-        # length to underline: stop short of an intervening blank
-        my $span =
-          ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' )
-          ? $rpretoken_map->[ $i_tok - 1 ] - $prev_pos
-          : $tok_pos - $prev_pos;
-
-        # but never underline more than 40 characters
-        if ( $span > 40 ) {
-            $span     = 40;
-            $prev_pos = $tok_pos - 40;
-        }
-
-        $underline =
-          write_on_underline( $underline, $prev_pos - $offset, '-' x $span );
-        $trailer = " (previous token underlined)";
-    }
-    warning( $numbered_line . "\n" );
-    warning( $underline . "\n" );
-    warning( $message . $trailer . "\n" );
-    resume_logfile();
-    return;
-}
-
-sub is_non_structural_brace {
-
-    # Decide if a brace or bracket is structural or non-structural
-    # by looking at the previous token and type.
-    # Returns a true value if it is non-structural.
-    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
-
-    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
-    # Tentatively deactivated because it caused the wrong operator expectation
-    # for this code:
-    #      $user = @vars[1] / 100;
-    # Must update sub operator_expected before re-implementing.
-    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
-    #     return 0;
-    # }
-
-    ################################################################
-    # NOTE: braces after type characters start code blocks, but for
-    # simplicity these are not identified as such.  See also
-    # sub code_block_type
-    ################################################################
-
-    ##if ($last_nonblank_type eq 't') {return 0}
-
-    # it is non-structural if it is decorated by type information.
-    # For example, the '{' here is non-structural:   ${xxx}
-    if ( $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ ) {
-        return 1;
-    }
-
-    # or if we follow a hash or array closing curly brace or bracket
-    # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
-    # because the first '}' would have been given type 'R'
-    return $last_nonblank_type =~ /^([R\]])$/;
-}
-
-#########i#############################################################
-# Tokenizer routines for tracking container nesting depths
-#######################################################################
-
-# The following routines keep track of nesting depths of the nesting
-# types, ( [ { and ?. This is necessary for determining the indentation
-# level, and also for debugging programs. Not only do they keep track of
-# nesting depths of the individual brace types, but they check that each
-# of the other brace types is balanced within matching pairs. For
-# example, if the program sees this sequence:
-#
-# { ( ( ) }
-#
-# then it can determine that there is an extra left paren somewhere
-# between the { and the }. And so on with every other possible
-# combination of outer and inner brace types. For another
-# example:
-#
-# ( [ ..... ] ] )
-#
-# which has an extra ] within the parens.
-#
-# The brace types have indexes 0 .. 3 which are indexes into
-# the matrices.
-#
-# The pair ? : are treated as just another nesting type, with ? acting
-# as the opening brace and : acting as the closing brace.
-#
-# The matrix
-#
-# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
-#
-# saves the nesting depth of brace type $b (where $b is either of the other
-# nesting types) when brace type $a enters a new depth. When this depth
-# decreases, a check is made that the current depth of brace types $b is
-# unchanged, or otherwise there must have been an error. This can
-# be very useful for localizing errors, particularly when perl runs to
-# the end of a large file (such as this one) and announces that there
-# is a problem somewhere.
-#
-# A numerical sequence number is maintained for every nesting type,
-# so that each matching pair can be uniquely identified in a simple
-# way.
-
-sub increase_nesting_depth {
-
-    # Record the opening of a new container of nesting type $aa.
-    #
-    # Parameters:
-    #   $aa  - index of the nesting type
-    #   $pos - stored with the starting line information for use in
-    #          later error messages
-    #
-    # Returns ( $seqno, $indent ) where $seqno is the new sequence number
-    # for this container and $indent is 1 if a nested ternary should be
-    # indented, and 0 otherwise.
-    #
-    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
-    # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
-    # $statement_type
-    my ( $aa, $pos ) = @_;
-
-    my $depth = ++$current_depth[$aa];
-    $total_depth++;
-    $total_depth[$aa][$depth] = $total_depth;
-
-    # Sequence numbers increment by number of items.  This keeps
-    # a unique set of numbers but still allows the relative location
-    # of any type to be determined.
-    $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
-    my $seqno = $nesting_sequence_number[$aa];
-    $current_sequence_number[$aa][$depth] = $seqno;
-
-    # remember where this container started
-    $starting_line_of_current_depth[$aa][$depth] = [
-        $tokenizer_self->{_last_line_number},
-        $tokenizer_self->{_line_text}, $pos
-    ];
-
-    # save the current depths of all of the other nesting types
-    foreach my $bb ( 0 .. $#closing_brace_names ) {
-        if ( $bb != $aa ) {
-            $depth_array[$aa][$bb][$depth] = $current_depth[$bb];
-        }
-    }
-
-    # set a flag for indenting a nested ternary statement
-    my $indent = 0;
-    if ( $aa == QUESTION_COLON ) {
-        $nested_ternary_flag[$depth] = 0;
-        if ( $depth > 1 && $nested_ternary_flag[ $depth - 1 ] == 0 ) {
-            my $pdepth = $total_depth[$aa][ $depth - 1 ];
-            if ( $pdepth == $total_depth - 1 ) {
-                $indent = 1;
-                $nested_ternary_flag[ $depth - 1 ] = -1;
-            }
-        }
-    }
-
-    # save the statement type for this depth and start afresh inside
-    $nested_statement_type[$aa][$depth] = $statement_type;
-    $statement_type = "";
-    return ( $seqno, $indent );
-}
-
-sub decrease_nesting_depth {
-
- # Record the closing of a container of nesting type $aa and check that
- # the other nesting types are balanced within it.
- #
- # Parameters:
- # $aa - index of the nesting type being closed
- # $pos - passed on to the error indicator routines together with the
- # current line number and line text
- #
- # Returns ( $seqno, $outdent ):
- # $seqno - sequence number saved when this container was opened, or 0
- # if there is no open container of this type
- # $outdent - the nested ternary flag saved for this depth when $aa is
- # QUESTION_COLON, and 0 otherwise
-
- my ( $aa, $pos ) = @_;
-
- # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
- # @current_sequence_number, @depth_array, @starting_line_of_current_depth
- # $statement_type
- my $seqno = 0;
- my $input_line_number = $tokenizer_self->{_last_line_number};
- my $input_line = $tokenizer_self->{_line_text};
-
- my $outdent = 0;
-
- # note that the total depth is decreased even if there turns out to be
- # no matching opening token
- $total_depth--;
- if ( $current_depth[$aa] > 0 ) {
-
- # set a flag for un-indenting after seeing a nested ternary statement
- $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
- if ( $aa == QUESTION_COLON ) {
- $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
- }
-
- # restore the statement type which was saved when this container opened
- $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
-
- # check that any brace types $bb contained within are balanced
- for my $bb ( 0 .. $#closing_brace_names ) {
- next if ( $bb == $aa );
-
- unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
- $current_depth[$bb] )
- {
-
- # $diff > 0 means extra opening tokens of type $bb were seen;
- # $diff < 0 means extra closing tokens
- my $diff =
- $current_depth[$bb] -
- $depth_array[$aa][$bb][ $current_depth[$aa] ];
-
- # don't whine too many times
- my $saw_brace_error = get_saw_brace_error();
- if (
- $saw_brace_error <= MAX_NAG_MESSAGES
-
- # if too many closing types have occurred, we probably
- # already caught this error
- && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
- )
- {
- interrupt_logfile();
-
- # $rsl: where the container being closed was opened
- # $rel: where it is being closed (the current position)
- my $rsl =
- $starting_line_of_current_depth[$aa]
- [ $current_depth[$aa] ];
- my $sl = $rsl->[0];
- my $rel = [ $input_line_number, $input_line, $pos ];
- my $el = $rel->[0];
- my ($ess);
-
- # plural ending for the message
- if ( $diff == 1 || $diff == -1 ) {
- $ess = '';
- }
- else {
- $ess = 's';
- }
- my $bname =
- ( $diff > 0 )
- ? $opening_brace_names[$bb]
- : $closing_brace_names[$bb];
- write_error_indicator_pair( @{$rsl}, '^' );
- my $msg = <<"EOM";
-Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
-EOM
-
- if ( $diff > 0 ) {
-
- # $rml: where the most recent unmatched $bb was opened
- my $rml =
- $starting_line_of_current_depth[$bb]
- [ $current_depth[$bb] ];
- my $ml = $rml->[0];
- $msg .=
-" The most recent un-matched $bname is on line $ml\n";
- write_error_indicator_pair( @{$rml}, '^' );
- }
- write_error_indicator_pair( @{$rel}, '^' );
- warning($msg);
- resume_logfile();
- }
-
- # the error is counted even if no message was written
- increment_brace_error();
- }
- }
- $current_depth[$aa]--;
- }
-
- # there is no open container of this type to close
- else {
-
- my $saw_brace_error = get_saw_brace_error();
- if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
- my $msg = <<"EOM";
-There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
-EOM
- indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
- }
- increment_brace_error();
- }
- return ( $seqno, $outdent );
-}
-
-sub check_final_nesting_depths {
-
-    # Report each nesting type which still has a nonzero depth, showing
-    # where its most recent unmatched opening token is, and count a brace
-    # error for it.
-    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
-
-    foreach my $aa ( 0 .. $#closing_brace_names ) {
-
-        # nothing to report if this type is balanced
-        next unless ( $current_depth[$aa] );
-
-        my $rsl = $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
-        my $sl  = $rsl->[0];
-        my $msg = <<"EOM";
-Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
-The most recent un-matched $opening_brace_names[$aa] is on line $sl
-EOM
-        indicate_error( $msg, @{$rsl}, '^' );
-        increment_brace_error();
-    }
-    return;
-}
-
-#########i#############################################################
-# Tokenizer routines for looking ahead in input stream
-#######################################################################
-
-sub peek_ahead_for_n_nonblank_pre_tokens {
-
-    # Look ahead to the next line which is not blank and not a comment,
-    # and pre-tokenize it.
-    #
-    # Parameter:
-    #   $max_pretokens - passed to pre_tokenize as its limit
-    #
-    # Returns ( $rpre_tokens, $rpre_types ) for that line, or a pair of
-    # undefs if the look-ahead ends without finding such a line.
-    # USES GLOBAL VARIABLES: $tokenizer_self
-    my $max_pretokens = shift;
-    my $line_buffer   = $tokenizer_self->{_line_buffer_object};
-
-    my $i = 0;
-    while ( my $line = $line_buffer->peek_ahead( $i++ ) ) {
-
-        # trim leading blanks
-        $line =~ s/^\s*//;
-
-        # skip blank lines and comments
-        next if ( length($line) <= 0 || $line =~ /^#/ );
-
-        # this is the line we want
-        my ( $rpre_tokens, $rmap, $rpre_types ) =
-          pre_tokenize( $line, $max_pretokens );
-        return ( $rpre_tokens, $rpre_types );
-    }
-    return ( undef, undef );
-}
-
-# look ahead for next non-blank, non-comment line of code
-sub peek_ahead_for_nonblank_token {
-
-    # Look ahead to the next line which is not blank and not a comment,
-    # and store its leading pre-tokens in @{$rtokens}, beginning at index
-    # $max_token_index + 2.
-    #
-    # Parameters:
-    #   $rtokens         - reference to the token array to be extended
-    #   $max_token_index - index of the last token now in that array
-    #
-    # Returns $rtokens.
-    # USES GLOBAL VARIABLES: $tokenizer_self
-    my ( $rtokens, $max_token_index ) = @_;
-    my $line_buffer = $tokenizer_self->{_line_buffer_object};
-
-    my $i = 0;
-    while ( my $line = $line_buffer->peek_ahead( $i++ ) ) {
-
-        # trim leading blanks
-        $line =~ s/^\s*//;
-
-        # skip blank lines and comments
-        next if ( length($line) <= 0 || $line =~ /^#/ );
-
-        # only need 2 pre-tokens
-        my ( $rtok, $rmap, $rtype ) = pre_tokenize( $line, 2 );
-
-        # note that index $max_token_index + 1 itself is not assigned
-        my $j = $max_token_index + 1;
-        foreach my $tok ( @{$rtok} ) {
-            last if ( $tok =~ "\n" );
-            $j++;
-            $rtokens->[$j] = $tok;
-        }
-        last;
-    }
-    return $rtokens;
-}
-
-#########i#############################################################
-# Tokenizer guessing routines for ambiguous situations
-#######################################################################
-
-sub guess_if_pattern_or_conditional {
-
-    # this routine is called when we have encountered a ? following an
-    # unknown bareword, and we must decide if it starts a pattern or not
-    # input parameters:
-    #   $i - token index of the ? starting possible pattern
-    #   $rtokens - reference to the array of tokens
-    #   $rtoken_map - not used here
-    #   $max_token_index - index of the last token
-    # output parameters:
-    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
-    #   msg = a warning or diagnostic message
-    # USES GLOBAL VARIABLES: $last_nonblank_token
-
-    # FIXME: this needs to be rewritten
-
-    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
-    my $is_pattern = 0;
-    my $msg = "guessing that ? after $last_nonblank_token starts a ";
-
-    # nothing follows the ? on this line
-    if ( $i >= $max_token_index ) {
-        $msg .= "conditional (no end to pattern found on the line)\n";
-        return ( $is_pattern, $msg );
-    }
-
-    # look for a possible ending ? on this line..
-    my $ibeg            = $i;
-    my $in_quote        = 1;
-    my $quote_depth     = 0;
-    my $quote_character = '';
-    my $quote_pos       = 0;
-    my $quoted_string;
-    (
-        $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
-        $quoted_string
-      )
-      = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
-        $quote_pos, $quote_depth, $max_token_index );
-
-    # we didn't find an ending ? on this line,
-    # so we bias towards conditional
-    if ($in_quote) {
-        $is_pattern = 0;
-        $msg .= "conditional (no ending ? on this line)\n";
-        return ( $is_pattern, $msg );
-    }
-
-    # we found an ending ?, so we bias towards a pattern
-
-    # Watch out for an ending ? in quotes, like this
-    #    my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
-    my ( $s_quote, $d_quote, $colons ) = ( 0, 0, 0 );
-    foreach my $tok ( @{$rtokens}[ $ibeg + 1 .. $i - 1 ] ) {
-        if    ( $tok eq ":" ) { $colons++ }
-        elsif ( $tok eq "'" ) { $s_quote++ }
-        elsif ( $tok eq '"' ) { $d_quote++ }
-    }
-
-    if ( $s_quote % 2 || $d_quote % 2 || $colons ) {
-        $is_pattern = 0;
-        $msg .= "found ending ? but unbalanced quote chars\n";
-    }
-    elsif ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
-        $is_pattern = 1;
-        $msg .= "pattern (found ending ? and pattern expected)\n";
-    }
-    else {
-
-        # note that $is_pattern is left at 0 here
-        $msg .= "pattern (uncertain, but found ending ?)\n";
-    }
-    return ( $is_pattern, $msg );
-}
-
-sub guess_if_pattern_or_division {
-
- # this routine is called when we have encountered a / following an
- # unknown bareword, and we must decide if it starts a pattern or is a
- # division
- # input parameters:
- # $i - token index of the / starting possible pattern
- # output parameters:
- # $is_pattern = 0 if probably division, =1 if probably a pattern
- # msg = a warning or diagnostic message
- # USES GLOBAL VARIABLES: $last_nonblank_token
- my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
- my $is_pattern = 0;
- my $msg = "guessing that / after $last_nonblank_token starts a ";
-
- if ( $i >= $max_token_index ) {
- $msg .= "division (no end to pattern found on the line)\n";
- }
- else {
- my $ibeg = $i;
- my $divide_expected =
- numerator_expected( $i, $rtokens, $max_token_index );
- $i = $ibeg + 1;
- my $next_token = $rtokens->[$i]; # first token after slash
-
- # look for a possible ending / on this line..
- my $in_quote = 1;
- my $quote_depth = 0;
- my $quote_character = '';
- my $quote_pos = 0;
- my $quoted_string;
- (
- $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
- $quoted_string
- )
- = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth, $max_token_index );
-
- if ($in_quote) {
-
- # we didn't find an ending / on this line,
- # so we bias towards division
- if ( $divide_expected >= 0 ) {
- $is_pattern = 0;
- $msg .= "division (no ending / on this line)\n";
- }
- else {
- $msg = "multi-line pattern (division not possible)\n";
- $is_pattern = 1;
- }
-
- }
-
- # we found an ending /, so we bias towards a pattern
- else {
-
- if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
-
- if ( $divide_expected >= 0 ) {
-
- if ( $i - $ibeg > 60 ) {
- $msg .= "division (matching / too distant)\n";
- $is_pattern = 0;
- }
- else {
- $msg .= "pattern (but division possible too)\n";
- $is_pattern = 1;
- }
- }
- else {
- $is_pattern = 1;
- $msg .= "pattern (division not possible)\n";
- }
- }
- else {
-
- if ( $divide_expected >= 0 ) {
- $is_pattern = 0;
- $msg .= "division (pattern not possible)\n";
- }
- else {
- $is_pattern = 1;
- $msg .=
- "pattern (uncertain, but division would not work here)\n";
- }
- }
- }
- }
- return ( $is_pattern, $msg );
-}
-
-# try to resolve here-doc vs. shift by looking ahead for
-# non-code or the end token (currently only looks for end token)
-# returns 1 if it is probably a here doc, 0 if not
-sub guess_if_here_doc {
-
- # This is how many lines we will search for a target as part of the
- # guessing strategy. It is a constant because there is probably
- # little reason to change it.
- # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
- # %is_constant,
- my $HERE_DOC_WINDOW = 40;
-
- my $next_token = shift;
- my $here_doc_expected = 0;
- my $line;
- my $k = 0;
- my $msg = "checking <<";
-
- while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
- {
- chomp $line;
-
- if ( $line =~ /^$next_token$/ ) {
- $msg .= " -- found target $next_token ahead $k lines\n";
- $here_doc_expected = 1; # got it
- last;
- }
- last if ( $k >= $HERE_DOC_WINDOW );
- }
-
- unless ($here_doc_expected) {
-
- if ( !defined($line) ) {
- $here_doc_expected = -1; # hit eof without seeing target
- $msg .= " -- must be shift; target $next_token not in file\n";
-
- }
- else { # still unsure..taking a wild guess
-
- if ( !$is_constant{$current_package}{$next_token} ) {
- $here_doc_expected = 1;
- $msg .=
- " -- guessing it's a here-doc ($next_token not a constant)\n";
- }
- else {
- $msg .=
- " -- guessing it's a shift ($next_token is a constant)\n";
- }
- }
- }
- write_logfile_entry($msg);
- return $here_doc_expected;
-}
-
-#########i#############################################################
-# Tokenizer Routines for scanning identifiers and related items
-#######################################################################
-
-sub scan_bare_identifier_do {
-
- # this routine is called to scan a token starting with an alphanumeric
- # variable or package separator, :: or '.
- # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
- # $last_nonblank_type,@paren_type, $paren_depth
-
- my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
- $max_token_index )
- = @_;
- my $i_begin = $i;
- my $package = undef;
-
- my $i_beg = $i;
-
- # we have to back up one pretoken at a :: since each : is one pretoken
- if ( $tok eq '::' ) { $i_beg-- }
- if ( $tok eq '->' ) { $i_beg-- }
- my $pos_beg = $rtoken_map->[$i_beg];
- pos($input_line) = $pos_beg;
-
- # Examples:
- # A::B::C
- # A::
- # ::A
- # A'B
- if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
-
- my $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $tok = substr( $input_line, $pos_beg, $numc );
-
- # type 'w' includes anything without leading type info
- # ($,%,@,*) including something like abc::def::ghi
- $type = 'w';
-
- my $sub_name = "";
- if ( defined($2) ) { $sub_name = $2; }
- if ( defined($1) ) {
- $package = $1;
-
- # patch: don't allow isolated package name which just ends
- # in the old style package separator (single quote). Example:
- # use CGI':all';
- if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
- $pos--;
- }
-
- $package =~ s/\'/::/g;
- if ( $package =~ /^\:/ ) { $package = 'main' . $package }
- $package =~ s/::$//;
- }
- else {
- $package = $current_package;
-
- if ( $is_keyword{$tok} ) {
- $type = 'k';
- }
- }
-
- # if it is a bareword..
- if ( $type eq 'w' ) {
-
- # check for v-string with leading 'v' type character
- # (This seems to have precedence over filehandle, type 'Y')
- if ( $tok =~ /^v\d[_\d]*$/ ) {
-
- # we only have the first part - something like 'v101' -
- # look for more
- if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
- $pos = pos($input_line);
- $numc = $pos - $pos_beg;
- $tok = substr( $input_line, $pos_beg, $numc );
- }
- $type = 'v';
-
- # warn if this version can't handle v-strings
- report_v_string($tok);
- }
-
- elsif ( $is_constant{$package}{$sub_name} ) {
- $type = 'C';
- }
-
- # bareword after sort has implied empty prototype; for example:
- # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
- # This has priority over whatever the user has specified.
- elsif ($last_nonblank_token eq 'sort'
- && $last_nonblank_type eq 'k' )
- {
- $type = 'Z';
- }
-
- # Note: strangely, perl does not seem to really let you create
- # functions which act like eval and do, in the sense that eval
- # and do may have operators following the final }, but any operators
- # that you create with prototype (&) apparently do not allow
- # trailing operators, only terms. This seems strange.
- # If this ever changes, here is the update
- # to make perltidy behave accordingly:
-
- # elsif ( $is_block_function{$package}{$tok} ) {
- # $tok='eval'; # patch to do braces like eval - doesn't work
- # $type = 'k';
- #}
- # FIXME: This could become a separate type to allow for different
- # future behavior:
- elsif ( $is_block_function{$package}{$sub_name} ) {
- $type = 'G';
- }
-
- elsif ( $is_block_list_function{$package}{$sub_name} ) {
- $type = 'G';
- }
- elsif ( $is_user_function{$package}{$sub_name} ) {
- $type = 'U';
- $prototype = $user_function_prototype{$package}{$sub_name};
- }
-
- # check for indirect object
- elsif (
-
- # added 2001-03-27: must not be followed immediately by '('
- # see fhandle.t
- ( $input_line !~ m/\G\(/gc )
-
- # and
- && (
-
- # preceded by keyword like 'print', 'printf' and friends
- $is_indirect_object_taker{$last_nonblank_token}
-
- # or preceded by something like 'print(' or 'printf('
- || (
- ( $last_nonblank_token eq '(' )
- && $is_indirect_object_taker{ $paren_type[$paren_depth]
- }
-
- )
- )
- )
- {
-
- # may not be indirect object unless followed by a space
- if ( $input_line =~ m/\G\s+/gc ) {
- $type = 'Y';
-
- # Abandon Hope ...
- # Perl's indirect object notation is a very bad
- # thing and can cause subtle bugs, especially for
- # beginning programmers. And I haven't even been
- # able to figure out a sane warning scheme which
- # doesn't get in the way of good scripts.
-
- # Complain if a filehandle has any lower case
- # letters. This is suggested good practice.
- # Use 'sub_name' because something like
- # main::MYHANDLE is ok for filehandle
- if ( $sub_name =~ /[a-z]/ ) {
-
- # could be bug caused by older perltidy if
- # followed by '('
- if ( $input_line =~ m/\G\s*\(/gc ) {
- complain(
-"Caution: unknown word '$tok' in indirect object slot\n"
- );
- }
- }
- }
-
- # bareword not followed by a space -- may not be filehandle
- # (may be function call defined in a 'use' statement)
- else {
- $type = 'Z';
- }
- }
- }
-
- # Now we must convert back from character position
- # to pre_token index.
- # I don't think an error flag can occur here ..but who knows
- my $error;
- ( $i, $error ) =
- inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
- if ($error) {
- warning("scan_bare_identifier: Possibly invalid tokenization\n");
- }
- }
-
- # no match but line not blank - could be syntax error
- # perl will take '::' alone without complaint
- else {
- $type = 'w';
-
- # change this warning to log message if it becomes annoying
- warning("didn't find identifier after leading ::\n");
- }
- return ( $i, $tok, $type, $prototype );
-}
-
-sub scan_id_do {
-
-# This is the new scanner and will eventually replace scan_identifier.
-# Only type 'sub' and 'package' are implemented.
-# Token types $ * % @ & -> are not yet implemented.
-#
-# Scan identifier following a type token.
-# The type of call depends on $id_scan_state: $id_scan_state = ''
-# for starting call, in which case $tok must be the token defining
-# the type.
-#
-# If the type token is the last nonblank token on the line, a value
-# of $id_scan_state = $tok is returned, indicating that further
-# calls must be made to get the identifier. If the type token is
-# not the last nonblank token on the line, the identifier is
-# scanned and handled and a value of '' is returned.
-# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
-# $statement_type, $tokenizer_self
-
- my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
- $max_token_index )
- = @_;
- my $type = '';
- my ( $i_beg, $pos_beg );
-
- #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
- #my ($a,$b,$c) = caller;
- #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
-
- # on re-entry, start scanning at first token on the line
- if ($id_scan_state) {
- $i_beg = $i;
- $type = '';
- }
-
- # on initial entry, start scanning just after type token
- else {
- $i_beg = $i + 1;
- $id_scan_state = $tok;
- $type = 't';
- }
-
- # find $i_beg = index of next nonblank token,
- # and handle empty lines
- my $blank_line = 0;
- my $next_nonblank_token = $rtokens->[$i_beg];
- if ( $i_beg > $max_token_index ) {
- $blank_line = 1;
- }
- else {
-
- # only a '#' immediately after a '$' is not a comment
- if ( $next_nonblank_token eq '#' ) {
- unless ( $tok eq '$' ) {
- $blank_line = 1;
- }
- }
-
- if ( $next_nonblank_token =~ /^\s/ ) {
- ( $next_nonblank_token, $i_beg ) =
- find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
- $max_token_index );
- if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
- $blank_line = 1;
- }
- }
- }
-
- # handle non-blank line; identifier, if any, must follow
- unless ($blank_line) {
-
- if ( $id_scan_state eq 'sub' ) {
- ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
- $input_line, $i, $i_beg,
- $tok, $type, $rtokens,
- $rtoken_map, $id_scan_state, $max_token_index
- );
- }
-
- elsif ( $id_scan_state eq 'package' ) {
- ( $i, $tok, $type ) =
- do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
- $rtoken_map, $max_token_index );
- $id_scan_state = '';
- }
-
- else {
- warning("invalid token in scan_id: $tok\n");
- $id_scan_state = '';
- }
- }
-
- if ( $id_scan_state && ( !defined($type) || !$type ) ) {
-
- # shouldn't happen:
- warning(
-"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
- );
- report_definite_bug();
- }
-
- TOKENIZER_DEBUG_FLAG_NSCAN && do {
- print STDOUT
- "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
- };
- return ( $i, $tok, $type, $id_scan_state );
-}
-
-sub check_prototype {
- my ( $proto, $package, $subname ) = @_;
- return unless ( defined($package) && defined($subname) );
- if ( defined($proto) ) {
- $proto =~ s/^\s*\(\s*//;
- $proto =~ s/\s*\)$//;
- if ($proto) {
- $is_user_function{$package}{$subname} = 1;
- $user_function_prototype{$package}{$subname} = "($proto)";
-
- # prototypes containing '&' must be treated specially..
- if ( $proto =~ /\&/ ) {
-
- # right curly braces of prototypes ending in
- # '&' may be followed by an operator
- if ( $proto =~ /\&$/ ) {
- $is_block_function{$package}{$subname} = 1;
- }
-
- # right curly braces of prototypes NOT ending in
- # '&' may NOT be followed by an operator
- elsif ( $proto !~ /\&$/ ) {
- $is_block_list_function{$package}{$subname} = 1;
- }
- }
- }
- else {
- $is_constant{$package}{$subname} = 1;
- }
- }
- else {
- $is_user_function{$package}{$subname} = 1;
- }
- return;
-}
-
-sub do_scan_package {
-
- # do_scan_package parses a package name
- # it is called with $i_beg equal to the index of the first nonblank
- # token following a 'package' token.
- # USES GLOBAL VARIABLES: $current_package,
-
- # package NAMESPACE
- # package NAMESPACE VERSION
- # package NAMESPACE BLOCK
- # package NAMESPACE VERSION BLOCK
- #
- # If VERSION is provided, package sets the $VERSION variable in the given
- # namespace to a version object with the VERSION provided. VERSION must be
- # a "strict" style version number as defined by the version module: a
- # positive decimal number (integer or decimal-fraction) without
- # exponentiation or else a dotted-decimal v-string with a leading 'v'
- # character and at least three components.
- # reference http://perldoc.perl.org/functions/package.html
-
- my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
- $max_token_index )
- = @_;
- my $package = undef;
- my $pos_beg = $rtoken_map->[$i_beg];
- pos($input_line) = $pos_beg;
-
- # handle non-blank line; package name, if any, must follow
- if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
- $package = $1;
- $package = ( defined($1) && $1 ) ? $1 : 'main';
- $package =~ s/\'/::/g;
- if ( $package =~ /^\:/ ) { $package = 'main' . $package }
- $package =~ s/::$//;
- my $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
- $type = 'i';
-
- # Now we must convert back from character position
- # to pre_token index.
- # I don't think an error flag can occur here ..but ?
- my $error;
- ( $i, $error ) =
- inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
- if ($error) { warning("Possibly invalid package\n") }
- $current_package = $package;
-
- # we should now have package NAMESPACE
- # now expecting VERSION, BLOCK, or ; to follow ...
- # package NAMESPACE VERSION
- # package NAMESPACE BLOCK
- # package NAMESPACE VERSION BLOCK
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens, $max_token_index );
-
- # check that something recognizable follows, but do not parse.
- # A VERSION number will be parsed later as a number or v-string in the
- # normal way. What is important is to set the statement type if
- # everything looks okay so that the operator_expected() routine
- # knows that the number is in a package statement.
- # Examples of valid primitive tokens that might follow are:
- # 1235 . ; { } v3 v
- if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
- $statement_type = $tok;
- }
- else {
- warning(
- "Unexpected '$next_nonblank_token' after package name '$tok'\n"
- );
- }
- }
-
- # no match but line not blank --
- # could be a label with name package, like package: , for example.
- else {
- $type = 'k';
- }
-
- return ( $i, $tok, $type );
-}
-
-sub scan_identifier_do {
-
- # This routine assembles tokens into identifiers. It maintains a
- # scan state, id_scan_state. It updates id_scan_state based upon
- # current id_scan_state and token, and returns an updated
- # id_scan_state and the next index after the identifier.
- # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
- # $last_nonblank_type
-
- my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
- $expecting, $container_type )
- = @_;
- my $i_begin = $i;
- my $type = '';
- my $tok_begin = $rtokens->[$i_begin];
- if ( $tok_begin eq ':' ) { $tok_begin = '::' }
- my $id_scan_state_begin = $id_scan_state;
- my $identifier_begin = $identifier;
- my $tok = $tok_begin;
- my $message = "";
-
- my $in_prototype_or_signature = $container_type =~ /^sub/;
-
- # these flags will be used to help figure out the type:
- my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
- my $saw_type;
-
- # allow old package separator (') except in 'use' statement
- my $allow_tick = ( $last_nonblank_token ne 'use' );
-
- # get started by defining a type and a state if necessary
- unless ($id_scan_state) {
- $context = UNKNOWN_CONTEXT;
-
- # fixup for digraph
- if ( $tok eq '>' ) {
- $tok = '->';
- $tok_begin = $tok;
- }
- $identifier = $tok;
-
- if ( $tok eq '$' || $tok eq '*' ) {
- $id_scan_state = '$';
- $context = SCALAR_CONTEXT;
- }
- elsif ( $tok eq '%' || $tok eq '@' ) {
- $id_scan_state = '$';
- $context = LIST_CONTEXT;
- }
- elsif ( $tok eq '&' ) {
- $id_scan_state = '&';
- }
- elsif ( $tok eq 'sub' or $tok eq 'package' ) {
- $saw_alpha = 0; # 'sub' is considered type info here
- $id_scan_state = '$';
- $identifier .= ' '; # need a space to separate sub from sub name
- }
- elsif ( $tok eq '::' ) {
- $id_scan_state = 'A';
- }
- elsif ( $tok =~ /^[A-Za-z_]/ ) {
- $id_scan_state = ':';
- }
- elsif ( $tok eq '->' ) {
- $id_scan_state = '$';
- }
- else {
-
- # shouldn't happen
- my ( $a, $b, $c ) = caller;
- warning("Program Bug: scan_identifier given bad token = $tok \n");
- warning(" called from sub $a line: $c\n");
- report_definite_bug();
- }
- $saw_type = !$saw_alpha;
- }
- else {
- $i--;
- $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
- }
-
- # now loop to gather the identifier
- my $i_save = $i;
-
- while ( $i < $max_token_index ) {
- $i_save = $i unless ( $tok =~ /^\s*$/ );
- $tok = $rtokens->[ ++$i ];
-
- if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
- $tok = '::';
- $i++;
- }
-
- if ( $id_scan_state eq '$' ) { # starting variable name
-
- if ( $tok eq '$' ) {
-
- $identifier .= $tok;
-
- # we've got a punctuation variable if end of line (punct.t)
- if ( $i == $max_token_index ) {
- $type = 'i';
- $id_scan_state = '';
- last;
- }
- }
-
- # POSTDEFREF ->@ ->% ->& ->*
- elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
- $identifier .= $tok;
- }
- elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
- $saw_alpha = 1;
- $id_scan_state = ':'; # now need ::
- $identifier .= $tok;
- }
- elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
- $saw_alpha = 1;
- $id_scan_state = ':'; # now need ::
- $identifier .= $tok;
-
- # Perl will accept leading digits in identifiers,
- # although they may not always produce useful results.
- # Something like $main::0 is ok. But this also works:
- #
- # sub howdy::123::bubba{ print "bubba $54321!\n" }
- # howdy::123::bubba();
- #
- }
- elsif ( $tok =~ /^[0-9]/ ) { # numeric
- $saw_alpha = 1;
- $id_scan_state = ':'; # now need ::
- $identifier .= $tok;
- }
- elsif ( $tok eq '::' ) {
- $id_scan_state = 'A';
- $identifier .= $tok;
- }
-
- # $# and POSTDEFREF ->$#
- elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
- $identifier .= $tok; # keep same state, a $ could follow
- }
- elsif ( $tok eq '{' ) {
-
- # check for something like ${#} or ${©}
- if (
- (
- $identifier eq '$'
- || $identifier eq '@'
- || $identifier eq '$#'
- )
- && $i + 2 <= $max_token_index
- && $rtokens->[ $i + 2 ] eq '}'
- && $rtokens->[ $i + 1 ] !~ /[\s\w]/
- )
- {
- my $next2 = $rtokens->[ $i + 2 ];
- my $next1 = $rtokens->[ $i + 1 ];
- $identifier .= $tok . $next1 . $next2;
- $i += 2;
- $id_scan_state = '';
- last;
- }
-
- # skip something like ${xxx} or ->{
- $id_scan_state = '';
-
- # if this is the first token of a line, any tokens for this
- # identifier have already been accumulated
- if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
- $i = $i_save;
- last;
- }
-
- # space ok after leading $ % * & @
- elsif ( $tok =~ /^\s*$/ ) {
-
- if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
-
- if ( length($identifier) > 1 ) {
- $id_scan_state = '';
- $i = $i_save;
- $type = 'i'; # probably punctuation variable
- last;
- }
- else {
-
- # spaces after $'s are common, and space after @
- # is harmless, so only complain about space
- # after other type characters. Space after $ and
- # @ will be removed in formatting. Report space
- # after % and * because they might indicate a
- # parsing error. In other words '% ' might be a
- # modulo operator. Delete this warning if it
- # gets annoying.
- if ( $identifier !~ /^[\@\$]$/ ) {
- $message =
- "Space in identifier, following $identifier\n";
- }
- }
- }
-
- # else:
- # space after '->' is ok
- }
- elsif ( $tok eq '^' ) {
-
- # check for some special variables like $^W
- if ( $identifier =~ /^[\$\*\@\%]$/ ) {
- $identifier .= $tok;
- $id_scan_state = 'A';
-
- # Perl accepts '$^]' or '@^]', but
- # there must not be a space before the ']'.
- my $next1 = $rtokens->[ $i + 1 ];
- if ( $next1 eq ']' ) {
- $i++;
- $identifier .= $next1;
- $id_scan_state = "";
- last;
- }
- }
- else {
- $id_scan_state = '';
- }
- }
- else { # something else
-
- if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
- $id_scan_state = '';
- $i = $i_save;
- $type = 'i'; # probably punctuation variable
- last;
- }
-
- # check for various punctuation variables
- if ( $identifier =~ /^[\$\*\@\%]$/ ) {
- $identifier .= $tok;
- }
-
- # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
- elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
- $identifier .= $tok;
- }
-
- elsif ( $identifier eq '$#' ) {
-
- if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
-
- # perl seems to allow just these: $#: $#- $#+
- elsif ( $tok =~ /^[\:\-\+]$/ ) {
- $type = 'i';
- $identifier .= $tok;
- }
- else {
- $i = $i_save;
- write_logfile_entry( 'Use of $# is deprecated' . "\n" );
- }
- }
- elsif ( $identifier eq '$$' ) {
-
- # perl does not allow references to punctuation
- # variables without braces. For example, this
- # won't work:
- # $:=\4;
- # $a = $$:;
- # You would have to use
- # $a = ${$:};
-
- $i = $i_save;
- if ( $tok eq '{' ) { $type = 't' }
- else { $type = 'i' }
- }
- elsif ( $identifier eq '->' ) {
- $i = $i_save;
- }
- else {
- $i = $i_save;
- if ( length($identifier) == 1 ) { $identifier = ''; }
- }
- $id_scan_state = '';
- last;
- }
- }
- elsif ( $id_scan_state eq '&' ) { # starting sub call?
-
- if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- $identifier .= $tok;
- }
- elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- $identifier .= $tok;
- }
- elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- $identifier .= $tok;
- }
- elsif ( $tok =~ /^\s*$/ ) { # allow space
- }
- elsif ( $tok eq '::' ) { # leading ::
- $id_scan_state = 'A'; # accept alpha next
- $identifier .= $tok;
- }
- elsif ( $tok eq '{' ) {
- if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
- $i = $i_save;
- $id_scan_state = '';
- last;
- }
- else {
-
- # punctuation variable?
- # testfile: cunningham4.pl
- #
- # We have to be careful here. If we are in an unknown state,
- # we will reject the punctuation variable. In the following
- # example the '&' is a binary operator but we are in an unknown
- # state because there is no sigil on 'Prima', so we don't
- # know what it is. But it is a bad guess that
- # '&~' is a function variable.
- # $self->{text}->{colorMap}->[
- # Prima::PodView::COLOR_CODE_FOREGROUND
- # & ~tb::COLOR_INDEX ] =
- # $sec->{ColorCode}
- if ( $identifier eq '&' && $expecting ) {
- $identifier .= $tok;
- }
- else {
- $identifier = '';
- $i = $i_save;
- $type = '&';
- }
- $id_scan_state = '';
- last;
- }
- }
- elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
-
- if ( $tok =~ /^[A-Za-z_]/ ) { # found it
- $identifier .= $tok;
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- }
- elsif ( $tok eq "'" && $allow_tick ) {
- $identifier .= $tok;
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- }
- elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
- $identifier .= $tok;
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- }
- elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
- $id_scan_state = '(';
- $identifier .= $tok;
- }
- elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
- $id_scan_state = ')';
- $identifier .= $tok;
- }
- else {
- $id_scan_state = '';
- $i = $i_save;
- last;
- }
- }
- elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
-
- if ( $tok eq '::' ) { # got it
- $identifier .= $tok;
- $id_scan_state = 'A'; # now require alpha
- }
- elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
- $identifier .= $tok;
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- }
- elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
- $identifier .= $tok;
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- }
- elsif ( $tok eq "'" && $allow_tick ) { # tick
-
- if ( $is_keyword{$identifier} ) {
- $id_scan_state = ''; # that's all
- $i = $i_save;
- }
- else {
- $identifier .= $tok;
- }
- }
- elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
- $id_scan_state = '(';
- $identifier .= $tok;
- }
- elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
- $id_scan_state = ')';
- $identifier .= $tok;
- }
- else {
- $id_scan_state = ''; # that's all
- $i = $i_save;
- last;
- }
- }
- elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
-
- if ( $tok eq '(' ) { # got it
- $identifier .= $tok;
- $id_scan_state = ')'; # now find the end of it
- }
- elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
- $identifier .= $tok;
- }
- else {
- $id_scan_state = ''; # that's all - no prototype
- $i = $i_save;
- last;
- }
- }
- elsif ( $id_scan_state eq ')' ) { # looking for ) to end
-
- if ( $tok eq ')' ) { # got it
- $identifier .= $tok;
- $id_scan_state = ''; # all done
- last;
- }
- elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
- $identifier .= $tok;
- }
- else { # probable error in script, but keep going
- warning("Unexpected '$tok' while seeking end of prototype\n");
- $identifier .= $tok;
- }
- }
- else { # can get here due to error in initialization
- $id_scan_state = '';
- $i = $i_save;
- last;
- }
- }
-
- if ( $id_scan_state eq ')' ) {
- warning("Hit end of line while seeking ) to end prototype\n");
- }
-
- # once we enter the actual identifier, it may not extend beyond
- # the end of the current line
- if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
- $id_scan_state = '';
- }
- if ( $i < 0 ) { $i = 0 }
-
- unless ($type) {
-
- if ($saw_type) {
-
- if ($saw_alpha) {
- if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
- $type = 'w';
- }
- else { $type = 'i' }
- }
- elsif ( $identifier eq '->' ) {
- $type = '->';
- }
- elsif (
- ( length($identifier) > 1 )
-
- # In something like '@$=' we have an identifier '@$'
- # In something like '$${' we have type '$$' (and only
- # part of an identifier)
- && !( $identifier =~ /\$$/ && $tok eq '{' )
- && ( $identifier !~ /^(sub |package )$/ )
- )
- {
- $type = 'i';
- }
- else { $type = 't' }
- }
- elsif ($saw_alpha) {
-
- # type 'w' includes anything without leading type info
- # ($,%,@,*) including something like abc::def::ghi
- $type = 'w';
- }
- else {
- $type = '';
- } # this can happen on a restart
- }
-
- if ($identifier) {
- $tok = $identifier;
- if ($message) { write_logfile_entry($message) }
- }
- else {
- $tok = $tok_begin;
- $i = $i_begin;
- }
-
- TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
- my ( $a, $b, $c ) = caller;
- print STDOUT
-"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
- print STDOUT
-"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
- };
- return ( $i, $tok, $type, $id_scan_state, $identifier );
-}
-
-{
-
- # saved package and subnames in case prototype is on separate line
- my ( $package_saved, $subname_saved );
-
- sub do_scan_sub {
-
- # do_scan_sub parses a sub name and prototype
- # it is called with $i_beg equal to the index of the first nonblank
- # token following a 'sub' token.
-
- # TODO: add future error checks to be sure we have a valid
- # sub name. For example, 'sub &doit' is wrong. Also, be sure
- # a name is given if and only if a non-anonymous sub is
- # appropriate.
- # USES GLOBAL VARS: $current_package, $last_nonblank_token,
- # $in_attribute_list, %saw_function_definition,
- # $statement_type
-
- my (
- $input_line, $i, $i_beg,
- $tok, $type, $rtokens,
- $rtoken_map, $id_scan_state, $max_token_index
- ) = @_;
- $id_scan_state = ""; # normally we get everything in one call
- my $subname = undef;
- my $package = undef;
- my $proto = undef;
- my $attrs = undef;
- my $match;
-
- my $pos_beg = $rtoken_map->[$i_beg];
- pos($input_line) = $pos_beg;
-
- # Look for the sub NAME
- if (
- $input_line =~ m/\G\s*
- ((?:\w*(?:'|::))*) # package - something that ends in :: or '
- (\w+) # NAME - required
- /gcx
- )
- {
- $match = 1;
- $subname = $2;
-
- $package = ( defined($1) && $1 ) ? $1 : $current_package;
- $package =~ s/\'/::/g;
- if ( $package =~ /^\:/ ) { $package = 'main' . $package }
- $package =~ s/::$//;
- my $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
- $type = 'i';
- }
-
- # Now look for PROTO ATTRS
- # Look for prototype/attributes which are usually on the same
- # line as the sub name but which might be on a separate line.
- # For example, we might have an anonymous sub with attributes,
- # or a prototype on a separate line from its sub name
-
- # NOTE: We only want to parse PROTOTYPES here. If we see anything that
- # does not look like a prototype, we assume it is a SIGNATURE and we
- # will stop and let the the standard tokenizer handle it. In
- # particular, we stop if we see any nested parens, braces, or commas.
- my $saw_opening_paren = $input_line =~ /\G\s*\(/;
- if (
- $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
- (\s*:)? # ATTRS leading ':'
- /gcx
- && ( $1 || $2 )
- )
- {
- $proto = $1;
- $attrs = $2;
-
- # If we also found the sub name on this call then append PROTO.
- # This is not necessary but for compatability with previous
- # versions when the -csc flag is used:
- if ( $match && $proto ) {
- $tok .= $proto;
- }
- $match ||= 1;
-
- # Handle prototype on separate line from subname
- if ($subname_saved) {
- $package = $package_saved;
- $subname = $subname_saved;
- $tok = $last_nonblank_token;
- }
- $type = 'i';
- }
-
- if ($match) {
-
- # ATTRS: if there are attributes, back up and let the ':' be
- # found later by the scanner.
- my $pos = pos($input_line);
- if ($attrs) {
- $pos -= length($attrs);
- }
-
- my $next_nonblank_token = $tok;
-
- # catch case of line with leading ATTR ':' after anonymous sub
- if ( $pos == $pos_beg && $tok eq ':' ) {
- $type = 'A';
- $in_attribute_list = 1;
- }
-
- # Otherwise, if we found a match we must convert back from
- # string position to the pre_token index for continued parsing.
- else {
-
- # I don't think an error flag can occur here ..but ?
- my $error;
- ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
- $max_token_index );
- if ($error) { warning("Possibly invalid sub\n") }
-
- # check for multiple definitions of a sub
- ( $next_nonblank_token, my $i_next ) =
- find_next_nonblank_token_on_this_line( $i, $rtokens,
- $max_token_index );
- }
-
- if ( $next_nonblank_token =~ /^(\s*|#)$/ )
- { # skip blank or side comment
- my ( $rpre_tokens, $rpre_types ) =
- peek_ahead_for_n_nonblank_pre_tokens(1);
- if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
- $next_nonblank_token = $rpre_tokens->[0];
- }
- else {
- $next_nonblank_token = '}';
- }
- }
- $package_saved = "";
- $subname_saved = "";
-
- # See what's next...
- if ( $next_nonblank_token eq '{' ) {
- if ($subname) {
-
- # Check for multiple definitions of a sub, but
- # it is ok to have multiple sub BEGIN, etc,
- # so we do not complain if name is all caps
- if ( $saw_function_definition{$package}{$subname}
- && $subname !~ /^[A-Z]+$/ )
- {
- my $lno = $saw_function_definition{$package}{$subname};
- warning(
-"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
- );
- }
- $saw_function_definition{$package}{$subname} =
- $tokenizer_self->{_last_line_number};
- }
- }
- elsif ( $next_nonblank_token eq ';' ) {
- }
- elsif ( $next_nonblank_token eq '}' ) {
- }
-
- # ATTRS - if an attribute list follows, remember the name
- # of the sub so the next opening brace can be labeled.
- # Setting 'statement_type' causes any ':'s to introduce
- # attributes.
- elsif ( $next_nonblank_token eq ':' ) {
- $statement_type = $tok;
- }
-
- # if we stopped before an open paren ...
- elsif ( $next_nonblank_token eq '(' ) {
-
- # If we DID NOT see this paren above then it must be on the
- # next line so we will set a flag to come back here and see if
- # it is a PROTOTYPE
-
- # Otherwise, we assume it is a SIGNATURE rather than a
- # PROTOTYPE and let the normal tokenizer handle it as a list
- if ( !$saw_opening_paren ) {
- $id_scan_state = 'sub'; # we must come back to get proto
- $package_saved = $package;
- $subname_saved = $subname;
- }
- $statement_type = $tok;
- }
- elsif ($next_nonblank_token) { # EOF technically ok
- warning(
-"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
- );
- }
- check_prototype( $proto, $package, $subname );
- }
-
- # no match but line not blank
- else {
- }
- return ( $i, $tok, $type, $id_scan_state );
- }
-}
-
-#########i###############################################################
-# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
-#########################################################################
-
-sub find_next_nonblank_token {
- my ( $i, $rtokens, $max_token_index ) = @_;
-
- if ( $i >= $max_token_index ) {
- if ( !peeked_ahead() ) {
- peeked_ahead(1);
- $rtokens =
- peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
- }
- }
- my $next_nonblank_token = $rtokens->[ ++$i ];
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
- $next_nonblank_token = $rtokens->[ ++$i ];
- }
- return ( $next_nonblank_token, $i );
-}
-
-sub numerator_expected {
-
- # this is a filter for a possible numerator, in support of guessing
- # for the / pattern delimiter token.
- # returns -
- # 1 - yes
- # 0 - can't tell
- # -1 - no
- # Note: I am using the convention that variables ending in
- # _expected have these 3 possible values.
- my ( $i, $rtokens, $max_token_index ) = @_;
- my $numerator_expected = 0;
-
- my $next_token = $rtokens->[ $i + 1 ];
- if ( $next_token eq '=' ) { $i++; } # handle /=
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens, $max_token_index );
-
- if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
- $numerator_expected = 1;
- }
- else {
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
- $numerator_expected = 0;
- }
- else {
- $numerator_expected = -1;
- }
- }
- return $numerator_expected;
-}
-
-sub pattern_expected {
-
- # This is the start of a filter for a possible pattern.
- # It looks at the token after a possible pattern and tries to
- # determine if that token could end a pattern.
- # returns -
- # 1 - yes
- # 0 - can't tell
- # -1 - no
- my ( $i, $rtokens, $max_token_index ) = @_;
- my $is_pattern = 0;
-
- my $next_token = $rtokens->[ $i + 1 ];
- if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens, $max_token_index );
-
- # list of tokens which may follow a pattern
- # (can probably be expanded)
- if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
- {
- $is_pattern = 1;
- }
- else {
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
- $is_pattern = 0;
- }
- else {
- $is_pattern = -1;
- }
- }
- return $is_pattern;
-}
-
-sub find_next_nonblank_token_on_this_line {
- my ( $i, $rtokens, $max_token_index ) = @_;
- my $next_nonblank_token;
-
- if ( $i < $max_token_index ) {
- $next_nonblank_token = $rtokens->[ ++$i ];
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
-
- if ( $i < $max_token_index ) {
- $next_nonblank_token = $rtokens->[ ++$i ];
- }
- }
- }
- else {
- $next_nonblank_token = "";
- }
- return ( $next_nonblank_token, $i );
-}
-
-sub find_angle_operator_termination {
-
- # We are looking at a '<' and want to know if it is an angle operator.
- # We are to return:
- # $i = pretoken index of ending '>' if found, current $i otherwise
- # $type = 'Q' if found, '>' otherwise
- my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
- my $i = $i_beg;
- my $type = '<';
- pos($input_line) = 1 + $rtoken_map->[$i];
-
- my $filter;
-
- # we just have to find the next '>' if a term is expected
- if ( $expecting == TERM ) { $filter = '[\>]' }
-
- # we have to guess if we don't know what is expected
- elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
-
- # shouldn't happen - we shouldn't be here if operator is expected
- else { warning("Program Bug in find_angle_operator_termination\n") }
-
- # To illustrate what we might be looking at, in case we are
- # guessing, here are some examples of valid angle operators
- # (or file globs):
- # <tmp_imp/*>
- # <FH>
- # <$fh>
- # <*.c *.h>
- # <_>
- # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
- # <${PREFIX}*img*.$IMAGE_TYPE>
- # <img*.$IMAGE_TYPE>
- # <Timg*.$IMAGE_TYPE>
- # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
- #
- # Here are some examples of lines which do not have angle operators:
- # return undef unless $self->[2]++ < $#{$self->[1]};
- # < 2 || @$t >
- #
- # the following line from dlister.pl caused trouble:
- # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
- #
- # If the '<' starts an angle operator, it must end on this line and
- # it must not have certain characters like ';' and '=' in it. I use
- # this to limit the testing. This filter should be improved if
- # possible.
-
- if ( $input_line =~ /($filter)/g ) {
-
- if ( $1 eq '>' ) {
-
- # We MAY have found an angle operator termination if we get
- # here, but we need to do more to be sure we haven't been
- # fooled.
- my $pos = pos($input_line);
-
- my $pos_beg = $rtoken_map->[$i];
- my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
-
- # Reject if the closing '>' follows a '-' as in:
- # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
- if ( $expecting eq UNKNOWN ) {
- my $check = substr( $input_line, $pos - 2, 1 );
- if ( $check eq '-' ) {
- return ( $i, $type );
- }
- }
-
- ######################################debug#####
- #write_diagnostics( "ANGLE? :$str\n");
- #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
- ######################################debug#####
- $type = 'Q';
- my $error;
- ( $i, $error ) =
- inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
-
- # It may be possible that a quote ends midway in a pretoken.
- # If this happens, it may be necessary to split the pretoken.
- if ($error) {
- warning(
- "Possible tokinization error..please check this line\n");
- report_possible_bug();
- }
-
- # Now let's see where we stand....
- # OK if math op not possible
- if ( $expecting == TERM ) {
- }
-
- # OK if there are no more than 2 pre-tokens inside
- # (not possible to write 2 token math between < and >)
- # This catches most common cases
- elsif ( $i <= $i_beg + 3 ) {
- write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
- }
-
- # Not sure..
- else {
-
- # Let's try a Brace Test: any braces inside must balance
- my $br = 0;
- while ( $str =~ /\{/g ) { $br++ }
- while ( $str =~ /\}/g ) { $br-- }
- my $sb = 0;
- while ( $str =~ /\[/g ) { $sb++ }
- while ( $str =~ /\]/g ) { $sb-- }
- my $pr = 0;
- while ( $str =~ /\(/g ) { $pr++ }
- while ( $str =~ /\)/g ) { $pr-- }
-
- # if braces do not balance - not angle operator
- if ( $br || $sb || $pr ) {
- $i = $i_beg;
- $type = '<';
- write_diagnostics(
- "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
- }
-
- # we should keep doing more checks here...to be continued
- # Tentatively accepting this as a valid angle operator.
- # There are lots more things that can be checked.
- else {
- write_diagnostics(
- "ANGLE-Guessing yes: $str expecting=$expecting\n");
- write_logfile_entry("Guessing angle operator here: $str\n");
- }
- }
- }
-
- # didn't find ending >
- else {
- if ( $expecting == TERM ) {
- warning("No ending > for angle operator\n");
- }
- }
- }
- return ( $i, $type );
-}
-
-sub scan_number_do {
-
- # scan a number in any of the formats that Perl accepts
- # Underbars (_) are allowed in decimal numbers.
- # input parameters -
- # $input_line - the string to scan
- # $i - pre_token index to start scanning
- # $rtoken_map - reference to the pre_token map giving starting
- # character position in $input_line of token $i
- # output parameters -
- # $i - last pre_token index of the number just scanned
- # number - the number (characters); or undef if not a number
-
- my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
- my $pos_beg = $rtoken_map->[$i];
- my $pos;
- my $i_begin = $i;
- my $number = undef;
- my $type = $input_type;
-
- my $first_char = substr( $input_line, $pos_beg, 1 );
-
- # Look for bad starting characters; Shouldn't happen..
- if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
- warning("Program bug - scan_number given character $first_char\n");
- report_definite_bug();
- return ( $i, $type, $number );
- }
-
- # handle v-string without leading 'v' character ('Two Dot' rule)
- # (vstring.t)
- # TODO: v-strings may contain underscores
- pos($input_line) = $pos_beg;
- if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
- $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'v';
- report_v_string($number);
- }
-
- # handle octal, hex, binary
- if ( !defined($number) ) {
- pos($input_line) = $pos_beg;
- if ( $input_line =~
- /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
- {
- $pos = pos($input_line);
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'n';
- }
- }
-
- # handle decimal
- if ( !defined($number) ) {
- pos($input_line) = $pos_beg;
-
- if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
- $pos = pos($input_line);
-
- # watch out for things like 0..40 which would give 0. by this;
- if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
- && ( substr( $input_line, $pos, 1 ) eq '.' ) )
- {
- $pos--;
- }
- my $numc = $pos - $pos_beg;
- $number = substr( $input_line, $pos_beg, $numc );
- $type = 'n';
- }
- }
-
- # filter out non-numbers like e + - . e2 .e3 +e6
- # the rule: at least one digit, and any 'e' must be preceded by a digit
- if (
- $number !~ /\d/ # no digits
- || ( $number =~ /^(.*)[eE]/
- && $1 !~ /\d/ ) # or no digits before the 'e'
- )
- {
- $number = undef;
- $type = $input_type;
- return ( $i, $type, $number );
- }
-
- # Found a number; now we must convert back from character position
- # to pre_token index. An error here implies user syntax error.
- # An example would be an invalid octal number like '009'.
- my $error;
- ( $i, $error ) =
- inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
- if ($error) { warning("Possibly invalid number\n") }
-
- return ( $i, $type, $number );
-}
-
-sub inverse_pretoken_map {
-
- # Starting with the current pre_token index $i, scan forward until
- # finding the index of the next pre_token whose position is $pos.
- my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
- my $error = 0;
-
- while ( ++$i <= $max_token_index ) {
-
- if ( $pos <= $rtoken_map->[$i] ) {
-
- # Let the calling routine handle errors in which we do not
- # land on a pre-token boundary. It can happen by running
- # perltidy on some non-perl scripts, for example.
- if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
- $i--;
- last;
- }
- }
- return ( $i, $error );
-}
-
-sub find_here_doc {
-
- # find the target of a here document, if any
- # input parameters:
- # $i - token index of the second < of <<
- # ($i must be less than the last token index if this is called)
- # output parameters:
- # $found_target = 0 didn't find target; =1 found target
- # HERE_TARGET - the target string (may be empty string)
- # $i - unchanged if not here doc,
- # or index of the last token of the here target
- # $saw_error - flag noting unbalanced quote on here target
- my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
- my $ibeg = $i;
- my $found_target = 0;
- my $here_doc_target = '';
- my $here_quote_character = '';
- my $saw_error = 0;
- my ( $next_nonblank_token, $i_next_nonblank, $next_token );
- $next_token = $rtokens->[ $i + 1 ];
-
- # perl allows a backslash before the target string (heredoc.t)
- my $backslash = 0;
- if ( $next_token eq '\\' ) {
- $backslash = 1;
- $next_token = $rtokens->[ $i + 2 ];
- }
-
- ( $next_nonblank_token, $i_next_nonblank ) =
- find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
-
- if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
-
- my $in_quote = 1;
- my $quote_depth = 0;
- my $quote_pos = 0;
- my $quoted_string;
-
- (
- $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
- $quoted_string
- )
- = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
- $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
-
- if ($in_quote) { # didn't find end of quote, so no target found
- $i = $ibeg;
- if ( $expecting == TERM ) {
- warning(
-"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
- );
- $saw_error = 1;
- }
- }
- else { # found ending quote
- ##my $j;
- $found_target = 1;
-
- my $tokj;
- foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
- $tokj = $rtokens->[$j];
-
- # we have to remove any backslash before the quote character
- # so that the here-doc-target exactly matches this string
- next
- if ( $tokj eq "\\"
- && $j < $i - 1
- && $rtokens->[ $j + 1 ] eq $here_quote_character );
- $here_doc_target .= $tokj;
- }
- }
- }
-
- elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
- $found_target = 1;
- write_logfile_entry(
- "found blank here-target after <<; suggest using \"\"\n");
- $i = $ibeg;
- }
- elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
-
- my $here_doc_expected;
- if ( $expecting == UNKNOWN ) {
- $here_doc_expected = guess_if_here_doc($next_token);
- }
- else {
- $here_doc_expected = 1;
- }
-
- if ($here_doc_expected) {
- $found_target = 1;
- $here_doc_target = $next_token;
- $i = $ibeg + 1;
- }
-
- }
- else {
-
- if ( $expecting == TERM ) {
- $found_target = 1;
- write_logfile_entry("Note: bare here-doc operator <<\n");
- }
- else {
- $i = $ibeg;
- }
- }
-
- # patch to neglect any prepended backslash
- if ( $found_target && $backslash ) { $i++ }
-
- return ( $found_target, $here_doc_target, $here_quote_character, $i,
- $saw_error );
-}
-
-sub do_quote {
-
- # follow (or continue following) quoted string(s)
- # $in_quote return code:
- # 0 - ok, found end
- # 1 - still must find end of quote whose target is $quote_character
- # 2 - still looking for end of first of two quotes
- #
- # Returns updated strings:
- # $quoted_string_1 = quoted string seen while in_quote=1
- # $quoted_string_2 = quoted string seen while in_quote=2
- my (
- $i, $in_quote, $quote_character,
- $quote_pos, $quote_depth, $quoted_string_1,
- $quoted_string_2, $rtokens, $rtoken_map,
- $max_token_index
- ) = @_;
-
- my $in_quote_starting = $in_quote;
-
- my $quoted_string;
- if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
- my $ibeg = $i;
- (
- $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
- $quoted_string
- )
- = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth, $max_token_index );
- $quoted_string_2 .= $quoted_string;
- if ( $in_quote == 1 ) {
- if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
- $quote_character = '';
- }
- else {
- $quoted_string_2 .= "\n";
- }
- }
-
- if ( $in_quote == 1 ) { # one (more) quote to follow
- my $ibeg = $i;
- (
- $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
- $quoted_string
- )
- = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
- $quote_pos, $quote_depth, $max_token_index );
- $quoted_string_1 .= $quoted_string;
- if ( $in_quote == 1 ) {
- $quoted_string_1 .= "\n";
- }
- }
- return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
- $quoted_string_1, $quoted_string_2 );
-}
-
-sub follow_quoted_string {
-
- # scan for a specific token, skipping escaped characters
- # if the quote character is blank, use the first non-blank character
- # input parameters:
- # $rtokens = reference to the array of tokens
- # $i = the token index of the first character to search
- # $in_quote = number of quoted strings being followed
- # $beginning_tok = the starting quote character
- # $quote_pos = index to check next for alphanumeric delimiter
- # output parameters:
- # $i = the token index of the ending quote character
- # $in_quote = decremented if found end, unchanged if not
- # $beginning_tok = the starting quote character
- # $quote_pos = index to check next for alphanumeric delimiter
- # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
- # $quoted_string = the text of the quote (without quotation tokens)
- my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
- $max_token_index )
- = @_;
- my ( $tok, $end_tok );
- my $i = $i_beg - 1;
- my $quoted_string = "";
-
- TOKENIZER_DEBUG_FLAG_QUOTE && do {
- print STDOUT
-"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
- };
-
- # get the corresponding end token
- if ( $beginning_tok !~ /^\s*$/ ) {
- $end_tok = matching_end_token($beginning_tok);
- }
-
- # a blank token means we must find and use the first non-blank one
- else {
- my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
-
- while ( $i < $max_token_index ) {
- $tok = $rtokens->[ ++$i ];
-
- if ( $tok !~ /^\s*$/ ) {
-
- if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
- $i = $max_token_index;
- }
- else {
-
- if ( length($tok) > 1 ) {
- if ( $quote_pos <= 0 ) { $quote_pos = 1 }
- $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
- }
- else {
- $beginning_tok = $tok;
- $quote_pos = 0;
- }
- $end_tok = matching_end_token($beginning_tok);
- $quote_depth = 1;
- last;
- }
- }
- else {
- $allow_quote_comments = 1;
- }
- }
- }
-
- # There are two different loops which search for the ending quote
- # character. In the rare case of an alphanumeric quote delimiter, we
- # have to look through alphanumeric tokens character-by-character, since
- # the pre-tokenization process combines multiple alphanumeric
- # characters, whereas for a non-alphanumeric delimiter, only tokens of
- # length 1 can match.
-
- ###################################################################
- # Case 1 (rare): loop for case of alphanumeric quote delimiter..
- # "quote_pos" is the position the current word to begin searching
- ###################################################################
- if ( $beginning_tok =~ /\w/ ) {
-
- # Note this because it is not recommended practice except
- # for obfuscated perl contests
- if ( $in_quote == 1 ) {
- write_logfile_entry(
- "Note: alphanumeric quote delimiter ($beginning_tok) \n");
- }
-
- while ( $i < $max_token_index ) {
-
- if ( $quote_pos == 0 || ( $i < 0 ) ) {
- $tok = $rtokens->[ ++$i ];
-
- if ( $tok eq '\\' ) {
-
- # retain backslash unless it hides the end token
- $quoted_string .= $tok
- unless $rtokens->[ $i + 1 ] eq $end_tok;
- $quote_pos++;
- last if ( $i >= $max_token_index );
- $tok = $rtokens->[ ++$i ];
- }
- }
- my $old_pos = $quote_pos;
-
- unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
- {
-
- }
- $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
-
- if ( $quote_pos > 0 ) {
-
- $quoted_string .=
- substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
-
- $quote_depth--;
-
- if ( $quote_depth == 0 ) {
- $in_quote--;
- last;
- }
- }
- else {
- $quoted_string .= substr( $tok, $old_pos );
- }
- }
- }
-
- ########################################################################
- # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
- ########################################################################
- else {
-
- while ( $i < $max_token_index ) {
- $tok = $rtokens->[ ++$i ];
-
- if ( $tok eq $end_tok ) {
- $quote_depth--;
-
- if ( $quote_depth == 0 ) {
- $in_quote--;
- last;
- }
- }
- elsif ( $tok eq $beginning_tok ) {
- $quote_depth++;
- }
- elsif ( $tok eq '\\' ) {
-
- # retain backslash unless it hides the beginning or end token
- $tok = $rtokens->[ ++$i ];
- $quoted_string .= '\\'
- unless ( $tok eq $end_tok || $tok eq $beginning_tok );
- }
- $quoted_string .= $tok;
- }
- }
- if ( $i > $max_token_index ) { $i = $max_token_index }
- return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
- $quoted_string );
-}
-
-sub indicate_error {
- my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
- interrupt_logfile();
- warning($msg);
- write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
- resume_logfile();
- return;
-}
-
-sub write_error_indicator_pair {
- my ( $line_number, $input_line, $pos, $carrat ) = @_;
- my ( $offset, $numbered_line, $underline ) =
- make_numbered_line( $line_number, $input_line, $pos );
- $underline = write_on_underline( $underline, $pos - $offset, $carrat );
- warning( $numbered_line . "\n" );
- $underline =~ s/\s*$//;
- warning( $underline . "\n" );
- return;
-}
-
-sub make_numbered_line {
-
- # Given an input line, its line number, and a character position of
- # interest, create a string not longer than 80 characters of the form
- # $lineno: sub_string
- # such that the sub_string of $str contains the position of interest
- #
- # Here is an example of what we want, in this case we add trailing
- # '...' because the line is long.
- #
- # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
- #
- # Here is another example, this time in which we used leading '...'
- # because of excessive length:
- #
- # 2: ... er of the World Wide Web Consortium's
- #
- # input parameters are:
- # $lineno = line number
- # $str = the text of the line
- # $pos = position of interest (the error) : 0 = first character
- #
- # We return :
- # - $offset = an offset which corrects the position in case we only
- # display part of a line, such that $pos-$offset is the effective
- # position from the start of the displayed line.
- # - $numbered_line = the numbered line as above,
- # - $underline = a blank 'underline' which is all spaces with the same
- # number of characters as the numbered line.
-
- my ( $lineno, $str, $pos ) = @_;
- my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
- my $excess = length($str) - $offset - 68;
- my $numc = ( $excess > 0 ) ? 68 : undef;
-
- if ( defined($numc) ) {
- if ( $offset == 0 ) {
- $str = substr( $str, $offset, $numc - 4 ) . " ...";
- }
- else {
- $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
- }
- }
- else {
-
- if ( $offset == 0 ) {
- }
- else {
- $str = "... " . substr( $str, $offset + 4 );
- }
- }
-
- my $numbered_line = sprintf( "%d: ", $lineno );
- $offset -= length($numbered_line);
- $numbered_line .= $str;
- my $underline = " " x length($numbered_line);
- return ( $offset, $numbered_line, $underline );
-}
-
-sub write_on_underline {
-
- # The "underline" is a string that shows where an error is; it starts
- # out as a string of blanks with the same length as the numbered line of
- # code above it, and we have to add marking to show where an error is.
- # In the example below, we want to write the string '--^' just below
- # the line of bad code:
- #
- # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
- # ---^
- # We are given the current underline string, plus a position and a
- # string to write on it.
- #
- # In the above example, there will be 2 calls to do this:
- # First call: $pos=19, pos_chr=^
- # Second call: $pos=16, pos_chr=---
- #
- # This is a trivial thing to do with substr, but there is some
- # checking to do.
-
- my ( $underline, $pos, $pos_chr ) = @_;
-
- # check for error..shouldn't happen
- unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
- return $underline;
- }
- my $excess = length($pos_chr) + $pos - length($underline);
- if ( $excess > 0 ) {
- $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
- }
- substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
- return ($underline);
-}
-
-sub pre_tokenize {
-
- # Break a string, $str, into a sequence of preliminary tokens. We
- # are interested in these types of tokens:
- # words (type='w'), example: 'max_tokens_wanted'
- # digits (type = 'd'), example: '0755'
- # whitespace (type = 'b'), example: ' '
- # any other single character (i.e. punct; type = the character itself).
- # We cannot do better than this yet because we might be in a quoted
- # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
- # tokens.
- my ( $str, $max_tokens_wanted ) = @_;
-
- # we return references to these 3 arrays:
- my @tokens = (); # array of the tokens themselves
- my @token_map = (0); # string position of start of each token
- my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
-
- do {
-
- # whitespace
- if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
-
- # numbers
- # note that this must come before words!
- elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
-
- # words
- elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
-
- # single-character punctuation
- elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
-
- # that's all..
- else {
- return ( \@tokens, \@token_map, \@type );
- }
-
- push @tokens, $1;
- push @token_map, pos($str);
-
- } while ( --$max_tokens_wanted != 0 );
-
- return ( \@tokens, \@token_map, \@type );
-}
-
-sub show_tokens {
-
- # this is an old debug routine
- # not called, but saved for reference
- my ( $rtokens, $rtoken_map ) = @_;
- my $num = scalar( @{$rtokens} );
-
- foreach my $i ( 0 .. $num - 1 ) {
- my $len = length( $rtokens->[$i] );
- print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
- }
- return;
-}
-
-{
- my %matching_end_token;
-
- BEGIN {
- %matching_end_token = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '<' => '>',
- );
- }
-
- sub matching_end_token {
-
- # return closing character for a pattern
- my $beginning_token = shift;
- if ( $matching_end_token{$beginning_token} ) {
- return $matching_end_token{$beginning_token};
- }
- return ($beginning_token);
- }
-}
-
-sub dump_token_types {
- my ( $class, $fh ) = @_;
-
- # This should be the latest list of token types in use
- # adding NEW_TOKENS: add a comment here
- print $fh <<'END_OF_LIST';
-
-Here is a list of the token types currently used for lines of type 'CODE'.
-For the following tokens, the "type" of a token is just the token itself.
-
-.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
-( ) <= >= == =~ !~ != ++ -- /= x=
-... **= <<= >>= &&= ||= //= <=>
-, + - / * | % ! x ~ = \ ? : . < > ^ &
-
-The following additional token types are defined:
-
- type meaning
- b blank (white space)
- { indent: opening structural curly brace or square bracket or paren
- (code block, anonymous hash reference, or anonymous array reference)
- } outdent: right structural curly brace or square bracket or paren
- [ left non-structural square bracket (enclosing an array index)
- ] right non-structural square bracket
- ( left non-structural paren (all but a list right of an =)
- ) right non-structural paren
- L left non-structural curly brace (enclosing a key)
- R right non-structural curly brace
- ; terminal semicolon
- f indicates a semicolon in a "for" statement
- h here_doc operator <<
- # a comment
- Q indicates a quote or pattern
- q indicates a qw quote block
- k a perl keyword
- C user-defined constant or constant function (with void prototype = ())
- U user-defined function taking parameters
- G user-defined function taking block parameter (like grep/map/eval)
- M (unused, but reserved for subroutine definition name)
- P (unused, but -html uses it to label pod text)
- t type indicater such as %,$,@,*,&,sub
- w bare word (perhaps a subroutine call)
- i identifier of some type (with leading %, $, @, *, &, sub, -> )
- n a number
- v a v-string
- F a file test operator (like -e)
- Y File handle
- Z identifier in indirect object slot: may be file handle, object
- J LABEL: code block label
- j LABEL after next, last, redo, goto
- p unary +
- m unary -
- pp pre-increment operator ++
- mm pre-decrement operator --
- A : used as attribute separator
-
- Here are the '_line_type' codes used internally:
- SYSTEM - system-specific code before hash-bang line
- CODE - line of perl code (including comments)
- POD_START - line starting pod, such as '=head'
- POD - pod documentation text
- POD_END - last line of pod section, '=cut'
- HERE - text of here-document
- HERE_END - last line of here-doc (target word)
- FORMAT - format section
- FORMAT_END - last line of format section, '.'
- DATA_START - __DATA__ line
- DATA - unidentified text following __DATA__
- END_START - __END__ line
- END - unidentified text following __END__
- ERROR - we are in big trouble, probably not a perl script
-END_OF_LIST
-
- return;
-}
-
-BEGIN {
-
- # These names are used in error messages
- @opening_brace_names = qw# '{' '[' '(' '?' #;
- @closing_brace_names = qw# '}' ']' ')' ':' #;
-
- my @q;
-
- my @digraphs = qw(
- .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
- );
- @is_digraph{@digraphs} = (1) x scalar(@digraphs);
-
- my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
- @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
-
- my @tetragraphs = qw( <<>> );
- @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
-
- # make a hash of all valid token types for self-checking the tokenizer
- # (adding NEW_TOKENS : select a new character and add to this list)
- my @valid_token_types = qw#
- A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
- { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
- #;
- push( @valid_token_types, @digraphs );
- push( @valid_token_types, @trigraphs );
- push( @valid_token_types, @tetragraphs );
- push( @valid_token_types, ( '#', ',', 'CORE::' ) );
- @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
-
- # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
- my @file_test_operators =
- qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
- @is_file_test_operator{@file_test_operators} =
- (1) x scalar(@file_test_operators);
-
- # these functions have prototypes of the form (&), so when they are
- # followed by a block, that block MAY BE followed by an operator.
- # Smartmatch operator ~~ may be followed by anonymous hash or array ref
- @q = qw( do eval );
- @is_block_operator{@q} = (1) x scalar(@q);
-
- # these functions allow an identifier in the indirect object slot
- @q = qw( print printf sort exec system say);
- @is_indirect_object_taker{@q} = (1) x scalar(@q);
-
- # These tokens may precede a code block
- # patched for SWITCH/CASE/CATCH. Actually these could be removed
- # now and we could let the extended-syntax coding handle them
- @q =
- qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
- unless do while until eval for foreach map grep sort
- switch case given when catch try finally);
- @is_code_block_token{@q} = (1) x scalar(@q);
-
- # I'll build the list of keywords incrementally
- my @Keywords = ();
-
- # keywords and tokens after which a value or pattern is expected,
- # but not an operator. In other words, these should consume terms
- # to their right, or at least they are not expected to be followed
- # immediately by operators.
- my @value_requestor = qw(
- AUTOLOAD
- BEGIN
- CHECK
- DESTROY
- END
- EQ
- GE
- GT
- INIT
- LE
- LT
- NE
- UNITCHECK
- abs
- accept
- alarm
- and
- atan2
- bind
- binmode
- bless
- break
- caller
- chdir
- chmod
- chomp
- chop
- chown
- chr
- chroot
- close
- closedir
- cmp
- connect
- continue
- cos
- crypt
- dbmclose
- dbmopen
- defined
- delete
- die
- dump
- each
- else
- elsif
- eof
- eq
- exec
- exists
- exit
- exp
- fcntl
- fileno
- flock
- for
- foreach
- formline
- ge
- getc
- getgrgid
- getgrnam
- gethostbyaddr
- gethostbyname
- getnetbyaddr
- getnetbyname
- getpeername
- getpgrp
- getpriority
- getprotobyname
- getprotobynumber
- getpwnam
- getpwuid
- getservbyname
- getservbyport
- getsockname
- getsockopt
- glob
- gmtime
- goto
- grep
- gt
- hex
- if
- index
- int
- ioctl
- join
- keys
- kill
- last
- lc
- lcfirst
- le
- length
- link
- listen
- local
- localtime
- lock
- log
- lstat
- lt
- map
- mkdir
- msgctl
- msgget
- msgrcv
- msgsnd
- my
- ne
- next
- no
- not
- oct
- open
- opendir
- or
- ord
- our
- pack
- pipe
- pop
- pos
- print
- printf
- prototype
- push
- quotemeta
- rand
- read
- readdir
- readlink
- readline
- readpipe
- recv
- redo
- ref
- rename
- require
- reset
- return
- reverse
- rewinddir
- rindex
- rmdir
- scalar
- seek
- seekdir
- select
- semctl
- semget
- semop
- send
- sethostent
- setnetent
- setpgrp
- setpriority
- setprotoent
- setservent
- setsockopt
- shift
- shmctl
- shmget
- shmread
- shmwrite
- shutdown
- sin
- sleep
- socket
- socketpair
- sort
- splice
- split
- sprintf
- sqrt
- srand
- stat
- study
- substr
- symlink
- syscall
- sysopen
- sysread
- sysseek
- system
- syswrite
- tell
- telldir
- tie
- tied
- truncate
- uc
- ucfirst
- umask
- undef
- unless
- unlink
- unpack
- unshift
- untie
- until
- use
- utime
- values
- vec
- waitpid
- warn
- while
- write
- xor
-
- switch
- case
- given
- when
- err
- say
-
- catch
- );
-
- # patched above for SWITCH/CASE given/when err say
- # 'err' is a fairly safe addition.
- # TODO: 'default' still needed if appropriate
- # 'use feature' seen, but perltidy works ok without it.
- # Concerned that 'default' could break code.
- push( @Keywords, @value_requestor );
-
- # These are treated the same but are not keywords:
- my @extra_vr = qw(
- constant
- vars
- );
- push( @value_requestor, @extra_vr );
-
- @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
-
- # this list contains keywords which do not look for arguments,
- # so that they might be followed by an operator, or at least
- # not a term.
- my @operator_requestor = qw(
- endgrent
- endhostent
- endnetent
- endprotoent
- endpwent
- endservent
- fork
- getgrent
- gethostent
- getlogin
- getnetent
- getppid
- getprotoent
- getpwent
- getservent
- setgrent
- setpwent
- time
- times
- wait
- wantarray
- );
-
- push( @Keywords, @operator_requestor );
-
- # These are treated the same but are not considered keywords:
- my @extra_or = qw(
- STDERR
- STDIN
- STDOUT
- );
-
- push( @operator_requestor, @extra_or );
-
- @expecting_operator_token{@operator_requestor} =
- (1) x scalar(@operator_requestor);
-
- # these token TYPES expect trailing operator but not a term
- # note: ++ and -- are post-increment and decrement, 'C' = constant
- my @operator_requestor_types = qw( ++ -- C <> q );
- @expecting_operator_types{@operator_requestor_types} =
- (1) x scalar(@operator_requestor_types);
-
- # these token TYPES consume values (terms)
- # note: pp and mm are pre-increment and decrement
- # f=semicolon in for, F=file test operator
- my @value_requestor_type = qw#
- L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
- **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
- <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
- f F pp mm Y p m U J G j >> << ^ t
- ~. ^. |. &. ^.= |.= &.=
- #;
- push( @value_requestor_type, ',' )
- ; # (perl doesn't like a ',' in a qw block)
- @expecting_term_types{@value_requestor_type} =
- (1) x scalar(@value_requestor_type);
-
- # Note: the following valid token types are not assigned here to
- # hashes requesting to be followed by values or terms, but are
- # instead currently hard-coded into sub operator_expected:
- # ) -> :: Q R Z ] b h i k n v w } #
-
- # For simple syntax checking, it is nice to have a list of operators which
- # will really be unhappy if not followed by a term. This includes most
- # of the above...
- %really_want_term = %expecting_term_types;
-
- # with these exceptions...
- delete $really_want_term{'U'}; # user sub, depends on prototype
- delete $really_want_term{'F'}; # file test works on $_ if no following term
- delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
- # let perl do it
-
- @q = qw(q qq qw qx qr s y tr m);
- @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
-
- # These keywords are handled specially in the tokenizer code:
- my @special_keywords = qw(
- do
- eval
- format
- m
- package
- q
- qq
- qr
- qw
- qx
- s
- sub
- tr
- y
- );
- push( @Keywords, @special_keywords );
-
- # Keywords after which list formatting may be used
- # WARNING: do not include |map|grep|eval or perl may die on
- # syntax errors (map1.t).
- my @keyword_taking_list = qw(
- and
- chmod
- chomp
- chop
- chown
- dbmopen
- die
- elsif
- exec
- fcntl
- for
- foreach
- formline
- getsockopt
- if
- index
- ioctl
- join
- kill
- local
- msgctl
- msgrcv
- msgsnd
- my
- open
- or
- our
- pack
- print
- printf
- push
- read
- readpipe
- recv
- return
- reverse
- rindex
- seek
- select
- semctl
- semget
- send
- setpriority
- setsockopt
- shmctl
- shmget
- shmread
- shmwrite
- socket
- socketpair
- sort
- splice
- split
- sprintf
- substr
- syscall
- sysopen
- sysread
- sysseek
- system
- syswrite
- tie
- unless
- unlink
- unpack
- unshift
- until
- vec
- warn
- while
- given
- when
- );
- @is_keyword_taking_list{@keyword_taking_list} =
- (1) x scalar(@keyword_taking_list);
-
- # These are not used in any way yet
- # my @unused_keywords = qw(
- # __FILE__
- # __LINE__
- # __PACKAGE__
- # );
-
- # The list of keywords was originally extracted from function 'keyword' in
- # perl file toke.c version 5.005.03, using this utility, plus a
- # little editing: (file getkwd.pl):
- # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
- # Add 'get' prefix where necessary, then split into the above lists.
- # This list should be updated as necessary.
- # The list should not contain these special variables:
- # ARGV DATA ENV SIG STDERR STDIN STDOUT
- # __DATA__ __END__
-
- @is_keyword{@Keywords} = (1) x scalar(@Keywords);
-}
-1;