+#
############################################################
#
# perltidy - a perl script indenter and formatter
#
-# Copyright (c) 2000-2007 by Steve Hancock
+# Copyright (c) 2000-2009 by Steve Hancock
# Distributed under the GPL license agreement; see file COPYING
#
# This program is free software; you can redistribute it and/or modify
#
# perltidy Tidy.pm
#
-# Code Contributions:
+# Code Contributions: See ChangeLog.html for a complete history.
# Michael Cartmell supplied code for adaptation to VMS and helped with
# v-strings.
# Hugh S. Myers supplied sub streamhandle and the supporting code to
# Yves Orton supplied coding to help detect Windows versions.
# Axel Rose supplied a patch for MacPerl.
# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
-# Dan Tyrell sent a patch for binary I/O.
+# Dan Tyrell contributed a patch for binary I/O.
+# Ueli Hugenschmidt contributed a patch for -fpsc
+# Sam Kington supplied a patch to identify the initial indentation of
+# entabbed code.
+# jonathan swartz supplied patches for:
+# * .../ pattern, which looks upwards from directory
+# * --notidy, to be used in directories where we want to avoid
+# accidentally tidying
+# * prefilter and postfilter
+# * iterations option
+#
# Many others have supplied key ideas, suggestions, and bug reports;
# see the CHANGES file.
#
@ISA = qw( Exporter );
@EXPORT = qw( &perltidy );
+use Cwd;
use IO::File;
use File::Basename;
BEGIN {
- ( $VERSION = q($Id: Tidy.pm,v 1.64 2007/05/08 20:01:45 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+ ( $VERSION = q($Id: Tidy.pm,v 1.74 2010/12/17 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
}
sub streamhandle {
my $test_file = $path . $name;
my ( $test_name, $test_path ) = fileparse($test_file);
return $test_file if ( $test_name eq $name );
- return undef if ( $^O eq 'VMS' );
+ return undef if ( $^O eq 'VMS' );
# this should work at least for Windows and Unix:
$test_file = $path . '/' . $name;
dump_options_category => undef,
dump_options_range => undef,
dump_abbreviations => undef,
+ prefilter => undef,
+ postfilter => undef,
);
# don't overwrite callers ARGV
my $source_stream = $input_hash{'source'};
my $stderr_stream = $input_hash{'stderr'};
my $user_formatter = $input_hash{'formatter'};
+ my $prefilter = $input_hash{'prefilter'};
+ my $postfilter = $input_hash{'postfilter'};
# various dump parameters
my $dump_options_type = $input_hash{'dump_options_type'};
return if ($quit_now);
+ # make printable string of options for this run as possible diagnostic
+ my $readable_options = readable_options( $rOpts, $roption_string );
+
# dump from command line
if ( $rOpts->{'dump-options'} ) {
- dump_options( $rOpts, $roption_string );
+ print STDOUT $readable_options;
exit 1;
}
if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
my $pattern = fileglob_to_re($input_file);
- eval "/$pattern/";
+ ##eval "/$pattern/";
if ( !$@ && opendir( DIR, './' ) ) {
my @files =
grep { /$pattern/ && !-d $_ } readdir(DIR);
$rpending_logfile_message );
next unless ($source_object);
+ # Prefilters and postfilters: The prefilter is a code reference
+ # that will be applied to the source before tidying, and the
+ # postfilter is a code reference that will be applied to the
+ # result before outputting.
+ if ($prefilter) {
+ my $buf = '';
+ while ( my $line = $source_object->get_line() ) {
+ $buf .= $line;
+ }
+ $buf = $prefilter->($buf);
+
+ $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
+ $rpending_logfile_message );
+ }
+
# register this file name with the Diagnostics package
$diagnostics_object->set_input_file($input_file)
if $diagnostics_object;
if ( defined($line_separator) ) { $binmode = 1 }
else { $line_separator = "\n" }
- my $sink_object =
- Perl::Tidy::LineSink->new( $output_file, $tee_file,
- $line_separator, $rOpts, $rpending_logfile_message, $binmode );
+ my ( $sink_object, $postfilter_buffer );
+ if ($postfilter) {
+ $sink_object =
+ Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message,
+ $binmode );
+ }
+ else {
+ $sink_object =
+ Perl::Tidy::LineSink->new( $output_file, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message,
+ $binmode );
+ }
#---------------------------------------------------------------
# initialize the error logger
$saw_extrude );
write_logfile_header(
$rOpts, $logger_object, $config_file,
- $rraw_options, $Windows_type
+ $rraw_options, $Windows_type, $readable_options,
);
if ($$rpending_logfile_message) {
$logger_object->write_logfile_entry($$rpending_logfile_message);
Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
}
- #---------------------------------------------------------------
- # create a formatter for this file : html writer or pretty printer
- #---------------------------------------------------------------
+ # loop over iterations
+ my $max_iterations = $rOpts->{'iterations'};
+ my $sink_object_final = $sink_object;
+ for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
+ my $temp_buffer;
- # we have to delete any old formatter because, for safety,
- # the formatter will check to see that there is only one.
- $formatter = undef;
+ # local copies of some debugging objects which get deleted
+ # after first iteration, but will reappear after this loop
+ my $debugger_object = $debugger_object;
+ my $logger_object = $logger_object;
+ my $diagnostics_object = $diagnostics_object;
- if ($user_formatter) {
- $formatter = $user_formatter;
- }
- elsif ( $rOpts->{'format'} eq 'html' ) {
- $formatter =
- Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
- $actual_output_extension, $html_toc_extension,
- $html_src_extension );
- }
- elsif ( $rOpts->{'format'} eq 'tidy' ) {
- $formatter = Perl::Tidy::Formatter->new(
+ # output to temp buffer until last iteration
+ if ( $iter < $max_iterations ) {
+ $sink_object =
+ Perl::Tidy::LineSink->new( \$temp_buffer, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message,
+ $binmode );
+ }
+ else {
+ $sink_object = $sink_object_final;
+
+ # terminate some debugging output after first pass
+ # to avoid needless output.
+ $debugger_object = undef;
+ $logger_object = undef;
+ $diagnostics_object = undef;
+ }
+
+ #---------------------------------------------------------------
+ # create a formatter for this file : html writer or pretty printer
+ #---------------------------------------------------------------
+
+ # we have to delete any old formatter because, for safety,
+ # the formatter will check to see that there is only one.
+ $formatter = undef;
+
+ if ($user_formatter) {
+ $formatter = $user_formatter;
+ }
+ elsif ( $rOpts->{'format'} eq 'html' ) {
+ $formatter =
+ Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
+ $actual_output_extension, $html_toc_extension,
+ $html_src_extension );
+ }
+ elsif ( $rOpts->{'format'} eq 'tidy' ) {
+ $formatter = Perl::Tidy::Formatter->new(
+ logger_object => $logger_object,
+ diagnostics_object => $diagnostics_object,
+ sink_object => $sink_object,
+ );
+ }
+ else {
+ die "I don't know how to do -format=$rOpts->{'format'}\n";
+ }
+
+ unless ($formatter) {
+ die
+ "Unable to continue with $rOpts->{'format'} formatting\n";
+ }
+
+ #---------------------------------------------------------------
+ # create the tokenizer for this file
+ #---------------------------------------------------------------
+ $tokenizer = undef; # must destroy old tokenizer
+ $tokenizer = Perl::Tidy::Tokenizer->new(
+ source_object => $source_object,
logger_object => $logger_object,
+ debugger_object => $debugger_object,
diagnostics_object => $diagnostics_object,
- sink_object => $sink_object,
+ starting_level => $rOpts->{'starting-indentation-level'},
+ tabs => $rOpts->{'tabs'},
+ entab_leading_space => $rOpts->{'entab-leading-whitespace'},
+ indent_columns => $rOpts->{'indent-columns'},
+ look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
+ look_for_autoloader => $rOpts->{'look-for-autoloader'},
+ look_for_selfloader => $rOpts->{'look-for-selfloader'},
+ trim_qw => $rOpts->{'trim-qw'},
);
- }
- else {
- die "I don't know how to do -format=$rOpts->{'format'}\n";
- }
- unless ($formatter) {
- die "Unable to continue with $rOpts->{'format'} formatting\n";
- }
+ #---------------------------------------------------------------
+ # now we can do it
+ #---------------------------------------------------------------
+ process_this_file( $tokenizer, $formatter );
- #---------------------------------------------------------------
- # create the tokenizer for this file
- #---------------------------------------------------------------
- $tokenizer = undef; # must destroy old tokenizer
- $tokenizer = Perl::Tidy::Tokenizer->new(
- source_object => $source_object,
- logger_object => $logger_object,
- debugger_object => $debugger_object,
- diagnostics_object => $diagnostics_object,
- starting_level => $rOpts->{'starting-indentation-level'},
- tabs => $rOpts->{'tabs'},
- indent_columns => $rOpts->{'indent-columns'},
- look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
- look_for_autoloader => $rOpts->{'look-for-autoloader'},
- look_for_selfloader => $rOpts->{'look-for-selfloader'},
- trim_qw => $rOpts->{'trim-qw'},
- );
+ #---------------------------------------------------------------
+ # close the input source and report errors
+ #---------------------------------------------------------------
+ $source_object->close_input_file();
- #---------------------------------------------------------------
- # now we can do it
- #---------------------------------------------------------------
- process_this_file( $tokenizer, $formatter );
+ # line source for next iteration (if any) comes from the current
+ # temporary buffer
+ if ( $iter < $max_iterations ) {
+ $source_object =
+ Perl::Tidy::LineSource->new( \$temp_buffer, $rOpts,
+ $rpending_logfile_message );
+ }
- #---------------------------------------------------------------
- # close the input source and report errors
- #---------------------------------------------------------------
- $source_object->close_input_file();
+ } # end loop over iterations
# get file names to use for syntax check
my $ifname = $source_object->get_input_file_copy_name();
$sink_object->close_output_file() if $sink_object;
$debugger_object->close_debug_file() if $debugger_object;
+ if ($postfilter) {
+ my $new_sink =
+ Perl::Tidy::LineSink->new( $output_file, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message,
+ $binmode );
+ my $buf = $postfilter->($postfilter_buffer);
+ foreach my $line ( split( "\n", $buf ) ) {
+ $new_sink->write_line($line);
+ }
+ }
+
my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
if ($output_file) {
}
sub write_logfile_header {
- my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
- @_;
+ my (
+ $rOpts, $logger_object, $config_file,
+ $rraw_options, $Windows_type, $readable_options
+ ) = @_;
$logger_object->write_logfile_entry(
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
);
$logger_object->write_logfile_entry(
"------------------------------------\n");
- foreach ( keys %{$rOpts} ) {
- $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
- }
+ $logger_object->write_logfile_entry($readable_options);
+
$logger_object->write_logfile_entry(
"------------------------------------\n");
}
npro
recombine!
valign!
+ notidy
);
my $category = 13; # Debugging
$add_option->( 'backup-file-extension', 'bext', '=s' );
$add_option->( 'force-read-binary', 'f', '!' );
$add_option->( 'format', 'fmt', '=s' );
+ $add_option->( 'iterations', 'it', '=i' );
$add_option->( 'logfile', 'log', '!' );
$add_option->( 'logfile-gap', 'g', ':i' );
$add_option->( 'outfile', 'o', '=s' );
$add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
$add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
$add_option->( 'closing-side-comments', 'csc', '!' );
+ $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
$add_option->( 'format-skipping', 'fs', '!' );
$add_option->( 'format-skipping-begin', 'fsb', '=s' );
$add_option->( 'format-skipping-end', 'fse', '=s' );
$add_option->( 'hanging-side-comments', 'hsc', '!' );
$add_option->( 'indent-block-comments', 'ibc', '!' );
$add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
+ $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
$add_option->( 'minimum-space-to-comment', 'msc', '=i' );
$add_option->( 'outdent-long-comments', 'olc', '!' );
$add_option->( 'outdent-static-block-comments', 'osbc', '!' );
########################################
$category = 5; # Linebreak controls
########################################
- $add_option->( 'add-newlines', 'anl', '!' );
- $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
- $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
- $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
- $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
- $add_option->( 'cuddled-else', 'ce', '!' );
- $add_option->( 'delete-old-newlines', 'dnl', '!' );
- $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
- $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
- $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
- $add_option->( 'opening-paren-right', 'opr', '!' );
- $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
- $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
- $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
- $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
- $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
- $add_option->( 'stack-closing-paren', 'scp', '!' );
- $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
- $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
- $add_option->( 'stack-opening-paren', 'sop', '!' );
- $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
- $add_option->( 'vertical-tightness', 'vt', '=i' );
- $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
- $add_option->( 'want-break-after', 'wba', '=s' );
- $add_option->( 'want-break-before', 'wbb', '=s' );
+ $add_option->( 'add-newlines', 'anl', '!' );
+ $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
+ $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
+ $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
+ $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
+ $add_option->( 'cuddled-else', 'ce', '!' );
+ $add_option->( 'delete-old-newlines', 'dnl', '!' );
+ $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
+ $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
+ $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
+ $add_option->( 'opening-paren-right', 'opr', '!' );
+ $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
+ $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
+ $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
+ $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
+ $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
+ $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
+ $add_option->( 'stack-closing-paren', 'scp', '!' );
+ $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
+ $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
+ $add_option->( 'stack-opening-paren', 'sop', '!' );
+ $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
+ $add_option->( 'vertical-tightness', 'vt', '=i' );
+ $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
+ $add_option->( 'want-break-after', 'wba', '=s' );
+ $add_option->( 'want-break-before', 'wbb', '=s' );
+ $add_option->( 'break-after-all-operators', 'baao', '!' );
+ $add_option->( 'break-before-all-operators', 'bbao', '!' );
+ $add_option->( 'keep-interior-semicolons', 'kis', '!' );
########################################
$category = 6; # Controlling list formatting
$add_option->( 'blanks-before-subs', 'bbs', '!' );
$add_option->( 'long-block-line-count', 'lbl', '=i' );
$add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
- $add_option->( 'swallow-optional-blank-lines', 'sob', '!' );
+ $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
########################################
$category = 9; # Other controls
closing-side-comment-interval=6
closing-side-comment-maximum-text=20
closing-side-comment-else-flag=0
+ closing-side-comments-balanced
closing-paren-indentation=0
closing-brace-indentation=0
closing-square-bracket-indentation=0
hanging-side-comments
indent-block-comments
indent-columns=4
+ iterations=1
+ keep-old-blank-lines=1
long-block-line-count=8
look-for-autoloader
look-for-selfloader
noquiet
noshow-options
nostatic-side-comments
- noswallow-optional-blank-lines
notabs
nowarning-output
outdent-labels
#---------------------------------------------------------------
%expansion = (
%expansion,
- 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
- 'fnl' => [qw(freeze-newlines)],
- 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
- 'fws' => [qw(freeze-whitespace)],
+ 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
+ 'fnl' => [qw(freeze-newlines)],
+ 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
+ 'fws' => [qw(freeze-whitespace)],
+ 'freeze-blank-lines' =>
+ [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
+ 'fbl' => [qw(freeze-blank-lines)],
'indent-only' => [qw(freeze-newlines freeze-whitespace)],
'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
'nooutdent-long-lines' =>
'nhtml' => [qw(format=tidy)],
'tidy' => [qw(format=tidy)],
+ 'swallow-optional-blank-lines' => [qw(kbl=0)],
+ 'noswallow-optional-blank-lines' => [qw(kbl=1)],
+ 'sob' => [qw(kbl=0)],
+ 'nsob' => [qw(kbl=1)],
+
'break-after-comma-arrows' => [qw(cab=0)],
'nobreak-after-comma-arrows' => [qw(cab=1)],
'baa' => [qw(cab=0)],
'mangle' => [
qw(
check-syntax
+ keep-old-blank-lines=0
delete-old-newlines
delete-old-whitespace
delete-semicolons
"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
}
$config_file = $2;
+
+ # resolve <dir>/.../<file>, meaning look upwards from directory
+ if ( defined($config_file) ) {
+ if ( my ( $start_dir, $search_file ) =
+ ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
+ {
+ $start_dir = '.' if !$start_dir;
+ $start_dir = Cwd::realpath($start_dir);
+ if ( my $found_file =
+ find_file_upwards( $start_dir, $search_file ) )
+ {
+ $config_file = $found_file;
+ }
+ }
+ }
unless ( -e $config_file ) {
warn "cannot find file given with -pro=$config_file: $!\n";
$config_file = "";
}
}
+ # check iteration count and quietly fix if necessary:
+ # - iterations option only applies to code beautification mode
+ # - it shouldn't be necessary to use more than about 2 iterations
+ if ( $rOpts->{'format'} ne 'tidy' ) {
+ $rOpts->{'iterations'} = 1;
+ }
+ elsif ( defined( $rOpts->{'iterations'} ) ) {
+ if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
+ elsif ( $rOpts->{'iterations'} > 5 ) { $rOpts->{'iterations'} = 5 }
+ }
+ else {
+ $rOpts->{'iterations'} = 1;
+ }
+
# see if user set a non-negative logfile-gap
if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
$rOpts->{'opening-brace-on-new-line'};
}
- # set shortcut flag if no blanks to be written
- unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
- $rOpts->{'swallow-optional-blank-lines'} = 1;
- }
-
if ( $rOpts->{'entab-leading-whitespace'} ) {
if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
warn "-et=n must use a positive integer; ignoring -et\n";
}
}
+# Search for $search_file in $search_dir and then in each successive
+# parent directory, returning the first path that names a plain file.
+#
+# Parameters:
+#   $search_dir  - directory to start from; trailing slashes are ignored
+#   $search_file - file name to look for; leading slashes are ignored
+#
+# Returns the path of the first match ("$search_dir/$search_file" for the
+# directory in which it was found), or undef if no directory on the way
+# up contains the file.
+sub find_file_upwards {
+    my ( $search_dir, $search_file ) = @_;
+
+    $search_dir  =~ s{/+$}{};
+    $search_file =~ s{^/+}{};
+
+    while (1) {
+        my $try_path = "$search_dir/$search_file";
+        if ( -f $try_path ) {
+            return $try_path;
+        }
+
+        # Stop at the top of the tree.  An empty $search_dir means the
+        # starting directory was '/' before the trailing slashes were
+        # stripped, so the root has just been tested.  Otherwise the top
+        # is reached when dirname() no longer changes the directory
+        # (for example '/' or '.'); without this test the loop would
+        # never end for those values.
+        my $parent_dir = dirname($search_dir);
+        if (   $search_dir eq ''
+            || $search_dir eq '/'
+            || $parent_dir eq $search_dir )
+        {
+            return undef;
+        }
+        $search_dir = $parent_dir;
+    }
+}
+
sub expand_command_abbreviations {
# go through @ARGV and expand any abbreviations
}
sub is_unix {
- return ( $^O !~ /win32|dos/i )
+ return
+ ( $^O !~ /win32|dos/i )
&& ( $^O ne 'VMS' )
&& ( $^O ne 'OS2' )
&& ( $^O ne 'MacOS' );
sub find_config_file {
# look for a .perltidyrc configuration file
+ # For Windows also look for a file named perltidy.ini
my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
$rpending_complaint ) = @_;
# look in current directory first
$config_file = ".perltidyrc";
return $config_file if $exists_config_file->($config_file);
+ if ($is_Windows) {
+ $config_file = "perltidy.ini";
+ return $config_file if $exists_config_file->($config_file);
+ }
# Default environment vars.
my @envs = qw(PERLTIDY HOME);
# test ENV as directory:
$config_file = catfile( $ENV{$var}, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
+
+ if ($is_Windows) {
+ $config_file = catfile( $ENV{$var}, "perltidy.ini" );
+ return $config_file if $exists_config_file->($config_file);
+ }
}
else {
$$rconfig_file_chatter .= "\n";
Win_Config_Locs( $rpending_complaint, $Windows_type );
# Check All Users directory, if there is one.
+ # i.e. C:\Documents and Settings\User\perltidy.ini
if ($allusers) {
+
$config_file = catfile( $allusers, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
+
+ $config_file = catfile( $allusers, "perltidy.ini" );
+ return $config_file if $exists_config_file->($config_file);
}
# Check system directory.
+ # retain old code in case someone has been able to create
+ # a file with a leading period.
$config_file = catfile( $system, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
+
+ $config_file = catfile( $system, "perltidy.ini" );
+ return $config_file if $exists_config_file->($config_file);
}
}
foreach (@_) { print STDOUT "$_\n" }
}
-sub dump_options {
+sub readable_options {
- # write the options back out as a valid .perltidyrc file
+ # return options for this run as a string which could be
+ # put in a perltidyrc file
my ( $rOpts, $roption_string ) = @_;
my %Getopt_flags;
- my $rGetopt_flags = \%Getopt_flags;
+ my $rGetopt_flags = \%Getopt_flags;
+ my $readable_options = "# Final parameter set for this run.\n";
+ $readable_options .=
+ "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
foreach my $opt ( @{$roption_string} ) {
my $flag = "";
if ( $opt =~ /(.*)(!|=.*)$/ ) {
$rGetopt_flags->{$opt} = $flag;
}
}
- print STDOUT "# Final parameter set for this run:\n";
foreach my $key ( sort keys %{$rOpts} ) {
my $flag = $rGetopt_flags->{$key};
my $value = $rOpts->{$key};
else {
# shouldn't happen
- print
+ $readable_options .=
"# ERROR in dump_options: unrecognized flag $flag for $key\n";
}
}
- print STDOUT $prefix . $key . $suffix . "\n";
+ $readable_options .= $prefix . $key . $suffix . "\n";
}
+ return $readable_options;
}
sub show_version {
print <<"EOM";
This is perltidy, v$VERSION
-Copyright 2000-2007, Steve Hancock
+Copyright 2000-2010, Steve Hancock
Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
-bbs add blank line before subs and packages
-bbc add blank line before block comments
-bbb add blank line between major blocks
- -sob swallow optional blank lines
+ -kbl=n keep old blank lines? 0=no, 1=some, 2=all
+ -mbl=n maximum consecutive blank lines to output (default=1)
-ce cuddled else; use this style: '} else {'
-dnl delete old newlines (default)
- -mbl=n maximum consecutive blank lines (default=1)
-l=n maximum line length; default n=80
-bl opening brace on new line
-sbl opening sub brace on new line. value of -bl is used if not given.
-wbb=s want break before tokens in string
Following Old Breakpoints
+ -kis keep interior semicolons. Allows multiple statements per line.
-boc break at old comma breaks: turns off all automatic list formatting
-bol break at old logical breakpoints: or, and, ||, && (default)
-bok break at old list keyword breakpoints such as map, sort (default)
-ibc indent block comments (default)
-isbc indent spaced block comments; may indent unless no leading space
-msc=n minimum desired spaces to side comment, default 4
+ -fpsc=n fix position for side comments; default 0;
-csc add or update closing side comments after closing BLOCK brace
-dcsc delete closing side comments created by a -csc command
-cscp=s change closing side comment prefix to be other than '## end'
EOM
}
my $i = $self->[2]++;
- ##my $line = $self->[0]->[$i];
return $self->[0]->[$i];
}
return $line;
}
-sub old_get_line {
- my $self = shift;
- my $line = undef;
- my $fh = $self->{_fh};
- my $fh_copy = $self->{_fh_copy};
- $line = $fh->getline();
- if ( $line && $fh_copy ) { $fh_copy->print($line); }
- return $line;
-}
-
#####################################################################
#
# the Perl::Tidy::LineSink class supplies a write_line method for
my $warning_count = $self->{_warning_count};
my $saw_code_bug = $self->{_saw_code_bug};
- my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
+ my $save_logfile =
+ ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
|| $saw_code_bug == 1
|| $rOpts->{'logfile'};
my $log_file = $self->{_log_file};
if ($fh) {
my $routput_array = $self->{_output_array};
foreach ( @{$routput_array} ) { $fh->print($_) }
- eval { $fh->close() };
+ eval { $fh->close() };
}
}
}
$rOpts_maximum_fields_per_table
$rOpts_maximum_line_length
$rOpts_short_concatenation_item_length
- $rOpts_swallow_optional_blank_lines
+ $rOpts_keep_old_blank_lines
$rOpts_ignore_old_breakpoints
$rOpts_format_skipping
$rOpts_space_function_paren
$rOpts_space_keyword_paren
+ $rOpts_keep_interior_semicolons
$half_maximum_line_length
@is_chain_operator{@_} = (1) x scalar(@_);
# We can remove semicolons after blocks preceded by these keywords
- @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
+ @_ =
+ qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless while until for foreach);
@is_block_without_semicolon{@_} = (1) x scalar(@_);
my $line_type = $line_of_tokens->{_line_type};
my $input_line = $line_of_tokens->{_line_text};
+ if ( $rOpts->{notidy} ) {
+ write_unindented_line($input_line);
+ $last_line_type = $line_type;
+ return;
+ }
+
# _line_type codes are:
# SYSTEM - system-specific code before hash-bang line
# CODE - line of perl code (including comments)
# any other lines of type END or DATA.
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
- if ( !$skip_line
+ if ( !$skip_line
&& $line_type eq 'POD_START'
- && $last_line_type !~ /^(END|DATA)$/ )
+ # If the previous line is a __DATA__ line (or data
+ # contents), it's not valid to change it at all, no
+ # matter what is in the data
+ && $last_line_type !~ /^(END|DATA(?:_START)?)$/ )
{
want_blank_line();
}
my $spaces_needed =
$gnu_position_predictor - $rOpts_maximum_line_length + 2;
- return if ( $spaces_needed < 0 );
+ return if ( $spaces_needed <= 0 );
# We are over the limit, so try to remove a requested number of
# spaces from leading whitespace. We are only allowed to remove
for ( ; $i <= $max_gnu_item_index ; $i++ ) {
my $old_spaces = $gnu_item_list[$i]->get_SPACES();
- if ( $old_spaces > $deleted_spaces ) {
+ if ( $old_spaces >= $deleted_spaces ) {
$gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
}
}
# implement user break preferences
- foreach my $tok ( split_words( $rOpts->{'want-break-after'} ) ) {
- if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
+ my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ . : ? && || and or err xor
+ );
+
+ my $break_after = sub {
+ foreach my $tok (@_) {
+ if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
+ }
}
- }
+ };
- foreach my $tok ( split_words( $rOpts->{'want-break-before'} ) ) {
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
+ my $break_before = sub {
+ foreach my $tok (@_) {
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
+ }
}
- }
+ };
+
+ $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
+ $break_before->(@all_operators)
+ if ( $rOpts->{'break-before-all-operators'} );
+
+ $break_after->( split_words( $rOpts->{'want-break-after'} ) );
+ $break_before->( split_words( $rOpts->{'want-break-before'} ) );
# make note if breaks are before certain key types
%want_break_before = ();
- foreach my $tok (
- '=', '.', ',', ':', '?', '&&', '||', 'and',
- 'or', 'err', 'xor', '+', '-', '*', '/',
- )
- {
+ foreach my $tok ( @all_operators, ',' ) {
$want_break_before{$tok} =
$left_bond_strength{$tok} < $right_bond_strength{$tok};
}
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
$rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
- $rOpts_swallow_optional_blank_lines =
- $rOpts->{'swallow-optional-blank-lines'};
- $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
- $rOpts_format_skipping = $rOpts->{'format-skipping'};
- $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
- $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
- $half_maximum_line_length = $rOpts_maximum_line_length / 2;
+ $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
+ $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
+ $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
+ $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
+ $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
+ $half_maximum_line_length = $rOpts_maximum_line_length / 2;
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
# retain any space after here doc operator ( hereerr.t)
|| ( $typel eq 'h' )
- # FIXME: this needs some further work; extrude.t has test cases
- # it is safest to retain any space after start of ? : operator
- # because of perl's quirky parser.
- # ie, this line will fail if you remove the space after the '?':
- # $b=join $comma ? ',' : ':', @_; # ok
- # $b=join $comma ?',' : ':', @_; # error!
- # but this is ok :)
- # $b=join $comma?',' : ':', @_; # not a problem!
- ## || ($typel eq '?')
-
# be careful with a space around ++ and --, to avoid ambiguity as to
# which token it applies
|| ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
#use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
|| ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
+ # We must be sure that a space between a ? and a quoted string
+ # remains if the space before the ? remains. [Loca.pm, lockarea]
+ # ie,
+ # $b=join $comma ? ',' : ':', @_; # ok
+ # $b=join $comma?',' : ':', @_; # ok!
+ # $b=join $comma ?',' : ':', @_; # error!
+ # Not really required:
+ ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
+
+ # do not remove space between an '&' and a bare word because
+ # it may turn into a function evaluation, like here
+ # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
+ # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
+ || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+
; # the value of this long logic sequence is the result we want
return $result;
}
}
else { $tightness = $tightness{$last_token} }
+ #=================================================================
+ # Patch for fabrice_bug.pl
+ # We must always avoid spaces around a bare word beginning with ^ as in:
+ # my $before = ${^PREMATCH};
+ # Because all of the following cause an error in perl:
+ # my $before = ${ ^PREMATCH };
+ # my $before = ${ ^PREMATCH};
+ # my $before = ${^PREMATCH };
+ # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1.
+ # Note that here we must set tightness=1 and not 2 so that the closing space
+ # is also avoided (via the $j_tight_closing_paren flag in coding)
+ if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
+
+ #=================================================================
+
if ( $tightness <= 0 ) {
$ws = WS_YES;
}
my $j_here = $j;
++$j_here
if ( $token eq '-'
- && $last_token eq '{'
+ && $last_token eq '{'
&& $$rtoken_type[ $j + 1 ] eq 'w' );
# $j_next is where a closing token should be if
# 'w' and 'i' checks for something like:
# myfun( &myfun( ->myfun(
# -----------------------------------------------------
- elsif (( $last_type =~ /^[wU]$/ )
+ elsif (( $last_type =~ /^[wUG]$/ )
|| ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
{
$ws = WS_NO unless ($rOpts_space_function_paren);
$ci_levels_to_go[$max_index_to_go] = $ci_level;
$mate_index_to_go[$max_index_to_go] = -1;
$matching_token_to_go[$max_index_to_go] = '';
+ $bond_strength_to_go[$max_index_to_go] = 0;
# Note: negative levels are currently retained as a diagnostic so that
# the 'final indentation level' is correctly reported for bad scripts.
# Handle a blank line..
if ( $jmax < 0 ) {
- # For the 'swallow-optional-blank-lines' option, we delete all
+ # If keep-old-blank-lines is zero, we delete all
# old blank lines and let the blank line rules generate any
# needed blanks.
- if ( !$rOpts_swallow_optional_blank_lines ) {
+ if ($rOpts_keep_old_blank_lines) {
flush();
- $file_writer_object->write_blank_code_line();
+ $file_writer_object->write_blank_code_line(
+ $rOpts_keep_old_blank_lines == 2 );
$last_line_leading_type = 'b';
}
$last_line_had_side_comment = 0;
if (
$rOpts->{'indent-block-comments'}
- && ( !$rOpts->{'indent-spaced-block-comments'}
+ && ( !$rOpts->{'indent-spaced-block-comments'}
|| $input_line =~ /^\s+/ )
&& !$is_static_block_comment_without_leading_space
)
# /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
# Examples:
# *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.64 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
# We will pass such a line straight through without breaking
# it unless -npvl is used
my $is_VERSION_statement = 0;
if (
- !$saw_VERSION_in_this_file
+ !$saw_VERSION_in_this_file
&& $input_line =~ /VERSION/ # quick check to reject most lines
&& $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
)
# make note of something like '$var = s/xxx/yyy/;'
# in case it should have been '$var =~ s/xxx/yyy/;'
if (
- $token =~ /^(s|tr|y|m|\/)/
+ $token =~ /^(s|tr|y|m|\/)/
&& $last_nonblank_token =~ /^(=|==|!=)$/
# precededed by simple scalar
$block_type !~ /^sub/
? $rOpts->{'opening-brace-on-new-line'}
- # use -sbl flag unless this is an anonymous sub block
+ # use -sbl flag for a named sub block
: $block_type !~ /^sub\W*$/
? $rOpts->{'opening-sub-brace-on-new-line'}
- # do not break for anonymous subs
- : 0;
+ # use -asbl flag for an anonymous sub block
+ : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
# Break before an opening '{' ...
if (
# hash (blktype.t, blktype1.t)
&& ( $block_type !~ /^[\{\};]$/ )
+ # patch: and do not add semi-colons for recently
+ # added block types (see tmp/semicolon.t)
+ && ( $block_type !~
+ /^(switch|case|given|when|default)$/ )
+
# it seems best not to add semicolons in these
# special block types: sort|map|grep
&& ( !$is_sort_map_grep{$block_type} )
output_line_to_go()
unless ( $no_internal_newlines
+ || ( $rOpts_keep_interior_semicolons && $j < $jmax )
|| ( $next_nonblank_token eq '}' ) );
}
my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
if ( !defined($lc) ) { $lc = 0 }
- $want_blank = $rOpts->{'blanks-before-blocks'}
+ $want_blank =
+ $rOpts->{'blanks-before-blocks'}
&& $lc >= $rOpts->{'long-block-line-count'}
&& $file_writer_object->get_consecutive_nonblank_lines() >=
$rOpts->{'long-block-line-count'}
# otherwise use multiple lines
else {
- ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
+ ( $ri_first, $ri_last, my $colon_count ) =
+ set_continuation_breaks($saw_good_break);
break_all_chain_tokens( $ri_first, $ri_last );
+ break_equals( $ri_first, $ri_last );
+
# now we do a correction step to clean this up a bit
# (The only time we would not do this is for debugging)
if ( $rOpts->{'recombine'} ) {
( $ri_first, $ri_last ) =
recombine_breakpoints( $ri_first, $ri_last );
}
+
+ insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
}
# do corrector step if -lp option is used
$file_writer_object->write_line( $_[0] );
}
+sub undo_ci {
+
+    # Undo continuation indentation in certain sequences.
+    # For example, we can undo continuation indentation in sort/map/grep
+    # chains:
+    #    my $dat1 = pack( "n*",
+    #        map { $_, $lookup->{$_} }
+    #          sort { $a <=> $b }
+    #          grep { $lookup->{$_} ne $default } keys %$lookup );
+    # To align the map/sort/grep keywords like this:
+    #    my $dat1 = pack( "n*",
+    #        map { $_, $lookup->{$_} }
+    #        sort { $a <=> $b }
+    #        grep { $lookup->{$_} ne $default } keys %$lookup );
+    #
+    # Given:
+    #   $ri_first = ref to array holding, for each line of this batch, the
+    #               index of its leading token
+    #   $ri_last  = ref to array holding, for each line of this batch, the
+    #               index of its last token
+    #
+    # For each line of a chain that is found, the entry of @ci_levels_to_go
+    # for the leading token is set to 0 and the entry of
+    # @leading_spaces_to_go is replaced by the matching entry of
+    # @reduced_spaces_to_go.  No meaningful value is returned.
+    my ( $ri_first, $ri_last ) = @_;
+
+    # $line_1 and $line_2 are the first and last line numbers of a chain;
+    # $lev_last is the level of the leading token of the previous line.
+    my ( $line_1, $line_2, $lev_last );
+    my $this_line_is_semicolon_terminated;
+    my $max_line = @$ri_first - 1;
+
+    # looking at each line of this batch..
+    # We are looking at leading tokens and looking for a sequence
+    # all at the same level and higher level than enclosing lines.
+    foreach my $line ( 0 .. $max_line ) {
+
+        my $ibeg = $$ri_first[$line];
+        my $lev  = $levels_to_go[$ibeg];
+        if ( $line > 0 ) {
+
+            # if we have started a chain..
+            # (a chain can only start on a line > 0, so a true value here
+            # is equivalent to 'defined')
+            if ($line_1) {
+
+                # see if it continues..
+                if ( $lev == $lev_last ) {
+                    if (   $types_to_go[$ibeg] eq 'k'
+                        && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+                    {
+
+                        # chain continues...
+                        # check for chain ending at end of a statement
+                        if ( $line == $max_line ) {
+
+                            # see if this line ends a statement
+                            my $iend = $$ri_last[$line];
+                            $this_line_is_semicolon_terminated =
+                              $types_to_go[$iend] eq ';'
+
+                              # with possible side comment
+                              || ( $types_to_go[$iend] eq '#'
+                                && $iend - $ibeg >= 2
+                                && $types_to_go[ $iend - 2 ] eq ';'
+                                && $types_to_go[ $iend - 1 ] eq 'b' );
+                        }
+                        $line_2 = $line if ($this_line_is_semicolon_terminated);
+                    }
+                    else {
+
+                        # kill chain
+                        $line_1 = undef;
+                    }
+                }
+                elsif ( $lev < $lev_last ) {
+
+                    # chain ends with previous line
+                    $line_2 = $line - 1;
+                }
+                elsif ( $lev > $lev_last ) {
+
+                    # kill chain
+                    $line_1 = undef;
+                }
+
+                # undo the continuation indentation if a chain ends
+                if ( defined($line_2) && defined($line_1) ) {
+                    my $continuation_line_count = $line_2 - $line_1 + 1;
+
+                    # NOTE(review): $line_2 is not cleared when a chain is
+                    # closed, so a later chain in the same batch can arrive
+                    # here with a stale $line_2 which is less than $line_1.
+                    # The line range is then empty and nothing is changed,
+                    # but an unguarded '(0) x (negative count)' produces a
+                    # "Negative repeat count does nothing" warning on newer
+                    # perls.  Only touch the arrays for a non-empty range.
+                    if ( $continuation_line_count > 0 ) {
+                        my @ibeg_of_chain = @$ri_first[ $line_1 .. $line_2 ];
+                        @ci_levels_to_go[@ibeg_of_chain] =
+                          (0) x ($continuation_line_count);
+                        @leading_spaces_to_go[@ibeg_of_chain] =
+                          @reduced_spaces_to_go[@ibeg_of_chain];
+                    }
+
+                    # the chain is finished in either case
+                    $line_1 = undef;
+                }
+            }
+
+            # not in a chain yet..
+            else {
+
+                # look for start of a new sort/map/grep chain
+                if ( $lev > $lev_last ) {
+                    if (   $types_to_go[$ibeg] eq 'k'
+                        && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+                    {
+                        $line_1 = $line;
+                    }
+                }
+            }
+        }
+        $lev_last = $lev;
+    }
+}
+
sub undo_lp_ci {
# If there is a single, long parameter within parens, like this:
my $max_line = @$ri_first - 1;
my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
- $tok_next, $has_leading_op_next, $has_leading_op );
+ $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
# looking at each line of this batch..
foreach $line ( 0 .. $max_line - 1 ) {
# see if the next line begins with a logical operator
- $ibeg = $$ri_first[$line];
- $iend = $$ri_last[$line];
- $ibeg_next = $$ri_first[ $line + 1 ];
- $tok_next = $tokens_to_go[$ibeg_next];
- $has_leading_op_next = $is_chain_operator{$tok_next};
+ $ibeg = $$ri_first[$line];
+ $iend = $$ri_last[$line];
+ $ibeg_next = $$ri_first[ $line + 1 ];
+ $tok_next = $tokens_to_go[$ibeg_next];
+ $type_next = $types_to_go[$ibeg_next];
+
+ $has_leading_op_next = ( $tok_next =~ /^\w/ )
+ ? $is_chain_operator{$tok_next} # + - * / : ? && ||
+ : $is_chain_operator{$type_next}; # and, or
+
next unless ($has_leading_op_next);
# next line must not be at lesser depth
# if this is not first line of the batch ...
if ( $line > 0 ) {
- # and we have leading operator
+ # and we have leading operator..
next if $has_leading_op;
- # and ..
+ # Introduce padding if..
# 1. the previous line is at lesser depth, or
# 2. the previous line ends in an assignment
- #
+ # 3. the previous line ends in a 'return'
+ # 4. the previous line ends in a comma
# Example 1: previous line at lesser depth
# if ( ( $Year < 1601 ) # <- we are here but
# || ( $Year > 2899 ) # list has not yet
# : $year % 100 ? 1
# : $year % 400 ? 0
# : 1;
+ #
+ # Example 3: previous line ending in comma:
+ # push @expr,
+ # /test/ ? undef
+ # : eval($_) ? 1
+ # : eval($_) ? 1
+ # : 0;
+
+ # be sure levels agree (do not indent after an indented 'if')
+ next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
+
+ # allow padding on first line after a comma but only if:
+ # (1) this is line 2 and
+ # (2) there are more than three lines and
+ # (3) lines 3 and 4 have the same leading operator
+ # These rules try to prevent padding within a long
+ # comma-separated list.
+ my $ok_comma;
+ if ( $types_to_go[$iendm] eq ','
+ && $line == 1
+ && $max_line > 2 )
+ {
+ my $ibeg_next_next = $$ri_first[ $line + 2 ];
+ my $tok_next_next = $tokens_to_go[$ibeg_next_next];
+ $ok_comma = $tok_next_next eq $tok_next;
+ }
+
next
unless (
- $is_assignment{ $types_to_go[$iendm] }
+ $is_assignment{ $types_to_go[$iendm] }
+ || $ok_comma
|| ( $nesting_depth_to_go[$ibegm] <
$nesting_depth_to_go[$ibeg] )
+ || ( $types_to_go[$iendm] eq 'k'
+ && $tokens_to_go[$iendm] eq 'return' )
);
# we will add padding before the first token
if ( $types_to_go[$inext_next] eq 'b' ) {
$inext_next++;
}
- my $type = $types_to_go[$ipad];
+ my $type = $types_to_go[$ipad];
+ my $type_next = $types_to_go[ $ipad + 1 ];
# see if there are multiple continuation lines
my $logical_continuation_lines = 1;
$logical_continuation_lines++;
}
}
+
+ # see if leading types match
+ my $types_match = $types_to_go[$inext_next] eq $type;
+ my $matches_without_bang;
+
+ # if first line has leading ! then compare the following token
+ if ( !$types_match && $type eq '!' ) {
+ $types_match = $matches_without_bang =
+ $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
+ }
+
if (
# either we have multiple continuation lines to follow
|| (
# types must match
- $types_to_go[$inext_next] eq $type
+ $types_match
# and keywords must match if keyword
&& !(
my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
$pad_spaces = $length_2 - $length_1;
+ # If the first line has a leading ! and the second does
+ # not, then remove one space to try to align the next
+ # leading characters, which are often the same. For example:
+ # if ( !$ts
+ # || $ts == $self->Holder
+ # || $self->Holder->Type eq "Arena" )
+ #
+ # This usually helps readability, but if there are subsequent
+ # ! operators things will still get messed up. For example:
+ #
+ # if ( !exists $Net::DNS::typesbyname{$qtype}
+ # && exists $Net::DNS::classesbyname{$qtype}
+ # && !exists $Net::DNS::classesbyname{$qclass}
+ # && exists $Net::DNS::typesbyname{$qclass} )
+ # We can't fix that.
+ if ($matches_without_bang) { $pad_spaces-- }
+
# make sure this won't change if -lp is used
my $indentation_1 = $leading_spaces_to_go[$ibeg];
if ( ref($indentation_1) ) {
# we might be able to handle a pad of -1 by removing a blank
# token
if ( $pad_spaces < 0 ) {
+
if ( $pad_spaces == -1 ) {
if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
$tokens_to_go[ $ipad - 1 ] = '';
# now apply any padding for alignment
if ( $ipad >= 0 && $pad_spaces ) {
+
my $length_t = total_line_length( $ibeg, $iend );
if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
$tokens_to_go[$ipad] =
return $csc_text;
}
+{    # closure for sub balance_csc_text
+
+    # Each structural character mapped to its partner, in both directions.
+    my %mate_of;
+
+    BEGIN {
+        my %closer_of = (
+            '{' => '}',
+            '(' => ')',
+            '[' => ']',
+        );
+
+        # openers map to closers and closers map back to openers
+        %mate_of = ( %closer_of, reverse %closer_of );
+    }
+
+    sub balance_csc_text {
+
+        # Append characters to a closing side comment so that its
+        # structural characters balance.  This lets editors such as vim
+        # jump correctly through code.
+        # Simple Example:
+        #  input  = ## end foreach my $foo ( sort { $b ...
+        #  output = ## end foreach my $foo ( sort { $b ...})
+
+        # NOTE: Structures inside quoted text are not filtered out, because
+        # the bounce algorithms in text editors do not necessarily do this
+        # either (a version of vim was checked and did not do this).
+
+        # Some complex examples which will cause trouble for some editors:
+        #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
+        #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
+        #  if ( $1 eq '{' ) {
+        # test file test1/braces.pl has many such examples.
+
+        my ($csc) = @_;
+
+        # Walk the characters from RIGHT to LEFT while the balancing tail
+        # grows from LEFT to RIGHT at the end of the same string.  The
+        # positions are fixed up front from the initial length.
+        foreach my $pos ( reverse 0 .. length($csc) - 1 ) {
+
+            my $char = substr( $csc, $pos, 1 );
+            my $mate = $mate_of{$char};
+
+            # only structural characters are of interest
+            next unless ($mate);
+
+            # take off the character currently at the end of the string
+            my $top = chop($csc);
+
+            # if it equals this character the two cancel: leave it off
+            next if ( $top eq $char );
+
+            # otherwise restore it and follow it with the mate
+            $csc .= $top . $mate;
+        }
+
+        # hand back the balanced text
+        return $csc;
+    }
+}
+
sub add_closing_side_comment {
# add closing side comments after closing block braces if -csc used
if ( $i_block_leading_text == $i_terminal ) {
$token .= $block_leading_text;
}
+
+ $token = balance_csc_text($token)
+ if $rOpts->{'closing-side-comments-balanced'};
+
$token =~ s/\s*$//; # trim any trailing whitespace
# handle case of existing closing side comment
if ( $rOpts->{'closing-side-comment-warnings'} ) {
my $old_csc = $tokens_to_go[$max_index_to_go];
my $new_csc = $token;
- $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
- my $new_trailing_dots = $1;
- $old_csc =~ s/\.\.\.\s*$//;
$new_csc =~ s/\s+//g; # trim all whitespace
- $old_csc =~ s/\s+//g;
+ $old_csc =~ s/\s+//g; # trim all whitespace
+ $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
+ my $new_trailing_dots = $1;
+ $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
# Patch to handle multiple closing side comments at
# else and elsif's. These have become too complicated
}
sub previous_nonblank_token {
- my ($i) = @_;
- if ( $i <= 0 ) {
- return "";
- }
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
- return $tokens_to_go[ $i - 1 ];
- }
- elsif ( $i > 1 ) {
- return $tokens_to_go[ $i - 2 ];
- }
- else {
- return "";
+
+    # Return the text of the nearest non-blank token before index $i in
+    # the current batch, or an empty string if there is none.  At most
+    # one blank token (type 'b') is stepped over.
+    my ($i) = @_;
+
+    # step back one token, and one more if that one is a blank
+    my $im = $i - 1;
+    $im-- if ( $im >= 0 && $types_to_go[$im] eq 'b' );
+    return "" if ( $im < 0 );
+
+    my $name = $tokens_to_go[$im];
+
+    # An isolated '->' gets the token just before it prepended, unless
+    # that token is a blank, to avoid unwanted alignments
+    # [test case is test8/penco.pl]
+    if (   $name eq '->'
+        && $im >= 1
+        && $types_to_go[ $im - 1 ] ne 'b' )
+    {
+        $name = $tokens_to_go[ $im - 1 ] . $name;
}
+    return $name;
}
sub send_lines_to_vertical_aligner {
Perl::Tidy::VerticalAligner::flush();
}
+ undo_ci( $ri_first, $ri_last );
+
set_logical_padding( $ri_first, $ri_last );
# loop to prepare each line for shipment
my $ibeg = $$ri_first[$n];
my $iend = $$ri_last[$n];
- my @patterns = ();
- my @tokens = ();
- my @fields = ();
- my $i_start = $ibeg;
- my $i;
-
- my $depth = 0;
- my @container_name = ("");
- my @multiple_comma_arrows = (undef);
+ my ( $rtokens, $rfields, $rpatterns ) =
+ make_alignment_patterns( $ibeg, $iend );
- my $j = 0; # field index
+ my ( $indentation, $lev, $level_end, $terminal_type,
+ $is_semicolon_terminated, $is_outdented_line )
+ = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
+ $ri_first, $ri_last, $rindentation_list );
- $patterns[0] = "";
- for $i ( $ibeg .. $iend ) {
+ # we will allow outdenting of long lines..
+ my $outdent_long_lines = (
- # Keep track of containers balanced on this line only.
- # These are used below to prevent unwanted cross-line alignments.
- # Unbalanced containers already avoid aligning across
- # container boundaries.
- if ( $tokens_to_go[$i] eq '(' ) {
- my $i_mate = $mate_index_to_go[$i];
- if ( $i_mate > $i && $i_mate <= $iend ) {
- $depth++;
- my $seqno = $type_sequence_to_go[$i];
- my $count = comma_arrow_count($seqno);
- $multiple_comma_arrows[$depth] = $count && $count > 1;
- my $name = previous_nonblank_token($i);
- $name =~ s/^->//;
- $container_name[$depth] = "+" . $name;
- }
- }
- elsif ( $tokens_to_go[$i] eq ')' ) {
- $depth-- if $depth > 0;
- }
+ # which are long quotes, if allowed
+ ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
- # if we find a new synchronization token, we are done with
- # a field
- if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
+ # which are long block comments, if allowed
+ || (
+ $types_to_go[$ibeg] eq '#'
+ && $rOpts->{'outdent-long-comments'}
- my $tok = my $raw_tok = $matching_token_to_go[$i];
+ # but not if this is a static block comment
+ && !$is_static_block_comment
+ )
+ );
+
+ my $level_jump =
+ $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
+
+ my $rvertical_tightness_flags =
+ set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
+ $ri_first, $ri_last );
+
+ # flush an outdented line to avoid any unwanted vertical alignment
+ Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+
+ my $is_terminal_ternary = 0;
+ if ( $tokens_to_go[$ibeg] eq ':'
+ || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
+ {
+ if ( ( $terminal_type eq ';' && $level_end <= $lev )
+ || ( $level_end < $lev ) )
+ {
+ $is_terminal_ternary = 1;
+ }
+ }
+
+ # send this new line down the pipe
+ my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
+ Perl::Tidy::VerticalAligner::append_line(
+ $lev,
+ $level_end,
+ $indentation,
+ $rfields,
+ $rtokens,
+ $rpatterns,
+ $forced_breakpoint_to_go[$iend] || $in_comma_list,
+ $outdent_long_lines,
+ $is_terminal_ternary,
+ $is_semicolon_terminated,
+ $do_not_pad,
+ $rvertical_tightness_flags,
+ $level_jump,
+ );
+ $in_comma_list =
+ $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
+
+ # flush an outdented line to avoid any unwanted vertical alignment
+ Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+
+ $do_not_pad = 0;
+
+ } # end of loop to output each line
+
+ # remember indentation of lines containing opening containers for
+ # later use by sub set_adjusted_indentation
+ save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+}
+
+{ # begin make_alignment_patterns
+
+ my %block_type_map;
+ my %keyword_map;
+
+ BEGIN {
+
+ # map related block names into a common name to
+ # allow alignment
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
+
+ # map certain keywords to the same 'if' class to align
+ # long if/elsif sequences. [elsif.pl]
+ %keyword_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'given',
+ 'default' => 'given',
+ 'case' => 'switch',
+
+ # treat an 'undef' similar to numbers and quotes
+ 'undef' => 'Q',
+ );
+ }
+
+ sub make_alignment_patterns {
+
+ # Here we do some important preliminary work for the
+ # vertical aligner. We create three arrays for one
+ # output line. These arrays contain strings that can
+ # be tested by the vertical aligner to see if
+ # consecutive lines can be aligned vertically.
+ #
+ # The three arrays are indexed on the vertical
+ # alignment fields and are:
+ # @tokens - a list of any vertical alignment tokens for this line.
+ # These are tokens, such as '=' '&&' '#' etc which
+ # we might want to align vertically. These are
+ # decorated with various information such as
+ # nesting depth to prevent unwanted vertical
+ # alignment matches.
+ # @fields - the actual text of the line between the vertical alignment
+ # tokens.
+ # @patterns - a modified list of token types, one for each alignment
+ # field. These should normally each match before alignment is
+ # allowed, even when the alignment tokens match.
+ my ( $ibeg, $iend ) = @_;
+ my @tokens = ();
+ my @fields = ();
+ my @patterns = ();
+ my $i_start = $ibeg;
+ my $i;
+
+ my $depth = 0;
+ my @container_name = ("");
+ my @multiple_comma_arrows = (undef);
+
+ my $j = 0; # field index
+
+ $patterns[0] = "";
+ for $i ( $ibeg .. $iend ) {
+
+ # Keep track of containers balanced on this line only.
+ # These are used below to prevent unwanted cross-line alignments.
+ # Unbalanced containers already avoid aligning across
+ # container boundaries.
+ if ( $tokens_to_go[$i] eq '(' ) {
+
+ # if container is balanced on this line...
+ my $i_mate = $mate_index_to_go[$i];
+ if ( $i_mate > $i && $i_mate <= $iend ) {
+ $depth++;
+ my $seqno = $type_sequence_to_go[$i];
+ my $count = comma_arrow_count($seqno);
+ $multiple_comma_arrows[$depth] = $count && $count > 1;
+
+ # Append the previous token name to make the container name
+ # more unique. This name will also be given to any commas
+ # within this container, and it helps avoid undesirable
+ # alignments of different types of containers.
+ my $name = previous_nonblank_token($i);
+ $name =~ s/^->//;
+ $container_name[$depth] = "+" . $name;
+
+ # Make the container name even more unique if necessary.
+ # If we are not vertically aligning this opening paren,
+ # append a character count to avoid bad alignment because
+ # it usually looks bad to align commas within containers
+ # for which the opening parens do not align. Here
+ # is an example of very BAD alignment of commas (because
+ # the atan2 functions are not all aligned):
+ # $XY =
+ # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
+ # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
+ # $X * atan2( $X, 1 ) -
+ # $Y * atan2( $Y, 1 );
+ #
+ # On the other hand, it is usually okay to align commas if
+ # opening parens align, such as:
+ # glVertex3d( $cx + $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy + $s * $ys, $z );
+ # glVertex3d( $cx - $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy - $s * $ys, $z );
+ #
+ # To distinguish between these situations, we will
+ # append the length of the line from the previous matching
+ # token, or beginning of line, to the function name. This
+ # will allow the vertical aligner to reject undesirable
+ # matches.
+
+ # if we are not aligning on this paren...
+ if ( $matching_token_to_go[$i] eq '' ) {
+
+ # Sum length from previous alignment, or start of line.
+ # Note that we have to sum token lengths here because
+ # padding has been done and so array $lengths_to_go
+ # is now wrong.
+ my $len =
+ length(
+ join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
+ $len += leading_spaces_to_go($i_start)
+ if ( $i_start == $ibeg );
+
+ # tack length onto the container name to make unique
+ $container_name[$depth] .= "-" . $len;
+ }
+ }
+ }
+ elsif ( $tokens_to_go[$i] eq ')' ) {
+ $depth-- if $depth > 0;
+ }
+
+ # if we find a new synchronization token, we are done with
+ # a field
+ if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
+
+ my $tok = my $raw_tok = $matching_token_to_go[$i];
# make separators in different nesting depths unique
# by appending the nesting depth digit.
$tok .= "$nesting_depth_to_go[$i]";
}
- # do any special decorations for commas to avoid unwanted
- # cross-line alignments.
- if ( $raw_tok eq ',' ) {
+ # also decorate commas with any container name to avoid
+ # unwanted cross-line alignments.
+ if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
if ( $container_name[$depth] ) {
$tok .= $container_name[$depth];
}
}
- # decorate '=>' with:
- # - Nothing if this container is unbalanced on this line.
- # - The previous token if it is balanced and multiple '=>'s
- # - The container name if it is bananced and no other '=>'s
- elsif ( $raw_tok eq '=>' ) {
- if ( $container_name[$depth] ) {
- if ( $multiple_comma_arrows[$depth] ) {
- $tok .= "+" . previous_nonblank_token($i);
- }
- else {
- $tok .= $container_name[$depth];
- }
+ # Patch to avoid aligning leading and trailing if, unless.
+ # Mark trailing if, unless statements with container names.
+ # This makes them different from leading if, unless which
+ # are not so marked at present. If we ever need to name
+ # them too, we could use ci to distinguish them.
+ # Example problem to avoid:
+ # return ( 2, "DBERROR" )
+ # if ( $retval == 2 );
+ # if ( scalar @_ ) {
+ # my ( $a, $b, $c, $d, $e, $f ) = @_;
+ # }
+ if ( $raw_tok eq '(' ) {
+ my $ci = $ci_levels_to_go[$ibeg];
+ if ( $container_name[$depth] =~ /^\+(if|unless)/
+ && $ci )
+ {
+ $tok .= $container_name[$depth];
}
}
+ # Decorate block braces with block types to avoid
+ # unwanted alignments such as the following:
+ # foreach ( @{$routput_array} ) { $fh->print($_) }
+ # eval { $fh->close() };
+ if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
+ my $block_type = $block_type_to_go[$i];
+
+ # map certain related block types to allow
+ # else blocks to align
+ $block_type = $block_type_map{$block_type}
+ if ( defined( $block_type_map{$block_type} ) );
+
+ # remove sub names to allow one-line sub braces to align
+ # regardless of name
+ if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
+
+ # allow all control-type blocks to align
+ if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
+
+ $tok .= $block_type;
+ }
+
# concatenate the text of the consecutive tokens to form
# the field
push( @fields,
if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
$type = 'Q';
+
+ # Patch to ignore leading minus before words,
+ # by changing pattern 'mQ' into just 'Q',
+ # so that we can align things like this:
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
}
}
- # minor patch to make numbers and quotes align
+ # patch to make numbers and quotes align
if ( $type eq 'n' ) { $type = 'Q' }
+ # patch to ignore any ! in patterns
+ if ( $type eq '!' ) { $type = '' }
+
$patterns[$j] .= $type;
}
# for keywords we have to use the actual text
else {
- # map certain keywords to the same 'if' class to align
- # long if/elsif sequences. my testfile: elsif.pl
my $tok = $tokens_to_go[$i];
- if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
- $tok = 'if';
- }
+
+ # but map certain keywords to a common string to allow
+ # alignment.
+ $tok = $keyword_map{$tok}
+ if ( defined( $keyword_map{$tok} ) );
$patterns[$j] .= $tok;
}
}
# done with this line .. join text of tokens to make the last field
push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
+ return ( \@tokens, \@fields, \@patterns );
+ }
- my ( $indentation, $lev, $level_end, $terminal_type,
- $is_semicolon_terminated, $is_outdented_line )
- = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
- $ri_first, $ri_last, $rindentation_list );
-
- # we will allow outdenting of long lines..
- my $outdent_long_lines = (
-
- # which are long quotes, if allowed
- ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
-
- # which are long block comments, if allowed
- || (
- $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-long-comments'}
-
- # but not if this is a static block comment
- && !$is_static_block_comment
- )
- );
-
- my $level_jump =
- $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
-
- my $rvertical_tightness_flags =
- set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
- $ri_first, $ri_last );
-
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
- my $is_terminal_ternary = 0;
- if ( $tokens_to_go[$ibeg] eq ':'
- || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
- {
- if ( ( $terminal_type eq ';' && $level_end <= $lev )
- || ( $level_end < $lev ) )
- {
- $is_terminal_ternary = 1;
- }
- }
-
- # send this new line down the pipe
- my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
- Perl::Tidy::VerticalAligner::append_line(
- $lev,
- $level_end,
- $indentation,
- \@fields,
- \@tokens,
- \@patterns,
- $forced_breakpoint_to_go[$iend] || $in_comma_list,
- $outdent_long_lines,
- $is_terminal_ternary,
- $is_semicolon_terminated,
- $do_not_pad,
- $rvertical_tightness_flags,
- $level_jump,
- );
- $in_comma_list =
- $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
-
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
- $do_not_pad = 0;
-
- } # end of loop to output each line
-
- # remember indentation of lines containing opening containers for
- # later use by sub set_adjusted_indentation
- save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
-}
+} # end make_alignment_patterns
-{ # begin unmatched_indexes
+{ # begin unmatched_indexes
# closure to keep track of unbalanced containers.
# arrays shared by the routines in this block:
# first, see if the opening token is in the current batch
my $i_opening = $mate_index_to_go[$i_closing];
- my ( $indent, $offset );
+ my ( $indent, $offset, $is_leading, $exists );
+ $exists = 1;
if ( $i_opening >= 0 ) {
# it is..look up the indentation
- ( $indent, $offset ) =
+ ( $indent, $offset, $is_leading ) =
lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
$rindentation_list );
}
my $seqno = $type_sequence_to_go[$i_closing];
if ($seqno) {
if ( $saved_opening_indentation{$seqno} ) {
- ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
+ ( $indent, $offset, $is_leading ) =
+ @{ $saved_opening_indentation{$seqno} };
}
# some kind of serious error
# (example is badfile.t)
else {
- $indent = 0;
- $offset = 0;
+ $indent = 0;
+ $offset = 0;
+ $is_leading = 0;
+ $exists = 0;
}
}
# if no sequence number it must be an unbalanced container
else {
- $indent = 0;
- $offset = 0;
+ $indent = 0;
+ $offset = 0;
+ $is_leading = 0;
+ $exists = 0;
}
}
- return ( $indent, $offset );
+ return ( $indent, $offset, $is_leading, $exists );
}
sub lookup_opening_indentation {
$rindentation_list->[0] =
$nline; # save line number to start looking next call
- my $ibeg = $ri_start->[$nline];
- my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
- return ( $rindentation_list->[ $nline + 1 ], $offset );
+ my $ibeg = $ri_start->[$nline];
+ my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
+ my $is_leading = ( $ibeg == $i_opening );
+ return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}
{
my $adjust_indentation = 0;
my $default_adjust_indentation = $adjust_indentation;
- my ( $opening_indentation, $opening_offset );
+ my (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ );
# if we are at a closing token of some type..
if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
# get the indentation of the line containing the corresponding
# opening token
- ( $opening_indentation, $opening_offset ) =
- get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
# First set the default behavior:
# and 'cuddled parens' of the form: ")->pack("
|| (
- $terminal_type eq '('
+ $terminal_type eq '('
&& $types_to_go[$ibeg] eq ')'
&& ( $nesting_depth_to_go[$iend] + 1 ==
$nesting_depth_to_go[$ibeg] )
}
}
+ # YVES patch 1 of 2:
+ # Undo ci of line with leading closing eval brace,
+ # but not beyond the indentation of the line with
+ # the opening brace.
+ if ( $block_type_to_go[$ibeg] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && $indentation > $opening_indentation )
+ {
+ $adjust_indentation = 1;
+ }
+ }
+
$default_adjust_indentation = $adjust_indentation;
# Now modify default behavior according to user request:
}
}
+ # if line begins with a ':', align it with any
+ # previous line leading with corresponding ?
+ elsif ( $types_to_go[$ibeg] eq ':' ) {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ if ($is_leading) { $adjust_indentation = 2; }
+ }
+
##########################################################
# Section 2: set indentation according to flag set above
#
# we must treat something like '} else {' as if it were
# an isolated brace my $is_isolated_block_brace = (
# $iend == $ibeg ) && $block_type_to_go[$ibeg];
+ #############################################################
my $is_isolated_block_brace = $block_type_to_go[$ibeg]
&& ( $iend == $ibeg
|| $is_if_elsif_else_unless_while_until_for_foreach{
$block_type_to_go[$ibeg] } );
- #############################################################
- if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
+
+ # only do this for a ':' which is aligned with its leading '?'
+ my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+ if ( defined($opening_indentation)
+ && !$is_isolated_block_brace
+ && !$is_unaligned_colon )
+ {
if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
$indentation = $opening_indentation;
}
# Check for a last line with isolated opening BLOCK curly
elsif ($rOpts_block_brace_vertical_tightness
- && $ibeg eq $iend
+ && $ibeg eq $iend
&& $types_to_go[$iend] eq '{'
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/o )
if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
$alignment_type = ""
unless $vert_last_nonblank_token =~
- /^(if|unless|elsif)$/;
+ /^(if|unless|elsif)$/;
}
# be sure the alignment tokens are unique
# adjust bond strength bias
#-----------------------------------------------------------------
+ # TESTING: add any bias set by sub scan_list at old comma
+ # break points.
+ elsif ( $type eq ',' ) {
+ $bond_str += $bond_strength_to_go[$i];
+ }
+
elsif ( $type eq 'f' ) {
$bond_str += $f_bias;
$f_bias += $delta_bias;
$bond_str = NO_BREAK;
}
+ # Never break between a bareword and a following paren because
+ # perl may give an error. For example, if a break is placed
+ # between 'to_filehandle' and its '(' the following line will
+ # give a syntax error [Carp.pm]: my( $no) =fileno(
+ # to_filehandle( $in)) ;
+ if ( $next_nonblank_token eq '(' ) {
+ $bond_str = NO_BREAK;
+ }
}
# use strict requires that bare word within braces not start new line
$bond_str = NO_BREAK;
}
+ # Breaking before a ++ can cause perl to guess wrong. For
+ # example the following line will cause a syntax error
+ # with -extrude if we break between '$i' and '++' [fixstyle2]
+ # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
+ elsif ( $next_nonblank_type eq '++' ) {
+ $bond_str = NO_BREAK;
+ }
+
+ # Breaking before a ? before a quote can cause trouble if
+ # they are not separated by a blank.
+ # Example: a syntax error occurs if you break before the ? here
+ # my$logic=join$all?' && ':' || ',@regexps;
+ # From: Professional_Perl_Programming_Code/multifind.pl
+ elsif ( $next_nonblank_type eq '?' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
+ }
+
+ # Breaking before a . followed by a number
+ # can cause trouble if there is no intervening space
+ # Example: a syntax error occurs if you break before the .2 here
+ # $str .= pack($endian.2, ensurrogate($ord));
+ # From: perl58/Unicode.pm
+ elsif ( $next_nonblank_type eq '.' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
+ }
+
# patch to put cuddled elses back together when on multiple
# lines, as in: } \n else \n { \n
if ($rOpts_cuddled_else) {
my $dd = shift;
my $bp_count = 0;
my $do_not_break_apart = 0;
- if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
-
- my $fbc = $forced_breakpoint_count;
-
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
-
- set_comma_breakpoints_do(
- $dd,
- $opening_structure_index_stack[$dd],
- $i,
- $item_count_stack[$dd],
- $identifier_count_stack[$dd],
- $comma_index[$dd],
- $next_nonblank_type,
- $container_type[$dd],
- $interrupted_list[$dd],
- \$do_not_break_apart,
- $must_break_open,
- );
- $bp_count = $forced_breakpoint_count - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
+
+ # anything to do?
+ if ( $item_count_stack[$dd] ) {
+
+ # handle commas not in containers...
+ if ( $dont_align[$dd] ) {
+ do_uncontained_comma_breaks($dd);
+ }
+
+ # handle commas within containers...
+ else {
+ my $fbc = $forced_breakpoint_count;
+
+ # always open comma lists not preceded by keywords,
+ # barewords, identifiers (that is, anything that doesn't
+ # look like a function call)
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+
+ set_comma_breakpoints_do(
+ $dd,
+ $opening_structure_index_stack[$dd],
+ $i,
+ $item_count_stack[$dd],
+ $identifier_count_stack[$dd],
+ $comma_index[$dd],
+ $next_nonblank_type,
+ $container_type[$dd],
+ $interrupted_list[$dd],
+ \$do_not_break_apart,
+ $must_break_open,
+ );
+ $bp_count = $forced_breakpoint_count - $fbc;
+ $do_not_break_apart = 0 if $must_break_open;
+ }
}
return ( $bp_count, $do_not_break_apart );
}
+ sub do_uncontained_comma_breaks {
+
+ # Handle commas not in containers; $dd is the index used for
+ # @comma_index. This is a catch-all routine for commas that we
+ # don't know what to do with because they don't fall
+ # within containers. We will bias the bond strength
+ # to break at commas which ended lines in the input
+ # file. This usually works better than just trying
+ # to put as many items on a line as possible. A
+ # downside is that if the input file is garbage it
+ # won't work very well. However, the user can always
+ # prevent following the old breakpoints with the
+ # -iob flag.
+ my $dd = shift;
+ my $bias = -.01;
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $bond_strength_to_go[$ii] = $bias;
+
+ # shrink the bias magnitude for each later comma to force breaks in order
+ $bias *= 0.99;
+ }
+ }
+
+ # Also force a break at the earlier old breakpoint if
+ # (1) there was a break at the first comma in the input, and
+ # (2) there was exactly one previous break in the input,
+ # and that break is at the same level as the first comma.
+ # For example, we will follow the user and break after
+ # 'print' in this snippet:
+ # print
+ # "conformability (Not the same dimension)\n",
+ # "\t", $have, " is ", text_unit($hu), "\n",
+ # "\t", $want, " is ", text_unit($wu), "\n",
+ # ;
+ my $i_first_comma = $comma_index[$dd]->[0];
+ if ( $old_breakpoint_to_go[$i_first_comma] ) {
+ my $level_comma = $levels_to_go[$i_first_comma];
+ my $ibreak = -1;
+ my $obp_count = 0;
+ for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $obp_count++;
+ last if ( $obp_count > 1 );
+ $ibreak = $ii
+ if ( $levels_to_go[$ii] == $level_comma );
+ }
+ }
+ if ( $ibreak >= 0 && $obp_count == 1 ) {
+ set_forced_breakpoint($ibreak);
+ }
+ }
+ }
+
my %is_logical_container;
BEGIN {
$forced_breakpoint_count );
# update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
+ $has_broken_sublist[$depth] =
+ $has_broken_sublist[$depth]
|| $has_broken_sublist[$current_depth]
|| $is_long_term
|| $has_comma_breakpoints;
next;
}
- # skip past these commas if we are not supposed to format them
- next if ( $dont_align[$depth] );
-
# break after all commas above starting depth
- if ( $depth < $starting_depth ) {
+ if ( $depth < $starting_depth && !$dont_align[$depth] ) {
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
next;
}
&& $container_environment_to_go[$i] eq 'BLOCK' )
{
$dont_align[$depth] = 1;
- next;
}
}
if ( $rOpts_line_up_parentheses && !$must_break_open ) {
my $columns_if_unbroken = $rOpts_maximum_line_length -
total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
+ $need_lp_break_open =
+ ( $max_length[0] > $columns_if_unbroken )
|| ( $max_length[1] > $columns_if_unbroken )
|| ( $first_term_length > $columns_if_unbroken );
}
}
}
-sub recombine_breakpoints {
+{ # begin recombine_breakpoints
- # sub set_continuation_breaks is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Occasionally small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine, recombine_breakpoints.
- my ( $ri_first, $ri_last ) = @_;
- my $more_to_do = 1;
+ my %is_amp_amp;
+ my %is_ternary;
+ my %is_math_op;
+
+ BEGIN {
+
+ @_ = qw( && || );
+ @is_amp_amp{@_} = (1) x scalar(@_);
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = @$ri_last;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $n;
- my $nmax = @$ri_last - 1;
+ @_ = qw( ? : );
+ @is_ternary{@_} = (1) x scalar(@_);
+
+ @_ = qw( + - * / );
+ @is_math_op{@_} = (1) x scalar(@_);
+ }
+
+ sub recombine_breakpoints {
+
+ # sub set_continuation_breaks is very liberal in setting line breaks
+ # for long lines, always setting breaks at good breakpoints, even
+ # when that creates small lines. Occasionally small line fragments
+ # are produced which would look better if they were combined.
+ # That's the task of this routine, recombine_breakpoints.
+ #
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $ri_beg, $ri_end ) = @_;
- # safety check for infinite loop
- unless ( $nmax < $nmax_last ) {
+ my $more_to_do = 1;
+
+ # We keep looping over all of the lines of this batch
+ # until there are no more possible recombinations
+ my $nmax_last = @$ri_end;
+ while ($more_to_do) {
+ my $n_best = 0;
+ my $bs_best;
+ my $n;
+ my $nmax = @$ri_end - 1;
+
+ # safety check for infinite loop
+ unless ( $nmax < $nmax_last ) {
# shouldn't happen because splice below decreases nmax on each pass:
# but i get paranoid sometimes
- die "Program bug-infinite loop in recombine breakpoints\n";
- }
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $previous_outdentable_closing_paren;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
+ die "Program bug-infinite loop in recombine breakpoints\n";
+ }
+ $nmax_last = $nmax;
+ $more_to_do = 0;
+ my $previous_outdentable_closing_paren;
+ my $leading_amp_count = 0;
+ my $this_line_is_semicolon_terminated;
- # loop over all remaining lines in this batch
- for $n ( 1 .. $nmax ) {
+ # loop over all remaining lines in this batch
+ for $n ( 1 .. $nmax ) {
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # ---left---- | ---right---
- # $if $imid | $imidr $il
- #
- # We want to decide if we should join tokens $imid to $imidr
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- my $if = $$ri_first[ $n - 1 ];
- my $il = $$ri_last[$n];
- my $imid = $$ri_last[ $n - 1 ];
- my $imidr = $$ri_first[$n];
-
- #my $depth_increase=( $nesting_depth_to_go[$imidr] -
- # $nesting_depth_to_go[$if] );
-
-##print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
-
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
-
- # a terminal '{' should stay where it is
- next if $types_to_go[$imidr] eq '{';
-
- # set flag if statement $n ends in ';'
- $this_line_is_semicolon_terminated = $types_to_go[$il] eq ';'
-
- # with possible side comment
- || ( $types_to_go[$il] eq '#'
- && $il - $imidr >= 2
- && $types_to_go[ $il - 2 ] eq ';'
- && $types_to_go[ $il - 1 ] eq 'b' );
- }
-
- #----------------------------------------------------------
- # Section 1: examine token at $imid (right end of first line
- # of pair)
- #----------------------------------------------------------
-
- # an isolated '}' may join with a ';' terminated segment
- if ( $types_to_go[$imid] eq '}' ) {
-
- # Check for cases where combining a semicolon terminated
- # statement with a previous isolated closing paren will
- # allow the combined line to be outdented. This is
- # generally a good move. For example, we can join up
- # the last two lines here:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # )
- # = stat($file);
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
#
- # to get:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # ) = stat($file);
+ # Here are Indexes of the endpoint tokens of the two lines:
#
- # which makes the parens line up.
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # between the tokens at $iend_1 and $ibeg_2
#
- # Another example, from Joe Matarazzo, probably looks best
- # with the 'or' clause appended to the trailing paren:
- # $self->some_method(
- # PARAM1 => 'foo',
- # PARAM2 => 'bar'
- # ) or die "Some_method didn't work";
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just issue a 'next'
+ # command if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
#
- $previous_outdentable_closing_paren =
- $this_line_is_semicolon_terminated # ends in ';'
- && $if == $imid # only one token on last line
- && $tokens_to_go[$imid] eq ')' # must be structural paren
-
- # only &&, ||, and : if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $types_to_go[$imidr] !~ /^(:|\&\&|\|\|)$/ )
-
- # but leading colons probably line up with with a
- # previous colon or question (count could be wrong).
- && $types_to_go[$imidr] ne ':'
-
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$imid] ==
- $nesting_depth_to_go[$il] + 1 );
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $$ri_beg[ $n - 1 ];
+ my $iend_1 = $$ri_end[ $n - 1 ];
+ my $iend_2 = $$ri_end[$n];
+ my $ibeg_2 = $$ri_beg[$n];
+
+ my $ibeg_nmax = $$ri_beg[$nmax];
+
+ # some beginning indexes of other lines, which may not exist
+ my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
+
+ my $bs_tweak = 0;
+
+ #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
+ # $nesting_depth_to_go[$ibeg_1] );
+
+##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
+
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ if ( $n == $nmax ) {
+
+ # a terminal '{' should stay where it is
+ next if $types_to_go[$ibeg_2] eq '{';
+
+ # set flag if statement $n ends in ';'
+ $this_line_is_semicolon_terminated =
+ $types_to_go[$iend_2] eq ';'
+
+ # with possible side comment
+ || ( $types_to_go[$iend_2] eq '#'
+ && $iend_2 - $ibeg_2 >= 2
+ && $types_to_go[ $iend_2 - 2 ] eq ';'
+ && $types_to_go[ $iend_2 - 1 ] eq 'b' );
+ }
+
+ #----------------------------------------------------------
+ # Section 1: examine token at $iend_1 (right end of first line
+ # of pair)
+ #----------------------------------------------------------
+
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $types_to_go[$iend_1] eq '}' ) {
+
+ # Check for cases where combining a semicolon terminated
+ # statement with a previous isolated closing paren will
+ # allow the combined line to be outdented. This is
+ # generally a good move. For example, we can join up
+ # the last two lines here:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # )
+ # = stat($file);
+ #
+ # to get:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # ) = stat($file);
+ #
+ # which makes the parens line up.
+ #
+ # Another example, from Joe Matarazzo, probably looks best
+ # with the 'or' clause appended to the trailing paren:
+ # $self->some_method(
+ # PARAM1 => 'foo',
+ # PARAM2 => 'bar'
+ # ) or die "Some_method didn't work";
+ #
+ $previous_outdentable_closing_paren =
+ $this_line_is_semicolon_terminated # ends in ';'
+ && $ibeg_1 == $iend_1 # only one token on last line
+ && $tokens_to_go[$iend_1] eq
+ ')' # must be structural paren
+
+ # only &&, ||, and : if no others seen
+ # (but note: our count made below could be wrong
+ # due to intervening comments)
+ && ( $leading_amp_count == 0
+ || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
+
+ # but leading colons probably line up with a
+ # previous colon or question (count could be wrong).
+ && $types_to_go[$ibeg_2] ne ':'
+
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
+
+ # YVES patch 2 of 2:
+ # Allow cuddled eval chains, like this:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # } or do {
+ # #handle error
+ # };
+ # This patch works together with a patch in
+ # setting adjusted indentation (where the closing eval
+ # brace is outdented if possible).
+ # The problem is that an 'eval' block has continuation
+ # indentation and it looks better to undo it in some
+ # cases. If we do not use this patch we would get:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # }
+ # or do {
+ # #handle error
+ # };
+ # The alternative, for uncuddled style, is to create
+ # a patch in set_adjusted_indentation which undoes
+ # the indentation of a leading line like 'or do {'.
+ # This doesn't work well with -icb though
+ if (
+ $block_type_to_go[$iend_1] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'}
+ && $tokens_to_go[$iend_2] eq '{'
+ && (
+ ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ )
+ || ( $types_to_go[$ibeg_2] eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_2] } )
+ || $is_if_unless{ $tokens_to_go[$ibeg_2] }
+ )
+ )
+ {
+ $previous_outdentable_closing_paren ||= 1;
+ }
- next
- unless (
- $previous_outdentable_closing_paren
+ next
+ unless (
+ $previous_outdentable_closing_paren
- # handle '.' and '?' specially below
- || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
- );
- }
+ # handle '.' and '?' specially below
+ || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
+ );
+ }
- # do not recombine lines with ending &&, ||, or :
- elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
- next unless $want_break_before{ $types_to_go[$imid] };
- }
+ # YVES
+ # honor breaks at opening brace
+ # Added to prevent recombining something like this:
+ # } || eval { package main;
+ elsif ( $types_to_go[$iend_1] eq '{' ) {
+ next if $forced_breakpoint_to_go[$iend_1];
+ }
- # for lines ending in a comma...
- elsif ( $types_to_go[$imid] eq ',' ) {
+ # do not recombine lines with ending && or ||
+ elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
+ next unless $want_break_before{ $types_to_go[$iend_1] };
+ }
- # an isolated '},' may join with an identifier + ';'
- # this is useful for the class of a 'bless' statement (bless.t)
- if ( $types_to_go[$if] eq '}'
- && $types_to_go[$imidr] eq 'i' )
- {
+ # keep a terminal colon
+ elsif ( $types_to_go[$iend_1] eq ':' ) {
+ next unless $want_break_before{ $types_to_go[$iend_1] };
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $types_to_go[$iend_1] eq '?' ) {
+
+ # Do not recombine different levels
next
- unless ( ( $if == ( $imid - 1 ) )
- && ( $il == ( $imidr + 1 ) )
- && $this_line_is_semicolon_terminated );
+ if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
- # override breakpoint
- $forced_breakpoint_to_go[$imid] = 0;
+ # do not recombine unless next line ends in :
+ next unless $types_to_go[$iend_2] eq ':';
}
- # but otherwise, do not recombine unless this will leave
- # just 1 more line
- else {
- next unless ( $n + 1 >= $nmax );
- }
- }
+ # for lines ending in a comma...
+ elsif ( $types_to_go[$iend_1] eq ',' ) {
- # opening paren..
- elsif ( $types_to_go[$imid] eq '(' ) {
+ # Do not recombine at comma which is following the
+ # input bias.
+ # TODO: might be best to make a special flag
+ next if ( $old_breakpoint_to_go[$iend_1] );
- # No longer doing this
- }
+ # an isolated '},' may join with an identifier + ';'
+ # this is useful for the class of a 'bless' statement (bless.t)
+ if ( $types_to_go[$ibeg_1] eq '}'
+ && $types_to_go[$ibeg_2] eq 'i' )
+ {
+ next
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
- elsif ( $types_to_go[$imid] eq ')' ) {
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
- # No longer doing this
- }
+ # but otherwise ..
+ else {
- # keep a terminal colon
- elsif ( $types_to_go[$imid] eq ':' ) {
- next;
- }
+ # do not recombine after a comma unless this will leave
+ # just 1 more line
+ next unless ( $n + 1 >= $nmax );
- # keep a terminal for-semicolon
- elsif ( $types_to_go[$imid] eq 'f' ) {
- next;
- }
+ # do not recombine if there is a change in indentation depth
+ next
+ if (
+ $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
+
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means it contains an opening paren.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
+ }
+ }
+ next if $saw_paren;
+ }
+ }
- # if '=' at end of line ...
- elsif ( $is_assignment{ $types_to_go[$imid] } ) {
-
- my $is_short_quote =
- ( $types_to_go[$imidr] eq 'Q'
- && $imidr == $il
- && length( $tokens_to_go[$imidr] ) <
- $rOpts_short_concatenation_item_length );
- my $ifnmax = $$ri_first[$nmax];
- my $ifnp = ( $nmax > $n ) ? $$ri_first[ $n + 1 ] : $ifnmax;
- my $is_qk =
- ( $types_to_go[$if] eq '?' && $types_to_go[$ifnp] eq ':' );
-
- # always join an isolated '=', a short quote, or if this
- # will put ?/: at start of adjacent lines
- if ( $if != $imid
- && !$is_short_quote
- && !$is_qk )
- {
- next
- unless (
- (
+ # opening paren..
+ elsif ( $types_to_go[$iend_1] eq '(' ) {
- # unless we can reduce this to two lines
- $nmax < $n + 2
+ # No longer doing this
+ }
- # or three lines, the last with a leading semicolon
- || ( $nmax == $n + 2
- && $types_to_go[$ifnmax] eq ';' )
+ elsif ( $types_to_go[$iend_1] eq ')' ) {
- # or the next line ends with a here doc
- || $types_to_go[$il] eq 'h'
- )
+ # No longer doing this
+ }
- # do not recombine if the two lines might align well
- # this is a very approximate test for this
- && $types_to_go[$imidr] ne $types_to_go[$ifnp]
- );
+ # keep a terminal for-semicolon
+ elsif ( $types_to_go[$iend_1] eq 'f' ) {
+ next;
+ }
+
+ # if '=' at end of line ...
+ elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
+
+ my $is_short_quote =
+ ( $types_to_go[$ibeg_2] eq 'Q'
+ && $ibeg_2 == $iend_2
+ && length( $tokens_to_go[$ibeg_2] ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary =
+ ( $types_to_go[$ibeg_1] eq '?'
+ && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
- # -lp users often prefer this:
- # my $title = function($env, $env, $sysarea,
- # "bubba Borrower Entry");
- # so we will recombine if -lp is used we have ending comma
- if ( !$rOpts_line_up_parentheses
- || $types_to_go[$il] ne ',' )
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
{
+ next
+ unless (
+ (
- # otherwise, scan the rhs line up to last token for
- # complexity. Note that we are not counting the last
- # token in case it is an opening paren.
- my $tv = 0;
- my $depth = $nesting_depth_to_go[$imidr];
- for ( my $i = $imidr + 1 ; $i < $il ; $i++ ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 1 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
+
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
+
+ # or the next line ends with a here doc
+ || $types_to_go[$iend_2] eq 'h'
- # ok to recombine if no level changes before last token
- if ( $tv > 0 ) {
+ # or the next line ends in an open paren or brace
+ # and the break hasn't been forced [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $types_to_go[$iend_2] eq '{' )
+ )
+
+ # do not recombine if the two lines might align well
+ # this is a very approximate test for this
+ && ( $ibeg_3 >= 0
+ && $types_to_go[$ibeg_2] ne
+ $types_to_go[$ibeg_3] )
+ );
- # otherwise, do not recombine if more than two
- # level changes.
- next if ( $tv > 1 );
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used and we have an
+ # ending comma
+ if ( !$rOpts_line_up_parentheses
+ || $types_to_go[$iend_2] ne ',' )
+ {
- # check total complexity of the two adjacent lines
- # that will occur if we do this join
- my $istop =
- ( $n < $nmax ) ? $$ri_last[ $n + 1 ] : $il;
- for ( my $i = $il ; $i <= $istop ; $i++ ) {
+ # otherwise, scan the rhs line up to last token for
+ # complexity. Note that we are not counting the last
+ # token in case it is an opening paren.
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
- last if ( $tv > 2 );
+ last if ( $tv > 1 );
}
$depth = $nesting_depth_to_go[$i];
}
+ # ok to recombine if no level changes before last token
+ if ( $tv > 0 ) {
+
+ # otherwise, do not recombine if more than two
+ # level changes.
+ next if ( $tv > 1 );
+
+ # check total complexity of the two adjacent lines
+ # that will occur if we do this join
+ my $istop =
+ ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
+ for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 2 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
# do not recombine if total is more than 2 level changes
- next if ( $tv > 2 );
+ next if ( $tv > 2 );
+ }
}
}
- }
- unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$imid] = 0;
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
}
- }
- # for keywords..
- elsif ( $types_to_go[$imid] eq 'k' ) {
+ # for keywords..
+ elsif ( $types_to_go[$iend_1] eq 'k' ) {
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
+ # make major control keywords stand out
+ # (recombine.t)
+ next
+ if (
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$imid] }
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
- # but only if followed by multiple lines
- && $n < $nmax
- );
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
- if ( $is_and_or{ $tokens_to_go[$imid] } ) {
- next unless $want_break_before{ $tokens_to_go[$imid] };
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ next
+ unless $want_break_before{ $tokens_to_go[$iend_1] };
+ }
}
- }
- # handle trailing + - * /
- elsif ( $types_to_go[$imid] =~ /^[\+\-\*\/]$/ ) {
- my $i_next_nonblank = $imidr;
- my $i_next_next = $i_next_nonblank + 1;
- $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
+ # handle trailing + - * /
+ elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
- # do not strand numbers
- next
- unless (
- $types_to_go[$i_next_nonblank] eq 'n'
- && (
- $i_next_nonblank == $il
- || ( $i_next_next == $il
- && $types_to_go[$i_next_next] =~ /^[\+\-\*\/]$/ )
+ # combine lines if next line has single number
+ # or a short term followed by same operator
+ my $i_next_nonblank = $ibeg_2;
+ my $i_next_next = $i_next_nonblank + 1;
+ $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
+ my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
+ && (
+ $i_next_nonblank == $iend_2
+ || ( $i_next_next == $iend_2
+ && $is_math_op{ $types_to_go[$i_next_next] } )
|| $types_to_go[$i_next_next] eq ';'
- )
- );
- }
+ );
- #----------------------------------------------------------
- # Section 2: Now examine token at $imidr (left end of second
- # line of pair)
- #----------------------------------------------------------
-
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- if ($previous_outdentable_closing_paren) {
- $forced_breakpoint_to_go[$imid] = 0;
- }
-
- # do not recombine lines with leading &&, ||, or :
- elsif ( $types_to_go[$imidr] =~ /^(:|\&\&|\|\|)$/ ) {
- $leading_amp_count++;
- next if $want_break_before{ $types_to_go[$imidr] };
- }
-
- # Identify and recombine a broken ?/: chain
- elsif ( $types_to_go[$imidr] eq '?' ) {
-
- # indexes of line first tokens --
- # mm - line before previous line
- # f - previous line
- # <-- this line
- # ff - next line
- # fff - line after next
- my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
- my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
- my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1;
- my $seqno = $type_sequence_to_go[$imidr];
- my $f_ok =
- ( $types_to_go[$if] eq ':'
- && $type_sequence_to_go[$if] ==
- $seqno - TYPE_SEQUENCE_INCREMENT );
- my $mm_ok =
- ( $imm >= 0
- && $types_to_go[$imm] eq ':'
- && $type_sequence_to_go[$imm] ==
- $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
-
- my $ff_ok =
- ( $iff > 0
- && $types_to_go[$iff] eq ':'
- && $type_sequence_to_go[$iff] == $seqno );
- my $fff_ok =
- ( $ifff > 0
- && $types_to_go[$ifff] eq ':'
- && $type_sequence_to_go[$ifff] ==
- $seqno + TYPE_SEQUENCE_INCREMENT );
-
- # we require that this '?' be part of a correct sequence
- # of 3 in a row or else no recombination is done.
- next
- unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
- $forced_breakpoint_to_go[$imid] = 0;
- }
+ # find token before last operator of previous line
+ my $iend_1_minus = $iend_1;
+ $iend_1_minus--
+ if ( $iend_1_minus > $ibeg_1 );
+ $iend_1_minus--
+ if ( $types_to_go[$iend_1_minus] eq 'b'
+ && $iend_1_minus > $ibeg_1 );
+
+ my $short_term_follows =
+ ( $types_to_go[$iend_2] eq $types_to_go[$iend_1]
+ && $types_to_go[$iend_1_minus] =~ /^[in]$/
+ && $iend_2 <= $ibeg_2 + 2
+ && length( $tokens_to_go[$ibeg_2] ) <
+ $rOpts_short_concatenation_item_length );
- # do not recombine lines with leading '.'
- elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
- my $i_next_nonblank = $imidr + 1;
- if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
- $i_next_nonblank++;
+ next
+ unless ( $number_follows || $short_term_follows );
}
- next
- unless (
+ #----------------------------------------------------------
+ # Section 2: Now examine token at $ibeg_2 (left end of second
+ # line of pair)
+ #----------------------------------------------------------
+
+ # join lines identified above as capable of
+ # causing an outdented line with leading closing paren
+ if ($previous_outdentable_closing_paren) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # do not recombine lines with leading :
+ elsif ( $types_to_go[$ibeg_2] eq ':' ) {
+ $leading_amp_count++;
+ next if $want_break_before{ $types_to_go[$ibeg_2] };
+ }
+
+ # handle lines with leading &&, ||
+ elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
+
+ $leading_amp_count++;
+
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{ $types_to_go[$ibeg_1] }
+ && $tokens_to_go[$iend_2] eq '(' )
+
+ # or is followed by a ? or : at same depth
+ #
+ # We are looking for something like this. We can
+ # recombine the && line with the line above to make the
+ # structure more clear:
+ # return
+ # exists $G->{Attr}->{V}
+ # && exists $G->{Attr}->{V}->{$u}
+ # ? %{ $G->{Attr}->{V}->{$u} }
+ # : ();
+ #
+ # We should probably leave something like this alone:
+ # return
+ # exists $G->{Attr}->{E}
+ # && exists $G->{Attr}->{E}->{$u}
+ # && exists $G->{Attr}->{E}->{$u}->{$v}
+ # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
+ # : ();
+ # so that we either have all of the &&'s (or ||'s)
+ # on one line, as in the first example, or break at
+ # each one as in the second example. However, it
+ # sometimes makes things worse to check for this because
+ # it prevents multiple recombinations. So this is not done.
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
+
+ next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
+ $forced_breakpoint_to_go[$iend_1] = 0;
+
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $types_to_go[$ibeg_2] eq '?' ) {
+
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ next if ( $lev ne $levels_to_go[$ibeg_1] );
+
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon =
+ $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
+ my $precedes_colon =
+ $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ next unless ( $follows_colon || $precedes_colon );
+
+ # we will always combine a ? line following a : line
+ if ( !$follows_colon ) {
+
+ # ...otherwise recombine only if it looks like a chain.
+ # we will just look at a few nearby lines to see if
+ # this looks like a chain.
+ my $local_count = 0;
+ foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ next unless ( $local_count > 1 );
+ }
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # do not recombine lines with leading '.'
+ elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
+ my $i_next_nonblank = $ibeg_2 + 1;
+ if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
+ $i_next_nonblank++;
+ }
+
+ next
+ unless (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
# . '$args .= $pat;'
- (
- $n == 2
- && $n == $nmax
- && $types_to_go[$if] ne $types_to_go[$imidr]
- )
-
- # ... or this would strand a short quote , like this
- # . "some long qoute"
- # . "\n";
+ (
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
+ )
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $il - 1
- && length( $tokens_to_go[$i_next_nonblank] ) <
- $rOpts_short_concatenation_item_length )
- );
- }
+ # ... or this would strand a short quote , like this
+ # . "some long quote"
+ # . "\n";
+ || ( $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && length( $tokens_to_go[$i_next_nonblank] ) <
+ $rOpts_short_concatenation_item_length )
+ );
+ }
- # handle leading keyword..
- elsif ( $types_to_go[$imidr] eq 'k' ) {
+ # handle leading keyword..
+ elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
- # handle leading "and" and "or"
- if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+
+ # following 'if' or 'unless' or 'or'
+ $types_to_go[$ibeg_1] eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ # important: only combine a very simple or
+ # statement because the step below may have
+ # combined a trailing 'and' with this or,
+ # and we do not want to then combine
+ # everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ );
+ }
- # Decide if we will combine a single terminal 'and' and
- # 'or' after an 'if' or 'unless'. We should consider the
- # possible vertical alignment, and visual clutter.
+ # handle leading 'and'
+ elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- # Eventually, it would be nice to look for
- # similarities (such as 'this' or 'Parents'), but
- # for now I'm using a simple rule that says that
- # the resulting line length must not be more than
- # half the maximum line length (making it 80/2 =
- # 40 characters by default).
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
- # following 'if' or 'unless'
- $types_to_go[$if] eq 'k'
- && $is_if_unless{ $tokens_to_go[$if] }
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
- )
- );
- }
+ # following 'if' or 'unless' or 'or'
+ $types_to_go[$ibeg_1] eq 'k'
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
+ )
+ );
+ }
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
- # FIXME: This is still experimental..may not be too useful
- next
- unless (
- $this_line_is_semicolon_terminated
+ # FIXME: This is still experimental..may not be too useful
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- # previous line begins with 'and' or 'or'
- && $types_to_go[$if] eq 'k'
- && $is_and_or{ $tokens_to_go[$if] }
+ # previous line begins with 'and' or 'or'
+ && $types_to_go[$ibeg_1] eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
- );
- }
+ );
+ }
- # handle all other leading keywords
- else {
+ # handle all other leading keywords
+ else {
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{ $types_to_go[$imid] } ) {
- next
- if ( ( $types_to_go[$imid] ne 'k' )
- && ( $tokens_to_go[$imidr] ne 'while' ) );
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
+ next
+ if ( ( $types_to_go[$iend_1] ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
+ }
}
}
- }
- # similar treatment of && and || as above for 'and' and 'or':
- # NOTE: This block of code is currently bypassed because
- # of a previous block but is retained for possible future use.
- elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
+ # similar treatment of && and || as above for 'and' and 'or':
+ # NOTE: This block of code is currently bypassed because
+ # of a previous block but is retained for possible future use.
+ elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
- next
- unless (
- $this_line_is_semicolon_terminated
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- # previous line begins with an 'if' or 'unless' keyword
- && $types_to_go[$if] eq 'k'
- && $is_if_unless{ $tokens_to_go[$if] }
+ # previous line begins with an 'if' or 'unless' keyword
+ && $types_to_go[$ibeg_1] eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
- );
- }
-
- # handle leading + - * /
- elsif ( $types_to_go[$imidr] =~ /^[\+\-\*\/]$/ ) {
- my $i_next_nonblank = $imidr + 1;
- if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
- $i_next_nonblank++;
+ );
}
- my $i_next_next = $i_next_nonblank + 1;
- $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
+ # handle leading + - * /
+ elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
+ my $i_next_nonblank = $ibeg_2 + 1;
+ if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
+ $i_next_nonblank++;
+ }
- next
- unless (
+ my $i_next_next = $i_next_nonblank + 1;
+ $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
- # unless there is just one and we can reduce
- # this to two lines if we do. For example, this
- (
- $n == 2
- && $n == $nmax
- && $types_to_go[$if] ne $types_to_go[$imidr]
- )
-
- # do not strand numbers
- || (
+ my $is_number = (
$types_to_go[$i_next_nonblank] eq 'n'
- && ( $i_next_nonblank >= $il - 1
+ && ( $i_next_nonblank >= $iend_2 - 1
|| $types_to_go[$i_next_next] eq ';' )
- )
- );
- }
+ );
- # handle line with leading = or similar
- elsif ( $is_assignment{ $types_to_go[$imidr] } ) {
- next unless $n == 1;
- my $ifnmax = $$ri_first[$nmax];
- next
- unless (
+ my $iend_1_nonblank =
+ $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
+ my $iend_2_nonblank =
+ $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
+
+ my $is_short_term =
+ ( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
+ && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
+ && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
+ && $iend_2_nonblank <= $ibeg_2 + 2
+ && length( $tokens_to_go[$iend_2_nonblank] ) <
+ $rOpts_short_concatenation_item_length );
+
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
+ # This can be important in math-intensive code.
+ next
+ unless (
+ $is_number
+ || $is_short_term
- # unless we can reduce this to two lines
- $nmax == 2
+ # or if we can reduce this to two lines if we do.
+ || ( $n == 2
+ && $n == $nmax
+ && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
+ );
+ }
- # or three lines, the last with a leading semicolon
- || ( $nmax == 3 && $types_to_go[$ifnmax] eq ';' )
+ # handle line with leading = or similar
+ elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
+ next unless $n == 1;
+ next
+ unless (
- # or the next line ends with a here doc
- || $types_to_go[$il] eq 'h'
- );
- }
+ # unless we can reduce this to two lines
+ $nmax == 2
- #----------------------------------------------------------
- # Section 3:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$imid] > 0 );
+ # or the next line ends with a here doc
+ || $types_to_go[$iend_2] eq 'h'
+ );
+ }
- my $bs = $bond_strength_to_go[$imid];
+ #----------------------------------------------------------
+ # Section 3:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
- # combined line cannot be too long
- next
- if excess_line_length( $if, $il ) > 0;
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $$ri_first[ $n + 1 ];
- next
- if (
- $levels_to_go[$if] < $levels_to_go[$imidr]
- && $levels_to_go[$imidr] < $levels_to_go[$if_next]
+ my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $imid - $if <= 2
- && $types_to_go[$if] eq 'k'
- && $tokens_to_go[$if] eq 'if'
- && $tokens_to_go[$imid] ne '('
- )
- );
- }
+ # combined line cannot be too long
+ next
+ if excess_line_length( $ibeg_1, $iend_2 ) > 0;
- # honor no-break's
- next if ( $bs == NO_BREAK );
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $$ri_beg[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
+
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $types_to_go[$ibeg_1] eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
+ # honor no-break's
+ next if ( $bs == NO_BREAK );
- if ( $bs > $bs_best ) {
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
$n_best = $n;
$bs_best = $bs;
}
+ else {
- # we have 2 or more candidates, so need another pass
- $more_to_do++;
+ if ( $bs > $bs_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ }
}
- }
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @$ri_first, $n_best, 1;
- splice @$ri_last, $n_best - 1, 1;
+ # recombine the pair with the greatest bond strength
+ if ($n_best) {
+ splice @$ri_beg, $n_best, 1;
+ splice @$ri_end, $n_best - 1, 1;
+
+ # keep going if we are still making progress
+ $more_to_do++;
+ }
}
+ return ( $ri_beg, $ri_end );
}
- return ( $ri_first, $ri_last );
-}
+} # end recombine_breakpoints
sub break_all_chain_tokens {
# statement. If we see a break at any one, break at all similar tokens
# within the same container.
#
- # TODO:
- # does not handle nested ?: operators correctly
- # coordinate better with ?: logic in set_continuation_breaks
my ( $ri_left, $ri_right ) = @_;
my %saw_chain_type;
foreach my $i ( @{ $left_chain_type{$type} } ) {
next unless in_same_container( $i, $itest );
push @insert_list, $itest - 1;
+
+ # Break at matching ? if this : is at a different level.
+ # For example, the ? before $THRf_DEAD in the following
+ # should get a break if its : gets a break.
+ #
+ # my $flags =
+ # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
+ # : ( $_ & 4 ) ? $THRf_R_DETACHED
+ # : $THRf_R_JOINABLE;
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question > 0 ) {
+ push @insert_list, $i_question - 1;
+ }
+ }
last;
}
}
foreach my $i ( @{ $right_chain_type{$type} } ) {
next unless in_same_container( $i, $itest );
push @insert_list, $itest;
+
+ # break at matching ? if this : is at a different level
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
+ }
last;
}
}
}
}
+sub break_equals {
+
+ # Look for assignment operators that could use a breakpoint.
+ # For example, in the following snippet
+ #
+ # $HOME = $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # we could break at the = to get this, which is a little nicer:
+ # $HOME =
+ # $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # The logic here follows the logic in set_logical_padding, which
+ # will add the padding in the second line to improve alignment.
+ #
+ # Parameters:
+ # $ri_left - reference to the array of first-token indexes, one
+ # entry per output line
+ # $ri_right - reference to the array of last-token indexes, one
+ # entry per output line
+ # Nothing useful is returned. Any new breakpoints are handed, together
+ # with both array references, to insert_additional_breaks.
+ #
+ my ( $ri_left, $ri_right ) = @_;
+ my $nmax = @$ri_right - 1;
+
+ # at least three lines (indexes 0, 1 and 2) are required
+ return unless ( $nmax >= 2 );
+
+ # Scan the left ends of the second and third lines (indexes 1 and 2).
+ # Both must begin with a chain operator, and line 2 must begin with
+ # the same token at the same nesting depth as line 1.
+ my $tokbeg = "";
+ my $depth_beg;
+ for my $n ( 1 .. 2 ) {
+ my $il = $$ri_left[$n];
+ my $typel = $types_to_go[$il];
+ my $tokenl = $tokens_to_go[$il];
+
+ # A token starting with a word character is looked up by its
+ # token text; anything else is looked up by its type.
+ my $has_leading_op = ( $tokenl =~ /^\w/ )
+ ? $is_chain_operator{$tokenl} # and, or
+ : $is_chain_operator{$typel}; # + - * / : ? && ||
+ return unless ($has_leading_op);
+ if ( $n > 1 ) {
+ return
+ unless ( $tokenl eq $tokbeg
+ && $nesting_depth_to_go[$il] eq $depth_beg );
+ }
+ $tokbeg = $tokenl;
+ $depth_beg = $nesting_depth_to_go[$il];
+ }
+
+ # now examine the first line (index 0)
+ my $il = $$ri_left[0];
+ my $ir = $$ri_right[0];
+
+ # Now make a list of all new break points: scan the interior tokens
+ # of the first line from right to left for assignment operators at
+ # the same nesting depth as the leading chain operators.
+ my @insert_list;
+ for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
+ my $type = $types_to_go[$i];
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+
+ # the recorded index is the token after which the line breaks,
+ # so step back one to break before the operator
+ if ( $want_break_before{$type} ) {
+ push @insert_list, $i - 1;
+ }
+ else {
+ push @insert_list, $i;
+ }
+ }
+ }
+
+ # Break after a 'return' followed by a chain of operators
+ # return ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # To give:
+ # return
+ # ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # NOTE(review): this tests the token at index 0 rather than at $il;
+ # presumably the first line always starts at index 0 -- confirm.
+ my $i = 0;
+ if ( $types_to_go[$i] eq 'k'
+ && $tokens_to_go[$i] eq 'return'
+ && $ir > $il
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ push @insert_list, $i;
+ }
+
+ return unless (@insert_list);
+
+ # One final check...
+ # scan second and third lines and be sure there are no assignments
+ # we want to avoid breaking at an = to make something like this:
+ # unless ( $icon =
+ # $html_icons{"$type-$state"}
+ # or $icon = $html_icons{$type}
+ # or $icon = $html_icons{$state} )
+ # (the scan starts one token past the leading chain operator)
+ for my $n ( 1 .. 2 ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
+ my $type = $types_to_go[$i];
+ return
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg );
+ }
+ }
+
+ # ok, insert any new break point
+ # (the list cannot be empty here because of the early return above)
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+}
+
+sub insert_final_breaks {
+
+ # Look for one extra breakpoint ahead of a ternary '?' that sits in
+ # the interior of a line while a later line begins or ends with ':'.
+ #
+ # Parameters:
+ # $ri_left - reference to the array of first-token indexes, one
+ # entry per output line
+ # $ri_right - reference to the array of last-token indexes, one
+ # entry per output line
+ # Nothing useful is returned. Any new breakpoint is handed, together
+ # with both array references, to insert_additional_breaks.
+
+ my ( $ri_left, $ri_right ) = @_;
+
+ my $nmax = @$ri_right - 1;
+
+ # Scan the left and right end tokens of all lines, stopping at the
+ # first ':' found. Give up entirely if a line begins or ends with
+ # a '?' before any such ':' is seen.
+ # NOTE(review): $count is not referenced again in this sub.
+ my $count = 0;
+ my $i_first_colon = -1;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ return if ( $typel eq '?' );
+ return if ( $typer eq '?' );
+ if ( $typel eq ':' ) { $i_first_colon = $il; last; }
+ elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ }
+
+ # For long ternary chains,
+ # if the first : we see has its ? in the interior
+ # of a preceding line, then see if there are any good
+ # breakpoints before the ?.
+ if ( $i_first_colon > 0 ) {
+
+ # presumably the index of the '?' paired with this ':' -- confirm
+ # against the code that fills @mate_index_to_go
+ my $i_question = $mate_index_to_go[$i_first_colon];
+ if ( $i_question > 0 ) {
+ my @insert_list;
+
+ # Walk backward from the token before the '?' to index 0 and
+ # keep only the nearest acceptable break.
+ for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ my $token = $tokens_to_go[$ii];
+ my $type = $types_to_go[$ii];
+
+ # For now, a good break is either a comma or a 'return'.
+ # ('&&' binds tighter than '||', so the keyword test
+ # applies only to 'return'.)
+ if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
+ && in_same_container( $ii, $i_question ) )
+ {
+ push @insert_list, $ii;
+ last;
+ }
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ }
+ }
+}
+
sub in_same_container {
# check to see if tokens at i1 and i2 are in the
my $depth = $nesting_depth_to_go[$i1];
return unless ( $nesting_depth_to_go[$i2] == $depth );
if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
+
+ ###########################################################
+ # This is potentially a very slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ # TODO: replace this loop with a data structure
+ ###########################################################
+ return if ( $i2 - $i1 > 200 );
+
for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
next if ( $nesting_depth_to_go[$i] > $depth );
return if ( $nesting_depth_to_go[$i] < $depth );
# see if any ?/:'s are in order
my $colons_in_order = 1;
my $last_tok = "";
- my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
+ my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
+ my $colon_count = @colon_list;
foreach (@colon_list) {
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
$last_tok = $_;
# See similar logic in scan_list which catches instances
# where a line is just something like ') {'
|| ( $line_count
- && ( $token eq ')' )
+ && ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
&& !$rOpts->{'opening-brace-always-on-right'} )
}
}
}
- return \@i_first, \@i_last;
+ return ( \@i_first, \@i_last, $colon_count );
}
sub insert_additional_breaks {
$rOpts_entab_leading_whitespace
$rOpts_valign
+ $rOpts_fixed_position_side_comment
$rOpts_minimum_space_to_comment
);
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_tabs = $rOpts->{'tabs'};
$rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
+ $rOpts_fixed_position_side_comment =
+ $rOpts->{'fixed-position-side-comment'};
$rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
$rOpts_valign = $rOpts->{'valign'};
&& $rvertical_tightness_flags->[2] == $cached_seqno )
{
$rvertical_tightness_flags->[3] ||= 1;
- $cached_line_valid ||= 1;
+ $cached_line_valid ||= 1;
}
}
my $old_line = shift;
my $maximum_field_index = $old_line->get_jmax();
+ ###############################################
# this line must have fewer fields
return unless $maximum_field_index > $jmax;
+ ###############################################
# Identify specific cases where field elimination is allowed:
# case=1: both lines have comma-separated lists, and the first
# look for the opening brace after the else, and extrace the depth
my $tok_brace = $rtokens->[0];
my $depth_brace;
- if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; }
+ if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
# probably: "else # side_comment"
else { return }
unless ( $rfields_old->[0] =~ /^case\s*$/ );
}
-sub check_match {
-
- my $new_line = shift;
- my $old_line = shift;
-
- # uses global variables:
- # $previous_minimum_jmax_seen
- # $maximum_jmax_seen
- # $maximum_line_index
- # $marginal_match
- my $jmax = $new_line->get_jmax();
- my $maximum_field_index = $old_line->get_jmax();
-
- # flush if this line has too many fields
- if ( $jmax > $maximum_field_index ) { my_flush(); return }
-
- # flush if adding this line would make a non-monotonic field count
- if (
- ( $maximum_field_index > $jmax ) # this has too few fields
- && (
- ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic
- || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
- )
- )
- {
- my_flush();
- return;
- }
-
- # otherwise append this line if everything matches
- my $jmax_original_line = $new_line->get_jmax_original_line();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $list_type = $new_line->get_list_type();
+{ # sub check_match
+ my %is_good_alignment;
- my $group_list_type = $old_line->get_list_type();
- my $old_rpatterns = $old_line->get_rpatterns();
- my $old_rtokens = $old_line->get_rtokens();
+ BEGIN {
- my $jlimit = $jmax - 1;
- if ( $maximum_field_index > $jmax ) {
- $jlimit = $jmax_original_line;
- --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
+ # Vertically aligning on certain "good" tokens is usually okay
+ # so we can be less restrictive in marginal cases.
+ @_ = qw( { ? => = );
+ push @_, (',');
+ @is_good_alignment{@_} = (1) x scalar(@_);
}
- my $everything_matches = 1;
-
- # common list types always match
- unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
- || $is_hanging_side_comment )
- {
+ sub check_match {
- my $leading_space_count = $new_line->get_leading_space_count();
- my $saw_equals = 0;
- for my $j ( 0 .. $jlimit ) {
- my $match = 1;
+ # See if the current line matches the current vertical alignment group.
+ # If not, flush the current group.
+ my $new_line = shift;
+ my $old_line = shift;
- my $old_tok = $$old_rtokens[$j];
- my $new_tok = $$rtokens[$j];
+ # uses global variables:
+ # $previous_minimum_jmax_seen
+ # $maximum_jmax_seen
+ # $maximum_line_index
+ # $marginal_match
+ my $jmax = $new_line->get_jmax();
+ my $maximum_field_index = $old_line->get_jmax();
- # Dumb down the match AFTER an equals and
- # also dumb down after seeing a ? ternary operator ...
- # Everything after a + is the token which preceded the previous
- # opening paren (container name). We won't require them to match.
- if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
- $new_tok = $1;
- $old_tok =~ s/\+.*$//;
- }
+ # flush if this line has too many fields
+ if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
- if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 }
+ # flush if adding this line would make a non-monotonic field count
+ if (
+ ( $maximum_field_index > $jmax ) # this has too few fields
+ && (
+ ( $previous_minimum_jmax_seen <
+ $jmax ) # and wouldn't be monotonic
+ || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
+ )
+ )
+ {
+ goto NO_MATCH;
+ }
+
+ # otherwise see if this line matches the current group
+ my $jmax_original_line = $new_line->get_jmax_original_line();
+ my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
+ my $rtokens = $new_line->get_rtokens();
+ my $rfields = $new_line->get_rfields();
+ my $rpatterns = $new_line->get_rpatterns();
+ my $list_type = $new_line->get_list_type();
+
+ my $group_list_type = $old_line->get_list_type();
+ my $old_rpatterns = $old_line->get_rpatterns();
+ my $old_rtokens = $old_line->get_rtokens();
+
+ my $jlimit = $jmax - 1;
+ if ( $maximum_field_index > $jmax ) {
+ $jlimit = $jmax_original_line;
+ --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
+ }
+
+ # handle comma-separated lists ..
+ if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
+ for my $j ( 0 .. $jlimit ) {
+ my $old_tok = $$old_rtokens[$j];
+ next unless $old_tok;
+ my $new_tok = $$rtokens[$j];
+ next unless $new_tok;
+
+ # lists always match ...
+ # unless they would align any '=>'s with ','s
+ goto NO_MATCH
+ if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
+ || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
+ }
+ }
+
+ # do detailed check for everything else except hanging side comments
+ elsif ( !$is_hanging_side_comment ) {
+
+ my $leading_space_count = $new_line->get_leading_space_count();
+
+ my $max_pad = 0;
+ my $min_pad = 0;
+ my $saw_good_alignment;
+
+ for my $j ( 0 .. $jlimit ) {
+
+ my $old_tok = $$old_rtokens[$j];
+ my $new_tok = $$rtokens[$j];
+
+ # Note on encoding used for alignment tokens:
+ # -------------------------------------------
+ # Tokens are "decorated" with information which can help
+ # prevent unwanted alignments. Consider for example the
+ # following two lines:
+ # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
+ # local ( $i, $f ) = &'bdiv( $xn, $xd );
+ # There are three alignment tokens in each line, a comma,
+ # an =, and a comma. In the first line these three tokens
+ # are encoded as:
+ # ,4+local-18 =3 ,4+split-7
+ # and in the second line they are encoded as
+ # ,4+local-18 =3 ,4+&'bdiv-8
+ # Tokens always at least have token name and nesting
+ # depth. So in this example the ='s are at depth 3 and
+ # the ,'s are at depth 4. This prevents aligning tokens
+ # of different depths. Commas contain additional
+ # information, as follows:
+ # , {depth} + {container name} - {spaces to opening paren}
+ # This allows us to reject matching the rightmost commas
+ # in the above two lines, since they are for different
+ # function calls. This encoding is done in
+ # 'sub send_lines_to_vertical_aligner'.
+
+ # Pick off actual token.
+ # Everything up to the first digit is the actual token.
+ my $alignment_token = $new_tok;
+ if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
+
+ # see if the decorated tokens match
+ my $tokens_match = $new_tok eq $old_tok
+
+ # Exception for matching terminal : of ternary statement..
+ # consider containers prefixed by ? and : a match
+ || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
+
+ # No match if the alignment tokens differ...
+ if ( !$tokens_match ) {
+
+ # ...Unless this is a side comment
+ if (
+ $j == $jlimit
+
+ # and there is either at least one alignment token
+ # or this is a single item following a list. This
+ # latter rule is required for 'December' to join
+ # the following list:
+ # my (@months) = (
+ # '', 'January', 'February', 'March',
+ # 'April', 'May', 'June', 'July',
+ # 'August', 'September', 'October', 'November',
+ # 'December'
+ # );
+ # If it doesn't then the -lp formatting will fail.
+ && ( $j > 0 || $old_tok =~ /^,/ )
+ )
+ {
+ $marginal_match = 1
+ if ( $marginal_match == 0
+ && $maximum_line_index == 0 );
+ last;
+ }
- # we never match if the matching tokens differ
- if ( $j < $jlimit
- && $old_tok ne $new_tok )
- {
- $match = 0;
- }
+ goto NO_MATCH;
+ }
- # otherwise, if patterns match, we always have a match.
- # However, if patterns don't match, we have to be careful...
- elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
+ # Calculate amount of padding required to fit this in.
+ # $pad is the number of spaces by which we must increase
+ # the current field to squeeze in this field.
+ my $pad =
+ length( $$rfields[$j] ) - $old_line->current_field_width($j);
+ if ( $j == 0 ) { $pad += $leading_space_count; }
- # We have to be very careful about aligning commas when the
- # pattern's don't match, because it can be worse to create an
- # alignment where none is needed than to omit one. The current
- # rule: if we are within a matching sub call (indicated by '+'
- # in the matching token), we'll allow a marginal match, but
- # otherwise not.
- #
- # Here's an example where we'd like to align the '='
- # my $cfile = File::Spec->catfile( 't', 'callext.c' );
- # my $inc = File::Spec->catdir( 'Basic', 'Core' );
- # because the function names differ.
- # Future alignment logic should make this unnecessary.
- #
- # Here's an example where the ','s are not contained in a call.
- # The first line below should probably not match the next two:
- # ( $a, $b ) = ( $b, $r );
- # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
- # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
- if ( $new_tok =~ /^,/ ) {
- if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
- $marginal_match = 1;
- }
- else {
- $match = 0;
- }
+ # remember max pads to limit marginal cases
+ if ( $alignment_token ne '#' ) {
+ if ( $pad > $max_pad ) { $max_pad = $pad }
+ if ( $pad < $min_pad ) { $min_pad = $pad }
}
-
- # parens don't align well unless patterns match
- elsif ( $new_tok =~ /^\(/ ) {
- $match = 0;
+ if ( $is_good_alignment{$alignment_token} ) {
+ $saw_good_alignment = 1;
}
- # Handle an '=' alignment with different patterns to
- # the left.
- elsif ( $new_tok =~ /^=\d*$/ ) {
+ # If patterns don't match, we have to be careful...
+ if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
- $saw_equals = 1;
+ # flag this as a marginal match since patterns differ
+ $marginal_match = 1
+ if ( $marginal_match == 0 && $maximum_line_index == 0 );
- # It is best to be a little restrictive when
- # aligning '=' tokens. Here is an example of
- # two lines that we will not align:
- # my $variable=6;
- # $bb=4;
- # The problem is that one is a 'my' declaration,
- # and the other isn't, so they're not very similar.
- # We will filter these out by comparing the first
- # letter of the pattern. This is crude, but works
- # well enough.
- if (
- substr( $$old_rpatterns[$j], 0, 1 ) ne
- substr( $$rpatterns[$j], 0, 1 ) )
- {
- $match = 0;
+ # We have to be very careful about aligning commas
+ # when the patterns don't match, because it can be
+ # worse to create an alignment where none is needed
+ # than to omit one. Here's an example where the ','s
+ # are not in named containers. The first line below
+ # should not match the next two:
+ # ( $a, $b ) = ( $b, $r );
+ # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
+ # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
+ if ( $alignment_token eq ',' ) {
+
+ # do not align commas unless they are in named containers
+ goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
}
- # If we pass that test, we'll call it a marginal match.
- # Here is an example of a marginal match:
- # $done{$$op} = 1;
- # $op = compile_bblock($op);
- # The left tokens are both identifiers, but
- # one accesses a hash and the other doesn't.
- # We'll let this be a tentative match and undo
- # it later if we don't find more than 2 lines
- # in the group.
- elsif ( $maximum_line_index == 0 ) {
- $marginal_match = 1;
+ # do not align parens unless patterns match;
+ # large ugly spaces can occur in math expressions.
+ elsif ( $alignment_token eq '(' ) {
+
+ # But we can allow a match if the parens don't
+ # require any padding.
+ if ( $pad != 0 ) { goto NO_MATCH }
}
- }
- }
- # Don't let line with fewer fields increase column widths
- # ( align3.t )
- if ( $maximum_field_index > $jmax ) {
- my $pad =
- length( $$rfields[$j] ) - $old_line->current_field_width($j);
+ # Handle an '=' alignment with different patterns to
+ # the left.
+ elsif ( $alignment_token eq '=' ) {
+
+ # It is best to be a little restrictive when
+ # aligning '=' tokens. Here is an example of
+ # two lines that we will not align:
+ # my $variable=6;
+ # $bb=4;
+ # The problem is that one is a 'my' declaration,
+ # and the other isn't, so they're not very similar.
+ # We will filter these out by comparing the first
+ # letter of the pattern. This is crude, but works
+ # well enough.
+ if (
+ substr( $$old_rpatterns[$j], 0, 1 ) ne
+ substr( $$rpatterns[$j], 0, 1 ) )
+ {
+ goto NO_MATCH;
+ }
- if ( $j == 0 ) {
- $pad += $leading_space_count;
+ # If we pass that test, we'll call it a marginal match.
+ # Here is an example of a marginal match:
+ # $done{$$op} = 1;
+ # $op = compile_bblock($op);
+ # The left tokens are both identifiers, but
+ # one accesses a hash and the other doesn't.
+ # We'll let this be a tentative match and undo
+ # it later if we don't find more than 2 lines
+ # in the group.
+ elsif ( $maximum_line_index == 0 ) {
+ $marginal_match =
+ 2; # =2 prevents being undone below
+ }
+ }
}
- # TESTING: suspend this rule to allow last lines to join
- if ( $pad > 0 ) { $match = 0; }
- }
-
- unless ($match) {
- $everything_matches = 0;
- last;
+ # Don't let line with fewer fields increase column widths
+ # ( align3.t )
+ if ( $maximum_field_index > $jmax ) {
+
+ # Exception: suspend this rule to allow last lines to join
+ if ( $pad > 0 ) { goto NO_MATCH; }
+ }
+ } ## end for my $j ( 0 .. $jlimit)
+
+ # Turn off the "marginal match" flag in some cases...
+ # A "marginal match" occurs when the alignment tokens agree
+ # but there are differences in the other tokens (patterns).
+ # If we leave the marginal match flag set, then the rule is that we
+ # will align only if there are more than two lines in the group.
+ # We will turn off the flag if we almost have a match
+ # and either we have seen a good alignment token or we
+ # just need a small pad (2 spaces) to fit. These rules are
+ # the result of experimentation. Tokens which are misaligned by just
+ # one or two characters are annoying. On the other hand,
+ # large gaps to less important alignment tokens are also annoying.
+ if ( $marginal_match == 1
+ && $jmax == $maximum_field_index
+ && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
+ )
+ {
+ $marginal_match = 0;
}
+ ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
}
- }
-
- if ( $maximum_field_index > $jmax ) {
-
- if ($everything_matches) {
+ # We have a match (even if marginal).
+ # If the current line has fewer fields than the current group
+ # but otherwise matches, copy the remaining group fields to
+ # make it a perfect match.
+ if ( $maximum_field_index > $jmax ) {
my $comment = $$rfields[$jmax];
for $jmax ( $jlimit .. $maximum_field_index ) {
$$rtokens[$jmax] = $$old_rtokens[$jmax];
$$rfields[$jmax] = $comment;
$new_line->set_jmax($jmax);
}
- }
+ return;
- my_flush() unless ($everything_matches);
+ NO_MATCH:
+ ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
+ my_flush();
+ return;
+ }
}
sub check_fit {
: $rOpts_minimum_space_to_comment - 1;
}
+ # if the -fpsc flag is set, move the side comment to the selected
+ # column if and only if it is possible, ignoring constraints on
+ # line length and minimum space to comment
+ if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
+ {
+ my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
+ if ( $newpad >= 0 ) { $pad = $newpad; }
+ }
+
# accumulate the padding
if ( $pad > 0 ) { $total_pad_count += $pad; }
}
sub write_blank_code_line {
- my $self = shift;
- my $rOpts = $self->{_rOpts};
+ my $self = shift;
+ my $forced = shift;
+ my $rOpts = $self->{_rOpts};
return
- if ( $self->{_consecutive_blank_lines} >=
+ if (!$forced
+ && $self->{_consecutive_blank_lines} >=
$rOpts->{'maximum-consecutive-blank-lines'} );
$self->{_consecutive_blank_lines}++;
$self->{_consecutive_nonblank_lines} = 0;
$pattern .= $$rtoken_type[$j];
}
$reconstructed_original .= $$rtokens[$j];
- $block_str .= "($$rblock_type[$j])";
+ $block_str .= "($$rblock_type[$j])";
$num = length( $$rtokens[$j] );
my $type_str = $$rtoken_type[$j];
$square_bracket_depth
@current_depth
+ @total_depth
+ $total_depth
@nesting_sequence_number
@current_sequence_number
@paren_type
@square_bracket_type
@square_bracket_structural_type
@depth_array
+ @nested_ternary_flag
@starting_line_of_current_depth
};
starting_level => undef,
indent_columns => 4,
tabs => 0,
+ entab_leading_space => undef,
look_for_hash_bang => 0,
trim_qw => 1,
look_for_autoloader => 1,
_starting_level => $args{starting_level},
_know_starting_level => defined( $args{starting_level} ),
_tabs => $args{tabs},
+ _entab_leading_space => $args{entab_leading_space},
_indent_columns => $args{indent_columns},
_look_for_hash_bang => $args{look_for_hash_bang},
_trim_qw => $args{trim_qw},
$line_of_tokens->{_line_type} = 'CODE';
# remember if we have seen any real code
- if ( !$tokenizer_self->{_started_tokenizing}
+ if ( !$tokenizer_self->{_started_tokenizing}
&& $input_line !~ /^\s*$/
&& $input_line !~ /^\s*#/ )
{
my $i = 0;
my $structural_indentation_level = -1; # flag for find_indentation_level
+ # keep looking at lines until we find a hash bang or piece of code
my $msg = "";
while ( $line =
$tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
$starting_level = 0;
last;
}
- next if ( $line =~ /^\s*#/ ); # must not be comment
- next if ( $line =~ /^\s*$/ ); # must not be blank
+ next if ( $line =~ /^\s*#/ ); # skip past comments
+ next if ( $line =~ /^\s*$/ ); # skip past blank lines
( $starting_level, $msg ) =
find_indentation_level( $line, $structural_indentation_level );
if ($msg) { write_logfile_entry("$msg") }
$know_input_tabstr = 0;
- if ( $tokenizer_self->{_tabs} ) {
+ # When -et=n is used for the output formatting, we will assume that
+ # tabs in the input formatting were also produced with -et=n. This may
+ # not be true, but it is the best guess because it will keep leading
+ # whitespace unchanged on repeated formatting on small pieces of code
+ # when -et=n is used. Thanks to Sam Kington for this patch.
+ if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) {
+ $leading_whitespace =~ s{^ (\t*) }
+ { " " x (length($1) * $tabsize) }xe;
+ $input_tabstr = " " x $tokenizer_self->{_indent_columns};
+ }
+ elsif ( $tokenizer_self->{_tabs} ) {
$input_tabstr = "\t";
if ( length($leading_whitespace) > 0 ) {
if ( $leading_whitespace !~ /\t/ ) {
$square_bracket_depth = 0;
@current_depth[ 0 .. $#closing_brace_names ] =
(0) x scalar @closing_brace_names;
+ $total_depth = 0;
+ @total_depth = ();
@nesting_sequence_number[ 0 .. $#closing_brace_names ] =
( 0 .. $#closing_brace_names );
@current_sequence_number = ();
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
);
# TV2: refs to ARRAYS for processing one LINE
my $routput_block_type = []; # types of code block
my $routput_container_type = []; # paren types, such as if, elsif, ..
my $routput_type_sequence = []; # nesting sequential number
+ my $routput_indent_flag = []; #
# TV3: SCALARS for quote variables. These are initialized with a
# subroutine call and continually updated as lines are processed.
# TV4: SCALARS for multi-line identifiers and
# statements. These are initialized with a subroutine call
# and continually updated as lines are processed.
- my ( $id_scan_state, $identifier, $want_paren, );
+ my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
# TV5: SCALARS for tracking indentation level.
# Initialized once and continually updated as lines are
$allowed_quote_modifiers = "";
# TV4:
- $id_scan_state = '';
- $identifier = '';
- $want_paren = "";
+ $id_scan_state = '';
+ $identifier = '';
+ $want_paren = "";
+ $indented_if_level = 0;
# TV5:
$nesting_token_string = "";
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
];
my $rTV2 = [
- $routput_token_list, $routput_token_type,
- $routput_block_type, $routput_container_type,
- $routput_type_sequence,
+ $routput_token_list, $routput_token_type,
+ $routput_block_type, $routput_container_type,
+ $routput_type_sequence, $routput_indent_flag,
];
my $rTV3 = [
$quoted_string_2, $allowed_quote_modifiers,
];
- my $rTV4 = [ $id_scan_state, $identifier, $want_paren, ];
+ my $rTV4 =
+ [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
my $rTV5 = [
$nesting_token_string, $nesting_type_string,
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
) = @{$rTV1};
(
- $routput_token_list, $routput_token_type,
- $routput_block_type, $routput_container_type,
- $routput_type_sequence,
+ # restore in the same order as saved in $rTV2; the sixth slot holds
+ # the indent flags, not a second copy of the type sequence
+ $routput_token_list, $routput_token_type,
+ $routput_block_type, $routput_container_type,
+ $routput_type_sequence, $routput_indent_flag,
) = @{$rTV2};
(
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
) = @{$rTV3};
- ( $id_scan_state, $identifier, $want_paren, ) = @{$rTV4};
+ ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
+ @{$rTV4};
(
$nesting_token_string, $nesting_type_string,
}
sub get_indentation_level {
+
+ # patch to avoid reporting error if indented if is not terminated
+ if ($indented_if_level) { return $level_in_tokenizer - 1 }
return $level_in_tokenizer;
}
%is_block_list_function, %saw_function_definition,
$brace_depth, $paren_depth,
$square_bracket_depth, @current_depth,
+ @total_depth, $total_depth,
@nesting_sequence_number, @current_sequence_number,
@paren_type, @paren_semicolon_count,
@paren_structural_type, @brace_type,
@brace_context, @brace_package,
@square_bracket_type, @square_bracket_structural_type,
@depth_array, @starting_line_of_current_depth,
+ @nested_ternary_flag,
);
# save all lexical variables
sub scan_identifier {
( $i, $tok, $type, $id_scan_state, $identifier ) =
scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
- $max_token_index );
+ $max_token_index, $expecting );
}
sub scan_id {
} ## end if ( $expecting == OPERATOR...
}
$paren_type[$paren_depth] = $container_type;
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
# propagate types down through nested parens
},
')' => sub {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
if ( $paren_structural_type[$paren_depth] eq '{' ) {
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[cgimosx]';
+ $allowed_quote_modifiers = '[cgimosxp]';
}
else { # not a pattern; check for a /= token
}
$brace_type[ ++$brace_depth ] = $block_type;
$brace_package[$brace_depth] = $current_package;
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
$brace_structural_type[$brace_depth] = $type;
$brace_context[$brace_depth] = $context;
# can happen on brace error (caught elsewhere)
else {
}
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
if ( $brace_structural_type[$brace_depth] eq 'L' ) {
find_angle_operator_termination( $input_line, $i, $rtoken_map,
$expecting, $max_token_index );
+ if ( $type eq '<' && $expecting == TERM ) {
+ error_if_expecting_TERM();
+ interrupt_logfile();
+ warning("Unterminated <> operator?\n");
+ resume_logfile();
+ }
}
else {
}
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this
+ $allowed_quote_modifiers = '[cgimosxp]';
}
else {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( QUESTION_COLON,
$$rtoken_map[$i_tok] );
}
# otherwise, it should be part of a ?/: operator
else {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( QUESTION_COLON,
$$rtoken_map[$i_tok] );
if ( $last_nonblank_token eq '?' ) {
'[' => sub {
$square_bracket_type[ ++$square_bracket_depth ] =
$last_nonblank_token;
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
# It may seem odd, but structural square brackets have
$square_bracket_structural_type[$square_bracket_depth] = $type;
},
']' => sub {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
if ( ( $expecting != OPERATOR )
&& $is_file_test_operator{$next_tok} )
{
- $i++;
- $tok .= $next_tok;
- $type = 'F';
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i + 1, $rtokens,
+ $max_token_index );
+
+ # check for a quoted word like "-w=>xx";
+ # it is sufficient to just check for a following '='
+ if ( $next_nonblank_token eq '=' ) {
+ $type = 'm';
+ }
+ else {
+ $i++;
+ $tok .= $next_tok;
+ $type = 'F';
+ }
}
elsif ( $expecting == TERM ) {
my $number = scan_number();
# These block types terminate statements and do not need a trailing
# semicolon
- # patched for SWITCH/CASE:
+ # patched for SWITCH/CASE/
my %is_zero_continuation_block_type;
- @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
+ @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
if elsif else unless while until for foreach switch case given when);
@is_zero_continuation_block_type{@_} = (1) x scalar(@_);
# ref: camel 3 p 147,
# but perl may accept undocumented flags
+ # perl 5.10 adds 'p' (preserve)
my %quote_modifiers = (
- 's' => '[cegimosx]',
+ 's' => '[cegimosxp]',
'y' => '[cds]',
'tr' => '[cds]',
- 'm' => '[cgimosx]',
- 'qr' => '[imosx]',
+ 'm' => '[cgimosxp]',
+ 'qr' => '[imosxp]',
'q' => "",
'qq' => "",
'qw' => "",
$block_type = $last_nonblank_block_type;
$container_type = $last_nonblank_container_type;
$type_sequence = $last_nonblank_type_sequence;
+ $indent_flag = 0;
$peeked_ahead = 0;
# tokenization is done in two stages..
$routput_block_type->[$i] = "";
$routput_container_type->[$i] = "";
$routput_type_sequence->[$i] = "";
+ $routput_indent_flag->[$i] = 0;
}
$i = -1;
$i_tok = -1;
$routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence;
+ $routput_indent_flag->[$i_tok] = $indent_flag;
}
my $pre_tok = $$rtokens[$i]; # get the next pre-token
my $pre_type = $$rtoken_type[$i]; # and type
$block_type = ""; # blank for all tokens except code block braces
$container_type = ""; # blank for all tokens except some parens
$type_sequence = ""; # blank for all tokens except ?/:
+ $indent_flag = 0;
$prototype = ""; # blank for all tokens except user defined subs
$i_tok = $i;
}
}
- # quote a bare word within braces..like xxx->{s}; note that we
- # must be sure this is not a structural brace, to avoid
- # mistaking {s} in the following for a quoted bare word:
- # for(@[){s}bla}BLA}
- if ( ( $last_nonblank_type eq 'L' )
- && ( $next_nonblank_token eq '}' ) )
+ # quote a bare word within braces..like xxx->{s}; note that we
+ # must be sure this is not a structural brace, to avoid
+ # mistaking {s} in the following for a quoted bare word:
+ # for(@[){s}bla}BLA}
+ # Also treat q in something like var{-q} as a bare word, not quote operator
+ ##if ( ( $last_nonblank_type eq 'L' )
+ ## && ( $next_nonblank_token eq '}' ) )
+ if (
+ $next_nonblank_token eq '}'
+ && (
+ $last_nonblank_type eq 'L'
+ || ( $last_nonblank_type eq 'm'
+ && $last_last_nonblank_type eq 'L' )
+ )
+ )
{
$type = 'w';
next;
&& label_ok()
)
{
- if ( $tok !~ /A-Z/ ) {
+ if ( $tok !~ /[A-Z]/ ) {
push @{ $tokenizer_self->{_rlower_case_labels_at} },
$input_line_number;
}
# note: ';' '{' and '}' in list above
# because continues can follow bare blocks;
# ':' is labeled block
- warning("'$tok' should follow a block\n");
+ #
+ ############################################
+ # NOTE: This check has been deactivated because
+ # continue has an alternative usage for given/when
+ # blocks in perl 5.10
+ ## warning("'$tok' should follow a block\n");
+ ############################################
}
}
elsif ( $tok eq 'when' || $tok eq 'case' ) {
$statement_type = $tok; # next '{' is block
}
+
+ # indent trailing if/unless/while/until
+ # outdenting will be handled by later indentation loop
+ if ( $tok =~ /^(if|unless|while|until)$/
+ && $next_nonblank_token ne '(' )
+ {
+ $indent_flag = 1;
+ }
}
# check for inline label following
# not treated as keywords:
if (
(
- $tok eq 'case'
+ $tok eq 'case'
&& $brace_type[$brace_depth] eq 'switch'
)
|| ( $tok eq 'when'
$routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence;
+ $routput_indent_flag->[$i_tok] = $indent_flag;
}
unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
{ # scan the list of pre-tokens indexes
# self-checking for valid token types
- my $type = $routput_token_type->[$i];
+ my $type = $routput_token_type->[$i];
+ my $forced_indentation_flag = $routput_indent_flag->[$i];
+
+ # See if we should undo the $forced_indentation_flag.
+ # Forced indentation after 'if', 'unless', 'while' and 'until'
+ # expressions without trailing parens is optional and doesn't
+ # always look good. It is usually okay for a trailing logical
+ # expression, but if the expression is a function call, code block,
+ # or some kind of list it puts in an unwanted extra indentation
+ # level which is hard to remove.
+ #
+ # Example where extra indentation looks ok:
+ # return 1
+ # if $det_a < 0 and $det_b > 0
+ # or $det_a > 0 and $det_b < 0;
+ #
+ # Example where extra indentation is not needed because
+ # the eval brace also provides indentation:
+ # print "not " if defined eval {
+ # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
+ # };
+ #
+ # The following rule works fairly well:
+ # Undo the flag if the end of this line, or start of the next
+ # line, is an opening container token or a comma.
+ # This almost always works, but if not after another pass it will
+ # be stable.
+ if ( $forced_indentation_flag && $type eq 'k' ) {
+ my $ixlast = -1;
+ my $ilast = $routput_token_list->[$ixlast];
+ my $toklast = $routput_token_type->[$ilast];
+ if ( $toklast eq '#' ) {
+ $ixlast--;
+ $ilast = $routput_token_list->[$ixlast];
+ $toklast = $routput_token_type->[$ilast];
+ }
+ if ( $toklast eq 'b' ) {
+ $ixlast--;
+ $ilast = $routput_token_list->[$ixlast];
+ $toklast = $routput_token_type->[$ilast];
+ }
+ if ( $toklast =~ /^[\{,]$/ ) {
+ $forced_indentation_flag = 0;
+ }
+ else {
+ ( $toklast, my $i_next ) =
+ find_next_nonblank_token( $max_token_index, $rtokens,
+ $max_token_index );
+ if ( $toklast =~ /^[\{,]$/ ) {
+ $forced_indentation_flag = 0;
+ }
+ }
+ }
+
+ # if we are already in an indented if, see if we should outdent
+ if ($indented_if_level) {
+
+ # don't try to nest trailing if's - shouldn't happen
+ if ( $type eq 'k' ) {
+ $forced_indentation_flag = 0;
+ }
+
+ # check for the normal case - outdenting at next ';'
+ elsif ( $type eq ';' ) {
+ if ( $level_in_tokenizer == $indented_if_level ) {
+ $forced_indentation_flag = -1;
+ $indented_if_level = 0;
+ }
+ }
+
+ # handle case of missing semicolon
+ elsif ( $type eq '}' ) {
+ if ( $level_in_tokenizer == $indented_if_level ) {
+ $indented_if_level = 0;
+
+ # TBD: This could be a subroutine call
+ $level_in_tokenizer--;
+ if ( @{$rslevel_stack} > 1 ) {
+ pop( @{$rslevel_stack} );
+ }
+ if ( length($nesting_block_string) > 1 )
+ { # true for valid script
+ chop $nesting_block_string;
+ chop $nesting_list_string;
+ }
+
+ }
+ }
+ }
+
my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
$level_i = $level_in_tokenizer;
# Note: these are set so that the leading braces have a HIGHER
# level than their CONTENTS, which is convenient for indentation
# Also, define continuation indentation for each token.
- if ( $type eq '{' || $type eq 'L' ) {
+ if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
+ {
# use environment before updating
$container_environment =
push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
$level_in_tokenizer++;
+ if ($forced_indentation_flag) {
+
+ # break BEFORE '?' when there is forced indentation
+ if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
+ if ( $type eq 'k' ) {
+ $indented_if_level = $level_in_tokenizer;
+ }
+ }
+
if ( $routput_block_type->[$i] ) {
$nesting_block_flag = 1;
$nesting_block_string .= '1';
else {
$bit = 1
unless
- $is_logical_container{ $routput_container_type->[$i]
- };
+ $is_logical_container{ $routput_container_type->[$i]
+ };
}
}
$nesting_list_string .= $bit;
if (
!$routput_block_type->[$i] # patch: skip for BLOCK
&& ($in_statement_continuation)
+ && !( $forced_indentation_flag && $type eq ':' )
)
{
$total_ci += $in_statement_continuation
$in_statement_continuation = 0;
}
- elsif ( $type eq '}' || $type eq 'R' ) {
+ elsif ($type eq '}'
+ || $type eq 'R'
+ || $forced_indentation_flag < 0 )
+ {
# only a nesting error in the script would prevent popping here
if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
# ...and include all block types except user subs with
# block prototypes and these: (sort|grep|map|do|eval)
-# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
+# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
elsif (
$is_zero_continuation_block_type{
$routput_block_type->[$i] } )
$in_statement_continuation = 1
if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
}
+
+ elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
}
# use environment after updating
# patch for dor.t (defined or).
if ( $tok eq '/'
- && $next_type eq '/'
+ && $next_type eq '/'
&& $last_nonblank_token eq ']' )
{
$op_expected = OPERATOR;
# otherwise, look at previous token. This must be a code block if
# it follows any of these:
-# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
+# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
elsif ( $is_code_block_token{$last_nonblank_token} ) {
- return $last_nonblank_token;
+
+ # Bug Patch: Note that the opening brace after the 'if' in the following
+ # snippet is an anonymous hash ref and not a code block!
+ # print 'hi' if { x => 1, }->{x};
+ # We can identify this situation because the last nonblank type
+ # will be a keyword (instead of a closing paren)
+ if ( $last_nonblank_token =~ /^(if|unless)$/
+ && $last_nonblank_type eq 'k' )
+ {
+ return "";
+ }
+ else {
+ return $last_nonblank_token;
+ }
}
# or a sub definition
# way.
sub increase_nesting_depth {
- my ( $a, $pos ) = @_;
+ my ( $aa, $pos ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
# @current_sequence_number, @depth_array, @starting_line_of_current_depth
- my $b;
- $current_depth[$a]++;
+ my $bb;
+ $current_depth[$aa]++;
+ $total_depth++;
+ $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
my $input_line_number = $tokenizer_self->{_last_line_number};
my $input_line = $tokenizer_self->{_line_text};
# Sequence numbers increment by number of items. This keeps
# a unique set of numbers but still allows the relative location
# of any type to be determined.
- $nesting_sequence_number[$a] += scalar(@closing_brace_names);
- my $seqno = $nesting_sequence_number[$a];
- $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
+ $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
+ my $seqno = $nesting_sequence_number[$aa];
+ $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
- $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
+ $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
[ $input_line_number, $input_line, $pos ];
- for $b ( 0 .. $#closing_brace_names ) {
- next if ( $b == $a );
- $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
+ for $bb ( 0 .. $#closing_brace_names ) {
+ next if ( $bb == $aa );
+ $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
+ }
+
+ # set a flag for indenting a nested ternary statement
+ my $indent = 0;
+ if ( $aa == QUESTION_COLON ) {
+ $nested_ternary_flag[ $current_depth[$aa] ] = 0;
+ if ( $current_depth[$aa] > 1 ) {
+ if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
+ my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
+ if ( $pdepth == $total_depth - 1 ) {
+ $indent = 1;
+ $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
+ }
+ }
+ }
}
- return $seqno;
+ return ( $seqno, $indent );
}
sub decrease_nesting_depth {
- my ( $a, $pos ) = @_;
+ my ( $aa, $pos ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
# @current_sequence_number, @depth_array, @starting_line_of_current_depth
- my $b;
+ my $bb;
my $seqno = 0;
my $input_line_number = $tokenizer_self->{_last_line_number};
my $input_line = $tokenizer_self->{_line_text};
- if ( $current_depth[$a] > 0 ) {
+ my $outdent = 0;
+ $total_depth--;
+ if ( $current_depth[$aa] > 0 ) {
- $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
+ # set a flag for un-indenting after seeing a nested ternary statement
+ $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
+ if ( $aa == QUESTION_COLON ) {
+ $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
+ }
- # check that any brace types $b contained within are balanced
- for $b ( 0 .. $#closing_brace_names ) {
- next if ( $b == $a );
+ # check that any brace types $bb contained within are balanced
+ for $bb ( 0 .. $#closing_brace_names ) {
+ next if ( $bb == $aa );
- unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
- $current_depth[$b] )
+ unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
+ $current_depth[$bb] )
{
my $diff =
- $current_depth[$b] -
- $depth_array[$a][$b][ $current_depth[$a] ];
+ $current_depth[$bb] -
+ $depth_array[$aa][$bb][ $current_depth[$aa] ];
# don't whine too many times
my $saw_brace_error = get_saw_brace_error();
{
interrupt_logfile();
my $rsl =
- $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+ $starting_line_of_current_depth[$aa]
+ [ $current_depth[$aa] ];
my $sl = $$rsl[0];
my $rel = [ $input_line_number, $input_line, $pos ];
my $el = $$rel[0];
}
my $bname =
( $diff > 0 )
- ? $opening_brace_names[$b]
- : $closing_brace_names[$b];
+ ? $opening_brace_names[$bb]
+ : $closing_brace_names[$bb];
write_error_indicator_pair( @$rsl, '^' );
my $msg = <<"EOM";
-Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
+Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM
if ( $diff > 0 ) {
my $rml =
- $starting_line_of_current_depth[$b]
- [ $current_depth[$b] ];
+ $starting_line_of_current_depth[$bb]
+ [ $current_depth[$bb] ];
my $ml = $$rml[0];
$msg .=
" The most recent un-matched $bname is on line $ml\n";
increment_brace_error();
}
}
- $current_depth[$a]--;
+ $current_depth[$aa]--;
}
else {
my $saw_brace_error = get_saw_brace_error();
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
my $msg = <<"EOM";
-There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
+There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
}
increment_brace_error();
}
- return $seqno;
+ return ( $seqno, $outdent );
}
sub check_final_nesting_depths {
- my ($a);
+ my ($aa);
# USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
- for $a ( 0 .. $#closing_brace_names ) {
+ for $aa ( 0 .. $#closing_brace_names ) {
- if ( $current_depth[$a] ) {
- my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+ if ( $current_depth[$aa] ) {
+ my $rsl =
+ $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
my $sl = $$rsl[0];
my $msg = <<"EOM";
-Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
-The most recent un-matched $opening_brace_names[$a] is on line $sl
+Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
+The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
indicate_error( $msg, @$rsl, '^' );
increment_brace_error();
# doesn't get in the way of good scripts.
# Complain if a filehandle has any lower case
- # letters. This is suggested good practice, but the
- # main reason for this warning is that prior to
- # release 20010328, perltidy incorrectly parsed a
- # function call after a print/printf, with the
- # result that a space got added before the opening
- # paren, thereby converting the function name to a
- # filehandle according to perl's weird rules. This
- # will not usually generate a syntax error, so this
- # is a potentially serious bug. By warning
- # of filehandles with any lower case letters,
- # followed by opening parens, we will help the user
- # find almost all of these older errors.
- # use 'sub_name' because something like
+ # letters. This is suggested good practice.
+ # Use 'sub_name' because something like
# main::MYHANDLE is ok for filehandle
if ( $sub_name =~ /[a-z]/ ) {
# USES GLOBAL VARIABLES: $context, $last_nonblank_token,
# $last_nonblank_type
- my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_;
+ my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
+ $expecting )
+ = @_;
my $i_begin = $i;
my $type = '';
my $tok_begin = $$rtokens[$i_begin];
# punctuation variable?
# testfile: cunningham4.pl
- if ( $identifier eq '&' ) {
+ #
+ # We have to be careful here. If we are in an unknown state,
+ # we will reject the punctuation variable. In the following
+ # example the '&' is a binary operator but we are in an unknown
+ # state because there is no sigil on 'Prima', so we don't
+ # know what it is. But it is a bad guess that
+ # '&~' is a punctuation variable.
+ # $self->{text}->{colorMap}->[
+ # Prima::PodView::COLOR_CODE_FOREGROUND
+ # & ~tb::COLOR_INDEX ] =
+ # $sec->{ColorCode}
+ if ( $identifier eq '&' && $expecting ) {
$identifier .= $tok;
}
else {
# -1 - no
my ( $i, $rtokens, $max_token_index ) = @_;
my $next_token = $$rtokens[ $i + 1 ];
- if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier
+ if ( $next_token =~ /^[cgimosxp]/ ) { $i++; } # skip possible modifier
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
# These tokens may precede a code block
# patched for SWITCH/CASE
- @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
+ @_ =
+ qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach map grep sort
switch case given when);
@is_code_block_token{@_} = (1) x scalar(@_);
LE
LT
NE
+ UNITCHECK
abs
accept
alarm
bind
binmode
bless
+ break
caller
chdir
chmod
formatter => $formatter, # callback object (see below)
dump_options => $dump_options,
dump_options_type => $dump_options_type,
+ prefilter => $prefilter_coderef,
+ postfilter => $postfilter_coderef,
);
=head1 DESCRIPTION
hash. This hash will receive all abbreviations used by Perl::Tidy. See the
demo program F<perltidyrc_dump.pl> for example usage.
+=item prefilter
+
+A code reference that will be applied to the source before tidying. It is
+expected to take the full content as a string in its input, and output the
+transformed content.
+
+=item postfilter
+
+A code reference that will be applied to the tidied result before outputting.
+It is expected to take the full content as a string in its input, and output
+the transformed content.
+
+Note: A convenient way to check the function of your custom prefilter and
+postfilter code is to use the --notidy option, first with just the prefilter
+and then with both the prefilter and postfilter. See also the file
+B<filter_example.pl> in the perltidy distribution.
+
=back
=head1 EXAMPLE
=head1 VERSION
-This man page documents Perl::Tidy version 20070508.
+This man page documents Perl::Tidy version 20101217.
=head1 AUTHOR