#
# perltidy - a perl script indenter and formatter
#
-# Copyright (c) 2000-2006 by Steve Hancock
+# Copyright (c) 2000-2007 by Steve Hancock
# Distributed under the GPL license agreement; see file COPYING
#
# This program is free software; you can redistribute it and/or modify
# Yves Orton supplied coding to help detect Windows versions.
# Axel Rose supplied a patch for MacPerl.
# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
+# Dan Tyrell contributed a patch for binary I/O.
+# Ueli Hugenschmidt contributed a patch for -fpsc
# Many others have supplied key ideas, suggestions, and bug reports;
# see the CHANGES file.
#
use File::Basename;
BEGIN {
- ( $VERSION = q($Id: Tidy.pm,v 1.56 2006/07/19 23:13:33 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+ ( $VERSION = q($Id: Tidy.pm,v 1.73 2007/12/05 17:51:17 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
}
sub streamhandle {
my $test_file = $path . $name;
my ( $test_name, $test_path ) = fileparse($test_file);
return $test_file if ( $test_name eq $name );
- return undef if ( $^O eq 'VMS' );
+ return undef if ( $^O eq 'VMS' );
# this should work at least for Windows and Unix:
$test_file = $path . '/' . $name;
foreach my $op ( @{$roption_string} ) {
my $opt = $op;
my $flag = "";
+
+ # Examples:
+ # some-option=s
+ # some-option=i
+ # some-option:i
+ # some-option!
if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
$opt = $1;
$flag = $2;
return if ($quit_now);
+ # make printable string of options for this run as possible diagnostic
+ my $readable_options = readable_options( $rOpts, $roption_string );
+
# dump from command line
if ( $rOpts->{'dump-options'} ) {
- dump_options( $rOpts, $roption_string );
+ print STDOUT $readable_options;
exit 1;
}
# make the pattern of file extensions that we shouldn't touch
my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
if ($output_extension) {
- $_ = quotemeta($output_extension);
- $forbidden_file_extensions .= "|$_";
+ my $ext = quotemeta($output_extension);
+ $forbidden_file_extensions .= "|$ext";
}
if ( $in_place_modify && $backup_extension ) {
- $_ = quotemeta($backup_extension);
- $forbidden_file_extensions .= "|$_";
+ my $ext = quotemeta($backup_extension);
+ $forbidden_file_extensions .= "|$ext";
}
$forbidden_file_extensions .= ')$';
if ( $rOpts->{'preserve-line-endings'} ) {
$line_separator = find_input_line_ending($input_file);
}
- $line_separator = "\n" unless defined($line_separator);
+
+ # Eventually all I/O may be done with binmode, but for now it is
+ # only done when a user requests a particular line separator
+ # through the -ple or -ole flags
+ my $binmode = 0;
+ if ( defined($line_separator) ) { $binmode = 1 }
+ else { $line_separator = "\n" }
my $sink_object =
Perl::Tidy::LineSink->new( $output_file, $tee_file,
- $line_separator, $rOpts, $rpending_logfile_message );
+ $line_separator, $rOpts, $rpending_logfile_message, $binmode );
#---------------------------------------------------------------
# initialize the error logger
$saw_extrude );
write_logfile_header(
$rOpts, $logger_object, $config_file,
- $rraw_options, $Windows_type
+ $rraw_options, $Windows_type, $readable_options,
);
if ($$rpending_logfile_message) {
$logger_object->write_logfile_entry($$rpending_logfile_message);
my $fout = IO::File->new("> $input_file")
or die
"problem opening $input_file for write for -b option; check directory permissions: $!\n";
+ binmode $fout;
my $line;
while ( $line = $output_file->getline() ) {
$fout->print($line);
}
sub write_logfile_header {
- my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
- @_;
+ my (
+ $rOpts, $logger_object, $config_file,
+ $rraw_options, $Windows_type, $readable_options
+ ) = @_;
$logger_object->write_logfile_entry(
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
);
$logger_object->write_logfile_entry(
"------------------------------------\n");
- foreach ( keys %{$rOpts} ) {
- $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
- }
+ $logger_object->write_logfile_entry($readable_options);
+
$logger_object->write_logfile_entry(
"------------------------------------\n");
}
$add_option->( 'standard-output', 'st', '!' );
$add_option->( 'warning-output', 'w', '!' );
+ # options which are both toggle switches and values moved here
+ # to hide from tidyview (which does not show category 0 flags):
+ # -ole moved here from category 1
+ # -sil moved here from category 2
+ $add_option->( 'output-line-ending', 'ole', '=s' );
+ $add_option->( 'starting-indentation-level', 'sil', '=i' );
+
########################################
$category = 1; # Basic formatting options
########################################
$add_option->( 'entab-leading-whitespace', 'et', '=i' );
$add_option->( 'indent-columns', 'i', '=i' );
$add_option->( 'maximum-line-length', 'l', '=i' );
- $add_option->( 'output-line-ending', 'ole', '=s' );
$add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
$add_option->( 'preserve-line-endings', 'ple', '!' );
$add_option->( 'tabs', 't', '!' );
$category = 2; # Code indentation control
########################################
$add_option->( 'continuation-indentation', 'ci', '=i' );
- $add_option->( 'starting-indentation-level', 'sil', '=i' );
$add_option->( 'line-up-parentheses', 'lp', '!' );
$add_option->( 'outdent-keyword-list', 'okwl', '=s' );
$add_option->( 'outdent-keywords', 'okw', '!' );
$add_option->( 'hanging-side-comments', 'hsc', '!' );
$add_option->( 'indent-block-comments', 'ibc', '!' );
$add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
+ $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
$add_option->( 'minimum-space-to-comment', 'msc', '=i' );
$add_option->( 'outdent-long-comments', 'olc', '!' );
$add_option->( 'outdent-static-block-comments', 'osbc', '!' );
$add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
$add_option->( 'cuddled-else', 'ce', '!' );
$add_option->( 'delete-old-newlines', 'dnl', '!' );
- $add_option->( 'opening-brace-always-on-right', 'bar', '' );
+ $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
$add_option->( 'opening-brace-on-new-line', 'bl', '!' );
$add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
$add_option->( 'opening-paren-right', 'opr', '!' );
$add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
$add_option->( 'want-break-after', 'wba', '=s' );
$add_option->( 'want-break-before', 'wbb', '=s' );
+ $add_option->( 'break-after-all-operators', 'baao', '!' );
+ $add_option->( 'break-before-all-operators', 'bbao', '!' );
+ $add_option->( 'keep-interior-semicolons', 'kis', '!' );
########################################
$category = 6; # Controlling list formatting
# if min is undefined, there is no lower limit
# if max is undefined, there is no upper limit
# Parameters not listed here have defaults
- $option_range{'format'} = [qw(tidy html user)];
- $option_range{'output-line-ending'} = [qw(dos win mac unix)];
-
- $option_range{'block-brace-tightness'} = [ 0, 2 ];
- $option_range{'brace-tightness'} = [ 0, 2 ];
- $option_range{'paren-tightness'} = [ 0, 2 ];
- $option_range{'square-bracket-tightness'} = [ 0, 2 ];
-
- $option_range{'block-brace-vertical-tightness'} = [ 0, 2 ];
- $option_range{'brace-vertical-tightness'} = [ 0, 2 ];
- $option_range{'brace-vertical-tightness-closing'} = [ 0, 2 ];
- $option_range{'paren-vertical-tightness'} = [ 0, 2 ];
- $option_range{'paren-vertical-tightness-closing'} = [ 0, 2 ];
- $option_range{'square-bracket-vertical-tightness'} = [ 0, 2 ];
- $option_range{'square-bracket-vertical-tightness-closing'} = [ 0, 2 ];
- $option_range{'vertical-tightness'} = [ 0, 2 ];
- $option_range{'vertical-tightness-closing'} = [ 0, 2 ];
-
- $option_range{'closing-brace-indentation'} = [ 0, 3 ];
- $option_range{'closing-paren-indentation'} = [ 0, 3 ];
- $option_range{'closing-square-bracket-indentation'} = [ 0, 3 ];
- $option_range{'closing-token-indentation'} = [ 0, 3 ];
-
- $option_range{'closing-side-comment-else-flag'} = [ 0, 2 ];
- $option_range{'comma-arrow-breakpoints'} = [ 0, 3 ];
-
-# Note: we could actually allow negative ci if someone really wants it:
-# $option_range{'continuation-indentation'} = [ undef, undef ];
+ %option_range = (
+ 'format' => [ 'tidy', 'html', 'user' ],
+ 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
+
+ 'block-brace-tightness' => [ 0, 2 ],
+ 'brace-tightness' => [ 0, 2 ],
+ 'paren-tightness' => [ 0, 2 ],
+ 'square-bracket-tightness' => [ 0, 2 ],
+
+ 'block-brace-vertical-tightness' => [ 0, 2 ],
+ 'brace-vertical-tightness' => [ 0, 2 ],
+ 'brace-vertical-tightness-closing' => [ 0, 2 ],
+ 'paren-vertical-tightness' => [ 0, 2 ],
+ 'paren-vertical-tightness-closing' => [ 0, 2 ],
+ 'square-bracket-vertical-tightness' => [ 0, 2 ],
+ 'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
+ 'vertical-tightness' => [ 0, 2 ],
+ 'vertical-tightness-closing' => [ 0, 2 ],
+
+ 'closing-brace-indentation' => [ 0, 3 ],
+ 'closing-paren-indentation' => [ 0, 3 ],
+ 'closing-square-bracket-indentation' => [ 0, 3 ],
+ 'closing-token-indentation' => [ 0, 3 ],
+
+ 'closing-side-comment-else-flag' => [ 0, 2 ],
+ 'comma-arrow-breakpoints' => [ 0, 3 ],
+ );
+
+ # Note: we could actually allow negative ci if someone really wants it:
+ # $option_range{'continuation-indentation'} = [ undef, undef ];
#---------------------------------------------------------------
# Assign default values to the above options here, except
noblanks-before-subs
nofuzzy-line-length
notabs
+ norecombine
)
],
# Style suggested in Damian Conway's Perl Best Practices
'perl-best-practices' => [
qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
-q(wbb=% + - * / x != == >= <= =~ !~ < > | & >= < = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=)
+q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
],
# Additional styles can be added here
# entab leading whitespace has priority over the older 'tabs' option
if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
}
-
- if ( $rOpts->{'output-line-ending'} ) {
- unless ( is_unix() ) {
- warn "ignoring -ole; only works under unix\n";
- $rOpts->{'output-line-ending'} = undef;
- }
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- unless ( is_unix() ) {
- warn "ignoring -ple; only works under unix\n";
- $rOpts->{'preserve-line-endings'} = undef;
- }
- }
-
}
sub expand_command_abbreviations {
}
sub is_unix {
- return ( $^O !~ /win32|dos/i )
+ return
+ ( $^O !~ /win32|dos/i )
&& ( $^O ne 'VMS' )
&& ( $^O ne 'OS2' )
&& ( $^O ne 'MacOS' );
# 9x/Me box. Contributed by: Yves Orton.
my $rpending_complaint = shift;
- my $os = (@_) ? shift: Win_OS_Type();
+ my $os = (@_) ? shift : Win_OS_Type();
return unless $os;
my $system = "";
print STDOUT "$$rconfig_file_chatter";
if ($fh) {
print STDOUT "# Dump of file: '$config_file'\n";
- while ( $_ = $fh->getline() ) { print STDOUT }
+ while ( my $line = $fh->getline() ) { print STDOUT $line }
eval { $fh->close() };
}
else {
my $name = undef;
my $line_no;
- while ( $_ = $fh->getline() ) {
+ while ( my $line = $fh->getline() ) {
$line_no++;
- chomp;
- next if /^\s*#/; # skip full-line comment
- ( $_, $death_message ) = strip_comment( $_, $config_file, $line_no );
+ chomp $line;
+ next if $line =~ /^\s*#/; # skip full-line comment
+ ( $line, $death_message ) =
+ strip_comment( $line, $config_file, $line_no );
last if ($death_message);
- s/^\s*(.*?)\s*$/$1/; # trim both ends
- next unless $_;
+ $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
+ next unless $line;
# look for something of the general form
# newname { body }
# or just
# body
- if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
+ if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
my ( $newname, $body, $curly ) = ( $2, $3, $4 );
# handle a new alias definition
foreach (@_) { print STDOUT "$_\n" }
}
-sub dump_options {
+sub readable_options {
- # write the options back out as a valid .perltidyrc file
+ # return options for this run as a string which could be
+ # put in a perltidyrc file
my ( $rOpts, $roption_string ) = @_;
my %Getopt_flags;
- my $rGetopt_flags = \%Getopt_flags;
+ my $rGetopt_flags = \%Getopt_flags;
+ my $readable_options = "# Final parameter set for this run.\n";
+ $readable_options .=
+ "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
foreach my $opt ( @{$roption_string} ) {
my $flag = "";
if ( $opt =~ /(.*)(!|=.*)$/ ) {
$rGetopt_flags->{$opt} = $flag;
}
}
- print STDOUT "# Final parameter set for this run:\n";
foreach my $key ( sort keys %{$rOpts} ) {
my $flag = $rGetopt_flags->{$key};
my $value = $rOpts->{$key};
else {
# shouldn't happen
- print
+ $readable_options .=
"# ERROR in dump_options: unrecognized flag $flag for $key\n";
}
}
- print STDOUT $prefix . $key . $suffix . "\n";
+ $readable_options .= $prefix . $key . $suffix . "\n";
}
+ return $readable_options;
}
sub show_version {
print <<"EOM";
This is perltidy, v$VERSION
-Copyright 2000-2006, Steve Hancock
+Copyright 2000-2007, Steve Hancock
Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
-wbb=s want break before tokens in string
Following Old Breakpoints
+ -kis keep interior semicolons. Allows multiple statements per line.
-boc break at old comma breaks: turns off all automatic list formatting
-bol break at old logical breakpoints: or, and, ||, && (default)
-bok break at old list keyword breakpoints such as map, sort (default)
-ibc indent block comments (default)
-isbc indent spaced block comments; may indent unless no leading space
-msc=n minimum desired spaces to side comment, default 4
+ -fpsc=n fix position for side comments; default 0;
-csc add or update closing side comments after closing BLOCK brace
-dcsc delete closing side comments created by a -csc command
-cscp=s change closing side comment prefix to be other than '## end'
EOM
}
my $i = $self->[2]++;
- ##my $line = $self->[0]->[$i];
return $self->[0]->[$i];
}
return $line;
}
-sub old_get_line {
- my $self = shift;
- my $line = undef;
- my $fh = $self->{_fh};
- my $fh_copy = $self->{_fh_copy};
- $line = $fh->getline();
- if ( $line && $fh_copy ) { $fh_copy->print($line); }
- return $line;
-}
-
#####################################################################
#
# the Perl::Tidy::LineSink class supplies a write_line method for
sub new {
my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
- $rpending_logfile_message )
+ $rpending_logfile_message, $binmode )
= @_;
my $fh = undef;
my $fh_copy = undef;
( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
unless ($fh) { die "Cannot write to output stream\n"; }
$output_file_open = 1;
+ if ($binmode) {
+ if ( ref($fh) eq 'IO::File' ) {
+ binmode $fh;
+ }
+ if ( $output_file eq '-' ) { binmode STDOUT }
+ }
}
# in order to check output syntax when standard output is used,
_tee_file => $tee_file,
_tee_file_opened => 0,
_line_separator => $line_separator,
+ _binmode => $binmode,
}, $class;
}
my $fh_tee;
$fh_tee = IO::File->new(">$tee_file")
or die("couldn't open TEE file $tee_file: $!\n");
+ binmode $fh_tee if $self->{_binmode};
$self->{_tee_file_opened} = 1;
$self->{_fh_tee} = $fh_tee;
}
if ( $self->get_use_prefix() > 0 ) {
my $input_line_number =
Perl::Tidy::Tokenizer::get_input_line_number();
- print $fh_warnings "$input_line_number:\t@_";
+ $fh_warnings->print("$input_line_number:\t@_");
$self->write_logfile_entry("WARNING: @_");
}
else {
- print $fh_warnings @_;
+ $fh_warnings->print(@_);
$self->write_logfile_entry(@_);
}
}
$self->{_warning_count} = $warning_count;
if ( $warning_count == WARNING_LIMIT ) {
- print $fh_warnings "No further warnings will be given";
+ $fh_warnings->print("No further warnings will be given\n");
}
}
}
my $warning_count = $self->{_warning_count};
my $saw_code_bug = $self->{_saw_code_bug};
- my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
+ my $save_logfile =
+ ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
|| $saw_code_bug == 1
|| $rOpts->{'logfile'};
my $log_file = $self->{_log_file};
if ($fh) {
my $routput_array = $self->{_output_array};
foreach ( @{$routput_array} ) { $fh->print($_) }
- eval { $fh->close() };
+ eval { $fh->close() };
}
}
}
$rOpts_format_skipping
$rOpts_space_function_paren
$rOpts_space_keyword_paren
+ $rOpts_keep_interior_semicolons
$half_maximum_line_length
@_ = qw(and or err);
@is_and_or{@_} = (1) x scalar(@_);
- # Identify certain operators which often occur in chains
- @_ = qw(&& || and or : ? .);
+ # Identify certain operators which often occur in chains.
+ # Note: the minus (-) causes a side effect of padding of the first line in
+ # something like this (by sub set_logical_padding):
+ # Checkbutton => 'Transmission checked',
+ # -variable => \$TRANS
+ # This usually improves appearance so it seems ok.
+ @_ = qw(&& || and or : ? . + - * /);
@is_chain_operator{@_} = (1) x scalar(@_);
# We can remove semicolons after blocks preceded by these keywords
- @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
+ @_ =
+ qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless while until for foreach);
@is_block_without_semicolon{@_} = (1) x scalar(@_);
sub _decrement_count { --$_count }
}
+sub trim {
+
+ # trim leading and trailing whitespace from a string
+ $_[0] =~ s/\s+$//;
+ $_[0] =~ s/^\s+//;
+ return $_[0];
+}
+
+sub split_words {
+
+ # given a string containing words separated by whitespace,
+ # return the list of words
+ my ($str) = @_;
+ return unless $str;
+ $str =~ s/\s+$//;
+ $str =~ s/^\s+//;
+ return split( /\s+/, $str );
+}
+
# interface to Perl::Tidy::Logger routines
sub warning {
if ($logger_object) {
my $line_type = $line_of_tokens->{_line_type};
my $input_line = $line_of_tokens->{_line_text};
- my $want_blank_line_next = 0;
-
# _line_type codes are:
# SYSTEM - system-specific code before hash-bang line
# CODE - line of perl code (including comments)
# END_START - __END__ line
# END - unidentified text following __END__
# ERROR - we are in big trouble, probably not a perl script
- #
+
+ # put a blank line after an =cut which comes before __END__ and __DATA__
+ # (required by podchecker)
+ if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
+ }
+
# handle line of code..
if ( $line_type eq 'CODE' ) {
# any other lines of type END or DATA.
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
- if ( !$skip_line
+ if ( !$skip_line
&& $line_type eq 'POD_START'
&& $last_line_type !~ /^(END|DATA)$/ )
{
want_blank_line();
}
-
- # patch to put a blank line after =cut
- # (required by podchecker)
- if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $want_blank_line_next = 1;
- }
}
# leave the blank counters in a predictable state
if ( !$skip_line ) {
if ($tee_line) { $file_writer_object->tee_on() }
write_unindented_line($input_line);
- if ($tee_line) { $file_writer_object->tee_off() }
- if ($want_blank_line_next) { want_blank_line(); }
+ if ($tee_line) { $file_writer_object->tee_off() }
}
}
$last_line_type = $line_type;
# handle the standard indentation scheme
#-------------------------------------------
unless ($rOpts_line_up_parentheses) {
- my $space_count = $ci_level * $rOpts_continuation_indentation + $level *
- $rOpts_indent_columns;
+ my $space_count =
+ $ci_level * $rOpts_continuation_indentation +
+ $level * $rOpts_indent_columns;
my $ci_spaces =
( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
# implement outdenting preferences for keywords
%outdent_keyword = ();
-
- # load defaults
- @_ = qw(next last redo goto return);
-
- # override defaults if requested
- if ( $_ = $rOpts->{'outdent-keyword-list'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
+ @_ = qw(next last redo goto return); # defaults
}
# FUTURE: if not a keyword, assume that it is an identifier
}
# implement user whitespace preferences
- if ( $_ = $rOpts->{'want-left-space'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
@want_left_space{@_} = (1) x scalar(@_);
}
- if ( $_ = $rOpts->{'want-right-space'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
@want_right_space{@_} = (1) x scalar(@_);
}
- if ( $_ = $rOpts->{'nowant-left-space'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+
+ if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
@want_left_space{@_} = (-1) x scalar(@_);
}
- if ( $_ = $rOpts->{'nowant-right-space'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
@want_right_space{@_} = (-1) x scalar(@_);
}
if ( $rOpts->{'dump-want-left-space'} ) {
@space_after_keyword{@_} = (1) x scalar(@_);
# allow user to modify these defaults
- if ( $_ = $rOpts->{'space-after-keyword'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
@space_after_keyword{@_} = (1) x scalar(@_);
}
- if ( $_ = $rOpts->{'nospace-after-keyword'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
@space_after_keyword{@_} = (0) x scalar(@_);
}
# implement user break preferences
- if ( $_ = $rOpts->{'want-break-after'} ) {
- @_ = split /\s+/;
+ my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ . : ? && || and or err xor
+ );
+
+ my $break_after = sub {
foreach my $tok (@_) {
if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
my $lbs = $left_bond_strength{$tok};
( $lbs, $rbs );
}
}
- }
+ };
- if ( $_ = $rOpts->{'want-break-before'} ) {
- s/^\s+//;
- s/\s+$//;
- @_ = split /\s+/;
+ my $break_before = sub {
foreach my $tok (@_) {
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
( $lbs, $rbs );
}
}
- }
+ };
+
+ $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
+ $break_before->(@all_operators)
+ if ( $rOpts->{'break-before-all-operators'} );
+
+ $break_after->( split_words( $rOpts->{'want-break-after'} ) );
+ $break_before->( split_words( $rOpts->{'want-break-before'} ) );
# make note if breaks are before certain key types
%want_break_before = ();
-
- foreach
- my $tok ( '.', ',', ':', '?', '&&', '||', 'and', 'or', 'err', 'xor' )
- {
+ foreach my $tok ( @all_operators, ',' ) {
$want_break_before{$tok} =
$left_bond_strength{$tok} < $right_bond_strength{$tok};
}
%is_else_brace_follower = ();
# what can follow a multi-line anonymous sub definition closing curly:
- @_ = qw# ; : => or and && || ~~ ) #;
+ @_ = qw# ; : => or and && || ~~ !~~ ) #;
push @_, ',';
@is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
# what can follow a one-line anonynomous sub closing curly:
# one-line anonumous subs also have ']' here...
# see tk3.t and PP.pm
- @_ = qw# ; : => or and && || ) ] ~~ #;
+ @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
push @_, ',';
@is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
$rOpts->{'short-concatenation-item-length'};
$rOpts_swallow_optional_blank_lines =
$rOpts->{'swallow-optional-blank-lines'};
- $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
- $rOpts_format_skipping = $rOpts->{'format-skipping'};
- $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
- $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
- $half_maximum_line_length = $rOpts_maximum_line_length / 2;
+ $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
+ $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
+ $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
+ $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
+ $half_maximum_line_length = $rOpts_maximum_line_length / 2;
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
# pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
my ( $abbrev, $string ) = @_;
- $string =~ s/^\s+//;
- $string =~ s/\s+$//;
- my @list = split /\s+/, $string;
+ my @list = split_words($string);
my @words = ();
my %seen;
for my $i (@list) {
# retain any space after here doc operator ( hereerr.t)
|| ( $typel eq 'h' )
- # FIXME: this needs some further work; extrude.t has test cases
- # it is safest to retain any space after start of ? : operator
- # because of perl's quirky parser.
- # ie, this line will fail if you remove the space after the '?':
- # $b=join $comma ? ',' : ':', @_; # ok
- # $b=join $comma ?',' : ':', @_; # error!
- # but this is ok :)
- # $b=join $comma?',' : ':', @_; # not a problem!
- ## || ($typel eq '?')
-
# be careful with a space around ++ and --, to avoid ambiguity as to
# which token it applies
|| ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
$tokenl eq 'my'
# /^(for|foreach)$/
- && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/
+ && $is_for_foreach{$tokenll}
+ && $tokenr =~ /^\$/
)
# must have space between grep and left paren; "grep(" will fail
#use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
|| ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
+ # We must be sure that a space between a ? and a quoted string
+ # remains if the space before the ? remains. [Loca.pm, lockarea]
+ # ie,
+ # $b=join $comma ? ',' : ':', @_; # ok
+ # $b=join $comma?',' : ':', @_; # ok!
+ # $b=join $comma ?',' : ':', @_; # error!
+ # Not really required:
+ ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
+
+ # do not remove space between an '&' and a bare word because
+ # it may turn into a function evaluation, like here
+ # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
+ # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
+ || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+
; # the value of this long logic sequence is the result we want
return $result;
}
my @spaces_both_sides = qw"
+ - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
- .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~
+ .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
&&= ||= //= <=> A k f w F n C Y U G v
";
$binary_ws_rules{'R'}{'++'} = WS_NO;
$binary_ws_rules{'R'}{'--'} = WS_NO;
- $binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
- $binary_ws_rules{'w'}{':'} = WS_NO;
+ ########################################################
+ # should no longer be necessary (see niek.pl)
+ ##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
+ ##$binary_ws_rules{'w'}{':'} = WS_NO;
+ ########################################################
$binary_ws_rules{'i'}{'Q'} = WS_YES;
$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
my $j_here = $j;
++$j_here
if ( $token eq '-'
- && $last_token eq '{'
+ && $last_token eq '{'
&& $$rtoken_type[ $j + 1 ] eq 'w' );
# $j_next is where a closing token should be if
$ci_levels_to_go[$max_index_to_go] = $ci_level;
$mate_index_to_go[$max_index_to_go] = -1;
$matching_token_to_go[$max_index_to_go] = '';
+ $bond_strength_to_go[$max_index_to_go] = 0;
# Note: negative levels are currently retained as a diagnostic so that
# the 'final indentation level' is correctly reported for bad scripts.
substr( $input_line, 0, 1 ) eq '#';
}
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
+ if (
+ $jmax == 0
+ && $$rtoken_type[0] eq '#'
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
+ )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space = 1;
+ }
+
# create a hanging side comment if appropriate
if (
$jmax == 0
if (
$rOpts->{'indent-block-comments'}
- && ( !$rOpts->{'indent-spaced-block-comments'}
+ && ( !$rOpts->{'indent-spaced-block-comments'}
|| $input_line =~ /^\s+/ )
&& !$is_static_block_comment_without_leading_space
)
# /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
# Examples:
# *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.56 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # ( $VERSION ) = '$Revision: 1.73 $ ' =~ /\$Revision:\s+([^\s]+)/;
# We will pass such a line straight through without breaking
# it unless -npvl is used
my $is_VERSION_statement = 0;
if (
- !$saw_VERSION_in_this_file
+ !$saw_VERSION_in_this_file
&& $input_line =~ /VERSION/ # quick check to reject most lines
&& $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
)
# qw lines will still go out at the end of this routine.
if ( $rOpts->{'indent-only'} ) {
flush();
- $input_line =~ s/^\s*//; # trim left end
- $input_line =~ s/\s*$//; # trim right end
+ trim($input_line);
extract_token(0);
$token = $input_line;
# make note of something like '$var = s/xxx/yyy/;'
# in case it should have been '$var =~ s/xxx/yyy/;'
if (
- $token =~ /^(s|tr|y|m|\/)/
+ $token =~ /^(s|tr|y|m|\/)/
&& $last_nonblank_token =~ /^(=|==|!=)$/
# precededed by simple scalar
#
# But make a line break if the curly ends a
# significant block:
- ##if ( $is_until_while_for_if_elsif_else{$block_type} ) {
if (
$is_block_without_semicolon{$block_type}
output_line_to_go()
unless ( $no_internal_newlines
+ || ( $rOpts_keep_interior_semicolons && $j < $jmax )
|| ( $next_nonblank_token eq '}' ) );
}
if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
$old_breakpoint_to_go[$max_index_to_go] = 1;
}
- }
+ } # end sub print_line_of_tokens
} # end print_line_of_tokens
-sub note_added_semicolon {
- $last_added_semicolon_at = $input_line_number;
- if ( $added_semicolon_count == 0 ) {
- $first_added_semicolon_at = $last_added_semicolon_at;
- }
- $added_semicolon_count++;
- write_logfile_entry("Added ';' here\n");
-}
+# sub output_line_to_go sends one logical line of tokens on down the
+# pipeline to the VerticalAligner package, breaking the line into continuation
+# lines as necessary. The line of tokens is ready to go in the "to_go"
+# arrays.
+sub output_line_to_go {
-sub note_deleted_semicolon {
- $last_deleted_semicolon_at = $input_line_number;
- if ( $deleted_semicolon_count == 0 ) {
- $first_deleted_semicolon_at = $last_deleted_semicolon_at;
- }
- $deleted_semicolon_count++;
- write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
-}
+ # debug stuff; this routine can be called from many points
+ FORMATTER_DEBUG_FLAG_OUTPUT && do {
+ my ( $a, $b, $c ) = caller;
+ write_diagnostics(
+"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
+ );
+ my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
+ write_diagnostics("$output_str\n");
+ };
-sub note_embedded_tab {
- $embedded_tab_count++;
- $last_embedded_tab_at = $input_line_number;
- if ( !$first_embedded_tab_at ) {
- $first_embedded_tab_at = $last_embedded_tab_at;
+ # just set a tentative breakpoint if we might be in a one-line block
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ set_forced_breakpoint($max_index_to_go);
+ return;
}
- if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry("Embedded tabs in quote or pattern\n");
- }
-}
+ my $cscw_block_comment;
+ $cscw_block_comment = add_closing_side_comment()
+ if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
-sub starting_one_line_block {
+ match_opening_and_closing_tokens();
- # after seeing an opening curly brace, look for the closing brace
- # and see if the entire block will fit on a line. This routine is
- # not always right because it uses the old whitespace, so a check
- # is made later (at the closing brace) to make sure we really
- # have a one-line block. We have to do this preliminary check,
- # though, because otherwise we would always break at a semicolon
- # within a one-line block if the block contains multiple statements.
+ # tell the -lp option we are outputting a batch so it can close
+ # any unfinished items in its stack
+ finish_lp_batch();
- my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
- $rblock_type )
- = @_;
+ # If this line ends in a code block brace, set breaks at any
+ # previous closing code block braces to breakup a chain of code
+ # blocks on one line. This is very rare but can happen for
+ # user-defined subs. For example we might be looking at this:
+ # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
+ my $saw_good_break = 0; # flag to force breaks even if short line
+ if (
- # kill any current block - we can only go 1 deep
- destroy_one_line_block();
+ # looking for opening or closing block brace
+ $block_type_to_go[$max_index_to_go]
- # return value:
- # 1=distance from start of block to opening brace exceeds line length
- # 0=otherwise
+ # but not one of these which are never duplicated on a line:
+ # until|while|for|if|elsif|else
+ && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
+ )
+ {
+ my $lev = $nesting_depth_to_go[$max_index_to_go];
- my $i_start = 0;
+ # Walk backwards from the end and
+ # set break at any closing block braces at the same level.
+ # But quit if we are not in a chain of blocks.
+ for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
+ last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
+ next if ( $levels_to_go[$i] > $lev ); # skip past higher level
- # shouldn't happen: there must have been a prior call to
- # store_token_to_go to put the opening brace in the output stream
- if ( $max_index_to_go < 0 ) {
- warning("program bug: store_token_to_go called incorrectly\n");
- report_definite_bug();
- }
- else {
+ if ( $block_type_to_go[$i] ) {
+ if ( $tokens_to_go[$i] eq '}' ) {
+ set_forced_breakpoint($i);
+ $saw_good_break = 1;
+ }
+ }
- # cannot use one-line blocks with cuddled else else/elsif lines
- if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
- return 0;
+ # quit if we see anything besides words, function, blanks
+ # at this level
+ elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
}
}
- my $block_type = $$rblock_type[$j];
-
- # find the starting keyword for this block (such as 'if', 'else', ...)
+ my $imin = 0;
+ my $imax = $max_index_to_go;
- if ( $block_type =~ /^[\{\}\;\:]$/ ) {
- $i_start = $max_index_to_go;
+ # trim any blank tokens
+ if ( $max_index_to_go >= 0 ) {
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
}
- elsif ( $last_last_nonblank_token_to_go eq ')' ) {
-
- # For something like "if (xxx) {", the keyword "if" will be
- # just after the most recent break. This will be 0 unless
- # we have just killed a one-line block and are starting another.
- # (doif.t)
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
+ # anything left to write?
+ if ( $imin <= $imax ) {
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
- return 0;
- }
- }
+ # add a blank line before certain key types
+ if ( $last_line_leading_type !~ /^[#b]/ ) {
+ my $want_blank = 0;
+ my $leading_token = $tokens_to_go[$imin];
+ my $leading_type = $types_to_go[$imin];
- # the previous nonblank token should start these block types
- elsif (
- ( $last_last_nonblank_token_to_go eq $block_type )
- || ( $block_type =~ /^sub/
- && $last_last_nonblank_token_to_go =~ /^sub/ )
- )
- {
- $i_start = $last_last_nonblank_index_to_go;
- }
+ # blank lines before subs except declarations and one-liners
+ # MCONVERSION LOCATION - for sub tokenization change
+ if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
+ $want_blank = ( $rOpts->{'blanks-before-subs'} )
+ && (
+ terminal_type( \@types_to_go, \@block_type_to_go, $imin,
+ $imax ) !~ /^[\;\}]$/
+ );
+ }
- # patch for SWITCH/CASE to retain one-line case/when blocks
- elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
- return 0;
- }
- }
+ # break before all package declarations
+ # MCONVERSION LOCATION - for tokenizaton change
+ elsif ($leading_token =~ /^(package\s)/
+ && $leading_type eq 'i' )
+ {
+ $want_blank = ( $rOpts->{'blanks-before-subs'} );
+ }
- else {
- return 1;
- }
+ # break before certain key blocks except one-liners
+ if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
+ $want_blank = ( $rOpts->{'blanks-before-subs'} )
+ && (
+ terminal_type( \@types_to_go, \@block_type_to_go, $imin,
+ $imax ) ne '}'
+ );
+ }
+
+ # Break before certain block types if we haven't had a
+ # break at this level for a while. This is the
+ # difficult decision..
+ elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
+ && $leading_type eq 'k' )
+ {
+ my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
+ if ( !defined($lc) ) { $lc = 0 }
+
+ $want_blank =
+ $rOpts->{'blanks-before-blocks'}
+ && $lc >= $rOpts->{'long-block-line-count'}
+ && $file_writer_object->get_consecutive_nonblank_lines() >=
+ $rOpts->{'long-block-line-count'}
+ && (
+ terminal_type( \@types_to_go, \@block_type_to_go, $imin,
+ $imax ) ne '}'
+ );
+ }
+
+ if ($want_blank) {
+
+ # future: send blank line down normal path to VerticalAligner
+ Perl::Tidy::VerticalAligner::flush();
+ $file_writer_object->write_blank_code_line();
+ }
+ }
+
+ # update blank line variables and count number of consecutive
+ # non-blank, non-comment lines at this level
+ $last_last_line_leading_level = $last_line_leading_level;
+ $last_line_leading_level = $levels_to_go[$imin];
+ if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
+ $last_line_leading_type = $types_to_go[$imin];
+ if ( $last_line_leading_level == $last_last_line_leading_level
+ && $last_line_leading_type ne 'b'
+ && $last_line_leading_type ne '#'
+ && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
+ {
+ $nonblank_lines_at_depth[$last_line_leading_level]++;
+ }
+ else {
+ $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ }
+
+ FORMATTER_DEBUG_FLAG_FLUSH && do {
+ my ( $package, $file, $line ) = caller;
+ print
+"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
+ };
+
+ # add a couple of extra terminal blank tokens
+ pad_array_to_go();
+
+ # set all forced breakpoints for good list formatting
+ my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
+
+ if (
+ $max_index_to_go > 0
+ && (
+ $is_long_line
+ || $old_line_count_in_batch > 1
+ || is_unbalanced_batch()
+ || (
+ $comma_count_in_batch
+ && ( $rOpts_maximum_fields_per_table > 0
+ || $rOpts_comma_arrow_breakpoints == 0 )
+ )
+ )
+ )
+ {
+ $saw_good_break ||= scan_list();
+ }
+
+ # let $ri_first and $ri_last be references to lists of
+ # first and last tokens of line fragments to output..
+ my ( $ri_first, $ri_last );
+
+ # write a single line if..
+ if (
+
+ # we aren't allowed to add any newlines
+ !$rOpts_add_newlines
+
+ # or, we don't already have an interior breakpoint
+ # and we didn't see a good breakpoint
+ || (
+ !$forced_breakpoint_count
+ && !$saw_good_break
+
+ # and this line is 'short'
+ && !$is_long_line
+ )
+ )
+ {
+ @$ri_first = ($imin);
+ @$ri_last = ($imax);
+ }
+
+ # otherwise use multiple lines
+ else {
+
+ ( $ri_first, $ri_last, my $colon_count ) =
+ set_continuation_breaks($saw_good_break);
+
+ break_all_chain_tokens( $ri_first, $ri_last );
+
+ break_equals( $ri_first, $ri_last );
+
+ # now we do a correction step to clean this up a bit
+ # (The only time we would not do this is for debugging)
+ if ( $rOpts->{'recombine'} ) {
+ ( $ri_first, $ri_last ) =
+ recombine_breakpoints( $ri_first, $ri_last );
+ }
+
+ insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
+ }
+
+ # do corrector step if -lp option is used
+ my $do_not_pad = 0;
+ if ($rOpts_line_up_parentheses) {
+ $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
+ }
+ send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
+ }
+ prepare_for_new_input_lines();
+
+ # output any new -cscw block comment
+ if ($cscw_block_comment) {
+ flush();
+ $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
+ }
+}
+
+sub note_added_semicolon {
+ $last_added_semicolon_at = $input_line_number;
+ if ( $added_semicolon_count == 0 ) {
+ $first_added_semicolon_at = $last_added_semicolon_at;
+ }
+ $added_semicolon_count++;
+ write_logfile_entry("Added ';' here\n");
+}
+
+sub note_deleted_semicolon {
+ $last_deleted_semicolon_at = $input_line_number;
+ if ( $deleted_semicolon_count == 0 ) {
+ $first_deleted_semicolon_at = $last_deleted_semicolon_at;
+ }
+ $deleted_semicolon_count++;
+ write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
+}
+
+sub note_embedded_tab {
+ $embedded_tab_count++;
+ $last_embedded_tab_at = $input_line_number;
+ if ( !$first_embedded_tab_at ) {
+ $first_embedded_tab_at = $last_embedded_tab_at;
+ }
+
+ if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry("Embedded tabs in quote or pattern\n");
+ }
+}
+
+sub starting_one_line_block {
+
+ # after seeing an opening curly brace, look for the closing brace
+ # and see if the entire block will fit on a line. This routine is
+ # not always right because it uses the old whitespace, so a check
+ # is made later (at the closing brace) to make sure we really
+ # have a one-line block. We have to do this preliminary check,
+ # though, because otherwise we would always break at a semicolon
+ # within a one-line block if the block contains multiple statements.
+
+ my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
+ $rblock_type )
+ = @_;
+
+ # kill any current block - we can only go 1 deep
+ destroy_one_line_block();
+
+ # return value:
+ # 1=distance from start of block to opening brace exceeds line length
+ # 0=otherwise
+
+ my $i_start = 0;
+
+ # shouldn't happen: there must have been a prior call to
+ # store_token_to_go to put the opening brace in the output stream
+ if ( $max_index_to_go < 0 ) {
+ warning("program bug: store_token_to_go called incorrectly\n");
+ report_definite_bug();
+ }
+ else {
+
+ # cannot use one-line blocks with cuddled else else/elsif lines
+ if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
+ return 0;
+ }
+ }
+
+ my $block_type = $$rblock_type[$j];
+
+ # find the starting keyword for this block (such as 'if', 'else', ...)
+
+ if ( $block_type =~ /^[\{\}\;\:]$/ ) {
+ $i_start = $max_index_to_go;
+ }
+
+ elsif ( $last_last_nonblank_token_to_go eq ')' ) {
+
+ # For something like "if (xxx) {", the keyword "if" will be
+ # just after the most recent break. This will be 0 unless
+ # we have just killed a one-line block and are starting another.
+ # (doif.t)
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
+
+ unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ return 0;
+ }
+ }
+
+ # the previous nonblank token should start these block types
+ elsif (
+ ( $last_last_nonblank_token_to_go eq $block_type )
+ || ( $block_type =~ /^sub/
+ && $last_last_nonblank_token_to_go =~ /^sub/ )
+ )
+ {
+ $i_start = $last_last_nonblank_index_to_go;
+ }
+
+ # patch for SWITCH/CASE to retain one-line case/when blocks
+ elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
+ unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ return 0;
+ }
+ }
+
+ else {
+ return 1;
+ }
my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
my $max_line = @$ri_first - 1;
my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
- $tok_next, $has_leading_op_next, $has_leading_op );
+ $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
# looking at each line of this batch..
foreach $line ( 0 .. $max_line - 1 ) {
# see if the next line begins with a logical operator
- $ibeg = $$ri_first[$line];
- $iend = $$ri_last[$line];
- $ibeg_next = $$ri_first[ $line + 1 ];
- $tok_next = $tokens_to_go[$ibeg_next];
- $has_leading_op_next = $is_chain_operator{$tok_next};
+ $ibeg = $$ri_first[$line];
+ $iend = $$ri_last[$line];
+ $ibeg_next = $$ri_first[ $line + 1 ];
+ $tok_next = $tokens_to_go[$ibeg_next];
+ $type_next = $types_to_go[$ibeg_next];
+
+ $has_leading_op_next = ( $tok_next =~ /^\w/ )
+ ? $is_chain_operator{$tok_next} # + - * / : ? && ||
+ : $is_chain_operator{$type_next}; # and, or
+
next unless ($has_leading_op_next);
# next line must not be at lesser depth
# if this is not first line of the batch ...
if ( $line > 0 ) {
- # and we have leading operator
+ # and we have leading operator..
next if $has_leading_op;
- # and ..
+ # Introduce padding if..
# 1. the previous line is at lesser depth, or
# 2. the previous line ends in an assignment
- #
+ # 3. the previous line ends in a 'return'
+ # 4. the previous line ends in a comma
# Example 1: previous line at lesser depth
# if ( ( $Year < 1601 ) # <- we are here but
# || ( $Year > 2899 ) # list has not yet
# : $year % 100 ? 1
# : $year % 400 ? 0
# : 1;
+ #
+ # Example 3: previous line ending in comma:
+ # push @expr,
+ # /test/ ? undef
+ # : eval($_) ? 1
+ # : eval($_) ? 1
+ # : 0;
+
+ # be sure levels agree (do not indent after an indented 'if')
+ next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
+
+ # allow padding on first line after a comma but only if:
+ # (1) this is line 2 and
+ # (2) there are at more than three lines and
+ # (3) lines 3 and 4 have the same leading operator
+ # These rules try to prevent padding within a long
+ # comma-separated list.
+ my $ok_comma;
+ if ( $types_to_go[$iendm] eq ','
+ && $line == 1
+ && $max_line > 2 )
+ {
+ my $ibeg_next_next = $$ri_first[ $line + 2 ];
+ my $tok_next_next = $tokens_to_go[$ibeg_next_next];
+ $ok_comma = $tok_next_next eq $tok_next;
+ }
+
next
unless (
- $is_assignment{ $types_to_go[$iendm] }
+ $is_assignment{ $types_to_go[$iendm] }
+ || $ok_comma
|| ( $nesting_depth_to_go[$ibegm] <
$nesting_depth_to_go[$ibeg] )
+ || ( $types_to_go[$iendm] eq 'k'
+ && $tokens_to_go[$iendm] eq 'return' )
);
# we will add padding before the first token
# We can pad on line 1 of a statement if at least 3
# lines will be aligned. Otherwise, it
# can look very confusing.
- if ( $max_line > 2 ) {
+
+ # We have to be careful not to pad if there are too few
+ # lines. The current rule is:
+ # (1) in general we require at least 3 consecutive lines
+ # with the same leading chain operator token,
+ # (2) but an exception is that we only require two lines
+ # with leading colons if there are no more lines. For example,
+ # the first $i in the following snippet would get padding
+ # by the second rule:
+ #
+ # $i == 1 ? ( "First", "Color" )
+ # : $i == 2 ? ( "Then", "Rarity" )
+ # : ( "Then", "Name" );
+
+ if ( $max_line > 1 ) {
my $leading_token = $tokens_to_go[$ibeg_next];
+ my $tokens_differ;
# never indent line 1 of a '.' series because
# previous line is most likely at same level.
my $count = 1;
foreach my $l ( 2 .. 3 ) {
+ last if ( $line + $l > $max_line );
my $ibeg_next_next = $$ri_first[ $line + $l ];
- next
- unless $tokens_to_go[$ibeg_next_next] eq
- $leading_token;
+ if ( $tokens_to_go[$ibeg_next_next] ne
+ $leading_token )
+ {
+ $tokens_differ = 1;
+ last;
+ }
$count++;
}
- next unless $count == 3;
+ next if ($tokens_differ);
+ next if ( $count < 3 && $leading_token ne ':' );
$ipad = $ibeg;
}
else {
if ( $types_to_go[$inext_next] eq 'b' ) {
$inext_next++;
}
- my $type = $types_to_go[$ipad];
+ my $type = $types_to_go[$ipad];
+ my $type_next = $types_to_go[ $ipad + 1 ];
# see if there are multiple continuation lines
my $logical_continuation_lines = 1;
$logical_continuation_lines++;
}
}
+
+ # see if leading types match
+ my $types_match = $types_to_go[$inext_next] eq $type;
+ my $matches_without_bang;
+
+ # if first line has leading ! then compare the following token
+ if ( !$types_match && $type eq '!' ) {
+ $types_match = $matches_without_bang =
+ $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
+ }
+
if (
# either we have multiple continuation lines to follow
|| (
# types must match
- $types_to_go[$inext_next] eq $type
+ $types_match
# and keywords must match if keyword
&& !(
)
{
- #----------------------begin special check---------------
+ #----------------------begin special checks--------------
#
- # One more check is needed before we can make the pad.
+ # SPECIAL CHECK 1:
+ # A check is needed before we can make the pad.
# If we are in a list with some long items, we want each
# item to stand out. So in the following example, the
# first line begining with '$casefold->' would look good
);
}
}
+
+ # SPECIAL CHECK 2:
+ # a minus may introduce a quoted variable, and we will
+ # add the pad only if this line begins with a bare word,
+ # such as for the word 'Button' here:
+ # [
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ #
+ # On the other hand, if 'Button' is quoted, it looks best
+ # not to pad:
+ # [
+ # 'Button' => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ if ( $types_to_go[$ibeg_next] eq 'm' ) {
+ $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
+ }
+
next unless $ok_to_pad;
#----------------------end special check---------------
my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
$pad_spaces = $length_2 - $length_1;
+ # If the first line has a leading ! and the second does
+ # not, then remove one space to try to align the next
+ # leading characters, which are often the same. For example:
+ # if ( !$ts
+ # || $ts == $self->Holder
+ # || $self->Holder->Type eq "Arena" )
+ #
+ # This usually helps readability, but if there are subsequent
+ # ! operators things will still get messed up. For example:
+ #
+ # if ( !exists $Net::DNS::typesbyname{$qtype}
+ # && exists $Net::DNS::classesbyname{$qtype}
+ # && !exists $Net::DNS::classesbyname{$qclass}
+ # && exists $Net::DNS::typesbyname{$qclass} )
+ # We can't fix that.
+ if ($matches_without_bang) { $pad_spaces-- }
+
# make sure this won't change if -lp is used
my $indentation_1 = $leading_spaces_to_go[$ibeg];
if ( ref($indentation_1) ) {
# we might be able to handle a pad of -1 by removing a blank
# token
if ( $pad_spaces < 0 ) {
+
if ( $pad_spaces == -1 ) {
if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
$tokens_to_go[ $ipad - 1 ] = '';
# now apply any padding for alignment
if ( $ipad >= 0 && $pad_spaces ) {
+
my $length_t = total_line_length( $ibeg, $iend );
if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
$tokens_to_go[$ipad] =
# enough.
my $indentation_count = keys %saw_indentation;
my $is_vertically_aligned =
- ( $i == $ibeg
- && $first_line_comma_count > 1
- && $indentation_count == 1
- && ( $arrow_count == 0 || $arrow_count == $line_count ) );
-
- # Make the move if possible ..
- if (
-
- # we can always move left
- $move_right < 0
-
- # but we should only move right if we are sure it will
- # not spoil vertical alignment
- || ( $comma_count == 0 )
- || ( $comma_count > 0 && !$is_vertically_aligned )
- )
- {
- my $move =
- ( $move_right <= $right_margin )
- ? $move_right
- : $right_margin;
-
- foreach ( keys %saw_indentation ) {
- $saw_indentation{$_}
- ->permanently_decrease_AVAILABLE_SPACES( -$move );
- }
- }
-
- # Otherwise, record what we want and the vertical aligner
- # will try to recover it.
- else {
- $indentation->set_RECOVERABLE_SPACES($move_right);
- }
- }
- }
- }
- return $do_not_pad;
-}
-
-# flush is called to output any tokens in the pipeline, so that
-# an alternate source of lines can be written in the correct order
-
-sub flush {
- destroy_one_line_block();
- output_line_to_go();
- Perl::Tidy::VerticalAligner::flush();
-}
-
-# sub output_line_to_go sends one logical line of tokens on down the
-# pipeline to the VerticalAligner package, breaking the line into continuation
-# lines as necessary. The line of tokens is ready to go in the "to_go"
-# arrays.
-sub output_line_to_go {
-
- # debug stuff; this routine can be called from many points
- FORMATTER_DEBUG_FLAG_OUTPUT && do {
- my ( $a, $b, $c ) = caller;
- write_diagnostics(
-"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
- );
- my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
- write_diagnostics("$output_str\n");
- };
-
- # just set a tentative breakpoint if we might be in a one-line block
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- set_forced_breakpoint($max_index_to_go);
- return;
- }
-
- my $cscw_block_comment;
- $cscw_block_comment = add_closing_side_comment()
- if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
-
- match_opening_and_closing_tokens();
-
- # tell the -lp option we are outputting a batch so it can close
- # any unfinished items in its stack
- finish_lp_batch();
-
- # If this line ends in a code block brace, set breaks at any
- # previous closing code block braces to breakup a chain of code
- # blocks on one line. This is very rare but can happen for
- # user-defined subs. For example we might be looking at this:
- # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
- my $saw_good_break = 0; # flag to force breaks even if short line
- if (
-
- # looking for opening or closing block brace
- $block_type_to_go[$max_index_to_go]
-
- # but not one of these which are never duplicated on a line:
- ##&& !$is_until_while_for_if_elsif_else{ $block_type_to_go
- ## [$max_index_to_go] }
- && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
- )
- {
- my $lev = $nesting_depth_to_go[$max_index_to_go];
-
- # Walk backwards from the end and
- # set break at any closing block braces at the same level.
- # But quit if we are not in a chain of blocks.
- for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
- last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
- next if ( $levels_to_go[$i] > $lev ); # skip past higher level
-
- if ( $block_type_to_go[$i] ) {
- if ( $tokens_to_go[$i] eq '}' ) {
- set_forced_breakpoint($i);
- $saw_good_break = 1;
- }
- }
-
- # quit if we see anything besides words, function, blanks
- # at this level
- elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
- }
- }
-
- my $imin = 0;
- my $imax = $max_index_to_go;
-
- # trim any blank tokens
- if ( $max_index_to_go >= 0 ) {
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- }
-
- # anything left to write?
- if ( $imin <= $imax ) {
-
- # add a blank line before certain key types
- if ( $last_line_leading_type !~ /^[#b]/ ) {
- my $want_blank = 0;
- my $leading_token = $tokens_to_go[$imin];
- my $leading_type = $types_to_go[$imin];
-
- # blank lines before subs except declarations and one-liners
- # MCONVERSION LOCATION - for sub tokenization change
- if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
- $want_blank = ( $rOpts->{'blanks-before-subs'} )
- && (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) !~ /^[\;\}]$/
- );
- }
-
- # break before all package declarations
- # MCONVERSION LOCATION - for tokenizaton change
- elsif ($leading_token =~ /^(package\s)/
- && $leading_type eq 'i' )
- {
- $want_blank = ( $rOpts->{'blanks-before-subs'} );
- }
-
- # break before certain key blocks except one-liners
- if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
- $want_blank = ( $rOpts->{'blanks-before-subs'} )
- && (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) ne '}'
- );
- }
-
- # Break before certain block types if we haven't had a
- # break at this level for a while. This is the
- # difficult decision..
- elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
- && $leading_type eq 'k' )
- {
- my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
- if ( !defined($lc) ) { $lc = 0 }
-
- $want_blank = $rOpts->{'blanks-before-blocks'}
- && $lc >= $rOpts->{'long-block-line-count'}
- && $file_writer_object->get_consecutive_nonblank_lines() >=
- $rOpts->{'long-block-line-count'}
- && (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) ne '}'
- );
- }
-
- if ($want_blank) {
-
- # future: send blank line down normal path to VerticalAligner
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->write_blank_code_line();
- }
- }
-
- # update blank line variables and count number of consecutive
- # non-blank, non-comment lines at this level
- $last_last_line_leading_level = $last_line_leading_level;
- $last_line_leading_level = $levels_to_go[$imin];
- if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
- $last_line_leading_type = $types_to_go[$imin];
- if ( $last_line_leading_level == $last_last_line_leading_level
- && $last_line_leading_type ne 'b'
- && $last_line_leading_type ne '#'
- && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
- {
- $nonblank_lines_at_depth[$last_line_leading_level]++;
- }
- else {
- $nonblank_lines_at_depth[$last_line_leading_level] = 1;
- }
-
- FORMATTER_DEBUG_FLAG_FLUSH && do {
- my ( $package, $file, $line ) = caller;
- print
-"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
- };
-
- # add a couple of extra terminal blank tokens
- pad_array_to_go();
-
- # set all forced breakpoints for good list formatting
- my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
-
- if (
- $max_index_to_go > 0
- && (
- $is_long_line
- || $old_line_count_in_batch > 1
- || is_unbalanced_batch()
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
- )
- )
- {
- $saw_good_break ||= scan_list();
- }
-
- # let $ri_first and $ri_last be references to lists of
- # first and last tokens of line fragments to output..
- my ( $ri_first, $ri_last );
-
- # write a single line if..
- if (
-
- # we aren't allowed to add any newlines
- !$rOpts_add_newlines
+ ( $i == $ibeg
+ && $first_line_comma_count > 1
+ && $indentation_count == 1
+ && ( $arrow_count == 0 || $arrow_count == $line_count ) );
- # or, we don't already have an interior breakpoint
- # and we didn't see a good breakpoint
- || (
- !$forced_breakpoint_count
- && !$saw_good_break
+ # Make the move if possible ..
+ if (
- # and this line is 'short'
- && !$is_long_line
- )
- )
- {
- @$ri_first = ($imin);
- @$ri_last = ($imax);
- }
+ # we can always move left
+ $move_right < 0
- # otherwise use multiple lines
- else {
+ # but we should only move right if we are sure it will
+ # not spoil vertical alignment
+ || ( $comma_count == 0 )
+ || ( $comma_count > 0 && !$is_vertically_aligned )
+ )
+ {
+ my $move =
+ ( $move_right <= $right_margin )
+ ? $move_right
+ : $right_margin;
- ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
+ foreach ( keys %saw_indentation ) {
+ $saw_indentation{$_}
+ ->permanently_decrease_AVAILABLE_SPACES( -$move );
+ }
+ }
- # now we do a correction step to clean this up a bit
- # (The only time we would not do this is for debugging)
- if ( $rOpts->{'recombine'} ) {
- ( $ri_first, $ri_last ) =
- recombine_breakpoints( $ri_first, $ri_last );
+ # Otherwise, record what we want and the vertical aligner
+ # will try to recover it.
+ else {
+ $indentation->set_RECOVERABLE_SPACES($move_right);
+ }
}
}
-
- # do corrector step if -lp option is used
- my $do_not_pad = 0;
- if ($rOpts_line_up_parentheses) {
- $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
- }
- send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
}
- prepare_for_new_input_lines();
+ return $do_not_pad;
+}
- # output any new -cscw block comment
- if ($cscw_block_comment) {
- flush();
- $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
- }
+# flush is called to output any tokens in the pipeline, so that
+# an alternate source of lines can be written in the correct order
+
+sub flush {
+ destroy_one_line_block();
+ output_line_to_go();
+ Perl::Tidy::VerticalAligner::flush();
}
sub reset_block_text_accumulator {
{
my $output_line_number =
$vertical_aligner_object->get_output_line_number();
- $block_line_count = $output_line_number -
+ $block_line_count =
+ $output_line_number -
$block_opening_line_number{$type_sequence} + 1;
delete $block_opening_line_number{$type_sequence};
}
# undo it if line length exceeded
my $length =
- length($csc_text) + length($block_type) +
+ length($csc_text) +
+ length($block_type) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
if ( $length > $rOpts_maximum_line_length ) {
}
sub previous_nonblank_token {
- my ($i) = @_;
- if ( $i <= 0 ) {
- return "";
- }
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
- return $tokens_to_go[ $i - 1 ];
- }
- elsif ( $i > 1 ) {
- return $tokens_to_go[ $i - 2 ];
- }
- else {
- return "";
+ my ($i) = @_;
+ my $name = "";
+ my $im = $i - 1;
+ return "" if ( $im < 0 );
+ if ( $types_to_go[$im] eq 'b' ) { $im--; }
+ return "" if ( $im < 0 );
+ $name = $tokens_to_go[$im];
+
+ # prepend any sub name to an isolated -> to avoid unwanted alignments
+ # [test case is test8/penco.pl]
+ if ( $name eq '->' ) {
+ $im--;
+ if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
+ $name = $tokens_to_go[$im] . $name;
+ }
}
+ return $name;
}
sub send_lines_to_vertical_aligner {
my $ibeg = $$ri_first[$n];
my $iend = $$ri_last[$n];
- my @patterns = ();
+ my ( $rtokens, $rfields, $rpatterns ) =
+ make_alignment_patterns( $ibeg, $iend );
+
+ my ( $indentation, $lev, $level_end, $terminal_type,
+ $is_semicolon_terminated, $is_outdented_line )
+ = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
+ $ri_first, $ri_last, $rindentation_list );
+
+ # we will allow outdenting of long lines..
+ my $outdent_long_lines = (
+
+ # which are long quotes, if allowed
+ ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
+
+ # which are long block comments, if allowed
+ || (
+ $types_to_go[$ibeg] eq '#'
+ && $rOpts->{'outdent-long-comments'}
+
+ # but not if this is a static block comment
+ && !$is_static_block_comment
+ )
+ );
+
+ my $level_jump =
+ $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
+
+ my $rvertical_tightness_flags =
+ set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
+ $ri_first, $ri_last );
+
+ # flush an outdented line to avoid any unwanted vertical alignment
+ Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+
+ my $is_terminal_ternary = 0;
+ if ( $tokens_to_go[$ibeg] eq ':'
+ || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
+ {
+ if ( ( $terminal_type eq ';' && $level_end <= $lev )
+ || ( $level_end < $lev ) )
+ {
+ $is_terminal_ternary = 1;
+ }
+ }
+
+ # send this new line down the pipe
+ my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
+ Perl::Tidy::VerticalAligner::append_line(
+ $lev,
+ $level_end,
+ $indentation,
+ $rfields,
+ $rtokens,
+ $rpatterns,
+ $forced_breakpoint_to_go[$iend] || $in_comma_list,
+ $outdent_long_lines,
+ $is_terminal_ternary,
+ $is_semicolon_terminated,
+ $do_not_pad,
+ $rvertical_tightness_flags,
+ $level_jump,
+ );
+ $in_comma_list =
+ $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
+
+ # flush an outdented line to avoid any unwanted vertical alignment
+ Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+
+ $do_not_pad = 0;
+
+ } # end of loop to output each line
+
+ # remember indentation of lines containing opening containers for
+ # later use by sub set_adjusted_indentation
+ save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+}
+
+{ # begin make_alignment_patterns
+
+ my %block_type_map;
+ my %keyword_map;
+
+ BEGIN {
+
+ # map related block names into a common name to
+ # allow alignment
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
+
+ # map certain keywords to the same 'if' class to align
+ # long if/elsif sequences. [elsif.pl]
+ %keyword_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'given',
+ 'default' => 'given',
+ 'case' => 'switch',
+
+ # treat an 'undef' similar to numbers and quotes
+ 'undef' => 'Q',
+ );
+ }
+
+ sub make_alignment_patterns {
+
+ # Here we do some important preliminary work for the
+ # vertical aligner. We create three arrays for one
+ # output line. These arrays contain strings that can
+ # be tested by the vertical aligner to see if
+ # consecutive lines can be aligned vertically.
+ #
+ # The three arrays are indexed on the vertical
+ # alignment fields and are:
+ # @tokens - a list of any vertical alignment tokens for this line.
+ # These are tokens, such as '=' '&&' '#' etc which
+ # we want to might align vertically. These are
+ # decorated with various information such as
+ # nesting depth to prevent unwanted vertical
+ # alignment matches.
+ # @fields - the actual text of the line between the vertical alignment
+ # tokens.
+ # @patterns - a modified list of token types, one for each alignment
+ # field. These should normally each match before alignment is
+ # allowed, even when the alignment tokens match.
+ my ( $ibeg, $iend ) = @_;
my @tokens = ();
my @fields = ();
+ my @patterns = ();
my $i_start = $ibeg;
my $i;
# Unbalanced containers already avoid aligning across
# container boundaries.
if ( $tokens_to_go[$i] eq '(' ) {
+
+ # if container is balanced on this line...
my $i_mate = $mate_index_to_go[$i];
if ( $i_mate > $i && $i_mate <= $iend ) {
$depth++;
my $seqno = $type_sequence_to_go[$i];
my $count = comma_arrow_count($seqno);
$multiple_comma_arrows[$depth] = $count && $count > 1;
+
+ # Append the previous token name to make the container name
+ # more unique. This name will also be given to any commas
+ # within this container, and it helps avoid undesirable
+ # alignments of different types of containers.
my $name = previous_nonblank_token($i);
$name =~ s/^->//;
$container_name[$depth] = "+" . $name;
+
+ # Make the container name even more unique if necessary.
+ # If we are not vertically aligning this opening paren,
+ # append a character count to avoid bad alignment because
+ # it usually looks bad to align commas within continers
+ # for which the opening parens do not align. Here
+ # is an example very BAD alignment of commas (because
+ # the atan2 functions are not all aligned):
+ # $XY =
+ # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
+ # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
+ # $X * atan2( $X, 1 ) -
+ # $Y * atan2( $Y, 1 );
+ #
+ # On the other hand, it is usually okay to align commas if
+ # opening parens align, such as:
+ # glVertex3d( $cx + $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy + $s * $ys, $z );
+ # glVertex3d( $cx - $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy - $s * $ys, $z );
+ #
+ # To distinguish between these situations, we will
+ # append the length of the line from the previous matching
+ # token, or beginning of line, to the function name. This
+ # will allow the vertical aligner to reject undesirable
+ # matches.
+
+ # if we are not aligning on this paren...
+ if ( $matching_token_to_go[$i] eq '' ) {
+
+ # Sum length from previous alignment, or start of line.
+ # Note that we have to sum token lengths here because
+ # padding has been done and so array $lengths_to_go
+ # is now wrong.
+ my $len =
+ length(
+ join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
+ $len += leading_spaces_to_go($i_start)
+ if ( $i_start == $ibeg );
+
+ # tack length onto the container name to make unique
+ $container_name[$depth] .= "-" . $len;
+ }
}
}
elsif ( $tokens_to_go[$i] eq ')' ) {
$tok .= "$nesting_depth_to_go[$i]";
}
- # do any special decorations for commas to avoid unwanted
- # cross-line alignments.
- if ( $raw_tok eq ',' ) {
+ # also decorate commas with any container name to avoid
+ # unwanted cross-line alignments.
+ if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
if ( $container_name[$depth] ) {
$tok .= $container_name[$depth];
}
}
- # decorate '=>' with:
- # - Nothing if this container is unbalanced on this line.
- # - The previous token if it is balanced and multiple '=>'s
- # - The container name if it is bananced and no other '=>'s
- elsif ( $raw_tok eq '=>' ) {
- if ( $container_name[$depth] ) {
- if ( $multiple_comma_arrows[$depth] ) {
- $tok .= "+" . previous_nonblank_token($i);
- }
- else {
- $tok .= $container_name[$depth];
- }
+ # Patch to avoid aligning leading and trailing if, unless.
+ # Mark trailing if, unless statements with container names.
+ # This makes them different from leading if, unless which
+ # are not so marked at present. If we ever need to name
+ # them too, we could use ci to distinguish them.
+ # Example problem to avoid:
+ # return ( 2, "DBERROR" )
+ # if ( $retval == 2 );
+ # if ( scalar @_ ) {
+ # my ( $a, $b, $c, $d, $e, $f ) = @_;
+ # }
+ if ( $raw_tok eq '(' ) {
+ my $ci = $ci_levels_to_go[$ibeg];
+ if ( $container_name[$depth] =~ /^\+(if|unless)/
+ && $ci )
+ {
+ $tok .= $container_name[$depth];
}
}
+ # Decorate block braces with block types to avoid
+ # unwanted alignments such as the following:
+ # foreach ( @{$routput_array} ) { $fh->print($_) }
+ # eval { $fh->close() };
+ if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
+ my $block_type = $block_type_to_go[$i];
+
+ # map certain related block types to allow
+ # else blocks to align
+ $block_type = $block_type_map{$block_type}
+ if ( defined( $block_type_map{$block_type} ) );
+
+ # remove sub names to allow one-line sub braces to align
+ # regardless of name
+ if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
+
+ # allow all control-type blocks to align
+ if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
+
+ $tok .= $block_type;
+ }
+
# concatenate the text of the consecutive tokens to form
# the field
push( @fields,
if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
$type = 'Q';
+
+ # Patch to ignore leading minus before words,
+ # by changing pattern 'mQ' into just 'Q',
+ # so that we can align things like this:
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
}
}
- # minor patch to make numbers and quotes align
+ # patch to make numbers and quotes align
if ( $type eq 'n' ) { $type = 'Q' }
+ # patch to ignore any ! in patterns
+ if ( $type eq '!' ) { $type = '' }
+
$patterns[$j] .= $type;
}
# for keywords we have to use the actual text
else {
- # map certain keywords to the same 'if' class to align
- # long if/elsif sequences. my testfile: elsif.pl
my $tok = $tokens_to_go[$i];
- if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
- $tok = 'if';
- }
+
+ # but map certain keywords to a common string to allow
+ # alignment.
+ $tok = $keyword_map{$tok}
+ if ( defined( $keyword_map{$tok} ) );
$patterns[$j] .= $tok;
}
}
# done with this line .. join text of tokens to make the last field
push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
+ return ( \@tokens, \@fields, \@patterns );
+ }
- my ( $indentation, $lev, $level_end, $terminal_type,
- $is_semicolon_terminated, $is_outdented_line )
- = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
- $ri_first, $ri_last, $rindentation_list );
-
- # we will allow outdenting of long lines..
- my $outdent_long_lines = (
-
- # which are long quotes, if allowed
- ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
-
- # which are long block comments, if allowed
- || (
- $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-long-comments'}
-
- # but not if this is a static block comment
- && !$is_static_block_comment
- )
- );
-
- my $level_jump =
- $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
-
- my $rvertical_tightness_flags =
- set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
- $ri_first, $ri_last );
-
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
- my $is_terminal_ternary = 0;
- if ( $tokens_to_go[$ibeg] eq ':'
- || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
- {
- if ( ( $terminal_type eq ';' && $level_end <= $lev )
- || ( $level_end < $lev ) )
- {
- $is_terminal_ternary = 1;
- }
- }
-
- # send this new line down the pipe
- my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
- Perl::Tidy::VerticalAligner::append_line(
- $lev,
- $level_end,
- $indentation,
- \@fields,
- \@tokens,
- \@patterns,
- $forced_breakpoint_to_go[$iend] || $in_comma_list,
- $outdent_long_lines,
- $is_terminal_ternary,
- $is_semicolon_terminated,
- $do_not_pad,
- $rvertical_tightness_flags,
- $level_jump,
- );
- $in_comma_list =
- $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
-
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
- $do_not_pad = 0;
-
- } # end of loop to output each line
-
- # remember indentation of lines containing opening containers for
- # later use by sub set_adjusted_indentation
- save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
-}
+} # end make_alignment_patterns
-{ # begin unmatched_indexes
+{ # begin unmatched_indexes
# closure to keep track of unbalanced containers.
# arrays shared by the routines in this block:
# first, see if the opening token is in the current batch
my $i_opening = $mate_index_to_go[$i_closing];
- my ( $indent, $offset );
+ my ( $indent, $offset, $is_leading, $exists );
+ $exists = 1;
if ( $i_opening >= 0 ) {
# it is..look up the indentation
- ( $indent, $offset ) =
+ ( $indent, $offset, $is_leading ) =
lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
$rindentation_list );
}
my $seqno = $type_sequence_to_go[$i_closing];
if ($seqno) {
if ( $saved_opening_indentation{$seqno} ) {
- ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
+ ( $indent, $offset, $is_leading ) =
+ @{ $saved_opening_indentation{$seqno} };
}
# some kind of serious error
# (example is badfile.t)
else {
- $indent = 0;
- $offset = 0;
+ $indent = 0;
+ $offset = 0;
+ $is_leading = 0;
+ $exists = 0;
}
}
# if no sequence number it must be an unbalanced container
else {
- $indent = 0;
- $offset = 0;
+ $indent = 0;
+ $offset = 0;
+ $is_leading = 0;
+ $exists = 0;
}
}
- return ( $indent, $offset );
+ return ( $indent, $offset, $is_leading, $exists );
}
sub lookup_opening_indentation {
$rindentation_list->[0] =
$nline; # save line number to start looking next call
- my $ibeg = $ri_start->[$nline];
- my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
- return ( $rindentation_list->[ $nline + 1 ], $offset );
+ my $ibeg = $ri_start->[$nline];
+ my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
+ my $is_leading = ( $ibeg == $i_opening );
+ return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}
{
my $adjust_indentation = 0;
my $default_adjust_indentation = $adjust_indentation;
- my ( $opening_indentation, $opening_offset );
+ my (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ );
# if we are at a closing token of some type..
if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
# get the indentation of the line containing the corresponding
# opening token
- ( $opening_indentation, $opening_offset ) =
- get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
# First set the default behavior:
# and 'cuddled parens' of the form: ")->pack("
|| (
- $terminal_type eq '('
+ $terminal_type eq '('
&& $types_to_go[$ibeg] eq ')'
&& ( $nesting_depth_to_go[$iend] + 1 ==
$nesting_depth_to_go[$ibeg] )
}
}
+ # if line begins with a ':', align it with any
+ # previous line leading with corresponding ?
+ elsif ( $types_to_go[$ibeg] eq ':' ) {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ if ($is_leading) { $adjust_indentation = 2; }
+ }
+
##########################################################
# Section 2: set indentation according to flag set above
#
# we must treat something like '} else {' as if it were
# an isolated brace my $is_isolated_block_brace = (
# $iend == $ibeg ) && $block_type_to_go[$ibeg];
+ #############################################################
my $is_isolated_block_brace = $block_type_to_go[$ibeg]
&& ( $iend == $ibeg
|| $is_if_elsif_else_unless_while_until_for_foreach{
$block_type_to_go[$ibeg] } );
- #############################################################
- if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
+
+ # only do this for a ':; which is aligned with its leading '?'
+ my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+ if ( defined($opening_indentation)
+ && !$is_isolated_block_brace
+ && !$is_unaligned_colon )
+ {
if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
$indentation = $opening_indentation;
}
# this is a line with just an opening token
&& ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 1
+ || $iend_next == $ibeg_next + 2
&& $types_to_go[$iend_next] eq '#' )
# looks bad if we align vertically with the wrong container
if (
$is_semicolon_terminated
|| ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 1
+ || $iend_next == $ibeg_next + 2
&& $types_to_go[$iend_next] eq '#' )
)
{
# Check for a last line with isolated opening BLOCK curly
elsif ($rOpts_block_brace_vertical_tightness
- && $ibeg eq $iend
+ && $ibeg eq $iend
&& $types_to_go[$iend] eq '{'
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/o )
@_ = qw#
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- { ? : => =~ && || // ~~
+ { ? : => =~ && || // ~~ !~~
#;
@is_vertical_alignment_type{@_} = (1) x scalar(@_);
$alignment_type = "";
}
- # Do not align leading ': ('. This would prevent
+ # Do not align leading ': (' or '. ('. This would prevent
# alignment in something like the following:
# $extra_space .=
# ( $input_line_number < 10 ) ? " "
# : ( $input_line_number < 100 ) ? " "
# : "";
+ # or
+ # $code =
+ # ( $case_matters ? $accessor : " lc($accessor) " )
+ # . ( $yesno ? " eq " : " ne " )
if ( $i == $ibeg + 2
- && $types_to_go[$ibeg] eq ':'
+ && $types_to_go[$ibeg] =~ /^[\.\:]$/
&& $types_to_go[ $i - 1 ] eq 'b' )
{
$alignment_type = "";
if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
$alignment_type = ""
unless $vert_last_nonblank_token =~
- /^(if|unless|elsif)$/;
+ /^(if|unless|elsif)$/;
}
# be sure the alignment tokens are unique
$left_bond_strength{'->'} = STRONG;
$right_bond_strength{'->'} = VERY_STRONG;
- # breaking AFTER these is just ok:
- @_ = qw" % + - * / x ";
+ # breaking AFTER modulus operator is ok:
+ @_ = qw" % ";
+ @left_bond_strength{@_} = (STRONG) x scalar(@_);
+ @right_bond_strength{@_} =
+ ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
+
+ # Break AFTER math operators * and /
+ @_ = qw" * / x ";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} = (NOMINAL) x scalar(@_);
+ # Break AFTER weakest math operators + and -
+ # Make them weaker than * but a bit stronger than '.'
+ @_ = qw" + - ";
+ @left_bond_strength{@_} = (STRONG) x scalar(@_);
+ @right_bond_strength{@_} =
+ ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
+
# breaking BEFORE these is just ok:
@_ = qw" >> << ";
@right_bond_strength{@_} = (STRONG) x scalar(@_);
@left_bond_strength{@_} = (NOMINAL) x scalar(@_);
- # I prefer breaking before the string concatenation operator
+ # breaking before the string concatenation operator seems best
# because it can be hard to see at the end of a line
- # swap these to break after a '.'
- # this could be a future option
$right_bond_strength{'.'} = STRONG;
$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
# make these a little weaker than nominal so that they get
# favored for end-of-line characters
- @_ = qw"!= == =~ !~ ~~";
+ @_ = qw"!= == =~ !~ ~~ !~~";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
# adjust bond strength bias
#-----------------------------------------------------------------
+ # TESTING: add any bias set by sub scan_list at old comma
+ # break points.
+ elsif ( $type eq ',' ) {
+ $bond_str += $bond_strength_to_go[$i];
+ }
+
elsif ( $type eq 'f' ) {
$bond_str += $f_bias;
$f_bias += $delta_bias;
$bond_str = NO_BREAK;
}
+ # Never break between a bareword and a following paren because
+ # perl may give an error. For example, if a break is placed
+ # between 'to_filehandle' and its '(' the following line will
+ # give a syntax error [Carp.pm]: my( $no) =fileno(
+ # to_filehandle( $in)) ;
+ if ( $next_nonblank_token eq '(' ) {
+ $bond_str = NO_BREAK;
+ }
}
# use strict requires that bare word within braces not start new line
$bond_str = NO_BREAK;
}
+ # Breaking before a ++ can cause perl to guess wrong. For
+ # example the following line will cause a syntax error
+ # with -extrude if we break between '$i' and '++' [fixstyle2]
+ # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
+ elsif ( $next_nonblank_type eq '++' ) {
+ $bond_str = NO_BREAK;
+ }
+
+ # Breaking before a ? before a quote can cause trouble if
+ # they are not separated by a blank.
+ # Example: a syntax error occurs if you break before the ? here
+ # my$logic=join$all?' && ':' || ',@regexps;
+ # From: Professional_Perl_Programming_Code/multifind.pl
+ elsif ( $next_nonblank_type eq '?' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
+ }
+
+ # Breaking before a . followed by a number
+ # can cause trouble if there is no intervening space
+ # Example: a syntax error occurs if you break before the .2 here
+ # $str .= pack($endian.2, ensurrogate($ord));
+ # From: perl58/Unicode.pm
+ elsif ( $next_nonblank_type eq '.' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
+ }
+
# patch to put cuddled elses back together when on multiple
# lines, as in: } \n else \n { \n
if ($rOpts_cuddled_else) {
my $dd = shift;
my $bp_count = 0;
my $do_not_break_apart = 0;
- if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
-
- my $fbc = $forced_breakpoint_count;
-
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
-
- set_comma_breakpoints_do(
- $dd,
- $opening_structure_index_stack[$dd],
- $i,
- $item_count_stack[$dd],
- $identifier_count_stack[$dd],
- $comma_index[$dd],
- $next_nonblank_type,
- $container_type[$dd],
- $interrupted_list[$dd],
- \$do_not_break_apart,
- $must_break_open,
- );
- $bp_count = $forced_breakpoint_count - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
+
+ # anything to do?
+ if ( $item_count_stack[$dd] ) {
+
+ # handle commas not in containers...
+ if ( $dont_align[$dd] ) {
+ do_uncontained_comma_breaks($dd);
+ }
+
+ # handle commas within containers...
+ else {
+ my $fbc = $forced_breakpoint_count;
+
+ # always open comma lists not preceded by keywords,
+ # barewords, identifiers (that is, anything that doesn't
+ # look like a function call)
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+
+ set_comma_breakpoints_do(
+ $dd,
+ $opening_structure_index_stack[$dd],
+ $i,
+ $item_count_stack[$dd],
+ $identifier_count_stack[$dd],
+ $comma_index[$dd],
+ $next_nonblank_type,
+ $container_type[$dd],
+ $interrupted_list[$dd],
+ \$do_not_break_apart,
+ $must_break_open,
+ );
+ $bp_count = $forced_breakpoint_count - $fbc;
+ $do_not_break_apart = 0 if $must_break_open;
+ }
}
return ( $bp_count, $do_not_break_apart );
}
+ sub do_uncontained_comma_breaks {
+
+ # Handle commas not in containers...
+ # This is a catch-all routine for commas that we
+ # don't know what to do with because the don't fall
+ # within containers. We will bias the bond strength
+ # to break at commas which ended lines in the input
+ # file. This usually works better than just trying
+ # to put as many items on a line as possible. A
+ # downside is that if the input file is garbage it
+ # won't work very well. However, the user can always
+ # prevent following the old breakpoints with the
+ # -iob flag.
+ my $dd = shift;
+ my $bias = -.01;
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $bond_strength_to_go[$ii] = $bias;
+
+ # reduce bias magnitude to force breaks in order
+ $bias *= 0.99;
+ }
+ }
+
+ # Also put a break before the first comma if
+ # (1) there was a break there in the input, and
+ # (2) that was exactly one previous break in the input
+ #
+ # For example, we will follow the user and break after
+ # 'print' in this snippet:
+ # print
+ # "conformability (Not the same dimension)\n",
+ # "\t", $have, " is ", text_unit($hu), "\n",
+ # "\t", $want, " is ", text_unit($wu), "\n",
+ # ;
+ my $i_first_comma = $comma_index[$dd]->[0];
+ if ( $old_breakpoint_to_go[$i_first_comma] ) {
+ my $level_comma = $levels_to_go[$i_first_comma];
+ my $ibreak = -1;
+ my $obp_count = 0;
+ for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $obp_count++;
+ last if ( $obp_count > 1 );
+ $ibreak = $ii
+ if ( $levels_to_go[$ii] == $level_comma );
+ }
+ }
+ if ( $ibreak >= 0 && $obp_count == 1 ) {
+ set_forced_breakpoint($ibreak);
+ }
+ }
+ }
+
my %is_logical_container;
BEGIN {
$last_colon_sequence_number = -1;
$last_nonblank_token = ';';
$last_nonblank_type = ';';
+ $last_nonblank_block_type = ' ';
$last_old_breakpoint_count = 0;
$minimum_depth = $current_depth + 1; # forces update in check below
$old_breakpoint_count = 0;
# loop over all tokens in this batch
while ( ++$i <= $max_index_to_go ) {
if ( $type ne 'b' ) {
- $i_last_nonblank_token = $i - 1;
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
+ $i_last_nonblank_token = $i - 1;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ $last_nonblank_block_type = $block_type;
}
$type = $types_to_go[$i];
$block_type = $block_type_to_go[$i];
# Note that such breakpoints will be undone later if these tokens
# are fully contained within parens on a line.
if (
- $type eq 'k'
+
+ # break before a keyword within a line
+ $type eq 'k'
&& $i > 0
- && $token =~ /^(if|unless)$/
+
+ # if one of these keywords:
+ && $token =~ /^(if|unless|while|until|for)$/
+
+ # but do not break at something like '1 while'
+ && ( $last_nonblank_type ne 'n' || $i > 2 )
+
+ # and let keywords follow a closing 'do' brace
+ && $last_nonblank_block_type ne 'do'
+
&& (
$is_long_line
$forced_breakpoint_count );
# update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
+ $has_broken_sublist[$depth] =
+ $has_broken_sublist[$depth]
|| $has_broken_sublist[$current_depth]
|| $is_long_term
|| $has_comma_breakpoints;
# break before the previous token if it looks safe
# Example of something that we will not try to break before:
# DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
+ # Also we don't want to break at a binary operator (like +):
+ # $c->createOval(
+ # $x + $R, $y +
+ # $R => $x - $R,
+ # $y - $R, -fill => 'black',
+ # );
my $ibreak = $index_before_arrow[$depth] - 1;
if ( $ibreak > 0
&& $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
{
if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) {
- set_forced_breakpoint($ibreak);
+ if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
+
+ # don't break pointer calls, such as the following:
+ # File::Spec->curdir => 1,
+ # (This is tokenized as adjacent 'w' tokens)
+ if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+ set_forced_breakpoint($ibreak);
+ }
}
}
next;
}
- # skip past these commas if we are not supposed to format them
- next if ( $dont_align[$depth] );
-
# break after all commas above starting depth
- if ( $depth < $starting_depth ) {
+ if ( $depth < $starting_depth && !$dont_align[$depth] ) {
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
next;
}
&& $container_environment_to_go[$i] eq 'BLOCK' )
{
$dont_align[$depth] = 1;
- next;
}
}
if ( $rOpts_line_up_parentheses && !$must_break_open ) {
my $columns_if_unbroken = $rOpts_maximum_line_length -
total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
+ $need_lp_break_open =
+ ( $max_length[0] > $columns_if_unbroken )
|| ( $max_length[1] > $columns_if_unbroken )
|| ( $first_term_length > $columns_if_unbroken );
}
if ( $number_of_fields > 1 ) {
$formatted_columns =
- ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
- $max_width );
+ ( $pair_width * ( int( $item_count / 2 ) ) +
+ ( $item_count % 2 ) * $max_width );
}
else {
$formatted_columns = $max_width * $item_count;
)
{
- my $break_count =
- set_ragged_breakpoints( \@i_term_comma,
+ my $break_count = set_ragged_breakpoints( \@i_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
# imprecise, but not too bad. (steve.t)
if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
- $too_long =
- excess_line_length( $i_opening_minus,
+ $too_long = excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
my $i_opening_minus = $i_opening_paren - 4;
if ( $i_opening_minus >= 0 ) {
- $too_long =
- excess_line_length( $i_opening_minus,
+ $too_long = excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
}
# let the continuation logic handle it if 2 lines
else {
- my $break_count =
- set_ragged_breakpoints( \@i_term_comma,
+ my $break_count = set_ragged_breakpoints( \@i_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
# if we break before or after it
my $token = $tokens_to_go[$i];
- if ( $token =~ /^([\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
+ if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
}
}
}
-sub recombine_breakpoints {
+{ # begin recombine_breakpoints
- # sub set_continuation_breaks is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Occasionally small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine, recombine_breakpoints.
- my ( $ri_first, $ri_last ) = @_;
- my $more_to_do = 1;
+ my %is_amp_amp;
+ my %is_ternary;
+ my %is_math_op;
+
+ BEGIN {
+
+ @_ = qw( && || );
+ @is_amp_amp{@_} = (1) x scalar(@_);
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = @$ri_last;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $n;
- my $nmax = @$ri_last - 1;
+ @_ = qw( ? : );
+ @is_ternary{@_} = (1) x scalar(@_);
- # safety check for infinite loop
- unless ( $nmax < $nmax_last ) {
+ @_ = qw( + - * / );
+ @is_math_op{@_} = (1) x scalar(@_);
+ }
+
+ sub recombine_breakpoints {
+
+ # sub set_continuation_breaks is very liberal in setting line breaks
+ # for long lines, always setting breaks at good breakpoints, even
+ # when that creates small lines. Occasionally small line fragments
+ # are produced which would look better if they were combined.
+ # That's the task of this routine, recombine_breakpoints.
+ #
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $ri_beg, $ri_end ) = @_;
+
+ my $more_to_do = 1;
+
+ # We keep looping over all of the lines of this batch
+ # until there are no more possible recombinations
+ my $nmax_last = @$ri_end;
+ while ($more_to_do) {
+ my $n_best = 0;
+ my $bs_best;
+ my $n;
+ my $nmax = @$ri_end - 1;
+
+ # safety check for infinite loop
+ unless ( $nmax < $nmax_last ) {
# shouldn't happen because splice below decreases nmax on each pass:
# but i get paranoid sometimes
- die "Program bug-infinite loop in recombine breakpoints\n";
- }
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $previous_outdentable_closing_paren;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
+ die "Program bug-infinite loop in recombine breakpoints\n";
+ }
+ $nmax_last = $nmax;
+ $more_to_do = 0;
+ my $previous_outdentable_closing_paren;
+ my $leading_amp_count = 0;
+ my $this_line_is_semicolon_terminated;
- # loop over all remaining lines in this batch
- for $n ( 1 .. $nmax ) {
+ # loop over all remaining lines in this batch
+ for $n ( 1 .. $nmax ) {
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # ---left---- | ---right---
- # $if $imid | $imidr $il
- #
- # We want to decide if we should join tokens $imid to $imidr
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- my $if = $$ri_first[ $n - 1 ];
- my $il = $$ri_last[$n];
- my $imid = $$ri_last[ $n - 1 ];
- my $imidr = $$ri_first[$n];
-
- #my $depth_increase=( $nesting_depth_to_go[$imidr] -
- # $nesting_depth_to_go[$if] );
-
-##print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
-
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
-
- # a terminal '{' should stay where it is
- next if $types_to_go[$imidr] eq '{';
-
- # set flag if statement $n ends in ';'
- $this_line_is_semicolon_terminated = $types_to_go[$il] eq ';'
-
- # with possible side comment
- || ( $types_to_go[$il] eq '#'
- && $il - $imidr >= 2
- && $types_to_go[ $il - 2 ] eq ';'
- && $types_to_go[ $il - 1 ] eq 'b' );
- }
-
- #----------------------------------------------------------
- # Section 1: examine token at $imid (right end of first line
- # of pair)
- #----------------------------------------------------------
-
- # an isolated '}' may join with a ';' terminated segment
- if ( $types_to_go[$imid] eq '}' ) {
-
- # Check for cases where combining a semicolon terminated
- # statement with a previous isolated closing paren will
- # allow the combined line to be outdented. This is
- # generally a good move. For example, we can join up
- # the last two lines here:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # )
- # = stat($file);
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
#
- # to get:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # ) = stat($file);
+ # Here are Indexes of the endpoint tokens of the two lines:
#
- # which makes the parens line up.
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # betwen the tokens at $iend_1 and $ibeg_2
#
- # Another example, from Joe Matarazzo, probably looks best
- # with the 'or' clause appended to the trailing paren:
- # $self->some_method(
- # PARAM1 => 'foo',
- # PARAM2 => 'bar'
- # ) or die "Some_method didn't work";
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just issue a 'next'
+ # command if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
#
- $previous_outdentable_closing_paren =
- $this_line_is_semicolon_terminated # ends in ';'
- && $if == $imid # only one token on last line
- && $tokens_to_go[$imid] eq ')' # must be structural paren
-
- # only &&, ||, and : if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $types_to_go[$imidr] !~ /^(:|\&\&|\|\|)$/ )
-
- # but leading colons probably line up with with a
- # previous colon or question (count could be wrong).
- && $types_to_go[$imidr] ne ':'
-
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$imid] ==
- $nesting_depth_to_go[$il] + 1 );
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $$ri_beg[ $n - 1 ];
+ my $iend_1 = $$ri_end[ $n - 1 ];
+ my $iend_2 = $$ri_end[$n];
+ my $ibeg_2 = $$ri_beg[$n];
+
+ my $ibeg_nmax = $$ri_beg[$nmax];
+
+ # some beginning indexes of other lines, which may not exist
+ my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
+
+ my $bs_tweak = 0;
+
+ #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
+ # $nesting_depth_to_go[$ibeg_1] );
+
+##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
+
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ if ( $n == $nmax ) {
+
+ # a terminal '{' should stay where it is
+ next if $types_to_go[$ibeg_2] eq '{';
+
+ # set flag if statement $n ends in ';'
+ $this_line_is_semicolon_terminated =
+ $types_to_go[$iend_2] eq ';'
+
+ # with possible side comment
+ || ( $types_to_go[$iend_2] eq '#'
+ && $iend_2 - $ibeg_2 >= 2
+ && $types_to_go[ $iend_2 - 2 ] eq ';'
+ && $types_to_go[ $iend_2 - 1 ] eq 'b' );
+ }
+
+ #----------------------------------------------------------
+ # Section 1: examine token at $iend_1 (right end of first line
+ # of pair)
+ #----------------------------------------------------------
+
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $types_to_go[$iend_1] eq '}' ) {
+
+ # Check for cases where combining a semicolon terminated
+ # statement with a previous isolated closing paren will
+ # allow the combined line to be outdented. This is
+ # generally a good move. For example, we can join up
+ # the last two lines here:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # )
+ # = stat($file);
+ #
+ # to get:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # ) = stat($file);
+ #
+ # which makes the parens line up.
+ #
+ # Another example, from Joe Matarazzo, probably looks best
+ # with the 'or' clause appended to the trailing paren:
+ # $self->some_method(
+ # PARAM1 => 'foo',
+ # PARAM2 => 'bar'
+ # ) or die "Some_method didn't work";
+ #
+ $previous_outdentable_closing_paren =
+ $this_line_is_semicolon_terminated # ends in ';'
+ && $ibeg_1 == $iend_1 # only one token on last line
+ && $tokens_to_go[$iend_1] eq
+ ')' # must be structural paren
+
+ # only &&, ||, and : if no others seen
+ # (but note: our count made below could be wrong
+ # due to intervening comments)
+ && ( $leading_amp_count == 0
+ || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
+
+ # but leading colons probably line up with with a
+ # previous colon or question (count could be wrong).
+ && $types_to_go[$ibeg_2] ne ':'
+
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
- next
- unless (
- $previous_outdentable_closing_paren
+ next
+ unless (
+ $previous_outdentable_closing_paren
- # handle '.' and '?' specially below
- || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
- );
- }
+ # handle '.' and '?' specially below
+ || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
+ );
+ }
- # do not recombine lines with ending &&, ||, or :
- elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
- next unless $want_break_before{ $types_to_go[$imid] };
- }
+ # do not recombine lines with ending &&, ||,
+ elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
+ next unless $want_break_before{ $types_to_go[$iend_1] };
+ }
+
+ # keep a terminal colon
+ elsif ( $types_to_go[$iend_1] eq ':' ) {
+ next unless $want_break_before{ $types_to_go[$iend_1] };
+ }
- # for lines ending in a comma...
- elsif ( $types_to_go[$imid] eq ',' ) {
+ # Identify and recombine a broken ?/: chain
+ elsif ( $types_to_go[$iend_1] eq '?' ) {
- # an isolated '},' may join with an identifier + ';'
- # this is useful for the class of a 'bless' statement (bless.t)
- if ( $types_to_go[$if] eq '}'
- && $types_to_go[$imidr] eq 'i' )
- {
+ # Do not recombine different levels
next
- unless ( ( $if == ( $imid - 1 ) )
- && ( $il == ( $imidr + 1 ) )
- && $this_line_is_semicolon_terminated );
+ if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
- # override breakpoint
- $forced_breakpoint_to_go[$imid] = 0;
+ # do not recombine unless next line ends in :
+ next unless $types_to_go[$iend_2] eq ':';
}
- # but otherwise, do not recombine unless this will leave
- # just 1 more line
- else {
- next unless ( $n + 1 >= $nmax );
+ # for lines ending in a comma...
+ elsif ( $types_to_go[$iend_1] eq ',' ) {
+
+ # Do not recombine at comma which is following the
+ # input bias.
+ # TODO: might be best to make a special flag
+ next if ( $old_breakpoint_to_go[$iend_1] );
+
+ # an isolated '},' may join with an identifier + ';'
+ # this is useful for the class of a 'bless' statement (bless.t)
+ if ( $types_to_go[$ibeg_1] eq '}'
+ && $types_to_go[$ibeg_2] eq 'i' )
+ {
+ next
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
+
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # but otherwise ..
+ else {
+
+ # do not recombine after a comma unless this will leave
+ # just 1 more line
+ next unless ( $n + 1 >= $nmax );
+
+ # do not recombine if there is a change in indentation depth
+ next
+ if (
+ $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
+
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means no parens.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
+ }
+ }
+ next if $saw_paren;
+ }
}
- }
- # opening paren..
- elsif ( $types_to_go[$imid] eq '(' ) {
+ # opening paren..
+ elsif ( $types_to_go[$iend_1] eq '(' ) {
- # No longer doing this
- }
+ # No longer doing this
+ }
- elsif ( $types_to_go[$imid] eq ')' ) {
+ elsif ( $types_to_go[$iend_1] eq ')' ) {
- # No longer doing this
- }
+ # No longer doing this
+ }
- # keep a terminal colon
- elsif ( $types_to_go[$imid] eq ':' ) {
- next;
- }
+ # keep a terminal for-semicolon
+ elsif ( $types_to_go[$iend_1] eq 'f' ) {
+ next;
+ }
- # keep a terminal for-semicolon
- elsif ( $types_to_go[$imid] eq 'f' ) {
- next;
- }
+ # if '=' at end of line ...
+ elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
- # if '=' at end of line ...
- elsif ( $is_assignment{ $types_to_go[$imid] } ) {
+ my $is_short_quote =
+ ( $types_to_go[$ibeg_2] eq 'Q'
+ && $ibeg_2 == $iend_2
+ && length( $tokens_to_go[$ibeg_2] ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary =
+ ( $types_to_go[$ibeg_1] eq '?'
+ && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
- # otherwise always ok to join isolated '='
- unless ( $if == $imid ) {
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
+ {
+ next
+ unless (
+ (
- my $is_math = (
- ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ )
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
- # note no '$' in pattern because -> can
- # start long identifier
- && !grep { $_ =~ /^(->|=>|[\,])/ }
- @types_to_go[ $imidr .. $il ]
- );
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
+
+ # or the next line ends with a here doc
+ || $types_to_go[$iend_2] eq 'h'
+
+ # or the next line ends in an open paren or brace
+ # and the break hasn't been forced [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $types_to_go[$iend_2] eq '{' )
+ )
- # retain the break after the '=' unless ...
+ # do not recombine if the two lines might align well
+ # this is a very approximate test for this
+ && ( $ibeg_3 >= 0
+ && $types_to_go[$ibeg_2] ne
+ $types_to_go[$ibeg_3] )
+ );
+
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used we have ending
+ # comma
+ if ( !$rOpts_line_up_parentheses
+ || $types_to_go[$iend_2] ne ',' )
+ {
+
+ # otherwise, scan the rhs line up to last token for
+ # complexity. Note that we are not counting the last
+ # token in case it is an opening paren.
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 1 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
+ # ok to recombine if no level changes before last token
+ if ( $tv > 0 ) {
+
+ # otherwise, do not recombine if more than two
+ # level changes.
+ next if ( $tv > 1 );
+
+ # check total complexity of the two adjacent lines
+ # that will occur if we do this join
+ my $istop =
+ ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
+ for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 2 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
+ # do not recombine if total is more than 2 level changes
+ next if ( $tv > 2 );
+ }
+ }
+ }
+
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+ }
+
+ # for keywords..
+ elsif ( $types_to_go[$iend_1] eq 'k' ) {
+
+ # make major control keywords stand out
+ # (recombine.t)
next
- unless (
+ if (
- # '=' is followed by a number and looks like math
- ( $types_to_go[$imidr] eq 'n' && $is_math )
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
- # or followed by a scalar and looks like math
- || ( ( $types_to_go[$imidr] eq 'i' )
- && ( $tokens_to_go[$imidr] =~ /^\$/ )
- && $is_math )
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
- # or followed by a single "short" token
- # ('12' is arbitrary)
- || ( $il == $imidr
- && token_sequence_length( $imidr, $imidr ) < 12 )
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ next
+ unless $want_break_before{ $tokens_to_go[$iend_1] };
+ }
+ }
+ # handle trailing + - * /
+ elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
+
+ # combine lines if next line has single number
+ # or a short term followed by same operator
+ my $i_next_nonblank = $ibeg_2;
+ my $i_next_next = $i_next_nonblank + 1;
+ $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
+ my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
+ && (
+ $i_next_nonblank == $iend_2
+ || ( $i_next_next == $iend_2
+ && $is_math_op{ $types_to_go[$i_next_next] } )
+ || $types_to_go[$i_next_next] eq ';'
);
+
+ # find token before last operator of previous line
+ my $iend_1_minus = $iend_1;
+ $iend_1_minus--
+ if ( $iend_1_minus > $ibeg_1 );
+ $iend_1_minus--
+ if ( $types_to_go[$iend_1_minus] eq 'b'
+ && $iend_1_minus > $ibeg_1 );
+
+ my $short_term_follows =
+ ( $types_to_go[$iend_2] eq $types_to_go[$iend_1]
+ && $types_to_go[$iend_1_minus] =~ /^[in]$/
+ && $iend_2 <= $ibeg_2 + 2
+ && length( $tokens_to_go[$ibeg_2] ) <
+ $rOpts_short_concatenation_item_length );
+
+ next
+ unless ( $number_follows || $short_term_follows );
}
- unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$imid] = 0;
+
+ #----------------------------------------------------------
+ # Section 2: Now examine token at $ibeg_2 (left end of second
+ # line of pair)
+ #----------------------------------------------------------
+
+ # join lines identified above as capable of
+ # causing an outdented line with leading closing paren
+ if ($previous_outdentable_closing_paren) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
- }
- # for keywords..
- elsif ( $types_to_go[$imid] eq 'k' ) {
+ # do not recombine lines with leading :
+ elsif ( $types_to_go[$ibeg_2] eq ':' ) {
+ $leading_amp_count++;
+ next if $want_break_before{ $types_to_go[$ibeg_2] };
+ }
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
+ # handle lines with leading &&, ||
+ elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$imid] }
- );
+ $leading_amp_count++;
- if ( $is_and_or{ $tokens_to_go[$imid] } ) {
- next unless $want_break_before{ $tokens_to_go[$imid] };
- }
- }
-
- #----------------------------------------------------------
- # Section 2: Now examine token at $imidr (left end of second
- # line of pair)
- #----------------------------------------------------------
-
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- if ($previous_outdentable_closing_paren) {
- $forced_breakpoint_to_go[$imid] = 0;
- }
-
- # do not recombine lines with leading &&, ||, or :
- elsif ( $types_to_go[$imidr] =~ /^(:|\&\&|\|\|)$/ ) {
- $leading_amp_count++;
- next if $want_break_before{ $types_to_go[$imidr] };
- }
-
- # Identify and recombine a broken ?/: chain
- elsif ( $types_to_go[$imidr] eq '?' ) {
-
- # indexes of line first tokens --
- # mm - line before previous line
- # f - previous line
- # <-- this line
- # ff - next line
- # fff - line after next
- my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
- my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
- my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1;
- my $seqno = $type_sequence_to_go[$imidr];
- my $f_ok =
- ( $types_to_go[$if] eq ':'
- && $type_sequence_to_go[$if] ==
- $seqno - TYPE_SEQUENCE_INCREMENT );
- my $mm_ok =
- ( $imm >= 0
- && $types_to_go[$imm] eq ':'
- && $type_sequence_to_go[$imm] ==
- $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
-
- my $ff_ok =
- ( $iff > 0
- && $types_to_go[$iff] eq ':'
- && $type_sequence_to_go[$iff] == $seqno );
- my $fff_ok =
- ( $ifff > 0
- && $types_to_go[$ifff] eq ':'
- && $type_sequence_to_go[$ifff] ==
- $seqno + TYPE_SEQUENCE_INCREMENT );
-
- # we require that this '?' be part of a correct sequence
- # of 3 in a row or else no recombination is done.
- next
- unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
- $forced_breakpoint_to_go[$imid] = 0;
- }
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{ $types_to_go[$ibeg_1] }
+ && $tokens_to_go[$iend_2] eq '(' )
- # do not recombine lines with leading '.'
- elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
- my $i_next_nonblank = $imidr + 1;
- if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
- $i_next_nonblank++;
+ # or is followed by a ? or : at same depth
+ #
+ # We are looking for something like this. We can
+ # recombine the && line with the line above to make the
+ # structure more clear:
+ # return
+ # exists $G->{Attr}->{V}
+ # && exists $G->{Attr}->{V}->{$u}
+ # ? %{ $G->{Attr}->{V}->{$u} }
+ # : ();
+ #
+ # We should probably leave something like this alone:
+ # return
+ # exists $G->{Attr}->{E}
+ # && exists $G->{Attr}->{E}->{$u}
+ # && exists $G->{Attr}->{E}->{$u}->{$v}
+ # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
+ # : ();
+ # so that we either have all of the &&'s (or ||'s)
+ # on one line, as in the first example, or break at
+ # each one as in the second example. However, it
+ # sometimes makes things worse to check for this because
+ # it prevents multiple recombinations. So this is not done.
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
+
+ next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
+ $forced_breakpoint_to_go[$iend_1] = 0;
+
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $types_to_go[$ibeg_2] eq '?' ) {
+
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ next if ( $lev ne $levels_to_go[$ibeg_1] );
+
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon =
+ $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
+ my $precedes_colon =
+ $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ next unless ( $follows_colon || $precedes_colon );
+
+ # we will always combining a ? line following a : line
+ if ( !$follows_colon ) {
+
+ # ...otherwise recombine only if it looks like a chain.
+ # we will just look at a few nearby lines to see if
+ # this looks like a chain.
+ my $local_count = 0;
+ foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ next unless ( $local_count > 1 );
+ }
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
- next
- unless (
+ # do not recombine lines with leading '.'
+ elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
+ my $i_next_nonblank = $ibeg_2 + 1;
+ if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
+ $i_next_nonblank++;
+ }
+
+ next
+ unless (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
# . '$args .= $pat;'
- (
- $n == 2
- && $n == $nmax
- && $types_to_go[$if] ne $types_to_go[$imidr]
- )
+ (
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
+ )
- # ... or this would strand a short quote , like this
- # . "some long qoute"
- # . "\n";
+ # ... or this would strand a short quote , like this
+ # . "some long qoute"
+ # . "\n";
+ || ( $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && length( $tokens_to_go[$i_next_nonblank] ) <
+ $rOpts_short_concatenation_item_length )
+ );
+ }
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $il - 1
- && length( $tokens_to_go[$i_next_nonblank] ) <
- $rOpts_short_concatenation_item_length )
- );
- }
+ # handle leading keyword..
+ elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
- # handle leading keyword..
- elsif ( $types_to_go[$imidr] eq 'k' ) {
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+
+ # following 'if' or 'unless' or 'or'
+ $types_to_go[$ibeg_1] eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ # important: only combine a very simple or
+ # statement because the step below may have
+ # combined a trailing 'and' with this or,
+ # and we do not want to then combine
+ # everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ );
+ }
- # handle leading "and" and "or"
- if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
+ # handle leading 'and'
+ elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
- # Decide if we will combine a single terminal 'and' and
- # 'or' after an 'if' or 'unless'. We should consider the
- # possible vertical alignment, and visual clutter.
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
+
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+
+ # following 'if' or 'unless' or 'or'
+ $types_to_go[$ibeg_1] eq 'k'
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
+ )
+ );
+ }
+
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
+
+ # FIXME: This is still experimental..may not be too useful
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+
+ # previous line begins with 'and' or 'or'
+ && $types_to_go[$ibeg_1] eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
+
+ );
+ }
+
+ # handle all other leading keywords
+ else {
+
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
+ next
+ if ( ( $types_to_go[$iend_1] ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
+ }
+ }
+ }
+
+ # similar treatment of && and || as above for 'and' and 'or':
+ # NOTE: This block of code is currently bypassed because
+ # of a previous block but is retained for possible future use.
+ elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
+
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- # Eventually, it would be nice to look for
- # similarities (such as 'this' or 'Parents'), but
- # for now I'm using a simple rule that says that
- # the resulting line length must not be more than
- # half the maximum line length (making it 80/2 =
- # 40 characters by default).
next
unless (
$this_line_is_semicolon_terminated
- && (
- # following 'if' or 'unless'
- $types_to_go[$if] eq 'k'
- && $is_if_unless{ $tokens_to_go[$if] }
+ # previous line begins with an 'if' or 'unless' keyword
+ && $types_to_go[$ibeg_1] eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ );
+ }
+
+ # handle leading + - * /
+ elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
+ my $i_next_nonblank = $ibeg_2 + 1;
+ if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
+ $i_next_nonblank++;
+ }
+
+ my $i_next_next = $i_next_nonblank + 1;
+ $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
+
+ my $is_number = (
+ $types_to_go[$i_next_nonblank] eq 'n'
+ && ( $i_next_nonblank >= $iend_2 - 1
+ || $types_to_go[$i_next_next] eq ';' )
+ );
+
+ my $iend_1_nonblank =
+ $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
+ my $iend_2_nonblank =
+ $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
+
+ my $is_short_term =
+ ( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
+ && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
+ && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
+ && $iend_2_nonblank <= $ibeg_2 + 2
+ && length( $tokens_to_go[$iend_2_nonblank] ) <
+ $rOpts_short_concatenation_item_length );
+
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
+ # This can be important in math-intensive code.
+ next
+ unless (
+ $is_number
+ || $is_short_term
+
+ # or if we can reduce this to two lines if we do.
+ || ( $n == 2
+ && $n == $nmax
+ && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
+ );
+ }
+
+ # handle line with leading = or similar
+ elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
+ next unless $n == 1;
+ next
+ unless (
+
+ # unless we can reduce this to two lines
+ $nmax == 2
+
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
+
+ # or the next line ends with a here doc
+ || $types_to_go[$iend_2] eq 'h'
+ );
+ }
+
+ #----------------------------------------------------------
+ # Section 3:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
+
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
+
+ my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
+
+ # combined line cannot be too long
+ next
+ if excess_line_length( $ibeg_1, $iend_2 ) > 0;
+
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $$ri_beg[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
+
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $types_to_go[$ibeg_1] eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
+
+ # honor no-break's
+ next if ( $bs == NO_BREAK );
+
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ else {
- )
- );
+ if ( $bs > $bs_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
}
+ }
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
+ # recombine the pair with the greatest bond strength
+ if ($n_best) {
+ splice @$ri_beg, $n_best, 1;
+ splice @$ri_end, $n_best - 1, 1;
- # FIXME: This is still experimental..may not be too useful
- next
- unless (
- $this_line_is_semicolon_terminated
+ # keep going if we are still making progress
+ $more_to_do++;
+ }
+ }
+ return ( $ri_beg, $ri_end );
+ }
+} # end recombine_breakpoints
- # previous line begins with 'and' or 'or'
- && $types_to_go[$if] eq 'k'
- && $is_and_or{ $tokens_to_go[$if] }
+sub break_all_chain_tokens {
- );
+ # scan the current breakpoints looking for breaks at certain "chain
+ # operators" (. : && || + etc) which often occur repeatedly in a long
+ # statement. If we see a break at any one, break at all similar tokens
+ # within the same container.
+ #
+ my ( $ri_left, $ri_right ) = @_;
+
+ my %saw_chain_type;
+ my %left_chain_type;
+ my %right_chain_type;
+ my %interior_chain_type;
+ my $nmax = @$ri_right - 1;
+
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ $typel = '+' if ( $typel eq '-' ); # treat + and - the same
+ $typer = '+' if ( $typer eq '-' );
+ $typel = '*' if ( $typel eq '/' ); # treat * and / the same
+ $typer = '*' if ( $typer eq '/' );
+ my $tokenl = $tokens_to_go[$il];
+ my $tokenr = $tokens_to_go[$ir];
+
+ if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
+ next if ( $typel eq '?' );
+ push @{ $left_chain_type{$typel} }, $il;
+ $saw_chain_type{$typel} = 1;
+ $count++;
+ }
+ if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
+ next if ( $typer eq '?' );
+ push @{ $right_chain_type{$typer} }, $ir;
+ $saw_chain_type{$typer} = 1;
+ $count++;
+ }
+ }
+ return unless $count;
+
+ # now look for any interior tokens of the same types
+ $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
+ my $type = $types_to_go[$i];
+ $type = '+' if ( $type eq '-' );
+ $type = '*' if ( $type eq '/' );
+ if ( $saw_chain_type{$type} ) {
+ push @{ $interior_chain_type{$type} }, $i;
+ $count++;
+ }
+ }
+ }
+ return unless $count;
+
+ # now make a list of all new break points
+ my @insert_list;
+
+ # loop over all chain types
+ foreach my $type ( keys %saw_chain_type ) {
+
+ # quit if just ONE continuation line with leading . For example--
+ # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
+ # . $contents;
+ last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
+
+ # loop over all interior chain tokens
+ foreach my $itest ( @{ $interior_chain_type{$type} } ) {
+
+ # loop over all left end tokens of same type
+ if ( $left_chain_type{$type} ) {
+ next if $nobreak_to_go[ $itest - 1 ];
+ foreach my $i ( @{ $left_chain_type{$type} } ) {
+ next unless in_same_container( $i, $itest );
+ push @insert_list, $itest - 1;
+
+ # Break at matching ? if this : is at a different level.
+ # For example, the ? before $THRf_DEAD in the following
+ # should get a break if its : gets a break.
+ #
+ # my $flags =
+ # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
+ # : ( $_ & 4 ) ? $THRf_R_DETACHED
+ # : $THRf_R_JOINABLE;
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question > 0 ) {
+ push @insert_list, $i_question - 1;
+ }
+ }
+ last;
}
+ }
- # handle all other leading keywords
- else {
+ # loop over all right end tokens of same type
+ if ( $right_chain_type{$type} ) {
+ next if $nobreak_to_go[$itest];
+ foreach my $i ( @{ $right_chain_type{$type} } ) {
+ next unless in_same_container( $i, $itest );
+ push @insert_list, $itest;
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{ $types_to_go[$imid] } ) {
- next
- if ( ( $types_to_go[$imid] ne 'k' )
- && ( $tokens_to_go[$imidr] ne 'while' ) );
+ # break at matching ? if this : is at a different level
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
}
+ last;
}
}
+ }
+ }
- # similar treatment of && and || as above for 'and' and 'or':
- # NOTE: This block of code is currently bypassed because
- # of a previous block but is retained for possible future use.
- elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
+ # insert any new break points
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+}
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+sub break_equals {
- next
- unless (
- $this_line_is_semicolon_terminated
+ # Look for assignment operators that could use a breakpoint.
+ # For example, in the following snippet
+ #
+ # $HOME = $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # we could break at the = to get this, which is a little nicer:
+ # $HOME =
+ # $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # The logic here follows the logic in set_logical_padding, which
+ # will add the padding in the second line to improve alignment.
+ #
+ my ( $ri_left, $ri_right ) = @_;
+ my $nmax = @$ri_right - 1;
+ return unless ( $nmax >= 2 );
+
+ # scan the left ends of first two lines
+ my $tokbeg = "";
+ my $depth_beg;
+ for my $n ( 1 .. 2 ) {
+ my $il = $$ri_left[$n];
+ my $typel = $types_to_go[$il];
+ my $tokenl = $tokens_to_go[$il];
+
+ my $has_leading_op = ( $tokenl =~ /^\w/ )
+ ? $is_chain_operator{$tokenl} # + - * / : ? && ||
+ : $is_chain_operator{$typel}; # and, or
+ return unless ($has_leading_op);
+ if ( $n > 1 ) {
+ return
+ unless ( $tokenl eq $tokbeg
+ && $nesting_depth_to_go[$il] eq $depth_beg );
+ }
+ $tokbeg = $tokenl;
+ $depth_beg = $nesting_depth_to_go[$il];
+ }
- # previous line begins with an 'if' or 'unless' keyword
- && $types_to_go[$if] eq 'k'
- && $is_if_unless{ $tokens_to_go[$if] }
+ # now look for any interior tokens of the same types
+ my $il = $$ri_left[0];
+ my $ir = $$ri_right[0];
- );
+ # now make a list of all new break points
+ my @insert_list;
+ for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
+ my $type = $types_to_go[$i];
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ if ( $want_break_before{$type} ) {
+ push @insert_list, $i - 1;
}
+ else {
+ push @insert_list, $i;
+ }
+ }
+ }
- #----------------------------------------------------------
- # Section 3:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
-
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$imid] > 0 );
+ # Break after a 'return' followed by a chain of operators
+ # return ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # To give:
+ # return
+ # ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ my $i = 0;
+ if ( $types_to_go[$i] eq 'k'
+ && $tokens_to_go[$i] eq 'return'
+ && $ir > $il
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ push @insert_list, $i;
+ }
+
+ return unless (@insert_list);
+
+ # One final check...
+ # scan second and thrid lines and be sure there are no assignments
+ # we want to avoid breaking at an = to make something like this:
+ # unless ( $icon =
+ # $html_icons{"$type-$state"}
+ # or $icon = $html_icons{$type}
+ # or $icon = $html_icons{$state} )
+ for my $n ( 1 .. 2 ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
+ my $type = $types_to_go[$i];
+ return
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg );
+ }
+ }
- my $bs = $bond_strength_to_go[$imid];
+ # ok, insert any new break point
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+}
- # combined line cannot be too long
- next
- if excess_line_length( $if, $il ) > 0;
+sub insert_final_breaks {
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $$ri_first[ $n + 1 ];
- next
- if (
- $levels_to_go[$if] < $levels_to_go[$imidr]
- && $levels_to_go[$imidr] < $levels_to_go[$if_next]
+ my ( $ri_left, $ri_right ) = @_;
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $imid - $if <= 2
- && $types_to_go[$if] eq 'k'
- && $tokens_to_go[$if] eq 'if'
- && $tokens_to_go[$imid] ne '('
- )
- );
- }
+ my $nmax = @$ri_right - 1;
- # honor no-break's
- next if ( $bs == NO_BREAK );
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ my $i_first_colon = -1;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $$ri_left[$n];
+ my $ir = $$ri_right[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ return if ( $typel eq '?' );
+ return if ( $typer eq '?' );
+ if ( $typel eq ':' ) { $i_first_colon = $il; last; }
+ elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ }
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
+ # For long ternary chains,
+ # if the first : we see has its # ? is in the interior
+ # of a preceding line, then see if there are any good
+ # breakpoints before the ?.
+ if ( $i_first_colon > 0 ) {
+ my $i_question = $mate_index_to_go[$i_first_colon];
+ if ( $i_question > 0 ) {
+ my @insert_list;
+ for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ my $token = $tokens_to_go[$ii];
+ my $type = $types_to_go[$ii];
- if ( $bs > $bs_best ) {
- $n_best = $n;
- $bs_best = $bs;
+ # For now, a good break is either a comma or a 'return'.
+ if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
+ && in_same_container( $ii, $i_question ) )
+ {
+ push @insert_list, $ii;
+ last;
}
+ }
- # we have 2 or more candidates, so need another pass
- $more_to_do++;
+ # insert any new break points
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
}
+ }
+}
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @$ri_first, $n_best, 1;
- splice @$ri_last, $n_best - 1, 1;
+sub in_same_container {
+
+ # check to see if tokens at i1 and i2 are in the
+ # same container, and not separated by a comma, ? or :
+ my ( $i1, $i2 ) = @_;
+ my $type = $types_to_go[$i1];
+ my $depth = $nesting_depth_to_go[$i1];
+ return unless ( $nesting_depth_to_go[$i2] == $depth );
+ if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
+
+ ###########################################################
+ # This is potentially a very slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ # TODO: replace this loop with a data structure
+ ###########################################################
+ return if ( $i2-$i1 > 200 );
+
+ for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
+ next if ( $nesting_depth_to_go[$i] > $depth );
+ return if ( $nesting_depth_to_go[$i] < $depth );
+
+ my $tok = $tokens_to_go[$i];
+ $tok = ',' if $tok eq '=>'; # treat => same as ,
+
+ # Example: we would not want to break at any of these .'s
+ # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
+ if ( $type ne ':' ) {
+ return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
+ }
+ else {
+ return if ( $tok =~ /^[\,]$/ );
}
}
- return ( $ri_first, $ri_last );
+ return 1;
}
sub set_continuation_breaks {
# Define an array of indexes for inserting newline characters to
# keep the line lengths below the maximum desired length. There is
# an implied break after the last token, so it need not be included.
- # We'll break at points where the bond strength is lowest.
+
+ # Method:
+ # This routine is part of series of routines which adjust line
+ # lengths. It is only called if a statement is longer than the
+ # maximum line length, or if a preliminary scanning located
+ # desirable break points. Sub scan_list has already looked at
+ # these tokens and set breakpoints (in array
+ # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
+ # after commas, after opening parens, and before closing parens).
+ # This routine will honor these breakpoints and also add additional
+ # breakpoints as necessary to keep the line length below the maximum
+ # requested. It bases its decision on where the 'bond strength' is
+ # lowest.
+
+ # Output: returns references to the arrays:
+ # @i_first
+ # @i_last
+ # which contain the indexes $i of the first and last tokens on each
+ # line.
+
+ # In addition, the array:
+ # $forced_breakpoint_to_go[$i]
+ # may be updated to be =1 for any index $i after which there must be
+ # a break. This signals later routines not to undo the breakpoint.
my $saw_good_break = shift;
my @i_first = (); # the first index to output
my $imax = $max_index_to_go;
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_begin = $imin;
+ my $i_begin = $imin; # index for starting next iteration
my $leading_spaces = leading_spaces_to_go($imin);
my $line_count = 0;
# see if any ?/:'s are in order
my $colons_in_order = 1;
my $last_tok = "";
- my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
+ my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
+ my $colon_count = @colon_list;
foreach (@colon_list) {
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
$last_tok = $_;
# This is a sufficient but not necessary condition for colon chain
my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
+ #-------------------------------------------------------
+ # BEGINNING of main loop to set continuation breakpoints
+ # Keep iterating until we reach the end
+ #-------------------------------------------------------
while ( $i_begin <= $imax ) {
my $lowest_strength = NO_BREAK;
my $starting_sum = $lengths_to_go[$i_begin];
my $lowest_next_type = 'b';
my $i_lowest_next_nonblank = -1;
- # loop to find next break point
+ #-------------------------------------------------------
+ # BEGINNING of inner loop to find the best next breakpoint
+ #-------------------------------------------------------
for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
my $type = $types_to_go[$i_test];
my $token = $tokens_to_go[$i_test];
# See similar logic in scan_list which catches instances
# where a line is just something like ') {'
|| ( $line_count
- && ( $token eq ')' )
+ && ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
&& !$rOpts->{'opening-brace-always-on-right'} )
&& ( $next_nonblank_type =~ /^[\;\,]$/ )
&& (
(
- $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ]
- - $starting_sum
+ $leading_spaces +
+ $lengths_to_go[ $i_next_nonblank + 1 ] -
+ $starting_sum
) > $rOpts_maximum_line_length
)
)
&& ( $token eq $type )
&& (
(
- $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
+ $leading_spaces +
+ $lengths_to_go[ $i_test + 1 ] -
$starting_sum
) <= $rOpts_maximum_line_length
)
? 1
: (
(
- $leading_spaces + $lengths_to_go[ $i_test + 2 ] -
+ $leading_spaces +
+ $lengths_to_go[ $i_test + 2 ] -
$starting_sum
) > $rOpts_maximum_line_length
);
);
}
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint
+ # Now decide exactly where to put the breakpoint
+ #-------------------------------------------------------
+
# it's always ok to break at imax if no other break was found
if ( $i_lowest < 0 ) { $i_lowest = $imax }
last;
}
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint:
+ # Break the line after the token with index i=$i_lowest
+ #-------------------------------------------------------
+
# final index calculation
$i_next_nonblank = (
( $types_to_go[ $i_lowest + 1 ] eq 'b' )
}
}
+ #-------------------------------------------------------
+ # END of main loop to set continuation breakpoints
+ # Now go back and make any necessary corrections
+ #-------------------------------------------------------
+
#-------------------------------------------------------
# ?/: rule 4 -- if we broke at a ':', then break at
# corresponding '?' unless this is a chain of ?: expressions
}
}
}
- return \@i_first, \@i_last;
+ return ( \@i_first, \@i_last, $colon_count );
}
sub insert_additional_breaks {
my $i_l;
my $line_number = 0;
my $i_break_left;
- foreach $i_break_left ( sort @$ri_break_list ) {
+ foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
$i_f = $$ri_first[$line_number];
$i_l = $$ri_last[$line_number];
$file_writer_object
@side_comment_history
$comment_leading_space_count
+ $is_matching_terminal_line
$cached_line_text
$cached_line_type
$rOpts_entab_leading_whitespace
$rOpts_valign
+ $rOpts_fixed_position_side_comment
$rOpts_minimum_space_to_comment
);
= @_;
# variables describing the entire space group:
-
$ralignment_list = [];
$group_level = 0;
$last_group_level_written = -1;
$last_outdented_line_at = 0;
$last_side_comment_line_number = 0;
$last_side_comment_level = -1;
+ $is_matching_terminal_line = 0;
# most recent 3 side comments; [ line number, column ]
$side_comment_history[0] = [ -300, 0 ];
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_tabs = $rOpts->{'tabs'};
$rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
+ $rOpts_fixed_position_side_comment =
+ $rOpts->{'fixed-position-side-comment'};
$rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
$rOpts_valign = $rOpts->{'valign'};
&& $rvertical_tightness_flags->[2] == $cached_seqno )
{
$rvertical_tightness_flags->[3] ||= 1;
- $cached_line_valid ||= 1;
+ $cached_line_valid ||= 1;
}
}
# --------------------------------------------------------------------
# add dummy fields for terminal ternary
# --------------------------------------------------------------------
+ my $j_terminal_match;
if ( $is_terminal_ternary && $current_line ) {
- fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
+ $j_terminal_match =
+ fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
$jmax = @{$rfields} - 1;
}
&& $current_line
&& $level_jump == 0 )
{
- fix_terminal_else( $rfields, $rtokens, $rpatterns );
+ $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
$jmax = @{$rfields} - 1;
}
rvertical_tightness_flags => $rvertical_tightness_flags,
);
+ # Initialize a global flag saying if the last line of the group should
+ # match end of group and also terminate the group. There should be no
+ # returns between here and where the flag is handled at the bottom.
+ my $col_matching_terminal = 0;
+ if ( defined($j_terminal_match) ) {
+
+ # remember the column of the terminal ? or { to match with
+ $col_matching_terminal = $current_line->get_column($j_terminal_match);
+
+ # set global flag for sub decide_if_aligned
+ $is_matching_terminal_line = 1;
+ }
+
# --------------------------------------------------------------------
# It simplifies things to create a zero length side comment
# if none exists.
# Future update to allow this to vary:
$current_line = $new_line if ( $maximum_line_index == 0 );
- my_flush() if ( $group_type eq "TERMINAL" );
+ # output this group if it ends in a terminal else or ternary line
+ if ( defined($j_terminal_match) ) {
+
+ # if there is only one line in the group (maybe due to failure to match
+ # perfectly with previous lines), then align the ? or { of this
+ # terminal line with the previous one unless that would make the line
+ # too long
+ if ( $maximum_line_index == 0 ) {
+ my $col_now = $current_line->get_column($j_terminal_match);
+ my $pad = $col_matching_terminal - $col_now;
+ my $padding_available =
+ $current_line->get_available_space_on_right();
+ if ( $pad > 0 && $pad <= $padding_available ) {
+ $current_line->increase_field_width( $j_terminal_match, $pad );
+ }
+ }
+ my_flush();
+ $is_matching_terminal_line = 0;
+ }
# --------------------------------------------------------------------
# Step 8. Some old debugging stuff
dump_array(@$rpatterns);
dump_alignments();
};
+
+ return;
}
sub join_hanging_comment {
my $old_line = shift;
my $maximum_field_index = $old_line->get_jmax();
+ ###############################################
# this line must have fewer fields
return unless $maximum_field_index > $jmax;
+ ###############################################
# Identify specific cases where field elimination is allowed:
# case=1: both lines have comma-separated lists, and the first
sub eliminate_new_fields {
return unless ( $maximum_line_index >= 0 );
- my $new_line = shift;
- my $old_line = shift;
- my $jmax = $new_line->get_jmax();
+ my ( $new_line, $old_line ) = @_;
+ my $jmax = $new_line->get_jmax();
my $old_rtokens = $old_line->get_rtokens();
my $rtokens = $new_line->get_rtokens();
my $is_assignment =
- ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] )
- || $group_type eq "TERMINAL" );
+ ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
# must be monotonic variation
return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
my $k;
for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
- || ( $$old_rpatterns[$k] ne $$rpatterns[$k] )
- && $group_type ne "TERMINAL" )
+ || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
{
$match = 0;
last;
# : $year % 400 ? 0
# : 1;
#
+ # returns 1 if the terminal item should be indented
+
my ( $rfields, $rtokens, $rpatterns ) = @_;
my $jmax = @{$rfields} - 1;
@{$rpatterns} = @patterns;
# force a flush after this line
- $group_type = "TERMINAL";
- return;
+ return $jquestion;
}
sub fix_terminal_else {
# if ( 1 || $x ) { print "ok 13\n"; }
# else { print "not ok 13\n"; }
#
+ # returns 1 if the else block should be indented
+ #
my ( $rfields, $rtokens, $rpatterns ) = @_;
my $jmax = @{$rfields} - 1;
return unless ( $jmax > 0 );
# look for the opening brace after the else, and extrace the depth
my $tok_brace = $rtokens->[0];
my $depth_brace;
- if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; }
+ if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
# probably: "else # side_comment"
else { return }
splice( @{$rfields}, 1, 0, ('') x $jadd );
# force a flush after this line if it does not follow a case
- $group_type = "TERMINAL"
+ return $jbrace
unless ( $rfields_old->[0] =~ /^case\s*$/ );
- return;
}
-sub check_match {
-
- my $new_line = shift;
- my $old_line = shift;
-
- my $jmax = $new_line->get_jmax();
- my $maximum_field_index = $old_line->get_jmax();
-
- # flush if this line has too many fields
- if ( $jmax > $maximum_field_index ) { my_flush(); return }
-
- # flush if adding this line would make a non-monotonic field count
- if (
- ( $maximum_field_index > $jmax ) # this has too few fields
- && (
- ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic
- || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
- )
- )
- {
- my_flush();
- return;
- }
-
- # otherwise append this line if everything matches
- my $jmax_original_line = $new_line->get_jmax_original_line();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $list_type = $new_line->get_list_type();
+{ # sub check_match
+ my %is_good_alignment;
- my $group_list_type = $old_line->get_list_type();
- my $old_rpatterns = $old_line->get_rpatterns();
- my $old_rtokens = $old_line->get_rtokens();
+ BEGIN {
- my $jlimit = $jmax - 1;
- if ( $maximum_field_index > $jmax ) {
- $jlimit = $jmax_original_line;
- --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
+ # Vertically aligning on certain "good" tokens is usually okay
+ # so we can be less restrictive in marginal cases.
+ @_ = qw( { ? => = );
+ push @_, (',');
+ @is_good_alignment{@_} = (1) x scalar(@_);
}
- my $everything_matches = 1;
-
- # common list types always match
- unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
- || $is_hanging_side_comment )
- {
+ sub check_match {
- my $leading_space_count = $new_line->get_leading_space_count();
- my $saw_equals = 0;
- for my $j ( 0 .. $jlimit ) {
- my $match = 1;
+ # See if the current line matches the current vertical alignment group.
+ # If not, flush the current group.
+ my $new_line = shift;
+ my $old_line = shift;
- my $old_tok = $$old_rtokens[$j];
- my $new_tok = $$rtokens[$j];
+ # uses global variables:
+ # $previous_minimum_jmax_seen
+ # $maximum_jmax_seen
+ # $maximum_line_index
+ # $marginal_match
+ my $jmax = $new_line->get_jmax();
+ my $maximum_field_index = $old_line->get_jmax();
- # Dumb down the match AFTER an equals and
- # also dumb down after seeing a ? ternary operator ...
- # Everything after a + is the token which preceded the previous
- # opening paren (container name). We won't require them to match.
- if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
- $new_tok = $1;
- $old_tok =~ s/\+.*$//;
- }
+ # flush if this line has too many fields
+ if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
- if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 }
+ # flush if adding this line would make a non-monotonic field count
+ if (
+ ( $maximum_field_index > $jmax ) # this has too few fields
+ && (
+ ( $previous_minimum_jmax_seen <
+ $jmax ) # and wouldn't be monotonic
+ || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
+ )
+ )
+ {
+ goto NO_MATCH;
+ }
+
+ # otherwise see if this line matches the current group
+ my $jmax_original_line = $new_line->get_jmax_original_line();
+ my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
+ my $rtokens = $new_line->get_rtokens();
+ my $rfields = $new_line->get_rfields();
+ my $rpatterns = $new_line->get_rpatterns();
+ my $list_type = $new_line->get_list_type();
+
+ my $group_list_type = $old_line->get_list_type();
+ my $old_rpatterns = $old_line->get_rpatterns();
+ my $old_rtokens = $old_line->get_rtokens();
+
+ my $jlimit = $jmax - 1;
+ if ( $maximum_field_index > $jmax ) {
+ $jlimit = $jmax_original_line;
+ --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
+ }
+
+ # handle comma-separated lists ..
+ if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
+ for my $j ( 0 .. $jlimit ) {
+ my $old_tok = $$old_rtokens[$j];
+ next unless $old_tok;
+ my $new_tok = $$rtokens[$j];
+ next unless $new_tok;
+
+ # lists always match ...
+ # unless they would align any '=>'s with ','s
+ goto NO_MATCH
+ if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
+ || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
+ }
+ }
+
+ # do detailed check for everything else except hanging side comments
+ elsif ( !$is_hanging_side_comment ) {
+
+ my $leading_space_count = $new_line->get_leading_space_count();
+
+ my $max_pad = 0;
+ my $min_pad = 0;
+ my $saw_good_alignment;
+
+ for my $j ( 0 .. $jlimit ) {
+
+ my $old_tok = $$old_rtokens[$j];
+ my $new_tok = $$rtokens[$j];
+
+ # Note on encoding used for alignment tokens:
+ # -------------------------------------------
+ # Tokens are "decorated" with information which can help
+ # prevent unwanted alignments. Consider for example the
+ # following two lines:
+ # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
+ # local ( $i, $f ) = &'bdiv( $xn, $xd );
+ # There are three alignment tokens in each line, a comma,
+ # an =, and a comma. In the first line these three tokens
+ # are encoded as:
+ # ,4+local-18 =3 ,4+split-7
+ # and in the second line they are encoded as
+ # ,4+local-18 =3 ,4+&'bdiv-8
+ # Tokens always at least have token name and nesting
+ # depth. So in this example the ='s are at depth 3 and
+ # the ,'s are at depth 4. This prevents aligning tokens
+ # of different depths. Commas contain additional
+ # information, as follows:
+ # , {depth} + {container name} - {spaces to opening paren}
+ # This allows us to reject matching the rightmost commas
+ # in the above two lines, since they are for different
+ # function calls. This encoding is done in
+ # 'sub send_lines_to_vertical_aligner'.
+
+ # Pick off actual token.
+ # Everything up to the first digit is the actual token.
+ my $alignment_token = $new_tok;
+ if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
+
+ # see if the decorated tokens match
+ my $tokens_match = $new_tok eq $old_tok
+
+ # Exception for matching terminal : of ternary statement..
+ # consider containers prefixed by ? and : a match
+ || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
+
+ # No match if the alignment tokens differ...
+ if ( !$tokens_match ) {
+
+ # ...Unless this is a side comment
+ if (
+ $j == $jlimit
+
+ # and there is either at least one alignment token
+ # or this is a single item following a list. This
+ # latter rule is required for 'December' to join
+ # the following list:
+ # my (@months) = (
+ # '', 'January', 'February', 'March',
+ # 'April', 'May', 'June', 'July',
+ # 'August', 'September', 'October', 'November',
+ # 'December'
+ # );
+ # If it doesn't then the -lp formatting will fail.
+ && ( $j > 0 || $old_tok =~ /^,/ )
+ )
+ {
+ $marginal_match = 1
+ if ( $marginal_match == 0
+ && $maximum_line_index == 0 );
+ last;
+ }
- # we never match if the matching tokens differ
- if ( $j < $jlimit
- && $old_tok ne $new_tok )
- {
- $match = 0;
- }
+ goto NO_MATCH;
+ }
- # otherwise, if patterns match, we always have a match.
- # However, if patterns don't match, we have to be careful...
- elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
+ # Calculate amount of padding required to fit this in.
+ # $pad is the number of spaces by which we must increase
+ # the current field to squeeze in this field.
+ my $pad =
+ length( $$rfields[$j] ) - $old_line->current_field_width($j);
+ if ( $j == 0 ) { $pad += $leading_space_count; }
- # We have to be very careful about aligning commas when the
- # pattern's don't match, because it can be worse to create an
- # alignment where none is needed than to omit one. The current
- # rule: if we are within a matching sub call (indicated by '+'
- # in the matching token), we'll allow a marginal match, but
- # otherwise not.
- #
- # Here's an example where we'd like to align the '='
- # my $cfile = File::Spec->catfile( 't', 'callext.c' );
- # my $inc = File::Spec->catdir( 'Basic', 'Core' );
- # because the function names differ.
- # Future alignment logic should make this unnecessary.
- #
- # Here's an example where the ','s are not contained in a call.
- # The first line below should probably not match the next two:
- # ( $a, $b ) = ( $b, $r );
- # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
- # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
- if ( $new_tok =~ /^,/ ) {
- if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
- $marginal_match = 1;
- }
- else {
- $match = 0;
- }
+ # remember max pads to limit marginal cases
+ if ( $alignment_token ne '#' ) {
+ if ( $pad > $max_pad ) { $max_pad = $pad }
+ if ( $pad < $min_pad ) { $min_pad = $pad }
}
-
- # parens don't align well unless patterns match
- elsif ( $new_tok =~ /^\(/ ) {
- $match = 0;
+ if ( $is_good_alignment{$alignment_token} ) {
+ $saw_good_alignment = 1;
}
- # Handle an '=' alignment with different patterns to
- # the left.
- elsif ( $new_tok =~ /^=\d*$/ ) {
+ # If patterns don't match, we have to be careful...
+ if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
- $saw_equals = 1;
+ # flag this as a marginal match since patterns differ
+ $marginal_match = 1
+ if ( $marginal_match == 0 && $maximum_line_index == 0 );
- # It is best to be a little restrictive when
- # aligning '=' tokens. Here is an example of
- # two lines that we will not align:
- # my $variable=6;
- # $bb=4;
- # The problem is that one is a 'my' declaration,
- # and the other isn't, so they're not very similar.
- # We will filter these out by comparing the first
- # letter of the pattern. This is crude, but works
- # well enough.
- if (
- substr( $$old_rpatterns[$j], 0, 1 ) ne
- substr( $$rpatterns[$j], 0, 1 ) )
- {
- $match = 0;
+ # We have to be very careful about aligning commas
+ # when the pattern's don't match, because it can be
+ # worse to create an alignment where none is needed
+ # than to omit one. Here's an example where the ','s
+ # are not in named continers. The first line below
+ # should not match the next two:
+ # ( $a, $b ) = ( $b, $r );
+ # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
+ # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
+ if ( $alignment_token eq ',' ) {
+
+ # do not align commas unless they are in named containers
+ goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
}
- # If we pass that test, we'll call it a marginal match.
- # Here is an example of a marginal match:
- # $done{$$op} = 1;
- # $op = compile_bblock($op);
- # The left tokens are both identifiers, but
- # one accesses a hash and the other doesn't.
- # We'll let this be a tentative match and undo
- # it later if we don't find more than 2 lines
- # in the group.
- elsif ( $maximum_line_index == 0 ) {
- $marginal_match = 1;
+ # do not align parens unless patterns match;
+ # large ugly spaces can occur in math expressions.
+ elsif ( $alignment_token eq '(' ) {
+
+ # But we can allow a match if the parens don't
+ # require any padding.
+ if ( $pad != 0 ) { goto NO_MATCH }
}
- }
- }
- # Don't let line with fewer fields increase column widths
- # ( align3.t )
- if ( $maximum_field_index > $jmax ) {
- my $pad =
- length( $$rfields[$j] ) - $old_line->current_field_width($j);
+ # Handle an '=' alignment with different patterns to
+ # the left.
+ elsif ( $alignment_token eq '=' ) {
+
+ # It is best to be a little restrictive when
+ # aligning '=' tokens. Here is an example of
+ # two lines that we will not align:
+ # my $variable=6;
+ # $bb=4;
+ # The problem is that one is a 'my' declaration,
+ # and the other isn't, so they're not very similar.
+ # We will filter these out by comparing the first
+ # letter of the pattern. This is crude, but works
+ # well enough.
+ if (
+ substr( $$old_rpatterns[$j], 0, 1 ) ne
+ substr( $$rpatterns[$j], 0, 1 ) )
+ {
+ goto NO_MATCH;
+ }
- if ( $j == 0 ) {
- $pad += $leading_space_count;
+ # If we pass that test, we'll call it a marginal match.
+ # Here is an example of a marginal match:
+ # $done{$$op} = 1;
+ # $op = compile_bblock($op);
+ # The left tokens are both identifiers, but
+ # one accesses a hash and the other doesn't.
+ # We'll let this be a tentative match and undo
+ # it later if we don't find more than 2 lines
+ # in the group.
+ elsif ( $maximum_line_index == 0 ) {
+ $marginal_match =
+ 2; # =2 prevents being undone below
+ }
+ }
}
- # TESTING: suspend this rule to allow last lines to join
- if ( $pad > 0 ) { $match = 0; }
- }
-
- unless ($match) {
- $everything_matches = 0;
- last;
+ # Don't let line with fewer fields increase column widths
+ # ( align3.t )
+ if ( $maximum_field_index > $jmax ) {
+
+ # Exception: suspend this rule to allow last lines to join
+ if ( $pad > 0 ) { goto NO_MATCH; }
+ }
+ } ## end for my $j ( 0 .. $jlimit)
+
+ # Turn off the "marginal match" flag in some cases...
+ # A "marginal match" occurs when the alignment tokens agree
+ # but there are differences in the other tokens (patterns).
+ # If we leave the marginal match flag set, then the rule is that we
+ # will align only if there are more than two lines in the group.
+ # We will turn of the flag if we almost have a match
+ # and either we have seen a good alignment token or we
+ # just need a small pad (2 spaces) to fit. These rules are
+ # the result of experimentation. Tokens which misaligned by just
+ # one or two characters are annoying. On the other hand,
+ # large gaps to less important alignment tokens are also annoying.
+ if ( $marginal_match == 1
+ && $jmax == $maximum_field_index
+ && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
+ )
+ {
+ $marginal_match = 0;
}
+ ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
}
- }
-
- if ( $maximum_field_index > $jmax ) {
-
- if ($everything_matches) {
+ # We have a match (even if marginal).
+ # If the current line has fewer fields than the current group
+ # but otherwise matches, copy the remaining group fields to
+ # make it a perfect match.
+ if ( $maximum_field_index > $jmax ) {
my $comment = $$rfields[$jmax];
for $jmax ( $jlimit .. $maximum_field_index ) {
$$rtokens[$jmax] = $$old_rtokens[$jmax];
$$rfields[$jmax] = $comment;
$new_line->set_jmax($jmax);
}
- }
+ return;
- my_flush() unless ($everything_matches);
+ NO_MATCH:
+ ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
+ my_flush();
+ return;
+ }
}
sub check_fit {
my $group_leader_length = $group_lines[0]->get_leading_space_count();
# add extra leading spaces if helpful
- my $min_ci_gap =
- improve_continuation_indentation( $do_not_align,
+ my $min_ci_gap = improve_continuation_indentation( $do_not_align,
$group_leader_length );
# loop to output all lines
# Do not try to align two lines which are not really similar
return unless $maximum_line_index == 1;
- return if ( $group_type eq "TERMINAL" );
+ return if ($is_matching_terminal_line);
my $group_list_type = $group_lines[0]->get_list_type();
my $leading_space_count = $line->get_leading_space_count();
my $rfields = $line->get_rfields();
- my $gap = $line->get_column(0) - $leading_space_count -
+ my $gap =
+ $line->get_column(0) -
+ $leading_space_count -
length( $$rfields[0] );
if ( $leading_space_count > $group_leader_length ) {
: $rOpts_minimum_space_to_comment - 1;
}
+ # if the -fpsc flag is set, move the side comment to the selected
+ # column if and only if it is possible, ignoring constraints on
+ # line length and minimum space to comment
+ if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
+ {
+ my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
+ if ( $newpad >= 0 ) { $pad = $newpad; }
+ }
+
# accumulate the padding
if ( $pad > 0 ) { $total_pad_count += $pad; }
sub combine_fields {
# combine all fields except for the comment field ( sidecmt.t )
+ # Uses global variables:
+ # @group_lines
+ # $maximum_line_index
my ( $j, $k );
my $maximum_field_index = $group_lines[0]->get_jmax();
for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
# handle outdenting of long lines:
if ($outdent_long_lines) {
my $excess =
- length($str) - $side_comment_length + $leading_space_count -
+ length($str) -
+ $side_comment_length +
+ $leading_space_count -
$rOpts_maximum_line_length;
if ( $excess > 0 ) {
$leading_space_count = 0;
elsif ($rOpts_entab_leading_whitespace) {
my $space_count =
$leading_whitespace_count % $rOpts_entab_leading_whitespace;
- my $tab_count =
- int(
+ my $tab_count = int(
$leading_whitespace_count / $rOpts_entab_leading_whitespace );
$leading_string = "\t" x $tab_count . ' ' x $space_count;
}
$pattern .= $$rtoken_type[$j];
}
$reconstructed_original .= $$rtokens[$j];
- $block_str .= "($$rblock_type[$j])";
+ $block_str .= "($$rblock_type[$j])";
$num = length( $$rtokens[$j] );
my $type_str = $$rtoken_type[$j];
$square_bracket_depth
@current_depth
+ @total_depth
+ $total_depth
@nesting_sequence_number
@current_sequence_number
@paren_type
@square_bracket_type
@square_bracket_structural_type
@depth_array
+ @nested_ternary_flag
@starting_line_of_current_depth
};
if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
complain("=cut while not in pod ignored\n");
$tokenizer_self->{_in_pod} = 0;
- $line_of_tokens->{_line_type} = 'POD_STOP';
+ $line_of_tokens->{_line_type} = 'POD_END';
}
else {
- $line_of_tokens->{_line_type} = 'POD_END';
+ $line_of_tokens->{_line_type} = 'POD_START';
complain(
"=cut starts a pod section .. this can fool pod utilities.\n"
);
$line_of_tokens->{_line_type} = 'CODE';
# remember if we have seen any real code
- if ( !$tokenizer_self->{_started_tokenizing}
+ if ( !$tokenizer_self->{_started_tokenizing}
&& $input_line !~ /^\s*$/
&& $input_line !~ /^\s*#/ )
{
}
}
+sub ones_count {
+
+ # count number of 1's in a string of 1's and 0's
+ # example: ones_count("010101010101") gives 6
+ return ( my $cis = $_[0] ) =~ tr/1/0/;
+}
+
sub prepare_for_a_new_file {
# previous tokens needed to determine what to expect next
$square_bracket_depth = 0;
@current_depth[ 0 .. $#closing_brace_names ] =
(0) x scalar @closing_brace_names;
+ $total_depth = 0;
+ @total_depth = ();
@nesting_sequence_number[ 0 .. $#closing_brace_names ] =
( 0 .. $#closing_brace_names );
@current_sequence_number = ();
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
);
# TV2: refs to ARRAYS for processing one LINE
my $routput_block_type = []; # types of code block
my $routput_container_type = []; # paren types, such as if, elsif, ..
my $routput_type_sequence = []; # nesting sequential number
+ my $routput_indent_flag = []; #
# TV3: SCALARS for quote variables. These are initialized with a
# subroutine call and continually updated as lines are processed.
# TV4: SCALARS for multi-line identifiers and
# statements. These are initialized with a subroutine call
# and continually updated as lines are processed.
- my ( $id_scan_state, $identifier, $want_paren, );
+ my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
# TV5: SCALARS for tracking indentation level.
# Initialized once and continually updated as lines are
$allowed_quote_modifiers = "";
# TV4:
- $id_scan_state = '';
- $identifier = '';
- $want_paren = "";
+ $id_scan_state = '';
+ $identifier = '';
+ $want_paren = "";
+ $indented_if_level = 0;
# TV5:
$nesting_token_string = "";
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
];
my $rTV2 = [
- $routput_token_list, $routput_token_type,
- $routput_block_type, $routput_container_type,
- $routput_type_sequence,
+ $routput_token_list, $routput_token_type,
+ $routput_block_type, $routput_container_type,
+ $routput_type_sequence, $routput_indent_flag,
];
my $rTV3 = [
$quoted_string_2, $allowed_quote_modifiers,
];
- my $rTV4 = [ $id_scan_state, $identifier, $want_paren, ];
+ my $rTV4 =
+ [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
my $rTV5 = [
$nesting_token_string, $nesting_type_string,
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
) = @{$rTV1};
(
- $routput_token_list, $routput_token_type,
- $routput_block_type, $routput_container_type,
- $routput_type_sequence,
+ $routput_token_list, $routput_token_type,
+ $routput_block_type, $routput_container_type,
+ $routput_type_sequence, $routput_type_sequence,
) = @{$rTV2};
(
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
) = @{$rTV3};
- ( $id_scan_state, $identifier, $want_paren, ) = @{$rTV4};
+ ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
+ @{$rTV4};
(
$nesting_token_string, $nesting_type_string,
}
sub get_indentation_level {
+
+ # patch to avoid reporting error if indented if is not terminated
+ if ($indented_if_level) { return $level_in_tokenizer - 1 }
return $level_in_tokenizer;
}
%is_block_list_function, %saw_function_definition,
$brace_depth, $paren_depth,
$square_bracket_depth, @current_depth,
+ @total_depth, $total_depth,
@nesting_sequence_number, @current_sequence_number,
@paren_type, @paren_semicolon_count,
@paren_structural_type, @brace_type,
@brace_context, @brace_package,
@square_bracket_type, @square_bracket_structural_type,
@depth_array, @starting_line_of_current_depth,
+ @nested_ternary_flag,
);
# save all lexical variables
sub scan_identifier {
( $i, $tok, $type, $id_scan_state, $identifier ) =
scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
- $max_token_index );
+ $max_token_index, $expecting );
}
sub scan_id {
## '//=' => undef,
## '~' => undef,
## '~~' => undef,
+## '!~~' => undef,
'>' => sub {
error_if_expecting_TERM()
} ## end if ( $expecting == OPERATOR...
}
$paren_type[$paren_depth] = $container_type;
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
# propagate types down through nested parens
},
')' => sub {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
if ( $paren_structural_type[$paren_depth] eq '{' ) {
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[cgimosx]';
+ $allowed_quote_modifiers = '[cgimosxp]';
}
else { # not a pattern; check for a /= token
# which will be blank for an anonymous hash
else {
- $block_type =
- code_block_type( $i_tok, $rtokens, $rtoken_type,
+ $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
$max_token_index );
# patch to promote bareword type to function taking block
}
$brace_type[ ++$brace_depth ] = $block_type;
$brace_package[$brace_depth] = $current_package;
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
$brace_structural_type[$brace_depth] = $type;
$brace_context[$brace_depth] = $context;
# can happen on brace error (caught elsewhere)
else {
}
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
if ( $brace_structural_type[$brace_depth] eq 'L' ) {
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this
+ $allowed_quote_modifiers = '[cgimosxp]';
}
else {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( QUESTION_COLON,
$$rtoken_map[$i_tok] );
}
# otherwise, it should be part of a ?/: operator
else {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( QUESTION_COLON,
$$rtoken_map[$i_tok] );
if ( $last_nonblank_token eq '?' ) {
'[' => sub {
$square_bracket_type[ ++$square_bracket_depth ] =
$last_nonblank_token;
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
# It may seem odd, but structural square brackets have
$square_bracket_structural_type[$square_bracket_depth] = $type;
},
']' => sub {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
if ( ( $expecting != OPERATOR )
&& $is_file_test_operator{$next_tok} )
{
- $i++;
- $tok .= $next_tok;
- $type = 'F';
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i + 1, $rtokens,
+ $max_token_index );
+
+ # check for a quoted word like "-w=>xx";
+ # it is sufficient to just check for a following '='
+ if ( $next_nonblank_token eq '=' ) {
+ $type = 'm';
+ }
+ else {
+ $i++;
+ $tok .= $next_tok;
+ $type = 'F';
+ }
}
elsif ( $expecting == TERM ) {
my $number = scan_number();
# semicolon
# patched for SWITCH/CASE:
my %is_zero_continuation_block_type;
- @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
+ @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
if elsif else unless while until for foreach switch case given when);
@is_zero_continuation_block_type{@_} = (1) x scalar(@_);
# ref: camel 3 p 147,
# but perl may accept undocumented flags
+ # perl 5.10 adds 'p' (preserve)
my %quote_modifiers = (
- 's' => '[cegimosx]',
+ 's' => '[cegimosxp]',
'y' => '[cds]',
'tr' => '[cds]',
- 'm' => '[cgimosx]',
- 'qr' => '[imosx]',
+ 'm' => '[cgimosxp]',
+ 'qr' => '[imosxp]',
'q' => "",
'qq' => "",
'qw' => "",
$block_type = $last_nonblank_block_type;
$container_type = $last_nonblank_container_type;
$type_sequence = $last_nonblank_type_sequence;
+ $indent_flag = 0;
$peeked_ahead = 0;
# tokenization is done in two stages..
$routput_block_type->[$i] = "";
$routput_container_type->[$i] = "";
$routput_type_sequence->[$i] = "";
+ $routput_indent_flag->[$i] = 0;
}
$i = -1;
$i_tok = -1;
$routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence;
+ $routput_indent_flag->[$i_tok] = $indent_flag;
}
my $pre_tok = $$rtokens[$i]; # get the next pre-token
my $pre_type = $$rtoken_type[$i]; # and type
$block_type = ""; # blank for all tokens except code block braces
$container_type = ""; # blank for all tokens except some parens
$type_sequence = ""; # blank for all tokens except ?/:
+ $indent_flag = 0;
$prototype = ""; # blank for all tokens except user defined subs
$i_tok = $i;
# treat bare word followed by open paren like qw(
if ( $next_nonblank_token eq '(' ) {
- $in_quote = $quote_items{q};
- $allowed_quote_modifiers = $quote_modifiers{q};
+ $in_quote = $quote_items{'q'};
+ $allowed_quote_modifiers = $quote_modifiers{'q'};
$type = 'q';
$quote_type = 'q';
next;
}
}
- # quote a bare word within braces..like xxx->{s}; note that we
- # must be sure this is not a structural brace, to avoid
- # mistaking {s} in the following for a quoted bare word:
- # for(@[){s}bla}BLA}
- if ( ( $last_nonblank_type eq 'L' )
- && ( $next_nonblank_token eq '}' ) )
+ # quote a bare word within braces..like xxx->{s}; note that we
+ # must be sure this is not a structural brace, to avoid
+ # mistaking {s} in the following for a quoted bare word:
+ # for(@[){s}bla}BLA}
+ # Also treat q in something like var{-q} as a bare word, not qoute operator
+ ##if ( ( $last_nonblank_type eq 'L' )
+ ## && ( $next_nonblank_token eq '}' ) )
+ if (
+ $next_nonblank_token eq '}'
+ && (
+ $last_nonblank_type eq 'L'
+ || ( $last_nonblank_type eq 'm'
+ && $last_last_nonblank_type eq 'L' )
+ )
+ )
{
$type = 'w';
next;
&& label_ok()
)
{
- if ( $tok !~ /A-Z/ ) {
+ if ( $tok !~ /[A-Z]/ ) {
push @{ $tokenizer_self->{_rlower_case_labels_at} },
$input_line_number;
}
# note: ';' '{' and '}' in list above
# because continues can follow bare blocks;
# ':' is labeled block
- warning("'$tok' should follow a block\n");
+ #
+ ############################################
+ # NOTE: This check has been deactivated because
+ # continue has an alternative usage for given/when
+ # blocks in perl 5.10
+ ## warning("'$tok' should follow a block\n");
+ ############################################
}
}
elsif ( $tok eq 'when' || $tok eq 'case' ) {
$statement_type = $tok; # next '{' is block
}
+
+ # indent trailing if/unless/while/until
+ # outdenting will be handled by later indentation loop
+ if ( $tok =~ /^(if|unless|while|until)$/
+ && $next_nonblank_token ne '(' )
+ {
+ $indent_flag = 1;
+ }
}
# check for inline label following
# not treated as keywords:
if (
(
- $tok eq 'case'
+ $tok eq 'case'
&& $brace_type[$brace_depth] eq 'switch'
)
|| ( $tok eq 'when'
$routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence;
+ $routput_indent_flag->[$i_tok] = $indent_flag;
}
unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
my $container_environment = '';
my $im = -1; # previous $i value
my $num;
- my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
+ my $ci_string_sum = ones_count($ci_string_in_tokenizer);
-# =head1 Computing Token Indentation
+# Computing Token Indentation
#
# The final section of the tokenizer forms tokens and also computes
# parameters needed to find indentation. It is much easier to do it
{ # scan the list of pre-tokens indexes
# self-checking for valid token types
- my $type = $routput_token_type->[$i];
+ my $type = $routput_token_type->[$i];
+ my $forced_indentation_flag = $routput_indent_flag->[$i];
+
+ # See if we should undo the $forced_indentation_flag.
+ # Forced indentation after 'if', 'unless', 'while' and 'until'
+ # expressions without trailing parens is optional and doesn't
+ # always look good. It is usually okay for a trailing logical
+ # expression, but if the expression is a function call, code block,
+ # or some kind of list it puts in an unwanted extra indentation
+ # level which is hard to remove.
+ #
+ # Example where extra indentation looks ok:
+ # return 1
+ # if $det_a < 0 and $det_b > 0
+ # or $det_a > 0 and $det_b < 0;
+ #
+ # Example where extra indentation is not needed because
+ # the eval brace also provides indentation:
+ # print "not " if defined eval {
+ # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
+ # };
+ #
+ # The following rule works fairly well:
+ # Undo the flag if the end of this line, or start of the next
+ # line, is an opening container token or a comma.
+ # This almost always works, but if not after another pass it will
+ # be stable.
+ if ( $forced_indentation_flag && $type eq 'k' ) {
+ my $ixlast = -1;
+ my $ilast = $routput_token_list->[$ixlast];
+ my $toklast = $routput_token_type->[$ilast];
+ if ( $toklast eq '#' ) {
+ $ixlast--;
+ $ilast = $routput_token_list->[$ixlast];
+ $toklast = $routput_token_type->[$ilast];
+ }
+ if ( $toklast eq 'b' ) {
+ $ixlast--;
+ $ilast = $routput_token_list->[$ixlast];
+ $toklast = $routput_token_type->[$ilast];
+ }
+ if ( $toklast =~ /^[\{,]$/ ) {
+ $forced_indentation_flag = 0;
+ }
+ else {
+ ( $toklast, my $i_next ) =
+ find_next_nonblank_token( $max_token_index, $rtokens,
+ $max_token_index );
+ if ( $toklast =~ /^[\{,]$/ ) {
+ $forced_indentation_flag = 0;
+ }
+ }
+ }
+
+ # if we are already in an indented if, see if we should outdent
+ if ($indented_if_level) {
+
+ # don't try to nest trailing if's - shouldn't happen
+ if ( $type eq 'k' ) {
+ $forced_indentation_flag = 0;
+ }
+
+ # check for the normal case - outdenting at next ';'
+ elsif ( $type eq ';' ) {
+ if ( $level_in_tokenizer == $indented_if_level ) {
+ $forced_indentation_flag = -1;
+ $indented_if_level = 0;
+ }
+ }
+
+ # handle case of missing semicolon
+ elsif ( $type eq '}' ) {
+ if ( $level_in_tokenizer == $indented_if_level ) {
+ $indented_if_level = 0;
+
+ # TBD: This could be a subroutine call
+ $level_in_tokenizer--;
+ if ( @{$rslevel_stack} > 1 ) {
+ pop( @{$rslevel_stack} );
+ }
+ if ( length($nesting_block_string) > 1 )
+ { # true for valid script
+ chop $nesting_block_string;
+ chop $nesting_list_string;
+ }
+
+ }
+ }
+ }
+
my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
$level_i = $level_in_tokenizer;
# Note: these are set so that the leading braces have a HIGHER
# level than their CONTENTS, which is convenient for indentation
# Also, define continuation indentation for each token.
- if ( $type eq '{' || $type eq 'L' ) {
+ if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
+ {
# use environment before updating
$container_environment =
$slevel_in_tokenizer - $rslevel_stack->[-1];
}
- # =head1 Continuation Indentation
+ # Continuation Indentation
#
# Having tried setting continuation indentation both in the formatter and
# in the tokenizer, I can say that setting it in the tokenizer is much,
push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
$level_in_tokenizer++;
+ if ($forced_indentation_flag) {
+
+ # break BEFORE '?' when there is forced indentation
+ if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
+ if ( $type eq 'k' ) {
+ $indented_if_level = $level_in_tokenizer;
+ }
+ }
+
if ( $routput_block_type->[$i] ) {
$nesting_block_flag = 1;
$nesting_block_string .= '1';
else {
$bit = 1
unless
- $is_logical_container{ $routput_container_type->[$i]
- };
+ $is_logical_container{ $routput_container_type->[$i]
+ };
}
}
$nesting_list_string .= $bit;
$ci_string_in_tokenizer .=
( $intervening_secondary_structure != 0 ) ? '1' : '0';
- $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
+ $ci_string_sum = ones_count($ci_string_in_tokenizer);
$continuation_string_in_tokenizer .=
( $in_statement_continuation > 0 ) ? '1' : '0';
if (
!$routput_block_type->[$i] # patch: skip for BLOCK
&& ($in_statement_continuation)
+ && !( $forced_indentation_flag && $type eq ':' )
)
{
$total_ci += $in_statement_continuation
$in_statement_continuation = 0;
}
- elsif ( $type eq '}' || $type eq 'R' ) {
+ elsif ($type eq '}'
+ || $type eq 'R'
+ || $forced_indentation_flag < 0 )
+ {
# only a nesting error in the script would prevent popping here
if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
$nesting_list_flag = ( $nesting_list_string =~ /1$/ );
chop $ci_string_in_tokenizer;
- $ci_string_sum =
- ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
+ $ci_string_sum = ones_count($ci_string_in_tokenizer);
$in_statement_continuation =
chop $continuation_string_in_tokenizer;
# ...and include all block types except user subs with
# block prototypes and these: (sort|grep|map|do|eval)
-# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
+# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
elsif (
$is_zero_continuation_block_type{
$routput_block_type->[$i] } )
$in_statement_continuation = 1
if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
}
+
+ elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
}
# use environment after updating
# patch for dor.t (defined or).
if ( $tok eq '/'
- && $next_type eq '/'
+ && $next_type eq '/'
&& $last_nonblank_token eq ']' )
{
$op_expected = OPERATOR;
# otherwise, look at previous token. This must be a code block if
# it follows any of these:
-# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
+# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
elsif ( $is_code_block_token{$last_nonblank_token} ) {
return $last_nonblank_token;
}
# way.
sub increase_nesting_depth {
- my ( $a, $pos ) = @_;
+ my ( $aa, $pos ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
# @current_sequence_number, @depth_array, @starting_line_of_current_depth
- my $b;
- $current_depth[$a]++;
+ my $bb;
+ $current_depth[$aa]++;
+ $total_depth++;
+ $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
my $input_line_number = $tokenizer_self->{_last_line_number};
my $input_line = $tokenizer_self->{_line_text};
# Sequence numbers increment by number of items. This keeps
# a unique set of numbers but still allows the relative location
# of any type to be determined.
- $nesting_sequence_number[$a] += scalar(@closing_brace_names);
- my $seqno = $nesting_sequence_number[$a];
- $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
+ $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
+ my $seqno = $nesting_sequence_number[$aa];
+ $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
- $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
+ $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
[ $input_line_number, $input_line, $pos ];
- for $b ( 0 .. $#closing_brace_names ) {
- next if ( $b == $a );
- $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
+ for $bb ( 0 .. $#closing_brace_names ) {
+ next if ( $bb == $aa );
+ $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
+ }
+
+ # set a flag for indenting a nested ternary statement
+ my $indent = 0;
+ if ( $aa == QUESTION_COLON ) {
+ $nested_ternary_flag[ $current_depth[$aa] ] = 0;
+ if ( $current_depth[$aa] > 1 ) {
+ if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
+ my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
+ if ( $pdepth == $total_depth - 1 ) {
+ $indent = 1;
+ $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
+ }
+ }
+ }
}
- return $seqno;
+ return ( $seqno, $indent );
}
sub decrease_nesting_depth {
- my ( $a, $pos ) = @_;
+ my ( $aa, $pos ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
# @current_sequence_number, @depth_array, @starting_line_of_current_depth
- my $b;
+ my $bb;
my $seqno = 0;
my $input_line_number = $tokenizer_self->{_last_line_number};
my $input_line = $tokenizer_self->{_line_text};
- if ( $current_depth[$a] > 0 ) {
+ my $outdent = 0;
+ $total_depth--;
+ if ( $current_depth[$aa] > 0 ) {
- $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
+ # set a flag for un-indenting after seeing a nested ternary statement
+ $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
+ if ( $aa == QUESTION_COLON ) {
+ $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
+ }
- # check that any brace types $b contained within are balanced
- for $b ( 0 .. $#closing_brace_names ) {
- next if ( $b == $a );
+ # check that any brace types $bb contained within are balanced
+ for $bb ( 0 .. $#closing_brace_names ) {
+ next if ( $bb == $aa );
- unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
- $current_depth[$b] )
+ unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
+ $current_depth[$bb] )
{
- my $diff = $current_depth[$b] -
- $depth_array[$a][$b][ $current_depth[$a] ];
+ my $diff =
+ $current_depth[$bb] -
+ $depth_array[$aa][$bb][ $current_depth[$aa] ];
# don't whine too many times
my $saw_brace_error = get_saw_brace_error();
{
interrupt_logfile();
my $rsl =
- $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+ $starting_line_of_current_depth[$aa]
+ [ $current_depth[$aa] ];
my $sl = $$rsl[0];
my $rel = [ $input_line_number, $input_line, $pos ];
my $el = $$rel[0];
}
my $bname =
( $diff > 0 )
- ? $opening_brace_names[$b]
- : $closing_brace_names[$b];
+ ? $opening_brace_names[$bb]
+ : $closing_brace_names[$bb];
write_error_indicator_pair( @$rsl, '^' );
my $msg = <<"EOM";
-Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
+Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM
if ( $diff > 0 ) {
my $rml =
- $starting_line_of_current_depth[$b]
- [ $current_depth[$b] ];
+ $starting_line_of_current_depth[$bb]
+ [ $current_depth[$bb] ];
my $ml = $$rml[0];
$msg .=
" The most recent un-matched $bname is on line $ml\n";
increment_brace_error();
}
}
- $current_depth[$a]--;
+ $current_depth[$aa]--;
}
else {
my $saw_brace_error = get_saw_brace_error();
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
my $msg = <<"EOM";
-There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
+There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
}
increment_brace_error();
}
- return $seqno;
+ return ( $seqno, $outdent );
}
sub check_final_nesting_depths {
- my ($a);
+ my ($aa);
# USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
- for $a ( 0 .. $#closing_brace_names ) {
+ for $aa ( 0 .. $#closing_brace_names ) {
- if ( $current_depth[$a] ) {
- my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+ if ( $current_depth[$aa] ) {
+ my $rsl =
+ $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
my $sl = $$rsl[0];
my $msg = <<"EOM";
-Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
-The most recent un-matched $opening_brace_names[$a] is on line $sl
+Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
+The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
indicate_error( $msg, @$rsl, '^' );
increment_brace_error();
# doesn't get in the way of good scripts.
# Complain if a filehandle has any lower case
- # letters. This is suggested good practice, but the
- # main reason for this warning is that prior to
- # release 20010328, perltidy incorrectly parsed a
- # function call after a print/printf, with the
- # result that a space got added before the opening
- # paren, thereby converting the function name to a
- # filehandle according to perl's weird rules. This
- # will not usually generate a syntax error, so this
- # is a potentially serious bug. By warning
- # of filehandles with any lower case letters,
- # followed by opening parens, we will help the user
- # find almost all of these older errors.
- # use 'sub_name' because something like
+ # letters. This is suggested good practice.
+ # Use 'sub_name' because something like
# main::MYHANDLE is ok for filehandle
if ( $sub_name =~ /[a-z]/ ) {
# USES GLOBAL VARIABLES: $context, $last_nonblank_token,
# $last_nonblank_type
- my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_;
+ my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
+ $expecting )
+ = @_;
my $i_begin = $i;
my $type = '';
my $tok_begin = $$rtokens[$i_begin];
# punctuation variable?
# testfile: cunningham4.pl
- if ( $identifier eq '&' ) {
+ #
+ # We have to be careful here. If we are in an unknown state,
+ # we will reject the punctuation variable. In the following
+ # example the '&' is a binary opeator but we are in an unknown
+ # state because there is no sigil on 'Prima', so we don't
+ # know what it is. But it is a bad guess that
+ # '&~' is a punction variable.
+ # $self->{text}->{colorMap}->[
+ # Prima::PodView::COLOR_CODE_FOREGROUND
+ # & ~tb::COLOR_INDEX ] =
+ # $sec->{ColorCode}
+ if ( $identifier eq '&' && $expecting ) {
$identifier .= $tok;
}
else {
# I don't think an error flag can occur here ..but ?
my $error;
- ( $i, $error ) =
- inverse_pretoken_map( $i, $pos, $rtoken_map,
+ ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
$max_token_index );
if ($error) { warning("Possibly invalid sub\n") }
$subname_saved = "";
if ( $next_nonblank_token eq '{' ) {
if ($subname) {
- if ( $saw_function_definition{$package}{$subname} ) {
+
+ # Check for multiple definitions of a sub, but
+ # it is ok to have multiple sub BEGIN, etc,
+ # so we do not complain if name is all caps
+ if ( $saw_function_definition{$package}{$subname}
+ && $subname !~ /^[A-Z]+$/ )
+ {
my $lno = $saw_function_definition{$package}{$subname};
warning(
"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
# -1 - no
my ( $i, $rtokens, $max_token_index ) = @_;
my $next_token = $$rtokens[ $i + 1 ];
- if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier
+ if ( $next_token =~ /^[cgimosxp]/ ) { $i++; } # skip possible modifier
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
@opening_brace_names = qw# '{' '[' '(' '?' #;
@closing_brace_names = qw# '}' ']' ')' ':' #;
- ## TESTING: added ~~
my @digraphs = qw(
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
<= >= == =~ !~ != ++ -- /= x= ~~
);
@is_digraph{@digraphs} = (1) x scalar(@digraphs);
- my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> );
+ my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
# make a hash of all valid token types for self-checking the tokenizer
# These tokens may precede a code block
# patched for SWITCH/CASE
- @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
+ @_ =
+ qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach map grep sort
switch case given when);
@is_code_block_token{@_} = (1) x scalar(@_);
LE
LT
NE
+ UNITCHECK
abs
accept
alarm
bind
binmode
bless
+ break
caller
chdir
chmod
my @value_requestor_type = qw#
L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
- <= >= == != => \ > < % * / ? & | ** <=> ~~
+ <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
f F pp mm Y p m U J G j >> << ^ t
#;
push( @value_requestor_type, ',' )
where F<filename> is a short script of interest. This will produce
F<filename.DEBUG> with interleaved lines of text and their token types.
-The -D flag has been in perltidy from the beginning for this purpose.
+The B<-D> flag has been in perltidy from the beginning for this purpose.
If you want to see the code which creates this file, it is
C<write_debug_entry> in Tidy.pm.
=head1 VERSION
-This man page documents Perl::Tidy version 20060719.
+This man page documents Perl::Tidy version 20071205.
=head1 AUTHOR