X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=lib%2FPerl%2FTidy.pm;h=edcec6d2f1d33eb20d3a867c4b842c9519c0dd8c;hb=d08e4809a710a08f2cc0cb5a6f3964582098e84c;hp=05a60632de8ad82de6fe933805d00947a2a1c592;hpb=b2d8cef8551aa63c2718732e53392e7ebdd6c75f;p=perltidy.git diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm index 05a6063..edcec6d 100644 --- a/lib/Perl/Tidy.pm +++ b/lib/Perl/Tidy.pm @@ -3,7 +3,7 @@ # # perltidy - a perl script indenter and formatter # -# Copyright (c) 2000-2012 by Steve Hancock +# Copyright (c) 2000-2017 by Steve Hancock # Distributed under the GPL license agreement; see file COPYING # # This program is free software; you can redistribute it and/or modify @@ -16,11 +16,11 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# For brief instructions instructions, try 'perltidy -h'. +# For brief instructions, try 'perltidy -h'. # For more complete documentation, try 'man perltidy' # or visit http://perltidy.sourceforge.net # @@ -53,9 +53,11 @@ ############################################################ package Perl::Tidy; -use 5.004; # need IO::File from 5.004 or later -BEGIN { $^W = 1; } # turn on warnings +# Actually should use a version later than about 5.8.5 to use +# wide characters. +use 5.004; # need IO::File from 5.004 or later +use warnings; use strict; use Exporter; use Carp; @@ -66,18 +68,22 @@ use vars qw{ @ISA @EXPORT $missing_file_spec + $fh_stderr + $rOpts_character_encoding }; @ISA = qw( Exporter ); @EXPORT = qw( &perltidy ); use Cwd; +use Encode (); use IO::File; use File::Basename; use File::Copy; +use File::Temp qw(tempfile); BEGIN { - ( $VERSION = q($Id: Tidy.pm,v 1.74 2012/07/01 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker + ( $VERSION = q($Id: Tidy.pm,v 1.74 2017/05/21 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker } sub streamhandle { @@ -119,7 +125,10 @@ sub streamhandle { # skipped and we can just let it crash if there is no # getline. if ( $mode =~ /[rR]/ ) { - if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { + + # RT#97159; part 1 of 2: updated to use 'can' + ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { + if ( $ref->can('getline') ) { $New = sub { $filename }; } else { @@ -136,7 +145,10 @@ EOM # Accept an object with a print method for writing. # See note above about IO::File if ( $mode =~ /[wW]/ ) { - if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) { + + # RT#97159; part 2 of 2: updated to use 'can' + ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) { + if ( $ref->can('print') ) { $New = sub { $filename }; } else { @@ -162,7 +174,8 @@ EOM } } $fh = $New->( $filename, $mode ) - or warn "Couldn't open file:$filename in mode:$mode : $!\n"; + or Warn("Couldn't open file:$filename in mode:$mode : $!\n"); + return $fh, ( $ref or $filename ); } @@ -234,38 +247,6 @@ sub catfile { return undef; } -sub make_temporary_filename { - - # Make a temporary filename. - # FIXME: return both a name and opened filehandle - # - # The POSIX tmpnam() function tends to be unreliable for non-unix systems - # (at least for the win32 systems that I've tested), so use a pre-defined - # name for them. A disadvantage of this is that two perltidy - # runs in the same working directory may conflict. However, the chance of - # that is small and managable by the user, especially on systems for which - # the POSIX tmpnam function doesn't work. - my $name = "perltidy.TMP"; - if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) { - return $name; - } - eval "use POSIX qw(tmpnam)"; - if ($@) { return $name } - use IO::File; - - # just make a couple of tries before giving up and using the default - for ( 0 .. 3 ) { - my $tmpname = tmpnam(); - my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL ); - if ($fh) { - $fh->close(); - return ($tmpname); - last; - } - } - return ($name); -} - # Here is a map of the flow of data from the input source to the output # line sink: # @@ -301,119 +282,117 @@ sub make_temporary_filename { # messages. It writes a .LOG file, which may be saved with a # '-log' or a '-g' flag. -{ +sub perltidy { - # variables needed by interrupt handler: - my $tokenizer; - my $input_file; - - # this routine may be called to give a status report if interrupted. If a - # parameter is given, it will call exit with that parameter. This is no - # longer used because it works under Unix but not under Windows. - sub interrupt_handler { - - my $exit_flag = shift; - print STDERR "perltidy interrupted"; - if ($tokenizer) { - my $input_line_number = - Perl::Tidy::Tokenizer::get_input_line_number(); - print STDERR " at line $input_line_number"; - } - if ($input_file) { - - if ( ref $input_file ) { print STDERR " of reference to:" } - else { print STDERR " of file:" } - print STDERR " $input_file"; - } - print STDERR "\n"; - exit $exit_flag if defined($exit_flag); - } - - sub perltidy { - - my %defaults = ( - argv => undef, - destination => undef, - formatter => undef, - logfile => undef, - errorfile => undef, - perltidyrc => undef, - source => undef, - stderr => undef, - dump_options => undef, - dump_options_type => undef, - dump_getopt_flags => undef, - dump_options_category => undef, - dump_options_range => undef, - dump_abbreviations => undef, - prefilter => undef, - postfilter => undef, - ); + my %defaults = ( + argv => undef, + destination => undef, + formatter => undef, + logfile => undef, + errorfile => undef, + perltidyrc => undef, + source => undef, + stderr => undef, + dump_options => undef, + dump_options_type => undef, + dump_getopt_flags => undef, + dump_options_category => undef, + dump_options_range => undef, + dump_abbreviations => undef, + prefilter => undef, + postfilter => undef, + ); - # don't overwrite callers ARGV - local @ARGV = @ARGV; + # don't overwrite callers ARGV + local @ARGV = @ARGV; + local *STDERR = *STDERR; - my %input_hash = @_; + my %input_hash = @_; - if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { - local $" = ')('; - my @good_keys = sort keys %defaults; - @bad_keys = sort @bad_keys; - confess <('dump_options'); - my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); - my $dump_options_category = $get_hash_ref->('dump_options_category'); - my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); - my $dump_options_range = $get_hash_ref->('dump_options_range'); - - # validate dump_options_type - if ( defined($dump_options) ) { - unless ( defined($dump_options_type) ) { - $dump_options_type = 'perltidyrc'; - } - unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { - croak <print( $_[0] ); } + + sub Exit ($) { + if ( $_[0] ) { goto ERROR_EXIT } + else { goto NORMAL_EXIT } + } + + sub Die ($) { Warn $_[0]; Exit(1); } + + # extract various dump parameters + my $dump_options_type = $input_hash{'dump_options_type'}; + my $dump_options = $get_hash_ref->('dump_options'); + my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); + my $dump_options_category = $get_hash_ref->('dump_options_category'); + my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); + my $dump_options_range = $get_hash_ref->('dump_options_range'); + + # validate dump_options_type + if ( defined($dump_options) ) { + unless ( defined($dump_options_type) ) { + $dump_options_type = 'perltidyrc'; + } + unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { + croak <new(); - } + # if the user defines a formatter, there is no output stream, + # but we need a null stream to keep coding simple + $destination_stream = Perl::Tidy::DevNull->new(); + } - # see if ARGV is overridden - if ( defined($argv) ) { + # see if ARGV is overridden + if ( defined($argv) ) { - my $rargv = ref $argv; - if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } + my $rargv = ref $argv; + if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } - # ref to ARRAY - if ($rargv) { - if ( $rargv eq 'ARRAY' ) { - @ARGV = @$argv; - } - else { - croak <{$opt} = $flag; + # Examples: + # some-option=s + # some-option=i + # some-option:i + # some-option! + if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) { + $opt = $1; + $flag = $2; } + $dump_getopt_flags->{$opt} = $flag; } + } - if ( defined($dump_options_category) ) { - $quit_now = 1; - %{$dump_options_category} = %{$roption_category}; - } - - if ( defined($dump_options_range) ) { - $quit_now = 1; - %{$dump_options_range} = %{$roption_range}; - } + if ( defined($dump_options_category) ) { + $quit_now = 1; + %{$dump_options_category} = %{$roption_category}; + } - if ( defined($dump_abbreviations) ) { - $quit_now = 1; - %{$dump_abbreviations} = %{$rexpansion}; - } + if ( defined($dump_options_range) ) { + $quit_now = 1; + %{$dump_options_range} = %{$roption_range}; + } - if ( defined($dump_options) ) { - $quit_now = 1; - %{$dump_options} = %{$rOpts}; - } + if ( defined($dump_abbreviations) ) { + $quit_now = 1; + %{$dump_abbreviations} = %{$rexpansion}; + } - return if ($quit_now); + if ( defined($dump_options) ) { + $quit_now = 1; + %{$dump_options} = %{$rOpts}; + } - # make printable string of options for this run as possible diagnostic - my $readable_options = readable_options( $rOpts, $roption_string ); + Exit 0 if ($quit_now); - # dump from command line - if ( $rOpts->{'dump-options'} ) { - print STDOUT $readable_options; - exit 0; - } + # make printable string of options for this run as possible diagnostic + my $readable_options = readable_options( $rOpts, $roption_string ); - #--------------------------------------------------------------- - # check parameters and their interactions - #--------------------------------------------------------------- - check_options( $rOpts, $is_Windows, $Windows_type, - $rpending_complaint ); + # dump from command line + if ( $rOpts->{'dump-options'} ) { + print STDOUT $readable_options; + Exit 0; + } - if ($user_formatter) { - $rOpts->{'format'} = 'user'; - } + #--------------------------------------------------------------- + # check parameters and their interactions + #--------------------------------------------------------------- + my $tabsize = + check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ); - # there must be one entry here for every possible format - my %default_file_extension = ( - tidy => 'tdy', - html => 'html', - user => '', - ); + if ($user_formatter) { + $rOpts->{'format'} = 'user'; + } - # be sure we have a valid output format - unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { - my $formats = join ' ', - sort map { "'" . $_ . "'" } keys %default_file_extension; - my $fmt = $rOpts->{'format'}; - die "-format='$fmt' but must be one of: $formats\n"; - } + # there must be one entry here for every possible format + my %default_file_extension = ( + tidy => 'tdy', + html => 'html', + user => '', + ); - my $output_extension = - make_extension( $rOpts->{'output-file-extension'}, - $default_file_extension{ $rOpts->{'format'} }, $dot ); + $rOpts_character_encoding = $rOpts->{'character-encoding'}; - # If the backup extension contains a / character then the backup should - # be deleted when the -b option is used. On older versions of - # perltidy this will generate an error message due to an illegal - # file name. - # - # A backup file will still be generated but will be deleted - # at the end. If -bext='/' then this extension will be - # the default 'bak'. Otherwise it will be whatever characters - # remains after all '/' characters are removed. For example: - # -bext extension slashes - # '/' bak 1 - # '/delete' delete 1 - # 'delete/' delete 1 - # '/dev/null' devnull 2 (Currently not allowed) - my $bext = $rOpts->{'backup-file-extension'}; - my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); - - # At present only one forward slash is allowed. In the future multiple - # slashes may be allowed to allow for other options - if ( $delete_backup > 1 ) { - die "-bext=$bext contains more than one '/'\n"; - } - - my $backup_extension = - make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); - - my $html_toc_extension = - make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); - - my $html_src_extension = - make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); - - # check for -b option; - # silently ignore unless beautify mode - my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} - && $rOpts->{'format'} eq 'tidy'; - - # turn off -b with warnings in case of conflicts with other options - if ($in_place_modify) { - if ( $rOpts->{'standard-output'} ) { - warn "Ignoring -b; you may not use -b and -st together\n"; - $in_place_modify = 0; - } - if ($destination_stream) { - warn -"Ignoring -b; you may not specify a destination stream and -b together\n"; - $in_place_modify = 0; - } - if ( ref($source_stream) ) { - warn -"Ignoring -b; you may not specify a source array and -b together\n"; - $in_place_modify = 0; - } - if ( $rOpts->{'outfile'} ) { - warn "Ignoring -b; you may not use -b and -o together\n"; - $in_place_modify = 0; - } - if ( defined( $rOpts->{'output-path'} ) ) { - warn "Ignoring -b; you may not use -b and -opath together\n"; - $in_place_modify = 0; - } - } + # be sure we have a valid output format + unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { + my $formats = join ' ', + sort map { "'" . $_ . "'" } keys %default_file_extension; + my $fmt = $rOpts->{'format'}; + Die "-format='$fmt' but must be one of: $formats\n"; + } - Perl::Tidy::Formatter::check_options($rOpts); - if ( $rOpts->{'format'} eq 'html' ) { - Perl::Tidy::HtmlWriter->check_options($rOpts); - } + my $output_extension = make_extension( $rOpts->{'output-file-extension'}, + $default_file_extension{ $rOpts->{'format'} }, $dot ); - # make the pattern of file extensions that we shouldn't touch - my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; - if ($output_extension) { - my $ext = quotemeta($output_extension); - $forbidden_file_extensions .= "|$ext"; - } - if ( $in_place_modify && $backup_extension ) { - my $ext = quotemeta($backup_extension); - $forbidden_file_extensions .= "|$ext"; - } - $forbidden_file_extensions .= ')$'; - - # Create a diagnostics object if requested; - # This is only useful for code development - my $diagnostics_object = undef; - if ( $rOpts->{'DIAGNOSTICS'} ) { - $diagnostics_object = Perl::Tidy::Diagnostics->new(); + # If the backup extension contains a / character then the backup should + # be deleted when the -b option is used. On older versions of + # perltidy this will generate an error message due to an illegal + # file name. + # + # A backup file will still be generated but will be deleted + # at the end. If -bext='/' then this extension will be + # the default 'bak'. Otherwise it will be whatever characters + # remains after all '/' characters are removed. For example: + # -bext extension slashes + # '/' bak 1 + # '/delete' delete 1 + # 'delete/' delete 1 + # '/dev/null' devnull 2 (Currently not allowed) + my $bext = $rOpts->{'backup-file-extension'}; + my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); + + # At present only one forward slash is allowed. In the future multiple + # slashes may be allowed to allow for other options + if ( $delete_backup > 1 ) { + Die "-bext=$bext contains more than one '/'\n"; + } + + my $backup_extension = + make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); + + my $html_toc_extension = + make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); + + my $html_src_extension = + make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); + + # check for -b option; + # silently ignore unless beautify mode + my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} + && $rOpts->{'format'} eq 'tidy'; + + # Turn off -b with warnings in case of conflicts with other options. + # NOTE: Do this silently, without warnings, if there is a source or + # destination stream, or standard output is used. This is because the -b + # flag may have been in a .perltidyrc file and warnings break + # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014. + if ($in_place_modify) { + if ( $rOpts->{'standard-output'} ) { +## my $msg = "Ignoring -b; you may not use -b and -st together"; +## $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); +## Warn "$msg\n"; + $in_place_modify = 0; + } + if ($destination_stream) { + ##Warn "Ignoring -b; you may not specify a destination stream and -b together\n"; + $in_place_modify = 0; + } + if ( ref($source_stream) ) { + ##Warn "Ignoring -b; you may not specify a source array and -b together\n"; + $in_place_modify = 0; + } + if ( $rOpts->{'outfile'} ) { + ##Warn "Ignoring -b; you may not use -b and -o together\n"; + $in_place_modify = 0; + } + if ( defined( $rOpts->{'output-path'} ) ) { + ##Warn "Ignoring -b; you may not use -b and -opath together\n"; + $in_place_modify = 0; + } + } + + Perl::Tidy::Formatter::check_options($rOpts); + if ( $rOpts->{'format'} eq 'html' ) { + Perl::Tidy::HtmlWriter->check_options($rOpts); + } + + # make the pattern of file extensions that we shouldn't touch + my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; + if ($output_extension) { + my $ext = quotemeta($output_extension); + $forbidden_file_extensions .= "|$ext"; + } + if ( $in_place_modify && $backup_extension ) { + my $ext = quotemeta($backup_extension); + $forbidden_file_extensions .= "|$ext"; + } + $forbidden_file_extensions .= ')$'; + + # Create a diagnostics object if requested; + # This is only useful for code development + my $diagnostics_object = undef; + if ( $rOpts->{'DIAGNOSTICS'} ) { + $diagnostics_object = Perl::Tidy::Diagnostics->new(); + } + + # no filenames should be given if input is from an array + if ($source_stream) { + if ( @ARGV > 0 ) { + Die +"You may not specify any filenames when a source array is given\n"; } - # no filenames should be given if input is from an array - if ($source_stream) { - if ( @ARGV > 0 ) { - die -"You may not specify any filenames when a source array is given\n"; - } + # we'll stuff the source array into ARGV + unshift( @ARGV, $source_stream ); - # we'll stuff the source array into ARGV - unshift( @ARGV, $source_stream ); + # No special treatment for source stream which is a filename. + # This will enable checks for binary files and other bad stuff. + $source_stream = undef unless ref($source_stream); + } - # No special treatment for source stream which is a filename. - # This will enable checks for binary files and other bad stuff. - $source_stream = undef unless ref($source_stream); - } + # use stdin by default if no source array and no args + else { + unshift( @ARGV, '-' ) unless @ARGV; + } - # use stdin by default if no source array and no args - else { - unshift( @ARGV, '-' ) unless @ARGV; - } + #--------------------------------------------------------------- + # Ready to go... + # main loop to process all files in argument list + #--------------------------------------------------------------- + my $number_of_files = @ARGV; + my $formatter = undef; + my $tokenizer = undef; + while ( my $input_file = shift @ARGV ) { + my $fileroot; + my $input_file_permissions; #--------------------------------------------------------------- - # Ready to go... - # main loop to process all files in argument list + # prepare this input stream #--------------------------------------------------------------- - my $number_of_files = @ARGV; - my $formatter = undef; - $tokenizer = undef; - while ( $input_file = shift @ARGV ) { - my $fileroot; - my $input_file_permissions; + if ($source_stream) { + $fileroot = "perltidy"; - #--------------------------------------------------------------- - # prepare this input stream - #--------------------------------------------------------------- - if ($source_stream) { - $fileroot = "perltidy"; + # If the source is from an array or string, then .LOG output + # is only possible if a logfile stream is specified. This prevents + # unexpected perltidy.LOG files. + if ( !defined($logfile_stream) ) { + $logfile_stream = Perl::Tidy::DevNull->new(); } - elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN - $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc - $in_place_modify = 0; - } - else { - $fileroot = $input_file; - unless ( -e $input_file ) { - - # file doesn't exist - check for a file glob - if ( $input_file =~ /([\?\*\[\{])/ ) { - - # Windows shell may not remove quotes, so do it - my $input_file = $input_file; - if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } - if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } - my $pattern = fileglob_to_re($input_file); - ##eval "/$pattern/"; - if ( !$@ && opendir( DIR, './' ) ) { - my @files = - grep { /$pattern/ && !-d $_ } readdir(DIR); - closedir(DIR); - if (@files) { - unshift @ARGV, @files; - next; - } + } + elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN + $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc + $in_place_modify = 0; + } + else { + $fileroot = $input_file; + unless ( -e $input_file ) { + + # file doesn't exist - check for a file glob + if ( $input_file =~ /([\?\*\[\{])/ ) { + + # Windows shell may not remove quotes, so do it + my $input_file = $input_file; + if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } + if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } + my $pattern = fileglob_to_re($input_file); + ##eval "/$pattern/"; + if ( !$@ && opendir( DIR, './' ) ) { + my @files = + grep { /$pattern/ && !-d $_ } readdir(DIR); + closedir(DIR); + if (@files) { + unshift @ARGV, @files; + next; } } - print "skipping file: '$input_file': no matches found\n"; - next; } + Warn "skipping file: '$input_file': no matches found\n"; + next; + } - unless ( -f $input_file ) { - print "skipping file: $input_file: not a regular file\n"; - next; - } + unless ( -f $input_file ) { + Warn "skipping file: $input_file: not a regular file\n"; + next; + } - # As a safety precaution, skip zero length files. - # If for example a source file got clobberred somehow, - # the old .tdy or .bak files might still exist so we - # shouldn't overwrite them with zero length files. - unless ( -s $input_file ) { - print "skipping file: $input_file: Zero size\n"; - next; - } + # As a safety precaution, skip zero length files. + # If for example a source file got clobbered somehow, + # the old .tdy or .bak files might still exist so we + # shouldn't overwrite them with zero length files. + unless ( -s $input_file ) { + Warn "skipping file: $input_file: Zero size\n"; + next; + } - unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { - print -"skipping file: $input_file: Non-text (override with -f)\n"; - next; - } + unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { + Warn + "skipping file: $input_file: Non-text (override with -f)\n"; + next; + } - # we should have a valid filename now - $fileroot = $input_file; - $input_file_permissions = ( stat $input_file )[2] & 07777; + # we should have a valid filename now + $fileroot = $input_file; + $input_file_permissions = ( stat $input_file )[2] & 07777; - if ( $^O eq 'VMS' ) { - ( $fileroot, $dot ) = check_vms_filename($fileroot); - } + if ( $^O eq 'VMS' ) { + ( $fileroot, $dot ) = check_vms_filename($fileroot); + } - # add option to change path here - if ( defined( $rOpts->{'output-path'} ) ) { + # add option to change path here + if ( defined( $rOpts->{'output-path'} ) ) { - my ( $base, $old_path ) = fileparse($fileroot); - my $new_path = $rOpts->{'output-path'}; - unless ( -d $new_path ) { - unless ( mkdir $new_path, 0777 ) { - die "unable to create directory $new_path: $!\n"; - } + my ( $base, $old_path ) = fileparse($fileroot); + my $new_path = $rOpts->{'output-path'}; + unless ( -d $new_path ) { + unless ( mkdir $new_path, 0777 ) { + Die "unable to create directory $new_path: $!\n"; } - my $path = $new_path; - $fileroot = catfile( $path, $base ); - unless ($fileroot) { - die <new( $input_file, $rOpts, + $rpending_logfile_message ); + next unless ($source_object); + + # Prefilters and postfilters: The prefilter is a code reference + # that will be applied to the source before tidying, and the + # postfilter is a code reference to the result before outputting. + if ( + $prefilter + || ( $rOpts_character_encoding + && $rOpts_character_encoding eq 'utf8' ) + ) + { + my $buf = ''; + while ( my $line = $source_object->get_line() ) { + $buf .= $line; } - # the 'source_object' supplies a method to read the input file - my $source_object = - Perl::Tidy::LineSource->new( $input_file, $rOpts, - $rpending_logfile_message ); - next unless ($source_object); + $buf = $prefilter->($buf) if $prefilter; - # Prefilters and postfilters: The prefilter is a code reference - # that will be applied to the source before tidying, and the - # postfilter is a code reference to the result before outputting. - if ($prefilter) { - my $buf = ''; - while ( my $line = $source_object->get_line() ) { - $buf .= $line; + if ( $rOpts_character_encoding + && $rOpts_character_encoding eq 'utf8' + && !utf8::is_utf8($buf) ) + { + eval { + $buf = Encode::decode( 'UTF-8', $buf, + Encode::FB_CROAK | Encode::LEAVE_SRC ); + }; + if ($@) { + Warn +"skipping file: $input_file: Unable to decode source as UTF-8\n"; + next; } - $buf = $prefilter->($buf); - - $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, - $rpending_logfile_message ); } - # register this file name with the Diagnostics package - $diagnostics_object->set_input_file($input_file) - if $diagnostics_object; + $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, + $rpending_logfile_message ); + } - #--------------------------------------------------------------- - # prepare the output stream - #--------------------------------------------------------------- - my $output_file = undef; - my $actual_output_extension; + # register this file name with the Diagnostics package + $diagnostics_object->set_input_file($input_file) + if $diagnostics_object; - if ( $rOpts->{'outfile'} ) { + #--------------------------------------------------------------- + # prepare the output stream + #--------------------------------------------------------------- + my $output_file = undef; + my $actual_output_extension; - if ( $number_of_files <= 1 ) { + if ( $rOpts->{'outfile'} ) { - if ( $rOpts->{'standard-output'} ) { - die "You may not use -o and -st together\n"; - } - elsif ($destination_stream) { - die -"You may not specify a destination array and -o together\n"; - } - elsif ( defined( $rOpts->{'output-path'} ) ) { - die "You may not specify -o and -opath together\n"; - } - elsif ( defined( $rOpts->{'output-file-extension'} ) ) { - die "You may not specify -o and -oext together\n"; - } - $output_file = $rOpts->{outfile}; + if ( $number_of_files <= 1 ) { - # make sure user gives a file name after -o - if ( $output_file =~ /^-/ ) { - die "You must specify a valid filename after -o\n"; - } - - # do not overwrite input file with -o - if ( defined($input_file_permissions) - && ( $output_file eq $input_file ) ) - { - die - "Use 'perltidy -b $input_file' to modify in-place\n"; - } + if ( $rOpts->{'standard-output'} ) { + my $msg = "You may not use -o and -st together"; + $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); + Die "$msg\n"; } - else { - die "You may not use -o with more than one input file\n"; + elsif ($destination_stream) { + Die +"You may not specify a destination array and -o together\n"; } - } - elsif ( $rOpts->{'standard-output'} ) { - if ($destination_stream) { - die -"You may not specify a destination array and -st together\n"; + elsif ( defined( $rOpts->{'output-path'} ) ) { + Die "You may not specify -o and -opath together\n"; + } + elsif ( defined( $rOpts->{'output-file-extension'} ) ) { + Die "You may not specify -o and -oext together\n"; } - $output_file = '-'; + $output_file = $rOpts->{outfile}; - if ( $number_of_files <= 1 ) { + # make sure user gives a file name after -o + if ( $output_file =~ /^-/ ) { + Die "You must specify a valid filename after -o\n"; } - else { - die "You may not use -st with more than one input file\n"; + + # do not overwrite input file with -o + if ( defined($input_file_permissions) + && ( $output_file eq $input_file ) ) + { + Die "Use 'perltidy -b $input_file' to modify in-place\n"; } } - elsif ($destination_stream) { - $output_file = $destination_stream; + else { + Die "You may not use -o with more than one input file\n"; } - elsif ($source_stream) { # source but no destination goes to stdout - $output_file = '-'; + } + elsif ( $rOpts->{'standard-output'} ) { + if ($destination_stream) { + my $msg = + "You may not specify a destination array and -st together\n"; + $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); + Die "$msg\n"; } - elsif ( $input_file eq '-' ) { - $output_file = '-'; + $output_file = '-'; + + if ( $number_of_files <= 1 ) { } else { - if ($in_place_modify) { - $output_file = IO::File->new_tmpfile() - or die "cannot open temp file for -b option: $!\n"; - } - else { - $actual_output_extension = $output_extension; - $output_file = $fileroot . $output_extension; - } + Die "You may not use -st with more than one input file\n"; + } + } + elsif ($destination_stream) { + $output_file = $destination_stream; + } + elsif ($source_stream) { # source but no destination goes to stdout + $output_file = '-'; + } + elsif ( $input_file eq '-' ) { + $output_file = '-'; + } + else { + if ($in_place_modify) { + $output_file = IO::File->new_tmpfile() + or Die "cannot open temp file for -b option: $!\n"; } + else { + $actual_output_extension = $output_extension; + $output_file = $fileroot . $output_extension; + } + } - # the 'sink_object' knows how to write the output file - my $tee_file = $fileroot . $dot . "TEE"; + # the 'sink_object' knows how to write the output file + my $tee_file = $fileroot . $dot . "TEE"; - my $line_separator = $rOpts->{'output-line-ending'}; - if ( $rOpts->{'preserve-line-endings'} ) { - $line_separator = find_input_line_ending($input_file); - } + my $line_separator = $rOpts->{'output-line-ending'}; + if ( $rOpts->{'preserve-line-endings'} ) { + $line_separator = find_input_line_ending($input_file); + } + + # Eventually all I/O may be done with binmode, but for now it is + # only done when a user requests a particular line separator + # through the -ple or -ole flags + my $binmode = defined($line_separator) + || defined($rOpts_character_encoding); + $line_separator = "\n" unless defined($line_separator); + + my ( $sink_object, $postfilter_buffer ); + if ($postfilter) { + $sink_object = + Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, $binmode ); + } + else { + $sink_object = + Perl::Tidy::LineSink->new( $output_file, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, $binmode ); + } + + #--------------------------------------------------------------- + # initialize the error logger for this file + #--------------------------------------------------------------- + my $warning_file = $fileroot . $dot . "ERR"; + if ($errorfile_stream) { $warning_file = $errorfile_stream } + my $log_file = $fileroot . $dot . "LOG"; + if ($logfile_stream) { $log_file = $logfile_stream } + + my $logger_object = + Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, + $fh_stderr, $saw_extrude ); + write_logfile_header( + $rOpts, $logger_object, $config_file, + $rraw_options, $Windows_type, $readable_options, + ); + if ($$rpending_logfile_message) { + $logger_object->write_logfile_entry($$rpending_logfile_message); + } + if ($$rpending_complaint) { + $logger_object->complain($$rpending_complaint); + } + + #--------------------------------------------------------------- + # initialize the debug object, if any + #--------------------------------------------------------------- + my $debugger_object = undef; + if ( $rOpts->{DEBUG} ) { + $debugger_object = + Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); + } - # Eventually all I/O may be done with binmode, but for now it is - # only done when a user requests a particular line separator - # through the -ple or -ole flags - my $binmode = 0; - if ( defined($line_separator) ) { $binmode = 1 } - else { $line_separator = "\n" } + #--------------------------------------------------------------- + # loop over iterations for one source stream + #--------------------------------------------------------------- - my ( $sink_object, $postfilter_buffer ); - if ($postfilter) { + # We will do a convergence test if 3 or more iterations are allowed. + # It would be pointless for fewer because we have to make at least + # two passes before we can see if we are converged, and the test + # would just slow things down. + my $max_iterations = $rOpts->{'iterations'}; + my $convergence_log_message; + my %saw_md5; + my $do_convergence_test = $max_iterations > 2; + if ($do_convergence_test) { + eval "use Digest::MD5 qw(md5_hex)"; + $do_convergence_test = !$@; + + # Trying to avoid problems with ancient versions of perl because + # I don't know in which version number utf8::encode was introduced. + eval { my $string = "perltidy"; utf8::encode($string) }; + $do_convergence_test = $do_convergence_test && !$@; + } + + # save objects to allow redirecting output during iterations + my $sink_object_final = $sink_object; + my $debugger_object_final = $debugger_object; + my $logger_object_final = $logger_object; + + for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) { + + # send output stream to temp buffers until last iteration + my $sink_buffer; + if ( $iter < $max_iterations ) { $sink_object = - Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, + Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file, $line_separator, $rOpts, $rpending_logfile_message, $binmode ); } else { - $sink_object = - Perl::Tidy::LineSink->new( $output_file, $tee_file, - $line_separator, $rOpts, $rpending_logfile_message, - $binmode ); + $sink_object = $sink_object_final; } - #--------------------------------------------------------------- - # initialize the error logger - #--------------------------------------------------------------- - my $warning_file = $fileroot . $dot . "ERR"; - if ($errorfile_stream) { $warning_file = $errorfile_stream } - my $log_file = $fileroot . $dot . "LOG"; - if ($logfile_stream) { $log_file = $logfile_stream } - - my $logger_object = - Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, - $saw_extrude ); - write_logfile_header( - $rOpts, $logger_object, $config_file, - $rraw_options, $Windows_type, $readable_options, - ); - if ($$rpending_logfile_message) { - $logger_object->write_logfile_entry($$rpending_logfile_message); + # Save logger, debugger output only on pass 1 because: + # (1) line number references must be to the starting + # source, not an intermediate result, and + # (2) we need to know if there are errors so we can stop the + # iterations early if necessary. + if ( $iter > 1 ) { + $debugger_object = undef; + $logger_object = undef; + } + + #------------------------------------------------------------ + # create a formatter for this file : html writer or + # pretty printer + #------------------------------------------------------------ + + # we have to delete any old formatter because, for safety, + # the formatter will check to see that there is only one. + $formatter = undef; + + if ($user_formatter) { + $formatter = $user_formatter; + } + elsif ( $rOpts->{'format'} eq 'html' ) { + $formatter = + Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, + $actual_output_extension, $html_toc_extension, + $html_src_extension ); + } + elsif ( $rOpts->{'format'} eq 'tidy' ) { + $formatter = Perl::Tidy::Formatter->new( + logger_object => $logger_object, + diagnostics_object => $diagnostics_object, + sink_object => $sink_object, + ); } - if ($$rpending_complaint) { - $logger_object->complain($$rpending_complaint); + else { + Die "I don't know how to do -format=$rOpts->{'format'}\n"; } - #--------------------------------------------------------------- - # initialize the debug object, if any - #--------------------------------------------------------------- - my $debugger_object = undef; - if ( $rOpts->{DEBUG} ) { - $debugger_object = - Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); + unless ($formatter) { + Die "Unable to continue with $rOpts->{'format'} formatting\n"; } #--------------------------------------------------------------- - # loop over iterations for one source stream + # create the tokenizer for this file #--------------------------------------------------------------- + $tokenizer = undef; # must destroy old tokenizer + $tokenizer = Perl::Tidy::Tokenizer->new( + source_object => $source_object, + logger_object => $logger_object, + debugger_object => $debugger_object, + diagnostics_object => $diagnostics_object, + tabsize => $tabsize, + + starting_level => $rOpts->{'starting-indentation-level'}, + indent_columns => $rOpts->{'indent-columns'}, + look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, + look_for_autoloader => $rOpts->{'look-for-autoloader'}, + look_for_selfloader => $rOpts->{'look-for-selfloader'}, + trim_qw => $rOpts->{'trim-qw'}, + extended_syntax => $rOpts->{'extended-syntax'}, + + continuation_indentation => + $rOpts->{'continuation-indentation'}, + outdent_labels => $rOpts->{'outdent-labels'}, + ); - # We will do a convergence test if 3 or more iterations are allowed. - # It would be pointless for fewer because we have to make at least - # two passes before we can see if we are converged, and the test - # would just slow things down. - my $max_iterations = $rOpts->{'iterations'}; - my $convergence_log_message; - my %saw_md5; - my $do_convergence_test = $max_iterations > 2; - if ($do_convergence_test) { - eval "use Digest::MD5 qw(md5_hex)"; - $do_convergence_test = !$@; - } - - # save objects to allow redirecting output during iterations - my $sink_object_final = $sink_object; - my $debugger_object_final = $debugger_object; - my $logger_object_final = $logger_object; - - for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) { - - # send output stream to temp buffers until last iteration - my $sink_buffer; - if ( $iter < $max_iterations ) { - $sink_object = - Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file, - $line_separator, $rOpts, $rpending_logfile_message, - $binmode ); - } - else { - $sink_object = $sink_object_final; - } - - # Save logger, debugger output only on pass 1 because: - # (1) line number references must be to the starting - # source, not an intermediate result, and - # (2) we need to know if there are errors so we can stop the - # iterations early if necessary. - if ( $iter > 1 ) { - $debugger_object = undef; - $logger_object = undef; - } - - #------------------------------------------------------------ - # create a formatter for this file : html writer or - # pretty printer - #------------------------------------------------------------ - - # we have to delete any old formatter because, for safety, - # the formatter will check to see that there is only one. - $formatter = undef; + #--------------------------------------------------------------- + # now we can do it + #--------------------------------------------------------------- + process_this_file( $tokenizer, $formatter ); - if ($user_formatter) { - $formatter = $user_formatter; - } - elsif ( $rOpts->{'format'} eq 'html' ) { - $formatter = - Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, - $actual_output_extension, $html_toc_extension, - $html_src_extension ); - } - elsif ( $rOpts->{'format'} eq 'tidy' ) { - $formatter = Perl::Tidy::Formatter->new( - logger_object => $logger_object, - diagnostics_object => $diagnostics_object, - sink_object => $sink_object, - ); - } - else { - die "I don't know how to do -format=$rOpts->{'format'}\n"; - } + #--------------------------------------------------------------- + # close the input source and report errors + #--------------------------------------------------------------- + $source_object->close_input_file(); - unless ($formatter) { - die - "Unable to continue with $rOpts->{'format'} formatting\n"; - } + # line source for next iteration (if any) comes from the current + # temporary output buffer + if ( $iter < $max_iterations ) { - #--------------------------------------------------------------- - # create the tokenizer for this file - #--------------------------------------------------------------- - $tokenizer = undef; # must destroy old tokenizer - $tokenizer = Perl::Tidy::Tokenizer->new( - source_object => $source_object, - logger_object => $logger_object, - debugger_object => $debugger_object, - diagnostics_object => $diagnostics_object, - starting_level => $rOpts->{'starting-indentation-level'}, - tabs => $rOpts->{'tabs'}, - entab_leading_space => $rOpts->{'entab-leading-whitespace'}, - indent_columns => $rOpts->{'indent-columns'}, - look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, - look_for_autoloader => $rOpts->{'look-for-autoloader'}, - look_for_selfloader => $rOpts->{'look-for-selfloader'}, - trim_qw => $rOpts->{'trim-qw'}, - ); + $sink_object->close_output_file(); + $source_object = + Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts, + $rpending_logfile_message ); - #--------------------------------------------------------------- - # now we can do it - #--------------------------------------------------------------- - process_this_file( $tokenizer, $formatter ); - - #--------------------------------------------------------------- - # close the input source and report errors - #--------------------------------------------------------------- - $source_object->close_input_file(); - - # line source for next iteration (if any) comes from the current - # temporary output buffer - if ( $iter < $max_iterations ) { - - $sink_object->close_output_file(); - $source_object = - Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts, - $rpending_logfile_message ); - - # stop iterations if errors or converged - my $stop_now = $logger_object->{_warning_count}; - if ($stop_now) { - $convergence_log_message = <{_warning_count}; + if ($stop_now) { + $convergence_log_message = <write_diagnostics( - $convergence_log_message) - if $diagnostics_object; - } - else { - $convergence_log_message = <write_diagnostics( + $convergence_log_message) + if $diagnostics_object; + } + else { + $convergence_log_message = <write_diagnostics( - $convergence_log_message) - if $diagnostics_object && $iterm > 2; - } + $diagnostics_object->write_diagnostics( + $convergence_log_message) + if $diagnostics_object && $iterm > 2; } - } ## end if ($do_convergence_test) + } + } ## end if ($do_convergence_test) - if ($stop_now) { + if ($stop_now) { - # we are stopping the iterations early; - # copy the output stream to its final destination - $sink_object = $sink_object_final; - while ( my $line = $source_object->get_line() ) { - $sink_object->write_line($line); - } - $source_object->close_input_file(); - last; + # we are stopping the iterations early; + # copy the output stream to its final destination + $sink_object = $sink_object_final; + while ( my $line = $source_object->get_line() ) { + $sink_object->write_line($line); } - } ## end if ( $iter < $max_iterations) - } # end loop over iterations for one source file + $source_object->close_input_file(); + last; + } + } ## end if ( $iter < $max_iterations) + } # end loop over iterations for one source file - # restore objects which have been temporarily undefined - # for second and higher iterations - $debugger_object = $debugger_object_final; - $logger_object = $logger_object_final; + # restore objects which have been temporarily undefined + # for second and higher iterations + $debugger_object = $debugger_object_final; + $logger_object = $logger_object_final; - $logger_object->write_logfile_entry($convergence_log_message) - if $convergence_log_message; + $logger_object->write_logfile_entry($convergence_log_message) + if $convergence_log_message; - #--------------------------------------------------------------- - # Perform any postfilter operation - #--------------------------------------------------------------- - if ($postfilter) { - $sink_object->close_output_file(); - $sink_object = - Perl::Tidy::LineSink->new( $output_file, $tee_file, - $line_separator, $rOpts, $rpending_logfile_message, - $binmode ); - my $buf = $postfilter->($postfilter_buffer); - $source_object = - Perl::Tidy::LineSource->new( \$buf, $rOpts, - $rpending_logfile_message ); - ##chomp $buf; - ##foreach my $line ( split( "\n", $buf , -1) ) { - while ( my $line = $source_object->get_line() ) { - $sink_object->write_line($line); - } - $source_object->close_input_file(); + #--------------------------------------------------------------- + # Perform any postfilter operation + #--------------------------------------------------------------- + if ($postfilter) { + $sink_object->close_output_file(); + $sink_object = + Perl::Tidy::LineSink->new( $output_file, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, $binmode ); + my $buf = $postfilter->($postfilter_buffer); + $source_object = + Perl::Tidy::LineSource->new( \$buf, $rOpts, + $rpending_logfile_message ); + while ( my $line = $source_object->get_line() ) { + $sink_object->write_line($line); } + $source_object->close_input_file(); + } - # Save names of the input and output files for syntax check - my $ifname = $input_file; - my $ofname = $output_file; + # Save names of the input and output files for syntax check + my $ifname = $input_file; + my $ofname = $output_file; - #--------------------------------------------------------------- - # handle the -b option (backup and modify in-place) - #--------------------------------------------------------------- - if ($in_place_modify) { - unless ( -f $input_file ) { + #--------------------------------------------------------------- + # handle the -b option (backup and modify in-place) + #--------------------------------------------------------------- + if ($in_place_modify) { + unless ( -f $input_file ) { - # oh, oh, no real file to backup .. - # shouldn't happen because of numerous preliminary checks - die + # oh, oh, no real file to backup .. + # shouldn't happen because of numerous preliminary checks + Die "problem with -b backing up input file '$input_file': not a file\n"; - } - my $backup_name = $input_file . $backup_extension; - if ( -f $backup_name ) { - unlink($backup_name) - or die + } + my $backup_name = $input_file . $backup_extension; + if ( -f $backup_name ) { + unlink($backup_name) + or Die "unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; - } + } - # backup the input file - # we use copy for symlinks, move for regular files - if ( -l $input_file ) { - File::Copy::copy( $input_file, $backup_name ) - or die "File::Copy failed trying to backup source: $!"; - } - else { - rename( $input_file, $backup_name ) - or die + # backup the input file + # we use copy for symlinks, move for regular files + if ( -l $input_file ) { + File::Copy::copy( $input_file, $backup_name ) + or Die "File::Copy failed trying to backup source: $!"; + } + else { + rename( $input_file, $backup_name ) + or Die "problem renaming $input_file to $backup_name for -b option: $!\n"; - } - $ifname = $backup_name; - - # copy the output to the original input file - # NOTE: it would be nice to just close $output_file and use - # File::Copy::copy here, but in this case $output_file is the - # handle of an open nameless temporary file so we would lose - # everything if we closed it. - seek( $output_file, 0, 0 ) - or die - "unable to rewind a temporary file for -b option: $!\n"; - my $fout = IO::File->new("> $input_file") - or die + } + $ifname = $backup_name; + + # copy the output to the original input file + # NOTE: it would be nice to just close $output_file and use + # File::Copy::copy here, but in this case $output_file is the + # handle of an open nameless temporary file so we would lose + # everything if we closed it. + seek( $output_file, 0, 0 ) + or Die "unable to rewind a temporary file for -b option: $!\n"; + my $fout = IO::File->new("> $input_file") + or Die "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n"; - binmode $fout; - my $line; - while ( $line = $output_file->getline() ) { - $fout->print($line); + if ($binmode) { + if ( $rOpts->{'character-encoding'} + && $rOpts->{'character-encoding'} eq 'utf8' ) + { + binmode $fout, ":encoding(UTF-8)"; } - $fout->close(); - $output_file = $input_file; - $ofname = $input_file; + else { binmode $fout } } + my $line; + while ( $line = $output_file->getline() ) { + $fout->print($line); + } + $fout->close(); + $output_file = $input_file; + $ofname = $input_file; + } - #--------------------------------------------------------------- - # clean up and report errors - #--------------------------------------------------------------- - $sink_object->close_output_file() if $sink_object; - $debugger_object->close_debug_file() if $debugger_object; - - # set output file permissions - if ( $output_file && -f $output_file && !-l $output_file ) { - if ($input_file_permissions) { - - # give output script same permissions as input script, but - # make it user-writable or else we can't run perltidy again. - # Thus we retain whatever executable flags were set. - if ( $rOpts->{'format'} eq 'tidy' ) { - chmod( $input_file_permissions | 0600, $output_file ); - } + #--------------------------------------------------------------- + # clean up and report errors + #--------------------------------------------------------------- + $sink_object->close_output_file() if $sink_object; + $debugger_object->close_debug_file() if $debugger_object; - # else use default permissions for html and any other format + # set output file permissions + if ( $output_file && -f $output_file && !-l $output_file ) { + if ($input_file_permissions) { + + # give output script same permissions as input script, but + # make it user-writable or else we can't run perltidy again. + # Thus we retain whatever executable flags were set. + if ( $rOpts->{'format'} eq 'tidy' ) { + chmod( $input_file_permissions | 0600, $output_file ); } - } - #--------------------------------------------------------------- - # Do syntax check if requested and possible - #--------------------------------------------------------------- - my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes - if ( $logger_object - && $rOpts->{'check-syntax'} - && $ifname - && $ofname ) - { - $infile_syntax_ok = - check_syntax( $ifname, $ofname, $logger_object, $rOpts ); + # else use default permissions for html and any other format } + } - #--------------------------------------------------------------- - # remove the original file for in-place modify as follows: - # $delete_backup=0 never - # $delete_backup=1 only if no errors - # $delete_backup>1 always : CURRENTLY NOT ALLOWED, see above - #--------------------------------------------------------------- - if ( $in_place_modify - && $delete_backup - && -f $ifname - && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) ) - { + #--------------------------------------------------------------- + # Do syntax check if requested and possible + #--------------------------------------------------------------- + my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes + if ( $logger_object + && $rOpts->{'check-syntax'} + && $ifname + && $ofname ) + { + $infile_syntax_ok = + check_syntax( $ifname, $ofname, $logger_object, $rOpts ); + } + + #--------------------------------------------------------------- + # remove the original file for in-place modify as follows: + # $delete_backup=0 never + # $delete_backup=1 only if no errors + # $delete_backup>1 always : NOT ALLOWED, too risky, see above + #--------------------------------------------------------------- + if ( $in_place_modify + && $delete_backup + && -f $ifname + && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) ) + { - # As an added safety precaution, do not delete the source file - # if its size has dropped from positive to zero, since this - # could indicate a disaster of some kind, including a hardware - # failure. Actually, this could happen if you had a file of - # all comments (or pod) and deleted everything with -dac (-dap) - # for some reason. - if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) { - warn( + # As an added safety precaution, do not delete the source file + # if its size has dropped from positive to zero, since this + # could indicate a disaster of some kind, including a hardware + # failure. Actually, this could happen if you had a file of + # all comments (or pod) and deleted everything with -dac (-dap) + # for some reason. + if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) { + Warn( "output file '$output_file' missing or zero length; original '$ifname' not deleted\n" - ); - } - else { - unlink($ifname) - or die + ); + } + else { + unlink($ifname) + or Die "unable to remove previous '$ifname' for -b option; check permissions: $!\n"; - } } + } - $logger_object->finish( $infile_syntax_ok, $formatter ) - if $logger_object; - } # end of main loop to process all files - } # end of main program perltidy -} + $logger_object->finish( $infile_syntax_ok, $formatter ) + if $logger_object; + } # end of main loop to process all files + + NORMAL_EXIT: + return 0; + + ERROR_EXIT: + return 1; +} # end of main program perltidy sub get_stream_as_named_file { @@ -1336,12 +1347,7 @@ sub get_stream_as_named_file { my ( $fh_stream, $fh_name ) = Perl::Tidy::streamhandle( $stream, 'r' ); if ($fh_stream) { - my ( $fout, $tmpnam ); - - # FIXME: fix the tmpnam routine to return an open filehandle - $tmpnam = Perl::Tidy::make_temporary_filename(); - $fout = IO::File->new( $tmpnam, 'w' ); - + my ( $fout, $tmpnam ) = File::Temp::tempfile(); if ($fout) { $fname = $tmpnam; $is_tmpfile = 1; @@ -1446,7 +1452,6 @@ sub generate_options { # fll --> fuzzy-line-length # a trivial parameter which gets # turned off for the extrude option # which is mainly for debugging - # chk --> check-multiline-quotes # check for old bug; to be deleted # scl --> short-concatenation-item-length # helps break at '.' # recombine # for debugging line breaks # valign # for debugging vertical alignment @@ -1527,7 +1532,7 @@ sub generate_options { if ($short_name) { if ( $expansion{$short_name} ) { my $existing_name = $expansion{$short_name}[0]; - die + Die "redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; } $expansion{$short_name} = [$long_name]; @@ -1536,7 +1541,7 @@ sub generate_options { my $nolong_name = 'no' . $long_name; if ( $expansion{$nshort_name} ) { my $existing_name = $expansion{$nshort_name}[0]; - die + Die "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; } $expansion{$nshort_name} = [$nolong_name]; @@ -1566,6 +1571,7 @@ sub generate_options { $add_option->( 'standard-error-output', 'se', '!' ); $add_option->( 'standard-output', 'st', '!' ); $add_option->( 'warning-output', 'w', '!' ); + $add_option->( 'character-encoding', 'enc', '=s' ); # options which are both toggle switches and values moved here # to hide from tidyview (which does not show category 0 flags): @@ -1577,13 +1583,17 @@ sub generate_options { ######################################## $category = 1; # Basic formatting options ######################################## - $add_option->( 'check-syntax', 'syn', '!' ); - $add_option->( 'entab-leading-whitespace', 'et', '=i' ); - $add_option->( 'indent-columns', 'i', '=i' ); - $add_option->( 'maximum-line-length', 'l', '=i' ); - $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); - $add_option->( 'preserve-line-endings', 'ple', '!' ); - $add_option->( 'tabs', 't', '!' ); + $add_option->( 'check-syntax', 'syn', '!' ); + $add_option->( 'entab-leading-whitespace', 'et', '=i' ); + $add_option->( 'indent-columns', 'i', '=i' ); + $add_option->( 'maximum-line-length', 'l', '=i' ); + $add_option->( 'variable-maximum-line-length', 'vmll', '!' ); + $add_option->( 'whitespace-cycle', 'wc', '=i' ); + $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); + $add_option->( 'preserve-line-endings', 'ple', '!' ); + $add_option->( 'tabs', 't', '!' ); + $add_option->( 'default-tabsize', 'dt', '=i' ); + $add_option->( 'extended-syntax', 'xs', '!' ); ######################################## $category = 2; # Code indentation control @@ -1623,7 +1633,9 @@ sub generate_options { $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); + $add_option->( 'tight-secret-operators', 'tso', '!' ); $add_option->( 'trim-qw', 'tqw', '!' ); + $add_option->( 'trim-pod', 'trp', '!' ); $add_option->( 'want-left-space', 'wls', '=s' ); $add_option->( 'want-right-space', 'wrs', '=s' ); @@ -1652,6 +1664,7 @@ sub generate_options { $add_option->( 'static-block-comments', 'sbc', '!' ); $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); $add_option->( 'static-side-comments', 'ssc', '!' ); + $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' ); ######################################## $category = 5; # Linebreak controls @@ -1672,9 +1685,11 @@ sub generate_options { $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); + $add_option->( 'stack-closing-block-brace', 'scbb', '!' ); $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); $add_option->( 'stack-closing-paren', 'scp', '!' ); $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); + $add_option->( 'stack-opening-block-brace', 'sobb', '!' ); $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); $add_option->( 'stack-opening-paren', 'sop', '!' ); $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); @@ -1713,6 +1728,11 @@ sub generate_options { $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); + $add_option->( 'blank-lines-after-opening-block', 'blao', '=i' ); + $add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' ); + $add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' ); + $add_option->( 'blank-lines-before-closing-block-list', 'blbcl', '=s' ); + ######################################## $category = 9; # Other controls ######################################## @@ -1733,7 +1753,6 @@ sub generate_options { ######################################## $add_option->( 'DEBUG', 'D', '!' ); $add_option->( 'DIAGNOSTICS', 'I', '!' ); - $add_option->( 'check-multiline-quotes', 'chk', '!' ); $add_option->( 'dump-defaults', 'ddf', '!' ); $add_option->( 'dump-long-names', 'dln', '!' ); $add_option->( 'dump-options', 'dop', '!' ); @@ -1747,6 +1766,7 @@ sub generate_options { $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); $add_option->( 'show-options', 'opt', '!' ); $add_option->( 'version', 'v', '' ); + $add_option->( 'memoize', 'mem', '!' ); #--------------------------------------------------------------------- @@ -1789,6 +1809,7 @@ sub generate_options { %option_range = ( 'format' => [ 'tidy', 'html', 'user' ], 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], + 'character-encoding' => [ 'none', 'utf8' ], 'block-brace-tightness' => [ 0, 2 ], 'brace-tightness' => [ 0, 2 ], @@ -1811,7 +1832,7 @@ sub generate_options { 'closing-token-indentation' => [ 0, 3 ], 'closing-side-comment-else-flag' => [ 0, 2 ], - 'comma-arrow-breakpoints' => [ 0, 3 ], + 'comma-arrow-breakpoints' => [ 0, 5 ], ); # Note: we could actually allow negative ci if someone really wants it: @@ -1839,7 +1860,7 @@ sub generate_options { break-at-old-ternary-breakpoints break-at-old-attribute-breakpoints break-at-old-keyword-breakpoints - comma-arrow-breakpoints=1 + comma-arrow-breakpoints=5 nocheck-syntax closing-side-comment-interval=6 closing-side-comment-maximum-text=20 @@ -1851,6 +1872,7 @@ sub generate_options { continuation-indentation=2 delete-old-newlines delete-semicolons + extended-syntax fuzzy-line-length hanging-side-comments indent-block-comments @@ -1863,6 +1885,7 @@ sub generate_options { maximum-consecutive-blank-lines=1 maximum-fields-per-table=0 maximum-line-length=80 + memoize minimum-space-to-comment=4 nobrace-left-and-indent nocuddled-else @@ -1874,6 +1897,7 @@ sub generate_options { nostatic-side-comments notabs nowarning-output + character-encoding=none outdent-labels outdent-long-quotes outdent-long-comments @@ -1893,6 +1917,7 @@ sub generate_options { format=tidy backup-file-extension=bak format-skipping + default-tabsize=8 pod2html html-table-of-contents @@ -1938,6 +1963,9 @@ sub generate_options { 'nhtml' => [qw(format=tidy)], 'tidy' => [qw(format=tidy)], + 'utf8' => [qw(character-encoding=utf8)], + 'UTF8' => [qw(character-encoding=utf8)], + 'swallow-optional-blank-lines' => [qw(kbl=0)], 'noswallow-optional-blank-lines' => [qw(kbl=1)], 'sob' => [qw(kbl=0)], @@ -1996,7 +2024,29 @@ sub generate_options { 'sct' => [qw(scp schb scsb)], 'stack-closing-tokens' => => [qw(scp schb scsb)], 'nsct' => [qw(nscp nschb nscsb)], - 'nostack-opening-tokens' => [qw(nscp nschb nscsb)], + 'nostack-closing-tokens' => [qw(nscp nschb nscsb)], + + 'sac' => [qw(sot sct)], + 'nsac' => [qw(nsot nsct)], + 'stack-all-containers' => [qw(sot sct)], + 'nostack-all-containers' => [qw(nsot nsct)], + + 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], + 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], + 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], + 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], + 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], + 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], + + 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)], + 'sobb' => [qw(bbvt=2 bbvtl=*)], + 'nostack-opening-block-brace' => [qw(bbvt=0)], + 'nsobb' => [qw(bbvt=0)], + + 'converge' => [qw(it=4)], + 'noconverge' => [qw(it=1)], + 'conv' => [qw(it=4)], + 'nconv' => [qw(it=1)], # 'mangle' originally deleted pod and comments, but to keep it # reversible, it no longer does. But if you really want to @@ -2086,6 +2136,13 @@ q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= } # end of generate_options +# Memoize process_command_line. Given same @ARGV passed in, return same +# values and same @ARGV back. +# This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds +# up masontidy (https://metacpan.org/module/masontidy) + +my %process_command_line_cache; + sub process_command_line { my ( @@ -2093,8 +2150,47 @@ sub process_command_line { $rpending_complaint, $dump_options_type ) = @_; + my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type; + if ($use_cache) { + my $cache_key = join( chr(28), @ARGV ); + if ( my $result = $process_command_line_cache{$cache_key} ) { + my ( $argv, @retvals ) = @$result; + @ARGV = @$argv; + return @retvals; + } + else { + my @retvals = _process_command_line(@_); + $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ] + if $retvals[0]->{'memoize'}; + return @retvals; + } + } + else { + return _process_command_line(@_); + } +} + +# (note the underscore here) +sub _process_command_line { + + my ( + $perltidyrc_stream, $is_Windows, $Windows_type, + $rpending_complaint, $dump_options_type + ) = @_; + use Getopt::Long; + # Save any current Getopt::Long configuration + # and set to Getopt::Long defaults. Use eval to avoid + # breaking old versions of Perl without these routines. + # Previous configuration is reset at the exit of this routine. + my $glc; + eval { $glc = Getopt::Long::Configure() }; + unless ($@) { + eval { Getopt::Long::ConfigDefaults() }; + } + else { $glc = undef } + my ( $roption_string, $rdefaults, $rexpansion, $roption_category, $roption_range @@ -2112,30 +2208,15 @@ sub process_command_line { unless ( $dump_options_type eq 'perltidyrc' ) { for $i (@$rdefaults) { push @ARGV, "--" . $i } } - - # Patch to save users Getopt::Long configuration - # and set to Getopt::Long defaults. Use eval to avoid - # breaking old versions of Perl without these routines. - my $glc; - eval { $glc = Getopt::Long::Configure() }; - unless ($@) { - eval { Getopt::Long::ConfigDefaults() }; - } - else { $glc = undef } - if ( !GetOptions( \%Opts, @$roption_string ) ) { - die "Programming Bug: error in setting default options"; + Die "Programming Bug: error in setting default options"; } - - # Patch to put the previous Getopt::Long configuration back - eval { Getopt::Long::Configure($glc) } if defined $glc; } my $word; my @raw_options = (); my $config_file = ""; my $saw_ignore_profile = 0; - my $saw_extrude = 0; my $saw_dump_profile = 0; my $i; @@ -2157,7 +2238,7 @@ sub process_command_line { } elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { if ($config_file) { - warn + Warn "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; } $config_file = $2; @@ -2177,45 +2258,42 @@ sub process_command_line { } } unless ( -e $config_file ) { - warn "cannot find file given with -pro=$config_file: $!\n"; + Warn "cannot find file given with -pro=$config_file: $!\n"; $config_file = ""; } } elsif ( $i =~ /^-(pro|profile)=?$/ ) { - die "usage: -pro=filename or --profile=filename, no spaces\n"; - } - elsif ( $i =~ /^-extrude$/ ) { - $saw_extrude = 1; + Die "usage: -pro=filename or --profile=filename, no spaces\n"; } elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) { usage(); - exit 0; + Exit 0; } elsif ( $i =~ /^-(version|v)$/ ) { show_version(); - exit 0; + Exit 0; } elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { dump_defaults(@$rdefaults); - exit 0; + Exit 0; } elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { dump_long_names(@$roption_string); - exit 0; + Exit 0; } elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { dump_short_names($rexpansion); - exit 0; + Exit 0; } elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); - exit 0; + Exit 0; } } if ( $saw_dump_profile && $saw_ignore_profile ) { - warn "No profile to dump because of -npro\n"; - exit 1; + Warn "No profile to dump because of -npro\n"; + Exit 1; } #--------------------------------------------------------------- @@ -2228,7 +2306,7 @@ sub process_command_line { # line. if ($perltidyrc_stream) { if ($config_file) { - warn <{'iterations'} = 1; } - # check for reasonable number of blank lines and fix to avoid problems - if ( $rOpts->{'blank-lines-before-subs'} ) { - if ( $rOpts->{'blank-lines-before-subs'} < 0 ) { - $rOpts->{'blank-lines-before-subs'} = 0; - warn "negative value of -blbs, setting 0\n"; - } - if ( $rOpts->{'blank-lines-before-subs'} > 100 ) { - warn "unreasonably large value of -blbs, reducing\n"; - $rOpts->{'blank-lines-before-subs'} = 100; - } - } - if ( $rOpts->{'blank-lines-before-packages'} ) { - if ( $rOpts->{'blank-lines-before-packages'} < 0 ) { - warn "negative value of -blbp, setting 0\n"; - $rOpts->{'blank-lines-before-packages'} = 0; - } - if ( $rOpts->{'blank-lines-before-packages'} > 100 ) { - warn "unreasonably large value of -blbp, reducing\n"; - $rOpts->{'blank-lines-before-packages'} = 100; + my $check_blank_count = sub { + my ( $key, $abbrev ) = @_; + if ( $rOpts->{$key} ) { + if ( $rOpts->{$key} < 0 ) { + $rOpts->{$key} = 0; + Warn "negative value of $abbrev, setting 0\n"; + } + if ( $rOpts->{$key} > 100 ) { + Warn "unreasonably large value of $abbrev, reducing\n"; + $rOpts->{$key} = 100; + } } - } - - # see if user set a non-negative logfile-gap - if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { + }; - # a zero gap will be taken as a 1 - if ( $rOpts->{'logfile-gap'} == 0 ) { - $rOpts->{'logfile-gap'} = 1; - } + # check for reasonable number of blank lines and fix to avoid problems + $check_blank_count->( 'blank-lines-before-subs', '-blbs' ); + $check_blank_count->( 'blank-lines-before-packages', '-blbp' ); + $check_blank_count->( 'blank-lines-after-block-opening', '-blao' ); + $check_blank_count->( 'blank-lines-before-block-closing', '-blbc' ); - # setting a non-negative logfile gap causes logfile to be saved + # setting a non-negative logfile gap causes logfile to be saved + if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { $rOpts->{'logfile'} = 1; } - # not setting logfile gap, or setting it negative, causes default of 50 - else { - $rOpts->{'logfile-gap'} = 50; - } - # set short-cut flag when only indentation is to be done. # Note that the user may or may not have already set the # indent-only flag. @@ -2494,7 +2562,7 @@ sub check_options { if ( $rOpts->{'opening-brace-always-on-right'} && $rOpts->{'opening-brace-on-new-line'} ) { - warn <{'entab-leading-whitespace'} ) { if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { - warn "-et=n must use a positive integer; ignoring -et\n"; + Warn "-et=n must use a positive integer; ignoring -et\n"; $rOpts->{'entab-leading-whitespace'} = undef; } # entab leading whitespace has priority over the older 'tabs' option if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; } } + + # set a default tabsize to be used in guessing the starting indentation + # level if and only if this run does not use tabs and the old code does + # use tabs + if ( $rOpts->{'default-tabsize'} ) { + if ( $rOpts->{'default-tabsize'} < 0 ) { + Warn "negative value of -dt, setting 0\n"; + $rOpts->{'default-tabsize'} = 0; + } + if ( $rOpts->{'default-tabsize'} > 20 ) { + Warn "unreasonably large value of -dt, reducing\n"; + $rOpts->{'default-tabsize'} = 20; + } + } + else { + $rOpts->{'default-tabsize'} = 8; + } + + # Define $tabsize, the number of spaces per tab for use in + # guessing the indentation of source lines with leading tabs. + # Assume same as for this run if tabs are used , otherwise assume + # a default value, typically 8 + my $tabsize = + $rOpts->{'entab-leading-whitespace'} + ? $rOpts->{'entab-leading-whitespace'} + : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'} + : $rOpts->{'default-tabsize'}; + return $tabsize; } sub find_file_upwards { my ( $search_dir, $search_file ) = @_; - $search_dir =~ s{/+$}{}; + $search_dir =~ s{/+$}{}; $search_file =~ s{^/+}{}; while (1) { @@ -2616,29 +2712,33 @@ sub expand_command_abbreviations { # make sure we are not in an infinite loop if ( $pass_count == $max_passes ) { - print STDERR -"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n"; - print STDERR "Here are the raw options\n"; local $" = ')('; - print STDERR "(@$rraw_options)\n"; + Warn <getline() ) { $line_no++; chomp $line; @@ -2963,63 +3064,84 @@ sub read_config_file { $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends next unless $line; - # look for something of the general form - # newname { body } - # or just - # body - my $body = $line; - my ($newname); - if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) { - ( $newname, $body ) = ( $2, $3, ); - } - if ($body) { + my $newname; + + # Look for complete or partial abbreviation definition of the form + # name { body } or name { or name { body + # See rules in perltidy's perldoc page + # Section: Other Controls - Creating a new abbreviation + if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) { + my $oldname = $name; + ( $name, $body ) = ( $2, $3 ); + + # Cannot start new abbreviation unless old abbreviation is complete + last if ($opening_brace_line); + + $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// ); # handle a new alias definition - if ($newname) { - if ($name) { - $death_message = -"No '}' seen after $name and before $newname in config file $config_file line $.\n"; - last; - } - $name = $newname; + if ( ${$rexpansion}{$name} ) { + local $" = ')('; + my @names = sort keys %$rexpansion; + $death_message = + "Here is a list of all installed aliases\n(@names)\n" + . "Attempting to redefine alias ($name) in config file $config_file line $.\n"; + last; + } + ${$rexpansion}{$name} = []; + } - if ( ${$rexpansion}{$name} ) { - local $" = ')('; - my @names = sort keys %$rexpansion; - $death_message = - "Here is a list of all installed aliases\n(@names)\n" - . "Attempting to redefine alias ($name) in config file $config_file line $.\n"; - last; - } - ${$rexpansion}{$name} = []; + # leading opening braces not allowed + elsif ( $line =~ /^{/ ) { + $opening_brace_line = undef; + $death_message = + "Unexpected '{' at line $line_no in config file '$config_file'\n"; + last; + } + + # Look for abbreviation closing: body } or } + elsif ( $line =~ /^(.*)?\}$/ ) { + $body = $1; + if ($opening_brace_line) { + $opening_brace_line = undef; } + else { + $death_message = +"Unexpected '}' at line $line_no in config file '$config_file'\n"; + last; + } + } - # now do the body - if ($body) { + # Now store any parameters + if ($body) { - my ( $rbody_parts, $msg ) = parse_args($body); - if ($msg) { - $death_message = <close() }; return ( \@config_list, $death_message ); } @@ -3234,10 +3356,10 @@ sub readable_options { } sub show_version { - print <<"EOM"; + print STDOUT <<"EOM"; This is perltidy, v$VERSION -Copyright 2000-2012, Steve Hancock +Copyright 2000-2017, Steve Hancock Perltidy is free software and may be copied under the terms of the GNU General Public License, which is included in the distribution files. @@ -3279,7 +3401,7 @@ I/O control -npro ignore .perltidyrc configuration command file -pro=file read configuration commands from file instead of .perltidyrc -st send output to standard output, STDOUT - -se send error output to standard error output, STDERR + -se send all error output to standard error output, STDERR -v display version number to standard output and quit Basic Options: @@ -3487,7 +3609,7 @@ sub check_syntax { if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } } - # this shouldn't happen unless a termporary file couldn't be made + # this shouldn't happen unless a temporary file couldn't be made if ( $istream eq '-' ) { $logger_object->write_logfile_entry( "Cannot run perl -c on STDIN and STDOUT\n"); @@ -3580,7 +3702,10 @@ sub do_syntax_check { # now wish for luck... my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/; - unlink $stream_filename if ($is_tmpfile); + if ($is_tmpfile) { + unlink $stream_filename + or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n"); + } return $stream_filename, $msg; } @@ -3671,7 +3796,7 @@ sub close { return } # a getline method which reads lines (mode='r'), or # a print method which reads lines (mode='w') # -# NOTE: this routine assumes that that there aren't any embedded +# NOTE: this routine assumes that there aren't any embedded # newlines within any of the array elements. There are no checks # for that. # @@ -3783,7 +3908,12 @@ EOM sub close_input_file { my $self = shift; - eval { $self->{_fh}->close() }; + + # Only close physical files, not STDIN and other objects + my $filename = $self->{_filename}; + if ( $filename ne '-' && !ref $filename ) { + eval { $self->{_fh}->close() }; + } } sub get_line { @@ -3835,13 +3965,20 @@ sub new { if ( $rOpts->{'format'} eq 'tidy' ) { ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); - unless ($fh) { die "Cannot write to output stream\n"; } + unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; } $output_file_open = 1; if ($binmode) { - if ( ref($fh) eq 'IO::File' ) { - binmode $fh; + if ( $rOpts->{'character-encoding'} + && $rOpts->{'character-encoding'} eq 'utf8' ) + { + if ( ref($fh) eq 'IO::File' ) { + $fh->binmode(":encoding(UTF-8)"); + } + elsif ( $output_file eq '-' ) { + binmode STDOUT, ":encoding(UTF-8)"; + } } - if ( $output_file eq '-' ) { binmode STDOUT } + elsif ( $output_file eq '-' ) { binmode STDOUT } } } @@ -3907,7 +4044,7 @@ sub really_open_tee_file { my $tee_file = $self->{_tee_file}; my $fh_tee; $fh_tee = IO::File->new(">$tee_file") - or die("couldn't open TEE file $tee_file: $!\n"); + or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n"); binmode $fh_tee if $self->{_binmode}; $self->{_tee_file_opened} = 1; $self->{_fh_tee} = $fh_tee; @@ -3915,16 +4052,25 @@ sub really_open_tee_file { sub close_output_file { my $self = shift; - eval { $self->{_fh}->close() } if $self->{_output_file_open}; + + # Only close physical files, not STDOUT and other objects + my $output_file = $self->{_output_file}; + if ( $output_file ne '-' && !ref $output_file ) { + eval { $self->{_fh}->close() } if $self->{_output_file_open}; + } $self->close_tee_file(); } sub close_tee_file { my $self = shift; + # Only close physical files, not STDOUT and other objects if ( $self->{_tee_file_opened} ) { - eval { $self->{_fh_tee}->close() }; - $self->{_tee_file_opened} = 0; + my $tee_file = $self->{_tee_file}; + if ( $tee_file ne '-' && !ref $tee_file ) { + eval { $self->{_fh_tee}->close() }; + $self->{_tee_file_opened} = 0; + } } } @@ -3991,17 +4137,30 @@ package Perl::Tidy::Logger; sub new { my $class = shift; my $fh; - my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_; + my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_; + + my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef; - # remove any old error output file - unless ( ref($warning_file) ) { - if ( -e $warning_file ) { unlink($warning_file) } + # remove any old error output file if we might write a new one + unless ( $fh_warnings || ref($warning_file) ) { + if ( -e $warning_file ) { + unlink($warning_file) + or Perl::Tidy::Die( + "couldn't unlink warning file $warning_file: $!\n"); + } } + my $logfile_gap = + defined( $rOpts->{'logfile-gap'} ) + ? $rOpts->{'logfile-gap'} + : 50; + if ( $logfile_gap == 0 ) { $logfile_gap = 1 } + bless { _log_file => $log_file, + _logfile_gap => $logfile_gap, _rOpts => $rOpts, - _fh_warnings => undef, + _fh_warnings => $fh_warnings, _last_input_line_written => 0, _at_end_of_file => 0, _use_prefix => 1, @@ -4020,15 +4179,6 @@ sub new { }, $class; } -sub close_log_file { - - my $self = shift; - if ( $self->{_fh_warnings} ) { - eval { $self->{_fh_warnings}->close() }; - $self->{_fh_warnings} = undef; - } -} - sub get_warning_count { my $self = shift; return $self->{_warning_count}; @@ -4087,7 +4237,7 @@ sub black_box { if ( ( ( $input_line_number - $last_input_line_written ) >= - $rOpts->{'logfile-gap'} + $self->{_logfile_gap} ) || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) ) @@ -4110,7 +4260,7 @@ sub black_box { sub write_logfile_entry { my $self = shift; - # add leading >>> to avoid confusing error mesages and code + # add leading >>> to avoid confusing error messages and code $self->logfile_output( ">>>", "@_" ); } @@ -4143,8 +4293,8 @@ sub make_line_information_string { my $brace_depth = $line_of_tokens->{_curly_brace_depth}; my $paren_depth = $line_of_tokens->{_paren_depth}; my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; - my $python_indentation_level = - $line_of_tokens->{_python_indentation_level}; + my $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; my $rlevels = $line_of_tokens->{_rlevels}; my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; my $rci_levels = $line_of_tokens->{_rci_levels}; @@ -4183,9 +4333,8 @@ sub make_line_information_string { $nesting_string = $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); } - if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 } $line_information_string = -"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; +"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; } return $line_information_string; } @@ -4266,26 +4415,23 @@ sub warning { unless ( $rOpts->{'quiet'} ) { my $warning_count = $self->{_warning_count}; - unless ($warning_count) { + my $fh_warnings = $self->{_fh_warnings}; + if ( !$fh_warnings ) { my $warning_file = $self->{_warning_file}; - my $fh_warnings; - if ( $rOpts->{'standard-error-output'} ) { - $fh_warnings = *STDERR; - } - else { - ( $fh_warnings, my $filename ) = - Perl::Tidy::streamhandle( $warning_file, 'w' ); - $fh_warnings or die("couldn't open $filename $!\n"); - warn "## Please see file $filename\n" unless ref($warning_file); - } + ( $fh_warnings, my $filename ) = + Perl::Tidy::streamhandle( $warning_file, 'w' ); + $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n"); + Perl::Tidy::Warn "## Please see file $filename\n" + unless ref($warning_file); $self->{_fh_warnings} = $fh_warnings; + $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n"); } - my $fh_warnings = $self->{_fh_warnings}; if ( $warning_count < WARNING_LIMIT ) { if ( $self->get_use_prefix() > 0 ) { my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); + if ( !defined($input_line_number) ) { $input_line_number = -1 } $fh_warnings->print("$input_line_number:\t@_"); $self->write_logfile_entry("WARNING: @_"); } @@ -4408,7 +4554,7 @@ sub finish { } if ( $self->{_saw_brace_error} - && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) ) + && ( $self->{_logfile_gap} > 1 || !$save_logfile ) ) { $self->warning("To save a full .LOG file rerun with -g\n"); } @@ -4421,7 +4567,9 @@ sub finish { if ($fh) { my $routput_array = $self->{_output_array}; foreach ( @{$routput_array} ) { $fh->print($_) } - eval { $fh->close() }; + if ( $log_file ne '-' && !ref $log_file ) { + eval { $fh->close() }; + } } } } @@ -4475,7 +4623,7 @@ sub new { ( $html_fh, my $html_filename ) = Perl::Tidy::streamhandle( $html_file, 'w' ); unless ($html_fh) { - warn("can't open $html_file: $!\n"); + Perl::Tidy::Warn("can't open $html_file: $!\n"); return undef; } $html_file_opened = 1; @@ -4516,7 +4664,7 @@ PRE_END else { eval "use Pod::Html"; if ($@) { - warn + Perl::Tidy::Warn "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"; undef $rOpts->{'pod2html'}; } @@ -4530,7 +4678,7 @@ PRE_END my $src_filename; if ( $rOpts->{'frames'} ) { unless ($extension) { - warn + Perl::Tidy::Warn "cannot use frames without a specified output extension; ignoring -frm\n"; undef $rOpts->{'frames'}; } @@ -4755,8 +4903,8 @@ BEGIN { ); # These token types will all be called identifiers for now - # FIXME: need to separate user defined modules as separate type - my @identifier = qw" i t U C Y Z G :: "; + # FIXME: could separate user defined modules as separate type + my @identifier = qw" i t U C Y Z G :: CORE::"; @token_short_names{@identifier} = ('i') x scalar(@identifier); # These token types will be called 'structure' @@ -4922,14 +5070,14 @@ sub check_options { # write style sheet to STDOUT and die if requested if ( defined( $rOpts->{'stylesheet'} ) ) { write_style_sheet_file('-'); - exit 0; + Perl::Tidy::Exit 0; } # make sure user gives a file name after -css if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { $css_linkname = $rOpts->{'html-linked-style-sheet'}; if ( $css_linkname =~ /^-/ ) { - die "You must specify a valid filename after -css\n"; + Perl::Tidy::Die "You must specify a valid filename after -css\n"; } } @@ -4961,7 +5109,7 @@ sub write_style_sheet_file { my $css_filename = shift; my $fh; unless ( $fh = IO::File->new("> $css_filename") ) { - die "can't open $css_filename: $!\n"; + Perl::Tidy::Die "can't open $css_filename: $!\n"; } write_style_sheet_data($fh); eval { $fh->close }; @@ -5054,18 +5202,10 @@ sub pod_to_html { } # Pod::Html requires a real temporary filename - # If we are making a frame, we have a name available - # Otherwise, we have to fine one - my $tmpfile; - if ( $rOpts->{'frames'} ) { - $tmpfile = $self->{_toc_filename}; - } - else { - $tmpfile = Perl::Tidy::make_temporary_filename(); - } - my $fh_tmp = IO::File->new( $tmpfile, 'w' ); + my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile(); unless ($fh_tmp) { - warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; + Perl::Tidy::Warn + "unable to open temporary file $tmpfile; cannot use pod2html\n"; return $success_flag; } @@ -5114,9 +5254,8 @@ sub pod_to_html { # Must clean up if pod2html dies (it can); # Be careful not to overwrite callers __DIE__ routine local $SIG{__DIE__} = sub { - print $_[0]; unlink $tmpfile if -e $tmpfile; - exit 1; + Perl::Tidy::Die $_[0]; }; pod2html(@args); @@ -5125,13 +5264,15 @@ sub pod_to_html { unless ($fh_tmp) { # this error shouldn't happen ... we just used this filename - warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; + Perl::Tidy::Warn + "unable to open temporary file $tmpfile; cannot use pod2html\n"; goto RETURN; } my $html_fh = $self->{_html_fh}; my @toc; my $in_toc; + my $ul_level = 0; my $no_print; # This routine will write the html selectively and store the toc @@ -5164,8 +5305,34 @@ sub pod_to_html { $title = escape_html($title); $html_print->("

$title

\n"); } + + # check for start of index, old pod2html + # before Pod::Html VERSION 1.15_02 it is delimited by comments as: + # + #
    + # ... + #
+ # + # elsif ( $line =~ /^\s*\s*$/i ) { - $in_toc = 1; + $in_toc = 'INDEX'; + + # when frames are used, an extra table of contents in the + # contents panel is confusing, so don't print it + $no_print = $rOpts->{'frames'} + || !$rOpts->{'html-table-of-contents'}; + $html_print->("

Doc Index:

\n") if $rOpts->{'frames'}; + $html_print->($line); + } + + # check for start of index, new pod2html + # After Pod::Html VERSION 1.15_02 it is delimited as: + #
    + # ... + #
+ elsif ( $line =~ /^\s*/i ) { + $in_toc = 'UL'; + $ul_level = 1; # when frames are used, an extra table of contents in the # contents panel is confusing, so don't print it @@ -5175,20 +5342,48 @@ sub pod_to_html { $html_print->($line); } - # Copy the perltidy toc, if any, after the Pod::Html toc + # Check for end of index, old pod2html elsif ( $line =~ /^\s*\s*$/i ) { $saw_index = 1; $html_print->($line); + + # Copy the perltidy toc, if any, after the Pod::Html toc if ($toc_string) { $html_print->("
\n") if $rOpts->{'frames'}; $html_print->("

Code Index:

\n"); my @toc = map { $_ .= "\n" } split /\n/, $toc_string; $html_print->(@toc); } - $in_toc = 0; + $in_toc = ""; $no_print = 0; } + # must track
    depth level for new pod2html + elsif ( $line =~ /\s*
      \s*$/i && $in_toc eq 'UL' ) { + $ul_level++; + $html_print->($line); + } + + # Check for end of index, for new pod2html + elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) { + $ul_level--; + $html_print->($line); + + # Copy the perltidy toc, if any, after the Pod::Html toc + if ( $ul_level <= 0 ) { + $saw_index = 1; + if ($toc_string) { + $html_print->("
      \n") if $rOpts->{'frames'}; + $html_print->("

      Code Index:

      \n"); + my @toc = map { $_ .= "\n" } split /\n/, $toc_string; + $html_print->(@toc); + } + $in_toc = ""; + $ul_level = 0; + $no_print = 0; + } + } + # Copy one perltidy section after each marker elsif ( $line =~ /^(.*)(.*)$/ ) { $line = $2; @@ -5206,7 +5401,7 @@ sub pod_to_html { # shouldn't happen: we stored a string before writing # each marker. - warn + Perl::Tidy::Warn "Problem merging html stream with pod2html; order may be wrong\n"; } $html_print->($line); @@ -5244,15 +5439,15 @@ sub pod_to_html { $success_flag = 1; unless ($saw_body) { - warn "Did not see in pod2html output\n"; + Perl::Tidy::Warn "Did not see in pod2html output\n"; $success_flag = 0; } unless ($saw_body_end) { - warn "Did not see in pod2html output\n"; + Perl::Tidy::Warn "Did not see in pod2html output\n"; $success_flag = 0; } unless ($saw_index) { - warn "Did not find INDEX END in pod2html output\n"; + Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n"; $success_flag = 0; } @@ -5261,7 +5456,13 @@ sub pod_to_html { # note that we have to unlink tmpfile before making frames # because the tmpfile may be one of the names used for frames - unlink $tmpfile if -e $tmpfile; + if ( -e $tmpfile ) { + unless ( unlink($tmpfile) ) { + Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n"); + $success_flag = 0; + } + } + if ( $success_flag && $rOpts->{'frames'} ) { $self->make_frame( \@toc ); } @@ -5305,7 +5506,7 @@ sub make_frame { # 2. The current .html filename is renamed to be the contents panel rename( $html_filename, $src_filename ) - or die "Cannot rename $html_filename to $src_filename:$!\n"; + or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n"; # 3. Then use the original html filename for the frame write_frame_html( @@ -5319,7 +5520,7 @@ sub write_toc_html { # write a separate html table of contents file for frames my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; my $fh = IO::File->new( $toc_filename, 'w' ) - or die "Cannot open $toc_filename:$!\n"; + or Perl::Tidy::Die "Cannot open $toc_filename:$!\n"; $fh->print(< @@ -5349,7 +5550,7 @@ sub write_frame_html { ) = @_; my $fh = IO::File->new( $frame_filename, 'w' ) - or die "Cannot open $toc_basename:$!\n"; + or Perl::Tidy::Die "Cannot open $toc_basename:$!\n"; $fh->print(< 0; - use constant FORMATTER_DEBUG_FLAG_BREAK => 0; - use constant FORMATTER_DEBUG_FLAG_CI => 0; - use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; - use constant FORMATTER_DEBUG_FLAG_FORCE => 0; - use constant FORMATTER_DEBUG_FLAG_LIST => 0; - use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; - use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; - use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; - use constant FORMATTER_DEBUG_FLAG_STORE => 0; - use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; - use constant FORMATTER_DEBUG_FLAG_WHITE => 0; + use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0; + use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0; + use constant FORMATTER_DEBUG_FLAG_BOND => 0; + use constant FORMATTER_DEBUG_FLAG_BREAK => 0; + use constant FORMATTER_DEBUG_FLAG_CI => 0; + use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; + use constant FORMATTER_DEBUG_FLAG_FORCE => 0; + use constant FORMATTER_DEBUG_FLAG_LIST => 0; + use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; + use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; + use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; + use constant FORMATTER_DEBUG_FLAG_STORE => 0; + use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; + use constant FORMATTER_DEBUG_FLAG_WHITE => 0; my $debug_warning = sub { - print "FORMATTER_DEBUGGING with key $_[0]\n"; + print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n"; }; - FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); - FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); - FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); - FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); - FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); - FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); - FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); - FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); - FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); - FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); - FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); - FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); + FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE'); + FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES'); + FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); + FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); + FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); + FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); + FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); + FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); + FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); + FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); + FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); + FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); + FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); + FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); } use Carp; @@ -5879,7 +6084,8 @@ use vars qw{ @container_environment_to_go @bond_strength_to_go @forced_breakpoint_to_go - @lengths_to_go + @token_lengths_to_go + @summed_lengths_to_go @levels_to_go @leading_spaces_to_go @reduced_spaces_to_go @@ -5892,6 +6098,8 @@ use vars qw{ @old_breakpoint_to_go @tokens_to_go @types_to_go + @inext_to_go + @iprev_to_go %saved_opening_indentation @@ -5907,6 +6115,8 @@ use vars qw{ @nonblank_lines_at_depth $starting_in_quote $ending_in_quote + @whitespace_level_stack + $whitespace_last_level $in_format_skipping_section $format_skipping_pattern_begin @@ -5953,6 +6163,9 @@ use vars qw{ $closing_side_comment_prefix_pattern $closing_side_comment_list_pattern + $blank_lines_after_opening_block_pattern + $blank_lines_before_closing_block_pattern + $last_nonblank_token $last_nonblank_type $last_last_nonblank_token @@ -5978,7 +6191,7 @@ use vars qw{ %is_assignment %is_chain_operator %is_if_unless_and_or_last_next_redo_return - %is_until_while_for_if_elsif_else + %ok_to_add_semicolon_for_block_type @has_broken_sublist @dont_align @@ -6032,6 +6245,7 @@ use vars qw{ $rOpts_line_up_parentheses $rOpts_maximum_fields_per_table $rOpts_maximum_line_length + $rOpts_variable_maximum_line_length $rOpts_short_concatenation_item_length $rOpts_keep_old_blank_lines $rOpts_ignore_old_breakpoints @@ -6039,8 +6253,10 @@ use vars qw{ $rOpts_space_function_paren $rOpts_space_keyword_paren $rOpts_keep_interior_semicolons - - $half_maximum_line_length + $rOpts_ignore_side_comment_lengths + $rOpts_stack_closing_block_brace + $rOpts_whitespace_cycle + $rOpts_tight_secret_operators %is_opening_type %is_closing_type @@ -6061,6 +6277,9 @@ use vars qw{ %is_opening_type %is_closing_token %is_opening_token + + $SUB_PATTERN + $ASUB_PATTERN }; BEGIN { @@ -6098,10 +6317,6 @@ BEGIN { @_ = qw(is if unless and or err last next redo return); @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_); - # always break after a closing curly of these block types: - @_ = qw(until while for if elsif else); - @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_); - @_ = qw(last next redo return); @is_last_next_redo_return{@_} = (1) x scalar(@_); @@ -6135,6 +6350,20 @@ BEGIN { unless while until for foreach given when default); @is_block_without_semicolon{@_} = (1) x scalar(@_); + # We will allow semicolons to be added within these block types + # as well as sub and package blocks. + # NOTES: + # 1. Note that these keywords are omitted: + # switch case given when default sort map grep + # 2. It is also ok to add for sub and package blocks and a labeled block + # 3. But not okay for other perltidy types including: + # { } ; G t + # 4. Test files: blktype.t, blktype1.t, semicolon.t + @_ = + qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else + unless do while until eval for foreach ); + @ok_to_add_semicolon_for_block_type{@_} = (1) x scalar(@_); + # 'L' is token for opening { at hash key @_ = qw" L { ( [ "; @is_opening_type{@_} = (1) x scalar(@_); @@ -6148,6 +6377,16 @@ BEGIN { @_ = qw" } ) ] "; @is_closing_token{@_} = (1) x scalar(@_); + + # Patterns for standardizing matches to block types for regular subs and + # anonymous subs. Examples + # 'sub process' is a named sub + # 'sub ::m' is a named sub + # 'sub' is an anonymous sub + # 'sub:' is a label, not a sub + # 'substr' is a keyword + $SUB_PATTERN = '^sub\s+(::|\w)'; + $ASUB_PATTERN = '^sub$'; } # whitespace codes @@ -6190,6 +6429,22 @@ sub trim { return $_[0]; } +sub max { + my $max = shift; + foreach (@_) { + $max = ( $max < $_ ) ? $_ : $max; + } + return $max; +} + +sub min { + my $min = shift; + foreach (@_) { + $min = ( $min > $_ ) ? $_ : $min; + } + return $min; +} + sub split_words { # given a string containing words separated by whitespace, @@ -6302,7 +6557,8 @@ sub new { @container_environment_to_go = (); @bond_strength_to_go = (); @forced_breakpoint_to_go = (); - @lengths_to_go = (); # line length to start of ith token + @summed_lengths_to_go = (); # line length to start of ith token + @token_lengths_to_go = (); @levels_to_go = (); @matching_token_to_go = (); @mate_index_to_go = (); @@ -6315,6 +6571,11 @@ sub new { @types_to_go = (); @leading_spaces_to_go = (); @reduced_spaces_to_go = (); + @inext_to_go = (); + @iprev_to_go = (); + + @whitespace_level_stack = (); + $whitespace_last_level = -1; @dont_align = (); @has_broken_sublist = (); @@ -6415,7 +6676,7 @@ sub prepare_for_new_input_lines { $forced_breakpoint_count = 0; $forced_breakpoint_undo_count = 0; $rbrace_follower = undef; - $lengths_to_go[0] = 0; + $summed_lengths_to_go[0] = 0; $old_line_count_in_batch = 1; $comma_count_in_batch = 0; $starting_in_quote = 0; @@ -6485,6 +6746,7 @@ sub write_line { # the user may be using this section for any purpose whatsoever if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } + if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } if ( !$skip_line && $line_type eq 'POD_START' && !$saw_END_or_DATA_ ) @@ -6525,7 +6787,9 @@ sub leading_spaces_to_go { # return the number of indentation spaces for a token in the output stream; # these were previously stored by 'set_leading_whitespace'. - return get_SPACES( $leading_spaces_to_go[ $_[0] ] ); + my $ii = shift; + if ( $ii < 0 ) { $ii = 0 } + return get_SPACES( $leading_spaces_to_go[$ii] ); } @@ -6588,7 +6852,46 @@ sub set_leading_whitespace { # define: space count of leading string which would apply if it # were the first token of a new line. - my ( $level, $ci_level, $in_continued_quote ) = @_; + my ( $level_abs, $ci_level, $in_continued_quote ) = @_; + + # Adjust levels if necessary to recycle whitespace: + # given $level_abs, the absolute level + # define $level, a possibly reduced level for whitespace + my $level = $level_abs; + if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) { + if ( $level_abs < $whitespace_last_level ) { + pop(@whitespace_level_stack); + } + if ( !@whitespace_level_stack ) { + push @whitespace_level_stack, $level_abs; + } + elsif ( $level_abs > $whitespace_last_level ) { + $level = $whitespace_level_stack[-1] + + ( $level_abs - $whitespace_last_level ); + + if ( + # 1 Try to break at a block brace + ( + $level > $rOpts_whitespace_cycle + && $last_nonblank_type eq '{' + && $last_nonblank_token eq '{' + ) + + # 2 Then either a brace or bracket + || ( $level > $rOpts_whitespace_cycle + 1 + && $last_nonblank_token =~ /^[\{\[]$/ ) + + # 3 Then a paren too + || $level > $rOpts_whitespace_cycle + 2 + ) + { + $level = 1; + } + push @whitespace_level_stack, $level; + } + $level = $whitespace_level_stack[-1]; + } + $whitespace_last_level = $level_abs; # modify for -bli, which adds one continuation indentation for # opening braces @@ -6669,6 +6972,7 @@ sub set_leading_whitespace { ##my $too_close = ($i_test==$max_index_to_go-1); my $test_position = total_line_length( $i_test, $max_index_to_go ); + my $mll = maximum_line_length($i_test); if ( @@ -6676,12 +6980,13 @@ sub set_leading_whitespace { ##!$too_close && # if we are beyond the midpoint - $gnu_position_predictor > $half_maximum_line_length + $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2 - # or we are beyont the 1/4 point and there was an old + # or we are beyond the 1/4 point and there was an old # break at the equals || ( - $gnu_position_predictor > $half_maximum_line_length / 2 + $gnu_position_predictor > + $mll - $rOpts_maximum_line_length * 3 / 4 && ( $old_breakpoint_to_go[$last_equals] || ( $last_equals > 0 @@ -6703,6 +7008,9 @@ sub set_leading_whitespace { } } + my $halfway = + maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2; + # Check for decreasing depth .. # Note that one token may have both decreasing and then increasing # depth. For example, (level, ci) can go from (1,1) to (2,0). So, @@ -6896,10 +7204,8 @@ sub set_leading_whitespace { # to this minimum standard indentation. But the most deeply # nested container will still probably be able to shift its # parameters to the right for proper alignment, so in most - # cases this will not be noticable. - if ( $available_space > 0 - && $space_count > $half_maximum_line_length ) - { + # cases this will not be noticeable. + if ( $available_space > 0 && $space_count > $halfway ) { $gnu_stack[$max_gnu_stack_index] ->tentatively_decrease_AVAILABLE_SPACES($available_space); } @@ -6963,7 +7269,7 @@ sub set_leading_whitespace { $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ # and it is significantly to the right - || $gnu_position_predictor > $half_maximum_line_length + || $gnu_position_predictor > $halfway ) ) ) @@ -6993,8 +7299,8 @@ sub set_leading_whitespace { total_line_length( $line_start_index_to_go, $max_index_to_go ); } else { - $gnu_position_predictor = $space_count + - token_sequence_length( $max_index_to_go, $max_index_to_go ); + $gnu_position_predictor = + $space_count + $token_lengths_to_go[$max_index_to_go]; } # store the indentation object for this token @@ -7024,7 +7330,7 @@ sub check_for_long_gnu_style_lines { # keep 2 extra free because they are needed in some cases # (result of trial-and-error testing) my $spaces_needed = - $gnu_position_predictor - $rOpts_maximum_line_length + 2; + $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2; return if ( $spaces_needed <= 0 ); @@ -7099,7 +7405,7 @@ sub check_for_long_gnu_style_lines { sub finish_lp_batch { - # This routine is called once after each each output stream batch is + # This routine is called once after each output stream batch is # finished to undo indentation for all incomplete -lp # indentation levels. It is too risky to leave a level open, # because then we can't backtrack in case of a long line to follow. @@ -7171,35 +7477,48 @@ sub reduce_lp_indentation { sub token_sequence_length { - # return length of tokens ($ifirst .. $ilast) including first & last - # returns 0 if $ifirst > $ilast - my $ifirst = shift; - my $ilast = shift; - return 0 if ( $ilast < 0 || $ifirst > $ilast ); - return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 ); - return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst]; + # return length of tokens ($ibeg .. $iend) including $ibeg & $iend + # returns 0 if $ibeg > $iend (shouldn't happen) + my ( $ibeg, $iend ) = @_; + return 0 if ( $iend < 0 || $ibeg > $iend ); + return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 ); + return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; } sub total_line_length { - # return length of a line of tokens ($ifirst .. $ilast) - my $ifirst = shift; - my $ilast = shift; - if ( $ifirst < 0 ) { $ifirst = 0 } + # return length of a line of tokens ($ibeg .. $iend) + my ( $ibeg, $iend ) = @_; + return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend ); +} + +sub maximum_line_length_for_level { + + # return maximum line length for line starting with a given level + my $maximum_line_length = $rOpts_maximum_line_length; + + # Modify if -vmll option is selected + if ($rOpts_variable_maximum_line_length) { + my $level = shift; + if ( $level < 0 ) { $level = 0 } + $maximum_line_length += $level * $rOpts_indent_columns; + } + return $maximum_line_length; +} + +sub maximum_line_length { + + # return maximum line length for line starting with the token at given index + return maximum_line_length_for_level( $levels_to_go[ $_[0] ] ); - return leading_spaces_to_go($ifirst) + - token_sequence_length( $ifirst, $ilast ); } sub excess_line_length { - # return number of characters by which a line of tokens ($ifirst..$ilast) + # return number of characters by which a line of tokens ($ibeg..$iend) # exceeds the allowable line length. - my $ifirst = shift; - my $ilast = shift; - if ( $ifirst < 0 ) { $ifirst = 0 } - return leading_spaces_to_go($ifirst) + - token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length; + my ( $ibeg, $iend ) = @_; + return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg); } sub finish_formatting { @@ -7288,6 +7607,11 @@ sub finish_formatting { write_logfile_entry("No indentation disagreement seen\n"); } } + if ($first_tabbing_disagreement) { + write_logfile_entry( +"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n" + ); + } write_logfile_entry("\n"); $vertical_aligner_object->report_anything_unusual(); @@ -7300,7 +7624,6 @@ sub check_options { # This routine is called to check the Opts hash after it is defined ($rOpts) = @_; - my ( $tabbing_string, $tab_msg ); make_static_block_comment_pattern(); make_static_side_comment_pattern(); @@ -7338,6 +7661,7 @@ sub check_options { make_bli_pattern(); make_block_brace_vertical_tightness_pattern(); + make_blank_line_pattern(); if ( $rOpts->{'line-up-parentheses'} ) { @@ -7345,7 +7669,7 @@ sub check_options { || !$rOpts->{'add-newlines'} || !$rOpts->{'delete-old-newlines'} ) { - warn <{'line-up-parentheses'} && $rOpts->{'tabs'} ) { - warn <{'tabs'} = 0; } - # Likewise, tabs are not compatable with outdenting.. + # Likewise, tabs are not compatible with outdenting.. if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { - warn <{'tabs'} = 0; } if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { - warn <{'tabs'} = 0; @@ -7403,7 +7727,7 @@ EOM $outdent_keyword{$_} = 1; } else { - warn "ignoring '$_' in -okwl list; not a perl keyword"; + Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword"; } } @@ -7425,18 +7749,18 @@ EOM } if ( $rOpts->{'dump-want-left-space'} ) { dump_want_left_space(*STDOUT); - exit 0; + Perl::Tidy::Exit 0; } if ( $rOpts->{'dump-want-right-space'} ) { dump_want_right_space(*STDOUT); - exit 0; + Perl::Tidy::Exit 0; } # default keywords for which space is introduced before an opening paren # (at present, including them messes up vertical alignment) @_ = qw(my local our and or err eq ne if else elsif until - unless while for foreach return switch case given when); + unless while for foreach return switch case given when catch); @space_after_keyword{@_} = (1) x scalar(@_); # first remove any or all of these if desired @@ -7529,8 +7853,8 @@ EOM push @_, ','; @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); - # what can follow a one-line anonynomous sub closing curly: - # one-line anonumous subs also have ']' here... + # what can follow a one-line anonymous sub closing curly: + # one-line anonymous subs also have ']' here... # see tk3.t and PP.pm @_ = qw# ; : => or and && || ) ] ~~ !~~ #; push @_, ','; @@ -7560,6 +7884,13 @@ EOM $rOpts->{'long-block-line-count'} = 1000000; } + my $enc = $rOpts->{'character-encoding'}; + if ( $enc && $enc !~ /^(none|utf8)$/i ) { + Perl::Tidy::Die <{'output-line-ending'}; if ($ole) { my %endings = ( @@ -7568,16 +7899,38 @@ EOM mac => "\015", unix => "\012", ); - $ole = lc $ole; - unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { - my $str = join " ", keys %endings; - die < 'dos', + "\015\012" => 'win', + "\015" => 'mac', + "\012" => 'unix', + ); + + if ( defined( $endings_inverted{$ole} ) ) { + + # we already have valid line ending, nothing more to do + } + else { + $ole = lc $ole; + unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { + my $str = join " ", keys %endings; + Perl::Tidy::Die <{'preserve-line-endings'} ) { - warn "Ignoring -ple; conflicts with -ole\n"; - $rOpts->{'preserve-line-endings'} = undef; + } + if ( $rOpts->{'preserve-line-endings'} ) { + Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n"; + $rOpts->{'preserve-line-endings'} = undef; + } } } @@ -7627,15 +7980,21 @@ EOM $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; + $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; + + $rOpts_variable_maximum_line_length = + $rOpts->{'variable-maximum-line-length'}; $rOpts_short_concatenation_item_length = $rOpts->{'short-concatenation-item-length'}; + $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; $rOpts_format_skipping = $rOpts->{'format-skipping'}; $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; - $half_maximum_line_length = $rOpts_maximum_line_length / 2; + $rOpts_ignore_side_comment_lengths = + $rOpts->{'ignore-side-comment-lengths'}; # Note that both opening and closing tokens can access the opening # and closing flags of their container types. @@ -7657,6 +8016,8 @@ EOM ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, ); + $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'}; + # assume flag for '>' same as ')' for closing qw quotes %closing_token_indentation = ( ')' => $rOpts->{'closing-paren-indentation'}, @@ -7689,6 +8050,7 @@ EOM '}' => $rOpts->{'stack-closing-hash-brace'}, ']' => $rOpts->{'stack-closing-square-bracket'}, ); + $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; } sub make_static_block_comment_pattern { @@ -7705,14 +8067,14 @@ sub make_static_block_comment_pattern { # user may give leading caret to force matching left comments only if ( $prefix !~ /^\^#/ ) { if ( $prefix !~ /^#/ ) { - die + Perl::Tidy::Die "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; } $pattern = '^\s*' . $prefix; } eval "'##'=~/$pattern/"; if ($@) { - die + Perl::Tidy::Die "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; } $static_block_comment_pattern = $pattern; @@ -7725,12 +8087,13 @@ sub make_format_skipping_pattern { unless ($param) { $param = $default } $param =~ s/^\s*//; if ( $param !~ /^#/ ) { - die "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; + Perl::Tidy::Die + "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; } my $pattern = '^' . $param . '\s'; eval "'#'=~/$pattern/"; if ($@) { - die + Perl::Tidy::Die "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"; } return $pattern; @@ -7764,7 +8127,6 @@ sub make_block_brace_vertical_tightness_pattern { # turn any input list into a regex for recognizing selected block types $block_brace_vertical_tightness_pattern = '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; - if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) && $rOpts->{'block-brace-vertical-tightness-list'} ) { @@ -7774,6 +8136,23 @@ sub make_block_brace_vertical_tightness_pattern { } } +sub make_blank_line_pattern { + + $blank_lines_before_closing_block_pattern = $SUB_PATTERN; + my $key = 'blank-lines-before-closing-block-list'; + if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { + $blank_lines_before_closing_block_pattern = + make_block_pattern( '-blbcl', $rOpts->{$key} ); + } + + $blank_lines_after_opening_block_pattern = $SUB_PATTERN; + $key = 'blank-lines-after-opening-block-list'; + if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { + $blank_lines_after_opening_block_pattern = + make_block_pattern( '-blaol', $rOpts->{$key} ); + } +} + sub make_block_pattern { # given a string of block-type keywords, return a regex to match them @@ -7786,15 +8165,29 @@ sub make_block_pattern { # input string: "if else elsif unless while for foreach do : sub"; # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; + # Minor Update: + # + # To distinguish between anonymous subs and named subs, use 'sub' to + # indicate a named sub, and 'asub' to indicate an anonymous sub + my ( $abbrev, $string ) = @_; my @list = split_words($string); my @words = (); my %seen; for my $i (@list) { + if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern } next if $seen{$i}; $seen{$i} = 1; if ( $i eq 'sub' ) { } + elsif ( $i eq 'asub' ) { + } + elsif ( $i eq ';' ) { + push @words, ';'; + } + elsif ( $i eq '{' ) { + push @words, '\{'; + } elsif ( $i eq ':' ) { push @words, '\w+:'; } @@ -7802,12 +8195,20 @@ sub make_block_pattern { push @words, $i; } else { - warn "unrecognized block type $i after $abbrev, ignoring\n"; + Perl::Tidy::Warn + "unrecognized block type $i after $abbrev, ignoring\n"; } } my $pattern = '(' . join( '|', @words ) . ')$'; + my $sub_patterns = ""; if ( $seen{'sub'} ) { - $pattern = '(' . $pattern . '|sub)'; + $sub_patterns .= '|' . $SUB_PATTERN; + } + if ( $seen{'asub'} ) { + $sub_patterns .= '|' . $ASUB_PATTERN; + } + if ($sub_patterns) { + $pattern = '(' . $pattern . $sub_patterns . ')'; } $pattern = '^' . $pattern; return $pattern; @@ -7825,7 +8226,7 @@ sub make_static_side_comment_pattern { my $pattern = '^' . $prefix; eval "'##'=~/$pattern/"; if ($@) { - die + Perl::Tidy::Die "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; } $static_side_comment_pattern = $pattern; @@ -7866,12 +8267,13 @@ sub make_closing_side_comment_prefix { # shouldn't happen..must have screwed up escaping, above report_definite_bug(); - warn + Perl::Tidy::Warn "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; # just warn and keep going with defaults - warn "Please consider using a simpler -cscp prefix\n"; - warn "Using default -cscp instead; please check output\n"; + Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n"; + Perl::Tidy::Warn + "Using default -cscp instead; please check output\n"; } else { $csc_prefix = $test_csc_prefix; @@ -7956,9 +8358,10 @@ EOM # my $size=-s::SINK if $file; <==OK but we won't do it # don't join something like: for bla::bla:: abc # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl - ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) ) + ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' ) + && ( $tokenr =~ /^([\'\w]|\:\:)/ ) ) - # do not combine a number with a concatination dot + # do not combine a number with a concatenation dot # example: pom.caputo: # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) ) @@ -7983,7 +8386,7 @@ EOM # || ($tokenr eq '-') # keep a space between a quote and a bareword to prevent the - # bareword from becomming a quote modifier. + # bareword from becoming a quote modifier. || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) # keep a space between a token ending in '$' and any word; @@ -8017,8 +8420,8 @@ EOM # keep paren separate in 'use Foo::Bar ()' || ( $tokenr eq '(' - && $typel eq 'w' - && $typell eq 'k' + && $typel eq 'w' + && $typell eq 'k' && $tokenll eq 'use' ) # keep any space between filehandle and paren: @@ -8066,11 +8469,88 @@ EOM # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) + # space stacked labels (TODO: check if really necessary) + || ( $typel eq 'J' && $typer eq 'J' ) + ; # the value of this long logic sequence is the result we want return $result; } } +{ + my %secret_operators; + my %is_leading_secret_token; + + BEGIN { + + # token lists for perl secret operators as compiled by Philippe Bruhat + # at: https://metacpan.org/module/perlsecret + %secret_operators = ( + 'Goatse' => [qw#= ( ) =#], #=( )= + 'Venus1' => [qw#0 +#], # 0+ + 'Venus2' => [qw#+ 0#], # +0 + 'Enterprise' => [qw#) x ! !#], # ()x!! + 'Kite1' => [qw#~ ~ <>#], # ~~<> + 'Kite2' => [qw#~~ <>#], # ~~<> + 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=> + ); + + # The following operators and constants are not included because they + # are normally kept tight by perltidy: + # !! ~~ <~> + # + + # Make a lookup table indexed by the first token of each operator: + # first token => [list, list, ...] + foreach my $value ( values(%secret_operators) ) { + my $tok = $value->[0]; + push @{ $is_leading_secret_token{$tok} }, $value; + } + } + + sub secret_operator_whitespace { + + my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_; + + # Loop over all tokens in this line + my ( $j, $token, $type ); + for ( $j = 0 ; $j <= $jmax ; $j++ ) { + + $token = $$rtokens[$j]; + $type = $$rtoken_type[$j]; + + # Skip unless this token might start a secret operator + next if ( $type eq 'b' ); + next unless ( $is_leading_secret_token{$token} ); + + # Loop over all secret operators with this leading token + foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) { + my $jend = $j - 1; + foreach my $tok ( @{$rpattern} ) { + $jend++; + $jend++ + + if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' ); + if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) { + $jend = undef; + last; + } + } + + if ($jend) { + + # set flags to prevent spaces within this operator + for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) { + $rwhite_space_flag->[$jj] = WS_NO; + } + $j = $jend; + last; + } + } ## End Loop over all operators + } ## End loop over all tokens + } # End sub +} + sub set_white_space_flag { # This routine examines each pair of nonblank tokens and @@ -8079,9 +8559,9 @@ sub set_white_space_flag { # $white_space_flag[$j] is a flag indicating whether a white space # BEFORE token $j is needed, with the following values: # - # -1 do not want a space before token $j - # 0 optional space or $j is a whitespace - # 1 want a space before token $j + # WS_NO = -1 do not want a space before token $j + # WS_OPTIONAL= 0 optional space or $j is a whitespace + # WS_YES = 1 want a space before token $j # # # The values for the first token will be defined based @@ -8137,6 +8617,12 @@ sub set_white_space_flag { ; } ) ] R J ++ -- **= "; push( @spaces_right_side, ',' ); # avoids warning message + + # Note that we are in a BEGIN block here. Later in processing + # the values of %want_left_space and %want_right_space + # may be overridden by any user settings specified by the + # -wls and -wrs parameters. However the binary_whitespace_rules + # are hardwired and have priority. @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); @want_right_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); @@ -8146,12 +8632,16 @@ sub set_white_space_flag { (-1) x scalar(@spaces_right_side); @want_right_space{@spaces_right_side} = (1) x scalar(@spaces_right_side); - $want_left_space{'L'} = WS_NO; - $want_left_space{'->'} = WS_NO; - $want_right_space{'->'} = WS_NO; - $want_left_space{'**'} = WS_NO; - $want_right_space{'**'} = WS_NO; - + $want_left_space{'->'} = WS_NO; + $want_right_space{'->'} = WS_NO; + $want_left_space{'**'} = WS_NO; + $want_right_space{'**'} = WS_NO; + $want_right_space{'CORE::'} = WS_NO; + + # These binary_ws_rules are hardwired and have priority over the above + # settings. It would be nice to allow adjustment by the user, + # but it would be complicated to specify. + # # hash type information must stay tightly bound # as in : ${xxxx} $binary_ws_rules{'i'}{'L'} = WS_NO; @@ -8170,6 +8660,7 @@ sub set_white_space_flag { $binary_ws_rules{'@'}{'L'} = WS_NO; $binary_ws_rules{'@'}{'{'} = WS_NO; $binary_ws_rules{'='}{'L'} = WS_YES; + $binary_ws_rules{'J'}{'J'} = WS_YES; # the following includes ') {' # as in : if ( xxx ) { yyy } @@ -8190,22 +8681,18 @@ sub set_white_space_flag { $binary_ws_rules{'R'}{'++'} = WS_NO; $binary_ws_rules{'R'}{'--'} = WS_NO; - ######################################################## - # should no longer be necessary (see niek.pl) - ##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label - ##$binary_ws_rules{'w'}{':'} = WS_NO; - ######################################################## $binary_ws_rules{'i'}{'Q'} = WS_YES; $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' - # FIXME: we need to split 'i' into variables and functions + # FIXME: we could to split 'i' into variables and functions # and have no space for functions but space for variables. For now, # I have a special patch in the special rules below $binary_ws_rules{'i'}{'('} = WS_NO; $binary_ws_rules{'w'}{'('} = WS_NO; $binary_ws_rules{'w'}{'{'} = WS_YES; - } + } ## end BEGIN block + my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; my ( $last_token, $last_type, $last_block_type, $token, $type, $block_type ); @@ -8216,6 +8703,33 @@ sub set_white_space_flag { $token = $tokens_to_go[$max_index_to_go]; $type = $types_to_go[$max_index_to_go]; $block_type = $block_type_to_go[$max_index_to_go]; + + #--------------------------------------------------------------- + # Patch due to splitting of tokens with leading -> + #--------------------------------------------------------------- + # + # This routine is dealing with the raw tokens from the tokenizer, + # but to get started it needs the previous token, which will + # have been stored in the '_to_go' arrays. + # + # This patch avoids requiring two iterations to + # converge for cases such as the following, where a paren + # comes in on a line following a variable with leading arrow: + # $self->{main}->add_content_defer_opening + # ($name, $wmkf, $self->{attrs}, $self); + # In this case when we see the opening paren on line 2 we need + # to know if the last token on the previous line had an arrow, + # but it has already been split off so we have to add it back + # in to avoid getting an unwanted space before the paren. + if ( $type =~ /^[wi]$/ ) { + my $im = $iprev_to_go[$max_index_to_go]; + my $tm = ( $im >= 0 ) ? $types_to_go[$im] : ""; + if ( $tm eq '->' ) { $token = $tm . $token } + } + + #--------------------------------------------------------------- + # End patch due to splitting of tokens with leading -> + #--------------------------------------------------------------- } else { $token = ' '; @@ -8223,9 +8737,9 @@ sub set_white_space_flag { $block_type = ''; } - # loop over all tokens my ( $j, $ws ); + # main loop over all tokens to define the whitespace flags for ( $j = 0 ; $j <= $jmax ; $j++ ) { if ( $$rtoken_type[$j] eq 'b' ) { @@ -8243,8 +8757,8 @@ sub set_white_space_flag { $block_type = $$rblock_type[$j]; #--------------------------------------------------------------- - # section 1: - # handle space on the inside of opening braces + # Whitespace Rules Section 1: + # Handle space on the inside of opening braces. #--------------------------------------------------------------- # /^[L\{\(\[]$/ @@ -8278,20 +8792,22 @@ sub set_white_space_flag { } else { $tightness = $tightness{$last_token} } - #================================================================= - # Patch for fabrice_bug.pl - # We must always avoid spaces around a bare word beginning with ^ as in: - # my $before = ${^PREMATCH}; - # Because all of the following cause an error in perl: - # my $before = ${ ^PREMATCH }; - # my $before = ${ ^PREMATCH}; - # my $before = ${^PREMATCH }; - # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1. - # Note that here we must set tightness=1 and not 2 so that the closing space - # is also avoided (via the $j_tight_closing_paren flag in coding) + #============================================================= + # Patch for test problem fabrice_bug.pl + # We must always avoid spaces around a bare word beginning + # with ^ as in: + # my $before = ${^PREMATCH}; + # Because all of the following cause an error in perl: + # my $before = ${ ^PREMATCH }; + # my $before = ${ ^PREMATCH}; + # my $before = ${^PREMATCH }; + # So if brace tightness flag is -bt=0 we must temporarily reset + # to bt=1. Note that here we must set tightness=1 and not 2 so + # that the closing space + # is also avoided (via the $j_tight_closing_paren flag in coding) if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } - #================================================================= + #============================================================= if ( $tightness <= 0 ) { $ws = WS_YES; @@ -8326,6 +8842,10 @@ sub set_white_space_flag { # but watch out for this: [ [ ] (misc.t) && $last_token ne $token + + # double diamond is usually spaced + && $token ne '<<>>' + ) { @@ -8338,13 +8858,13 @@ sub set_white_space_flag { } } } - } # done with opening braces and brackets + } # end setting space flag inside opening tokens my $ws_1 = $ws if FORMATTER_DEBUG_FLAG_WHITE; #--------------------------------------------------------------- - # section 2: - # handle space on inside of closing brace pairs + # Whitespace Rules Section 2: + # Handle space on inside of closing brace pairs. #--------------------------------------------------------------- # /[\}\)\]R]/ @@ -8368,14 +8888,14 @@ sub set_white_space_flag { $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; } } - } + } # end setting space flag inside closing tokens my $ws_2 = $ws if FORMATTER_DEBUG_FLAG_WHITE; #--------------------------------------------------------------- - # section 3: - # use the binary table + # Whitespace Rules Section 3: + # Use the binary rule table. #--------------------------------------------------------------- if ( !defined($ws) ) { $ws = $binary_ws_rules{$last_type}{$type}; @@ -8384,8 +8904,8 @@ sub set_white_space_flag { if FORMATTER_DEBUG_FLAG_WHITE; #--------------------------------------------------------------- - # section 4: - # some special cases + # Whitespace Rules Section 4: + # Handle some special cases. #--------------------------------------------------------------- if ( $token eq '(' ) { @@ -8501,14 +9021,15 @@ sub set_white_space_flag { if FORMATTER_DEBUG_FLAG_WHITE; #--------------------------------------------------------------- - # section 5: - # default rules not covered above + # Whitespace Rules Section 5: + # Apply default rules not covered above. #--------------------------------------------------------------- - # if we fall through to here, - # look at the pre-defined hash tables for the two tokens, and - # if (they are equal) use the common value - # if (either is zero or undef) use the other - # if (either is -1) use it + + # If we fall through to here, look at the pre-defined hash tables for + # the two tokens, and: + # if (they are equal) use the common value + # if (either is zero or undef) use the other + # if (either is -1) use it # That is, # left vs right # 1 vs 1 --> 1 @@ -8563,12 +9084,18 @@ sub set_white_space_flag { if ( !defined($ws_2) ) { $ws_2 = "*" } if ( !defined($ws_3) ) { $ws_3 = "*" } if ( !defined($ws_4) ) { $ws_4 = "*" } - print + print STDOUT "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; }; + } ## end main loop + + if ($rOpts_tight_secret_operators) { + secret_operator_whitespace( $jmax, $rtokens, $rtoken_type, + \@white_space_flag ); } + return \@white_space_flag; -} +} ## end sub set_white_space_flag { # begin print_line_of_tokens @@ -8586,7 +9113,7 @@ sub set_white_space_flag { my $rnesting_blocks; my $in_quote; - my $python_indentation_level; + my $guessed_indentation_level; # These local token variables are stored by store_token_to_go: my $block_type; @@ -8644,6 +9171,35 @@ sub set_white_space_flag { } } + sub token_length { + + # Returns the length of a token, given: + # $token=text of the token + # $type = type + # $not_first_token = should be TRUE if this is not the first token of + # the line. It might the index of this token in an array. It is + # used to test for a side comment vs a block comment. + # Note: Eventually this should be the only routine determining the + # length of a token in this package. + my ( $token, $type, $not_first_token ) = @_; + my $token_length = length($token); + + # We mark lengths of side comments as just 1 if we are + # ignoring their lengths when setting line breaks. + $token_length = 1 + if ( $rOpts_ignore_side_comment_lengths + && $not_first_token + && $type eq '#' ); + return $token_length; + } + + sub rtoken_length { + + # return length of ith token in @{$rtokens} + my ($i) = @_; + return token_length( $$rtokens[$i], $$rtoken_type[$i], $i ); + } + # Routine to place the current token into the output stream. # Called once per output token. sub store_token_to_go { @@ -8673,14 +9229,31 @@ sub set_white_space_flag { ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; $levels_to_go[$max_index_to_go] = $level; $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; - $lengths_to_go[ $max_index_to_go + 1 ] = - $lengths_to_go[$max_index_to_go] + length($token); + + # link the non-blank tokens + my $iprev = $max_index_to_go - 1; + $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' ); + $iprev_to_go[$max_index_to_go] = $iprev; + $inext_to_go[$iprev] = $max_index_to_go + if ( $iprev >= 0 && $type ne 'b' ); + $inext_to_go[$max_index_to_go] = $max_index_to_go + 1; + + $token_lengths_to_go[$max_index_to_go] = + token_length( $token, $type, $max_index_to_go ); + + # We keep a running sum of token lengths from the start of this batch: + # summed_lengths_to_go[$i] = total length to just before token $i + # summed_lengths_to_go[$i+1] = total length to just after token $i + $summed_lengths_to_go[ $max_index_to_go + 1 ] = + $summed_lengths_to_go[$max_index_to_go] + + $token_lengths_to_go[$max_index_to_go]; # Define the indentation that this token would have if it started # a new line. We have to do this now because we need to know this # when considering one-line blocks. set_leading_whitespace( $level, $ci_level, $in_continued_quote ); + # remember previous nonblank tokens seen if ( $type ne 'b' ) { $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; @@ -8695,7 +9268,7 @@ sub set_white_space_flag { FORMATTER_DEBUG_FLAG_STORE && do { my ( $a, $b, $c ) = caller(); - print + print STDOUT "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; }; } @@ -8743,7 +9316,7 @@ sub set_white_space_flag { # processing. This routine decides if there should be # whitespace between each pair of non-white tokens, so later # routines only need to decide on any additional line breaks. - # Any whitespace is initally a single space character. Later, + # Any whitespace is initially a single space character. Later, # the vertical aligner may expand that to be multiple space # characters if necessary for alignment. @@ -8767,8 +9340,8 @@ sub set_white_space_flag { $line_of_tokens->{_starting_in_quote}; $in_quote = $line_of_tokens->{_ending_in_quote}; $ending_in_quote = $in_quote; - $python_indentation_level = - $line_of_tokens->{_python_indentation_level}; + $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; my $j; my $j_next; @@ -8798,19 +9371,6 @@ sub set_white_space_flag { $last_line_had_side_comment = 0; return; } - - # prior to version 20010406, perltidy had a bug which placed - # continuation indentation before the last line of some multiline - # quotes and patterns -- exactly the lines passing this way. - # To help find affected lines in scripts run with these - # versions, run with '-chk', and it will warn of any quotes or - # patterns which might have been modified by these early - # versions. - if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) { - warning( -"-chk: please check this line for extra leading whitespace\n" - ); - } } # Write line verbatim if we are in a formatting skip section @@ -8893,11 +9453,12 @@ sub set_white_space_flag { } # create a hanging side comment if appropriate + my $is_hanging_side_comment; if ( $jmax == 0 - && $$rtoken_type[0] eq '#' # only token is a comment - && $last_line_had_side_comment # last line had side comment - && $input_line =~ /^\s/ # there is some leading space + && $$rtoken_type[0] eq '#' # only token is a comment + && $last_line_had_side_comment # last line had side comment + && $input_line =~ /^\s/ # there is some leading space && !$is_static_block_comment # do not make static comment hanging && $rOpts->{'hanging-side-comments'} # user is allowing # hanging side comments @@ -8908,6 +9469,7 @@ sub set_white_space_flag { # We will insert an empty qw string at the start of the token list # to force this comment to be a side comment. The vertical aligner # should then line it up with the previous side comment. + $is_hanging_side_comment = 1; unshift @$rtoken_type, 'q'; unshift @$rtokens, ''; unshift @$rlevels, $$rlevels[0]; @@ -8995,12 +9557,11 @@ sub set_white_space_flag { # Note: this test is placed here because we know the continuation flag # at this point, which allows us to avoid non-meaningful checks. my $structural_indentation_level = $$rlevels[0]; - compare_indentation_levels( $python_indentation_level, + compare_indentation_levels( $guessed_indentation_level, $structural_indentation_level ) - unless ( $python_indentation_level < 0 - || ( $$rci_levels[0] > 0 ) - || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' ) - ); + unless ( $is_hanging_side_comment + || $$rci_levels[0] > 0 + || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' ); # Patch needed for MakeMaker. Do not break a statement # in which $VERSION may be calculated. See MakeMaker.pm; @@ -9011,15 +9572,23 @@ sub set_white_space_flag { # *VERSION = \'1.01'; # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; # We will pass such a line straight through without breaking - # it unless -npvl is used + # it unless -npvl is used. + + # Patch for problem reported in RT #81866, where files + # had been flattened into a single line and couldn't be + # tidied without -npvl. There are two parts to this patch: + # First, it is not done for a really long line (80 tokens for now). + # Second, we will only allow up to one semicolon + # before the VERSION. We need to allow at least one semicolon + # for statements like this: + # require Exporter; our $VERSION = $Exporter::VERSION; + # where both statements must be on a single line for MakeMaker my $is_VERSION_statement = 0; - - if ( - !$saw_VERSION_in_this_file - && $input_line =~ /VERSION/ # quick check to reject most lines - && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ - ) + if ( !$saw_VERSION_in_this_file + && $jmax < 80 + && $input_line =~ + /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) { $saw_VERSION_in_this_file = 1; $is_VERSION_statement = 1; @@ -9034,10 +9603,20 @@ sub set_white_space_flag { # qw lines will still go out at the end of this routine. if ( $rOpts->{'indent-only'} ) { flush(); - trim($input_line); + my $line = $input_line; + + # delete side comments if requested with -io, but + # we will not allow deleting of closing side comments with -io + # because the coding would be more complex + if ( $rOpts->{'delete-side-comments'} + && $rtoken_type->[$jmax] eq '#' ) + { + $line = join "", @{$rtokens}[ 0 .. $jmax - 1 ]; + } + trim($line); extract_token(0); - $token = $input_line; + $token = $line; $type = 'q'; $block_type = ""; $container_type = ""; @@ -9053,11 +9632,6 @@ sub set_white_space_flag { ($rwhite_space_flag) = set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); - # find input tabbing to allow checks for tabbing disagreement - ## not used for now - ##$input_line_tabbing = ""; - ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; } - # if the buffer hasn't been flushed, add a leading space if # necessary to keep essential whitespace. This is really only # necessary if we are squeezing out all ws. @@ -9093,11 +9667,22 @@ sub set_white_space_flag { } # This is a good place to kill incomplete one-line blocks - if ( ( $semicolons_before_block_self_destruct == 0 ) - && ( $max_index_to_go >= 0 ) - && ( $types_to_go[$max_index_to_go] eq ';' ) - && ( $$rtokens[0] ne '}' ) ) + if ( + ( + ( $semicolons_before_block_self_destruct == 0 ) + && ( $max_index_to_go >= 0 ) + && ( $types_to_go[$max_index_to_go] eq ';' ) + && ( $$rtokens[0] ne '}' ) + ) + + # Patch for RT #98902. Honor request to break at old commas. + || ( $rOpts_break_at_old_comma_breakpoints + && $max_index_to_go >= 0 + && $types_to_go[$max_index_to_go] eq ',' ) + ) { + $forced_breakpoint_to_go[$max_index_to_go] = 1 + if ($rOpts_break_at_old_comma_breakpoints); destroy_one_line_block(); output_line_to_go(); } @@ -9165,7 +9750,38 @@ sub set_white_space_flag { $token =~ s/\s*//g; } - if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } + # Split identifiers with leading arrows, inserting blanks if + # necessary. It is easier and safer here than in the + # tokenizer. For example '->new' becomes two tokens, '->' and + # 'new' with a possible blank between. + # + # Note: there is a related patch in sub set_white_space_flag + if ( $token =~ /^\-\>(.*)$/ && $1 ) { + my $token_save = $1; + my $type_save = $type; + + # store a blank to left of arrow if necessary + if ( $max_index_to_go >= 0 + && $types_to_go[$max_index_to_go] ne 'b' + && $want_left_space{'->'} == WS_YES ) + { + insert_new_token_to_go( ' ', 'b', $slevel, + $no_internal_newlines ); + } + + # then store the arrow + $token = '->'; + $type = $token; + store_token_to_go(); + + # then reset the current token to be the remainder, + # and reset the whitespace flag according to the arrow + $$rwhite_space_flag[$j] = $want_right_space{'->'}; + $token = $token_save; + $type = $type_save; + } + + if ( $token =~ /$SUB_PATTERN/ ) { $token =~ s/\s+/ /g } # trim identifiers of trailing blanks which can occur # under some unusual circumstances, such as if the @@ -9196,7 +9812,7 @@ sub set_white_space_flag { $token =~ /^(s|tr|y|m|\/)/ && $last_nonblank_token =~ /^(=|==|!=)$/ - # precededed by simple scalar + # preceded by simple scalar && $last_last_nonblank_type eq 'i' && $last_last_nonblank_token =~ /^\$/ @@ -9204,7 +9820,7 @@ sub set_white_space_flag { # (but give complaint if we can's see far enough ahead) && $next_nonblank_token =~ /^[; \)\}]$/ - # scalar is not decleared + # scalar is not declared && !( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(my|our|local)$/ @@ -9304,11 +9920,12 @@ sub set_white_space_flag { my $want_break = # use -bl flag if not a sub block of any type - $block_type !~ /^sub/ + #$block_type !~ /^sub/ + $block_type !~ /^sub\b/ ? $rOpts->{'opening-brace-on-new-line'} # use -sbl flag for a named sub block - : $block_type !~ /^sub\W*$/ + : $block_type !~ /$ASUB_PATTERN/ ? $rOpts->{'opening-sub-brace-on-new-line'} # use -asbl flag for an anonymous sub block @@ -9390,24 +10007,14 @@ sub set_white_space_flag { # and we don't have one && ( $last_nonblank_type ne ';' ) - # patch until some block type issues are fixed: - # Do not add semi-colon for block types '{', - # '}', and ';' because we cannot be sure yet - # that this is a block and not an anonomyous - # hash (blktype.t, blktype1.t) - && ( $block_type !~ /^[\{\};]$/ ) - - # patch: and do not add semi-colons for recently - # added block types (see tmp/semicolon.t) - && ( $block_type !~ - /^(switch|case|given|when|default)$/ ) - - # it seems best not to add semicolons in these - # special block types: sort|map|grep - && ( !$is_sort_map_grep{$block_type} ) - # and we are allowed to do so. && $rOpts->{'add-semicolons'} + + # and we are allowed to for this block type + && ( $ok_to_add_semicolon_for_block_type{$block_type} + || $block_type =~ /^(sub|package)/ + || $block_type =~ /^\w+\:$/ ) + ) { @@ -9477,7 +10084,13 @@ sub set_white_space_flag { # But make a line break if the curly ends a # significant block: if ( - $is_block_without_semicolon{$block_type} + ( + $is_block_without_semicolon{$block_type} + + # Follow users break point for + # one line block types U & G, such as a 'try' block + || $is_one_line_block =~ /^[UG]$/ && $j == $jmax + ) # if needless semicolon follows we handle it later && $next_nonblank_token ne ';' @@ -9508,7 +10121,7 @@ sub set_white_space_flag { } # anonymous sub - elsif ( $block_type =~ /^sub\W*$/ ) { + elsif ( $block_type =~ /$ASUB_PATTERN/ ) { if ($is_one_line_block) { $rbrace_follower = \%is_anon_sub_1_brace_follower; @@ -9595,7 +10208,7 @@ sub set_white_space_flag { && ( $is_block_without_semicolon{ $last_nonblank_block_type} - || $last_nonblank_block_type =~ /^sub\s+\w/ + || $last_nonblank_block_type =~ /$SUB_PATTERN/ || $last_nonblank_block_type =~ /^\w+:$/ ) ) || $last_nonblank_type eq ';' @@ -9698,8 +10311,8 @@ sub set_white_space_flag { # if this is a VERSION statement || $is_VERSION_statement - # to keep a label on one line if that is how it is now - || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) ) + # to keep a label at the end of a line + || $type eq 'J' # if we are instructed to keep all old line breaks || !$rOpts->{'delete-old-newlines'} @@ -9713,8 +10326,8 @@ sub set_white_space_flag { if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { $old_breakpoint_to_go[$max_index_to_go] = 1; } - } # end sub print_line_of_tokens -} # end print_line_of_tokens + } ## end sub print_line_of_tokens +} ## end block print_line_of_tokens # sub output_line_to_go sends one logical line of tokens on down the # pipeline to the VerticalAligner package, breaking the line into continuation @@ -9742,7 +10355,7 @@ sub output_line_to_go { $cscw_block_comment = add_closing_side_comment() if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); - match_opening_and_closing_tokens(); + my $comma_arrow_count_contained = match_opening_and_closing_tokens(); # tell the -lp option we are outputting a batch so it can close # any unfinished items in its stack @@ -9799,7 +10412,6 @@ sub output_line_to_go { if ( $imin <= $imax ) { # add a blank line before certain key types but not after a comment - ##if ( $last_line_leading_type !~ /^[#b]/ ) { if ( $last_line_leading_type !~ /^[#]/ ) { my $want_blank = 0; my $leading_token = $tokens_to_go[$imin]; @@ -9853,6 +10465,20 @@ sub output_line_to_go { ); } + # Check for blank lines wanted before a closing brace + if ( $leading_token eq '}' ) { + if ( $rOpts->{'blank-lines-before-closing-block'} + && $block_type_to_go[$imin] + && $block_type_to_go[$imin] =~ + /$blank_lines_before_closing_block_pattern/ ) + { + my $nblanks = $rOpts->{'blank-lines-before-closing-block'}; + if ( $nblanks > $want_blank ) { + $want_blank = $nblanks; + } + } + } + if ($want_blank) { # future: send blank line down normal path to VerticalAligner @@ -9880,7 +10506,7 @@ sub output_line_to_go { FORMATTER_DEBUG_FLAG_FLUSH && do { my ( $package, $file, $line ) = caller; - print + print STDOUT "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; }; @@ -9891,20 +10517,30 @@ sub output_line_to_go { my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; if ( - $max_index_to_go > 0 - && ( - $is_long_line - || $old_line_count_in_batch > 1 - || is_unbalanced_batch() - || ( - $comma_count_in_batch - && ( $rOpts_maximum_fields_per_table > 0 - || $rOpts_comma_arrow_breakpoints == 0 ) - ) + $is_long_line + || $old_line_count_in_batch > 1 + + # must always call scan_list() with unbalanced batches because it + # is maintaining some stacks + || is_unbalanced_batch() + + # call scan_list if we might want to break at commas + || ( + $comma_count_in_batch + && ( $rOpts_maximum_fields_per_table > 0 + || $rOpts_comma_arrow_breakpoints == 0 ) ) + + # call scan_list if user may want to break open some one-line + # hash references + || ( $comma_arrow_count_contained + && $rOpts_comma_arrow_breakpoints != 3 ) ) { - $saw_good_break ||= scan_list(); + ## This caused problems in one version of perl for unknown reasons: + ## $saw_good_break ||= scan_list(); + my $sgb = scan_list(); + $saw_good_break ||= $sgb; } # let $ri_first and $ri_last be references to lists of @@ -9958,7 +10594,30 @@ sub output_line_to_go { $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); } send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad ); + + # Insert any requested blank lines after an opening brace. We have to + # skip back before any side comment to find the terminal token + my $iterm; + for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) { + next if $types_to_go[$iterm] eq '#'; + next if $types_to_go[$iterm] eq 'b'; + last; + } + + # write requested number of blank lines after an opening block brace + if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) { + if ( $rOpts->{'blank-lines-after-opening-block'} + && $block_type_to_go[$iterm] + && $block_type_to_go[$iterm] =~ + /$blank_lines_after_opening_block_pattern/ ) + { + my $nblanks = $rOpts->{'blank-lines-after-opening-block'}; + Perl::Tidy::VerticalAligner::flush(); + $file_writer_object->require_blank_code_lines($nblanks); + } + } } + prepare_for_new_input_lines(); # output any new -cscw block comment @@ -10029,7 +10688,7 @@ sub starting_one_line_block { } else { - # cannot use one-line blocks with cuddled else else/elsif lines + # cannot use one-line blocks with cuddled else/elsif lines if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { return 0; } @@ -10039,38 +10698,68 @@ sub starting_one_line_block { # find the starting keyword for this block (such as 'if', 'else', ...) - if ( $block_type =~ /^[\{\}\;\:]$/ ) { + if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) { $i_start = $max_index_to_go; } + # the previous nonblank token should start these block types + elsif (( $last_last_nonblank_token_to_go eq $block_type ) + || ( $block_type =~ /^sub\b/ ) + || $block_type =~ /\(\)/ ) + { + $i_start = $last_last_nonblank_index_to_go; + + # For signatures and extended syntax ... + # If this brace follows a parenthesized list, we should look back to + # find the keyword before the opening paren because otherwise we might + # form a one line block which stays intack, and cause the parenthesized + # expression to break open. That looks bad. However, actually + # searching for the opening paren is slow and tedius. + # The actual keyword is often at the start of a line, but might not be. + # For example, we might have an anonymous sub with signature list + # following a =>. It is safe to mark the start anywhere before the + # opening paren, so we just go back to the prevoious break (or start of + # the line) if that is before the opening paren. The minor downside is + # that we may very occasionally break open a block unnecessarily. + if ( $tokens_to_go[$i_start] eq ')' ) { + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; } + my $lev = $levels_to_go[$i_start]; + if ( $lev > $level ) { return 0 } + } + } + elsif ( $last_last_nonblank_token_to_go eq ')' ) { # For something like "if (xxx) {", the keyword "if" will be # just after the most recent break. This will be 0 unless # we have just killed a one-line block and are starting another. # (doif.t) + # Note: cannot use inext_index_to_go[] here because that array + # is still being constructed. $i_start = $index_max_forced_break + 1; if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; } - unless ( $tokens_to_go[$i_start] eq $block_type ) { - return 0; - } - } - - # the previous nonblank token should start these block types - elsif ( - ( $last_last_nonblank_token_to_go eq $block_type ) - || ( $block_type =~ /^sub/ - && $last_last_nonblank_token_to_go =~ /^sub/ ) - ) - { - $i_start = $last_last_nonblank_index_to_go; + # Patch to avoid breaking short blocks defined with extended_syntax: + # Strip off any trailing () which was added in the parser to mark + # the opening keyword. For example, in the following + # create( TypeFoo $e) {$bubba} + # the blocktype would be marked as create() + my $stripped_block_type = $block_type; + $stripped_block_type =~ s/\(\)$//; + + unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) { + return 0; + } } # patch for SWITCH/CASE to retain one-line case/when blocks elsif ( $block_type eq 'case' || $block_type eq 'when' ) { + + # Note: cannot use inext_index_to_go[] here because that array + # is still being constructed. $i_start = $index_max_forced_break + 1; if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; @@ -10089,7 +10778,7 @@ sub starting_one_line_block { my $i; # see if length is too long to even start - if ( $pos > $rOpts_maximum_line_length ) { + if ( $pos > maximum_line_length($i_start) ) { return 1; } @@ -10097,10 +10786,10 @@ sub starting_one_line_block { # old whitespace could be arbitrarily large, so don't use it if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } - else { $pos += length( $$rtokens[$i] ) } + else { $pos += rtoken_length($i) } # Return false result if we exceed the maximum line length, - if ( $pos > $rOpts_maximum_line_length ) { + if ( $pos > maximum_line_length($i_start) ) { return 0; } @@ -10152,20 +10841,17 @@ sub starting_one_line_block { && !$is_sort_map_grep{$block_type} ) { - ## POSSIBLE FUTURE PATCH FOR IGNORING SIDE COMMENT LENGTHS - ## WHEN CHECKING FOR ONE-LINE BLOCKS: - ## if (flag set) then (just add 1 to pos) - $pos += length( $$rtokens[$i_nonblank] ); + $pos += rtoken_length($i_nonblank); if ( $i_nonblank > $i + 1 ) { # source whitespace could be anything, assume # at least one space before the hash on output if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 } - else { $pos += length( $$rtokens[ $i + 1 ] ) } + else { $pos += rtoken_length( $i + 1 ) } } - if ( $pos >= $rOpts_maximum_line_length ) { + if ( $pos >= maximum_line_length($i_start) ) { return 0; } } @@ -10206,7 +10892,7 @@ sub unstore_token_to_go { sub want_blank_line { flush(); - $file_writer_object->want_blank_line(); + $file_writer_object->want_blank_line() unless $in_format_skipping_section; } sub write_unindented_line { @@ -10217,7 +10903,7 @@ sub write_unindented_line { sub undo_ci { # Undo continuation indentation in certain sequences - # For example, we can undo continuation indation in sort/map/grep chains + # For example, we can undo continuation indentation in sort/map/grep chains # my $dat1 = pack( "n*", # map { $_, $lookup->{$_} } # sort { $a <=> $b } @@ -10251,7 +10937,7 @@ sub undo_ci { { # chain continues... - # check for chain ending at end of a a statement + # check for chain ending at end of a statement if ( $line == $max_line ) { # see of this line ends a statement @@ -10359,143 +11045,179 @@ sub undo_lp_ci { @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ]; } -sub set_logical_padding { +sub pad_token { - # Look at a batch of lines and see if extra padding can improve the - # alignment when there are certain leading operators. Here is an - # example, in which some extra space is introduced before - # '( $year' to make it line up with the subsequent lines: - # - # if ( ( $Year < 1601 ) - # || ( $Year > 2899 ) - # || ( $EndYear < 1601 ) - # || ( $EndYear > 2899 ) ) - # { - # &Error_OutOfRange; - # } - # - my ( $ri_first, $ri_last ) = @_; - my $max_line = @$ri_first - 1; + # insert $pad_spaces before token number $ipad + my ( $ipad, $pad_spaces ) = @_; + if ( $pad_spaces > 0 ) { + $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad]; + } + elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) { + $tokens_to_go[$ipad] = ""; + } + else { - my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces, - $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); + # shouldn't happen + return; + } - # looking at each line of this batch.. - foreach $line ( 0 .. $max_line - 1 ) { - - # see if the next line begins with a logical operator - $ibeg = $$ri_first[$line]; - $iend = $$ri_last[$line]; - $ibeg_next = $$ri_first[ $line + 1 ]; - $tok_next = $tokens_to_go[$ibeg_next]; - $type_next = $types_to_go[$ibeg_next]; - - $has_leading_op_next = ( $tok_next =~ /^\w/ ) - ? $is_chain_operator{$tok_next} # + - * / : ? && || - : $is_chain_operator{$type_next}; # and, or - - next unless ($has_leading_op_next); - - # next line must not be at lesser depth - next - if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] ); - - # identify the token in this line to be padded on the left - $ipad = undef; - - # handle lines at same depth... - if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) { - - # if this is not first line of the batch ... - if ( $line > 0 ) { - - # and we have leading operator.. - next if $has_leading_op; - - # Introduce padding if.. - # 1. the previous line is at lesser depth, or - # 2. the previous line ends in an assignment - # 3. the previous line ends in a 'return' - # 4. the previous line ends in a comma - # Example 1: previous line at lesser depth - # if ( ( $Year < 1601 ) # <- we are here but - # || ( $Year > 2899 ) # list has not yet - # || ( $EndYear < 1601 ) # collapsed vertically - # || ( $EndYear > 2899 ) ) - # { - # - # Example 2: previous line ending in assignment: - # $leapyear = - # $year % 4 ? 0 # <- We are here - # : $year % 100 ? 1 - # : $year % 400 ? 0 - # : 1; - # - # Example 3: previous line ending in comma: - # push @expr, - # /test/ ? undef - # : eval($_) ? 1 - # : eval($_) ? 1 - # : 0; - - # be sure levels agree (do not indent after an indented 'if') - next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); - - # allow padding on first line after a comma but only if: - # (1) this is line 2 and - # (2) there are at more than three lines and - # (3) lines 3 and 4 have the same leading operator - # These rules try to prevent padding within a long - # comma-separated list. - my $ok_comma; - if ( $types_to_go[$iendm] eq ',' - && $line == 1 - && $max_line > 2 ) - { - my $ibeg_next_next = $$ri_first[ $line + 2 ]; - my $tok_next_next = $tokens_to_go[$ibeg_next_next]; - $ok_comma = $tok_next_next eq $tok_next; - } + $token_lengths_to_go[$ipad] += $pad_spaces; + for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) { + $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; + } +} - next - unless ( - $is_assignment{ $types_to_go[$iendm] } - || $ok_comma - || ( $nesting_depth_to_go[$ibegm] < - $nesting_depth_to_go[$ibeg] ) - || ( $types_to_go[$iendm] eq 'k' - && $tokens_to_go[$iendm] eq 'return' ) - ); +{ + my %is_math_op; - # we will add padding before the first token - $ipad = $ibeg; - } + BEGIN { - # for first line of the batch.. - else { + @_ = qw( + - * / ); + @is_math_op{@_} = (1) x scalar(@_); + } + + sub set_logical_padding { - # WARNING: Never indent if first line is starting in a - # continued quote, which would change the quote. - next if $starting_in_quote; + # Look at a batch of lines and see if extra padding can improve the + # alignment when there are certain leading operators. Here is an + # example, in which some extra space is introduced before + # '( $year' to make it line up with the subsequent lines: + # + # if ( ( $Year < 1601 ) + # || ( $Year > 2899 ) + # || ( $EndYear < 1601 ) + # || ( $EndYear > 2899 ) ) + # { + # &Error_OutOfRange; + # } + # + my ( $ri_first, $ri_last ) = @_; + my $max_line = @$ri_first - 1; + + my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, + $pad_spaces, + $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); + + # looking at each line of this batch.. + foreach $line ( 0 .. $max_line - 1 ) { + + # see if the next line begins with a logical operator + $ibeg = $$ri_first[$line]; + $iend = $$ri_last[$line]; + $ibeg_next = $$ri_first[ $line + 1 ]; + $tok_next = $tokens_to_go[$ibeg_next]; + $type_next = $types_to_go[$ibeg_next]; + + $has_leading_op_next = ( $tok_next =~ /^\w/ ) + ? $is_chain_operator{$tok_next} # + - * / : ? && || + : $is_chain_operator{$type_next}; # and, or + + next unless ($has_leading_op_next); + + # next line must not be at lesser depth + next + if ( $nesting_depth_to_go[$ibeg] > + $nesting_depth_to_go[$ibeg_next] ); + + # identify the token in this line to be padded on the left + $ipad = undef; + + # handle lines at same depth... + if ( $nesting_depth_to_go[$ibeg] == + $nesting_depth_to_go[$ibeg_next] ) + { + + # if this is not first line of the batch ... + if ( $line > 0 ) { + + # and we have leading operator.. + next if $has_leading_op; + + # Introduce padding if.. + # 1. the previous line is at lesser depth, or + # 2. the previous line ends in an assignment + # 3. the previous line ends in a 'return' + # 4. the previous line ends in a comma + # Example 1: previous line at lesser depth + # if ( ( $Year < 1601 ) # <- we are here but + # || ( $Year > 2899 ) # list has not yet + # || ( $EndYear < 1601 ) # collapsed vertically + # || ( $EndYear > 2899 ) ) + # { + # + # Example 2: previous line ending in assignment: + # $leapyear = + # $year % 4 ? 0 # <- We are here + # : $year % 100 ? 1 + # : $year % 400 ? 0 + # : 1; + # + # Example 3: previous line ending in comma: + # push @expr, + # /test/ ? undef + # : eval($_) ? 1 + # : eval($_) ? 1 + # : 0; + + # be sure levels agree (do not indent after an indented 'if') + next + if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); + + # allow padding on first line after a comma but only if: + # (1) this is line 2 and + # (2) there are at more than three lines and + # (3) lines 3 and 4 have the same leading operator + # These rules try to prevent padding within a long + # comma-separated list. + my $ok_comma; + if ( $types_to_go[$iendm] eq ',' + && $line == 1 + && $max_line > 2 ) + { + my $ibeg_next_next = $$ri_first[ $line + 2 ]; + my $tok_next_next = $tokens_to_go[$ibeg_next_next]; + $ok_comma = $tok_next_next eq $tok_next; + } - # if this is text after closing '}' - # then look for an interior token to pad - if ( $types_to_go[$ibeg] eq '}' ) { + next + unless ( + $is_assignment{ $types_to_go[$iendm] } + || $ok_comma + || ( $nesting_depth_to_go[$ibegm] < + $nesting_depth_to_go[$ibeg] ) + || ( $types_to_go[$iendm] eq 'k' + && $tokens_to_go[$iendm] eq 'return' ) + ); + # we will add padding before the first token + $ipad = $ibeg; } - # otherwise, we might pad if it looks really good + # for first line of the batch.. else { - # we might pad token $ibeg, so be sure that it - # is at the same depth as the next line. - next - if ( $nesting_depth_to_go[$ibeg] != - $nesting_depth_to_go[$ibeg_next] ); + # WARNING: Never indent if first line is starting in a + # continued quote, which would change the quote. + next if $starting_in_quote; - # We can pad on line 1 of a statement if at least 3 - # lines will be aligned. Otherwise, it - # can look very confusing. + # if this is text after closing '}' + # then look for an interior token to pad + if ( $types_to_go[$ibeg] eq '}' ) { + + } + + # otherwise, we might pad if it looks really good + else { + + # we might pad token $ibeg, so be sure that it + # is at the same depth as the next line. + next + if ( $nesting_depth_to_go[$ibeg] != + $nesting_depth_to_go[$ibeg_next] ); + + # We can pad on line 1 of a statement if at least 3 + # lines will be aligned. Otherwise, it + # can look very confusing. # We have to be careful not to pad if there are too few # lines. The current rule is: @@ -10510,286 +11232,301 @@ sub set_logical_padding { # : $i == 2 ? ( "Then", "Rarity" ) # : ( "Then", "Name" ); - if ( $max_line > 1 ) { - my $leading_token = $tokens_to_go[$ibeg_next]; - my $tokens_differ; - - # never indent line 1 of a '.' series because - # previous line is most likely at same level. - # TODO: we should also look at the leasing_spaces - # of the last output line and skip if it is same - # as this line. - next if ( $leading_token eq '.' ); - - my $count = 1; - foreach my $l ( 2 .. 3 ) { - last if ( $line + $l > $max_line ); - my $ibeg_next_next = $$ri_first[ $line + $l ]; - if ( $tokens_to_go[$ibeg_next_next] ne - $leading_token ) - { - $tokens_differ = 1; - last; + if ( $max_line > 1 ) { + my $leading_token = $tokens_to_go[$ibeg_next]; + my $tokens_differ; + + # never indent line 1 of a '.' series because + # previous line is most likely at same level. + # TODO: we should also look at the leasing_spaces + # of the last output line and skip if it is same + # as this line. + next if ( $leading_token eq '.' ); + + my $count = 1; + foreach my $l ( 2 .. 3 ) { + last if ( $line + $l > $max_line ); + my $ibeg_next_next = $$ri_first[ $line + $l ]; + if ( $tokens_to_go[$ibeg_next_next] ne + $leading_token ) + { + $tokens_differ = 1; + last; + } + $count++; } - $count++; + next if ($tokens_differ); + next if ( $count < 3 && $leading_token ne ':' ); + $ipad = $ibeg; + } + else { + next; } - next if ($tokens_differ); - next if ( $count < 3 && $leading_token ne ':' ); - $ipad = $ibeg; - } - else { - next; } } } - } - # find interior token to pad if necessary - if ( !defined($ipad) ) { + # find interior token to pad if necessary + if ( !defined($ipad) ) { - for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { + for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { - # find any unclosed container - next - unless ( $type_sequence_to_go[$i] - && $mate_index_to_go[$i] > $iend ); + # find any unclosed container + next + unless ( $type_sequence_to_go[$i] + && $mate_index_to_go[$i] > $iend ); - # find next nonblank token to pad - $ipad = $i + 1; - if ( $types_to_go[$ipad] eq 'b' ) { - $ipad++; + # find next nonblank token to pad + $ipad = $inext_to_go[$i]; last if ( $ipad > $iend ); } + last unless $ipad; } - last unless $ipad; - } - # We cannot pad a leading token at the lowest level because - # it could cause a bug in which the starting indentation - # level is guessed incorrectly each time the code is run - # though perltidy, thus causing the code to march off to - # the right. For example, the following snippet would have - # this problem: + # We cannot pad a leading token at the lowest level because + # it could cause a bug in which the starting indentation + # level is guessed incorrectly each time the code is run + # though perltidy, thus causing the code to march off to + # the right. For example, the following snippet would have + # this problem: ## ov_method mycan( $package, '(""' ), $package ## or ov_method mycan( $package, '(0+' ), $package ## or ov_method mycan( $package, '(bool' ), $package ## or ov_method mycan( $package, '(nomethod' ), $package; - # If this snippet is within a block this won't happen - # unless the user just processes the snippet alone within - # an editor. In that case either the user will see and - # fix the problem or it will be corrected next time the - # entire file is processed with perltidy. - next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 ); - - # next line must not be at greater depth - my $iend_next = $$ri_last[ $line + 1 ]; - next - if ( $nesting_depth_to_go[ $iend_next + 1 ] > - $nesting_depth_to_go[$ipad] ); - - # lines must be somewhat similar to be padded.. - my $inext_next = $ibeg_next + 1; - if ( $types_to_go[$inext_next] eq 'b' ) { - $inext_next++; - } - my $type = $types_to_go[$ipad]; - my $type_next = $types_to_go[ $ipad + 1 ]; - - # see if there are multiple continuation lines - my $logical_continuation_lines = 1; - if ( $line + 2 <= $max_line ) { - my $leading_token = $tokens_to_go[$ibeg_next]; - my $ibeg_next_next = $$ri_first[ $line + 2 ]; - if ( $tokens_to_go[$ibeg_next_next] eq $leading_token - && $nesting_depth_to_go[$ibeg_next] eq - $nesting_depth_to_go[$ibeg_next_next] ) - { - $logical_continuation_lines++; + # If this snippet is within a block this won't happen + # unless the user just processes the snippet alone within + # an editor. In that case either the user will see and + # fix the problem or it will be corrected next time the + # entire file is processed with perltidy. + next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 ); + +## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT +## IT DID MORE HARM THAN GOOD +## ceil( +## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 +## / $upem +## ), +##? # do not put leading padding for just 2 lines of math +##? if ( $ipad == $ibeg +##? && $line > 0 +##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] +##? && $is_math_op{$type_next} +##? && $line + 2 <= $max_line ) +##? { +##? my $ibeg_next_next = $$ri_first[ $line + 2 ]; +##? my $type_next_next = $types_to_go[$ibeg_next_next]; +##? next if !$is_math_op{$type_next_next}; +##? } + + # next line must not be at greater depth + my $iend_next = $$ri_last[ $line + 1 ]; + next + if ( $nesting_depth_to_go[ $iend_next + 1 ] > + $nesting_depth_to_go[$ipad] ); + + # lines must be somewhat similar to be padded.. + my $inext_next = $inext_to_go[$ibeg_next]; + my $type = $types_to_go[$ipad]; + my $type_next = $types_to_go[ $ipad + 1 ]; + + # see if there are multiple continuation lines + my $logical_continuation_lines = 1; + if ( $line + 2 <= $max_line ) { + my $leading_token = $tokens_to_go[$ibeg_next]; + my $ibeg_next_next = $$ri_first[ $line + 2 ]; + if ( $tokens_to_go[$ibeg_next_next] eq $leading_token + && $nesting_depth_to_go[$ibeg_next] eq + $nesting_depth_to_go[$ibeg_next_next] ) + { + $logical_continuation_lines++; + } } - } - # see if leading types match - my $types_match = $types_to_go[$inext_next] eq $type; - my $matches_without_bang; + # see if leading types match + my $types_match = $types_to_go[$inext_next] eq $type; + my $matches_without_bang; - # if first line has leading ! then compare the following token - if ( !$types_match && $type eq '!' ) { - $types_match = $matches_without_bang = - $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; - } + # if first line has leading ! then compare the following token + if ( !$types_match && $type eq '!' ) { + $types_match = $matches_without_bang = + $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; + } - if ( + if ( - # either we have multiple continuation lines to follow - # and we are not padding the first token - ( $logical_continuation_lines > 1 && $ipad > 0 ) + # either we have multiple continuation lines to follow + # and we are not padding the first token + ( $logical_continuation_lines > 1 && $ipad > 0 ) - # or.. - || ( + # or.. + || ( - # types must match - $types_match + # types must match + $types_match - # and keywords must match if keyword - && !( - $type eq 'k' - && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] + # and keywords must match if keyword + && !( + $type eq 'k' + && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] + ) ) - ) - ) - { + ) + { - #----------------------begin special checks-------------- - # - # SPECIAL CHECK 1: - # A check is needed before we can make the pad. - # If we are in a list with some long items, we want each - # item to stand out. So in the following example, the - # first line begining with '$casefold->' would look good - # padded to align with the next line, but then it - # would be indented more than the last line, so we - # won't do it. - # - # ok( - # $casefold->{code} eq '0041' - # && $casefold->{status} eq 'C' - # && $casefold->{mapping} eq '0061', - # 'casefold 0x41' - # ); - # - # Note: - # It would be faster, and almost as good, to use a comma - # count, and not pad if comma_count > 1 and the previous - # line did not end with a comma. - # - my $ok_to_pad = 1; + #----------------------begin special checks-------------- + # + # SPECIAL CHECK 1: + # A check is needed before we can make the pad. + # If we are in a list with some long items, we want each + # item to stand out. So in the following example, the + # first line beginning with '$casefold->' would look good + # padded to align with the next line, but then it + # would be indented more than the last line, so we + # won't do it. + # + # ok( + # $casefold->{code} eq '0041' + # && $casefold->{status} eq 'C' + # && $casefold->{mapping} eq '0061', + # 'casefold 0x41' + # ); + # + # Note: + # It would be faster, and almost as good, to use a comma + # count, and not pad if comma_count > 1 and the previous + # line did not end with a comma. + # + my $ok_to_pad = 1; - my $ibg = $$ri_first[ $line + 1 ]; - my $depth = $nesting_depth_to_go[ $ibg + 1 ]; + my $ibg = $$ri_first[ $line + 1 ]; + my $depth = $nesting_depth_to_go[ $ibg + 1 ]; - # just use simplified formula for leading spaces to avoid - # needless sub calls - my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; + # just use simplified formula for leading spaces to avoid + # needless sub calls + my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; - # look at each line beyond the next .. - my $l = $line + 1; - foreach $l ( $line + 2 .. $max_line ) { - my $ibg = $$ri_first[$l]; + # look at each line beyond the next .. + my $l = $line + 1; + foreach $l ( $line + 2 .. $max_line ) { + my $ibg = $$ri_first[$l]; - # quit looking at the end of this container - last - if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) - || ( $nesting_depth_to_go[$ibg] < $depth ); + # quit looking at the end of this container + last + if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) + || ( $nesting_depth_to_go[$ibg] < $depth ); - # cannot do the pad if a later line would be - # outdented more - if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { - $ok_to_pad = 0; - last; + # cannot do the pad if a later line would be + # outdented more + if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { + $ok_to_pad = 0; + last; + } } - } - # don't pad if we end in a broken list - if ( $l == $max_line ) { - my $i2 = $$ri_last[$l]; - if ( $types_to_go[$i2] eq '#' ) { - my $i1 = $$ri_first[$l]; - next - if ( - terminal_type( \@types_to_go, \@block_type_to_go, $i1, - $i2 ) eq ',' - ); + # don't pad if we end in a broken list + if ( $l == $max_line ) { + my $i2 = $$ri_last[$l]; + if ( $types_to_go[$i2] eq '#' ) { + my $i1 = $$ri_first[$l]; + next + if ( + terminal_type( \@types_to_go, \@block_type_to_go, + $i1, $i2 ) eq ',' + ); + } } - } - # SPECIAL CHECK 2: - # a minus may introduce a quoted variable, and we will - # add the pad only if this line begins with a bare word, - # such as for the word 'Button' here: - # [ - # Button => "Print letter \"~$_\"", - # -command => [ sub { print "$_[0]\n" }, $_ ], - # -accelerator => "Meta+$_" - # ]; - # - # On the other hand, if 'Button' is quoted, it looks best - # not to pad: - # [ - # 'Button' => "Print letter \"~$_\"", - # -command => [ sub { print "$_[0]\n" }, $_ ], - # -accelerator => "Meta+$_" - # ]; - if ( $types_to_go[$ibeg_next] eq 'm' ) { - $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; - } - - next unless $ok_to_pad; - - #----------------------end special check--------------- - - my $length_1 = total_line_length( $ibeg, $ipad - 1 ); - my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); - $pad_spaces = $length_2 - $length_1; - - # If the first line has a leading ! and the second does - # not, then remove one space to try to align the next - # leading characters, which are often the same. For example: - # if ( !$ts - # || $ts == $self->Holder - # || $self->Holder->Type eq "Arena" ) - # - # This usually helps readability, but if there are subsequent - # ! operators things will still get messed up. For example: - # - # if ( !exists $Net::DNS::typesbyname{$qtype} - # && exists $Net::DNS::classesbyname{$qtype} - # && !exists $Net::DNS::classesbyname{$qclass} - # && exists $Net::DNS::typesbyname{$qclass} ) - # We can't fix that. - if ($matches_without_bang) { $pad_spaces-- } - - # make sure this won't change if -lp is used - my $indentation_1 = $leading_spaces_to_go[$ibeg]; - if ( ref($indentation_1) ) { - if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) { - my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; - unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) { - $pad_spaces = 0; + # SPECIAL CHECK 2: + # a minus may introduce a quoted variable, and we will + # add the pad only if this line begins with a bare word, + # such as for the word 'Button' here: + # [ + # Button => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + # -accelerator => "Meta+$_" + # ]; + # + # On the other hand, if 'Button' is quoted, it looks best + # not to pad: + # [ + # 'Button' => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + # -accelerator => "Meta+$_" + # ]; + if ( $types_to_go[$ibeg_next] eq 'm' ) { + $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; + } + + next unless $ok_to_pad; + + #----------------------end special check--------------- + + my $length_1 = total_line_length( $ibeg, $ipad - 1 ); + my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); + $pad_spaces = $length_2 - $length_1; + + # If the first line has a leading ! and the second does + # not, then remove one space to try to align the next + # leading characters, which are often the same. For example: + # if ( !$ts + # || $ts == $self->Holder + # || $self->Holder->Type eq "Arena" ) + # + # This usually helps readability, but if there are subsequent + # ! operators things will still get messed up. For example: + # + # if ( !exists $Net::DNS::typesbyname{$qtype} + # && exists $Net::DNS::classesbyname{$qtype} + # && !exists $Net::DNS::classesbyname{$qclass} + # && exists $Net::DNS::typesbyname{$qclass} ) + # We can't fix that. + if ($matches_without_bang) { $pad_spaces-- } + + # make sure this won't change if -lp is used + my $indentation_1 = $leading_spaces_to_go[$ibeg]; + if ( ref($indentation_1) ) { + if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) { + my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; + unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) + { + $pad_spaces = 0; + } } } - } - # we might be able to handle a pad of -1 by removing a blank - # token - if ( $pad_spaces < 0 ) { + # we might be able to handle a pad of -1 by removing a blank + # token + if ( $pad_spaces < 0 ) { - if ( $pad_spaces == -1 ) { - if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) { - $tokens_to_go[ $ipad - 1 ] = ''; + if ( $pad_spaces == -1 ) { + if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) + { + pad_token( $ipad - 1, $pad_spaces ); + } } + $pad_spaces = 0; } - $pad_spaces = 0; - } - # now apply any padding for alignment - if ( $ipad >= 0 && $pad_spaces ) { + # now apply any padding for alignment + if ( $ipad >= 0 && $pad_spaces ) { - my $length_t = total_line_length( $ibeg, $iend ); - if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) { - $tokens_to_go[$ipad] = - ' ' x $pad_spaces . $tokens_to_go[$ipad]; + my $length_t = total_line_length( $ibeg, $iend ); + if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) ) + { + pad_token( $ipad, $pad_spaces ); + } } } } + continue { + $iendm = $iend; + $ibegm = $ibeg; + $has_leading_op = $has_leading_op_next; + } # end of loop over lines + return; } - continue { - $iendm = $iend; - $ibegm = $ibeg; - $has_leading_op = $has_leading_op_next; - } # end of loop over lines - return; } sub correct_lp_indentation { @@ -10849,8 +11586,7 @@ sub correct_lp_indentation { # skip closed container on this line if ( $i > $ibeg ) { - my $im = $i - 1; - if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- } + my $im = max( $ibeg, $iprev_to_go[$i] ); if ( $type_sequence_to_go[$im] && $mate_index_to_go[$im] <= $iend ) { @@ -10958,7 +11694,7 @@ sub correct_lp_indentation { $max_length = $length_t; } } - $right_margin = $rOpts_maximum_line_length - $max_length; + $right_margin = maximum_line_length($ibeg) - $max_length; if ( $right_margin < 0 ) { $right_margin = 0 } } @@ -11055,7 +11791,6 @@ sub set_block_text_accumulator { # this will contain the column number of the last character # of the closing side comment - ##$csc_last_label="" unless $csc_last_label; $leading_block_text_line_length = length($csc_last_label) + length($accumulating_text_for_block) + @@ -11072,7 +11807,7 @@ sub accumulate_block_text { && $types_to_go[$i] ne '#' ) { - my $added_length = length( $tokens_to_go[$i] ); + my $added_length = $token_lengths_to_go[$i]; $added_length += 1 if $i == 0; my $new_line_length = $leading_block_text_line_length + $added_length; @@ -11087,9 +11822,13 @@ sub accumulate_block_text { # the new total line length must be below the line length limit # or the new length must be below the text length limit # (ie, we may allow one token to exceed the text length limit) - && ( $new_line_length < $rOpts_maximum_line_length + && ( + $new_line_length < + maximum_line_length_for_level($leading_block_text_level) + || length($leading_block_text) + $added_length < - $rOpts_closing_side_comment_maximum_text ) + $rOpts_closing_side_comment_maximum_text + ) # UNLESS: we are adding a closing paren before the brace we seek. # This is an attempt to avoid situations where the ... to be @@ -11126,6 +11865,7 @@ sub accumulate_block_text { # show that text was truncated if necessary elsif ( $types_to_go[$i] ne 'b' ) { $leading_block_text_length_exceeded = 1; +## Please see file perltidy.ERR $leading_block_text .= '...'; } } @@ -11140,8 +11880,9 @@ sub accumulate_block_text { # curly. Note: 'else' does not, but must be included to allow trailing # if/elsif text to be appended. # patch for SWITCH/CASE: added 'case' and 'when' - @_ = qw(if elsif else unless while until for foreach case when); - @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_); + @_ = qw(if elsif else unless while until for foreach case when catch); + @is_if_elsif_else_unless_while_until_for_foreach{@_} = + (1) x scalar(@_); } sub accumulate_csc_text { @@ -11184,8 +11925,8 @@ sub accumulate_block_text { # restore any leading text saved when we entered this block if ( defined( $block_leading_text{$type_sequence} ) ) { - ( $block_leading_text, $rblock_leading_if_elsif_text ) = - @{ $block_leading_text{$type_sequence} }; + ( $block_leading_text, $rblock_leading_if_elsif_text ) + = @{ $block_leading_text{$type_sequence} }; $i_block_leading_text = $i; delete $block_leading_text{$type_sequence}; $rleading_block_if_elsif_text = @@ -11327,7 +12068,8 @@ sub make_else_csc_text { my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; my $csc_text = $block_leading_text; - if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 ) + if ( $block_type eq 'elsif' + && $rOpts_closing_side_comment_else_flag == 0 ) { return $csc_text; } @@ -11373,7 +12115,7 @@ sub make_else_csc_text { length($block_type) + length( $rOpts->{'closing-side-comment-prefix'} ) + $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; - if ( $length > $rOpts_maximum_line_length ) { + if ( $length > maximum_line_length_for_level($leading_block_text_level) ) { $csc_text = $saved_text; } return $csc_text; @@ -11403,7 +12145,7 @@ sub make_else_csc_text { # output = ## end foreach my $foo ( sort { $b ...}) # NOTE: This routine does not currently filter out structures within - # quoted text because the bounce algorithims in text editors do not + # quoted text because the bounce algorithms in text editors do not # necessarily do this either (a version of vim was checked and # did not do this). @@ -11491,7 +12233,7 @@ sub add_closing_side_comment { # ..and either && ( - # this is the last token (line doesnt have a side comment) + # this is the last token (line doesn't have a side comment) !$have_side_comment # or the old side comment is a closing side comment @@ -11551,7 +12293,8 @@ sub add_closing_side_comment { # if the new comment is shorter and has been limited, # only compare the common part. - if ( length($new_csc) < length($old_csc) && $new_trailing_dots ) + if ( length($new_csc) < length($old_csc) + && $new_trailing_dots ) { $old_csc = substr( $old_csc, 0, length($new_csc) ); } @@ -11727,20 +12470,55 @@ sub send_lines_to_vertical_aligner { # flush an outdented line to avoid any unwanted vertical alignment Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); + # Set a flag at the final ':' of a ternary chain to request + # vertical alignment of the final term. Here is a + # slightly complex example: + # + # $self->{_text} = ( + # !$section ? '' + # : $type eq 'item' ? "the $section entry" + # : "the section on $section" + # ) + # . ( + # $page + # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" + # : ' elsewhere in this document' + # ); + # my $is_terminal_ternary = 0; if ( $tokens_to_go[$ibeg] eq ':' || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) { - if ( ( $terminal_type eq ';' && $level_end <= $lev ) - || ( $level_end < $lev ) ) + my $last_leading_type = ":"; + if ( $n > 0 ) { + my $iprev = $$ri_first[ $n - 1 ]; + $last_leading_type = $types_to_go[$iprev]; + } + if ( $terminal_type ne ';' + && $n_last_line > $n + && $level_end == $lev ) { - $is_terminal_ternary = 1; + my $inext = $$ri_first[ $n + 1 ]; + $level_end = $levels_to_go[$inext]; + $terminal_type = $types_to_go[$inext]; } + + $is_terminal_ternary = $last_leading_type eq ':' + && ( ( $terminal_type eq ';' && $level_end <= $lev ) + || ( $terminal_type ne ':' && $level_end < $lev ) ) + + # the terminal term must not contain any ternary terms, as in + # my $ECHO = ( + # $Is_MSWin32 ? ".\\echo$$" + # : $Is_MacOS ? ":echo$$" + # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) + # ); + && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ]; } # send this new line down the pipe my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; - Perl::Tidy::VerticalAligner::append_line( + Perl::Tidy::VerticalAligner::valign_input( $lev, $level_end, $indentation, @@ -11790,12 +12568,6 @@ sub send_lines_to_vertical_aligner { # and limit total to 10 character widths && token_sequence_length( $ibeg, $iend ) <= 10; -## $last_output_short_opening_token = -## $types_to_go[$iend] =~ /^[\{\(\[L]$/ -## && $iend - $ibeg <= 2 -## && $tokens_to_go[$ibeg] !~ /^sub/ -## && token_sequence_length( $ibeg, $iend ) <= 10; - } # end of loop to output each line # remember indentation of lines containing opening containers for @@ -11803,7 +12575,7 @@ sub send_lines_to_vertical_aligner { save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); } -{ # begin make_alignment_patterns +{ # begin make_alignment_patterns my %block_type_map; my %keyword_map; @@ -11900,7 +12672,7 @@ sub send_lines_to_vertical_aligner { # Make the container name even more unique if necessary. # If we are not vertically aligning this opening paren, # append a character count to avoid bad alignment because - # it usually looks bad to align commas within continers + # it usually looks bad to align commas within containers # for which the opening parens do not align. Here # is an example very BAD alignment of commas (because # the atan2 functions are not all aligned): @@ -11927,14 +12699,10 @@ sub send_lines_to_vertical_aligner { if ( $matching_token_to_go[$i] eq '' ) { # Sum length from previous alignment, or start of line. - # Note that we have to sum token lengths here because - # padding has been done and so array $lengths_to_go - # is now wrong. my $len = - length( - join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); - $len += leading_spaces_to_go($i_start) - if ( $i_start == $ibeg ); + ( $i_start == $ibeg ) + ? total_line_length( $i_start, $i - 1 ) + : token_sequence_length( $i_start, $i - 1 ); # tack length onto the container name to make unique $container_name[$depth] .= "-" . $len; @@ -11999,7 +12767,8 @@ sub send_lines_to_vertical_aligner { # remove sub names to allow one-line sub braces to align # regardless of name - if ( $block_type =~ /^sub / ) { $block_type = 'sub' } + #if ( $block_type =~ /^sub / ) { $block_type = 'sub' } + if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' } # allow all control-type blocks to align if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } @@ -12101,6 +12870,7 @@ sub send_lines_to_vertical_aligner { @unmatched_opening_indexes_in_this_batch = (); @unmatched_closing_indexes_in_this_batch = (); %comma_arrow_count = (); + my $comma_arrow_count_contained = 0; my ( $i, $i_mate, $token ); foreach $i ( 0 .. $max_index_to_go ) { @@ -12118,6 +12888,11 @@ sub send_lines_to_vertical_aligner { { $mate_index_to_go[$i] = $i_mate; $mate_index_to_go[$i_mate] = $i; + my $seqno = $type_sequence_to_go[$i]; + if ( $comma_arrow_count{$seqno} ) { + $comma_arrow_count_contained += + $comma_arrow_count{$seqno}; + } } else { push @unmatched_opening_indexes_in_this_batch, @@ -12138,6 +12913,7 @@ sub send_lines_to_vertical_aligner { } } } + return $comma_arrow_count_contained; } sub save_opening_indentation { @@ -12294,7 +13070,8 @@ sub lookup_opening_indentation { # if/elsif text to be appended. # patch for SWITCH/CASE: added 'case' and 'when' @_ = qw(if elsif else unless while until for foreach case when); - @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_); + @is_if_elsif_else_unless_while_until_for_foreach{@_} = + (1) x scalar(@_); } sub set_adjusted_indentation { @@ -12383,7 +13160,7 @@ sub lookup_opening_indentation { # allow just one character before the comma && $i_terminal == $ibeg + 1 - # requre LIST environment; otherwise, we may outdent too much -- + # require LIST environment; otherwise, we may outdent too much - # this can happen in calls without parentheses (overload.t); && $container_environment_to_go[$i_terminal] eq 'LIST' ) @@ -12408,6 +13185,31 @@ sub lookup_opening_indentation { { $adjust_indentation = 1; } + + # Patch for RT #96101, in which closing brace of anonymous subs + # was not outdented. We should look ahead and see if there is + # a level decrease at the next token (i.e., a closing token), + # but right now we do not have that information. For now + # we see if we are in a list, and this works well. + # See test files 'sub*.t' for good test cases. + if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/ + && $container_environment_to_go[$i_terminal] eq 'LIST' + && !$rOpts->{'indent-closing-brace'} ) + { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + my $indentation = $leading_spaces_to_go[$ibeg]; + if ( defined($opening_indentation) + && get_SPACES($indentation) > + get_SPACES($opening_indentation) ) + { + $adjust_indentation = 1; + } + } } # YVES patch 1 of 2: @@ -12426,7 +13228,8 @@ sub lookup_opening_indentation { $rindentation_list ); my $indentation = $leading_spaces_to_go[$ibeg]; if ( defined($opening_indentation) - && $indentation > $opening_indentation ) + && get_SPACES($indentation) > + get_SPACES($opening_indentation) ) { $adjust_indentation = 1; } @@ -12567,7 +13370,7 @@ sub lookup_opening_indentation { } } - # revert to default if it doesnt work + # revert to default if it doesn't work else { $space_count = leading_spaces_to_go($ibeg); if ( $default_adjust_indentation == 0 ) { @@ -12742,7 +13545,9 @@ sub set_vertical_tightness_flags { # if we should combine this line with the next line to achieve the # desired vertical tightness. The array of parameters contains: # - # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace + # [0] type: 1=opening non-block 2=closing non-block + # 3=opening block brace 4=closing block brace + # # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok # if closing: spaces of padding to use # [2] sequence number of container @@ -12755,11 +13560,18 @@ sub set_vertical_tightness_flags { my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1: + # Handle Lines 1 .. n-1 but not the last line # For non-BLOCK tokens, we will need to examine the next line # too, so we won't consider the last line. + #-------------------------------------------------------------- if ( $n < $n_last_line ) { - # see if last token is an opening token...not a BLOCK... + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1a: + # Look for Type 1, last token of this line is a non-block opening token + #-------------------------------------------------------------- my $ibeg_next = $$ri_first[ $n + 1 ]; my $token_end = $tokens_to_go[$iend]; my $iend_next = $$ri_last[ $n + 1 ]; @@ -12799,8 +13611,11 @@ sub set_vertical_tightness_flags { } } - # see if first token of next line is a closing token... - # ..and be sure this line does not have a side comment + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1b: + # Look for Type 2, first token of next line is a non-block closing + # token .. and be sure this line does not have a side comment + #-------------------------------------------------------------- my $token_next = $tokens_to_go[$ibeg_next]; if ( $type_sequence_to_go[$ibeg_next] && !$block_type_to_go[$ibeg_next] @@ -12855,7 +13670,9 @@ sub set_vertical_tightness_flags { } } - # Opening Token Right + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1c: + # Implement the Opening Token Right flag (Type 2).. # If requested, move an isolated trailing opening token to the end of # the previous line which ended in a comma. We could do this # in sub recombine_breakpoints but that would cause problems @@ -12863,7 +13680,8 @@ sub set_vertical_tightness_flags { # quickly move far to the right in nested expressions. By # doing it after indentation has been set, we avoid changes # to the indentation. Actual movement of the token takes place - # in sub write_leader_and_string. + # in sub valign_output_step_B. + #-------------------------------------------------------------- if ( $opening_token_right{ $tokens_to_go[$ibeg_next] } @@ -12873,7 +13691,6 @@ sub set_vertical_tightness_flags { # previous line ended in one of these # (add other cases if necessary; '=>' and '.' are not necessary - ##&& ($is_opening_token{$token_end} || $token_end eq ',') && !$block_type_to_go[$ibeg_next] # this is a line with just an opening token @@ -12891,7 +13708,10 @@ sub set_vertical_tightness_flags { ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); } - # Stacking of opening and closing tokens + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1d: + # Stacking of opening and closing tokens (Type 2) + #-------------------------------------------------------------- my $stackable; my $token_beg_next = $tokens_to_go[$ibeg_next]; @@ -12949,7 +13769,11 @@ sub set_vertical_tightness_flags { } } + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 2: + # Handle type 3, opening block braces on last line of the batch # Check for a last line with isolated opening BLOCK curly + #-------------------------------------------------------------- elsif ($rOpts_block_brace_vertical_tightness && $ibeg eq $iend && $types_to_go[$iend] eq '{' @@ -12960,6 +13784,21 @@ sub set_vertical_tightness_flags { ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); } + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 3: + # Handle type 4, a closing block brace on the last line of the batch Check + # for a last line with isolated closing BLOCK curly + #-------------------------------------------------------------- + elsif ($rOpts_stack_closing_block_brace + && $ibeg eq $iend + && $block_type_to_go[$iend] + && $types_to_go[$iend] eq '}' ) + { + my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1; + @{$rvertical_tightness_flags} = + ( 4, $spaces, $type_sequence_to_go[$iend], 1 ); + } + # pack in the sequence numbers of the ends of this line $rvertical_tightness_flags->[4] = get_seqno($ibeg); $rvertical_tightness_flags->[5] = get_seqno($iend); @@ -12991,16 +13830,23 @@ sub get_seqno { { my %is_vertical_alignment_type; my %is_vertical_alignment_keyword; + my %is_terminal_alignment_type; BEGIN { + # Removed =~ from list to improve chances of alignment @_ = qw# = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= - { ? : => =~ && || // ~~ !~~ + { ? : => && || // ~~ !~~ #; @is_vertical_alignment_type{@_} = (1) x scalar(@_); - @_ = qw(if unless and or err eq ne for foreach while until); + # only align these at end of line + @_ = qw(&& ||); + @is_terminal_alignment_type{@_} = (1) x scalar(@_); + + # eq and ne were removed from this list to improve alignment chances + @_ = qw(if unless and or err for foreach while until); @is_vertical_alignment_keyword{@_} = (1) x scalar(@_); } @@ -13118,14 +13964,16 @@ sub get_seqno { $alignment_type = $token; # Do not align a terminal token. Although it might - # occasionally look ok to do this, it has been found to be + # occasionally look ok to do this, this has been found to be # a good general rule. The main problems are: # (1) that the terminal token (such as an = or :) might get # moved far to the right where it is hard to see because # nothing follows it, and # (2) doing so may prevent other good alignments. + # Current exceptions are && and || if ( $i == $iend || $i >= $i_terminal ) { - $alignment_type = ""; + $alignment_type = "" + unless ( $is_terminal_alignment_type{$type} ); } # Do not align leading ': (' or '. ('. This would prevent @@ -13228,7 +14076,7 @@ sub terminal_type { } else { - # start at end and walk bakwards.. + # start at end and walk backwards.. for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { # skip past any side comment and blanks @@ -13255,10 +14103,28 @@ sub terminal_type { } } -{ +{ # set_bond_strengths + my %is_good_keyword_breakpoint; my %is_lt_gt_le_ge; + my %binary_bond_strength; + my %nobreak_lhs; + my %nobreak_rhs; + + my @bias_tokens; + my $delta_bias; + + sub bias_table_key { + my ( $type, $token ) = @_; + my $bias_table_key = $type; + if ( $type eq 'k' ) { + $bias_table_key = $token; + if ( $token eq 'err' ) { $bias_table_key = 'or' } + } + return $bias_table_key; + } + sub set_bond_strengths { BEGIN { @@ -13268,20 +14134,69 @@ sub terminal_type { @_ = qw(lt gt le ge); @is_lt_gt_le_ge{@_} = (1) x scalar(@_); + # + # The decision about where to break a line depends upon a "bond + # strength" between tokens. The LOWER the bond strength, the MORE + # likely a break. A bond strength may be any value but to simplify + # things there are several pre-defined strength levels: + + # NO_BREAK => 10000; + # VERY_STRONG => 100; + # STRONG => 2.1; + # NOMINAL => 1.1; + # WEAK => 0.8; + # VERY_WEAK => 0.55; + + # The strength values are based on trial-and-error, and need to be + # tweaked occasionally to get desired results. Some comments: + # + # 1. Only relative strengths are important. small differences + # in strengths can make big formatting differences. + # 2. Each indentation level adds one unit of bond strength. + # 3. A value of NO_BREAK makes an unbreakable bond + # 4. A value of VERY_WEAK is the strength of a ',' + # 5. Values below NOMINAL are considered ok break points. + # 6. Values above NOMINAL are considered poor break points. + # + # The bond strengths should roughly follow precedence order where + # possible. If you make changes, please check the results very + # carefully on a variety of scripts. Testing with the -extrude + # options is particularly helpful in exercising all of the rules. - ############################################################### - # NOTE: NO_BREAK's set here are HINTS which may not be honored; - # essential NO_BREAKS's must be enforced in section 2, below. - ############################################################### + # Wherever possible, bond strengths are defined in the following + # tables. There are two main stages to setting bond strengths and + # two types of tables: + # + # The first stage involves looking at each token individually and + # defining left and right bond strengths, according to if we want + # to break to the left or right side, and how good a break point it + # is. For example tokens like =, ||, && make good break points and + # will have low strengths, but one might want to break on either + # side to put them at the end of one line or beginning of the next. + # + # The second stage involves looking at certain pairs of tokens and + # defining a bond strength for that particular pair. This second + # stage has priority. - # adding NEW_TOKENS: add a left and right bond strength by - # mimmicking what is done for an existing token type. You - # can skip this step at first and take the default, then - # tweak later to get desired results. + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 1. + # Set left and right bond strengths of individual tokens. + #--------------------------------------------------------------- - # The bond strengths should roughly follow precenence order where - # possible. If you make changes, please check the results very - # carefully on a variety of scripts. + # NOTE: NO_BREAK's set in this section first are HINTS which will + # probably not be honored. Essential NO_BREAKS's should be set in + # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end + # of this subroutine. + + # Note that we are setting defaults in this section. The user + # cannot change bond strengths but can cause the left and right + # bond strengths of any token type to be swapped through the use of + # the -wba and -wbb flags. In this way the user can determine if a + # breakpoint token should appear at the end of one line or the + # beginning of the next line. + + # The hash keys in this section are token types, plus the text of + # certain keywords like 'or', 'and'. # no break around possible filehandle $left_bond_strength{'Z'} = NO_BREAK; @@ -13291,7 +14206,8 @@ sub terminal_type { # example print (STDERR, "bla"); will fail with break after ( $left_bond_strength{'w'} = NO_BREAK; - # blanks always have infinite strength to force breaks after real tokens + # blanks always have infinite strength to force breaks after + # real tokens $right_bond_strength{'b'} = NO_BREAK; # try not to break on exponentation @@ -13314,6 +14230,9 @@ sub terminal_type { $left_bond_strength{'->'} = STRONG; $right_bond_strength{'->'} = VERY_STRONG; + $left_bond_strength{'CORE::'} = NOMINAL; + $right_bond_strength{'CORE::'} = NO_BREAK; + # breaking AFTER modulus operator is ok: @_ = qw" % "; @left_bond_strength{@_} = (STRONG) x scalar(@_); @@ -13342,7 +14261,7 @@ sub terminal_type { $right_bond_strength{'.'} = STRONG; $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; - @_ = qw"} ] ) "; + @_ = qw"} ] ) R"; @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = (NOMINAL) x scalar(@_); @@ -13376,18 +14295,20 @@ sub terminal_type { $left_bond_strength{'G'} = NOMINAL; $right_bond_strength{'G'} = STRONG; - # it is good to break AFTER various assignment operators + # assignment operators @_ = qw( = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= ); + + # Default is to break AFTER various assignment operators @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); - # break BEFORE '&&' and '||' and '//' + # Default is to break BEFORE '&&' and '||' and '//' # set strength of '||' to same as '=' so that chains like # $a = $b || $c || $d will break before the first '||' $right_bond_strength{'||'} = NOMINAL; @@ -13424,6 +14345,11 @@ sub terminal_type { $left_bond_strength{','} = VERY_STRONG; $right_bond_strength{','} = VERY_WEAK; + # remaining digraphs and trigraphs not defined above + @_ = qw( :: <> ++ --); + @left_bond_strength{@_} = (WEAK) x scalar(@_); + @right_bond_strength{@_} = (STRONG) x scalar(@_); + # Set bond strengths of certain keywords # make 'or', 'err', 'and' slightly weaker than a ',' $left_bond_strength{'and'} = VERY_WEAK - 0.01; @@ -13434,37 +14360,204 @@ sub terminal_type { $right_bond_strength{'or'} = NOMINAL; $right_bond_strength{'err'} = NOMINAL; $right_bond_strength{'xor'} = STRONG; - } + + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 2. + # Set binary rules for bond strengths between certain token types. + #--------------------------------------------------------------- + + # We have a little problem making tables which apply to the + # container tokens. Here is a list of container tokens and + # their types: + # + # type tokens // meaning + # { {, [, ( // indent + # } }, ], ) // outdent + # [ [ // left non-structural [ (enclosing an array index) + # ] ] // right non-structural square bracket + # ( ( // left non-structural paren + # ) ) // right non-structural paren + # L { // left non-structural curly brace (enclosing a key) + # R } // right non-structural curly brace + # + # Some rules apply to token types and some to just the token + # itself. We solve the problem by combining type and token into a + # new hash key for the container types. + # + # If a rule applies to a token 'type' then we need to make rules + # for each of these 'type.token' combinations: + # Type Type.Token + # { {{, {[, {( + # [ [[ + # ( (( + # L L{ + # } }}, }], }) + # ] ]] + # ) )) + # R R} + # + # If a rule applies to a token then we need to make rules for + # these 'type.token' combinations: + # Token Type.Token + # { {{, L{ + # [ {[, [[ + # ( {(, (( + # } }}, R} + # ] }], ]] + # ) }), )) + + # allow long lines before final { in an if statement, as in: + # if (.......... + # ..........) + # { + # + # Otherwise, the line before the { tends to be too short. + + $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03; + $binary_bond_strength{'(('}{'{{'} = NOMINAL; + + # break on something like '} (', but keep this stronger than a ',' + # example is in 'howe.pl' + $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; + $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; + + # keep matrix and hash indices together + # but make them a little below STRONG to allow breaking open + # something like {'some-word'}{'some-very-long-word'} at the }{ + # (bracebrk.t) + $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; + + # increase strength to the point where a break in the following + # will be after the opening paren rather than at the arrow: + # $a->$b($c); + $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; + + $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + + $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; + + #--------------------------------------------------------------- + # Binary NO_BREAK rules + #--------------------------------------------------------------- + + # use strict requires that bare word and => not be separated + $binary_bond_strength{'C'}{'=>'} = NO_BREAK; + $binary_bond_strength{'U'}{'=>'} = NO_BREAK; + + # Never break between a bareword and a following paren because + # perl may give an error. For example, if a break is placed + # between 'to_filehandle' and its '(' the following line will + # give a syntax error [Carp.pm]: my( $no) =fileno( + # to_filehandle( $in)) ; + $binary_bond_strength{'C'}{'(('} = NO_BREAK; + $binary_bond_strength{'C'}{'{('} = NO_BREAK; + $binary_bond_strength{'U'}{'(('} = NO_BREAK; + $binary_bond_strength{'U'}{'{('} = NO_BREAK; + + # use strict requires that bare word within braces not start new + # line + $binary_bond_strength{'L{'}{'w'} = NO_BREAK; + + $binary_bond_strength{'w'}{'R}'} = NO_BREAK; + + # use strict requires that bare word and => not be separated + $binary_bond_strength{'w'}{'=>'} = NO_BREAK; + + # use strict does not allow separating type info from trailing { } + # testfile is readmail.pl + $binary_bond_strength{'t'}{'L{'} = NO_BREAK; + $binary_bond_strength{'i'}{'L{'} = NO_BREAK; + + # As a defensive measure, do not break between a '(' and a + # filehandle. In some cases, this can cause an error. For + # example, the following program works: + # my $msg="hi!\n"; + # print + # ( STDOUT + # $msg + # ); + # + # But this program fails: + # my $msg="hi!\n"; + # print + # ( + # STDOUT + # $msg + # ); + # + # This is normally only a problem with the 'extrude' option + $binary_bond_strength{'(('}{'Y'} = NO_BREAK; + $binary_bond_strength{'{('}{'Y'} = NO_BREAK; + + # never break between sub name and opening paren + $binary_bond_strength{'w'}{'(('} = NO_BREAK; + $binary_bond_strength{'w'}{'{('} = NO_BREAK; + + # keep '}' together with ';' + $binary_bond_strength{'}}'}{';'} = NO_BREAK; + + # Breaking before a ++ can cause perl to guess wrong. For + # example the following line will cause a syntax error + # with -extrude if we break between '$i' and '++' [fixstyle2] + # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); + $nobreak_lhs{'++'} = NO_BREAK; + + # Do not break before a possible file handle + $nobreak_lhs{'Z'} = NO_BREAK; + + # use strict hates bare words on any new line. For + # example, a break before the underscore here provokes the + # wrath of use strict: + # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { + $nobreak_rhs{'F'} = NO_BREAK; + $nobreak_rhs{'CORE::'} = NO_BREAK; + + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 3. + # Define tables and values for applying a small bias to the above + # values. + #--------------------------------------------------------------- + # Adding a small 'bias' to strengths is a simple way to make a line + # break at the first of a sequence of identical terms. For + # example, to force long string of conditional operators to break + # with each line ending in a ':', we can add a small number to the + # bond strength of each ':' (colon.t) + @bias_tokens = qw( : && || f and or . ); # tokens which get bias + $delta_bias = 0.0001; # a very small strength level + + } ## end BEGIN # patch-its always ok to break at end of line $nobreak_to_go[$max_index_to_go] = 0; - # adding a small 'bias' to strengths is a simple way to make a line - # break at the first of a sequence of identical terms. For example, - # to force long string of conditional operators to break with - # each line ending in a ':', we can add a small number to the bond - # strength of each ':' - my $colon_bias = 0; - my $amp_bias = 0; - my $bar_bias = 0; - my $and_bias = 0; - my $or_bias = 0; - my $dot_bias = 0; - my $f_bias = 0; - my $code_bias = -.01; - my $type = 'b'; - my $token = ' '; + # we start a new set of bias values for each line + my %bias; + @bias{@bias_tokens} = (0) x scalar(@bias_tokens); + my $code_bias = -.01; # bias for closing block braces + + my $type = 'b'; + my $token = ' '; my $last_type; my $last_nonblank_type = $type; my $last_nonblank_token = $token; - my $delta_bias = 0.0001; my $list_str = $left_bond_strength{'?'}; my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, ); - # preliminary loop to compute bond strengths + # main loop to compute bond strengths between each pair of tokens for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { $last_type = $type; if ( $type ne 'b' ) { @@ -13489,39 +14582,17 @@ sub terminal_type { $next_nonblank_type = $types_to_go[$i_next_nonblank]; $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - # Some token chemistry... The decision about where to break a - # line depends upon a "bond strength" between tokens. The LOWER - # the bond strength, the MORE likely a break. The strength - # values are based on trial-and-error, and need to be tweaked - # occasionally to get desired results. Things to keep in mind - # are: - # 1. relative strengths are important. small differences - # in strengths can make big formatting differences. - # 2. each indentation level adds one unit of bond strength - # 3. a value of NO_BREAK makes an unbreakable bond - # 4. a value of VERY_WEAK is the strength of a ',' - # 5. values below NOMINAL are considered ok break points - # 6. values above NOMINAL are considered poor break points # We are computing the strength of the bond between the current # token and the NEXT token. - my $bond_str = VERY_STRONG; # a default, high strength #--------------------------------------------------------------- - # section 1: - # use minimum of left and right bond strengths if defined; - # digraphs and trigraphs like to break on their left + # Bond Strength Section 1: + # First Approximation. + # Use minimum of individual left and right tabulated bond + # strengths. #--------------------------------------------------------------- my $bsr = $right_bond_strength{$type}; - - if ( !defined($bsr) ) { - - if ( $is_digraph{$type} || $is_trigraph{$type} ) { - $bsr = STRONG; - } - else { - $bsr = VERY_STRONG; - } - } + my $bsl = $left_bond_strength{$next_nonblank_type}; # define right bond strengths of certain keywords if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { @@ -13530,7 +14601,6 @@ sub terminal_type { elsif ( $token eq 'ne' or $token eq 'eq' ) { $bsr = NOMINAL; } - my $bsl = $left_bond_strength{$next_nonblank_type}; # set terminal bond strength to the nominal value # this will cause good preceding breaks to be retained @@ -13538,18 +14608,6 @@ sub terminal_type { $bsl = NOMINAL; } - if ( !defined($bsl) ) { - - if ( $is_digraph{$next_nonblank_type} - || $is_trigraph{$next_nonblank_type} ) - { - $bsl = WEAK; - } - else { - $bsl = VERY_STRONG; - } - } - # define right bond strengths of certain keywords if ( $next_nonblank_type eq 'k' && defined( $left_bond_strength{$next_nonblank_token} ) ) @@ -13565,220 +14623,52 @@ sub terminal_type { $bsl = 0.9 * NOMINAL + 0.1 * STRONG; } - # Note: it might seem that we would want to keep a NO_BREAK if - # either token has this value. This didn't work, because in an - # arrow list, it prevents the comma from separating from the - # following bare word (which is probably quoted by its arrow). - # So necessary NO_BREAK's have to be handled as special cases - # in the final section. - $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; + # Use the minimum of the left and right strengths. Note: it might + # seem that we would want to keep a NO_BREAK if either token has + # this value. This didn't work, for example because in an arrow + # list, it prevents the comma from separating from the following + # bare word (which is probably quoted by its arrow). So necessary + # NO_BREAK's have to be handled as special cases in the final + # section. + if ( !defined($bsr) ) { $bsr = VERY_STRONG } + if ( !defined($bsl) ) { $bsl = VERY_STRONG } + my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; my $bond_str_1 = $bond_str; #--------------------------------------------------------------- - # section 2: - # special cases + # Bond Strength Section 2: + # Apply hardwired rules.. #--------------------------------------------------------------- - # allow long lines before final { in an if statement, as in: - # if (.......... - # ..........) - # { + # Patch to put terminal or clauses on a new line: Weaken the bond + # at an || followed by die or similar keyword to make the terminal + # or clause fall on a new line, like this: # - # Otherwise, the line before the { tends to be too short. - if ( $type eq ')' ) { - if ( $next_nonblank_type eq '{' ) { - $bond_str = VERY_WEAK + 0.03; - } - } - - elsif ( $type eq '(' ) { - if ( $next_nonblank_type eq '{' ) { - $bond_str = NOMINAL; - } - } - - # break on something like '} (', but keep this stronger than a ',' - # example is in 'howe.pl' - elsif ( $type eq 'R' or $type eq '}' ) { - if ( $next_nonblank_type eq '(' ) { - $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK; - } - } - - #----------------------------------------------------------------- - # adjust bond strength bias - #----------------------------------------------------------------- - - # add any bias set by sub scan_list at old comma break points. - elsif ( $type eq ',' ) { - $bond_str += $bond_strength_to_go[$i]; - } - - elsif ( $type eq 'f' ) { - $bond_str += $f_bias; - $f_bias += $delta_bias; - } - - # in long ?: conditionals, bias toward just one set per line (colon.t) - elsif ( $type eq ':' ) { - if ( !$want_break_before{$type} ) { - $bond_str += $colon_bias; - $colon_bias += $delta_bias; - } - } - - if ( $next_nonblank_type eq ':' - && $want_break_before{$next_nonblank_type} ) - { - $bond_str += $colon_bias; - $colon_bias += $delta_bias; - } - - # if leading '.' is used, align all but 'short' quotes; - # the idea is to not place something like "\n" on a single line. - elsif ( $next_nonblank_type eq '.' ) { - if ( $want_break_before{'.'} ) { - unless ( - $last_nonblank_type eq '.' - && ( - length($token) <= - $rOpts_short_concatenation_item_length ) - && ( $token !~ /^[\)\]\}]$/ ) - ) - { - $dot_bias += $delta_bias; + # my $class = shift + # || die "Cannot add broadcast: No class identifier found"; + # + # Otherwise the break will be at the previous '=' since the || and + # = have the same starting strength and the or is biased, like + # this: + # + # my $class = + # shift || die "Cannot add broadcast: No class identifier found"; + # + # In any case if the user places a break at either the = or the || + # it should remain there. + if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) { + if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) { + if ( $want_break_before{$token} && $i > 0 ) { + $bond_strength_to_go[ $i - 1 ] -= $delta_bias; } - $bond_str += $dot_bias; - } - } - elsif ($next_nonblank_type eq '&&' - && $want_break_before{$next_nonblank_type} ) - { - $bond_str += $amp_bias; - $amp_bias += $delta_bias; - } - elsif ($next_nonblank_type eq '||' - && $want_break_before{$next_nonblank_type} ) - { - $bond_str += $bar_bias; - $bar_bias += $delta_bias; - } - elsif ( $next_nonblank_type eq 'k' ) { - - if ( $next_nonblank_token eq 'and' - && $want_break_before{$next_nonblank_token} ) - { - $bond_str += $and_bias; - $and_bias += $delta_bias; - } - elsif ($next_nonblank_token =~ /^(or|err)$/ - && $want_break_before{$next_nonblank_token} ) - { - $bond_str += $or_bias; - $or_bias += $delta_bias; - } - - # FIXME: needs more testing - elsif ( $is_keyword_returning_list{$next_nonblank_token} ) { - $bond_str = $list_str if ( $bond_str > $list_str ); - } - elsif ( $token eq 'err' - && !$want_break_before{$token} ) - { - $bond_str += $or_bias; - $or_bias += $delta_bias; - } - } - - if ( $type eq ':' - && !$want_break_before{$type} ) - { - $bond_str += $colon_bias; - $colon_bias += $delta_bias; - } - elsif ( $type eq '&&' - && !$want_break_before{$type} ) - { - $bond_str += $amp_bias; - $amp_bias += $delta_bias; - } - elsif ( $type eq '||' - && !$want_break_before{$type} ) - { - $bond_str += $bar_bias; - $bar_bias += $delta_bias; - } - elsif ( $type eq 'k' ) { - - if ( $token eq 'and' - && !$want_break_before{$token} ) - { - $bond_str += $and_bias; - $and_bias += $delta_bias; - } - elsif ( $token eq 'or' - && !$want_break_before{$token} ) - { - $bond_str += $or_bias; - $or_bias += $delta_bias; - } - } - - # keep matrix and hash indices together - # but make them a little below STRONG to allow breaking open - # something like {'some-word'}{'some-very-long-word'} at the }{ - # (bracebrk.t) - if ( ( $type eq ']' or $type eq 'R' ) - && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' ) - ) - { - $bond_str = 0.9 * STRONG + 0.1 * NOMINAL; - } - - if ( $next_nonblank_token =~ /^->/ ) { - - # increase strength to the point where a break in the following - # will be after the opening paren rather than at the arrow: - # $a->$b($c); - if ( $type eq 'i' ) { - $bond_str = 1.45 * STRONG; - } - - elsif ( $type =~ /^[\)\]\}R]$/ ) { - $bond_str = 0.1 * STRONG + 0.9 * NOMINAL; - } - - # otherwise make strength before an '->' a little over a '+' - else { - if ( $bond_str <= NOMINAL ) { - $bond_str = NOMINAL + 0.01; + else { + $bond_str -= $delta_bias; } } } - if ( $token eq ')' && $next_nonblank_token eq '[' ) { - $bond_str = 0.2 * STRONG + 0.8 * NOMINAL; - } - - # map1.t -- correct for a quirk in perl - if ( $token eq '(' - && $next_nonblank_type eq 'i' - && $last_nonblank_type eq 'k' - && $is_sort_map_grep{$last_nonblank_token} ) - - # /^(sort|map|grep)$/ ) - { - $bond_str = NO_BREAK; - } - - # extrude.t: do not break before paren at: - # -l pid_filename( - if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { - $bond_str = NO_BREAK; - } - # good to break after end of code blocks - if ( $type eq '}' && $block_type ) { + if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) { $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; $code_bias += $delta_bias; @@ -13793,10 +14683,12 @@ sub terminal_type { $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; } -# Don't break after keyword my. This is a quick fix for a -# rare problem with perl. An example is this line from file -# Container.pm: -# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) ) + # Don't break after keyword my. This is a quick fix for a + # rare problem with perl. An example is this line from file + # Container.pm: + + # foreach my $question( Debian::DebConf::ConfigDb::gettree( + # $this->{'question'} ) ) if ( $token eq 'my' ) { $bond_str = NO_BREAK; @@ -13809,7 +14701,12 @@ sub terminal_type { $bond_str = VERY_WEAK; } - if ( $next_nonblank_type eq 'k' ) { + if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) { + + # FIXME: needs more testing + if ( $is_keyword_returning_list{$next_nonblank_token} ) { + $bond_str = $list_str if ( $bond_str > $list_str ); + } # keywords like 'unless', 'if', etc, within statements # make good breaks @@ -13823,42 +14720,33 @@ sub terminal_type { if ( $bond_str < STRONG ) { $bond_str = STRONG } } - #---------------------------------------------------------------------- - # only set NO_BREAK's from here on - #---------------------------------------------------------------------- - if ( $type eq 'C' or $type eq 'U' ) { + #--------------------------------------------------------------- + # Additional hardwired NOBREAK rules + #--------------------------------------------------------------- - # use strict requires that bare word and => not be separated - if ( $next_nonblank_type eq '=>' ) { - $bond_str = NO_BREAK; - } + # map1.t -- correct for a quirk in perl + if ( $token eq '(' + && $next_nonblank_type eq 'i' + && $last_nonblank_type eq 'k' + && $is_sort_map_grep{$last_nonblank_token} ) - # Never break between a bareword and a following paren because - # perl may give an error. For example, if a break is placed - # between 'to_filehandle' and its '(' the following line will - # give a syntax error [Carp.pm]: my( $no) =fileno( - # to_filehandle( $in)) ; - if ( $next_nonblank_token eq '(' ) { - $bond_str = NO_BREAK; - } + # /^(sort|map|grep)$/ ) + { + $bond_str = NO_BREAK; } - # use strict requires that bare word within braces not start new line - elsif ( $type eq 'L' ) { - - if ( $next_nonblank_type eq 'w' ) { - $bond_str = NO_BREAK; - } + # extrude.t: do not break before paren at: + # -l pid_filename( + if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { + $bond_str = NO_BREAK; } # in older version of perl, use strict can cause problems with # breaks before bare words following opening parens. For example, # this will fail under older versions if a break is made between - # '(' and 'MAIL': - # use strict; - # open( MAIL, "a long filename or command"); - # close MAIL; - elsif ( $type eq '{' ) { + # '(' and 'MAIL': use strict; open( MAIL, "a long filename or + # command"); close MAIL; + if ( $type eq '{' ) { if ( $token eq '(' && $next_nonblank_type eq 'w' ) { @@ -13873,9 +14761,6 @@ sub terminal_type { $next_next_type = $types_to_go[$i_next_next_nonblank]; } - ##if ( $next_next_type ne '=>' ) { - # these are ok: '->xxx', '=>', '(' - # We'll check for an old breakpoint and keep a leading # bareword if it was that way in the input file. # Presumably it was ok that way. For example, the @@ -13888,8 +14773,10 @@ sub terminal_type { # ); # # This should be sufficient: - if ( !$old_breakpoint_to_go[$i] - && ( $next_next_type eq ',' || $next_next_type eq '}' ) + if ( + !$old_breakpoint_to_go[$i] + && ( $next_next_type eq ',' + || $next_next_type eq '}' ) ) { $bond_str = NO_BREAK; @@ -13897,41 +14784,12 @@ sub terminal_type { } } - elsif ( $type eq 'w' ) { - - if ( $next_nonblank_type eq 'R' ) { - $bond_str = NO_BREAK; - } - - # use strict requires that bare word and => not be separated - if ( $next_nonblank_type eq '=>' ) { - $bond_str = NO_BREAK; - } - } - - # in fact, use strict hates bare words on any new line. For - # example, a break before the underscore here provokes the - # wrath of use strict: - # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { - elsif ( $type eq 'F' ) { - $bond_str = NO_BREAK; - } - - # use strict does not allow separating type info from trailing { } - # testfile is readmail.pl - elsif ( $type eq 't' or $type eq 'i' ) { - - if ( $next_nonblank_type eq 'L' ) { - $bond_str = NO_BREAK; - } - } - # Do not break between a possible filehandle and a ? or / and do # not introduce a break after it if there is no blank # (extrude.t) elsif ( $type eq 'Z' ) { - # dont break.. + # don't break.. if ( # if there is no blank and we do not want one. Examples: @@ -13951,47 +14809,12 @@ sub terminal_type { } } - # Do not break before a possible file handle - if ( $next_nonblank_type eq 'Z' ) { - $bond_str = NO_BREAK; - } - - # As a defensive measure, do not break between a '(' and a - # filehandle. In some cases, this can cause an error. For - # example, the following program works: - # my $msg="hi!\n"; - # print - # ( STDOUT - # $msg - # ); - # - # But this program fails: - # my $msg="hi!\n"; - # print - # ( - # STDOUT - # $msg - # ); - # - # This is normally only a problem with the 'extrude' option - if ( $next_nonblank_type eq 'Y' && $token eq '(' ) { - $bond_str = NO_BREAK; - } - - # Breaking before a ++ can cause perl to guess wrong. For - # example the following line will cause a syntax error - # with -extrude if we break between '$i' and '++' [fixstyle2] - # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); - elsif ( $next_nonblank_type eq '++' ) { - $bond_str = NO_BREAK; - } - # Breaking before a ? before a quote can cause trouble if # they are not separated by a blank. # Example: a syntax error occurs if you break before the ? here # my$logic=join$all?' && ':' || ',@regexps; # From: Professional_Perl_Programming_Code/multifind.pl - elsif ( $next_nonblank_type eq '?' ) { + if ( $next_nonblank_type eq '?' ) { $bond_str = NO_BREAK if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); } @@ -14016,22 +14839,109 @@ sub terminal_type { $bond_str = NO_BREAK; } } + my $bond_str_2 = $bond_str; - # keep '}' together with ';' - if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) { - $bond_str = NO_BREAK; + #--------------------------------------------------------------- + # End of hardwired rules + #--------------------------------------------------------------- + + #--------------------------------------------------------------- + # Bond Strength Section 3: + # Apply table rules. These have priority over the above + # hardwired rules. + #--------------------------------------------------------------- + + my $tabulated_bond_str; + my $ltype = $type; + my $rtype = $next_nonblank_type; + if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token } + if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) { + $rtype = $next_nonblank_type . $next_nonblank_token; + } + + if ( $binary_bond_strength{$ltype}{$rtype} ) { + $bond_str = $binary_bond_strength{$ltype}{$rtype}; + $tabulated_bond_str = $bond_str; + } + + if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) { + $bond_str = NO_BREAK; + $tabulated_bond_str = $bond_str; + } + my $bond_str_3 = $bond_str; + + # If the hardwired rules conflict with the tabulated bond + # strength then there is an inconsistency that should be fixed + FORMATTER_DEBUG_FLAG_BOND_TABLES + && $tabulated_bond_str + && $bond_str_1 + && $bond_str_1 != $bond_str_2 + && $bond_str_2 != $tabulated_bond_str + && do { + print STDERR +"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n"; + }; + + #----------------------------------------------------------------- + # Bond Strength Section 4: + # Modify strengths of certain tokens which often occur in sequence + # by adding a small bias to each one in turn so that the breaks + # occur from left to right. + # + # Note that we only changing strengths by small amounts here, + # and usually increasing, so we should not be altering any NO_BREAKs. + # Other routines which check for NO_BREAKs will use a tolerance + # of one to avoid any problem. + #----------------------------------------------------------------- + + # The bias tables use special keys + my $left_key = bias_table_key( $type, $token ); + my $right_key = + bias_table_key( $next_nonblank_type, $next_nonblank_token ); + + # add any bias set by sub scan_list at old comma break points. + if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] } + + # bias left token + elsif ( defined( $bias{$left_key} ) ) { + if ( !$want_break_before{$left_key} ) { + $bias{$left_key} += $delta_bias; + $bond_str += $bias{$left_key}; + } } - # never break between sub name and opening paren - if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) { - $bond_str = NO_BREAK; + # bias right token + if ( defined( $bias{$right_key} ) ) { + if ( $want_break_before{$right_key} ) { + + # for leading '.' align all but 'short' quotes; the idea + # is to not place something like "\n" on a single line. + if ( $right_key eq '.' ) { + unless ( + $last_nonblank_type eq '.' + && ( + length($token) <= + $rOpts_short_concatenation_item_length ) + && ( $token !~ /^[\)\]\}]$/ ) + ) + { + $bias{$right_key} += $delta_bias; + } + } + else { + $bias{$right_key} += $delta_bias; + } + $bond_str += $bias{$right_key}; + } } + my $bond_str_4 = $bond_str; #--------------------------------------------------------------- - # section 3: - # now take nesting depth into account + # Bond Strength Section 5: + # Fifth Approximation. + # Take nesting depth into account by adding the nesting depth + # to the bond strength. #--------------------------------------------------------------- - # final strength incorporates the bond strength and nesting depth my $strength; if ( defined($bond_str) && !$nobreak_to_go[$i] ) { @@ -14054,12 +14964,11 @@ sub terminal_type { FORMATTER_DEBUG_FLAG_BOND && do { my $str = substr( $token, 0, 15 ); $str .= ' ' x ( 16 - length($str) ); - print -"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n"; + print STDOUT +"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n"; }; - } - } - + } ## end main loop + } ## end sub set_bond_strengths } sub pad_array_to_go { @@ -14099,16 +15008,16 @@ sub pad_array_to_go { { # begin scan_list my ( - $block_type, $current_depth, - $depth, $i, - $i_last_nonblank_token, $last_colon_sequence_number, - $last_nonblank_token, $last_nonblank_type, - $last_old_breakpoint_count, $minimum_depth, - $next_nonblank_block_type, $next_nonblank_token, - $next_nonblank_type, $old_breakpoint_count, - $starting_breakpoint_count, $starting_depth, - $token, $type, - $type_sequence, + $block_type, $current_depth, + $depth, $i, + $i_last_nonblank_token, $last_colon_sequence_number, + $last_nonblank_token, $last_nonblank_type, + $last_nonblank_block_type, $last_old_breakpoint_count, + $minimum_depth, $next_nonblank_block_type, + $next_nonblank_token, $next_nonblank_type, + $old_breakpoint_count, $starting_breakpoint_count, + $starting_depth, $token, + $type, $type_sequence, ); my ( @@ -14236,8 +15145,9 @@ sub pad_array_to_go { # Also put a break before the first comma if # (1) there was a break there in the input, and - # (2) that was exactly one previous break in the input - # (3) there are multiple old comma breaks + # (2) there was exactly one old break before the first comma break + # (3) OLD: there are multiple old comma breaks + # (3) NEW: there are one or more old comma breaks (see return example) # # For example, we will follow the user and break after # 'print' in this snippet: @@ -14246,7 +15156,19 @@ sub pad_array_to_go { # "\t", $have, " is ", text_unit($hu), "\n", # "\t", $want, " is ", text_unit($wu), "\n", # ; - # But we will not force a break after the first comma here + # + # Another example, just one comma, where we will break after + # the return: + # return + # $x * cos($a) - $y * sin($a), + # $x * sin($a) + $y * cos($a); + + # Breaking a print statement: + # print SAVEOUT + # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", + # ( $? & 128 ) ? " -- core dumped" : "", "\n"; + # + # But we will not force a break after the opening paren here # (causes a blinker): # $heap->{stream}->set_output_filter( # poe::filter::reference->new('myotherfreezer') ), @@ -14265,9 +15187,18 @@ sub pad_array_to_go { if ( $levels_to_go[$ii] == $level_comma ); } } - if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 1 ) + + # Changed rule from multiple old commas to just one here: + if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) { - set_forced_breakpoint($ibreak); + # Do not to break before an opening token because + # it can lead to "blinkers". + my $ibreakm = $ibreak; + $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' ); + if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ ) + { + set_forced_breakpoint($ibreak); + } } } } @@ -14349,6 +15280,10 @@ sub pad_array_to_go { $type = ';'; $type_sequence = ''; + my $total_depth_variation = 0; + my $i_old_assignment_break; + my $depth_last = $starting_depth; + check_for_new_minimum_depth($current_depth); my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; @@ -14365,7 +15300,7 @@ sub pad_array_to_go { $last_nonblank_type = $type; $last_nonblank_token = $token; $last_nonblank_block_type = $block_type; - } + } ## end if ( $type ne 'b' ) $type = $types_to_go[$i]; $block_type = $block_type_to_go[$i]; $token = $tokens_to_go[$i]; @@ -14409,8 +15344,8 @@ sub pad_array_to_go { # as '}') which forms a one-line block, this break might # get undone. $want_previous_breakpoint = $i; - } - } + } ## end if ( $next_nonblank_type...) + } ## end if ($rOpts_break_at_old_keyword_breakpoints) # Break before attributes if user broke there if ($rOpts_break_at_old_attribute_breakpoints) { @@ -14418,10 +15353,21 @@ sub pad_array_to_go { $want_previous_breakpoint = $i; } } - } + + # remember an = break as possible good break point + if ( $is_assignment{$type} ) { + $i_old_assignment_break = $i; + } + elsif ( $is_assignment{$next_nonblank_type} ) { + $i_old_assignment_break = $i_next_nonblank; + } + } ## end if ( $old_breakpoint_to_go...) next if ( $type eq 'b' ); $depth = $nesting_depth_to_go[ $i + 1 ]; + $total_depth_variation += abs( $depth - $depth_last ); + $depth_last = $depth; + # safety check - be sure we always break after a comment # Shouldn't happen .. an error here probably means that the # nobreak flag did not get turned off correctly during @@ -14434,8 +15380,8 @@ sub pad_array_to_go { report_definite_bug(); $nobreak_to_go[$i] = 0; set_forced_breakpoint($i); - } - } + } ## end if ( $i != $max_index_to_go) + } ## end if ( $type eq '#' ) # Force breakpoints at certain tokens in long lines. # Note that such breakpoints will be undone later if these tokens @@ -14465,7 +15411,7 @@ sub pad_array_to_go { ) { set_forced_breakpoint( $i - 1 ); - } + } ## end if ( $type eq 'k' && $i...) # remember locations of '||' and '&&' for possible breaks if we # decide this is a long logical expression. @@ -14474,13 +15420,13 @@ sub pad_array_to_go { ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); - } + } ## end if ( $type eq '||' ) elsif ( $type eq '&&' ) { push @{ $rand_or_list[$depth][3] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); - } + } ## end elsif ( $type eq '&&' ) elsif ( $type eq 'f' ) { push @{ $rfor_semicolon_list[$depth] }, $i; } @@ -14490,7 +15436,7 @@ sub pad_array_to_go { ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); - } + } ## end if ( $token eq 'and' ) # break immediately at 'or's which are probably not in a logical # block -- but we will break in logical breaks below so that @@ -14509,8 +15455,8 @@ sub pad_array_to_go { { $saw_good_breakpoint = 1; } - } - } + } ## end else [ if ( $is_logical_container...)] + } ## end elsif ( $token eq 'or' ) elsif ( $token eq 'if' || $token eq 'unless' ) { push @{ $rand_or_list[$depth][4] }, $i; if ( ( $i == $i_line_start || $i == $i_line_end ) @@ -14518,8 +15464,8 @@ sub pad_array_to_go { { set_forced_breakpoint($i); } - } - } + } ## end elsif ( $token eq 'if' ||...) + } ## end elsif ( $type eq 'k' ) elsif ( $is_assignment{$type} ) { $i_equals[$depth] = $i; } @@ -14536,22 +15482,21 @@ sub pad_array_to_go { && $rOpts_break_at_old_ternary_breakpoints ) { - # TESTING: set_forced_breakpoint($i); # break at previous '=' if ( $i_equals[$depth] > 0 ) { set_forced_breakpoint( $i_equals[$depth] ); $i_equals[$depth] = -1; - } - } - } + } + } ## end if ( ( $i == $i_line_start...)) + } ## end if ( $type eq ':' ) if ( defined( $postponed_breakpoint{$type_sequence} ) ) { my $inc = ( $type eq ':' ) ? 0 : 1; set_forced_breakpoint( $i - $inc ); delete $postponed_breakpoint{$type_sequence}; } - } + } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(]) # set breaks at ?/: if they will get separated (and are # not a ?/: chain), or if the '?' is at the end of the @@ -14580,9 +15525,9 @@ sub pad_array_to_go { || $tokens_to_go[$max_index_to_go] eq '#' ); set_closing_breakpoint($i); - } - } - } + } ## end if ( $i_colon <= 0 ||...) + } ## end elsif ( $token eq '?' ) + } ## end if ($type_sequence) #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; @@ -14657,13 +15602,13 @@ sub pad_array_to_go { # and user wants brace to left && !$rOpts->{'opening-brace-always-on-right'} - && ( $type eq '{' ) # should be true + && ( $type eq '{' ) # should be true && ( $token eq '{' ) # should be true ) { set_forced_breakpoint( $i - 1 ); - } - } + } ## end if ( $block_type && ( ...)) + } ## end if ( $depth > $current_depth) #------------------------------------------------------------ # Handle Decreasing Depth.. @@ -14690,7 +15635,7 @@ sub pad_array_to_go { && !$rOpts->{'opening-brace-always-on-right'} ) { set_forced_breakpoint($i); - } + } ## end if ( $token eq ')' && ... #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; @@ -14704,8 +15649,30 @@ sub pad_array_to_go { # this term is long if we had to break at interior commas.. my $is_long_term = $bp_count > 0; - # ..or if the length between opening and closing parens exceeds - # allowed line length + # If this is a short container with one or more comma arrows, + # then we will mark it as a long term to open it if requested. + # $rOpts_comma_arrow_breakpoints = + # 0 - open only if comma precedes closing brace + # 1 - stable: except for one line blocks + # 2 - try to form 1 line blocks + # 3 - ignore => + # 4 - always open up if vt=0 + # 5 - stable: even for one line blocks if vt=0 + if ( !$is_long_term + && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/ + && $index_before_arrow[ $depth + 1 ] > 0 + && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } + ) + { + $is_long_term = $rOpts_comma_arrow_breakpoints == 4 + || ( $rOpts_comma_arrow_breakpoints == 0 + && $last_nonblank_token eq ',' ) + || ( $rOpts_comma_arrow_breakpoints == 5 + && $old_breakpoint_to_go[$i_opening] ); + } ## end if ( !$is_long_term &&...) + + # mark term as long if the length between opening and closing + # parens exceeds allowed line length if ( !$is_long_term && $saw_opening_structure ) { my $i_opening_minus = find_token_starting_list($i_opening); @@ -14714,7 +15681,7 @@ sub pad_array_to_go { # semicolon, hence the '>=' here (oneline.t) $is_long_term = excess_line_length( $i_opening_minus, $i ) >= 0; - } + } ## end if ( !$is_long_term &&...) # We've set breaks after all comma-arrows. Now we have to # undo them if this can be a one-line block @@ -14723,6 +15690,7 @@ sub pad_array_to_go { # user doesn't require breaking after all comma-arrows ( $rOpts_comma_arrow_breakpoints != 0 ) + && ( $rOpts_comma_arrow_breakpoints != 4 ) # and if the opening structure is in this batch && $saw_opening_structure @@ -14748,7 +15716,7 @@ sub pad_array_to_go { { undo_forced_breakpoint_stack( $breakpoint_undo_stack[$current_depth] ); - } + } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) # now see if we have any comma breakpoints left my $has_comma_breakpoints = @@ -14864,7 +15832,7 @@ sub pad_array_to_go { else { set_logical_breakpoints($current_depth); } - } + } ## end if ( $item_count_stack...) if ( $is_long_term && @{ $rfor_semicolon_list[$current_depth] } ) @@ -14875,7 +15843,7 @@ sub pad_array_to_go { # leading term alignment unless -lp is used. $has_comma_breakpoints = 1 unless $rOpts_line_up_parentheses; - } + } ## end if ( $is_long_term && ...) if ( @@ -14941,9 +15909,9 @@ sub pad_array_to_go { if ( $test2 == $test1 ) { set_forced_breakpoint( $i_start_2 - 1 ); } - } - } - } + } ## end if ( defined($i_start_2...)) + } ## end if ( defined($item) ) + } ## end if ( $rOpts_line_up_parentheses...) # break after opening structure. # note: break before closing structure will be automatic @@ -14953,12 +15921,17 @@ sub pad_array_to_go { unless ( $do_not_break_apart || is_unbreakable_container($current_depth) ); + # break at ',' of lower depth level before opening token + if ( $last_comma_index[$depth] ) { + set_forced_breakpoint( $last_comma_index[$depth] ); + } + # break at '.' of lower depth level before opening token if ( $last_dot_index[$depth] ) { set_forced_breakpoint( $last_dot_index[$depth] ); } - # break before opening structure if preeced by another + # break before opening structure if preceded by another # closing structure and a comma. This is normally # done by the previous closing brace, but not # if it was a one-line block. @@ -14983,9 +15956,9 @@ sub pad_array_to_go { if ( $want_break_before{$token_prev} ) { set_forced_breakpoint($i_prev); } - } - } - } + } ## end elsif ( $types_to_go[$i_prev...]) + } ## end if ( $i_opening > 2 ) + } ## end if ( $minimum_depth <=...) # break after comma following closing structure if ( $next_type eq ',' ) { @@ -15000,7 +15973,7 @@ sub pad_array_to_go { ) { set_forced_breakpoint($i); - } + } ## end if ( $is_assignment{$next_nonblank_type...}) # break at any comma before the opening structure Added # for -lp, but seems to be good in general. It isn't @@ -15029,8 +16002,9 @@ sub pad_array_to_go { # must set fake breakpoint to alert outer containers that # they are complex set_fake_breakpoint(); - } - } + } ## end elsif ($is_long_term) + + } ## end elsif ( $depth < $current_depth) #------------------------------------------------------------ # Handle this token @@ -15046,7 +16020,7 @@ sub pad_array_to_go { $want_comma_break[$depth] = 1; $index_before_arrow[$depth] = $i_last_nonblank_token; next; - } + } ## end if ( $type eq '=>' ) elsif ( $type eq '.' ) { $last_dot_index[$depth] = $i; @@ -15064,7 +16038,7 @@ sub pad_array_to_go { $dont_align[$depth] = 1; $want_comma_break[$depth] = 0; $index_before_arrow[$depth] = -1; - } + } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) # now just handle any commas next unless ( $type eq ',' ); @@ -15077,9 +16051,11 @@ sub pad_array_to_go { if ( $want_comma_break[$depth] ) { if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; - next; + if ($rOpts_comma_arrow_breakpoints) { + $want_comma_break[$depth] = 0; + ##$index_before_arrow[$depth] = -1; + next; + } } set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); @@ -15098,17 +16074,28 @@ sub pad_array_to_go { && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) { if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } - if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } + if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { # don't break pointer calls, such as the following: # File::Spec->curdir => 1, # (This is tokenized as adjacent 'w' tokens) - if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { + ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { + + # And don't break before a comma, as in the following: + # ( LONGER_THAN,=> 1, + # EIGHTY_CHARACTERS,=> 2, + # CAUSES_FORMATTING,=> 3, + # LIKE_THIS,=> 4, + # ); + # This example is for -tso but should be general rule + if ( $tokens_to_go[ $ibreak + 1 ] ne '->' + && $tokens_to_go[ $ibreak + 1 ] ne ',' ) + { set_forced_breakpoint($ibreak); } - } - } + } ## end if ( $types_to_go[$ibreak...]) + } ## end if ( $ibreak > 0 && $tokens_to_go...) $want_comma_break[$depth] = 0; $index_before_arrow[$depth] = -1; @@ -15117,7 +16104,7 @@ sub pad_array_to_go { # treat any list items so far as an interrupted list $interrupted_list[$depth] = 1; next; - } + } ## end if ( $want_comma_break...) # break after all commas above starting depth if ( $depth < $starting_depth && !$dont_align[$depth] ) { @@ -15140,14 +16127,14 @@ sub pad_array_to_go { { $dont_align[$depth] = 1; } - } + } ## end if ( $item_count == 0 ) $comma_index[$depth][$item_count] = $i; ++$item_count_stack[$depth]; if ( $last_nonblank_type =~ /^[iR\]]$/ ) { $identifier_count_stack[$depth]++; } - } + } ## end while ( ++$i <= $max_index_to_go) #------------------------------------------- # end of loop over all tokens in this batch @@ -15175,7 +16162,7 @@ sub pad_array_to_go { && $i_opening >= $max_index_to_go - 2 && $token =~ /^['"]$/ ) ); - } + } ## end for ( my $dd = $current_depth...) # Return a flag indicating if the input file had some good breakpoints. # This flag will be used to force a break in a line shorter than the @@ -15183,8 +16170,24 @@ sub pad_array_to_go { if ( $has_old_logical_breakpoints[$current_depth] ) { $saw_good_breakpoint = 1; } + + # A complex line with one break at an = has a good breakpoint. + # This is not complex ($total_depth_variation=0): + # $res1 + # = 10; + # + # This is complex ($total_depth_variation=6): + # $res2 = + # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); + elsif ($i_old_assignment_break + && $total_depth_variation > 4 + && $old_breakpoint_count == 1 ) + { + $saw_good_breakpoint = 1; + } ## end elsif ( $i_old_assignment_break...) + return $saw_good_breakpoint; - } + } ## end sub scan_list } # end scan_list sub find_token_starting_list { @@ -15403,13 +16406,13 @@ sub find_token_starting_list { } #my ( $a, $b, $c ) = caller(); -#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count +#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; #--------------------------------------------------------------- # Interrupted List Rule: - # A list is is forced to use old breakpoints if it was interrupted + # A list is forced to use old breakpoints if it was interrupted # by side comments or blank lines, or requested by user. #--------------------------------------------------------------- if ( $rOpts_break_at_old_comma_breakpoints @@ -15453,7 +16456,8 @@ sub find_token_starting_list { # exceeds the available space after the '('. my $need_lp_break_open = $must_break_open; if ( $rOpts_line_up_parentheses && !$must_break_open ) { - my $columns_if_unbroken = $rOpts_maximum_line_length - + my $columns_if_unbroken = + maximum_line_length($i_opening_minus) - total_line_length( $i_opening_minus, $i_opening_paren ); $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken ) @@ -15692,7 +16696,7 @@ sub find_token_starting_list { # or if this is a sublist of a larger list || $in_hierarchical_list - # or if multiple commas and we dont have a long first or last + # or if multiple commas and we don't have a long first or last # term || ( $comma_count > 1 && !( $long_last_term || $long_first_term ) ) @@ -15768,18 +16772,18 @@ sub find_token_starting_list { # as a table for relatively small parenthesized lists. These # are usually easier to read if not formatted as tables. if ( - $packed_lines <= 2 # probably can fit in 2 lines - && $item_count < 9 # doesn't have too many items + $packed_lines <= 2 # probably can fit in 2 lines + && $item_count < 9 # doesn't have too many items && $opening_environment eq 'BLOCK' # not a sub-container - && $opening_token eq '(' # is paren list + && $opening_token eq '(' # is paren list ) { # Shortcut method 1: for -lp and just one comma: # This is a no-brainer, just break at the comma. if ( - $rOpts_line_up_parentheses # -lp - && $item_count == 2 # two items, one comma + $rOpts_line_up_parentheses # -lp + && $item_count == 2 # two items, one comma && !$must_break_open ) { @@ -15828,7 +16832,7 @@ sub find_token_starting_list { # debug stuff FORMATTER_DEBUG_FLAG_SPARSE && do { - print + print STDOUT "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; }; @@ -16135,7 +17139,8 @@ sub get_maximum_fields_wanted { sub table_columns_available { my $i_first_comma = shift; my $columns = - $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma); + maximum_line_length($i_first_comma) - + leading_spaces_to_go($i_first_comma); # Patch: the vertical formatter does not line up lines whose lengths # exactly equal the available line length because of allowances @@ -16213,9 +17218,8 @@ sub set_nobreaks { FORMATTER_DEBUG_FLAG_NOBREAK && do { my ( $a, $b, $c ) = caller(); - print( -"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n" - ); + print STDOUT +"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"; }; @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); @@ -16225,9 +17229,8 @@ sub set_nobreaks { else { FORMATTER_DEBUG_FLAG_NOBREAK && do { my ( $a, $b, $c ) = caller(); - print( -"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n" - ); + print STDOUT + "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; }; } } @@ -16261,8 +17264,8 @@ sub set_forced_breakpoint { FORMATTER_DEBUG_FLAG_FORCE && do { my ( $a, $b, $c ) = caller(); - print -"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; + print STDOUT +"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; }; if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { @@ -16307,9 +17310,8 @@ sub undo_forced_breakpoint_stack { FORMATTER_DEBUG_FLAG_UNDOBP && do { my ( $a, $b, $c ) = caller(); - print( -"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n" - ); + print STDOUT +"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; }; } @@ -16317,9 +17319,8 @@ sub undo_forced_breakpoint_stack { else { FORMATTER_DEBUG_FLAG_UNDOBP && do { my ( $a, $b, $c ) = caller(); - print( -"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go" - ); + print STDOUT +"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; }; } } @@ -16330,6 +17331,8 @@ sub undo_forced_breakpoint_stack { my %is_amp_amp; my %is_ternary; my %is_math_op; + my %is_plus_minus; + my %is_mult_div; BEGIN { @@ -16341,20 +17344,72 @@ sub undo_forced_breakpoint_stack { @_ = qw( + - * / ); @is_math_op{@_} = (1) x scalar(@_); + + @_ = qw( + - ); + @is_plus_minus{@_} = (1) x scalar(@_); + + @_ = qw( * / ); + @is_mult_div{@_} = (1) x scalar(@_); + } + + sub DUMP_BREAKPOINTS { + + # Debug routine to dump current breakpoints...not normally called + # We are given indexes to the current lines: + # $ri_beg = ref to array of BEGinning indexes of each line + # $ri_end = ref to array of ENDing indexes of each line + my ( $ri_beg, $ri_end, $msg ) = @_; + print STDERR "----Dumping breakpoints from: $msg----\n"; + for my $n ( 0 .. @{$ri_end} - 1 ) { + my $ibeg = $$ri_beg[$n]; + my $iend = $$ri_end[$n]; + my $text = ""; + foreach my $i ( $ibeg .. $iend ) { + $text .= $tokens_to_go[$i]; + } + print STDERR "$n ($ibeg:$iend) $text\n"; + } + print STDERR "----\n"; } sub recombine_breakpoints { # sub set_continuation_breaks is very liberal in setting line breaks # for long lines, always setting breaks at good breakpoints, even - # when that creates small lines. Occasionally small line fragments + # when that creates small lines. Sometimes small line fragments # are produced which would look better if they were combined. - # That's the task of this routine, recombine_breakpoints. + # That's the task of this routine. # + # We are given indexes to the current lines: # $ri_beg = ref to array of BEGinning indexes of each line # $ri_end = ref to array of ENDing indexes of each line my ( $ri_beg, $ri_end ) = @_; + # Make a list of all good joining tokens between the lines + # n-1 and n. + my @joint; + my $nmax = @$ri_end - 1; + for my $n ( 1 .. $nmax ) { + my $ibeg_1 = $$ri_beg[ $n - 1 ]; + my $iend_1 = $$ri_end[ $n - 1 ]; + my $iend_2 = $$ri_end[$n]; + my $ibeg_2 = $$ri_beg[$n]; + + my ( $itok, $itokp, $itokm ); + + foreach my $itest ( $iend_1, $ibeg_2 ) { + my $type = $types_to_go[$itest]; + if ( $is_math_op{$type} + || $is_amp_amp{$type} + || $is_assignment{$type} + || $type eq ':' ) + { + $itok = $itest; + } + } + $joint[$n] = [$itok]; + } + my $more_to_do = 1; # We keep looping over all of the lines of this batch @@ -16366,12 +17421,13 @@ sub undo_forced_breakpoint_stack { my $n; my $nmax = @$ri_end - 1; - # safety check for infinite loop + # Safety check for infinite loop unless ( $nmax < $nmax_last ) { - # shouldn't happen because splice below decreases nmax on each pass: - # but i get paranoid sometimes - die "Program bug-infinite loop in recombine breakpoints\n"; + # Shouldn't happen because splice below decreases nmax on each + # pass. + Perl::Tidy::Die + "Program bug-infinite loop in recombine breakpoints\n"; } $nmax_last = $nmax; $more_to_do = 0; @@ -16394,7 +17450,7 @@ sub undo_forced_breakpoint_stack { # ^ # | # We want to decide if we should remove the line break - # betwen the tokens at $iend_1 and $ibeg_2 + # between the tokens at $iend_1 and $ibeg_2 # # We will apply a number of ad-hoc tests to see if joining # here will look ok. The code will just issue a 'next' @@ -16403,13 +17459,17 @@ sub undo_forced_breakpoint_stack { #---------------------------------------------------------- # # beginning and ending tokens of the lines we are working on - my $ibeg_1 = $$ri_beg[ $n - 1 ]; - my $iend_1 = $$ri_end[ $n - 1 ]; - my $iend_2 = $$ri_end[$n]; - my $ibeg_2 = $$ri_beg[$n]; - + my $ibeg_1 = $$ri_beg[ $n - 1 ]; + my $iend_1 = $$ri_end[ $n - 1 ]; + my $iend_2 = $$ri_end[$n]; + my $ibeg_2 = $$ri_beg[$n]; my $ibeg_nmax = $$ri_beg[$nmax]; + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; + # some beginning indexes of other lines, which may not exist my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1; my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1; @@ -16420,33 +17480,216 @@ sub undo_forced_breakpoint_stack { #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - # $nesting_depth_to_go[$ibeg_1] ); -##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n"; + FORMATTER_DEBUG_FLAG_RECOMBINE && do { + print STDERR +"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; + }; # If line $n is the last line, we set some flags and # do any special checks for it if ( $n == $nmax ) { # a terminal '{' should stay where it is - next if $types_to_go[$ibeg_2] eq '{'; + next if $type_ibeg_2 eq '{'; # set flag if statement $n ends in ';' - $this_line_is_semicolon_terminated = - $types_to_go[$iend_2] eq ';' + $this_line_is_semicolon_terminated = $type_iend_2 eq ';' # with possible side comment - || ( $types_to_go[$iend_2] eq '#' + || ( $type_iend_2 eq '#' && $iend_2 - $ibeg_2 >= 2 && $types_to_go[ $iend_2 - 2 ] eq ';' && $types_to_go[ $iend_2 - 1 ] eq 'b' ); } #---------------------------------------------------------- - # Section 1: examine token at $iend_1 (right end of first line - # of pair) + # Recombine Section 1: + # Examine the special token joining this line pair, if any. + # Put as many tests in this section to avoid duplicate code and + # to make formatting independent of whether breaks are to the + # left or right of an operator. + #---------------------------------------------------------- + + my ($itok) = @{ $joint[$n] }; + if ($itok) { + + # FIXME: Patch - may not be necessary + my $iend_1 = + $type_iend_1 eq 'b' + ? $iend_1 - 1 + : $iend_1; + + my $iend_2 = + $type_iend_2 eq 'b' + ? $iend_2 - 1 + : $iend_2; + ## END PATCH + + my $type = $types_to_go[$itok]; + + if ( $type eq ':' ) { + + # do not join at a colon unless it disobeys the break request + if ( $itok eq $iend_1 ) { + next unless $want_break_before{$type}; + } + else { + $leading_amp_count++; + next if $want_break_before{$type}; + } + } ## end if ':' + + # handle math operators + - * / + elsif ( $is_math_op{$type} ) { + + # Combine these lines if this line is a single + # number, or if it is a short term with same + # operator as the previous line. For example, in + # the following code we will combine all of the + # short terms $A, $B, $C, $D, $E, $F, together + # instead of leaving them one per line: + # my $time = + # $A * $B * $C * $D * $E * $F * + # ( 2. * $eps * $sigma * $area ) * + # ( 1. / $tcold**3 - 1. / $thot**3 ); + + # This can be important in math-intensive code. + + my $good_combo; + + my $itokp = min( $inext_to_go[$itok], $iend_2 ); + my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); + my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); + my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); + + # check for a number on the right + if ( $types_to_go[$itokp] eq 'n' ) { + + # ok if nothing else on right + if ( $itokp == $iend_2 ) { + $good_combo = 1; + } + else { + + # look one more token to right.. + # okay if math operator or some termination + $good_combo = + ( ( $itokpp == $iend_2 ) + && $is_math_op{ $types_to_go[$itokpp] } ) + || $types_to_go[$itokpp] =~ /^[#,;]$/; + } + } + + # check for a number on the left + if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { + + # okay if nothing else to left + if ( $itokm == $ibeg_1 ) { + $good_combo = 1; + } + + # otherwise look one more token to left + else { + + # okay if math operator, comma, or assignment + $good_combo = ( $itokmm == $ibeg_1 ) + && ( $is_math_op{ $types_to_go[$itokmm] } + || $types_to_go[$itokmm] =~ /^[,]$/ + || $is_assignment{ $types_to_go[$itokmm] } + ); + } + } + + # look for a single short token either side of the + # operator + if ( !$good_combo ) { + + # Slight adjustment factor to make results + # independent of break before or after operator in + # long summed lists. (An operator and a space make + # two spaces). + my $two = ( $itok eq $iend_1 ) ? 2 : 0; + + $good_combo = + + # numbers or id's on both sides of this joint + $types_to_go[$itokp] =~ /^[in]$/ + && $types_to_go[$itokm] =~ /^[in]$/ + + # one of the two lines must be short: + && ( + ( + # no more than 2 nonblank tokens right of + # joint + $itokpp == $iend_2 + + # short + && token_sequence_length( $itokp, $iend_2 ) + < $two + + $rOpts_short_concatenation_item_length + ) + || ( + # no more than 2 nonblank tokens left of + # joint + $itokmm == $ibeg_1 + + # short + && token_sequence_length( $ibeg_1, $itokm ) + < 2 - $two + + $rOpts_short_concatenation_item_length + ) + + ) + + # keep pure terms; don't mix +- with */ + && !( + $is_plus_minus{$type} + && ( $is_mult_div{ $types_to_go[$itokmm] } + || $is_mult_div{ $types_to_go[$itokpp] } ) + ) + && !( + $is_mult_div{$type} + && ( $is_plus_minus{ $types_to_go[$itokmm] } + || $is_plus_minus{ $types_to_go[$itokpp] } ) + ) + + ; + } + + # it is also good to combine if we can reduce to 2 lines + if ( !$good_combo ) { + + # index on other line where same token would be in a + # long chain. + my $iother = + ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; + + $good_combo = + $n == 2 + && $n == $nmax + && $types_to_go[$iother] ne $type; + } + + next unless ($good_combo); + + } ## end math + + elsif ( $is_amp_amp{$type} ) { + ##TBD + } ## end &&, || + + elsif ( $is_assignment{$type} ) { + ##TBD + } ## end assignment + } + + #---------------------------------------------------------- + # Recombine Section 2: + # Examine token at $iend_1 (right end of first line of pair) #---------------------------------------------------------- # an isolated '}' may join with a ';' terminated segment - if ( $types_to_go[$iend_1] eq '}' ) { + if ( $type_iend_1 eq '}' ) { # Check for cases where combining a semicolon terminated # statement with a previous isolated closing paren will @@ -16474,21 +17717,35 @@ sub undo_forced_breakpoint_stack { # PARAM2 => 'bar' # ) or die "Some_method didn't work"; # + # But we do not want to do this for something like the -lp + # option where the paren is not outdentable because the + # trailing clause will be far to the right. + # + # The logic here is synchronized with the logic in sub + # sub set_adjusted_indentation, which actually does + # the outdenting. + # $previous_outdentable_closing_paren = - $this_line_is_semicolon_terminated # ends in ';' - && $ibeg_1 == $iend_1 # only one token on last line - && $tokens_to_go[$iend_1] eq - ')' # must be structural paren + $this_line_is_semicolon_terminated + + # only one token on last line + && $ibeg_1 == $iend_1 + + # must be structural paren + && $tokens_to_go[$iend_1] eq ')' - # only &&, ||, and : if no others seen + # style must allow outdenting, + && !$closing_token_indentation{')'} + + # only leading '&&', '||', and ':' if no others seen # (but note: our count made below could be wrong # due to intervening comments) && ( $leading_amp_count == 0 - || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ ) + || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) - # but leading colons probably line up with with a + # but leading colons probably line up with a # previous colon or question (count could be wrong). - && $types_to_go[$ibeg_2] ne ':' + && $type_ibeg_2 ne ':' # only one step in depth allowed. this line must not # begin with a ')' itself. @@ -16526,8 +17783,8 @@ sub undo_forced_breakpoint_stack { && !$rOpts->{'indent-closing-brace'} && $tokens_to_go[$iend_2] eq '{' && ( - ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ ) - || ( $types_to_go[$ibeg_2] eq 'k' + ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ ) + || ( $type_ibeg_2 eq 'k' && $is_and_or{ $tokens_to_go[$ibeg_2] } ) || $is_if_unless{ $tokens_to_go[$ibeg_2] } ) @@ -16541,7 +17798,7 @@ sub undo_forced_breakpoint_stack { $previous_outdentable_closing_paren # handle '.' and '?' specially below - || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ ) + || ( $type_ibeg_2 =~ /^[\.\?]$/ ) ); } @@ -16549,33 +17806,28 @@ sub undo_forced_breakpoint_stack { # honor breaks at opening brace # Added to prevent recombining something like this: # } || eval { package main; - elsif ( $types_to_go[$iend_1] eq '{' ) { + elsif ( $type_iend_1 eq '{' ) { next if $forced_breakpoint_to_go[$iend_1]; } # do not recombine lines with ending &&, ||, - elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) { - next unless $want_break_before{ $types_to_go[$iend_1] }; - } - - # keep a terminal colon - elsif ( $types_to_go[$iend_1] eq ':' ) { - next unless $want_break_before{ $types_to_go[$iend_1] }; + elsif ( $is_amp_amp{$type_iend_1} ) { + next unless $want_break_before{$type_iend_1}; } # Identify and recombine a broken ?/: chain - elsif ( $types_to_go[$iend_1] eq '?' ) { + elsif ( $type_iend_1 eq '?' ) { # Do not recombine different levels next if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); # do not recombine unless next line ends in : - next unless $types_to_go[$iend_2] eq ':'; + next unless $type_iend_2 eq ':'; } # for lines ending in a comma... - elsif ( $types_to_go[$iend_1] eq ',' ) { + elsif ( $type_iend_1 eq ',' ) { # Do not recombine at comma which is following the # input bias. @@ -16584,8 +17836,8 @@ sub undo_forced_breakpoint_stack { # an isolated '},' may join with an identifier + ';' # this is useful for the class of a 'bless' statement (bless.t) - if ( $types_to_go[$ibeg_1] eq '}' - && $types_to_go[$ibeg_2] eq 'i' ) + if ( $type_ibeg_1 eq '}' + && $type_ibeg_2 eq 'i' ) { next unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) @@ -16622,23 +17874,23 @@ sub undo_forced_breakpoint_stack { } # opening paren.. - elsif ( $types_to_go[$iend_1] eq '(' ) { + elsif ( $type_iend_1 eq '(' ) { # No longer doing this } - elsif ( $types_to_go[$iend_1] eq ')' ) { + elsif ( $type_iend_1 eq ')' ) { # No longer doing this } # keep a terminal for-semicolon - elsif ( $types_to_go[$iend_1] eq 'f' ) { + elsif ( $type_iend_1 eq 'f' ) { next; } # if '=' at end of line ... - elsif ( $is_assignment{ $types_to_go[$iend_1] } ) { + elsif ( $is_assignment{$type_iend_1} ) { # keep break after = if it was in input stream # this helps prevent 'blinkers' @@ -16648,12 +17900,12 @@ sub undo_forced_breakpoint_stack { && $iend_1 != $ibeg_1; my $is_short_quote = - ( $types_to_go[$ibeg_2] eq 'Q' + ( $type_ibeg_2 eq 'Q' && $ibeg_2 == $iend_2 - && length( $tokens_to_go[$ibeg_2] ) < + && token_sequence_length( $ibeg_2, $ibeg_2 ) < $rOpts_short_concatenation_item_length ); my $is_ternary = - ( $types_to_go[$ibeg_1] eq '?' + ( $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); # always join an isolated '=', a short quote, or if this @@ -16674,28 +17926,33 @@ sub undo_forced_breakpoint_stack { && $types_to_go[$ibeg_nmax] eq ';' ) # or the next line ends with a here doc - || $types_to_go[$iend_2] eq 'h' + || $type_iend_2 eq 'h' # or the next line ends in an open paren or brace # and the break hasn't been forced [dima.t] || ( !$forced_breakpoint_to_go[$iend_1] - && $types_to_go[$iend_2] eq '{' ) + && $type_iend_2 eq '{' ) ) # do not recombine if the two lines might align well # this is a very approximate test for this && ( $ibeg_3 >= 0 - && $types_to_go[$ibeg_2] ne - $types_to_go[$ibeg_3] ) + && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) ); - # -lp users often prefer this: - # my $title = function($env, $env, $sysarea, - # "bubba Borrower Entry"); - # so we will recombine if -lp is used we have ending - # comma - if ( !$rOpts_line_up_parentheses - || $types_to_go[$iend_2] ne ',' ) + if ( + + # Recombine if we can make two lines + $nmax >= $n + 2 + + # -lp users often prefer this: + # my $title = function($env, $env, $sysarea, + # "bubba Borrower Entry"); + # so we will recombine if -lp is used we have + # ending comma + && ( !$rOpts_line_up_parentheses + || $type_iend_2 ne ',' ) + ) { # otherwise, scan the rhs line up to last token for @@ -16742,7 +17999,7 @@ sub undo_forced_breakpoint_stack { } # for keywords.. - elsif ( $types_to_go[$iend_1] eq 'k' ) { + elsif ( $type_iend_1 eq 'k' ) { # make major control keywords stand out # (recombine.t) @@ -16762,67 +18019,27 @@ sub undo_forced_breakpoint_stack { } } - # handle trailing + - * / - elsif ( $is_math_op{ $types_to_go[$iend_1] } ) { - - # combine lines if next line has single number - # or a short term followed by same operator - my $i_next_nonblank = $ibeg_2; - my $i_next_next = $i_next_nonblank + 1; - $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); - my $number_follows = $types_to_go[$i_next_nonblank] eq 'n' - && ( - $i_next_nonblank == $iend_2 - || ( $i_next_next == $iend_2 - && $is_math_op{ $types_to_go[$i_next_next] } ) - || $types_to_go[$i_next_next] eq ';' - ); - - # find token before last operator of previous line - my $iend_1_minus = $iend_1; - $iend_1_minus-- - if ( $iend_1_minus > $ibeg_1 ); - $iend_1_minus-- - if ( $types_to_go[$iend_1_minus] eq 'b' - && $iend_1_minus > $ibeg_1 ); - - my $short_term_follows = - ( $types_to_go[$iend_2] eq $types_to_go[$iend_1] - && $types_to_go[$iend_1_minus] =~ /^[in]$/ - && $iend_2 <= $ibeg_2 + 2 - && length( $tokens_to_go[$ibeg_2] ) < - $rOpts_short_concatenation_item_length ); - - next - unless ( $number_follows || $short_term_follows ); - } - #---------------------------------------------------------- - # Section 2: Now examine token at $ibeg_2 (left end of second - # line of pair) + # Recombine Section 3: + # Examine token at $ibeg_2 (left end of second line of pair) #---------------------------------------------------------- # join lines identified above as capable of # causing an outdented line with leading closing paren + # Note that we are skipping the rest of this section if ($previous_outdentable_closing_paren) { $forced_breakpoint_to_go[$iend_1] = 0; } - # do not recombine lines with leading : - elsif ( $types_to_go[$ibeg_2] eq ':' ) { - $leading_amp_count++; - next if $want_break_before{ $types_to_go[$ibeg_2] }; - } - # handle lines with leading &&, || - elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { + elsif ( $is_amp_amp{$type_ibeg_2} ) { $leading_amp_count++; # ok to recombine if it follows a ? or : # and is followed by an open paren.. my $ok = - ( $is_ternary{ $types_to_go[$ibeg_1] } + ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' ) # or is followed by a ? or : at same depth @@ -16853,7 +18070,7 @@ sub undo_forced_breakpoint_stack { && $nesting_depth_to_go[$ibeg_3] == $nesting_depth_to_go[$ibeg_2] ); - next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] }; + next if !$ok && $want_break_before{$type_ibeg_2}; $forced_breakpoint_to_go[$iend_1] = 0; # tweak the bond strength to give this joint priority @@ -16862,7 +18079,7 @@ sub undo_forced_breakpoint_stack { } # Identify and recombine a broken ?/: chain - elsif ( $types_to_go[$ibeg_2] eq '?' ) { + elsif ( $type_ibeg_2 eq '?' ) { # Do not recombine different levels my $lev = $levels_to_go[$ibeg_2]; @@ -16873,8 +18090,7 @@ sub undo_forced_breakpoint_stack { # are that (1) no alignment of the ? will be possible # and (2) the expression is somewhat complex, so the # '?' is harder to see in the interior of the line. - my $follows_colon = - $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':'; + my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; next unless ( $follows_colon || $precedes_colon ); @@ -16898,12 +18114,8 @@ sub undo_forced_breakpoint_stack { } # do not recombine lines with leading '.' - elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) { - my $i_next_nonblank = $ibeg_2 + 1; - if ( $types_to_go[$i_next_nonblank] eq 'b' ) { - $i_next_nonblank++; - } - + elsif ( $type_ibeg_2 eq '.' ) { + my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); next unless ( @@ -16921,21 +18133,22 @@ sub undo_forced_breakpoint_stack { ( $n == 2 && $n == $nmax - && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] + && $type_ibeg_1 ne $type_ibeg_2 ) # ... or this would strand a short quote , like this - # . "some long qoute" + # . "some long quote" # . "\n"; + || ( $types_to_go[$i_next_nonblank] eq 'Q' && $i_next_nonblank >= $iend_2 - 1 - && length( $tokens_to_go[$i_next_nonblank] ) < + && $token_lengths_to_go[$i_next_nonblank] < $rOpts_short_concatenation_item_length ) ); } # handle leading keyword.. - elsif ( $types_to_go[$ibeg_2] eq 'k' ) { + elsif ( $type_ibeg_2 eq 'k' ) { # handle leading "or" if ( $tokens_to_go[$ibeg_2] eq 'or' ) { @@ -16945,7 +18158,7 @@ sub undo_forced_breakpoint_stack { && ( # following 'if' or 'unless' or 'or' - $types_to_go[$ibeg_1] eq 'k' + $type_ibeg_1 eq 'k' && $is_if_unless{ $tokens_to_go[$ibeg_1] } # important: only combine a very simple or @@ -16956,6 +18169,9 @@ sub undo_forced_breakpoint_stack { && ( $iend_2 - $ibeg_2 <= 7 ) ) ); +##X: RT #81854 + $forced_breakpoint_to_go[$iend_1] = 0 + unless $old_breakpoint_to_go[$iend_1]; } # handle leading 'and' @@ -16982,7 +18198,7 @@ sub undo_forced_breakpoint_stack { && ( # following 'if' or 'unless' or 'or' - $types_to_go[$ibeg_1] eq 'k' + $type_ibeg_1 eq 'k' && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } || $tokens_to_go[$ibeg_1] eq 'or' ) ) @@ -16998,7 +18214,7 @@ sub undo_forced_breakpoint_stack { $this_line_is_semicolon_terminated # previous line begins with 'and' or 'or' - && $types_to_go[$ibeg_1] eq 'k' + && $type_ibeg_1 eq 'k' && $is_and_or{ $tokens_to_go[$ibeg_1] } ); @@ -17009,9 +18225,9 @@ sub undo_forced_breakpoint_stack { # keywords look best at start of lines, # but combine things like "1 while" - unless ( $is_assignment{ $types_to_go[$iend_1] } ) { + unless ( $is_assignment{$type_iend_1} ) { next - if ( ( $types_to_go[$iend_1] ne 'k' ) + if ( ( $type_iend_1 ne 'k' ) && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); } } @@ -17020,7 +18236,7 @@ sub undo_forced_breakpoint_stack { # similar treatment of && and || as above for 'and' and 'or': # NOTE: This block of code is currently bypassed because # of a previous block but is retained for possible future use. - elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { + elsif ( $is_amp_amp{$type_ibeg_2} ) { # maybe looking at something like: # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; @@ -17030,67 +18246,16 @@ sub undo_forced_breakpoint_stack { $this_line_is_semicolon_terminated # previous line begins with an 'if' or 'unless' keyword - && $types_to_go[$ibeg_1] eq 'k' + && $type_ibeg_1 eq 'k' && $is_if_unless{ $tokens_to_go[$ibeg_1] } ); } - # handle leading + - * / - elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) { - my $i_next_nonblank = $ibeg_2 + 1; - if ( $types_to_go[$i_next_nonblank] eq 'b' ) { - $i_next_nonblank++; - } - - my $i_next_next = $i_next_nonblank + 1; - $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); - - my $is_number = ( - $types_to_go[$i_next_nonblank] eq 'n' - && ( $i_next_nonblank >= $iend_2 - 1 - || $types_to_go[$i_next_next] eq ';' ) - ); - - my $iend_1_nonblank = - $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1; - my $iend_2_nonblank = - $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2; - - my $is_short_term = - ( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1] - && $types_to_go[$iend_2_nonblank] =~ /^[in]$/ - && $types_to_go[$iend_1_nonblank] =~ /^[in]$/ - && $iend_2_nonblank <= $ibeg_2 + 2 - && length( $tokens_to_go[$iend_2_nonblank] ) < - $rOpts_short_concatenation_item_length ); - - # Combine these lines if this line is a single - # number, or if it is a short term with same - # operator as the previous line. For example, in - # the following code we will combine all of the - # short terms $A, $B, $C, $D, $E, $F, together - # instead of leaving them one per line: - # my $time = - # $A * $B * $C * $D * $E * $F * - # ( 2. * $eps * $sigma * $area ) * - # ( 1. / $tcold**3 - 1. / $thot**3 ); - # This can be important in math-intensive code. - next - unless ( - $is_number - || $is_short_term - - # or if we can reduce this to two lines if we do. - || ( $n == 2 - && $n == $nmax - && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] ) - ); - } - # handle line with leading = or similar - elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) { + elsif ( $is_assignment{$type_ibeg_2} ) { next unless ( $n == 1 || $n == $nmax ); + next if $old_breakpoint_to_go[$iend_1]; next unless ( @@ -17101,7 +18266,7 @@ sub undo_forced_breakpoint_stack { || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) # or the next line ends with a here doc - || $types_to_go[$iend_2] eq 'h' + || $type_iend_2 eq 'h' # or this is a short line ending in ; || ( $n == $nmax && $this_line_is_semicolon_terminated ) @@ -17110,7 +18275,7 @@ sub undo_forced_breakpoint_stack { } #---------------------------------------------------------- - # Section 3: + # Recombine Section 4: # Combine the lines if we arrive here and it is possible #---------------------------------------------------------- @@ -17138,7 +18303,7 @@ sub undo_forced_breakpoint_stack { && !$this_line_is_semicolon_terminated && $n < $nmax && $excess + 4 > 0 - && $types_to_go[$iend_2] ne ',' ); + && $type_iend_2 ne ',' ); # do not recombine if we would skip in indentation levels if ( $n < $nmax ) { @@ -17152,7 +18317,7 @@ sub undo_forced_breakpoint_stack { && !( $n == 1 && $iend_1 - $ibeg_1 <= 2 - && $types_to_go[$ibeg_1] eq 'k' + && $type_ibeg_1 eq 'k' && $tokens_to_go[$ibeg_1] eq 'if' && $tokens_to_go[$iend_1] ne '(' ) @@ -17160,7 +18325,7 @@ sub undo_forced_breakpoint_stack { } # honor no-break's - next if ( $bs == NO_BREAK ); + next if ( $bs >= NO_BREAK - 1 ); # remember the pair with the greatest bond strength if ( !$n_best ) { @@ -17180,6 +18345,7 @@ sub undo_forced_breakpoint_stack { if ($n_best) { splice @$ri_beg, $n_best, 1; splice @$ri_end, $n_best - 1, 1; + splice @joint, $n_best, 1; # keep going if we are still making progress $more_to_do++; @@ -17407,7 +18573,7 @@ sub break_equals { return unless (@insert_list); # One final check... - # scan second and thrid lines and be sure there are no assignments + # scan second and third lines and be sure there are no assignments # we want to avoid breaking at an = to make something like this: # unless ( $icon = # $html_icons{"$type-$state"} @@ -17572,7 +18738,7 @@ sub set_continuation_breaks { # see if any ?/:'s are in order my $colons_in_order = 1; my $last_tok = ""; - my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ]; + my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ]; my $colon_count = @colon_list; foreach (@colon_list) { if ( $_ eq $last_tok ) { $colons_in_order = 0; last } @@ -17588,7 +18754,7 @@ sub set_continuation_breaks { #------------------------------------------------------- while ( $i_begin <= $imax ) { my $lowest_strength = NO_BREAK; - my $starting_sum = $lengths_to_go[$i_begin]; + my $starting_sum = $summed_lengths_to_go[$i_begin]; my $i_lowest = -1; my $i_test = -1; my $lowest_next_token = ''; @@ -17599,16 +18765,16 @@ sub set_continuation_breaks { # BEGINNING of inner loop to find the best next breakpoint #------------------------------------------------------- for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { - my $type = $types_to_go[$i_test]; - my $token = $tokens_to_go[$i_test]; - my $next_type = $types_to_go[ $i_test + 1 ]; - my $next_token = $tokens_to_go[ $i_test + 1 ]; - my $i_next_nonblank = - ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 ); + my $type = $types_to_go[$i_test]; + my $token = $tokens_to_go[$i_test]; + my $next_type = $types_to_go[ $i_test + 1 ]; + my $next_token = $tokens_to_go[ $i_test + 1 ]; + my $i_next_nonblank = $inext_to_go[$i_test]; my $next_nonblank_type = $types_to_go[$i_next_nonblank]; my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; my $strength = $bond_strength_to_go[$i_test]; + my $maximum_line_length = maximum_line_length($i_begin); # use old breaks as a tie-breaker. For example to # prevent blinkers with -pbp in this code: @@ -17626,28 +18792,60 @@ sub set_continuation_breaks { ## * ( ( 1 - $x )**( $b - 1 ) ); # reduce strength a bit to break ties at an old breakpoint ... - $strength -= $tiny_bias - if $old_breakpoint_to_go[$i_test] - - # which is a 'good' breakpoint, meaning ... - # we don't want to break before it - && !$want_break_before{$type} - - # and either we want to break before the next token - # or the next token is not short (i.e. not a '*', '/' etc.) - && $i_next_nonblank <= $imax - && ( - $want_break_before{$next_nonblank_type} - || ( $lengths_to_go[ $i_next_nonblank + 1 ] - - $lengths_to_go[$i_next_nonblank] > 2 ) - || $next_nonblank_type =~ /^[\(\[\{L]$/ - ); + if ( + $old_breakpoint_to_go[$i_test] + + # which is a 'good' breakpoint, meaning ... + # we don't want to break before it + && !$want_break_before{$type} + + # and either we want to break before the next token + # or the next token is not short (i.e. not a '*', '/' etc.) + && $i_next_nonblank <= $imax + && ( $want_break_before{$next_nonblank_type} + || $token_lengths_to_go[$i_next_nonblank] > 2 + || $next_nonblank_type =~ /^[\,\(\[\{L]$/ ) + ) + { + $strength -= $tiny_bias; + } + + # otherwise increase strength a bit if this token would be at the + # maximum line length. This is necessary to avoid blinking + # in the above example when the -iob flag is added. + else { + my $len = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 1 ] - + $starting_sum; + if ( $len >= $maximum_line_length ) { + $strength += $tiny_bias; + } + } my $must_break = 0; - # FIXME: Might want to be able to break after these - # force an immediate break at certain operators - # with lower level than the start of the line + # Force an immediate break at certain operators + # with lower level than the start of the line, + # unless we've already seen a better break. + # + ############################################## + # Note on an issue with a preceding ? + ############################################## + # We don't include a ? in the above list, but there may + # be a break at a previous ? if the line is long. + # Because of this we do not want to force a break if + # there is a previous ? on this line. For now the best way + # to do this is to not break if we have seen a lower strength + # point, which is probably a ?. + # + # Example of unwanted breaks we are avoiding at a '.' following a ? + # from pod2html using perltidy -gnu: + # ) + # ? "\n<A NAME=\"" + # . $value + # . "\">\n$text</A>\n" + # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; if ( ( $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ @@ -17656,6 +18854,7 @@ sub set_continuation_breaks { ) && ( $nesting_depth_to_go[$i_begin] > $nesting_depth_to_go[$i_next_nonblank] ) + && ( $strength <= $lowest_strength ) ) { set_forced_breakpoint($i_next_nonblank); @@ -17669,12 +18868,34 @@ sub set_continuation_breaks { # break between ) { in a continued line so that the '{' can # be outdented # See similar logic in scan_list which catches instances - # where a line is just something like ') {' - || ( $line_count - && ( $token eq ')' ) + # where a line is just something like ') {'. We have to + # be careful because the corresponding block keyword might + # not be on the first line, such as 'for' here: + # + # eval { + # for ("a") { + # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } + # } + # }; + # + || ( + $line_count + && ( $token eq ')' ) && ( $next_nonblank_type eq '{' ) && ($next_nonblank_block_type) - && !$rOpts->{'opening-brace-always-on-right'} ) + && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) + + # RT #104427: Dont break before opening sub brace because + # sub block breaks handled at higher level, unless + # it looks like the preceeding list is long and broken + && !( + $next_nonblank_block_type =~ /^sub\b/ + && ( $nesting_depth_to_go[$i_begin] == + $nesting_depth_to_go[$i_next_nonblank] ) + ) + + && !$rOpts->{'opening-brace-always-on-right'} + ) # There is an implied forced break at a terminal opening brace || ( ( $type eq '{' ) && ( $i_test == $imax ) ) @@ -17684,7 +18905,7 @@ sub set_continuation_breaks { # Forced breakpoints must sometimes be overridden, for example # because of a side comment causing a NO_BREAK. It is easier # to catch this here than when they are set. - if ( $strength < NO_BREAK ) { + if ( $strength < NO_BREAK - 1 ) { $strength = $lowest_strength - $tiny_bias; $must_break = 1; } @@ -17698,9 +18919,9 @@ sub set_continuation_breaks { && ( ( $leading_spaces + - $lengths_to_go[ $i_next_nonblank + 1 ] - + $summed_lengths_to_go[ $i_next_nonblank + 1 ] - $starting_sum - ) > $rOpts_maximum_line_length + ) > $maximum_line_length ) ) { @@ -17720,17 +18941,13 @@ sub set_continuation_breaks { && ( ( $leading_spaces + - $lengths_to_go[ $i_test + 1 ] - + $summed_lengths_to_go[ $i_test + 1 ] - $starting_sum - ) < $rOpts_maximum_line_length + ) < $maximum_line_length ) ) { - $i_test++; - - if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) { - $i_test++; - } + $i_test = min( $imax, $inext_to_go[$i_test] ); redo; } @@ -17747,21 +18964,50 @@ sub set_continuation_breaks { # break It is only called if a breakpoint is required or # desired. This will probably need some adjustments # over time. A goal is to try to be sure that, if a new - # side comment is introduced into formated text, then + # side comment is introduced into formatted text, then # the same breakpoints will occur. scbreak.t last if ( - $i_test == $imax # we are at the end - && !$forced_breakpoint_count # - && $saw_good_break # old line had good break - && $type =~ /^[#;\{]$/ # and this line ends in - # ';' or side comment - && $i_last_break < 0 # and we haven't made a break - && $i_lowest > 0 # and we saw a possible break - && $i_lowest < $imax - 1 # (but not just before this ;) + $i_test == $imax # we are at the end + && !$forced_breakpoint_count # + && $saw_good_break # old line had good break + && $type =~ /^[#;\{]$/ # and this line ends in + # ';' or side comment + && $i_last_break < 0 # and we haven't made a break + && $i_lowest >= 0 # and we saw a possible break + && $i_lowest < $imax - 1 # (but not just before this ;) && $strength - $lowest_strength < 0.5 * WEAK # and it's good ); + # Do not skip past an important break point in a short final + # segment. For example, without this check we would miss the + # break at the final / in the following code: + # + # $depth_stop = + # ( $tau * $mass_pellet * $q_0 * + # ( 1. - exp( -$t_stop / $tau ) ) - + # 4. * $pi * $factor * $k_ice * + # ( $t_melt - $t_ice ) * + # $r_pellet * + # $t_stop ) / + # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); + # + if ( $line_count > 2 + && $i_lowest < $i_test + && $i_test > $imax - 2 + && $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_lowest] + && $lowest_strength < $last_break_strength - .5 * WEAK ) + { + # Make this break for math operators for now + my $ir = $inext_to_go[$i_lowest]; + my $il = $iprev_to_go[$ir]; + last + if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ + || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ); + } + + # Update the minimum bond strength location $lowest_strength = $strength; $i_lowest = $i_test; $lowest_next_token = $next_nonblank_token; @@ -17776,10 +19022,9 @@ sub set_continuation_breaks { && ( $lowest_strength - $last_break_strength <= $max_bias ) ) { - my $i_last_end = $i_begin - 1; - if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 } - my $tok_beg = $tokens_to_go[$i_begin]; - my $type_beg = $types_to_go[$i_begin]; + my $i_last_end = $iprev_to_go[$i_begin]; + my $tok_beg = $tokens_to_go[$i_begin]; + my $type_beg = $types_to_go[$i_begin]; if ( # check for leading alignment of certain tokens @@ -17805,28 +19050,58 @@ sub set_continuation_breaks { } } - my $too_long = - ( $i_test >= $imax ) - ? 1 - : ( - ( - $leading_spaces + - $lengths_to_go[ $i_test + 2 ] - - $starting_sum - ) > $rOpts_maximum_line_length - ); + my $too_long = ( $i_test >= $imax ); + if ( !$too_long ) { + my $next_length = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 2 ] - + $starting_sum; + $too_long = $next_length > $maximum_line_length; + + # To prevent blinkers we will avoid leaving a token exactly at + # the line length limit unless it is the last token or one of + # several "good" types. + # + # The following code was a blinker with -pbp before this + # modification: +## $last_nonblank_token eq '(' +## && $is_indirect_object_taker{ $paren_type +## [$paren_depth] } + # The issue causing the problem is that if the + # term [$paren_depth] gets broken across a line then + # the whitespace routine doesn't see both opening and closing + # brackets and will format like '[ $paren_depth ]'. This + # leads to an oscillation in length depending if we break + # before the closing bracket or not. + if ( !$too_long + && $i_test + 1 < $imax + && $next_nonblank_type !~ /^[,\}\]\)R]$/ ) + { + $too_long = $next_length >= $maximum_line_length; + } + } FORMATTER_DEBUG_FLAG_BREAK - && print -"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n"; + && do { + my $ltok = $token; + my $rtok = $next_nonblank_token ? $next_nonblank_token : ""; + my $i_testp2 = $i_test + 2; + if ( $i_testp2 > $max_index_to_go + 1 ) { + $i_testp2 = $max_index_to_go + 1; + } + if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } + if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } + print STDOUT +"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; + }; # allow one extra terminal token after exceeding line length # if it would strand this token. if ( $rOpts_fuzzy_line_length && $too_long - && ( $i_lowest == $i_test ) - && ( length($token) > 1 ) - && ( $next_nonblank_type =~ /^[\;\,]$/ ) ) + && $i_lowest == $i_test + && $token_lengths_to_go[$i_test] > 1 + && $next_nonblank_type =~ /^[\;\,]$/ ) { $too_long = 0; } @@ -17850,11 +19125,7 @@ sub set_continuation_breaks { if ( $i_lowest < 0 ) { $i_lowest = $imax } # semi-final index calculation - my $i_next_nonblank = ( - ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) - ? $i_lowest + 2 - : $i_lowest + 1 - ); + my $i_next_nonblank = $inext_to_go[$i_lowest]; my $next_nonblank_type = $types_to_go[$i_next_nonblank]; my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; @@ -17893,16 +19164,13 @@ sub set_continuation_breaks { #------------------------------------------------------- # final index calculation - $i_next_nonblank = ( - ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) - ? $i_lowest + 2 - : $i_lowest + 1 - ); + $i_next_nonblank = $inext_to_go[$i_lowest]; $next_nonblank_type = $types_to_go[$i_next_nonblank]; $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; FORMATTER_DEBUG_FLAG_BREAK - && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; + && print STDOUT + "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; #------------------------------------------------------- # ?/: rule 2 : if we break at a '?', then break at its ':' @@ -17991,12 +19259,7 @@ sub set_continuation_breaks { my $i_question = $mate_index_to_go[$_]; if ( $i_question >= 0 ) { if ( $want_break_before{'?'} ) { - $i_question--; - if ( $i_question > 0 - && $types_to_go[$i_question] eq 'b' ) - { - $i_question--; - } + $i_question = $iprev_to_go[$i_question]; } if ( $i_question >= 0 ) { @@ -18042,9 +19305,7 @@ sub insert_additional_breaks { # Do not leave a blank at the end of a line; back up if necessary if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } - my $i_break_right = $i_break_left + 1; - if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ } - + my $i_break_right = $inext_to_go[$i_break_left]; if ( $i_break_left >= $i_f && $i_break_left < $i_l && $i_break_right > $i_f @@ -18086,13 +19347,13 @@ sub set_closing_breakpoint { } } -# check to see if output line tabbing agrees with input line -# this can be very useful for debugging a script which has an extra -# or missing brace sub compare_indentation_levels { - my ( $python_indentation_level, $structural_indentation_level ) = @_; - if ( ( $python_indentation_level ne $structural_indentation_level ) ) { + # check to see if output line tabbing agrees with input line + # this can be very useful for debugging a script which has an extra + # or missing brace + my ( $guessed_indentation_level, $structural_indentation_level ) = @_; + if ( $guessed_indentation_level ne $structural_indentation_level ) { $last_tabbing_disagreement = $input_line_number; if ($in_tabbing_disagreement) { @@ -18102,7 +19363,7 @@ sub compare_indentation_levels { if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { write_logfile_entry( -"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n" +"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" ); } $in_tabbing_disagreement = $input_line_number; @@ -18646,12 +19907,12 @@ package Perl::Tidy::VerticalAligner; # attempts to line up certain common tokens, such as => and #, which are # identified by the calling routine. # -# There are two main routines: append_line and flush. Append acts as a +# There are two main routines: valign_input and flush. Append acts as a # storage buffer, collecting lines into a group which can be vertically # aligned. When alignment is no longer possible or desirable, it dumps # the group to flush. # -# append_line -----> flush +# valign_input -----> flush # # collects writes # vertical one @@ -18665,13 +19926,16 @@ BEGIN { use constant VALIGN_DEBUG_FLAG_APPEND => 0; use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; use constant VALIGN_DEBUG_FLAG_TERNARY => 0; + use constant VALIGN_DEBUG_FLAG_TABS => 0; my $debug_warning = sub { - print "VALIGN_DEBUGGING with key $_[0]\n"; + print STDOUT "VALIGN_DEBUGGING with key $_[0]\n"; }; VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); + VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY'); + VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS'); } @@ -18689,7 +19953,7 @@ use vars qw( $group_type $group_maximum_gap $marginal_match - $last_group_level_written + $last_level_written $last_leading_space_count $extra_indent_ok $zero_count @@ -18707,6 +19971,7 @@ use vars qw( @side_comment_history $comment_leading_space_count $is_matching_terminal_line + $consecutive_block_comments $cached_line_text $cached_line_type @@ -18716,12 +19981,16 @@ use vars qw( $cached_line_leading_space_count $cached_seqno_string + $valign_buffer_filling + @valign_buffer + $seqno_string $last_nonblank_seqno_string $rOpts $rOpts_maximum_line_length + $rOpts_variable_maximum_line_length $rOpts_continuation_indentation $rOpts_indent_columns $rOpts_tabs @@ -18743,7 +20012,7 @@ sub initialize { # variables describing the entire space group: $ralignment_list = []; $group_level = 0; - $last_group_level_written = -1; + $last_level_written = -1; $extra_indent_ok = 0; # can we move all lines to the right? $last_side_comment_length = 0; $maximum_jmax_seen = 0; @@ -18766,7 +20035,7 @@ sub initialize { $side_comment_history[1] = [ -200, 0 ]; $side_comment_history[2] = [ -100, 0 ]; - # write_leader_and_string cache: + # valign_output_step_B cache: $cached_line_text = ""; $cached_line_type = 0; $cached_line_flag = 0; @@ -18787,8 +20056,11 @@ sub initialize { $rOpts->{'fixed-position-side-comment'}; $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; - $rOpts_valign = $rOpts->{'valign'}; + $rOpts_variable_maximum_line_length = + $rOpts->{'variable-maximum-line-length'}; + $rOpts_valign = $rOpts->{'valign'}; + $consecutive_block_comments = 0; forget_side_comment(); initialize_for_new_group(); @@ -18878,7 +20150,7 @@ sub make_alignment { } sub dump_alignments { - print + print STDOUT "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; for my $i ( 0 .. $maximum_alignment_index ) { my $column = $ralignment_list->[$i]->get_column(); @@ -18886,7 +20158,7 @@ sub dump_alignments { my $matching_token = $ralignment_list->[$i]->get_matching_token(); my $starting_line = $ralignment_list->[$i]->get_starting_line(); my $ending_line = $ralignment_list->[$i]->get_ending_line(); - print + print STDOUT "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; } } @@ -18907,9 +20179,21 @@ sub forget_side_comment { $last_comment_column = 0; } -sub append_line { +sub maximum_line_length_for_level { + + # return maximum line length for line starting with a given level + my $maximum_line_length = $rOpts_maximum_line_length; + if ($rOpts_variable_maximum_line_length) { + my $level = shift; + if ( $level < 0 ) { $level = 0 } + $maximum_line_length += $level * $rOpts_indent_columns; + } + return $maximum_line_length; +} + +sub valign_input { - # sub append is called to place one line in the current vertical group. + # Place one line in the current vertical group. # # The input parameters are: # $level = indentation level of this line @@ -18946,7 +20230,7 @@ sub append_line { # first one is always at zero. The interior columns are at the start of # the matching tokens, and the last one tracks the maximum line length. # - # Basically, each time a new line comes in, it joins the current vertical + # Each time a new line comes in, it joins the current vertical # group if possible. Otherwise it causes the current group to be dumped # and a new group is started. # @@ -18986,8 +20270,18 @@ sub append_line { ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); $is_outdented = 0 if $is_hanging_side_comment; + # Forget side comment alignment after seeing 2 or more block comments + my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); + if ($is_block_comment) { + $consecutive_block_comments++; + } + else { + if ( $consecutive_block_comments > 1 ) { forget_side_comment() } + $consecutive_block_comments = 0; + } + VALIGN_DEBUG_FLAG_APPEND0 && do { - print + print STDOUT "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; }; @@ -19030,7 +20324,7 @@ sub append_line { # we are allowed to shift a group of lines to the right if its # level is greater than the previous and next group $extra_indent_ok = - ( $level < $group_level && $last_group_level_written < $group_level ); + ( $level < $group_level && $last_level_written < $group_level ); my_flush(); @@ -19053,7 +20347,6 @@ sub append_line { # Patch to collect outdentable block COMMENTS # -------------------------------------------------------------------- my $is_blank_line = ""; - my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); if ( $group_type eq 'COMMENT' ) { if ( ( @@ -19136,8 +20429,8 @@ sub append_line { # and no space recovery is needed. if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) { - write_leader_and_string( $leading_space_count, $$rfields[0], 0, - $outdent_long_lines, $rvertical_tightness_flags ); + valign_output_step_B( $leading_space_count, $$rfields[0], 0, + $outdent_long_lines, $rvertical_tightness_flags, $level ); return; } } @@ -19168,7 +20461,7 @@ sub append_line { outdent_long_lines => $outdent_long_lines, list_type => "", is_hanging_side_comment => $is_hanging_side_comment, - maximum_line_length => $rOpts->{'maximum-line-length'}, + maximum_line_length => maximum_line_length_for_level($level), rvertical_tightness_flags => $rvertical_tightness_flags, ); @@ -19249,7 +20542,7 @@ sub append_line { # -------------------------------------------------------------------- # Append this line to the current group (or start new group) # -------------------------------------------------------------------- - accept_line($new_line); + add_to_group($new_line); # Future update to allow this to vary: $current_line = $new_line if ( $maximum_line_index == 0 ); @@ -19278,11 +20571,11 @@ sub append_line { # Step 8. Some old debugging stuff # -------------------------------------------------------------------- VALIGN_DEBUG_FLAG_APPEND && do { - print "APPEND fields:"; + print STDOUT "APPEND fields:"; dump_array(@$rfields); - print "APPEND tokens:"; + print STDOUT "APPEND tokens:"; dump_array(@$rtokens); - print "APPEND patterns:"; + print STDOUT "APPEND patterns:"; dump_array(@$rpatterns); dump_alignments(); }; @@ -19346,13 +20639,13 @@ sub eliminate_old_fields { my $case = 1; # See if case 2: both lines have leading '=' - # We'll require smiliar leading patterns in this case + # We'll require similar leading patterns in this case my $old_rtokens = $old_line->get_rtokens(); my $rtokens = $new_line->get_rtokens(); my $rpatterns = $new_line->get_rpatterns(); my $old_rpatterns = $old_line->get_rpatterns(); if ( $rtokens->[0] =~ /^=\d*$/ - && $old_rtokens->[0] eq $rtokens->[0] + && $old_rtokens->[0] eq $rtokens->[0] && $old_rpatterns->[0] eq $rpatterns->[0] ) { $case = 2; @@ -19622,12 +20915,12 @@ sub fix_terminal_ternary { VALIGN_DEBUG_FLAG_TERNARY && do { local $" = '><'; - print "CURRENT FIELDS=<@{$rfields_old}>\n"; - print "CURRENT TOKENS=<@{$rtokens_old}>\n"; - print "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; - print "UNMODIFIED FIELDS=<@{$rfields}>\n"; - print "UNMODIFIED TOKENS=<@{$rtokens}>\n"; - print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; + print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n"; + print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n"; + print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; + print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n"; + print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n"; + print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; }; # handle cases of leading colon on this line @@ -19702,9 +20995,9 @@ sub fix_terminal_ternary { VALIGN_DEBUG_FLAG_TERNARY && do { local $" = '><'; - print "MODIFIED TOKENS=<@tokens>\n"; - print "MODIFIED PATTERNS=<@patterns>\n"; - print "MODIFIED FIELDS=<@fields>\n"; + print STDOUT "MODIFIED TOKENS=<@tokens>\n"; + print STDOUT "MODIFIED PATTERNS=<@patterns>\n"; + print STDOUT "MODIFIED FIELDS=<@fields>\n"; }; # all ok .. update the arrays @@ -19737,7 +21030,7 @@ sub fix_terminal_else { # TBD: add handling for 'case' return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); - # look for the opening brace after the else, and extrace the depth + # look for the opening brace after the else, and extract the depth my $tok_brace = $rtokens->[0]; my $depth_brace; if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } @@ -19963,7 +21256,7 @@ sub fix_terminal_else { # when the pattern's don't match, because it can be # worse to create an alignment where none is needed # than to omit one. Here's an example where the ','s - # are not in named continers. The first line below + # are not in named containers. The first line below # should not match the next two: # ( $a, $b ) = ( $b, $r ); # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); @@ -19999,7 +21292,7 @@ sub fix_terminal_else { # well enough. if ( substr( $$old_rpatterns[$j], 0, 1 ) ne - substr( $$rpatterns[$j], 0, 1 ) ) + substr( $$rpatterns[$j], 0, 1 ) ) { goto NO_MATCH; } @@ -20158,7 +21451,7 @@ sub check_fit { } } -sub accept_line { +sub add_to_group { # The current line either starts a new alignment group or is # accepted into the current alignment group. @@ -20195,7 +21488,7 @@ sub accept_line { $new_line->set_alignments(@new_alignments); } - # remember group jmax extremes for next call to append_line + # remember group jmax extremes for next call to valign_input $previous_minimum_jmax_seen = $minimum_jmax_seen; $previous_maximum_jmax_seen = $maximum_jmax_seen; } @@ -20204,21 +21497,24 @@ sub dump_array { # debug routine to dump array contents local $" = ')('; - print "(@_)\n"; + print STDOUT "(@_)\n"; } # flush() sends the current Perl::Tidy::VerticalAligner group down the # pipeline to Perl::Tidy::FileWriter. -# This is the external flush, which also empties the cache +# This is the external flush, which also empties the buffer and cache sub flush { + # the buffer must be emptied first, then any cached text + dump_valign_buffer(); + if ( $maximum_line_index < 0 ) { if ($cached_line_type) { $seqno_string = $cached_seqno_string; - entab_and_output( $cached_line_text, + valign_output_step_C( $cached_line_text, $cached_line_leading_space_count, - $last_group_level_written ); + $last_level_written ); $cached_line_type = 0; $cached_line_text = ""; $cached_seqno_string = ""; @@ -20229,6 +21525,52 @@ sub flush { } } +sub reduce_valign_buffer_indentation { + + my ($diff) = @_; + if ( $valign_buffer_filling && $diff ) { + my $max_valign_buffer = @valign_buffer; + for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) { + my ( $line, $leading_space_count, $level ) = + @{ $valign_buffer[$i] }; + my $ws = substr( $line, 0, $diff ); + if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { + $line = substr( $line, $diff ); + } + if ( $leading_space_count >= $diff ) { + $leading_space_count -= $diff; + $level = level_change( $leading_space_count, $diff, $level ); + } + $valign_buffer[$i] = [ $line, $leading_space_count, $level ]; + } + } +} + +sub level_change { + + # compute decrease in level when we remove $diff spaces from the + # leading spaces + my ( $leading_space_count, $diff, $level ) = @_; + if ($rOpts_indent_columns) { + my $olev = + int( ( $leading_space_count + $diff ) / $rOpts_indent_columns ); + my $nlev = int( $leading_space_count / $rOpts_indent_columns ); + $level -= ( $olev - $nlev ); + if ( $level < 0 ) { $level = 0 } + } + return $level; +} + +sub dump_valign_buffer { + if (@valign_buffer) { + foreach (@valign_buffer) { + valign_output_step_D( @{$_} ); + } + @valign_buffer = (); + } + $valign_buffer_filling = ""; +} + # This is the internal flush, which leaves the cache intact sub my_flush { @@ -20239,7 +21581,7 @@ sub my_flush { VALIGN_DEBUG_FLAG_APPEND0 && do { my ( $a, $b, $c ) = caller(); - print + print STDOUT "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; }; @@ -20251,7 +21593,9 @@ sub my_flush { for my $i ( 0 .. $maximum_line_index ) { my $str = $group_lines[$i]; my $excess = - length($str) + $leading_space_count - $rOpts_maximum_line_length; + length($str) + + $leading_space_count - + maximum_line_length_for_level($group_level); if ( $excess > $max_excess ) { $max_excess = $excess; } @@ -20271,8 +21615,8 @@ sub my_flush { # write the group of lines my $outdent_long_lines = 0; for my $i ( 0 .. $maximum_line_index ) { - write_leader_and_string( $leading_space_count, $group_lines[$i], 0, - $outdent_long_lines, "" ); + valign_output_step_B( $leading_space_count, $group_lines[$i], 0, + $outdent_long_lines, "", $group_level ); } } @@ -20283,7 +21627,7 @@ sub my_flush { my $group_list_type = $group_lines[0]->get_list_type(); my ( $a, $b, $c ) = caller(); my $maximum_field_index = $group_lines[0]->get_jmax(); - print + print STDOUT "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; }; @@ -20307,7 +21651,7 @@ sub my_flush { # loop to output all lines for my $i ( 0 .. $maximum_line_index ) { my $line = $group_lines[$i]; - write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align, + valign_output_step_A( $line, $min_ci_gap, $do_not_align, $group_leader_length, $extra_leading_spaces ); } } @@ -20415,7 +21759,7 @@ sub adjust_side_comment { if ( $move >= 0 && $last_side_comment_length > 0 && ( $first_side_comment_line == 0 ) - && $group_level == $last_group_level_written ) + && $group_level == $last_level_written ) { $min_move = 0; } @@ -20424,7 +21768,7 @@ sub adjust_side_comment { $move = $min_move; } - # prevously, an upper bound was placed on $move here, + # previously, an upper bound was placed on $move here, # (maximum_space_to_comment), but it was not helpful # don't exceed the available space @@ -20483,7 +21827,7 @@ sub improve_continuation_indentation { # 'tan' => \&tan, # 'atan2' => \&atan2, - ## BUB: Deactivated#################### + ## Deactivated#################### # The trouble with this patch is that it may, for example, # move in some 'or's or ':'s, and leave some out, so that the # left edge alignment suffers. @@ -20492,7 +21836,7 @@ sub improve_continuation_indentation { my $maximum_field_index = $group_lines[0]->get_jmax(); - my $min_ci_gap = $rOpts_maximum_line_length; + my $min_ci_gap = maximum_line_length_for_level($group_level); if ( $maximum_field_index > 1 && !$do_not_align ) { for my $i ( 0 .. $maximum_line_index ) { @@ -20510,7 +21854,7 @@ sub improve_continuation_indentation { } } - if ( $min_ci_gap >= $rOpts_maximum_line_length ) { + if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) { $min_ci_gap = 0; } } @@ -20520,7 +21864,13 @@ sub improve_continuation_indentation { return $min_ci_gap; } -sub write_vertically_aligned_line { +sub valign_output_step_A { + + ############################################################### + # This is Step A in writing vertically aligned lines. + # The line is prepared according to the alignments which have + # been found and shipped to the next step. + ############################################################### my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, $extra_leading_spaces ) @@ -20599,9 +21949,9 @@ sub write_vertically_aligned_line { my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) ); # ship this line off - write_leader_and_string( $leading_space_count + $extra_leading_spaces, + valign_output_step_B( $leading_space_count + $extra_leading_spaces, $str, $side_comment_length, $outdent_long_lines, - $rvertical_tightness_flags ); + $rvertical_tightness_flags, $group_level ); } sub get_extra_leading_spaces { @@ -20613,7 +21963,7 @@ sub get_extra_leading_spaces { # list before it sees everything. When this happens, it sets # the indentation to the standard scheme, but notes how # many spaces it would have liked to use. We may be able - # to recover that space here in the event that that all of the + # to recover that space here in the event that all of the # lines of a list are back together again. #---------------------------------------------------------- @@ -20698,10 +22048,17 @@ sub get_output_line_number { 1 + $maximum_line_index + $file_writer_object->get_output_line_number(); } -sub write_leader_and_string { +sub valign_output_step_B { + + ############################################################### + # This is Step B in writing vertically aligned lines. + # Vertical tightness is applied according to preset flags. + # In particular this routine handles stacking of opening + # and closing tokens. + ############################################################### my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, - $rvertical_tightness_flags ) + $rvertical_tightness_flags, $level ) = @_; # handle outdenting of long lines: @@ -20710,7 +22067,7 @@ sub write_leader_and_string { length($str) - $side_comment_length + $leading_space_count - - $rOpts_maximum_line_length; + maximum_line_length_for_level($level); if ( $excess > 0 ) { $leading_space_count = 0; $last_outdented_line_at = @@ -20732,7 +22089,8 @@ sub write_leader_and_string { # Unpack any recombination data; it was packed by # sub send_lines_to_vertical_aligner. Contents: # - # [0] type: 1=opening 2=closing 3=opening block brace + # [0] type: 1=opening non-block 2=closing non-block + # 3=opening block brace 4=closing block brace # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok # if closing: spaces of padding to use # [2] sequence number of container @@ -20753,13 +22111,14 @@ sub write_leader_and_string { # either append this line to it or write it out if ( length($cached_line_text) ) { + # Dump an invalid cached line if ( !$cached_line_valid ) { - entab_and_output( $cached_line_text, + valign_output_step_C( $cached_line_text, $cached_line_leading_space_count, - $last_group_level_written ); + $last_level_written ); } - # handle cached line with opening container token + # Handle cached line ending in OPENING tokens elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { my $gap = $leading_space_count - length($cached_line_text); @@ -20771,23 +22130,47 @@ sub write_leader_and_string { } } - if ( $gap >= 0 ) { + if ( $gap >= 0 && defined($seqno_beg) ) { $leading_string = $cached_line_text . ' ' x $gap; $leading_space_count = $cached_line_leading_space_count; $seqno_string = $cached_seqno_string . ':' . $seqno_beg; + $level = $last_level_written; } else { - entab_and_output( $cached_line_text, + valign_output_step_C( $cached_line_text, $cached_line_leading_space_count, - $last_group_level_written ); + $last_level_written ); } } - # handle cached line to place before this closing container token + # Handle cached line ending in CLOSING tokens else { my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; + if ( + + # The new line must start with container + $seqno_beg + + # The container combination must be okay.. + && ( + + # okay to combine like types + ( $open_or_close == $cached_line_type ) + + # closing block brace may append to non-block + || ( $cached_line_type == 2 && $open_or_close == 4 ) + + # something like ');' + || ( !$open_or_close && $cached_line_type == 2 ) + + ) - if ( length($test_line) <= $rOpts_maximum_line_length ) { + # The combined line must fit + && ( + length($test_line) <= + maximum_line_length_for_level($last_level_written) ) + ) + { $seqno_string = $cached_seqno_string . ':' . $seqno_beg; @@ -20831,9 +22214,9 @@ sub write_leader_and_string { # and eliminate multiple colons might appear to be slow, # but it's not an issue because we almost never come # through here. In a typical file we don't. - $seqno_string =~ s/^:+//; + $seqno_string =~ s/^:+//; $last_nonblank_seqno_string =~ s/^:+//; - $seqno_string =~ s/:+/:/g; + $seqno_string =~ s/:+/:/g; $last_nonblank_seqno_string =~ s/:+/:/g; # how many spaces can we outdent? @@ -20859,6 +22242,11 @@ sub write_leader_and_string { $test_line = substr( $test_line, $diff ); $cached_line_leading_space_count -= $diff; + $last_level_written = + level_change( + $cached_line_leading_space_count, + $diff, $last_level_written ); + reduce_valign_buffer_indentation($diff); } # shouldn't happen, but not critical: @@ -20872,11 +22260,12 @@ sub write_leader_and_string { $str = $test_line; $leading_string = ""; $leading_space_count = $cached_line_leading_space_count; + $level = $last_level_written; } else { - entab_and_output( $cached_line_text, + valign_output_step_C( $cached_line_text, $cached_line_leading_space_count, - $last_group_level_written ); + $last_level_written ); } } } @@ -20888,7 +22277,7 @@ sub write_leader_and_string { # write or cache this line if ( !$open_or_close || $side_comment_length > 0 ) { - entab_and_output( $line, $leading_space_count, $group_level ); + valign_output_step_C( $line, $leading_space_count, $level ); } else { $cached_line_text = $line; @@ -20900,12 +22289,82 @@ sub write_leader_and_string { $cached_seqno_string = $seqno_string; } - $last_group_level_written = $group_level; - $last_side_comment_length = $side_comment_length; - $extra_indent_ok = 0; + $last_level_written = $level; + $last_side_comment_length = $side_comment_length; + $extra_indent_ok = 0; +} + +sub valign_output_step_C { + + ############################################################### + # This is Step C in writing vertically aligned lines. + # Lines are either stored in a buffer or passed along to the next step. + # The reason for storing lines is that we may later want to reduce their + # indentation when -sot and -sct are both used. + ############################################################### + my @args = @_; + + # Dump any saved lines if we see a line with an unbalanced opening or + # closing token. + dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling ); + + # Either store or write this line + if ($valign_buffer_filling) { + push @valign_buffer, [@args]; + } + else { + valign_output_step_D(@args); + } + + # For lines starting or ending with opening or closing tokens.. + if ($seqno_string) { + $last_nonblank_seqno_string = $seqno_string; + + # Start storing lines when we see a line with multiple stacked opening + # tokens. + # patch for RT #94354, requested by Colin Williams + if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ ) + { + + # This test is efficient but a little subtle: The first test says + # that we have multiple sequence numbers and hence multiple opening + # or closing tokens in this line. The second part of the test + # rejects stacked closing and ternary tokens. So if we get here + # then we should have stacked unbalanced opening tokens. + + # Here is a complex example: + + # Foo($Bar[0], { # (side comment) + # baz => 1, + # }); + + # The first line has sequence 6::4. It does not begin with + # a closing token or ternary, so it passes the test and must be + # stacked opening tokens. + + # The last line has sequence 4:6 but is a stack of closing tokens, + # so it gets rejected. + + # Note that the sequence number of an opening token for a qw quote + # is a negative number and will be rejected. + # For example, for the following line: + # skip_symbols([qw( + # $seqno_string='10:5:-1'. It would be okay to accept it but + # I decided not to do this after testing. + + $valign_buffer_filling = $seqno_string; + + } + } } -sub entab_and_output { +sub valign_output_step_D { + + ############################################################### + # This is Step D in writing vertically aligned lines. + # Write one vertically aligned line of code to the output object. + ############################################################### + my ( $line, $leading_space_count, $level ) = @_; # The line is currently correct if there is no tabbing (recommended!) @@ -20933,10 +22392,11 @@ sub entab_and_output { else { # shouldn't happen - program error counting whitespace - # we'll skip entabbing - warning( -"Error entabbing in entab_and_output: expected count=$leading_space_count\n" - ); + # - skip entabbing + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" + ); } } @@ -20948,9 +22408,14 @@ sub entab_and_output { # shouldn't happen: if ( $space_count < 0 ) { - warning( -"Error entabbing in append_line: for level=$group_level count=$leading_space_count\n" - ); + + # But it could be an outdented comment + if ( $line !~ /^\s*#/ ) { + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n" + ); + } $leading_string = ( ' ' x $leading_space_count ); } else { @@ -20963,16 +22428,14 @@ sub entab_and_output { # shouldn't happen - program error counting whitespace # we'll skip entabbing - warning( -"Error entabbing in entab_and_output: expected count=$leading_space_count\n" - ); + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" + ); } } } $file_writer_object->write_code_line( $line . "\n" ); - if ($seqno_string) { - $last_nonblank_seqno_string = $seqno_string; - } } { # begin get_leading_string @@ -21022,9 +22485,12 @@ sub entab_and_output { # shouldn't happen: if ( $space_count < 0 ) { - warning( -"Error in append_line: for level=$group_level count=$leading_whitespace_count\n" - ); + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n" + ); + + # -- skip entabbing $leading_string = ( ' ' x $leading_whitespace_count ); } else { @@ -21297,7 +22763,7 @@ sub really_open_debug_file { my $debug_file = $self->{_debug_file}; my $fh; unless ( $fh = IO::File->new("> $debug_file") ) { - warn("can't open $debug_file: $!\n"); + Perl::Tidy::Warn("can't open $debug_file: $!\n"); } $self->{_debug_file_opened} = 1; $self->{_fh} = $fh; @@ -21474,7 +22940,7 @@ BEGIN { use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; my $debug_warning = sub { - print "TOKENIZER_DEBUGGING with key $_[0]\n"; + print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n"; }; TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); @@ -21487,7 +22953,7 @@ BEGIN { use Carp; -# PACKAGE VARIABLES for for processing an entire FILE. +# PACKAGE VARIABLES for processing an entire FILE. use vars qw{ $tokenizer_self @@ -21541,6 +23007,7 @@ use vars qw{ %is_digraph %is_file_test_operator %is_trigraph + %is_tetragraph %is_valid_token_type %is_keyword %is_code_block_token @@ -21590,13 +23057,13 @@ sub new { logger_object => undef, starting_level => undef, indent_columns => 4, - tabs => 0, - entab_leading_space => undef, + tabsize => 8, look_for_hash_bang => 0, trim_qw => 1, look_for_autoloader => 1, look_for_selfloader => 1, starting_line_number => 1, + extended_syntax => 0, ); my %args = ( %defaults, @_ ); @@ -21622,8 +23089,6 @@ sub new { # _in_attribute_list flag telling if we are looking for attributes # _in_quote flag telling if we are chasing a quote # _starting_level indentation level of first line - # _input_tabstr string denoting one indentation level of input file - # _know_input_tabstr flag indicating if we know _input_tabstr # _line_buffer_object object with get_line() method to supply source code # _diagnostics_object place to write debugging information # _unexpected_error_count error count used to limit output @@ -21644,13 +23109,12 @@ sub new { _line_start_quote => -1, _starting_level => $args{starting_level}, _know_starting_level => defined( $args{starting_level} ), - _tabs => $args{tabs}, - _entab_leading_space => $args{entab_leading_space}, + _tabsize => $args{tabsize}, _indent_columns => $args{indent_columns}, _look_for_hash_bang => $args{look_for_hash_bang}, _trim_qw => $args{trim_qw}, - _input_tabstr => "", - _know_input_tabstr => -1, + _continuation_indentation => $args{continuation_indentation}, + _outdent_labels => $args{outdent_labels}, _last_line_number => $args{starting_line_number} - 1, _saw_perl_dash_P => 0, _saw_perl_dash_w => 0, @@ -21674,6 +23138,7 @@ sub new { _nearly_matched_here_target_at => undef, _line_text => "", _rlower_case_labels_at => undef, + _extended_syntax => $args{extended_syntax}, }; prepare_for_a_new_file(); @@ -21860,7 +23325,7 @@ sub report_tokenization_errors { write_logfile_entry("Suggest including 'use strict;'\n"); } - # it is suggested that lables have at least one upper case character + # it is suggested that labels have at least one upper case character # for legibility and to avoid code breakage as new keywords are introduced if ( $tokenizer_self->{_rlower_case_labels_at} ) { my @lower_case_labels_at = @@ -21915,7 +23380,7 @@ sub get_line { $input_line_separator = $2 . $input_line_separator; } - # for backwards compatability we keep the line text terminated with + # for backwards compatibility we keep the line text terminated with # a newline character $input_line .= "\n"; $tokenizer_self->{_line_text} = $input_line; # update @@ -21948,21 +23413,21 @@ sub get_line { # _ending_in_quote - this line ends in a multi-line quote # (so don't trim trailing blanks!) my $line_of_tokens = { - _line_type => 'EOF', - _line_text => $input_line, - _line_number => $input_line_number, - _rtoken_type => undef, - _rtokens => undef, - _rlevels => undef, - _rslevels => undef, - _rblock_type => undef, - _rcontainer_type => undef, - _rcontainer_environment => undef, - _rtype_sequence => undef, - _rnesting_tokens => undef, - _rci_levels => undef, - _rnesting_blocks => undef, - _python_indentation_level => -1, ## 0, + _line_type => 'EOF', + _line_text => $input_line, + _line_number => $input_line_number, + _rtoken_type => undef, + _rtokens => undef, + _rlevels => undef, + _rslevels => undef, + _rblock_type => undef, + _rcontainer_type => undef, + _rcontainer_environment => undef, + _rtype_sequence => undef, + _rnesting_tokens => undef, + _rci_levels => undef, + _rnesting_blocks => undef, + _guessed_indentation_level => 0, _starting_in_quote => 0, # to be set by subroutine _ending_in_quote => 0, _curly_brace_depth => $brace_depth, @@ -22050,7 +23515,7 @@ sub get_line { } # must print line unchanged if we have seen a severe error (i.e., we - # are seeing illegal tokens and connot continue. Syntax errors do + # are seeing illegal tokens and cannot continue. Syntax errors do # not pass this route). Calling routine can decide what to do, but # the default can be to just pass all lines as if they were after __END__ elsif ( $tokenizer_self->{_in_error} ) { @@ -22206,15 +23671,9 @@ sub get_line { # update indentation levels for log messages if ( $input_line !~ /^\s*$/ ) { - my $rlevels = $line_of_tokens->{_rlevels}; - my $structural_indentation_level = $$rlevels[0]; - my ( $python_indentation_level, $msg ) = - find_indentation_level( $input_line, $structural_indentation_level ); - if ($msg) { write_logfile_entry("$msg") } - if ( $tokenizer_self->{_know_input_tabstr} == 1 ) { - $line_of_tokens->{_python_indentation_level} = - $python_indentation_level; - } + my $rlevels = $line_of_tokens->{_rlevels}; + $line_of_tokens->{_guessed_indentation_level} = + guess_old_indentation_level($input_line); } # see if this line contains here doc targets @@ -22310,9 +23769,14 @@ sub get_line { sub find_starting_indentation_level { + # We need to find the indentation level of the first line of the + # script being formatted. Often it will be zero for an entire file, + # but if we are formatting a local block of code (within an editor for + # example) it may not be zero. The user may specify this with the + # -sil=n parameter but normally doesn't so we have to guess. + # # USES GLOBAL VARIABLES: $tokenizer_self - my $starting_level = 0; - my $know_input_tabstr = -1; # flag for find_indentation_level + my $starting_level = 0; # use value if given as parameter if ( $tokenizer_self->{_know_starting_level} ) { @@ -22327,8 +23791,7 @@ sub find_starting_indentation_level { # otherwise figure it out from the input file else { my $line; - my $i = 0; - my $structural_indentation_level = -1; # flag for find_indentation_level + my $i = 0; # keep looking at lines until we find a hash bang or piece of code my $msg = ""; @@ -22343,171 +23806,59 @@ sub find_starting_indentation_level { } next if ( $line =~ /^\s*#/ ); # skip past comments next if ( $line =~ /^\s*$/ ); # skip past blank lines - ( $starting_level, $msg ) = - find_indentation_level( $line, $structural_indentation_level ); - if ($msg) { write_logfile_entry("$msg") } + $starting_level = guess_old_indentation_level($line); last; } $msg = "Line $i implies starting-indentation-level = $starting_level\n"; - - if ( $starting_level > 0 ) { - - my $input_tabstr = $tokenizer_self->{_input_tabstr}; - if ( $input_tabstr eq "\t" ) { - $msg .= "by guessing input tabbing uses 1 tab per level\n"; - } - else { - my $cols = length($input_tabstr); - $msg .= - "by guessing input tabbing uses $cols blanks per level\n"; - } - } write_logfile_entry("$msg"); } $tokenizer_self->{_starting_level} = $starting_level; reset_indentation_level($starting_level); } -# Find indentation level given a input line. At the same time, try to -# figure out the input tabbing scheme. -# -# There are two types of calls: -# -# Type 1: $structural_indentation_level < 0 -# In this case we have to guess $input_tabstr to figure out the level. -# -# Type 2: $structural_indentation_level >= 0 -# In this case the level of this line is known, and this routine can -# update the tabbing string, if still unknown, to make the level correct. - -sub find_indentation_level { - my ( $line, $structural_indentation_level ) = @_; +sub guess_old_indentation_level { + my ($line) = @_; + # Guess the indentation level of an input line. + # + # For the first line of code this result will define the starting + # indentation level. It will mainly be non-zero when perltidy is applied + # within an editor to a local block of code. + # + # This is an impossible task in general because we can't know what tabs + # meant for the old script and how many spaces were used for one + # indentation level in the given input script. For example it may have + # been previously formatted with -i=7 -et=3. But we can at least try to + # make sure that perltidy guesses correctly if it is applied repeatedly to + # a block of code within an editor, so that the block stays at the same + # level when perltidy is applied repeatedly. + # # USES GLOBAL VARIABLES: $tokenizer_self my $level = 0; - my $msg = ""; - - my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr}; - my $input_tabstr = $tokenizer_self->{_input_tabstr}; - - # find leading whitespace - my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : ""; - - # make first guess at input tabbing scheme if necessary - if ( $know_input_tabstr < 0 ) { - - $know_input_tabstr = 0; - - # When -et=n is used for the output formatting, we will assume that - # tabs in the input formatting were also produced with -et=n. This may - # not be true, but it is the best guess because it will keep leading - # whitespace unchanged on repeated formatting on small pieces of code - # when -et=n is used. Thanks to Sam Kington for this patch. - if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) { - $leading_whitespace =~ s{^ (\t*) } - { " " x (length($1) * $tabsize) }xe; - $input_tabstr = " " x $tokenizer_self->{_indent_columns}; - } - elsif ( $tokenizer_self->{_tabs} ) { - $input_tabstr = "\t"; - if ( length($leading_whitespace) > 0 ) { - if ( $leading_whitespace !~ /\t/ ) { - - my $cols = $tokenizer_self->{_indent_columns}; - - if ( length($leading_whitespace) < $cols ) { - $cols = length($leading_whitespace); - } - $input_tabstr = " " x $cols; - } - } - } - else { - $input_tabstr = " " x $tokenizer_self->{_indent_columns}; - - if ( length($leading_whitespace) > 0 ) { - if ( $leading_whitespace =~ /^\t/ ) { - $input_tabstr = "\t"; - } - } - } - $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; - $tokenizer_self->{_input_tabstr} = $input_tabstr; - } - # determine the input tabbing scheme if possible - if ( ( $know_input_tabstr == 0 ) - && ( length($leading_whitespace) > 0 ) - && ( $structural_indentation_level > 0 ) ) - { - my $saved_input_tabstr = $input_tabstr; - - # check for common case of one tab per indentation level - if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { - if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { - $input_tabstr = "\t"; - $msg = "Guessing old indentation was tab character\n"; - } - } - - else { - - # detab any tabs based on 8 blanks per tab - my $entabbed = ""; - if ( $leading_whitespace =~ s/^\t+/ /g ) { - $entabbed = "entabbed"; - } + # find leading tabs, spaces, and any statement label + my $spaces = 0; + if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { - # now compute tabbing from number of spaces - my $columns = - length($leading_whitespace) / $structural_indentation_level; - if ( $columns == int $columns ) { - $msg = - "Guessing old indentation was $columns $entabbed spaces\n"; - } - else { - $columns = int $columns; - $msg = -"old indentation is unclear, using $columns $entabbed spaces\n"; - } - $input_tabstr = " " x $columns; - } - $know_input_tabstr = 1; - $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; - $tokenizer_self->{_input_tabstr} = $input_tabstr; + # If there are leading tabs, we use the tab scheme for this run, if + # any, so that the code will remain stable when editing. + if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} } - # see if mistakes were made - if ( ( $tokenizer_self->{_starting_level} > 0 ) - && !$tokenizer_self->{_know_starting_level} ) - { + if ($2) { $spaces += length($2) } - if ( $input_tabstr ne $saved_input_tabstr ) { - complain( -"I made a bad starting level guess; rerun with a value for -sil \n" - ); - } + # correct for outdented labels + if ( $3 && $tokenizer_self->{'_outdent_labels'} ) { + $spaces += $tokenizer_self->{_continuation_indentation}; } } - # use current guess at input tabbing to get input indentation level - # - # Patch to handle a common case of entabbed leading whitespace - # If the leading whitespace equals 4 spaces and we also have - # tabs, detab the input whitespace assuming 8 spaces per tab. - if ( length($input_tabstr) == 4 ) { - $leading_whitespace =~ s/^\t+/ /g; - } - - if ( ( my $len_tab = length($input_tabstr) ) > 0 ) { - my $pos = 0; - - while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr ) - { - $pos += $len_tab; - $level++; - } - } - return ( $level, $msg ); + # compute indentation using the value of -i for this run. + # If -i=0 is used for this run (which is possible) it doesn't matter + # what we do here but we'll guess that the old run used 4 spaces per level. + my $indent_columns = $tokenizer_self->{_indent_columns}; + $indent_columns = 4 if ( !$indent_columns ); + $level = int( $spaces / $indent_columns ); + return ($level); } # This is a currently unused debug routine @@ -22921,7 +24272,7 @@ sub prepare_for_a_new_file { sub scan_identifier { ( $i, $tok, $type, $id_scan_state, $identifier ) = scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, - $max_token_index, $expecting ); + $max_token_index, $expecting, $paren_type[$paren_depth] ); } sub scan_id { @@ -22981,7 +24332,8 @@ sub prepare_for_a_new_file { # keyword ( .... ) { BLOCK } # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' my %is_blocktype_with_paren; - @_ = qw(if elsif unless while until for foreach switch case given when); + @_ = + qw(if elsif unless while until for foreach switch case given when catch); @is_blocktype_with_paren{@_} = (1) x scalar(@_); # ------------------------------------------------------------ @@ -23042,7 +24394,7 @@ sub prepare_for_a_new_file { $tokenizer_self->{_saw_perl_dash_w} = 1; } - # Check for indentifier in indirect object slot + # Check for identifier in indirect object slot # (vorboard.pl, sort.t). Something like: # /^(print|printf|sort|exec|system)$/ if ( @@ -23064,6 +24416,9 @@ sub prepare_for_a_new_file { $container_type = $want_paren; $want_paren = ""; } + elsif ( $statement_type =~ /^sub\b/ ) { + $container_type = $statement_type; + } else { $container_type = $last_nonblank_token; @@ -23180,6 +24535,12 @@ sub prepare_for_a_new_file { $container_type = $paren_type[$paren_depth]; + # restore statement type as 'sub' at closing paren of a signature + # so that a subsequent ':' is identified as an attribute + if ( $container_type =~ /^sub\b/ ) { + $statement_type = $container_type; + } + # /^(for|foreach)$/ if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { my $num_sc = $paren_semicolon_count[$paren_depth]; @@ -23205,6 +24566,7 @@ sub prepare_for_a_new_file { ';' => sub { $context = UNKNOWN_CONTEXT; $statement_type = ''; + $want_paren = ""; # /^(for|foreach)$/ if ( $is_for_foreach{ $paren_type[$paren_depth] } ) @@ -23250,7 +24612,7 @@ sub prepare_for_a_new_file { '/' => sub { my $is_pattern; - if ( $expecting == UNKNOWN ) { # indeterminte, must guess.. + if ( $expecting == UNKNOWN ) { # indeterminate, must guess.. my $msg; ( $is_pattern, $msg ) = guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, @@ -23266,7 +24628,7 @@ sub prepare_for_a_new_file { if ($is_pattern) { $in_quote = 1; $type = 'Q'; - $allowed_quote_modifiers = '[msixpodualgc]'; + $allowed_quote_modifiers = '[msixpodualngc]'; } else { # not a pattern; check for a /= token @@ -23321,9 +24683,21 @@ sub prepare_for_a_new_file { # check for syntax error here; unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { - my $list = join( ' ', sort keys %is_blocktype_with_paren ); - warning( - "syntax error at ') {', didn't see one of: $list\n"); + if ( $tokenizer_self->{'_extended_syntax'} ) { + + # we append a trailing () to mark this as an unknown + # block type. This allows perltidy to format some + # common extensions of perl syntax. + # This is used by sub code_block_type + $last_nonblank_token .= '()'; + } + else { + my $list = + join( ' ', sort keys %is_blocktype_with_paren ); + warning( +"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n" + ); + } } } @@ -23355,7 +24729,7 @@ sub prepare_for_a_new_file { # allow paren-less identifier after 'when' # if the brace is preceded by a space if ( $statement_type eq 'when' - && $last_nonblank_type eq 'i' + && $last_nonblank_type eq 'i' && $last_last_nonblank_type eq 'k' && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) { @@ -23392,6 +24766,7 @@ sub prepare_for_a_new_file { } } } + $brace_type[ ++$brace_depth ] = $block_type; $brace_package[$brace_depth] = $current_package; $brace_structural_type[$brace_depth] = $type; @@ -23416,11 +24791,11 @@ sub prepare_for_a_new_file { $type = 'R'; } - # propagate type information for 'do' and 'eval' blocks. - # This is necessary to enable us to know if an operator - # or term is expected next - if ( $is_block_operator{ $brace_type[$brace_depth] } ) { - $tok = $brace_type[$brace_depth]; + # propagate type information for 'do' and 'eval' blocks, and also + # for smartmatch operator. This is necessary to enable us to know + # if an operator or term is expected next. + if ( $is_block_operator{$block_type} ) { + $tok = $block_type; } $context = $brace_context[$brace_depth]; @@ -23480,7 +24855,7 @@ sub prepare_for_a_new_file { if ($is_pattern) { $in_quote = 1; $type = 'Q'; - $allowed_quote_modifiers = '[msixpodualgc]'; + $allowed_quote_modifiers = '[msixpodualngc]'; } else { ( $type_sequence, $indent_flag ) = @@ -23534,7 +24909,7 @@ sub prepare_for_a_new_file { # ATTRS: check for a ':' which introduces an attribute list # (this might eventually get its own token type) - elsif ( $statement_type =~ /^sub/ ) { + elsif ( $statement_type =~ /^sub\b/ ) { $type = 'A'; $in_attribute_list = 1; } @@ -23609,6 +24984,14 @@ sub prepare_for_a_new_file { { $type = '}'; } + + # propagate type information for smartmatch operator. This is + # necessary to enable us to know if an operator or term is expected + # next. + if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) { + $tok = $square_bracket_type[$square_bracket_depth]; + } + if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } }, '-' => sub { # what kind of minus? @@ -23836,21 +25219,22 @@ sub prepare_for_a_new_file { '__DATA__' => '_in_data', ); - # ref: camel 3 p 147, + # original ref: camel 3 p 147, # but perl may accept undocumented flags # perl 5.10 adds 'p' (preserve) - # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these: - # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc - # s/PATTERN/REPLACEMENT/msixpodualgcer + # Perl version 5.22 added 'n' + # From http://perldoc.perl.org/perlop.html we have + # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc + # s/PATTERN/REPLACEMENT/msixpodualngcer # y/SEARCHLIST/REPLACEMENTLIST/cdsr # tr/SEARCHLIST/REPLACEMENTLIST/cdsr - # qr/STRING/msixpodual + # qr/STRING/msixpodualn my %quote_modifiers = ( - 's' => '[msixpodualgcer]', + 's' => '[msixpodualngcer]', 'y' => '[cdsr]', 'tr' => '[cdsr]', - 'm' => '[msixpodualgc]', - 'qr' => '[msixpodual]', + 'm' => '[msixpodualngc]', + 'qr' => '[msixpodualn]', 'q' => "", 'qq' => "", 'qw' => "", @@ -23920,7 +25304,7 @@ sub prepare_for_a_new_file { # For example, I used 'v' for v-strings. # # *. Implement coding to recognize the $type of the token in this routine. - # This is the hardest part, and is best done by immitating or modifying + # This is the hardest part, and is best done by imitating or modifying # some of the existing coding. For example, to recognize v-strings, I # patched 'sub scan_bare_identifier' to recognize v-strings beginning with # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. @@ -23984,7 +25368,7 @@ sub prepare_for_a_new_file { if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { # must not be in multi-line quote - # and must not be in an eqn + # and must not be in an equation if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) { $tokenizer_self->{_in_pod} = 1; @@ -24003,6 +25387,11 @@ sub prepare_for_a_new_file { $input_line =~ s/^\s*//; # trim left end } + # Set a flag to indicate if we might be at an __END__ or __DATA__ line + # This will be used below to avoid quoting a bare word followed by + # a fat comma. + my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/; + # update the copy of the line for use in error messages # This must be exactly what we give the pre_tokenizer $tokenizer_self->{_line_text} = $input_line; @@ -24183,7 +25572,7 @@ EOM } } - unless ( $tok =~ /^\s*$/ ) { + unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) { # try to catch some common errors if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { @@ -24277,11 +25666,20 @@ EOM # '//' must be defined_or operator if an operator is expected. # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) # could be migrated here for clarity - if ( $test_tok eq '//' ) { + + # Patch for RT#102371, misparsing a // in the following snippet: + # state $b //= ccc(); + # The solution is to always accept the digraph (or trigraph) after + # token type 'Z' (possible file handle). The reason is that + # sub operator_expected gives TERM expected here, which is + # wrong in this case. + if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) { my $next_type = $$rtokens[ $i + 1 ]; my $expecting = operator_expected( $prev_type, $tok, $next_type ); - $combine_ok = 0 unless ( $expecting == OPERATOR ); + + # Patched for RT#101547, was 'unless ($expecting==OPERATOR)' + $combine_ok = 0 if ( $expecting == TERM ); } } @@ -24305,6 +25703,17 @@ EOM $tok = $test_tok; $i++; } + + # The only current tetragraph is the double diamond operator + # and its first three characters are not a trigraph, so + # we do can do a special test for it + elsif ( $test_tok eq '<<>' ) { + $test_tok .= $$rtokens[ $i + 2 ]; + if ( $is_tetragraph{$test_tok} ) { + $tok = $test_tok; + $i += 2; + } + } } $type = $tok; @@ -24319,7 +25728,7 @@ EOM $brace_type[$brace_depth], $paren_depth, $paren_type[$paren_depth] ); - print "TOKENIZE:(@debug_list)\n"; + print STDOUT "TOKENIZE:(@debug_list)\n"; }; # turn off attribute list on first non-blank, non-bareword @@ -24358,7 +25767,9 @@ EOM } # quote a word followed by => operator - if ( $next_nonblank_token eq '=' ) { + # unless the word __END__ or __DATA__ and the only word on + # the line. + if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) { if ( $$rtokens[ $i_next + 1 ] eq '>' ) { if ( $is_constant{$current_package}{$tok} ) { @@ -24426,7 +25837,10 @@ EOM $type = 'n'; } } - + elsif ( $tok_kw eq 'CORE::' ) { + $type = $tok = $tok_kw; + $i += 2; + } elsif ( ( $tok eq 'strict' ) and ( $last_nonblank_token eq 'use' ) ) { @@ -24492,7 +25906,11 @@ EOM # Assume qw is used as a quote and okay, as in: # use constant qw{ DEBUG 0 }; # Not worth trying to parse for just a warning - if ( $next_nonblank_token ne 'qw' ) { + + # NOTE: This warning is deactivated because recent + # versions of perl do not complain here, but + # the coding is retained for reference. + if ( 0 && $next_nonblank_token ne 'qw' ) { warning( "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" ); @@ -24502,23 +25920,40 @@ EOM # FIXME: could check for error in which next token is # not a word (number, punctuation, ..) else { - $is_constant{$current_package} - {$next_nonblank_token} = 1; + $is_constant{$current_package}{$next_nonblank_token} + = 1; } } } # various quote operators elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { +##NICOL PATCH if ( $expecting == OPERATOR ) { - # patch for paren-less for/foreach glitch, part 1 - # perl will accept this construct as valid: + # Be careful not to call an error for a qw quote + # where a parenthesized list is allowed. For example, + # it could also be a for/foreach construct such as # # foreach my $key qw\Uno Due Tres Quadro\ { # print "Set $key\n"; # } - unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} ) + # + + # Or it could be a function call. + # NOTE: Braces in something like &{ xxx } are not + # marked as a block, we might have a method call. + # &method(...), $method->(..), &{method}(...), + # $ref[2](list) is ok & short for $ref[2]->(list) + # + # See notes in 'sub code_block_type' and + # 'sub is_non_structural_brace' + + unless ( + $tok eq 'qw' + && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ + || $is_for_foreach{$want_paren} ) + ) { error_if_expecting_OPERATOR(); } @@ -24541,7 +25976,7 @@ EOM elsif ( ( $next_nonblank_token eq ':' ) && ( $$rtokens[ $i_next + 1 ] ne ':' ) - && ( $i_next <= $max_token_index ) # colon on same line + && ( $i_next <= $max_token_index ) # colon on same line && label_ok() ) { @@ -24612,9 +26047,17 @@ EOM elsif ( $tok eq 'else' ) { # patched for SWITCH/CASE - if ( $last_nonblank_token ne ';' + if ( + $last_nonblank_token ne ';' && $last_nonblank_block_type !~ - /^(if|elsif|unless|case|when)$/ ) + /^(if|elsif|unless|case|when)$/ + + # patch to avoid an unwanted error message for + # the case of a parenless 'case' (RT 105484): + # switch ( 1 ) { case x { 2 } else { } } + && $statement_type !~ + /^(if|elsif|unless|case|when)$/ + ) { warning( "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" @@ -24878,7 +26321,7 @@ EOM # running value of this variable is $level_in_tokenizer. # # The total continuation is much more difficult to compute, and requires -# several variables. These veriables are: +# several variables. These variables are: # # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for # each indentation level, if there are intervening open secondary @@ -24890,7 +26333,7 @@ EOM # indentation level, if the level is of type BLOCK or not. # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string # $nesting_list_string = a string of 1's and 0's indicating, for each -# indentation level, if it is is appropriate for list formatting. +# indentation level, if it is appropriate for list formatting. # If so, continuation indentation is used to indent long list items. # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string # @{$rslevel_stack} = a stack of total nesting depths at each @@ -25110,7 +26553,7 @@ EOM $indented_if_level = $level_in_tokenizer; } - # do not change container environement here if we are not + # do not change container environment here if we are not # at a real list. Adding this check prevents "blinkers" # often near 'unless" clauses, such as in the following # code: @@ -25267,7 +26710,7 @@ EOM } # If we are in a list, then - # we must set continuatoin indentation at the closing + # we must set continuation indentation at the closing # paren of something like this (paren after $check): # assert( # __LINE__, @@ -25353,8 +26796,15 @@ EOM $in_statement_continuation = 0; } - # otherwise, the next token after a ',' starts a new term - elsif ( $type eq ',' ) { + # otherwise, the token after a ',' starts a new term + + # Patch FOR RT#99961; no continuation after a ';' + # This is needed because perltidy currently marks + # a block preceded by a type character like % or @ + # as a non block, to simplify formatting. But these + # are actually blocks and can have semicolons. + # See code_block_type() and is_non_structural_brace(). + elsif ( $type eq ',' || $type eq ';' ) { $in_statement_continuation = 0; } @@ -25373,10 +26823,10 @@ EOM } } - # set secondary nesting levels based on all continment token types + # set secondary nesting levels based on all containment token types # Note: these are set so that the nesting depth is the depth # of the PREVIOUS TOKEN, which is convenient for setting - # the stength of token bonds + # the strength of token bonds my $slevel_i = $slevel_in_tokenizer; # /^[L\{\(\[]$/ @@ -25480,7 +26930,7 @@ sub operator_expected { # OPERATOR. # # If a UNKNOWN is returned, the calling routine must guess. A major - # goal of this tokenizer is to minimize the possiblity of returning + # goal of this tokenizer is to minimize the possibility of returning # UNKNOWN, because a wrong guess can spoil the formatting of a # script. # @@ -25497,7 +26947,7 @@ sub operator_expected { my $op_expected = UNKNOWN; -#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; +##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; # Note: function prototype is available for token type 'U' for future # program development. It contains the leading and trailing parens, @@ -25536,6 +26986,16 @@ sub operator_expected { } } + # Check for smartmatch operator before preceding brace or square bracket. + # For example, at the ? after the ] in the following expressions we are + # expecting an operator: + # + # qr/3/ ~~ ['1234'] ? 1 : 0; + # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; + elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) { + $op_expected = OPERATOR; + } + # handle something after 'do' and 'eval' elsif ( $is_block_operator{$last_nonblank_token} ) { @@ -25546,6 +27006,8 @@ sub operator_expected { } # something like $a = do { BLOCK } / 2; + # or this ? after a smartmatch anonynmous hash or array reference: + # qr/3/ ~~ ['1234'] ? 1 : 0; # ^ else { $op_expected = OPERATOR; # block mode following } @@ -25583,6 +27045,13 @@ sub operator_expected { { $op_expected = UNKNOWN; } + + # expecting VERSION or {} after package NAMESPACE + elsif ($statement_type =~ /^package\b/ + && $last_nonblank_token =~ /^package\b/ ) + { + $op_expected = TERM; + } } # no operator after many keywords, such as "die", "warn", etc @@ -25593,7 +27062,7 @@ sub operator_expected { # TODO: This list is incomplete, and these should be put # into a hash. if ( $tok eq '/' - && $next_type eq '/' + && $next_type eq '/' && $last_nonblank_type eq 'k' && $last_nonblank_token =~ /^eof|undef|shift|pop$/ ) { @@ -25638,6 +27107,17 @@ sub operator_expected { { $op_expected = OPERATOR; } + + # Patch for RT #116344: misparse a ternary operator after an anonymous + # hash, like this: + # return ref {} ? 1 : 0; + # The right brace should really be marked type 'R' in this case, and + # it is safest to return an UNKNOWN here. Expecting a TERM will + # cause the '?' to always be interpreted as a pattern delimiter + # rather than introducing a ternary operator. + elsif ( $tok eq '?' ) { + $op_expected = UNKNOWN; + } else { $op_expected = TERM; } @@ -25654,7 +27134,7 @@ sub operator_expected { } TOKENIZER_DEBUG_FLAG_EXPECT && do { - print + print STDOUT "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; }; return $op_expected; @@ -25686,10 +27166,10 @@ sub label_ok { return $brace_type[$brace_depth]; } - # otherwise, it is a label if and only if it follows a ';' - # (real or fake) + # otherwise, it is a label if and only if it follows a ';' (real or fake) + # or another label else { - return ( $last_nonblank_type eq ';' ); + return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); } } @@ -25753,12 +27233,15 @@ sub code_block_type { } } + ################################################################ # NOTE: braces after type characters start code blocks, but for # simplicity these are not identified as such. See also # sub is_non_structural_brace. - # elsif ( $last_nonblank_type eq 't' ) { - # return $last_nonblank_token; - # } + ################################################################ + +## elsif ( $last_nonblank_type eq 't' ) { +## return $last_nonblank_token; +## } # brace after label: elsif ( $last_nonblank_type eq 'J' ) { @@ -25785,13 +27268,17 @@ sub code_block_type { } } - # or a sub definition + # or a sub or package BLOCK elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) && $last_nonblank_token =~ /^(sub|package)\b/ ) { return $last_nonblank_token; } + elsif ( $statement_type =~ /^(sub|package)\b/ ) { + return $statement_type; + } + # user-defined subs with block parameters (like grep/map/eval) elsif ( $last_nonblank_type eq 'G' ) { return $last_nonblank_token; @@ -25803,6 +27290,33 @@ sub code_block_type { $max_token_index ); } + # Patch for bug # RT #94338 reported by Daniel Trizen + # for-loop in a parenthesized block-map triggering an error message: + # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) ); + # Check for a code block within a parenthesized function call + elsif ( $last_nonblank_token eq '(' ) { + my $paren_type = $paren_type[$paren_depth]; + if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) { + + # We will mark this as a code block but use type 't' instead + # of the name of the contining function. This will allow for + # correct parsing but will usually produce better formatting. + # Braces with block type 't' are not broken open automatically + # in the formatter as are other code block types, and this usually + # works best. + return 't'; # (Not $paren_type) + } + else { + return ""; + } + } + + # handle unknown syntax ') {' + # we previously appended a '()' to mark this case + elsif ( $last_nonblank_token =~ /\(\)$/ ) { + return $last_nonblank_token; + } + # anything else must be anonymous hash reference else { return ""; @@ -25813,6 +27327,7 @@ sub decide_if_code_block { # USES GLOBAL VARIABLES: $last_nonblank_token my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; + my ( $next_nonblank_token, $i_next ) = find_next_nonblank_token( $i, $rtokens, $max_token_index ); @@ -25850,8 +27365,14 @@ sub decide_if_code_block { # We are only going to look ahead one more (nonblank/comment) line. # Strange formatting could cause a bad guess, but that's unlikely. - my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ]; - my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ]; + my @pre_types; + my @pre_tokens; + + # Ignore the rest of this line if it is a side comment + if ( $next_nonblank_token ne '#' ) { + @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ]; + @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ]; + } my ( $rpre_tokens, $rpre_types ) = peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but # generous, and prevents @@ -25862,7 +27383,8 @@ sub decide_if_code_block { push @pre_tokens, @$rpre_tokens; } - # put a sentinal token to simplify stopping the search + # put a sentinel token to simplify stopping the search + push @pre_types, '}'; push @pre_types, '}'; my $jbeg = 0; @@ -25890,9 +27412,7 @@ sub decide_if_code_block { $j++; } elsif ( $pre_types[$j] eq 'w' ) { - unless ( $is_keyword{ $pre_tokens[$j] } ) { - $j++; - } + $j++; } elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { $j++; @@ -25901,9 +27421,18 @@ sub decide_if_code_block { $j++ if $pre_types[$j] eq 'b'; - # it's a hash ref if a comma or => follow next - if ( $pre_types[$j] eq ',' - || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) ) + # Patched for RT #95708 + if ( + + # it is a comma which is not a pattern delimeter except for qw + ( + $pre_types[$j] eq ',' + && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/ + ) + + # or a => + || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) + ) { $code_block_type = ""; } @@ -25968,10 +27497,13 @@ sub is_non_structural_brace { # return 0; # } + ################################################################ # NOTE: braces after type characters start code blocks, but for # simplicity these are not identified as such. See also # sub code_block_type - # if ($last_nonblank_type eq 't') {return 0} + ################################################################ + + ##if ($last_nonblank_type eq 't') {return 0} # otherwise, it is non-structural if it is decorated # by type information. @@ -26116,7 +27648,7 @@ sub decrease_nesting_depth { if ( $saw_brace_error <= MAX_NAG_MESSAGES - # if too many closing types have occured, we probably + # if too many closing types have occurred, we probably # already caught this error && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) ) @@ -26329,7 +27861,7 @@ sub guess_if_pattern_or_division { my $msg = "guessing that / after $last_nonblank_token starts a "; if ( $i >= $max_token_index ) { - "division (no end to pattern found on the line)\n"; + $msg .= "division (no end to pattern found on the line)\n"; } else { my $ibeg = $i; @@ -26526,7 +28058,7 @@ sub scan_bare_identifier_do { if ( $type eq 'w' ) { # check for v-string with leading 'v' type character - # (This seems to have presidence over filehandle, type 'Y') + # (This seems to have precedence over filehandle, type 'Y') if ( $tok =~ /^v\d[_\d]*$/ ) { # we only have the first part - something like 'v101' - @@ -26764,7 +28296,7 @@ sub scan_id_do { } TOKENIZER_DEBUG_FLAG_NSCAN && do { - print + print STDOUT "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; }; return ( $i, $tok, $type, $id_scan_state ); @@ -26812,6 +28344,19 @@ sub do_scan_package { # token following a 'package' token. # USES GLOBAL VARIABLES: $current_package, + # package NAMESPACE + # package NAMESPACE VERSION + # package NAMESPACE BLOCK + # package NAMESPACE VERSION BLOCK + # + # If VERSION is provided, package sets the $VERSION variable in the given + # namespace to a version object with the VERSION provided. VERSION must be + # a "strict" style version number as defined by the version module: a + # positive decimal number (integer or decimal-fraction) without + # exponentiation or else a dotted-decimal v-string with a leading 'v' + # character and at least three components. + # reference http://perldoc.perl.org/functions/package.html + my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, $max_token_index ) = @_; @@ -26840,10 +28385,25 @@ sub do_scan_package { if ($error) { warning("Possibly invalid package\n") } $current_package = $package; - # check for error + # we should now have package NAMESPACE + # now expecting VERSION, BLOCK, or ; to follow ... + # package NAMESPACE VERSION + # package NAMESPACE BLOCK + # package NAMESPACE VERSION BLOCK my ( $next_nonblank_token, $i_next ) = find_next_nonblank_token( $i, $rtokens, $max_token_index ); - if ( $next_nonblank_token !~ /^[;\{\}]$/ ) { + + # check that something recognizable follows, but do not parse. + # A VERSION number will be parsed later as a number or v-string in the + # normal way. What is important is to set the statement type if + # everything looks okay so that the operator_expected() routine + # knows that the number is in a package statement. + # Examples of valid primitive tokens that might follow are: + # 1235 . ; { } v3 v + if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) { + $statement_type = $tok; + } + else { warning( "Unexpected '$next_nonblank_token' after package name '$tok'\n" ); @@ -26869,7 +28429,7 @@ sub scan_identifier_do { # $last_nonblank_type my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, - $expecting ) + $expecting, $container_type ) = @_; my $i_begin = $i; my $type = ''; @@ -26880,6 +28440,8 @@ sub scan_identifier_do { my $tok = $tok_begin; my $message = ""; + my $in_prototype_or_signature = $container_type =~ /^sub/; + # these flags will be used to help figure out the type: my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); my $saw_type; @@ -26963,6 +28525,11 @@ sub scan_identifier_do { last; } } + + # POSTDEFREF ->@ ->% ->& ->* + elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) { + $identifier .= $tok; + } elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. $saw_alpha = 1; $id_scan_state = ':'; # now need :: @@ -26981,25 +28548,34 @@ sub scan_identifier_do { # howdy::123::bubba(); # } - elsif ( $tok =~ /^[0-9]/ ) { # numeric + elsif ( $tok =~ /^[0-9]/ ) { # numeric $saw_alpha = 1; - $id_scan_state = ':'; # now need :: + $id_scan_state = ':'; # now need :: $identifier .= $tok; } elsif ( $tok eq '::' ) { $id_scan_state = 'A'; $identifier .= $tok; } - elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array + + # $# and POSTDEFREF ->$# + elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array $identifier .= $tok; # keep same state, a $ could follow } elsif ( $tok eq '{' ) { # check for something like ${#} or ${©} - if ( $identifier eq '$' + ##if ( $identifier eq '$' + if ( + ( + $identifier eq '$' + || $identifier eq '@' + || $identifier eq '$#' + ) && $i + 2 <= $max_token_index && $$rtokens[ $i + 2 ] eq '}' - && $$rtokens[ $i + 1 ] !~ /[\s\w]/ ) + && $$rtokens[ $i + 1 ] !~ /[\s\w]/ + ) { my $next2 = $$rtokens[ $i + 2 ]; my $next1 = $$rtokens[ $i + 1 ]; @@ -27073,11 +28649,23 @@ sub scan_identifier_do { } else { # something else + if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) { + $id_scan_state = ''; + $i = $i_save; + $type = 'i'; # probably punctuation variable + last; + } + # check for various punctuation variables if ( $identifier =~ /^[\$\*\@\%]$/ ) { $identifier .= $tok; } + # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#* + elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) { + $identifier .= $tok; + } + elsif ( $identifier eq '$#' ) { if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } @@ -27153,10 +28741,10 @@ sub scan_identifier_do { # # We have to be careful here. If we are in an unknown state, # we will reject the punctuation variable. In the following - # example the '&' is a binary opeator but we are in an unknown + # example the '&' is a binary operator but we are in an unknown # state because there is no sigil on 'Prima', so we don't # know what it is. But it is a bad guess that - # '&~' is a punction variable. + # '&~' is a function variable. # $self->{text}->{colorMap}->[ # Prima::PodView::COLOR_CODE_FOREGROUND # & ~tb::COLOR_INDEX ] = @@ -27341,9 +28929,9 @@ sub scan_identifier_do { TOKENIZER_DEBUG_FLAG_SCAN_ID && do { my ( $a, $b, $c ) = caller; - print + print STDOUT "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; - print + print STDOUT "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; }; return ( $i, $tok, $type, $id_scan_state, $identifier ); @@ -27383,20 +28971,16 @@ sub scan_identifier_do { my $pos_beg = $$rtoken_map[$i_beg]; pos($input_line) = $pos_beg; - # sub NAME PROTO ATTRS + # Look for the sub NAME if ( $input_line =~ m/\G\s* ((?:\w*(?:'|::))*) # package - something that ends in :: or ' (\w+) # NAME - required - (\s*\([^){]*\))? # PROTO - something in parens - (\s*:)? # ATTRS - leading : of attribute list /gcx ) { $match = 1; $subname = $2; - $proto = $3; - $attrs = $4; $package = ( defined($1) && $1 ) ? $1 : $current_package; $package =~ s/\'/::/g; @@ -27408,20 +28992,35 @@ sub scan_identifier_do { $type = 'i'; } - # Look for prototype/attributes not preceded on this line by subname; - # This might be an anonymous sub with attributes, + # Now look for PROTO ATTRS + # Look for prototype/attributes which are usually on the same + # line as the sub name but which might be on a separate line. + # For example, we might have an anonymous sub with attributes, # or a prototype on a separate line from its sub name - elsif ( - $input_line =~ m/\G(\s*\([^){]*\))? # PROTO + + # NOTE: We only want to parse PROTOTYPES here. If we see anything that + # does not look like a prototype, we assume it is a SIGNATURE and we + # will stop and let the the standard tokenizer handle it. In + # particular, we stop if we see any nested parens, braces, or commas. + my $saw_opening_paren = $input_line =~ /\G\s*\(/; + if ( + $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO (\s*:)? # ATTRS leading ':' /gcx && ( $1 || $2 ) ) { - $match = 1; $proto = $1; $attrs = $2; + # If we also found the sub name on this call then append PROTO. + # This is not necessary but for compatability with previous + # versions when the -csc flag is used: + if ( $match && $proto ) { + $tok .= $proto; + } + $match ||= 1; + # Handle prototype on separate line from subname if ($subname_saved) { $package = $package_saved; @@ -27448,8 +29047,8 @@ sub scan_identifier_do { $in_attribute_list = 1; } - # We must convert back from character position - # to pre_token index. + # Otherwise, if we found a match we must convert back from + # string position to the pre_token index for continued parsing. else { # I don't think an error flag can occur here ..but ? @@ -27477,6 +29076,8 @@ sub scan_identifier_do { } $package_saved = ""; $subname_saved = ""; + + # See what's next... if ( $next_nonblank_token eq '{' ) { if ($subname) { @@ -27508,19 +29109,21 @@ sub scan_identifier_do { $statement_type = $tok; } - # see if PROTO follows on another line: + # if we stopped before an open paren ... elsif ( $next_nonblank_token eq '(' ) { - if ( $attrs || $proto ) { - warning( -"unexpected '(' after definition or declaration of sub '$subname'\n" - ); - } - else { - $id_scan_state = 'sub'; # we must come back to get proto - $statement_type = $tok; - $package_saved = $package; - $subname_saved = $subname; + + # If we DID NOT see this paren above then it must be on the + # next line so we will set a flag to come back here and see if + # it is a PROTOTYPE + + # Otherwise, we assume it is a SIGNATURE rather than a + # PROTOTYPE and let the normal tokenizer handle it as a list + if ( !$saw_opening_paren ) { + $id_scan_state = 'sub'; # we must come back to get proto + $package_saved = $package; + $subname_saved = $subname; } + $statement_type = $tok; } elsif ($next_nonblank_token) { # EOF technically ok warning( @@ -27592,7 +29195,7 @@ sub numerator_expected { sub pattern_expected { # This is the start of a filter for a possible pattern. - # It looks at the token after a possbible pattern and tries to + # It looks at the token after a possible pattern and tries to # determine if that token could end a pattern. # returns - # 1 - yes @@ -27702,7 +29305,7 @@ sub find_angle_operator_termination { my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); # Reject if the closing '>' follows a '-' as in: - # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { } + # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } if ( $expecting eq UNKNOWN ) { my $check = substr( $input_line, $pos - 2, 1 ); if ( $check eq '-' ) { @@ -27826,7 +29429,8 @@ sub scan_number_do { # handle octal, hex, binary if ( !defined($number) ) { pos($input_line) = $pos_beg; - if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g ) + if ( $input_line =~ + /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) { $pos = pos($input_line); my $numc = $pos - $pos_beg; @@ -28095,7 +29699,7 @@ sub follow_quoted_string { my $quoted_string = ""; TOKENIZER_DEBUG_FLAG_QUOTE && do { - print + print STDOUT "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; }; @@ -28398,7 +30002,7 @@ sub show_tokens { for ( $i = 0 ; $i < $num ; $i++ ) { my $len = length( $$rtokens[$i] ); - print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; + print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; } } @@ -28450,7 +30054,7 @@ The following additional token types are defined: [ left non-structural square bracket (enclosing an array index) ] right non-structural square bracket ( left non-structural paren (all but a list right of an =) - ) right non-structural parena + ) right non-structural paren L left non-structural curly brace (enclosing a key) R right non-structural curly brace ; terminal semicolon @@ -28507,13 +30111,16 @@ BEGIN { my @digraphs = qw( .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> - <= >= == =~ !~ != ++ -- /= x= ~~ + <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. ); @is_digraph{@digraphs} = (1) x scalar(@digraphs); - my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ ); + my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.=); @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); + my @tetragraphs = qw( <<>> ); + @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs); + # make a hash of all valid token types for self-checking the tokenizer # (adding NEW_TOKENS : select a new character and add to this list) my @valid_token_types = qw# @@ -28522,8 +30129,8 @@ BEGIN { #; push( @valid_token_types, @digraphs ); push( @valid_token_types, @trigraphs ); - push( @valid_token_types, '#' ); - push( @valid_token_types, ',' ); + push( @valid_token_types, @tetragraphs ); + push( @valid_token_types, ( '#', ',', 'CORE::' ) ); @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); # a list of file test letters, as in -e (Table 3-4 of 'camel 3') @@ -28534,6 +30141,7 @@ BEGIN { # these functions have prototypes of the form (&), so when they are # followed by a block, that block MAY BE followed by an operator. + # Smartmatch operator ~~ may be followed by anonymous hash or array ref @_ = qw( do eval ); @is_block_operator{@_} = (1) x scalar(@_); @@ -28542,11 +30150,12 @@ BEGIN { @is_indirect_object_taker{@_} = (1) x scalar(@_); # These tokens may precede a code block - # patched for SWITCH/CASE + # patched for SWITCH/CASE/CATCH. Actually these could be removed + # now and we could let the extended-syntax coding handle them @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else unless do while until eval for foreach map grep sort - switch case given when); + switch case given when catch try finally); @is_code_block_token{@_} = (1) x scalar(@_); # I'll build the list of keywords incrementally @@ -28775,6 +30384,8 @@ BEGIN { when err say + + catch ); # patched above for SWITCH/CASE given/when err say @@ -28848,6 +30459,7 @@ BEGIN { **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ f F pp mm Y p m U J G j >> << ^ t + ~. ^. |. &. ^.= |.= &.= #; push( @value_requestor_type, ',' ) ; # (perl doesn't like a ',' in a qw block) @@ -28974,7 +30586,6 @@ BEGIN { # These are not used in any way yet # my @unused_keywords = qw( - # CORE # __FILE__ # __LINE__ # __PACKAGE__ @@ -28993,373 +30604,3 @@ BEGIN { @is_keyword{@Keywords} = (1) x scalar(@Keywords); } 1; -__END__ - -=head1 NAME - -Perl::Tidy - Parses and beautifies perl source - -=head1 SYNOPSIS - - use Perl::Tidy; - - Perl::Tidy::perltidy( - source => $source, - destination => $destination, - stderr => $stderr, - argv => $argv, - perltidyrc => $perltidyrc, - logfile => $logfile, - errorfile => $errorfile, - formatter => $formatter, # callback object (see below) - dump_options => $dump_options, - dump_options_type => $dump_options_type, - prefilter => $prefilter_coderef, - postfilter => $postfilter_coderef, - ); - -=head1 DESCRIPTION - -This module makes the functionality of the perltidy utility available to perl -scripts. Any or all of the input parameters may be omitted, in which case the -@ARGV array will be used to provide input parameters as described -in the perltidy(1) man page. - -For example, the perltidy script is basically just this: - - use Perl::Tidy; - Perl::Tidy::perltidy(); - -The module accepts input and output streams by a variety of methods. -The following list of parameters may be any of the following: a -filename, an ARRAY reference, a SCALAR reference, or an object with -either a B or B method, as appropriate. - - source - the source of the script to be formatted - destination - the destination of the formatted output - stderr - standard error output - perltidyrc - the .perltidyrc file - logfile - the .LOG file stream, if any - errorfile - the .ERR file stream, if any - dump_options - ref to a hash to receive parameters (see below), - dump_options_type - controls contents of dump_options - dump_getopt_flags - ref to a hash to receive Getopt flags - dump_options_category - ref to a hash giving category of options - dump_abbreviations - ref to a hash giving all abbreviations - -The following chart illustrates the logic used to decide how to -treat a parameter. - - ref($param) $param is assumed to be: - ----------- --------------------- - undef a filename - SCALAR ref to string - ARRAY ref to array - (other) object with getline (if source) or print method - -If the parameter is an object, and the object has a B method, that -close method will be called at the end of the stream. - -=over 4 - -=item source - -If the B parameter is given, it defines the source of the input stream. -If an input stream is defined with the B parameter then no other source -filenames may be specified in the @ARGV array or B parameter. - -=item destination - -If the B parameter is given, it will be used to define the -file or memory location to receive output of perltidy. - -=item stderr - -The B parameter allows the calling program to redirect to a file the -output of what would otherwise go to the standard error output device. Unlike -many other parameters, $stderr must be a file or file handle; it may not be a -reference to a SCALAR or ARRAY. - -=item perltidyrc - -If the B file is given, it will be used instead of any -F<.perltidyrc> configuration file that would otherwise be used. - -=item argv - -If the B parameter is given, it will be used instead of the -B<@ARGV> array. The B parameter may be a string, a reference to a -string, or a reference to an array. If it is a string or reference to a -string, it will be parsed into an array of items just as if it were a -command line string. - -=item dump_options - -If the B parameter is given, it must be the reference to a hash. -In this case, the parameters contained in any perltidyrc configuration file -will be placed in this hash and perltidy will return immediately. This is -equivalent to running perltidy with --dump-options, except that the perameters -are returned in a hash rather than dumped to standard output. Also, by default -only the parameters in the perltidyrc file are returned, but this can be -changed (see the next parameter). This parameter provides a convenient method -for external programs to read a perltidyrc file. An example program using -this feature, F, is included in the distribution. - -Any combination of the B parameters may be used together. - -=item dump_options_type - -This parameter is a string which can be used to control the parameters placed -in the hash reference supplied by B. The possible values are -'perltidyrc' (default) and 'full'. The 'full' parameter causes both the -default options plus any options found in a perltidyrc file to be returned. - -=item dump_getopt_flags - -If the B parameter is given, it must be the reference to a -hash. This hash will receive all of the parameters that perltidy understands -and flags that are passed to Getopt::Long. This parameter may be -used alone or with the B flag. Perltidy will -exit immediately after filling this hash. See the demo program -F for example usage. - -=item dump_options_category - -If the B parameter is given, it must be the reference to a -hash. This hash will receive a hash with keys equal to all long parameter names -and values equal to the title of the corresponding section of the perltidy manual. -See the demo program F for example usage. - -=item dump_abbreviations - -If the B parameter is given, it must be the reference to a -hash. This hash will receive all abbreviations used by Perl::Tidy. See the -demo program F for example usage. - -=item prefilter - -A code reference that will be applied to the source before tidying. It is -expected to take the full content as a string in its input, and output the -transformed content. - -=item postfilter - -A code reference that will be applied to the tidied result before outputting. -It is expected to take the full content as a string in its input, and output -the transformed content. - -Note: A convenient way to check the function of your custom prefilter and -postfilter code is to use the --notidy option, first with just the prefilter -and then with both the prefilter and postfilter. See also the file -B in the perltidy distribution. - -=back - -=head1 NOTES ON FORMATTING PARAMETERS - -Parameters which control formatting may be passed in several ways: in a -F<.perltidyrc> configuration file, in the B parameter, and in the -B parameter. - -The B<-syn> (B<--check-syntax>) flag may be used with all source and -destination streams except for standard input and output. However -data streams which are not associated with a filename will -be copied to a temporary file before being be passed to Perl. This -use of temporary files can cause somewhat confusing output from Perl. - -=head1 EXAMPLES - -The perltidy script itself is a simple example, and several -examples are given in the perltidy distribution. - -The following example passes perltidy a snippet as a reference -to a string and receives the result back in a reference to -an array. - - use Perl::Tidy; - - # some messy source code to format - my $source = <<'EOM'; - use strict; - my @editors=('Emacs', 'Vi '); my $rand = rand(); - print "A poll of 10 random programmers gave these results:\n"; - foreach(0..10) { - my $i=int ($rand+rand()); - print " $editors[$i] users are from Venus" . ", " . - "$editors[1-$i] users are from Mars" . - "\n"; - } - EOM - - # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY - my @dest; - perltidy( source => \$source, destination => \@dest ); - foreach (@dest) {print} - -=head1 Using the B Callback Object - -The B parameter is an optional callback object which allows -the calling program to receive tokenized lines directly from perltidy for -further specialized processing. When this parameter is used, the two -formatting options which are built into perltidy (beautification or -html) are ignored. The following diagram illustrates the logical flow: - - |-- (normal route) -> code beautification - caller->perltidy->|-- (-html flag ) -> create html - |-- (formatter given)-> callback to write_line - -This can be useful for processing perl scripts in some way. The -parameter C<$formatter> in the perltidy call, - - formatter => $formatter, - -is an object created by the caller with a C method which -will accept and process tokenized lines, one line per call. Here is -a simple example of a C which merely prints the line number, -the line type (as determined by perltidy), and the text of the line: - - sub write_line { - - # This is called from perltidy line-by-line - my $self = shift; - my $line_of_tokens = shift; - my $line_type = $line_of_tokens->{_line_type}; - my $input_line_number = $line_of_tokens->{_line_number}; - my $input_line = $line_of_tokens->{_line_text}; - print "$input_line_number:$line_type:$input_line"; - } - -The complete program, B, is contained in the examples section of -the source distribution. As this example shows, the callback method -receives a parameter B<$line_of_tokens>, which is a reference to a hash -of other useful information. This example uses these hash entries: - - $line_of_tokens->{_line_number} - the line number (1,2,...) - $line_of_tokens->{_line_text} - the text of the line - $line_of_tokens->{_line_type} - the type of the line, one of: - - SYSTEM - system-specific code before hash-bang line - CODE - line of perl code (including comments) - POD_START - line starting pod, such as '=head' - POD - pod documentation text - POD_END - last line of pod section, '=cut' - HERE - text of here-document - HERE_END - last line of here-doc (target word) - FORMAT - format section - FORMAT_END - last line of format section, '.' - DATA_START - __DATA__ line - DATA - unidentified text following __DATA__ - END_START - __END__ line - END - unidentified text following __END__ - ERROR - we are in big trouble, probably not a perl script - -Most applications will be only interested in lines of type B. For -another example, let's write a program which checks for one of the -so-called I C<&`>, C<$&>, and C<$'>, which -can slow down processing. Here is a B, from the example -program B, which does that: - - sub write_line { - - # This is called back from perltidy line-by-line - # We're looking for $`, $&, and $' - my ( $self, $line_of_tokens ) = @_; - - # pull out some stuff we might need - my $line_type = $line_of_tokens->{_line_type}; - my $input_line_number = $line_of_tokens->{_line_number}; - my $input_line = $line_of_tokens->{_line_text}; - my $rtoken_type = $line_of_tokens->{_rtoken_type}; - my $rtokens = $line_of_tokens->{_rtokens}; - chomp $input_line; - - # skip comments, pod, etc - return if ( $line_type ne 'CODE' ); - - # loop over tokens looking for $`, $&, and $' - for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) { - - # we only want to examine token types 'i' (identifier) - next unless $$rtoken_type[$j] eq 'i'; - - # pull out the actual token text - my $token = $$rtokens[$j]; - - # and check it - if ( $token =~ /^\$[\`\&\']$/ ) { - print STDERR - "$input_line_number: $token\n"; - } - } - } - -This example pulls out these tokenization variables from the $line_of_tokens -hash reference: - - $rtoken_type = $line_of_tokens->{_rtoken_type}; - $rtokens = $line_of_tokens->{_rtokens}; - -The variable C<$rtoken_type> is a reference to an array of token type codes, -and C<$rtokens> is a reference to a corresponding array of token text. -These are obviously only defined for lines of type B. -Perltidy classifies tokens into types, and has a brief code for each type. -You can get a complete list at any time by running perltidy from the -command line with - - perltidy --dump-token-types - -In the present example, we are only looking for tokens of type B -(identifiers), so the for loop skips past all other types. When an -identifier is found, its actual text is checked to see if it is one -being sought. If so, the above write_line prints the token and its -line number. - -The B feature is relatively new in perltidy, and further -documentation needs to be written to complete its description. However, -several example programs have been written and can be found in the -B section of the source distribution. Probably the best way -to get started is to find one of the examples which most closely matches -your application and start modifying it. - -For help with perltidy's pecular way of breaking lines into tokens, you -might run, from the command line, - - perltidy -D filename - -where F is a short script of interest. This will produce -F with interleaved lines of text and their token types. -The B<-D> flag has been in perltidy from the beginning for this purpose. -If you want to see the code which creates this file, it is -C in Tidy.pm. - -=head1 EXPORT - - &perltidy - -=head1 CREDITS - -Thanks to Hugh Myers who developed the initial modular interface -to perltidy. - -=head1 VERSION - -This man page documents Perl::Tidy version 20120701. - -=head1 LICENSE - -This package is free software; you can redistribute it and/or modify it -under the terms of the "GNU General Public License". - -Please refer to the file "COPYING" for details. - -=head1 AUTHOR - - Steve Hancock - perltidy at users.sourceforge.net - -=head1 SEE ALSO - -The perltidy(1) man page describes all of the features of perltidy. It -can be found at http://perltidy.sourceforge.net. - -=cut