X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=lib%2FPerl%2FTidy.pm;h=64e72d59e371e9003143df40cd813e343ca48fc6;hb=736e5d55044509ce0deaaf0e7299a98f4d4b8d15;hp=86764c33f65c68e6f00cbd9561e07785d272628f;hpb=8aa69fbac36a21cad0a1c0d5b3452a546d427d7f;p=perltidy.git diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm index 86764c3..64e72d5 100644 --- a/lib/Perl/Tidy.pm +++ b/lib/Perl/Tidy.pm @@ -1,8 +1,9 @@ +# ############################################################ # # perltidy - a perl script indenter and formatter # -# Copyright (c) 2000-2007 by Steve Hancock +# Copyright (c) 2000-2012 by Steve Hancock # Distributed under the GPL license agreement; see file COPYING # # This program is free software; you can redistribute it and/or modify @@ -27,7 +28,7 @@ # # perltidy Tidy.pm # -# Code Contributions: +# Code Contributions: See ChangeLog.html for a complete history. # Michael Cartmell supplied code for adaptation to VMS and helped with # v-strings. # Hugh S. Myers supplied sub streamhandle and the supporting code to @@ -35,7 +36,17 @@ # Yves Orton supplied coding to help detect Windows versions. # Axel Rose supplied a patch for MacPerl. # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator. -# Dan Tyrell sent a patch for binary I/O. +# Dan Tyrell contributed a patch for binary I/O. +# Ueli Hugenschmidt contributed a patch for -fpsc +# Sam Kington supplied a patch to identify the initial indentation of +# entabbed code. +# jonathan swartz supplied patches for: +# * .../ pattern, which looks upwards from directory +# * --notidy, to be used in directories where we want to avoid +# accidentally tidying +# * prefilter and postfilter +# * iterations option +# # Many others have supplied key ideas, suggestions, and bug reports; # see the CHANGES file. # @@ -60,11 +71,13 @@ use vars qw{ @ISA = qw( Exporter ); @EXPORT = qw( &perltidy ); +use Cwd; use IO::File; use File::Basename; +use File::Copy; BEGIN { - ( $VERSION = q($Id: Tidy.pm,v 1.61 2007/04/24 13:31:15 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker + ( $VERSION = q($Id: Tidy.pm,v 1.74 2012/07/01 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker } sub streamhandle { @@ -212,7 +225,7 @@ sub catfile { my $test_file = $path . $name; my ( $test_name, $test_path ) = fileparse($test_file); return $test_file if ( $test_name eq $name ); - return undef if ( $^O eq 'VMS' ); + return undef if ( $^O eq 'VMS' ); # this should work at least for Windows and Unix: $test_file = $path . '/' . $name; @@ -224,15 +237,14 @@ sub catfile { sub make_temporary_filename { # Make a temporary filename. + # FIXME: return both a name and opened filehandle # - # The POSIX tmpnam() function tends to be unreliable for non-unix - # systems (at least for the win32 systems that I've tested), so use - # a pre-defined name. A slight disadvantage of this is that two - # perltidy runs in the same working directory may conflict. - # However, the chance of that is small and managable by the user. - # An alternative would be to check for the file's existance and use, - # say .TMP0, .TMP1, etc, but that scheme has its own problems. So, - # keep it simple. + # The POSIX tmpnam() function tends to be unreliable for non-unix systems + # (at least for the win32 systems that I've tested), so use a pre-defined + # name for them. A disadvantage of this is that two perltidy + # runs in the same working directory may conflict. However, the chance of + # that is small and managable by the user, especially on systems for which + # the POSIX tmpnam function doesn't work. my $name = "perltidy.TMP"; if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) { return $name; @@ -242,7 +254,7 @@ sub make_temporary_filename { use IO::File; # just make a couple of tries before giving up and using the default - for ( 0 .. 1 ) { + for ( 0 .. 3 ) { my $tmpname = tmpnam(); my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL ); if ($fh) { @@ -334,6 +346,8 @@ sub make_temporary_filename { dump_options_category => undef, dump_options_range => undef, dump_abbreviations => undef, + prefilter => undef, + postfilter => undef, ); # don't overwrite callers ARGV @@ -382,6 +396,8 @@ EOM my $source_stream = $input_hash{'source'}; my $stderr_stream = $input_hash{'stderr'}; my $user_formatter = $input_hash{'formatter'}; + my $prefilter = $input_hash{'prefilter'}; + my $postfilter = $input_hash{'postfilter'}; # various dump parameters my $dump_options_type = $input_hash{'dump_options_type'}; @@ -454,6 +470,16 @@ EOM # redirect STDERR if requested if ($stderr_stream) { + my $ref_type = ref($stderr_stream); + if ( $ref_type eq 'SCALAR' or $ref_type eq 'ARRAY' ) { + croak <{'dump-options'} ) { - dump_options( $rOpts, $roption_string ); - exit 1; + print STDOUT $readable_options; + exit 0; } + #--------------------------------------------------------------- + # check parameters and their interactions + #--------------------------------------------------------------- check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ); @@ -573,6 +611,29 @@ EOM make_extension( $rOpts->{'output-file-extension'}, $default_file_extension{ $rOpts->{'format'} }, $dot ); + # If the backup extension contains a / character then the backup should + # be deleted when the -b option is used. On older versions of + # perltidy this will generate an error message due to an illegal + # file name. + # + # A backup file will still be generated but will be deleted + # at the end. If -bext='/' then this extension will be + # the default 'bak'. Otherwise it will be whatever characters + # remains after all '/' characters are removed. For example: + # -bext extension slashes + # '/' bak 1 + # '/delete' delete 1 + # 'delete/' delete 1 + # '/dev/null' devnull 2 (Currently not allowed) + my $bext = $rOpts->{'backup-file-extension'}; + my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); + + # At present only one forward slash is allowed. In the future multiple + # slashes may be allowed to allow for other options + if ( $delete_backup > 1 ) { + die "-bext=$bext contains more than one '/'\n"; + } + my $backup_extension = make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); @@ -583,11 +644,9 @@ EOM make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); # check for -b option; + # silently ignore unless beautify mode my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} - && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode - && @ARGV > 0; # silently ignore if standard input; - # this allows -b to be in a .perltidyrc file - # without error messages when running from an editor + && $rOpts->{'format'} eq 'tidy'; # turn off -b with warnings in case of conflicts with other options if ($in_place_modify) { @@ -597,10 +656,10 @@ EOM } if ($destination_stream) { warn -"Ignoring -b; you may not specify a destination array and -b together\n"; +"Ignoring -b; you may not specify a destination stream and -b together\n"; $in_place_modify = 0; } - if ($source_stream) { + if ( ref($source_stream) ) { warn "Ignoring -b; you may not specify a source array and -b together\n"; $in_place_modify = 0; @@ -659,7 +718,10 @@ EOM unshift( @ARGV, '-' ) unless @ARGV; } - # loop to process all files in argument list + #--------------------------------------------------------------- + # Ready to go... + # main loop to process all files in argument list + #--------------------------------------------------------------- my $number_of_files = @ARGV; my $formatter = undef; $tokenizer = undef; @@ -668,7 +730,7 @@ EOM my $input_file_permissions; #--------------------------------------------------------------- - # determine the input file name + # prepare this input stream #--------------------------------------------------------------- if ($source_stream) { $fileroot = "perltidy"; @@ -689,7 +751,7 @@ EOM if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } my $pattern = fileglob_to_re($input_file); - eval "/$pattern/"; + ##eval "/$pattern/"; if ( !$@ && opendir( DIR, './' ) ) { my @files = grep { /$pattern/ && !-d $_ } readdir(DIR); @@ -709,6 +771,15 @@ EOM next; } + # As a safety precaution, skip zero length files. + # If for example a source file got clobberred somehow, + # the old .tdy or .bak files might still exist so we + # shouldn't overwrite them with zero length files. + unless ( -s $input_file ) { + print "skipping file: $input_file: Zero size\n"; + next; + } + unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { print "skipping file: $input_file: Non-text (override with -f)\n"; @@ -765,12 +836,26 @@ EOM $rpending_logfile_message ); next unless ($source_object); + # Prefilters and postfilters: The prefilter is a code reference + # that will be applied to the source before tidying, and the + # postfilter is a code reference to the result before outputting. + if ($prefilter) { + my $buf = ''; + while ( my $line = $source_object->get_line() ) { + $buf .= $line; + } + $buf = $prefilter->($buf); + + $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, + $rpending_logfile_message ); + } + # register this file name with the Diagnostics package $diagnostics_object->set_input_file($input_file) if $diagnostics_object; #--------------------------------------------------------------- - # determine the output file name + # prepare the output stream #--------------------------------------------------------------- my $output_file = undef; my $actual_output_extension; @@ -859,9 +944,19 @@ EOM if ( defined($line_separator) ) { $binmode = 1 } else { $line_separator = "\n" } - my $sink_object = - Perl::Tidy::LineSink->new( $output_file, $tee_file, - $line_separator, $rOpts, $rpending_logfile_message, $binmode ); + my ( $sink_object, $postfilter_buffer ); + if ($postfilter) { + $sink_object = + Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, + $binmode ); + } + else { + $sink_object = + Perl::Tidy::LineSink->new( $output_file, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, + $binmode ); + } #--------------------------------------------------------------- # initialize the error logger @@ -876,7 +971,7 @@ EOM $saw_extrude ); write_logfile_header( $rOpts, $logger_object, $config_file, - $rraw_options, $Windows_type + $rraw_options, $Windows_type, $readable_options, ); if ($$rpending_logfile_message) { $logger_object->write_logfile_entry($$rpending_logfile_message); @@ -895,68 +990,209 @@ EOM } #--------------------------------------------------------------- - # create a formatter for this file : html writer or pretty printer + # loop over iterations for one source stream #--------------------------------------------------------------- - # we have to delete any old formatter because, for safety, - # the formatter will check to see that there is only one. - $formatter = undef; + # We will do a convergence test if 3 or more iterations are allowed. + # It would be pointless for fewer because we have to make at least + # two passes before we can see if we are converged, and the test + # would just slow things down. + my $max_iterations = $rOpts->{'iterations'}; + my $convergence_log_message; + my %saw_md5; + my $do_convergence_test = $max_iterations > 2; + if ($do_convergence_test) { + eval "use Digest::MD5 qw(md5_hex)"; + $do_convergence_test = !$@; + } + + # save objects to allow redirecting output during iterations + my $sink_object_final = $sink_object; + my $debugger_object_final = $debugger_object; + my $logger_object_final = $logger_object; + + for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) { + + # send output stream to temp buffers until last iteration + my $sink_buffer; + if ( $iter < $max_iterations ) { + $sink_object = + Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, + $binmode ); + } + else { + $sink_object = $sink_object_final; + } + + # Save logger, debugger output only on pass 1 because: + # (1) line number references must be to the starting + # source, not an intermediate result, and + # (2) we need to know if there are errors so we can stop the + # iterations early if necessary. + if ( $iter > 1 ) { + $debugger_object = undef; + $logger_object = undef; + } + + #------------------------------------------------------------ + # create a formatter for this file : html writer or + # pretty printer + #------------------------------------------------------------ + + # we have to delete any old formatter because, for safety, + # the formatter will check to see that there is only one. + $formatter = undef; + + if ($user_formatter) { + $formatter = $user_formatter; + } + elsif ( $rOpts->{'format'} eq 'html' ) { + $formatter = + Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, + $actual_output_extension, $html_toc_extension, + $html_src_extension ); + } + elsif ( $rOpts->{'format'} eq 'tidy' ) { + $formatter = Perl::Tidy::Formatter->new( + logger_object => $logger_object, + diagnostics_object => $diagnostics_object, + sink_object => $sink_object, + ); + } + else { + die "I don't know how to do -format=$rOpts->{'format'}\n"; + } - if ($user_formatter) { - $formatter = $user_formatter; - } - elsif ( $rOpts->{'format'} eq 'html' ) { - $formatter = - Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, - $actual_output_extension, $html_toc_extension, - $html_src_extension ); - } - elsif ( $rOpts->{'format'} eq 'tidy' ) { - $formatter = Perl::Tidy::Formatter->new( + unless ($formatter) { + die + "Unable to continue with $rOpts->{'format'} formatting\n"; + } + + #--------------------------------------------------------------- + # create the tokenizer for this file + #--------------------------------------------------------------- + $tokenizer = undef; # must destroy old tokenizer + $tokenizer = Perl::Tidy::Tokenizer->new( + source_object => $source_object, logger_object => $logger_object, + debugger_object => $debugger_object, diagnostics_object => $diagnostics_object, - sink_object => $sink_object, + starting_level => $rOpts->{'starting-indentation-level'}, + tabs => $rOpts->{'tabs'}, + entab_leading_space => $rOpts->{'entab-leading-whitespace'}, + indent_columns => $rOpts->{'indent-columns'}, + look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, + look_for_autoloader => $rOpts->{'look-for-autoloader'}, + look_for_selfloader => $rOpts->{'look-for-selfloader'}, + trim_qw => $rOpts->{'trim-qw'}, ); - } - else { - die "I don't know how to do -format=$rOpts->{'format'}\n"; - } - unless ($formatter) { - die "Unable to continue with $rOpts->{'format'} formatting\n"; - } + #--------------------------------------------------------------- + # now we can do it + #--------------------------------------------------------------- + process_this_file( $tokenizer, $formatter ); + + #--------------------------------------------------------------- + # close the input source and report errors + #--------------------------------------------------------------- + $source_object->close_input_file(); + + # line source for next iteration (if any) comes from the current + # temporary output buffer + if ( $iter < $max_iterations ) { + + $sink_object->close_output_file(); + $source_object = + Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts, + $rpending_logfile_message ); + + # stop iterations if errors or converged + my $stop_now = $logger_object->{_warning_count}; + if ($stop_now) { + $convergence_log_message = <new( - source_object => $source_object, - logger_object => $logger_object, - debugger_object => $debugger_object, - diagnostics_object => $diagnostics_object, - starting_level => $rOpts->{'starting-indentation-level'}, - tabs => $rOpts->{'tabs'}, - indent_columns => $rOpts->{'indent-columns'}, - look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, - look_for_autoloader => $rOpts->{'look-for-autoloader'}, - look_for_selfloader => $rOpts->{'look-for-selfloader'}, - trim_qw => $rOpts->{'trim-qw'}, - ); + # Saw this result before, stop iterating + $stop_now = 1; + my $iterm = $iter - 1; + if ( $saw_md5{$digest} != $iterm ) { - #--------------------------------------------------------------- - # now we can do it - #--------------------------------------------------------------- - process_this_file( $tokenizer, $formatter ); + # Blinking (oscillating) between two stable + # end states. This has happened in the past + # but at present there are no known instances. + $convergence_log_message = <write_diagnostics( + $convergence_log_message) + if $diagnostics_object; + } + else { + $convergence_log_message = <write_diagnostics( + $convergence_log_message) + if $diagnostics_object && $iterm > 2; + } + } + } ## end if ($do_convergence_test) + + if ($stop_now) { + + # we are stopping the iterations early; + # copy the output stream to its final destination + $sink_object = $sink_object_final; + while ( my $line = $source_object->get_line() ) { + $sink_object->write_line($line); + } + $source_object->close_input_file(); + last; + } + } ## end if ( $iter < $max_iterations) + } # end loop over iterations for one source file + + # restore objects which have been temporarily undefined + # for second and higher iterations + $debugger_object = $debugger_object_final; + $logger_object = $logger_object_final; + + $logger_object->write_logfile_entry($convergence_log_message) + if $convergence_log_message; #--------------------------------------------------------------- - # close the input source and report errors + # Perform any postfilter operation #--------------------------------------------------------------- - $source_object->close_input_file(); - - # get file names to use for syntax check - my $ifname = $source_object->get_input_file_copy_name(); - my $ofname = $sink_object->get_output_file_copy(); + if ($postfilter) { + $sink_object->close_output_file(); + $sink_object = + Perl::Tidy::LineSink->new( $output_file, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, + $binmode ); + my $buf = $postfilter->($postfilter_buffer); + $source_object = + Perl::Tidy::LineSource->new( \$buf, $rOpts, + $rpending_logfile_message ); + ##chomp $buf; + ##foreach my $line ( split( "\n", $buf , -1) ) { + while ( my $line = $source_object->get_line() ) { + $sink_object->write_line($line); + } + $source_object->close_input_file(); + } + + # Save names of the input and output files for syntax check + my $ifname = $input_file; + my $ofname = $output_file; #--------------------------------------------------------------- # handle the -b option (backup and modify in-place) @@ -966,7 +1202,7 @@ EOM # oh, oh, no real file to backup .. # shouldn't happen because of numerous preliminary checks - die print + die "problem with -b backing up input file '$input_file': not a file\n"; } my $backup_name = $input_file . $backup_extension; @@ -975,17 +1211,31 @@ EOM or die "unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; } - rename( $input_file, $backup_name ) - or die + + # backup the input file + # we use copy for symlinks, move for regular files + if ( -l $input_file ) { + File::Copy::copy( $input_file, $backup_name ) + or die "File::Copy failed trying to backup source: $!"; + } + else { + rename( $input_file, $backup_name ) + or die "problem renaming $input_file to $backup_name for -b option: $!\n"; + } $ifname = $backup_name; + # copy the output to the original input file + # NOTE: it would be nice to just close $output_file and use + # File::Copy::copy here, but in this case $output_file is the + # handle of an open nameless temporary file so we would lose + # everything if we closed it. seek( $output_file, 0, 0 ) - or die "unable to rewind tmp file for -b option: $!\n"; - + or die + "unable to rewind a temporary file for -b option: $!\n"; my $fout = IO::File->new("> $input_file") or die -"problem opening $input_file for write for -b option; check directory permissions: $!\n"; +"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n"; binmode $fout; my $line; while ( $line = $output_file->getline() ) { @@ -1002,9 +1252,8 @@ EOM $sink_object->close_output_file() if $sink_object; $debugger_object->close_debug_file() if $debugger_object; - my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes - if ($output_file) { - + # set output file permissions + if ( $output_file && -f $output_file && !-l $output_file ) { if ($input_file_permissions) { # give output script same permissions as input script, but @@ -1015,18 +1264,101 @@ EOM } # else use default permissions for html and any other format + } + } + + #--------------------------------------------------------------- + # Do syntax check if requested and possible + #--------------------------------------------------------------- + my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes + if ( $logger_object + && $rOpts->{'check-syntax'} + && $ifname + && $ofname ) + { + $infile_syntax_ok = + check_syntax( $ifname, $ofname, $logger_object, $rOpts ); + } + + #--------------------------------------------------------------- + # remove the original file for in-place modify as follows: + # $delete_backup=0 never + # $delete_backup=1 only if no errors + # $delete_backup>1 always : CURRENTLY NOT ALLOWED, see above + #--------------------------------------------------------------- + if ( $in_place_modify + && $delete_backup + && -f $ifname + && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) ) + { + # As an added safety precaution, do not delete the source file + # if its size has dropped from positive to zero, since this + # could indicate a disaster of some kind, including a hardware + # failure. Actually, this could happen if you had a file of + # all comments (or pod) and deleted everything with -dac (-dap) + # for some reason. + if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) { + warn( +"output file '$output_file' missing or zero length; original '$ifname' not deleted\n" + ); } - if ( $logger_object && $rOpts->{'check-syntax'} ) { - $infile_syntax_ok = - check_syntax( $ifname, $ofname, $logger_object, $rOpts ); + else { + unlink($ifname) + or die +"unable to remove previous '$ifname' for -b option; check permissions: $!\n"; } } $logger_object->finish( $infile_syntax_ok, $formatter ) if $logger_object; - } # end of loop to process all files - } # end of main program + } # end of main loop to process all files + } # end of main program perltidy +} + +sub get_stream_as_named_file { + + # Return the name of a file containing a stream of data, creating + # a temporary file if necessary. + # Given: + # $stream - the name of a file or stream + # Returns: + # $fname = name of file if possible, or undef + # $if_tmpfile = true if temp file, undef if not temp file + # + # This routine is needed for passing actual files to Perl for + # a syntax check. + my ($stream) = @_; + my $is_tmpfile; + my $fname; + if ($stream) { + if ( ref($stream) ) { + my ( $fh_stream, $fh_name ) = + Perl::Tidy::streamhandle( $stream, 'r' ); + if ($fh_stream) { + my ( $fout, $tmpnam ); + + # FIXME: fix the tmpnam routine to return an open filehandle + $tmpnam = Perl::Tidy::make_temporary_filename(); + $fout = IO::File->new( $tmpnam, 'w' ); + + if ($fout) { + $fname = $tmpnam; + $is_tmpfile = 1; + binmode $fout; + while ( my $line = $fh_stream->getline() ) { + $fout->print($line); + } + $fout->close(); + } + $fh_stream->close(); + } + } + elsif ( $stream ne '-' && -f $stream ) { + $fname = $stream; + } + } + return ( $fname, $is_tmpfile ); } sub fileglob_to_re { @@ -1057,8 +1389,10 @@ sub make_extension { } sub write_logfile_header { - my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) = - @_; + my ( + $rOpts, $logger_object, $config_file, + $rraw_options, $Windows_type, $readable_options + ) = @_; $logger_object->write_logfile_entry( "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" ); @@ -1082,9 +1416,8 @@ sub write_logfile_header { $logger_object->write_logfile_entry( "------------------------------------\n"); - foreach ( keys %{$rOpts} ) { - $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" ); - } + $logger_object->write_logfile_entry($readable_options); + $logger_object->write_logfile_entry( "------------------------------------\n"); } @@ -1173,6 +1506,7 @@ sub generate_options { npro recombine! valign! + notidy ); my $category = 13; # Debugging @@ -1221,6 +1555,7 @@ sub generate_options { $add_option->( 'backup-file-extension', 'bext', '=s' ); $add_option->( 'force-read-binary', 'f', '!' ); $add_option->( 'format', 'fmt', '=s' ); + $add_option->( 'iterations', 'it', '=i' ); $add_option->( 'logfile', 'log', '!' ); $add_option->( 'logfile-gap', 'g', ':i' ); $add_option->( 'outfile', 'o', '=s' ); @@ -1302,12 +1637,14 @@ sub generate_options { $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); $add_option->( 'closing-side-comments', 'csc', '!' ); + $add_option->( 'closing-side-comments-balanced', 'cscb', '!' ); $add_option->( 'format-skipping', 'fs', '!' ); $add_option->( 'format-skipping-begin', 'fsb', '=s' ); $add_option->( 'format-skipping-end', 'fse', '=s' ); $add_option->( 'hanging-side-comments', 'hsc', '!' ); $add_option->( 'indent-block-comments', 'ibc', '!' ); $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); + $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' ); $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); $add_option->( 'outdent-long-comments', 'olc', '!' ); $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); @@ -1319,31 +1656,35 @@ sub generate_options { ######################################## $category = 5; # Linebreak controls ######################################## - $add_option->( 'add-newlines', 'anl', '!' ); - $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); - $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); - $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); - $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); - $add_option->( 'cuddled-else', 'ce', '!' ); - $add_option->( 'delete-old-newlines', 'dnl', '!' ); - $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); - $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); - $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); - $add_option->( 'opening-paren-right', 'opr', '!' ); - $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); - $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); - $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); - $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); - $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); - $add_option->( 'stack-closing-paren', 'scp', '!' ); - $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); - $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); - $add_option->( 'stack-opening-paren', 'sop', '!' ); - $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); - $add_option->( 'vertical-tightness', 'vt', '=i' ); - $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); - $add_option->( 'want-break-after', 'wba', '=s' ); - $add_option->( 'want-break-before', 'wbb', '=s' ); + $add_option->( 'add-newlines', 'anl', '!' ); + $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); + $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); + $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); + $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); + $add_option->( 'cuddled-else', 'ce', '!' ); + $add_option->( 'delete-old-newlines', 'dnl', '!' ); + $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); + $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); + $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); + $add_option->( 'opening-paren-right', 'opr', '!' ); + $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); + $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' ); + $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); + $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); + $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); + $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); + $add_option->( 'stack-closing-paren', 'scp', '!' ); + $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); + $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); + $add_option->( 'stack-opening-paren', 'sop', '!' ); + $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); + $add_option->( 'vertical-tightness', 'vt', '=i' ); + $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); + $add_option->( 'want-break-after', 'wba', '=s' ); + $add_option->( 'want-break-before', 'wbb', '=s' ); + $add_option->( 'break-after-all-operators', 'baao', '!' ); + $add_option->( 'break-before-all-operators', 'bbao', '!' ); + $add_option->( 'keep-interior-semicolons', 'kis', '!' ); ######################################## $category = 6; # Controlling list formatting @@ -1355,20 +1696,22 @@ sub generate_options { ######################################## $category = 7; # Retaining or ignoring existing line breaks ######################################## - $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); - $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); - $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); - $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); + $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); + $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); + $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); + $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' ); + $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); ######################################## $category = 8; # Blank line control ######################################## - $add_option->( 'blanks-before-blocks', 'bbb', '!' ); - $add_option->( 'blanks-before-comments', 'bbc', '!' ); - $add_option->( 'blanks-before-subs', 'bbs', '!' ); - $add_option->( 'long-block-line-count', 'lbl', '=i' ); - $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); - $add_option->( 'swallow-optional-blank-lines', 'sob', '!' ); + $add_option->( 'blanks-before-blocks', 'bbb', '!' ); + $add_option->( 'blanks-before-comments', 'bbc', '!' ); + $add_option->( 'blank-lines-before-subs', 'blbs', '=i' ); + $add_option->( 'blank-lines-before-packages', 'blbp', '=i' ); + $add_option->( 'long-block-line-count', 'lbl', '=i' ); + $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); + $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); ######################################## $category = 9; # Other controls @@ -1485,7 +1828,8 @@ sub generate_options { add-whitespace blanks-before-blocks blanks-before-comments - blanks-before-subs + blank-lines-before-subs=1 + blank-lines-before-packages=1 block-brace-tightness=0 block-brace-vertical-tightness=0 brace-tightness=1 @@ -1493,12 +1837,14 @@ sub generate_options { brace-vertical-tightness=0 break-at-old-logical-breakpoints break-at-old-ternary-breakpoints + break-at-old-attribute-breakpoints break-at-old-keyword-breakpoints comma-arrow-breakpoints=1 nocheck-syntax closing-side-comment-interval=6 closing-side-comment-maximum-text=20 closing-side-comment-else-flag=0 + closing-side-comments-balanced closing-paren-indentation=0 closing-brace-indentation=0 closing-square-bracket-indentation=0 @@ -1509,6 +1855,8 @@ sub generate_options { hanging-side-comments indent-block-comments indent-columns=4 + iterations=1 + keep-old-blank-lines=1 long-block-line-count=8 look-for-autoloader look-for-selfloader @@ -1524,7 +1872,6 @@ sub generate_options { noquiet noshow-options nostatic-side-comments - noswallow-optional-blank-lines notabs nowarning-output outdent-labels @@ -1560,10 +1907,13 @@ sub generate_options { #--------------------------------------------------------------- %expansion = ( %expansion, - 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], - 'fnl' => [qw(freeze-newlines)], - 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], - 'fws' => [qw(freeze-whitespace)], + 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], + 'fnl' => [qw(freeze-newlines)], + 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], + 'fws' => [qw(freeze-whitespace)], + 'freeze-blank-lines' => + [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)], + 'fbl' => [qw(freeze-blank-lines)], 'indent-only' => [qw(freeze-newlines freeze-whitespace)], 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], 'nooutdent-long-lines' => @@ -1588,11 +1938,21 @@ sub generate_options { 'nhtml' => [qw(format=tidy)], 'tidy' => [qw(format=tidy)], + 'swallow-optional-blank-lines' => [qw(kbl=0)], + 'noswallow-optional-blank-lines' => [qw(kbl=1)], + 'sob' => [qw(kbl=0)], + 'nsob' => [qw(kbl=1)], + 'break-after-comma-arrows' => [qw(cab=0)], 'nobreak-after-comma-arrows' => [qw(cab=1)], 'baa' => [qw(cab=0)], 'nbaa' => [qw(cab=1)], + 'blanks-before-subs' => [qw(blbs=1 blbp=1)], + 'bbs' => [qw(blbs=1 blbp=1)], + 'noblanks-before-subs' => [qw(blbs=0 blbp=0)], + 'nbbs' => [qw(blbs=0 blbp=0)], + 'break-at-old-trinary-breakpoints' => [qw(bot)], 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], @@ -1650,6 +2010,7 @@ sub generate_options { 'mangle' => [ qw( check-syntax + keep-old-blank-lines=0 delete-old-newlines delete-old-whitespace delete-semicolons @@ -1660,7 +2021,8 @@ sub generate_options { noadd-semicolons noadd-whitespace noblanks-before-blocks - noblanks-before-subs + blank-lines-before-subs=0 + blank-lines-before-packages=0 notabs ) ], @@ -1687,7 +2049,8 @@ sub generate_options { noadd-semicolons noadd-whitespace noblanks-before-blocks - noblanks-before-subs + blank-lines-before-subs=0 + blank-lines-before-packages=0 nofuzzy-line-length notabs norecombine @@ -1798,6 +2161,21 @@ sub process_command_line { "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; } $config_file = $2; + + # resolve /.../, meaning look upwards from directory + if ( defined($config_file) ) { + if ( my ( $start_dir, $search_file ) = + ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) ) + { + $start_dir = '.' if !$start_dir; + $start_dir = Cwd::realpath($start_dir); + if ( my $found_file = + find_file_upwards( $start_dir, $search_file ) ) + { + $config_file = $found_file; + } + } + } unless ( -e $config_file ) { warn "cannot find file given with -pro=$config_file: $!\n"; $config_file = ""; @@ -1809,29 +2187,29 @@ sub process_command_line { elsif ( $i =~ /^-extrude$/ ) { $saw_extrude = 1; } - elsif ( $i =~ /^-(help|h|HELP|H)$/ ) { + elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) { usage(); - exit 1; + exit 0; } elsif ( $i =~ /^-(version|v)$/ ) { show_version(); - exit 1; + exit 0; } elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { dump_defaults(@$rdefaults); - exit 1; + exit 0; } elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { dump_long_names(@$roption_string); - exit 1; + exit 0; } elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { dump_short_names($rexpansion); - exit 1; + exit 0; } elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); - exit 1; + exit 0; } } @@ -1882,11 +2260,8 @@ EOM } if ($saw_dump_profile) { - if ($saw_dump_profile) { - dump_config_file( $fh_config, $config_file, - $rconfig_file_chatter ); - exit 1; - } + dump_config_file( $fh_config, $config_file, $rconfig_file_chatter ); + exit 0; } if ($fh_config) { @@ -2041,6 +2416,43 @@ sub check_options { } } + # check iteration count and quietly fix if necessary: + # - iterations option only applies to code beautification mode + # - the convergence check should stop most runs on iteration 2, and + # virtually all on iteration 3. But we'll allow up to 6. + if ( $rOpts->{'format'} ne 'tidy' ) { + $rOpts->{'iterations'} = 1; + } + elsif ( defined( $rOpts->{'iterations'} ) ) { + if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 } + elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 } + } + else { + $rOpts->{'iterations'} = 1; + } + + # check for reasonable number of blank lines and fix to avoid problems + if ( $rOpts->{'blank-lines-before-subs'} ) { + if ( $rOpts->{'blank-lines-before-subs'} < 0 ) { + $rOpts->{'blank-lines-before-subs'} = 0; + warn "negative value of -blbs, setting 0\n"; + } + if ( $rOpts->{'blank-lines-before-subs'} > 100 ) { + warn "unreasonably large value of -blbs, reducing\n"; + $rOpts->{'blank-lines-before-subs'} = 100; + } + } + if ( $rOpts->{'blank-lines-before-packages'} ) { + if ( $rOpts->{'blank-lines-before-packages'} < 0 ) { + warn "negative value of -blbp, setting 0\n"; + $rOpts->{'blank-lines-before-packages'} = 0; + } + if ( $rOpts->{'blank-lines-before-packages'} > 100 ) { + warn "unreasonably large value of -blbp, reducing\n"; + $rOpts->{'blank-lines-before-packages'} = 100; + } + } + # see if user set a non-negative logfile-gap if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { @@ -2100,11 +2512,6 @@ EOM $rOpts->{'opening-brace-on-new-line'}; } - # set shortcut flag if no blanks to be written - unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) { - $rOpts->{'swallow-optional-blank-lines'} = 1; - } - if ( $rOpts->{'entab-leading-whitespace'} ) { if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { warn "-et=n must use a positive integer; ignoring -et\n"; @@ -2116,6 +2523,26 @@ EOM } } +sub find_file_upwards { + my ( $search_dir, $search_file ) = @_; + + $search_dir =~ s{/+$}{}; + $search_file =~ s{^/+}{}; + + while (1) { + my $try_path = "$search_dir/$search_file"; + if ( -f $try_path ) { + return $try_path; + } + elsif ( $search_dir eq '/' ) { + return undef; + } + else { + $search_dir = dirname($search_dir); + } + } +} + sub expand_command_abbreviations { # go through @ARGV and expand any abbreviations @@ -2331,7 +2758,8 @@ EOS } sub is_unix { - return ( $^O !~ /win32|dos/i ) + return + ( $^O !~ /win32|dos/i ) && ( $^O ne 'VMS' ) && ( $^O ne 'OS2' ) && ( $^O ne 'MacOS' ); @@ -2350,6 +2778,7 @@ sub look_for_Windows { sub find_config_file { # look for a .perltidyrc configuration file + # For Windows also look for a file named perltidy.ini my ( $is_Windows, $Windows_type, $rconfig_file_chatter, $rpending_complaint ) = @_; @@ -2374,6 +2803,10 @@ sub find_config_file { # look in current directory first $config_file = ".perltidyrc"; return $config_file if $exists_config_file->($config_file); + if ($is_Windows) { + $config_file = "perltidy.ini"; + return $config_file if $exists_config_file->($config_file); + } # Default environment vars. my @envs = qw(PERLTIDY HOME); @@ -2397,6 +2830,11 @@ sub find_config_file { # test ENV as directory: $config_file = catfile( $ENV{$var}, ".perltidyrc" ); return $config_file if $exists_config_file->($config_file); + + if ($is_Windows) { + $config_file = catfile( $ENV{$var}, "perltidy.ini" ); + return $config_file if $exists_config_file->($config_file); + } } else { $$rconfig_file_chatter .= "\n"; @@ -2412,14 +2850,24 @@ sub find_config_file { Win_Config_Locs( $rpending_complaint, $Windows_type ); # Check All Users directory, if there is one. + # i.e. C:\Documents and Settings\User\perltidy.ini if ($allusers) { + $config_file = catfile( $allusers, ".perltidyrc" ); return $config_file if $exists_config_file->($config_file); + + $config_file = catfile( $allusers, "perltidy.ini" ); + return $config_file if $exists_config_file->($config_file); } # Check system directory. + # retain old code in case someone has been able to create + # a file with a leading period. $config_file = catfile( $system, ".perltidyrc" ); return $config_file if $exists_config_file->($config_file); + + $config_file = catfile( $system, "perltidy.ini" ); + return $config_file if $exists_config_file->($config_file); } } @@ -2508,10 +2956,10 @@ sub read_config_file { while ( my $line = $fh->getline() ) { $line_no++; chomp $line; - next if $line =~ /^\s*#/; # skip full-line comment ( $line, $death_message ) = strip_comment( $line, $config_file, $line_no ); last if ($death_message); + next unless $line; $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends next unless $line; @@ -2520,8 +2968,12 @@ sub read_config_file { # or just # body - if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) { - my ( $newname, $body, $curly ) = ( $2, $3, $4 ); + my $body = $line; + my ($newname); + if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) { + ( $newname, $body ) = ( $2, $3, ); + } + if ($body) { # handle a new alias definition if ($newname) { @@ -2566,15 +3018,6 @@ EOM push( @config_list, @$rbody_parts ); } } - - if ($curly) { - unless ($name) { - $death_message = -"Unexpected '}' seen in config file $config_file line $.\n"; - last; - } - $name = undef; - } } } eval { $fh->close() }; @@ -2583,17 +3026,29 @@ EOM sub strip_comment { + # Strip any comment from a command line my ( $instr, $config_file, $line_no ) = @_; my $msg = ""; + # check for full-line comment + if ( $instr =~ /^\s*#/ ) { + return ( "", $msg ); + } + # nothing to do if no comments if ( $instr !~ /#/ ) { return ( $instr, $msg ); } - # use simple method of no quotes + # handle case of no quotes elsif ( $instr !~ /['"]/ ) { - $instr =~ s/\s*\#.*$//; # simple trim + + # We now require a space before the # of a side comment + # this allows something like: + # -sbcp=# + # Otherwise, it would have to be quoted: + # -sbcp='#' + $instr =~ s/\s+\#.*$//; return ( $instr, $msg ); } @@ -2630,6 +3085,9 @@ EOM $outstr .= $1; $quote_char = $1; } + + # Note: not yet enforcing the space-before-hash rule for side + # comments if the parameter is quoted. elsif ( $instr =~ /\G#/gc ) { last; } @@ -2730,12 +3188,16 @@ sub dump_defaults { foreach (@_) { print STDOUT "$_\n" } } -sub dump_options { +sub readable_options { - # write the options back out as a valid .perltidyrc file + # return options for this run as a string which could be + # put in a perltidyrc file my ( $rOpts, $roption_string ) = @_; my %Getopt_flags; - my $rGetopt_flags = \%Getopt_flags; + my $rGetopt_flags = \%Getopt_flags; + my $readable_options = "# Final parameter set for this run.\n"; + $readable_options .= + "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n"; foreach my $opt ( @{$roption_string} ) { my $flag = ""; if ( $opt =~ /(.*)(!|=.*)$/ ) { @@ -2746,7 +3208,6 @@ sub dump_options { $rGetopt_flags->{$opt} = $flag; } } - print STDOUT "# Final parameter set for this run:\n"; foreach my $key ( sort keys %{$rOpts} ) { my $flag = $rGetopt_flags->{$key}; my $value = $rOpts->{$key}; @@ -2763,19 +3224,20 @@ sub dump_options { else { # shouldn't happen - print + $readable_options .= "# ERROR in dump_options: unrecognized flag $flag for $key\n"; } } - print STDOUT $prefix . $key . $suffix . "\n"; + $readable_options .= $prefix . $key . $suffix . "\n"; } + return $readable_options; } sub show_version { print <<"EOM"; This is perltidy, v$VERSION -Copyright 2000-2007, Steve Hancock +Copyright 2000-2012, Steve Hancock Perltidy is free software and may be copied under the terms of the GNU General Public License, which is included in the distribution files. @@ -2869,10 +3331,10 @@ Line Break Control -bbs add blank line before subs and packages -bbc add blank line before block comments -bbb add blank line between major blocks - -sob swallow optional blank lines + -kbl=n keep old blank lines? 0=no, 1=some, 2=all + -mbl=n maximum consecutive blank lines to output (default=1) -ce cuddled else; use this style: '} else {' -dnl delete old newlines (default) - -mbl=n maximum consecutive blank lines (default=1) -l=n maximum line length; default n=80 -bl opening brace on new line -sbl opening sub brace on new line. value of -bl is used if not given. @@ -2886,10 +3348,12 @@ Line Break Control -wbb=s want break before tokens in string Following Old Breakpoints + -kis keep interior semicolons. Allows multiple statements per line. -boc break at old comma breaks: turns off all automatic list formatting -bol break at old logical breakpoints: or, and, ||, && (default) -bok break at old list keyword breakpoints such as map, sort (default) -bot break at old conditional (ternary ?:) operator breakpoints (default) + -boa break at old attribute breakpoints -cab=n break at commas after a comma-arrow (=>): n=0 break at all commas after => n=1 stable: break unless this breaks an existing one-line container @@ -2900,6 +3364,7 @@ Comment controls -ibc indent block comments (default) -isbc indent spaced block comments; may indent unless no leading space -msc=n minimum desired spaces to side comment, default 4 + -fpsc=n fix position for side comments; default 0; -csc add or update closing side comments after closing BLOCK brace -dcsc delete closing side comments created by a -csc command -cscp=s change closing side comment prefix to be other than '## end' @@ -2996,7 +3461,7 @@ sub check_syntax { # Use 'perl -c' to make sure that we did not create bad syntax # This is a very good independent check for programming errors # - # Given names of the input and output files, ($ifname, $ofname), + # Given names of the input and output files, ($istream, $ostream), # we do the following: # - check syntax of the input file # - if bad, all done (could be an incomplete code snippet) @@ -3004,7 +3469,7 @@ sub check_syntax { # - if outfile syntax bad, issue warning; this implies a code bug! # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good - my ( $ifname, $ofname, $logger_object, $rOpts ) = @_; + my ( $istream, $ostream, $logger_object, $rOpts ) = @_; my $infile_syntax_ok = 0; my $line_of_dashes = '-' x 42 . "\n"; @@ -3023,7 +3488,7 @@ sub check_syntax { } # this shouldn't happen unless a termporary file couldn't be made - if ( $ifname eq '-' ) { + if ( $istream eq '-' ) { $logger_object->write_logfile_entry( "Cannot run perl -c on STDIN and STDOUT\n"); return $infile_syntax_ok; @@ -3031,13 +3496,16 @@ sub check_syntax { $logger_object->write_logfile_entry( "checking input file syntax with perl $flags\n"); - $logger_object->write_logfile_entry($line_of_dashes); # Not all operating systems/shells support redirection of the standard # error output. my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; - my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection ); + my ( $istream_filename, $perl_output ) = + do_syntax_check( $istream, $flags, $error_redirection ); + $logger_object->write_logfile_entry( + "Input stream passed to Perl as file $istream_filename\n"); + $logger_object->write_logfile_entry($line_of_dashes); $logger_object->write_logfile_entry("$perl_output\n"); if ( $perl_output =~ /syntax\s*OK/ ) { @@ -3045,19 +3513,21 @@ sub check_syntax { $logger_object->write_logfile_entry($line_of_dashes); $logger_object->write_logfile_entry( "checking output file syntax with perl $flags ...\n"); + my ( $ostream_filename, $perl_output ) = + do_syntax_check( $ostream, $flags, $error_redirection ); + $logger_object->write_logfile_entry( + "Output stream passed to Perl as file $ostream_filename\n"); $logger_object->write_logfile_entry($line_of_dashes); - - my $perl_output = - do_syntax_check( $ofname, $flags, $error_redirection ); $logger_object->write_logfile_entry("$perl_output\n"); unless ( $perl_output =~ /syntax\s*OK/ ) { $logger_object->write_logfile_entry($line_of_dashes); $logger_object->warning( -"The output file has a syntax error when tested with perl $flags $ofname !\n" +"The output file has a syntax error when tested with perl $flags $ostream !\n" ); $logger_object->warning( - "This implies an error in perltidy; the file $ofname is bad\n"); + "This implies an error in perltidy; the file $ostream is bad\n" + ); $logger_object->report_definite_bug(); # the perl version number will be helpful for diagnosing the problem @@ -3070,7 +3540,9 @@ sub check_syntax { # Only warn of perl -c syntax errors. Other messages, # such as missing modules, are too common. They can be # seen by running with perltidy -w - $logger_object->complain("A syntax check using perl $flags gives: \n"); + $logger_object->complain("A syntax check using perl $flags\n"); + $logger_object->complain( + "for the output in file $istream_filename gives:\n"); $logger_object->complain($line_of_dashes); $logger_object->complain("$perl_output\n"); $logger_object->complain($line_of_dashes); @@ -3084,11 +3556,18 @@ sub check_syntax { } sub do_syntax_check { - my ( $fname, $flags, $error_redirection ) = @_; + my ( $stream, $flags, $error_redirection ) = @_; + + # We need a named input file for executing perl + my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream); + + # TODO: Need to add name of file to log somewhere + # otherwise Perl output is hard to read + if ( !$stream_filename ) { return $stream_filename, "" } # We have to quote the filename in case it has unusual characters # or spaces. Example: this filename #CM11.pm# gives trouble. - $fname = '"' . $fname . '"'; + my $quoted_stream_filename = '"' . $stream_filename . '"'; # Under VMS something like -T will become -t (and an error) so we # will put quotes around the flags. Double quotes seem to work on @@ -3099,7 +3578,10 @@ sub do_syntax_check { $flags = '"' . $flags . '"'; # now wish for luck... - return qx/perl $flags $fname $error_redirection/; + my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/; + + unlink $stream_filename if ($is_tmpfile); + return $stream_filename, $msg; } ##################################################################### @@ -3132,7 +3614,16 @@ EOM # Convert a scalar to an array. # This avoids looking for "\n" on each call to getline - my @array = map { $_ .= "\n" } split /\n/, ${$rscalar}; + # + # NOTES: The -1 count is needed to avoid loss of trailing blank lines + # (which might be important in a DATA section). + my @array; + if ( $rscalar && ${$rscalar} ) { + @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1; + + # remove possible extra blank line introduced with split + if ( @array && $array[-1] eq "\n" ) { pop @array } + } my $i_next = 0; return bless [ \@array, $mode, $i_next ], $package; } @@ -3156,7 +3647,6 @@ getline call requires mode = 'r' but mode = ($mode); trace follows: EOM } my $i = $self->[2]++; - ##my $line = $self->[0]->[$i]; return $self->[0]->[$i]; } @@ -3228,7 +3718,6 @@ getline requires mode = 'r' but mode = ($mode); trace follows: EOM } my $i = $self->[2]++; - ##my $line = $self->[0]->[$i]; return $self->[0]->[$i]; } @@ -3258,8 +3747,6 @@ package Perl::Tidy::LineSource; sub new { my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; - my $input_file_copy = undef; - my $fh_copy; my $input_line_ending; if ( $rOpts->{'preserve-line-endings'} ) { @@ -3278,7 +3765,6 @@ sub new { # The reason is that temporary files cause problems on # on many systems. $rOpts->{'check-syntax'} = 0; - $input_file_copy = '-'; $$rpending_logfile_message .= < $fh, - _fh_copy => $fh_copy, _filename => $input_file, - _input_file_copy => $input_file_copy, _input_line_ending => $input_line_ending, _rinput_buffer => [], _started => 0, }, $class; } -sub get_input_file_copy_name { - my $self = shift; - my $ifname = $self->{_input_file_copy}; - unless ($ifname) { - $ifname = $self->{_filename}; - } - return $ifname; -} - sub close_input_file { my $self = shift; eval { $self->{_fh}->close() }; - eval { $self->{_fh_copy}->close() } if $self->{_fh_copy}; } sub get_line { my $self = shift; my $line = undef; my $fh = $self->{_fh}; - my $fh_copy = $self->{_fh_copy}; my $rinput_buffer = $self->{_rinput_buffer}; if ( scalar(@$rinput_buffer) ) { @@ -3338,17 +3811,6 @@ sub get_line { $self->{_started}++; } } - if ( $line && $fh_copy ) { $fh_copy->print($line); } - return $line; -} - -sub old_get_line { - my $self = shift; - my $line = undef; - my $fh = $self->{_fh}; - my $fh_copy = $self->{_fh_copy}; - $line = $fh->getline(); - if ( $line && $fh_copy ) { $fh_copy->print($line); } return $line; } @@ -3366,10 +3828,9 @@ sub new { my ( $class, $output_file, $tee_file, $line_separator, $rOpts, $rpending_logfile_message, $binmode ) = @_; - my $fh = undef; - my $fh_copy = undef; - my $fh_tee = undef; - my $output_file_copy = ""; + my $fh = undef; + my $fh_tee = undef; + my $output_file_open = 0; if ( $rOpts->{'format'} eq 'tidy' ) { @@ -3393,7 +3854,6 @@ sub new { # The reason is that temporary files cause problems on # on many systems. $rOpts->{'check-syntax'} = 0; - $output_file_copy = '-'; $$rpending_logfile_message .= < $fh, - _fh_copy => $fh_copy, _fh_tee => $fh_tee, _output_file => $output_file, _output_file_open => $output_file_open, - _output_file_copy => $output_file_copy, _tee_flag => 0, _tee_file => $tee_file, _tee_file_opened => 0, @@ -3418,16 +3876,14 @@ EOM sub write_line { - my $self = shift; - my $fh = $self->{_fh}; - my $fh_copy = $self->{_fh_copy}; + my $self = shift; + my $fh = $self->{_fh}; my $output_file_open = $self->{_output_file_open}; chomp $_[0]; $_[0] .= $self->{_line_separator}; $fh->print( $_[0] ) if ( $self->{_output_file_open} ); - print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} ); if ( $self->{_tee_flag} ) { unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } @@ -3436,15 +3892,6 @@ sub write_line { } } -sub get_output_file_copy { - my $self = shift; - my $ofname = $self->{_output_file_copy}; - unless ($ofname) { - $ofname = $self->{_output_file}; - } - return $ofname; -} - sub tee_on { my $self = shift; $self->{_tee_flag} = 1; @@ -3468,8 +3915,7 @@ sub really_open_tee_file { sub close_output_file { my $self = shift; - eval { $self->{_fh}->close() } if $self->{_output_file_open}; - eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} ); + eval { $self->{_fh}->close() } if $self->{_output_file_open}; $self->close_tee_file(); } @@ -3554,7 +4000,6 @@ sub new { bless { _log_file => $log_file, - _fh_warnings => undef, _rOpts => $rOpts, _fh_warnings => undef, _last_input_line_written => 0, @@ -3831,7 +4276,7 @@ sub warning { ( $fh_warnings, my $filename ) = Perl::Tidy::streamhandle( $warning_file, 'w' ); $fh_warnings or die("couldn't open $filename $!\n"); - warn "## Please see file $filename\n"; + warn "## Please see file $filename\n" unless ref($warning_file); } $self->{_fh_warnings} = $fh_warnings; } @@ -3943,7 +4388,8 @@ sub finish { my $warning_count = $self->{_warning_count}; my $saw_code_bug = $self->{_saw_code_bug}; - my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) + my $save_logfile = + ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) || $saw_code_bug == 1 || $rOpts->{'logfile'}; my $log_file = $self->{_log_file}; @@ -3975,7 +4421,7 @@ sub finish { if ($fh) { my $routput_array = $self->{_output_array}; foreach ( @{$routput_array} ) { $fh->print($_) } - eval { $fh->close() }; + eval { $fh->close() }; } } } @@ -4476,7 +4922,7 @@ sub check_options { # write style sheet to STDOUT and die if requested if ( defined( $rOpts->{'stylesheet'} ) ) { write_style_sheet_file('-'); - exit 1; + exit 0; } # make sure user gives a file name after -css @@ -5415,6 +5861,7 @@ use vars qw{ $last_indentation_written $last_unadjusted_indentation $last_leading_token + $last_output_short_opening_token $saw_VERSION_in_this_file $saw_END_or_DATA_ @@ -5494,6 +5941,8 @@ use vars qw{ %block_leading_text %block_opening_line_number $csc_new_statement_ok + $csc_last_label + %csc_block_label $accumulating_text_for_block $leading_block_text $rleading_block_if_elsif_text @@ -5554,6 +6003,7 @@ use vars qw{ %opening_vertical_tightness %closing_vertical_tightness %closing_token_indentation + $some_closing_token_indentation %opening_token_right %stack_opening_token @@ -5571,6 +6021,7 @@ use vars qw{ $rOpts_break_at_old_comma_breakpoints $rOpts_break_at_old_logical_breakpoints $rOpts_break_at_old_ternary_breakpoints + $rOpts_break_at_old_attribute_breakpoints $rOpts_closing_side_comment_else_flag $rOpts_closing_side_comment_maximum_text $rOpts_continuation_indentation @@ -5582,11 +6033,12 @@ use vars qw{ $rOpts_maximum_fields_per_table $rOpts_maximum_line_length $rOpts_short_concatenation_item_length - $rOpts_swallow_optional_blank_lines + $rOpts_keep_old_blank_lines $rOpts_ignore_old_breakpoints $rOpts_format_skipping $rOpts_space_function_paren $rOpts_space_keyword_paren + $rOpts_keep_interior_semicolons $half_maximum_line_length @@ -5678,8 +6130,9 @@ BEGIN { @is_chain_operator{@_} = (1) x scalar(@_); # We can remove semicolons after blocks preceded by these keywords - @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else - unless while until for foreach); + @_ = + qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else + unless while until for foreach given when default); @is_block_without_semicolon{@_} = (1) x scalar(@_); # 'L' is token for opening { at hash key @@ -5834,11 +6287,12 @@ sub new { $max_gnu_stack_index = 0; $max_gnu_item_index = -1; $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); - @gnu_item_list = (); - $last_output_indentation = 0; - $last_indentation_written = 0; - $last_unadjusted_indentation = 0; - $last_leading_token = ""; + @gnu_item_list = (); + $last_output_indentation = 0; + $last_indentation_written = 0; + $last_unadjusted_indentation = 0; + $last_leading_token = ""; + $last_output_short_opening_token = 0; $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; $saw_END_or_DATA_ = 0; @@ -5902,6 +6356,7 @@ sub new { %block_leading_text = (); %block_opening_line_number = (); $csc_new_statement_ok = 1; + %csc_block_label = (); %saved_opening_indentation = (); $in_format_skipping_section = 0; @@ -5976,7 +6431,11 @@ sub write_line { my $line_type = $line_of_tokens->{_line_type}; my $input_line = $line_of_tokens->{_line_text}; - my $want_blank_line_next = 0; + if ( $rOpts->{notidy} ) { + write_unindented_line($input_line); + $last_line_type = $line_type; + return; + } # _line_type codes are: # SYSTEM - system-specific code before hash-bang line @@ -5993,7 +6452,14 @@ sub write_line { # END_START - __END__ line # END - unidentified text following __END__ # ERROR - we are in big trouble, probably not a perl script - # + + # put a blank line after an =cut which comes before __END__ and __DATA__ + # (required by podchecker) + if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { + $file_writer_object->reset_consecutive_blank_lines(); + if ( $input_line !~ /^\s*$/ ) { want_blank_line() } + } + # handle line of code.. if ( $line_type eq 'CODE' ) { @@ -6014,29 +6480,20 @@ sub write_line { my $tee_line = 0; if ( $line_type =~ /^POD/ ) { - # Pod docs should have a preceding blank line. But be - # very careful in __END__ and __DATA__ sections, because: - # 1. the user may be using this section for any purpose whatsoever - # 2. the blank counters are not active there - # It should be safe to request a blank line between an - # __END__ or __DATA__ and an immediately following '=head' - # type line, (types END_START and DATA_START), but not for - # any other lines of type END or DATA. + # Pod docs should have a preceding blank line. But stay + # out of __END__ and __DATA__ sections, because + # the user may be using this section for any purpose whatsoever if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } - if ( !$skip_line + if ( !$skip_line && $line_type eq 'POD_START' - && $last_line_type !~ /^(END|DATA)$/ ) + # If the previous line is a __DATA__ line (or data + # contents, it's not valid to change it at all, no + # matter what is in the data + && !$saw_END_or_DATA_ ) { want_blank_line(); } - - # patch to put a blank line after =cut - # (required by podchecker) - if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { - $file_writer_object->reset_consecutive_blank_lines(); - $want_blank_line_next = 1; - } } # leave the blank counters in a predictable state @@ -6050,8 +6507,7 @@ sub write_line { if ( !$skip_line ) { if ($tee_line) { $file_writer_object->tee_on() } write_unindented_line($input_line); - if ($tee_line) { $file_writer_object->tee_off() } - if ($want_blank_line_next) { want_blank_line(); } + if ($tee_line) { $file_writer_object->tee_off() } } } $last_line_type = $line_type; @@ -6573,7 +7029,7 @@ sub check_for_long_gnu_style_lines { my $spaces_needed = $gnu_position_predictor - $rOpts_maximum_line_length + 2; - return if ( $spaces_needed < 0 ); + return if ( $spaces_needed <= 0 ); # We are over the limit, so try to remove a requested number of # spaces from leading whitespace. We are only allowed to remove @@ -6622,7 +7078,7 @@ sub check_for_long_gnu_style_lines { for ( ; $i <= $max_gnu_item_index ; $i++ ) { my $old_spaces = $gnu_item_list[$i]->get_SPACES(); - if ( $old_spaces > $deleted_spaces ) { + if ( $old_spaces >= $deleted_spaces ) { $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); } @@ -6972,12 +7428,12 @@ EOM } if ( $rOpts->{'dump-want-left-space'} ) { dump_want_left_space(*STDOUT); - exit 1; + exit 0; } if ( $rOpts->{'dump-want-right-space'} ) { dump_want_right_space(*STDOUT); - exit 1; + exit 0; } # default keywords for which space is introduced before an opening paren @@ -6986,42 +7442,58 @@ EOM unless while for foreach return switch case given when); @space_after_keyword{@_} = (1) x scalar(@_); - # allow user to modify these defaults - if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { - @space_after_keyword{@_} = (1) x scalar(@_); - } - + # first remove any or all of these if desired if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) { + + # -nsak='*' selects all the above keywords + if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) } @space_after_keyword{@_} = (0) x scalar(@_); } + # then allow user to add to these defaults + if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { + @space_after_keyword{@_} = (1) x scalar(@_); + } + # implement user break preferences - foreach my $tok ( split_words( $rOpts->{'want-break-after'} ) ) { - if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: - my $lbs = $left_bond_strength{$tok}; - my $rbs = $right_bond_strength{$tok}; - if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { - ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = - ( $lbs, $rbs ); + my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & + = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= + . : ? && || and or err xor + ); + + my $break_after = sub { + foreach my $tok (@_) { + if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: + my $lbs = $left_bond_strength{$tok}; + my $rbs = $right_bond_strength{$tok}; + if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { + ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = + ( $lbs, $rbs ); + } } - } + }; - foreach my $tok ( split_words( $rOpts->{'want-break-before'} ) ) { - my $lbs = $left_bond_strength{$tok}; - my $rbs = $right_bond_strength{$tok}; - if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { - ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = - ( $lbs, $rbs ); + my $break_before = sub { + foreach my $tok (@_) { + my $lbs = $left_bond_strength{$tok}; + my $rbs = $right_bond_strength{$tok}; + if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { + ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = + ( $lbs, $rbs ); + } } - } + }; + + $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); + $break_before->(@all_operators) + if ( $rOpts->{'break-before-all-operators'} ); + + $break_after->( split_words( $rOpts->{'want-break-after'} ) ); + $break_before->( split_words( $rOpts->{'want-break-before'} ) ); # make note if breaks are before certain key types %want_break_before = (); - foreach my $tok ( - '=', '.', ',', ':', '?', '&&', '||', 'and', - 'or', 'err', 'xor', '+', '-', '*', '/', - ) - { + foreach my $tok ( @all_operators, ',' ) { $want_break_before{$tok} = $left_bond_strength{$tok} < $right_bond_strength{$tok}; } @@ -7138,6 +7610,8 @@ EOM $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; $rOpts_break_at_old_ternary_breakpoints = $rOpts->{'break-at-old-ternary-breakpoints'}; + $rOpts_break_at_old_attribute_breakpoints = + $rOpts->{'break-at-old-attribute-breakpoints'}; $rOpts_break_at_old_comma_breakpoints = $rOpts->{'break-at-old-comma-breakpoints'}; $rOpts_break_at_old_keyword_breakpoints = @@ -7158,13 +7632,13 @@ EOM $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; $rOpts_short_concatenation_item_length = $rOpts->{'short-concatenation-item-length'}; - $rOpts_swallow_optional_blank_lines = - $rOpts->{'swallow-optional-blank-lines'}; - $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; - $rOpts_format_skipping = $rOpts->{'format-skipping'}; - $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; - $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; - $half_maximum_line_length = $rOpts_maximum_line_length / 2; + $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; + $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; + $rOpts_format_skipping = $rOpts->{'format-skipping'}; + $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; + $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; + $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; + $half_maximum_line_length = $rOpts_maximum_line_length / 2; # Note that both opening and closing tokens can access the opening # and closing flags of their container types. @@ -7194,6 +7668,13 @@ EOM '>' => $rOpts->{'closing-paren-indentation'}, ); + # flag indicating if any closing tokens are indented + $some_closing_token_indentation = + $rOpts->{'closing-paren-indentation'} + || $rOpts->{'closing-brace-indentation'} + || $rOpts->{'closing-square-bracket-indentation'} + || $rOpts->{'indent-closing-brace'}; + %opening_token_right = ( '(' => $rOpts->{'opening-paren-right'}, '{' => $rOpts->{'opening-hash-brace-right'}, @@ -7550,16 +8031,6 @@ EOM # retain any space after here doc operator ( hereerr.t) || ( $typel eq 'h' ) - # FIXME: this needs some further work; extrude.t has test cases - # it is safest to retain any space after start of ? : operator - # because of perl's quirky parser. - # ie, this line will fail if you remove the space after the '?': - # $b=join $comma ? ',' : ':', @_; # ok - # $b=join $comma ?',' : ':', @_; # error! - # but this is ok :) - # $b=join $comma?',' : ':', @_; # not a problem! - ## || ($typel eq '?') - # be careful with a space around ++ and --, to avoid ambiguity as to # which token it applies || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) ) @@ -7572,7 +8043,7 @@ EOM $tokenl eq 'my' # /^(for|foreach)$/ - && $is_for_foreach{$tokenll} + && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/ ) @@ -7583,6 +8054,21 @@ EOM #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) ) + # We must be sure that a space between a ? and a quoted string + # remains if the space before the ? remains. [Loca.pm, lockarea] + # ie, + # $b=join $comma ? ',' : ':', @_; # ok + # $b=join $comma?',' : ':', @_; # ok! + # $b=join $comma ?',' : ':', @_; # error! + # Not really required: + ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) ) + + # do not remove space between an '&' and a bare word because + # it may turn into a function evaluation, like here + # between '&' and 'O_ACCMODE', producing a syntax error [File.pm] + # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); + || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) + ; # the value of this long logic sequence is the result we want return $result; } @@ -7795,6 +8281,21 @@ sub set_white_space_flag { } else { $tightness = $tightness{$last_token} } + #================================================================= + # Patch for fabrice_bug.pl + # We must always avoid spaces around a bare word beginning with ^ as in: + # my $before = ${^PREMATCH}; + # Because all of the following cause an error in perl: + # my $before = ${ ^PREMATCH }; + # my $before = ${ ^PREMATCH}; + # my $before = ${^PREMATCH }; + # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1. + # Note that here we must set tightness=1 and not 2 so that the closing space + # is also avoided (via the $j_tight_closing_paren flag in coding) + if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } + + #================================================================= + if ( $tightness <= 0 ) { $ws = WS_YES; } @@ -7809,7 +8310,7 @@ sub set_white_space_flag { my $j_here = $j; ++$j_here if ( $token eq '-' - && $last_token eq '{' + && $last_token eq '{' && $$rtoken_type[ $j + 1 ] eq 'w' ); # $j_next is where a closing token should be if @@ -7918,7 +8419,7 @@ sub set_white_space_flag { # 'w' and 'i' checks for something like: # myfun( &myfun( ->myfun( # ----------------------------------------------------- - elsif (( $last_type =~ /^[wU]$/ ) + elsif (( $last_type =~ /^[wUG]$/ ) || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) { $ws = WS_NO unless ($rOpts_space_function_paren); @@ -8165,6 +8666,7 @@ sub set_white_space_flag { $ci_levels_to_go[$max_index_to_go] = $ci_level; $mate_index_to_go[$max_index_to_go] = -1; $matching_token_to_go[$max_index_to_go] = ''; + $bond_strength_to_go[$max_index_to_go] = 0; # Note: negative levels are currently retained as a diagnostic so that # the 'final indentation level' is correctly reported for bad scripts. @@ -8326,6 +8828,7 @@ sub set_white_space_flag { { $in_format_skipping_section = 0; write_logfile_entry("Exiting formatting skip section\n"); + $file_writer_object->reset_consecutive_blank_lines(); } return; } @@ -8350,12 +8853,13 @@ sub set_white_space_flag { # Handle a blank line.. if ( $jmax < 0 ) { - # For the 'swallow-optional-blank-lines' option, we delete all + # If keep-old-blank-lines is zero, we delete all # old blank lines and let the blank line rules generate any # needed blanks. - if ( !$rOpts_swallow_optional_blank_lines ) { + if ($rOpts_keep_old_blank_lines) { flush(); - $file_writer_object->write_blank_code_line(); + $file_writer_object->write_blank_code_line( + $rOpts_keep_old_blank_lines == 2 ); $last_line_leading_type = 'b'; } $last_line_had_side_comment = 0; @@ -8374,6 +8878,23 @@ sub set_white_space_flag { substr( $input_line, 0, 1 ) eq '#'; } + # Check for comments which are line directives + # Treat exactly as static block comments without leading space + # reference: perlsyn, near end, section Plain Old Comments (Not!) + # example: '# line 42 "new_filename.plx"' + if ( + $jmax == 0 + && $$rtoken_type[0] eq '#' + && $input_line =~ /^\# \s* + line \s+ (\d+) \s* + (?:\s("?)([^"]+)\2)? \s* + $/x + ) + { + $is_static_block_comment = 1; + $is_static_block_comment_without_leading_space = 1; + } + # create a hanging side comment if appropriate if ( $jmax == 0 @@ -8381,7 +8902,9 @@ sub set_white_space_flag { && $last_line_had_side_comment # last line had side comment && $input_line =~ /^\s/ # there is some leading space && !$is_static_block_comment # do not make static comment hanging - && $rOpts->{'hanging-side-comments'} # user is allowing this + && $rOpts->{'hanging-side-comments'} # user is allowing + # hanging side comments + # like this ) { @@ -8420,23 +8943,36 @@ sub set_white_space_flag { # output a blank line before block comments if ( - $last_line_leading_type !~ /^[#b]$/ - && $rOpts->{'blanks-before-comments'} # only if allowed - && ! - $is_static_block_comment # never before static block comments + # unless we follow a blank or comment line + $last_line_leading_type !~ /^[#b]$/ + + # only if allowed + && $rOpts->{'blanks-before-comments'} + + # not if this is an empty comment line + && $$rtokens[0] ne '#' + + # not after a short line ending in an opening token + # because we already have space above this comment. + # Note that the first comment in this if block, after + # the 'if (', does not get a blank line because of this. + && !$last_output_short_opening_token + + # never before static block comments + && !$is_static_block_comment ) { - flush(); # switching to new output stream + flush(); # switching to new output stream $file_writer_object->write_blank_code_line(); $last_line_leading_type = 'b'; } # TRIM COMMENTS -- This could be turned off as a option - $$rtokens[0] =~ s/\s*$//; # trim right end + $$rtokens[0] =~ s/\s*$//; # trim right end if ( $rOpts->{'indent-block-comments'} - && ( !$rOpts->{'indent-spaced-block-comments'} + && ( !$rOpts->{'indent-spaced-block-comments'} || $input_line =~ /^\s+/ ) && !$is_static_block_comment_without_leading_space ) @@ -8476,14 +9012,14 @@ sub set_white_space_flag { # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ # Examples: # *VERSION = \'1.01'; - # ( $VERSION ) = '$Revision: 1.61 $ ' =~ /\$Revision:\s+([^\s]+)/; + # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; # We will pass such a line straight through without breaking # it unless -npvl is used my $is_VERSION_statement = 0; if ( - !$saw_VERSION_in_this_file + !$saw_VERSION_in_this_file && $input_line =~ /VERSION/ # quick check to reject most lines && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) @@ -8633,6 +9169,16 @@ sub set_white_space_flag { } if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } + + # trim identifiers of trailing blanks which can occur + # under some unusual circumstances, such as if the + # identifier 'witch' has trailing blanks on input here: + # + # sub + # witch + # () # prototype may be on new line ... + # ... + if ( $type eq 'i' ) { $token =~ s/\s+$//g } } # change 'LABEL :' to 'LABEL:' @@ -8650,7 +9196,7 @@ sub set_white_space_flag { # make note of something like '$var = s/xxx/yyy/;' # in case it should have been '$var =~ s/xxx/yyy/;' if ( - $token =~ /^(s|tr|y|m|\/)/ + $token =~ /^(s|tr|y|m|\/)/ && $last_nonblank_token =~ /^(=|==|!=)$/ # precededed by simple scalar @@ -8764,12 +9310,12 @@ sub set_white_space_flag { $block_type !~ /^sub/ ? $rOpts->{'opening-brace-on-new-line'} - # use -sbl flag unless this is an anonymous sub block + # use -sbl flag for a named sub block : $block_type !~ /^sub\W*$/ ? $rOpts->{'opening-sub-brace-on-new-line'} - # do not break for anonymous subs - : 0; + # use -asbl flag for an anonymous sub block + : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; # Break before an opening '{' ... if ( @@ -8854,6 +9400,11 @@ sub set_white_space_flag { # hash (blktype.t, blktype1.t) && ( $block_type !~ /^[\{\};]$/ ) + # patch: and do not add semi-colons for recently + # added block types (see tmp/semicolon.t) + && ( $block_type !~ + /^(switch|case|given|when|default)$/ ) + # it seems best not to add semicolons in these # special block types: sort|map|grep && ( !$is_sort_map_grep{$block_type} ) @@ -9076,6 +9627,7 @@ sub set_white_space_flag { output_line_to_go() unless ( $no_internal_newlines + || ( $rOpts_keep_interior_semicolons && $j < $jmax ) || ( $next_nonblank_token eq '}' ) ); } @@ -9249,8 +9801,9 @@ sub output_line_to_go { # anything left to write? if ( $imin <= $imax ) { - # add a blank line before certain key types - if ( $last_line_leading_type !~ /^[#b]/ ) { + # add a blank line before certain key types but not after a comment + ##if ( $last_line_leading_type !~ /^[#b]/ ) { + if ( $last_line_leading_type !~ /^[#]/ ) { my $want_blank = 0; my $leading_token = $tokens_to_go[$imin]; my $leading_type = $types_to_go[$imin]; @@ -9258,8 +9811,8 @@ sub output_line_to_go { # blank lines before subs except declarations and one-liners # MCONVERSION LOCATION - for sub tokenization change if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { - $want_blank = ( $rOpts->{'blanks-before-subs'} ) - && ( + $want_blank = $rOpts->{'blank-lines-before-subs'} + if ( terminal_type( \@types_to_go, \@block_type_to_go, $imin, $imax ) !~ /^[\;\}]$/ ); @@ -9270,13 +9823,13 @@ sub output_line_to_go { elsif ($leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) { - $want_blank = ( $rOpts->{'blanks-before-subs'} ); + $want_blank = $rOpts->{'blank-lines-before-packages'}; } # break before certain key blocks except one-liners if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { - $want_blank = ( $rOpts->{'blanks-before-subs'} ) - && ( + $want_blank = $rOpts->{'blank-lines-before-subs'} + if ( terminal_type( \@types_to_go, \@block_type_to_go, $imin, $imax ) ne '}' ); @@ -9285,13 +9838,15 @@ sub output_line_to_go { # Break before certain block types if we haven't had a # break at this level for a while. This is the # difficult decision.. - elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/ - && $leading_type eq 'k' ) + elsif ($leading_type eq 'k' + && $last_line_leading_type ne 'b' + && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ ) { my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; if ( !defined($lc) ) { $lc = 0 } - $want_blank = $rOpts->{'blanks-before-blocks'} + $want_blank = + $rOpts->{'blanks-before-blocks'} && $lc >= $rOpts->{'long-block-line-count'} && $file_writer_object->get_consecutive_nonblank_lines() >= $rOpts->{'long-block-line-count'} @@ -9305,7 +9860,7 @@ sub output_line_to_go { # future: send blank line down normal path to VerticalAligner Perl::Tidy::VerticalAligner::flush(); - $file_writer_object->write_blank_code_line(); + $file_writer_object->require_blank_code_lines($want_blank); } } @@ -9383,16 +9938,21 @@ sub output_line_to_go { # otherwise use multiple lines else { - ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break); + ( $ri_first, $ri_last, my $colon_count ) = + set_continuation_breaks($saw_good_break); break_all_chain_tokens( $ri_first, $ri_last ); + break_equals( $ri_first, $ri_last ); + # now we do a correction step to clean this up a bit # (The only time we would not do this is for debugging) if ( $rOpts->{'recombine'} ) { ( $ri_first, $ri_last ) = recombine_breakpoints( $ri_first, $ri_last ); } + + insert_final_breaks( $ri_first, $ri_last ) if $colon_count; } # do corrector step if -lp option is used @@ -9565,14 +10125,50 @@ sub starting_one_line_block { my $i_nonblank = ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1; - if ( $$rtoken_type[$i_nonblank] eq '#' ) { + # Patch for one-line sort/map/grep/eval blocks with side comments: + # We will ignore the side comment length for sort/map/grep/eval + # because this can lead to statements which change every time + # perltidy is run. Here is an example from Denis Moskowitz which + # oscillates between these two states without this patch: + +## -------- +## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf +## @baz; +## +## grep { +## $_->foo ne 'bar' +## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf +## @baz; +## -------- + + # When the first line is input it gets broken apart by the main + # line break logic in sub print_line_of_tokens. + # When the second line is input it gets recombined by + # print_line_of_tokens and passed to the output routines. The + # output routines (set_continuation_breaks) do not break it apart + # because the bond strengths are set to the highest possible value + # for grep/map/eval/sort blocks, so the first version gets output. + # It would be possible to fix this by changing bond strengths, + # but they are high to prevent errors in older versions of perl. + + if ( $$rtoken_type[$i_nonblank] eq '#' + && !$is_sort_map_grep{$block_type} ) + { + + ## POSSIBLE FUTURE PATCH FOR IGNORING SIDE COMMENT LENGTHS + ## WHEN CHECKING FOR ONE-LINE BLOCKS: + ## if (flag set) then (just add 1 to pos) $pos += length( $$rtokens[$i_nonblank] ); if ( $i_nonblank > $i + 1 ) { - $pos += length( $$rtokens[ $i + 1 ] ); + + # source whitespace could be anything, assume + # at least one space before the hash on output + if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 } + else { $pos += length( $$rtokens[ $i + 1 ] ) } } - if ( $pos > $rOpts_maximum_line_length ) { + if ( $pos >= $rOpts_maximum_line_length ) { return 0; } } @@ -9621,6 +10217,104 @@ sub write_unindented_line { $file_writer_object->write_line( $_[0] ); } +sub undo_ci { + + # Undo continuation indentation in certain sequences + # For example, we can undo continuation indation in sort/map/grep chains + # my $dat1 = pack( "n*", + # map { $_, $lookup->{$_} } + # sort { $a <=> $b } + # grep { $lookup->{$_} ne $default } keys %$lookup ); + # To align the map/sort/grep keywords like this: + # my $dat1 = pack( "n*", + # map { $_, $lookup->{$_} } + # sort { $a <=> $b } + # grep { $lookup->{$_} ne $default } keys %$lookup ); + my ( $ri_first, $ri_last ) = @_; + my ( $line_1, $line_2, $lev_last ); + my $this_line_is_semicolon_terminated; + my $max_line = @$ri_first - 1; + + # looking at each line of this batch.. + # We are looking at leading tokens and looking for a sequence + # all at the same level and higher level than enclosing lines. + foreach my $line ( 0 .. $max_line ) { + + my $ibeg = $$ri_first[$line]; + my $lev = $levels_to_go[$ibeg]; + if ( $line > 0 ) { + + # if we have started a chain.. + if ($line_1) { + + # see if it continues.. + if ( $lev == $lev_last ) { + if ( $types_to_go[$ibeg] eq 'k' + && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) + { + + # chain continues... + # check for chain ending at end of a a statement + if ( $line == $max_line ) { + + # see of this line ends a statement + my $iend = $$ri_last[$line]; + $this_line_is_semicolon_terminated = + $types_to_go[$iend] eq ';' + + # with possible side comment + || ( $types_to_go[$iend] eq '#' + && $iend - $ibeg >= 2 + && $types_to_go[ $iend - 2 ] eq ';' + && $types_to_go[ $iend - 1 ] eq 'b' ); + } + $line_2 = $line if ($this_line_is_semicolon_terminated); + } + else { + + # kill chain + $line_1 = undef; + } + } + elsif ( $lev < $lev_last ) { + + # chain ends with previous line + $line_2 = $line - 1; + } + elsif ( $lev > $lev_last ) { + + # kill chain + $line_1 = undef; + } + + # undo the continuation indentation if a chain ends + if ( defined($line_2) && defined($line_1) ) { + my $continuation_line_count = $line_2 - $line_1 + 1; + @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = + (0) x ($continuation_line_count); + @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = + @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ]; + $line_1 = undef; + } + } + + # not in a chain yet.. + else { + + # look for start of a new sort/map/grep chain + if ( $lev > $lev_last ) { + if ( $types_to_go[$ibeg] eq 'k' + && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) + { + $line_1 = $line; + } + } + } + } + $lev_last = $lev; + } +} + sub undo_lp_ci { # If there is a single, long parameter within parens, like this: @@ -9687,17 +10381,22 @@ sub set_logical_padding { my $max_line = @$ri_first - 1; my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces, - $tok_next, $has_leading_op_next, $has_leading_op ); + $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); # looking at each line of this batch.. foreach $line ( 0 .. $max_line - 1 ) { # see if the next line begins with a logical operator - $ibeg = $$ri_first[$line]; - $iend = $$ri_last[$line]; - $ibeg_next = $$ri_first[ $line + 1 ]; - $tok_next = $tokens_to_go[$ibeg_next]; - $has_leading_op_next = $is_chain_operator{$tok_next}; + $ibeg = $$ri_first[$line]; + $iend = $$ri_last[$line]; + $ibeg_next = $$ri_first[ $line + 1 ]; + $tok_next = $tokens_to_go[$ibeg_next]; + $type_next = $types_to_go[$ibeg_next]; + + $has_leading_op_next = ( $tok_next =~ /^\w/ ) + ? $is_chain_operator{$tok_next} # + - * / : ? && || + : $is_chain_operator{$type_next}; # and, or + next unless ($has_leading_op_next); # next line must not be at lesser depth @@ -9713,13 +10412,14 @@ sub set_logical_padding { # if this is not first line of the batch ... if ( $line > 0 ) { - # and we have leading operator + # and we have leading operator.. next if $has_leading_op; - # and .. + # Introduce padding if.. # 1. the previous line is at lesser depth, or # 2. the previous line ends in an assignment - # + # 3. the previous line ends in a 'return' + # 4. the previous line ends in a comma # Example 1: previous line at lesser depth # if ( ( $Year < 1601 ) # <- we are here but # || ( $Year > 2899 ) # list has not yet @@ -9733,11 +10433,41 @@ sub set_logical_padding { # : $year % 100 ? 1 # : $year % 400 ? 0 # : 1; + # + # Example 3: previous line ending in comma: + # push @expr, + # /test/ ? undef + # : eval($_) ? 1 + # : eval($_) ? 1 + # : 0; + + # be sure levels agree (do not indent after an indented 'if') + next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); + + # allow padding on first line after a comma but only if: + # (1) this is line 2 and + # (2) there are at more than three lines and + # (3) lines 3 and 4 have the same leading operator + # These rules try to prevent padding within a long + # comma-separated list. + my $ok_comma; + if ( $types_to_go[$iendm] eq ',' + && $line == 1 + && $max_line > 2 ) + { + my $ibeg_next_next = $$ri_first[ $line + 2 ]; + my $tok_next_next = $tokens_to_go[$ibeg_next_next]; + $ok_comma = $tok_next_next eq $tok_next; + } + next unless ( - $is_assignment{ $types_to_go[$iendm] } + $is_assignment{ $types_to_go[$iendm] } + || $ok_comma || ( $nesting_depth_to_go[$ibegm] < $nesting_depth_to_go[$ibeg] ) + || ( $types_to_go[$iendm] eq 'k' + && $tokens_to_go[$iendm] eq 'return' ) ); # we will add padding before the first token @@ -9837,6 +10567,25 @@ sub set_logical_padding { last unless $ipad; } + # We cannot pad a leading token at the lowest level because + # it could cause a bug in which the starting indentation + # level is guessed incorrectly each time the code is run + # though perltidy, thus causing the code to march off to + # the right. For example, the following snippet would have + # this problem: + +## ov_method mycan( $package, '(""' ), $package +## or ov_method mycan( $package, '(0+' ), $package +## or ov_method mycan( $package, '(bool' ), $package +## or ov_method mycan( $package, '(nomethod' ), $package; + + # If this snippet is within a block this won't happen + # unless the user just processes the snippet alone within + # an editor. In that case either the user will see and + # fix the problem or it will be corrected next time the + # entire file is processed with perltidy. + next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 ); + # next line must not be at greater depth my $iend_next = $$ri_last[ $line + 1 ]; next @@ -9848,7 +10597,8 @@ sub set_logical_padding { if ( $types_to_go[$inext_next] eq 'b' ) { $inext_next++; } - my $type = $types_to_go[$ipad]; + my $type = $types_to_go[$ipad]; + my $type_next = $types_to_go[ $ipad + 1 ]; # see if there are multiple continuation lines my $logical_continuation_lines = 1; @@ -9862,6 +10612,17 @@ sub set_logical_padding { $logical_continuation_lines++; } } + + # see if leading types match + my $types_match = $types_to_go[$inext_next] eq $type; + my $matches_without_bang; + + # if first line has leading ! then compare the following token + if ( !$types_match && $type eq '!' ) { + $types_match = $matches_without_bang = + $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; + } + if ( # either we have multiple continuation lines to follow @@ -9872,7 +10633,7 @@ sub set_logical_padding { || ( # types must match - $types_to_go[$inext_next] eq $type + $types_match # and keywords must match if keyword && !( @@ -9975,6 +10736,23 @@ sub set_logical_padding { my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); $pad_spaces = $length_2 - $length_1; + # If the first line has a leading ! and the second does + # not, then remove one space to try to align the next + # leading characters, which are often the same. For example: + # if ( !$ts + # || $ts == $self->Holder + # || $self->Holder->Type eq "Arena" ) + # + # This usually helps readability, but if there are subsequent + # ! operators things will still get messed up. For example: + # + # if ( !exists $Net::DNS::typesbyname{$qtype} + # && exists $Net::DNS::classesbyname{$qtype} + # && !exists $Net::DNS::classesbyname{$qclass} + # && exists $Net::DNS::typesbyname{$qclass} ) + # We can't fix that. + if ($matches_without_bang) { $pad_spaces-- } + # make sure this won't change if -lp is used my $indentation_1 = $leading_spaces_to_go[$ibeg]; if ( ref($indentation_1) ) { @@ -9989,6 +10767,7 @@ sub set_logical_padding { # we might be able to handle a pad of -1 by removing a blank # token if ( $pad_spaces < 0 ) { + if ( $pad_spaces == -1 ) { if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) { $tokens_to_go[ $ipad - 1 ] = ''; @@ -9999,6 +10778,7 @@ sub set_logical_padding { # now apply any padding for alignment if ( $ipad >= 0 && $pad_spaces ) { + my $length_t = total_line_length( $ibeg, $iend ); if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) { $tokens_to_go[$ipad] = @@ -10278,7 +11058,9 @@ sub set_block_text_accumulator { # this will contain the column number of the last character # of the closing side comment + ##$csc_last_label="" unless $csc_last_label; $leading_block_text_line_length = + length($csc_last_label) + length($accumulating_text_for_block) + length( $rOpts->{'closing-side-comment-prefix'} ) + $leading_block_text_level * $rOpts_indent_columns + 3; @@ -10380,6 +11162,12 @@ sub accumulate_block_text { my $i_terminal = 0; # index of last nonblank token my $terminal_block_type = ""; + # update most recent statement label + $csc_last_label = "" unless ($csc_last_label); + if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] } + my $block_label = $csc_last_label; + + # Loop over all tokens of this batch for my $i ( 0 .. $max_index_to_go ) { my $type = $types_to_go[$i]; my $block_type = $block_type_to_go[$i]; @@ -10407,6 +11195,11 @@ sub accumulate_block_text { $rblock_leading_if_elsif_text; } + if ( defined( $csc_block_label{$type_sequence} ) ) { + $block_label = $csc_block_label{$type_sequence}; + delete $csc_block_label{$type_sequence}; + } + # if we run into a '}' then we probably started accumulating # at something like a trailing 'if' clause..no harm done. if ( $accumulating_text_for_block @@ -10439,6 +11232,13 @@ sub accumulate_block_text { $vertical_aligner_object->get_output_line_number(); $block_opening_line_number{$type_sequence} = $line_number; + # set a label for this block, except for + # a bare block which already has the label + # A label can only be used on the next { + if ( $block_type =~ /:$/ ) { $csc_last_label = "" } + $csc_block_label{$type_sequence} = $csc_last_label; + $csc_last_label = ""; + if ( $accumulating_text_for_block && $levels_to_go[$i] == $leading_block_text_level ) { @@ -10501,8 +11301,14 @@ sub accumulate_block_text { $block_leading_text, $rblock_leading_if_elsif_text ); } + # if this line ends in a label then remember it for the next pass + $csc_last_label = ""; + if ( $terminal_type eq 'J' ) { + $csc_last_label = $tokens_to_go[$i_terminal]; + } + return ( $terminal_type, $i_terminal, $i_block_leading_text, - $block_leading_text, $block_line_count ); + $block_leading_text, $block_line_count, $block_label ); } } @@ -10576,6 +11382,64 @@ sub make_else_csc_text { return $csc_text; } +{ # sub balance_csc_text + + my %matching_char; + + BEGIN { + %matching_char = ( + '{' => '}', + '(' => ')', + '[' => ']', + '}' => '{', + ')' => '(', + ']' => '[', + ); + } + + sub balance_csc_text { + + # Append characters to balance a closing side comment so that editors + # such as vim can correctly jump through code. + # Simple Example: + # input = ## end foreach my $foo ( sort { $b ... + # output = ## end foreach my $foo ( sort { $b ...}) + + # NOTE: This routine does not currently filter out structures within + # quoted text because the bounce algorithims in text editors do not + # necessarily do this either (a version of vim was checked and + # did not do this). + + # Some complex examples which will cause trouble for some editors: + # while ( $mask_string =~ /\{[^{]*?\}/g ) { + # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { + # if ( $1 eq '{' ) { + # test file test1/braces.pl has many such examples. + + my ($csc) = @_; + + # loop to examine characters one-by-one, RIGHT to LEFT and + # build a balancing ending, LEFT to RIGHT. + for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { + + my $char = substr( $csc, $pos, 1 ); + + # ignore everything except structural characters + next unless ( $matching_char{$char} ); + + # pop most recently appended character + my $top = chop($csc); + + # push it back plus the mate to the newest character + # unless they balance each other. + $csc = $csc . $top . $matching_char{$char} unless $top eq $char; + } + + # return the balanced string + return $csc; + } +} + sub add_closing_side_comment { # add closing side comments after closing block braces if -csc used @@ -10588,7 +11452,7 @@ sub add_closing_side_comment { #--------------------------------------------------------------- my ( $terminal_type, $i_terminal, $i_block_leading_text, - $block_leading_text, $block_line_count ) + $block_leading_text, $block_line_count, $block_label ) = accumulate_csc_text(); #--------------------------------------------------------------- @@ -10641,13 +11505,18 @@ sub add_closing_side_comment { { # then make the closing side comment text + if ($block_label) { $block_label .= " " } my $token = -"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]"; +"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]"; # append any extra descriptive text collected above if ( $i_block_leading_text == $i_terminal ) { $token .= $block_leading_text; } + + $token = balance_csc_text($token) + if $rOpts->{'closing-side-comments-balanced'}; + $token =~ s/\s*$//; # trim any trailing whitespace # handle case of existing closing side comment @@ -10657,11 +11526,13 @@ sub add_closing_side_comment { if ( $rOpts->{'closing-side-comment-warnings'} ) { my $old_csc = $tokens_to_go[$max_index_to_go]; my $new_csc = $token; - $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' - my $new_trailing_dots = $1; - $old_csc =~ s/\.\.\.\s*$//; $new_csc =~ s/\s+//g; # trim all whitespace - $old_csc =~ s/\s+//g; + $old_csc =~ s/\s+//g; # trim all whitespace + $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures + $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures + $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' + my $new_trailing_dots = $1; + $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' # Patch to handle multiple closing side comments at # else and elsif's. These have become too complicated @@ -10763,19 +11634,23 @@ sub add_closing_side_comment { } sub previous_nonblank_token { - my ($i) = @_; - if ( $i <= 0 ) { - return ""; - } - elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { - return $tokens_to_go[ $i - 1 ]; - } - elsif ( $i > 1 ) { - return $tokens_to_go[ $i - 2 ]; - } - else { - return ""; + my ($i) = @_; + my $name = ""; + my $im = $i - 1; + return "" if ( $im < 0 ); + if ( $types_to_go[$im] eq 'b' ) { $im--; } + return "" if ( $im < 0 ); + $name = $tokens_to_go[$im]; + + # prepend any sub name to an isolated -> to avoid unwanted alignments + # [test case is test8/penco.pl] + if ( $name eq '->' ) { + $im--; + if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { + $name = $tokens_to_go[$im] . $name; + } } + return $name; } sub send_lines_to_vertical_aligner { @@ -10802,6 +11677,8 @@ sub send_lines_to_vertical_aligner { Perl::Tidy::VerticalAligner::flush(); } + undo_ci( $ri_first, $ri_last ); + set_logical_padding( $ri_first, $ri_last ); # loop to prepare each line for shipment @@ -10811,9 +11688,184 @@ sub send_lines_to_vertical_aligner { my $ibeg = $$ri_first[$n]; my $iend = $$ri_last[$n]; - my @patterns = (); + my ( $rtokens, $rfields, $rpatterns ) = + make_alignment_patterns( $ibeg, $iend ); + + # Set flag to show how much level changes between this line + # and the next line, if we have it. + my $ljump = 0; + if ( $n < $n_last_line ) { + my $ibegp = $$ri_first[ $n + 1 ]; + $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend]; + } + + my ( $indentation, $lev, $level_end, $terminal_type, + $is_semicolon_terminated, $is_outdented_line ) + = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, + $ri_first, $ri_last, $rindentation_list, $ljump ); + + # we will allow outdenting of long lines.. + my $outdent_long_lines = ( + + # which are long quotes, if allowed + ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) + + # which are long block comments, if allowed + || ( + $types_to_go[$ibeg] eq '#' + && $rOpts->{'outdent-long-comments'} + + # but not if this is a static block comment + && !$is_static_block_comment + ) + ); + + my $level_jump = + $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; + + my $rvertical_tightness_flags = + set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, + $ri_first, $ri_last ); + + # flush an outdented line to avoid any unwanted vertical alignment + Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); + + my $is_terminal_ternary = 0; + if ( $tokens_to_go[$ibeg] eq ':' + || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) + { + if ( ( $terminal_type eq ';' && $level_end <= $lev ) + || ( $level_end < $lev ) ) + { + $is_terminal_ternary = 1; + } + } + + # send this new line down the pipe + my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; + Perl::Tidy::VerticalAligner::append_line( + $lev, + $level_end, + $indentation, + $rfields, + $rtokens, + $rpatterns, + $forced_breakpoint_to_go[$iend] || $in_comma_list, + $outdent_long_lines, + $is_terminal_ternary, + $is_semicolon_terminated, + $do_not_pad, + $rvertical_tightness_flags, + $level_jump, + ); + $in_comma_list = + $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; + + # flush an outdented line to avoid any unwanted vertical alignment + Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); + + $do_not_pad = 0; + + # Set flag indicating if this line ends in an opening + # token and is very short, so that a blank line is not + # needed if the subsequent line is a comment. + # Examples of what we are looking for: + # { + # && ( + # BEGIN { + # default { + # sub { + $last_output_short_opening_token + + # line ends in opening token + = $types_to_go[$iend] =~ /^[\{\(\[L]$/ + + # and either + && ( + # line has either single opening token + $iend == $ibeg + + # or is a single token followed by opening token. + # Note that sub identifiers have blanks like 'sub doit' + || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ ) + ) + + # and limit total to 10 character widths + && token_sequence_length( $ibeg, $iend ) <= 10; + +## $last_output_short_opening_token = +## $types_to_go[$iend] =~ /^[\{\(\[L]$/ +## && $iend - $ibeg <= 2 +## && $tokens_to_go[$ibeg] !~ /^sub/ +## && token_sequence_length( $ibeg, $iend ) <= 10; + + } # end of loop to output each line + + # remember indentation of lines containing opening containers for + # later use by sub set_adjusted_indentation + save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); +} + +{ # begin make_alignment_patterns + + my %block_type_map; + my %keyword_map; + + BEGIN { + + # map related block names into a common name to + # allow alignment + %block_type_map = ( + 'unless' => 'if', + 'else' => 'if', + 'elsif' => 'if', + 'when' => 'if', + 'default' => 'if', + 'case' => 'if', + 'sort' => 'map', + 'grep' => 'map', + ); + + # map certain keywords to the same 'if' class to align + # long if/elsif sequences. [elsif.pl] + %keyword_map = ( + 'unless' => 'if', + 'else' => 'if', + 'elsif' => 'if', + 'when' => 'given', + 'default' => 'given', + 'case' => 'switch', + + # treat an 'undef' similar to numbers and quotes + 'undef' => 'Q', + ); + } + + sub make_alignment_patterns { + + # Here we do some important preliminary work for the + # vertical aligner. We create three arrays for one + # output line. These arrays contain strings that can + # be tested by the vertical aligner to see if + # consecutive lines can be aligned vertically. + # + # The three arrays are indexed on the vertical + # alignment fields and are: + # @tokens - a list of any vertical alignment tokens for this line. + # These are tokens, such as '=' '&&' '#' etc which + # we want to might align vertically. These are + # decorated with various information such as + # nesting depth to prevent unwanted vertical + # alignment matches. + # @fields - the actual text of the line between the vertical alignment + # tokens. + # @patterns - a modified list of token types, one for each alignment + # field. These should normally each match before alignment is + # allowed, even when the alignment tokens match. + my ( $ibeg, $iend ) = @_; my @tokens = (); my @fields = (); + my @patterns = (); my $i_start = $ibeg; my $i; @@ -10831,15 +11883,65 @@ sub send_lines_to_vertical_aligner { # Unbalanced containers already avoid aligning across # container boundaries. if ( $tokens_to_go[$i] eq '(' ) { + + # if container is balanced on this line... my $i_mate = $mate_index_to_go[$i]; if ( $i_mate > $i && $i_mate <= $iend ) { $depth++; my $seqno = $type_sequence_to_go[$i]; my $count = comma_arrow_count($seqno); $multiple_comma_arrows[$depth] = $count && $count > 1; + + # Append the previous token name to make the container name + # more unique. This name will also be given to any commas + # within this container, and it helps avoid undesirable + # alignments of different types of containers. my $name = previous_nonblank_token($i); $name =~ s/^->//; $container_name[$depth] = "+" . $name; + + # Make the container name even more unique if necessary. + # If we are not vertically aligning this opening paren, + # append a character count to avoid bad alignment because + # it usually looks bad to align commas within continers + # for which the opening parens do not align. Here + # is an example very BAD alignment of commas (because + # the atan2 functions are not all aligned): + # $XY = + # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + + # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - + # $X * atan2( $X, 1 ) - + # $Y * atan2( $Y, 1 ); + # + # On the other hand, it is usually okay to align commas if + # opening parens align, such as: + # glVertex3d( $cx + $s * $xs, $cy, $z ); + # glVertex3d( $cx, $cy + $s * $ys, $z ); + # glVertex3d( $cx - $s * $xs, $cy, $z ); + # glVertex3d( $cx, $cy - $s * $ys, $z ); + # + # To distinguish between these situations, we will + # append the length of the line from the previous matching + # token, or beginning of line, to the function name. This + # will allow the vertical aligner to reject undesirable + # matches. + + # if we are not aligning on this paren... + if ( $matching_token_to_go[$i] eq '' ) { + + # Sum length from previous alignment, or start of line. + # Note that we have to sum token lengths here because + # padding has been done and so array $lengths_to_go + # is now wrong. + my $len = + length( + join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); + $len += leading_spaces_to_go($i_start) + if ( $i_start == $ibeg ); + + # tack length onto the container name to make unique + $container_name[$depth] .= "-" . $len; + } } } elsif ( $tokens_to_go[$i] eq ')' ) { @@ -10858,29 +11960,56 @@ sub send_lines_to_vertical_aligner { $tok .= "$nesting_depth_to_go[$i]"; } - # do any special decorations for commas to avoid unwanted - # cross-line alignments. - if ( $raw_tok eq ',' ) { + # also decorate commas with any container name to avoid + # unwanted cross-line alignments. + if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { if ( $container_name[$depth] ) { $tok .= $container_name[$depth]; } } - # decorate '=>' with: - # - Nothing if this container is unbalanced on this line. - # - The previous token if it is balanced and multiple '=>'s - # - The container name if it is bananced and no other '=>'s - elsif ( $raw_tok eq '=>' ) { - if ( $container_name[$depth] ) { - if ( $multiple_comma_arrows[$depth] ) { - $tok .= "+" . previous_nonblank_token($i); - } - else { - $tok .= $container_name[$depth]; - } + # Patch to avoid aligning leading and trailing if, unless. + # Mark trailing if, unless statements with container names. + # This makes them different from leading if, unless which + # are not so marked at present. If we ever need to name + # them too, we could use ci to distinguish them. + # Example problem to avoid: + # return ( 2, "DBERROR" ) + # if ( $retval == 2 ); + # if ( scalar @_ ) { + # my ( $a, $b, $c, $d, $e, $f ) = @_; + # } + if ( $raw_tok eq '(' ) { + my $ci = $ci_levels_to_go[$ibeg]; + if ( $container_name[$depth] =~ /^\+(if|unless)/ + && $ci ) + { + $tok .= $container_name[$depth]; } } + # Decorate block braces with block types to avoid + # unwanted alignments such as the following: + # foreach ( @{$routput_array} ) { $fh->print($_) } + # eval { $fh->close() }; + if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { + my $block_type = $block_type_to_go[$i]; + + # map certain related block types to allow + # else blocks to align + $block_type = $block_type_map{$block_type} + if ( defined( $block_type_map{$block_type} ) ); + + # remove sub names to allow one-line sub braces to align + # regardless of name + if ( $block_type =~ /^sub / ) { $block_type = 'sub' } + + # allow all control-type blocks to align + if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } + + $tok .= $block_type; + } + # concatenate the text of the consecutive tokens to form # the field push( @fields, @@ -10909,124 +12038,64 @@ sub send_lines_to_vertical_aligner { if ( $types_to_go[$i_next_nonblank] eq '=>' ) { $type = 'Q'; + + # Patch to ignore leading minus before words, + # by changing pattern 'mQ' into just 'Q', + # so that we can align things like this: + # Button => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } } } - # minor patch to make numbers and quotes align + # patch to make numbers and quotes align if ( $type eq 'n' ) { $type = 'Q' } + # patch to ignore any ! in patterns + if ( $type eq '!' ) { $type = '' } + $patterns[$j] .= $type; } # for keywords we have to use the actual text else { - # map certain keywords to the same 'if' class to align - # long if/elsif sequences. my testfile: elsif.pl my $tok = $tokens_to_go[$i]; - if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) { - $tok = 'if'; - } + + # but map certain keywords to a common string to allow + # alignment. + $tok = $keyword_map{$tok} + if ( defined( $keyword_map{$tok} ) ); $patterns[$j] .= $tok; } } # done with this line .. join text of tokens to make the last field push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); + return ( \@tokens, \@fields, \@patterns ); + } - my ( $indentation, $lev, $level_end, $terminal_type, - $is_semicolon_terminated, $is_outdented_line ) - = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns, - $ri_first, $ri_last, $rindentation_list ); +} # end make_alignment_patterns - # we will allow outdenting of long lines.. - my $outdent_long_lines = ( +{ # begin unmatched_indexes - # which are long quotes, if allowed - ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) + # closure to keep track of unbalanced containers. + # arrays shared by the routines in this block: + my @unmatched_opening_indexes_in_this_batch; + my @unmatched_closing_indexes_in_this_batch; + my %comma_arrow_count; - # which are long block comments, if allowed - || ( - $types_to_go[$ibeg] eq '#' - && $rOpts->{'outdent-long-comments'} + sub is_unbalanced_batch { + @unmatched_opening_indexes_in_this_batch + + @unmatched_closing_indexes_in_this_batch; + } - # but not if this is a static block comment - && !$is_static_block_comment - ) - ); + sub comma_arrow_count { + my $seqno = $_[0]; + return $comma_arrow_count{$seqno}; + } - my $level_jump = - $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; - - my $rvertical_tightness_flags = - set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, - $ri_first, $ri_last ); - - # flush an outdented line to avoid any unwanted vertical alignment - Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); - - my $is_terminal_ternary = 0; - if ( $tokens_to_go[$ibeg] eq ':' - || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) - { - if ( ( $terminal_type eq ';' && $level_end <= $lev ) - || ( $level_end < $lev ) ) - { - $is_terminal_ternary = 1; - } - } - - # send this new line down the pipe - my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; - Perl::Tidy::VerticalAligner::append_line( - $lev, - $level_end, - $indentation, - \@fields, - \@tokens, - \@patterns, - $forced_breakpoint_to_go[$iend] || $in_comma_list, - $outdent_long_lines, - $is_terminal_ternary, - $is_semicolon_terminated, - $do_not_pad, - $rvertical_tightness_flags, - $level_jump, - ); - $in_comma_list = - $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; - - # flush an outdented line to avoid any unwanted vertical alignment - Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); - - $do_not_pad = 0; - - } # end of loop to output each line - - # remember indentation of lines containing opening containers for - # later use by sub set_adjusted_indentation - save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); -} - -{ # begin unmatched_indexes - - # closure to keep track of unbalanced containers. - # arrays shared by the routines in this block: - my @unmatched_opening_indexes_in_this_batch; - my @unmatched_closing_indexes_in_this_batch; - my %comma_arrow_count; - - sub is_unbalanced_batch { - @unmatched_opening_indexes_in_this_batch + - @unmatched_closing_indexes_in_this_batch; - } - - sub comma_arrow_count { - my $seqno = $_[0]; - return $comma_arrow_count{$seqno}; - } - - sub match_opening_and_closing_tokens { + sub match_opening_and_closing_tokens { # Match up indexes of opening and closing braces, etc, in this batch. # This has to be done after all tokens are stored because unstoring @@ -11128,11 +12197,12 @@ sub get_opening_indentation { # first, see if the opening token is in the current batch my $i_opening = $mate_index_to_go[$i_closing]; - my ( $indent, $offset ); + my ( $indent, $offset, $is_leading, $exists ); + $exists = 1; if ( $i_opening >= 0 ) { # it is..look up the indentation - ( $indent, $offset ) = + ( $indent, $offset, $is_leading ) = lookup_opening_indentation( $i_opening, $ri_first, $ri_last, $rindentation_list ); } @@ -11142,24 +12212,29 @@ sub get_opening_indentation { my $seqno = $type_sequence_to_go[$i_closing]; if ($seqno) { if ( $saved_opening_indentation{$seqno} ) { - ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} }; + ( $indent, $offset, $is_leading ) = + @{ $saved_opening_indentation{$seqno} }; } # some kind of serious error # (example is badfile.t) else { - $indent = 0; - $offset = 0; + $indent = 0; + $offset = 0; + $is_leading = 0; + $exists = 0; } } # if no sequence number it must be an unbalanced container else { - $indent = 0; - $offset = 0; + $indent = 0; + $offset = 0; + $is_leading = 0; + $exists = 0; } } - return ( $indent, $offset ); + return ( $indent, $offset, $is_leading, $exists ); } sub lookup_opening_indentation { @@ -11206,9 +12281,10 @@ sub lookup_opening_indentation { $rindentation_list->[0] = $nline; # save line number to start looking next call - my $ibeg = $ri_start->[$nline]; - my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; - return ( $rindentation_list->[ $nline + 1 ], $offset ); + my $ibeg = $ri_start->[$nline]; + my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; + my $is_leading = ( $ibeg == $i_opening ); + return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); } { @@ -11233,7 +12309,7 @@ sub lookup_opening_indentation { # outdenting. my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, - $rindentation_list ) + $rindentation_list, $level_jump ) = @_; # we need to know the last token of this line @@ -11260,36 +12336,50 @@ sub lookup_opening_indentation { my $adjust_indentation = 0; my $default_adjust_indentation = $adjust_indentation; - my ( $opening_indentation, $opening_offset ); + my ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ); # if we are at a closing token of some type.. - if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) { + if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) { # get the indentation of the line containing the corresponding # opening token - ( $opening_indentation, $opening_offset ) = - get_opening_indentation( $ibeg, $ri_first, $ri_last, + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, $rindentation_list ); # First set the default behavior: - # default behavior is to outdent closing lines - # of the form: "); }; ]; )->xxx;" if ( + + # default behavior is to outdent closing lines + # of the form: "); }; ]; )->xxx;" $is_semicolon_terminated # and 'cuddled parens' of the form: ")->pack(" || ( - $terminal_type eq '(' + $terminal_type eq '(' && $types_to_go[$ibeg] eq ')' && ( $nesting_depth_to_go[$iend] + 1 == $nesting_depth_to_go[$ibeg] ) ) + + # and when the next line is at a lower indentation level + # PATCH: and only if the style allows undoing continuation + # for all closing token types. We should really wait until + # the indentation of the next line is known and then make + # a decision, but that would require another pass. + || ( $level_jump < 0 && !$some_closing_token_indentation ) ) { $adjust_indentation = 1; } - # TESTING: outdent something like '),' + # outdent something like '),' if ( $terminal_type eq ',' @@ -11323,6 +12413,28 @@ sub lookup_opening_indentation { } } + # YVES patch 1 of 2: + # Undo ci of line with leading closing eval brace, + # but not beyond the indention of the line with + # the opening brace. + if ( $block_type_to_go[$ibeg] eq 'eval' + && !$rOpts->{'line-up-parentheses'} + && !$rOpts->{'indent-closing-brace'} ) + { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + my $indentation = $leading_spaces_to_go[$ibeg]; + if ( defined($opening_indentation) + && $indentation > $opening_indentation ) + { + $adjust_indentation = 1; + } + } + $default_adjust_indentation = $adjust_indentation; # Now modify default behavior according to user request: @@ -11380,6 +12492,18 @@ sub lookup_opening_indentation { } } + # if line begins with a ':', align it with any + # previous line leading with corresponding ? + elsif ( $types_to_go[$ibeg] eq ':' ) { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + if ($is_leading) { $adjust_indentation = 2; } + } + ########################################################## # Section 2: set indentation according to flag set above # @@ -11536,12 +12660,19 @@ sub lookup_opening_indentation { # we must treat something like '} else {' as if it were # an isolated brace my $is_isolated_block_brace = ( # $iend == $ibeg ) && $block_type_to_go[$ibeg]; + ############################################################# my $is_isolated_block_brace = $block_type_to_go[$ibeg] && ( $iend == $ibeg || $is_if_elsif_else_unless_while_until_for_foreach{ - $block_type_to_go[$ibeg] } ); - ############################################################# - if ( !$is_isolated_block_brace && defined($opening_indentation) ) { + $block_type_to_go[$ibeg] + } ); + + # only do this for a ':; which is aligned with its leading '?' + my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; + if ( defined($opening_indentation) + && !$is_isolated_block_brace + && !$is_unaligned_colon ) + { if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) { $indentation = $opening_indentation; } @@ -11823,7 +12954,7 @@ sub set_vertical_tightness_flags { # Check for a last line with isolated opening BLOCK curly elsif ($rOpts_block_brace_vertical_tightness - && $ibeg eq $iend + && $ibeg eq $iend && $types_to_go[$iend] eq '{' && $block_type_to_go[$iend] =~ /$block_brace_vertical_tightness_pattern/o ) @@ -12481,6 +13612,11 @@ sub terminal_type { # adjust bond strength bias #----------------------------------------------------------------- + # add any bias set by sub scan_list at old comma break points. + elsif ( $type eq ',' ) { + $bond_str += $bond_strength_to_go[$i]; + } + elsif ( $type eq 'f' ) { $bond_str += $f_bias; $f_bias += $delta_bias; @@ -12700,6 +13836,14 @@ sub terminal_type { $bond_str = NO_BREAK; } + # Never break between a bareword and a following paren because + # perl may give an error. For example, if a break is placed + # between 'to_filehandle' and its '(' the following line will + # give a syntax error [Carp.pm]: my( $no) =fileno( + # to_filehandle( $in)) ; + if ( $next_nonblank_token eq '(' ) { + $bond_str = NO_BREAK; + } } # use strict requires that bare word within braces not start new line @@ -12837,6 +13981,34 @@ sub terminal_type { $bond_str = NO_BREAK; } + # Breaking before a ++ can cause perl to guess wrong. For + # example the following line will cause a syntax error + # with -extrude if we break between '$i' and '++' [fixstyle2] + # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); + elsif ( $next_nonblank_type eq '++' ) { + $bond_str = NO_BREAK; + } + + # Breaking before a ? before a quote can cause trouble if + # they are not separated by a blank. + # Example: a syntax error occurs if you break before the ? here + # my$logic=join$all?' && ':' || ',@regexps; + # From: Professional_Perl_Programming_Code/multifind.pl + elsif ( $next_nonblank_type eq '?' ) { + $bond_str = NO_BREAK + if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); + } + + # Breaking before a . followed by a number + # can cause trouble if there is no intervening space + # Example: a syntax error occurs if you break before the .2 here + # $str .= pack($endian.2, ensurrogate($ord)); + # From: perl58/Unicode.pm + elsif ( $next_nonblank_type eq '.' ) { + $bond_str = NO_BREAK + if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); + } + # patch to put cuddled elses back together when on multiple # lines, as in: } \n else \n { \n if ($rOpts_cuddled_else) { @@ -13001,34 +14173,108 @@ sub pad_array_to_go { my $dd = shift; my $bp_count = 0; my $do_not_break_apart = 0; - if ( $item_count_stack[$dd] && !$dont_align[$dd] ) { - - my $fbc = $forced_breakpoint_count; - - # always open comma lists not preceded by keywords, - # barewords, identifiers (that is, anything that doesn't - # look like a function call) - my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; - - set_comma_breakpoints_do( - $dd, - $opening_structure_index_stack[$dd], - $i, - $item_count_stack[$dd], - $identifier_count_stack[$dd], - $comma_index[$dd], - $next_nonblank_type, - $container_type[$dd], - $interrupted_list[$dd], - \$do_not_break_apart, - $must_break_open, - ); - $bp_count = $forced_breakpoint_count - $fbc; - $do_not_break_apart = 0 if $must_break_open; + + # anything to do? + if ( $item_count_stack[$dd] ) { + + # handle commas not in containers... + if ( $dont_align[$dd] ) { + do_uncontained_comma_breaks($dd); + } + + # handle commas within containers... + else { + my $fbc = $forced_breakpoint_count; + + # always open comma lists not preceded by keywords, + # barewords, identifiers (that is, anything that doesn't + # look like a function call) + my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; + + set_comma_breakpoints_do( + $dd, + $opening_structure_index_stack[$dd], + $i, + $item_count_stack[$dd], + $identifier_count_stack[$dd], + $comma_index[$dd], + $next_nonblank_type, + $container_type[$dd], + $interrupted_list[$dd], + \$do_not_break_apart, + $must_break_open, + ); + $bp_count = $forced_breakpoint_count - $fbc; + $do_not_break_apart = 0 if $must_break_open; + } } return ( $bp_count, $do_not_break_apart ); } + sub do_uncontained_comma_breaks { + + # Handle commas not in containers... + # This is a catch-all routine for commas that we + # don't know what to do with because the don't fall + # within containers. We will bias the bond strength + # to break at commas which ended lines in the input + # file. This usually works better than just trying + # to put as many items on a line as possible. A + # downside is that if the input file is garbage it + # won't work very well. However, the user can always + # prevent following the old breakpoints with the + # -iob flag. + my $dd = shift; + my $bias = -.01; + my $old_comma_break_count = 0; + foreach my $ii ( @{ $comma_index[$dd] } ) { + if ( $old_breakpoint_to_go[$ii] ) { + $old_comma_break_count++; + $bond_strength_to_go[$ii] = $bias; + + # reduce bias magnitude to force breaks in order + $bias *= 0.99; + } + } + + # Also put a break before the first comma if + # (1) there was a break there in the input, and + # (2) that was exactly one previous break in the input + # (3) there are multiple old comma breaks + # + # For example, we will follow the user and break after + # 'print' in this snippet: + # print + # "conformability (Not the same dimension)\n", + # "\t", $have, " is ", text_unit($hu), "\n", + # "\t", $want, " is ", text_unit($wu), "\n", + # ; + # But we will not force a break after the first comma here + # (causes a blinker): + # $heap->{stream}->set_output_filter( + # poe::filter::reference->new('myotherfreezer') ), + # ; + # + my $i_first_comma = $comma_index[$dd]->[0]; + if ( $old_breakpoint_to_go[$i_first_comma] ) { + my $level_comma = $levels_to_go[$i_first_comma]; + my $ibreak = -1; + my $obp_count = 0; + for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { + if ( $old_breakpoint_to_go[$ii] ) { + $obp_count++; + last if ( $obp_count > 1 ); + $ibreak = $ii + if ( $levels_to_go[$ii] == $level_comma ); + } + } + if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 1 ) + { + set_forced_breakpoint($ibreak); + } + } + } + my %is_logical_container; BEGIN { @@ -13049,7 +14295,6 @@ sub pad_array_to_go { $item_count_stack[$dd] == 0 && $is_logical_container{ $container_type[$dd] } - # TESTING: || $has_old_logical_breakpoints[$dd] ) { @@ -13169,6 +14414,13 @@ sub pad_array_to_go { $want_previous_breakpoint = $i; } } + + # Break before attributes if user broke there + if ($rOpts_break_at_old_attribute_breakpoints) { + if ( $next_nonblank_type eq 'A' ) { + $want_previous_breakpoint = $i; + } + } } next if ( $type eq 'b' ); $depth = $nesting_depth_to_go[ $i + 1 ]; @@ -13282,7 +14534,7 @@ sub pad_array_to_go { if ( $type eq ':' ) { $last_colon_sequence_number = $type_sequence; - # TESTING: retain break at a ':' line break + # retain break at a ':' line break if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_ternary_breakpoints ) { @@ -13507,7 +14759,8 @@ sub pad_array_to_go { $forced_breakpoint_count ); # update broken-sublist flag of the outer container - $has_broken_sublist[$depth] = $has_broken_sublist[$depth] + $has_broken_sublist[$depth] = + $has_broken_sublist[$depth] || $has_broken_sublist[$current_depth] || $is_long_term || $has_comma_breakpoints; @@ -13869,11 +15122,8 @@ sub pad_array_to_go { next; } - # skip past these commas if we are not supposed to format them - next if ( $dont_align[$depth] ); - # break after all commas above starting depth - if ( $depth < $starting_depth ) { + if ( $depth < $starting_depth && !$dont_align[$depth] ) { set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); next; } @@ -13892,7 +15142,6 @@ sub pad_array_to_go { && $container_environment_to_go[$i] eq 'BLOCK' ) { $dont_align[$depth] = 1; - next; } } @@ -14209,7 +15458,8 @@ sub find_token_starting_list { if ( $rOpts_line_up_parentheses && !$must_break_open ) { my $columns_if_unbroken = $rOpts_maximum_line_length - total_line_length( $i_opening_minus, $i_opening_paren ); - $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken ) + $need_lp_break_open = + ( $max_length[0] > $columns_if_unbroken ) || ( $max_length[1] > $columns_if_unbroken ) || ( $first_term_length > $columns_if_unbroken ); } @@ -15078,393 +16328,587 @@ sub undo_forced_breakpoint_stack { } } -sub recombine_breakpoints { +{ # begin recombine_breakpoints - # sub set_continuation_breaks is very liberal in setting line breaks - # for long lines, always setting breaks at good breakpoints, even - # when that creates small lines. Occasionally small line fragments - # are produced which would look better if they were combined. - # That's the task of this routine, recombine_breakpoints. - my ( $ri_first, $ri_last ) = @_; - my $more_to_do = 1; + my %is_amp_amp; + my %is_ternary; + my %is_math_op; + + BEGIN { + + @_ = qw( && || ); + @is_amp_amp{@_} = (1) x scalar(@_); + + @_ = qw( ? : ); + @is_ternary{@_} = (1) x scalar(@_); + + @_ = qw( + - * / ); + @is_math_op{@_} = (1) x scalar(@_); + } + + sub recombine_breakpoints { + + # sub set_continuation_breaks is very liberal in setting line breaks + # for long lines, always setting breaks at good breakpoints, even + # when that creates small lines. Occasionally small line fragments + # are produced which would look better if they were combined. + # That's the task of this routine, recombine_breakpoints. + # + # $ri_beg = ref to array of BEGinning indexes of each line + # $ri_end = ref to array of ENDing indexes of each line + my ( $ri_beg, $ri_end ) = @_; - # We keep looping over all of the lines of this batch - # until there are no more possible recombinations - my $nmax_last = @$ri_last; - while ($more_to_do) { - my $n_best = 0; - my $bs_best; - my $n; - my $nmax = @$ri_last - 1; + my $more_to_do = 1; - # safety check for infinite loop - unless ( $nmax < $nmax_last ) { + # We keep looping over all of the lines of this batch + # until there are no more possible recombinations + my $nmax_last = @$ri_end; + while ($more_to_do) { + my $n_best = 0; + my $bs_best; + my $n; + my $nmax = @$ri_end - 1; + + # safety check for infinite loop + unless ( $nmax < $nmax_last ) { # shouldn't happen because splice below decreases nmax on each pass: # but i get paranoid sometimes - die "Program bug-infinite loop in recombine breakpoints\n"; - } - $nmax_last = $nmax; - $more_to_do = 0; - my $previous_outdentable_closing_paren; - my $leading_amp_count = 0; - my $this_line_is_semicolon_terminated; + die "Program bug-infinite loop in recombine breakpoints\n"; + } + $nmax_last = $nmax; + $more_to_do = 0; + my $previous_outdentable_closing_paren; + my $leading_amp_count = 0; + my $this_line_is_semicolon_terminated; - # loop over all remaining lines in this batch - for $n ( 1 .. $nmax ) { + # loop over all remaining lines in this batch + for $n ( 1 .. $nmax ) { - #---------------------------------------------------------- - # If we join the current pair of lines, - # line $n-1 will become the left part of the joined line - # line $n will become the right part of the joined line - # - # Here are Indexes of the endpoint tokens of the two lines: - # - # ---left---- | ---right--- - # $if $imid | $imidr $il - # - # We want to decide if we should join tokens $imid to $imidr - # - # We will apply a number of ad-hoc tests to see if joining - # here will look ok. The code will just issue a 'next' - # command if the join doesn't look good. If we get through - # the gauntlet of tests, the lines will be recombined. - #---------------------------------------------------------- - my $if = $$ri_first[ $n - 1 ]; - my $il = $$ri_last[$n]; - my $imid = $$ri_last[ $n - 1 ]; - my $imidr = $$ri_first[$n]; - - #my $depth_increase=( $nesting_depth_to_go[$imidr] - - # $nesting_depth_to_go[$if] ); - -##print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n"; - - # If line $n is the last line, we set some flags and - # do any special checks for it - if ( $n == $nmax ) { - - # a terminal '{' should stay where it is - next if $types_to_go[$imidr] eq '{'; - - # set flag if statement $n ends in ';' - $this_line_is_semicolon_terminated = $types_to_go[$il] eq ';' - - # with possible side comment - || ( $types_to_go[$il] eq '#' - && $il - $imidr >= 2 - && $types_to_go[ $il - 2 ] eq ';' - && $types_to_go[ $il - 1 ] eq 'b' ); - } - - #---------------------------------------------------------- - # Section 1: examine token at $imid (right end of first line - # of pair) - #---------------------------------------------------------- - - # an isolated '}' may join with a ';' terminated segment - if ( $types_to_go[$imid] eq '}' ) { - - # Check for cases where combining a semicolon terminated - # statement with a previous isolated closing paren will - # allow the combined line to be outdented. This is - # generally a good move. For example, we can join up - # the last two lines here: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) - # = stat($file); + #---------------------------------------------------------- + # If we join the current pair of lines, + # line $n-1 will become the left part of the joined line + # line $n will become the right part of the joined line # - # to get: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) = stat($file); + # Here are Indexes of the endpoint tokens of the two lines: # - # which makes the parens line up. + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ + # | + # We want to decide if we should remove the line break + # betwen the tokens at $iend_1 and $ibeg_2 # - # Another example, from Joe Matarazzo, probably looks best - # with the 'or' clause appended to the trailing paren: - # $self->some_method( - # PARAM1 => 'foo', - # PARAM2 => 'bar' - # ) or die "Some_method didn't work"; + # We will apply a number of ad-hoc tests to see if joining + # here will look ok. The code will just issue a 'next' + # command if the join doesn't look good. If we get through + # the gauntlet of tests, the lines will be recombined. + #---------------------------------------------------------- # - $previous_outdentable_closing_paren = - $this_line_is_semicolon_terminated # ends in ';' - && $if == $imid # only one token on last line - && $tokens_to_go[$imid] eq ')' # must be structural paren - - # only &&, ||, and : if no others seen - # (but note: our count made below could be wrong - # due to intervening comments) - && ( $leading_amp_count == 0 - || $types_to_go[$imidr] !~ /^(:|\&\&|\|\|)$/ ) - - # but leading colons probably line up with with a - # previous colon or question (count could be wrong). - && $types_to_go[$imidr] ne ':' - - # only one step in depth allowed. this line must not - # begin with a ')' itself. - && ( $nesting_depth_to_go[$imid] == - $nesting_depth_to_go[$il] + 1 ); + # beginning and ending tokens of the lines we are working on + my $ibeg_1 = $$ri_beg[ $n - 1 ]; + my $iend_1 = $$ri_end[ $n - 1 ]; + my $iend_2 = $$ri_end[$n]; + my $ibeg_2 = $$ri_beg[$n]; + + my $ibeg_nmax = $$ri_beg[$nmax]; + + # some beginning indexes of other lines, which may not exist + my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1; + my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1; + my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1; + + my $bs_tweak = 0; + + #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - + # $nesting_depth_to_go[$ibeg_1] ); + +##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n"; + + # If line $n is the last line, we set some flags and + # do any special checks for it + if ( $n == $nmax ) { + + # a terminal '{' should stay where it is + next if $types_to_go[$ibeg_2] eq '{'; + + # set flag if statement $n ends in ';' + $this_line_is_semicolon_terminated = + $types_to_go[$iend_2] eq ';' + + # with possible side comment + || ( $types_to_go[$iend_2] eq '#' + && $iend_2 - $ibeg_2 >= 2 + && $types_to_go[ $iend_2 - 2 ] eq ';' + && $types_to_go[ $iend_2 - 1 ] eq 'b' ); + } + + #---------------------------------------------------------- + # Section 1: examine token at $iend_1 (right end of first line + # of pair) + #---------------------------------------------------------- + + # an isolated '}' may join with a ';' terminated segment + if ( $types_to_go[$iend_1] eq '}' ) { + + # Check for cases where combining a semicolon terminated + # statement with a previous isolated closing paren will + # allow the combined line to be outdented. This is + # generally a good move. For example, we can join up + # the last two lines here: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) + # = stat($file); + # + # to get: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) = stat($file); + # + # which makes the parens line up. + # + # Another example, from Joe Matarazzo, probably looks best + # with the 'or' clause appended to the trailing paren: + # $self->some_method( + # PARAM1 => 'foo', + # PARAM2 => 'bar' + # ) or die "Some_method didn't work"; + # + $previous_outdentable_closing_paren = + $this_line_is_semicolon_terminated # ends in ';' + && $ibeg_1 == $iend_1 # only one token on last line + && $tokens_to_go[$iend_1] eq + ')' # must be structural paren + + # only &&, ||, and : if no others seen + # (but note: our count made below could be wrong + # due to intervening comments) + && ( $leading_amp_count == 0 + || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ ) + + # but leading colons probably line up with with a + # previous colon or question (count could be wrong). + && $types_to_go[$ibeg_2] ne ':' + + # only one step in depth allowed. this line must not + # begin with a ')' itself. + && ( $nesting_depth_to_go[$iend_1] == + $nesting_depth_to_go[$iend_2] + 1 ); + + # YVES patch 2 of 2: + # Allow cuddled eval chains, like this: + # eval { + # #STUFF; + # 1; # return true + # } or do { + # #handle error + # }; + # This patch works together with a patch in + # setting adjusted indentation (where the closing eval + # brace is outdented if possible). + # The problem is that an 'eval' block has continuation + # indentation and it looks better to undo it in some + # cases. If we do not use this patch we would get: + # eval { + # #STUFF; + # 1; # return true + # } + # or do { + # #handle error + # }; + # The alternative, for uncuddled style, is to create + # a patch in set_adjusted_indentation which undoes + # the indentation of a leading line like 'or do {'. + # This doesn't work well with -icb through + if ( + $block_type_to_go[$iend_1] eq 'eval' + && !$rOpts->{'line-up-parentheses'} + && !$rOpts->{'indent-closing-brace'} + && $tokens_to_go[$iend_2] eq '{' + && ( + ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ ) + || ( $types_to_go[$ibeg_2] eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_2] } ) + || $is_if_unless{ $tokens_to_go[$ibeg_2] } + ) + ) + { + $previous_outdentable_closing_paren ||= 1; + } - next - unless ( - $previous_outdentable_closing_paren + next + unless ( + $previous_outdentable_closing_paren - # handle '.' and '?' specially below - || ( $types_to_go[$imidr] =~ /^[\.\?]$/ ) - ); - } + # handle '.' and '?' specially below + || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ ) + ); + } - # do not recombine lines with ending &&, ||, or : - elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) { - next unless $want_break_before{ $types_to_go[$imid] }; - } + # YVES + # honor breaks at opening brace + # Added to prevent recombining something like this: + # } || eval { package main; + elsif ( $types_to_go[$iend_1] eq '{' ) { + next if $forced_breakpoint_to_go[$iend_1]; + } + + # do not recombine lines with ending &&, ||, + elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) { + next unless $want_break_before{ $types_to_go[$iend_1] }; + } + + # keep a terminal colon + elsif ( $types_to_go[$iend_1] eq ':' ) { + next unless $want_break_before{ $types_to_go[$iend_1] }; + } - # for lines ending in a comma... - elsif ( $types_to_go[$imid] eq ',' ) { + # Identify and recombine a broken ?/: chain + elsif ( $types_to_go[$iend_1] eq '?' ) { - # an isolated '},' may join with an identifier + ';' - # this is useful for the class of a 'bless' statement (bless.t) - if ( $types_to_go[$if] eq '}' - && $types_to_go[$imidr] eq 'i' ) - { + # Do not recombine different levels next - unless ( ( $if == ( $imid - 1 ) ) - && ( $il == ( $imidr + 1 ) ) - && $this_line_is_semicolon_terminated ); + if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); - # override breakpoint - $forced_breakpoint_to_go[$imid] = 0; + # do not recombine unless next line ends in : + next unless $types_to_go[$iend_2] eq ':'; } - # but otherwise, do not recombine unless this will leave - # just 1 more line - else { - next unless ( $n + 1 >= $nmax ); - } - } + # for lines ending in a comma... + elsif ( $types_to_go[$iend_1] eq ',' ) { - # opening paren.. - elsif ( $types_to_go[$imid] eq '(' ) { + # Do not recombine at comma which is following the + # input bias. + # TODO: might be best to make a special flag + next if ( $old_breakpoint_to_go[$iend_1] ); - # No longer doing this - } + # an isolated '},' may join with an identifier + ';' + # this is useful for the class of a 'bless' statement (bless.t) + if ( $types_to_go[$ibeg_1] eq '}' + && $types_to_go[$ibeg_2] eq 'i' ) + { + next + unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) + && ( $iend_2 == ( $ibeg_2 + 1 ) ) + && $this_line_is_semicolon_terminated ); - elsif ( $types_to_go[$imid] eq ')' ) { + # override breakpoint + $forced_breakpoint_to_go[$iend_1] = 0; + } - # No longer doing this - } + # but otherwise .. + else { - # keep a terminal colon - elsif ( $types_to_go[$imid] eq ':' ) { - next; - } + # do not recombine after a comma unless this will leave + # just 1 more line + next unless ( $n + 1 >= $nmax ); - # keep a terminal for-semicolon - elsif ( $types_to_go[$imid] eq 'f' ) { - next; - } + # do not recombine if there is a change in indentation depth + next + if ( + $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); + + # do not recombine a "complex expression" after a + # comma. "complex" means no parens. + my $saw_paren; + foreach my $ii ( $ibeg_2 .. $iend_2 ) { + if ( $tokens_to_go[$ii] eq '(' ) { + $saw_paren = 1; + last; + } + } + next if $saw_paren; + } + } - # if '=' at end of line ... - elsif ( $is_assignment{ $types_to_go[$imid] } ) { - - my $is_short_quote = - ( $types_to_go[$imidr] eq 'Q' - && $imidr == $il - && length( $tokens_to_go[$imidr] ) < - $rOpts_short_concatenation_item_length ); - my $ifnmax = $$ri_first[$nmax]; - my $ifnp = ( $nmax > $n ) ? $$ri_first[ $n + 1 ] : $ifnmax; - my $is_qk = - ( $types_to_go[$if] eq '?' && $types_to_go[$ifnp] eq ':' ); - - # always join an isolated '=', a short quote, or if this - # will put ?/: at start of adjacent lines - if ( $if != $imid - && !$is_short_quote - && !$is_qk ) - { - next - unless ( - ( + # opening paren.. + elsif ( $types_to_go[$iend_1] eq '(' ) { - # unless we can reduce this to two lines - $nmax < $n + 2 + # No longer doing this + } - # or three lines, the last with a leading semicolon - || ( $nmax == $n + 2 - && $types_to_go[$ifnmax] eq ';' ) + elsif ( $types_to_go[$iend_1] eq ')' ) { - # or the next line ends with a here doc - || $types_to_go[$il] eq 'h' - ) + # No longer doing this + } - # do not recombine if the two lines might align well - # this is a very approximate test for this - && $types_to_go[$imidr] ne $types_to_go[$ifnp] - ); + # keep a terminal for-semicolon + elsif ( $types_to_go[$iend_1] eq 'f' ) { + next; + } + + # if '=' at end of line ... + elsif ( $is_assignment{ $types_to_go[$iend_1] } ) { + + # keep break after = if it was in input stream + # this helps prevent 'blinkers' + next if $old_breakpoint_to_go[$iend_1] + + # don't strand an isolated '=' + && $iend_1 != $ibeg_1; - # -lp users often prefer this: - # my $title = function($env, $env, $sysarea, - # "bubba Borrower Entry"); - # so we will recombine if -lp is used we have ending comma - if ( !$rOpts_line_up_parentheses - || $types_to_go[$il] ne ',' ) + my $is_short_quote = + ( $types_to_go[$ibeg_2] eq 'Q' + && $ibeg_2 == $iend_2 + && length( $tokens_to_go[$ibeg_2] ) < + $rOpts_short_concatenation_item_length ); + my $is_ternary = + ( $types_to_go[$ibeg_1] eq '?' + && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); + + # always join an isolated '=', a short quote, or if this + # will put ?/: at start of adjacent lines + if ( $ibeg_1 != $iend_1 + && !$is_short_quote + && !$is_ternary ) { + next + unless ( + ( - # otherwise, scan the rhs line up to last token for - # complexity. Note that we are not counting the last - # token in case it is an opening paren. - my $tv = 0; - my $depth = $nesting_depth_to_go[$imidr]; - for ( my $i = $imidr + 1 ; $i < $il ; $i++ ) { - if ( $nesting_depth_to_go[$i] != $depth ) { - $tv++; - last if ( $tv > 1 ); - } - $depth = $nesting_depth_to_go[$i]; - } + # unless we can reduce this to two lines + $nmax < $n + 2 - # ok to recombine if no level changes before last token - if ( $tv > 0 ) { + # or three lines, the last with a leading semicolon + || ( $nmax == $n + 2 + && $types_to_go[$ibeg_nmax] eq ';' ) - # otherwise, do not recombine if more than two - # level changes. - next if ( $tv > 1 ); + # or the next line ends with a here doc + || $types_to_go[$iend_2] eq 'h' - # check total complexity of the two adjacent lines - # that will occur if we do this join - my $istop = - ( $n < $nmax ) ? $$ri_last[ $n + 1 ] : $il; - for ( my $i = $il ; $i <= $istop ; $i++ ) { + # or the next line ends in an open paren or brace + # and the break hasn't been forced [dima.t] + || ( !$forced_breakpoint_to_go[$iend_1] + && $types_to_go[$iend_2] eq '{' ) + ) + + # do not recombine if the two lines might align well + # this is a very approximate test for this + && ( $ibeg_3 >= 0 + && $types_to_go[$ibeg_2] ne + $types_to_go[$ibeg_3] ) + ); + + # -lp users often prefer this: + # my $title = function($env, $env, $sysarea, + # "bubba Borrower Entry"); + # so we will recombine if -lp is used we have ending + # comma + if ( !$rOpts_line_up_parentheses + || $types_to_go[$iend_2] ne ',' ) + { + + # otherwise, scan the rhs line up to last token for + # complexity. Note that we are not counting the last + # token in case it is an opening paren. + my $tv = 0; + my $depth = $nesting_depth_to_go[$ibeg_2]; + for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) { if ( $nesting_depth_to_go[$i] != $depth ) { $tv++; - last if ( $tv > 2 ); + last if ( $tv > 1 ); } $depth = $nesting_depth_to_go[$i]; } + # ok to recombine if no level changes before last token + if ( $tv > 0 ) { + + # otherwise, do not recombine if more than two + # level changes. + next if ( $tv > 1 ); + + # check total complexity of the two adjacent lines + # that will occur if we do this join + my $istop = + ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2; + for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 2 ); + } + $depth = $nesting_depth_to_go[$i]; + } + # do not recombine if total is more than 2 level changes - next if ( $tv > 2 ); + next if ( $tv > 2 ); + } } } - } - unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) { - $forced_breakpoint_to_go[$imid] = 0; + unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { + $forced_breakpoint_to_go[$iend_1] = 0; + } } - } - # for keywords.. - elsif ( $types_to_go[$imid] eq 'k' ) { + # for keywords.. + elsif ( $types_to_go[$iend_1] eq 'k' ) { - # make major control keywords stand out - # (recombine.t) - next - if ( + # make major control keywords stand out + # (recombine.t) + next + if ( - #/^(last|next|redo|return)$/ - $is_last_next_redo_return{ $tokens_to_go[$imid] } + #/^(last|next|redo|return)$/ + $is_last_next_redo_return{ $tokens_to_go[$iend_1] } - # but only if followed by multiple lines - && $n < $nmax - ); + # but only if followed by multiple lines + && $n < $nmax + ); - if ( $is_and_or{ $tokens_to_go[$imid] } ) { - next unless $want_break_before{ $tokens_to_go[$imid] }; + if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { + next + unless $want_break_before{ $tokens_to_go[$iend_1] }; + } } - } - # handle trailing + - * / - elsif ( $types_to_go[$imid] =~ /^[\+\-\*\/]$/ ) { - my $i_next_nonblank = $imidr; - my $i_next_next = $i_next_nonblank + 1; - $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); + # handle trailing + - * / + elsif ( $is_math_op{ $types_to_go[$iend_1] } ) { - # do not strand numbers - next - unless ( - $types_to_go[$i_next_nonblank] eq 'n' - && ( - $i_next_nonblank == $il - || ( $i_next_next == $il - && $types_to_go[$i_next_next] =~ /^[\+\-\*\/]$/ ) + # combine lines if next line has single number + # or a short term followed by same operator + my $i_next_nonblank = $ibeg_2; + my $i_next_next = $i_next_nonblank + 1; + $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); + my $number_follows = $types_to_go[$i_next_nonblank] eq 'n' + && ( + $i_next_nonblank == $iend_2 + || ( $i_next_next == $iend_2 + && $is_math_op{ $types_to_go[$i_next_next] } ) || $types_to_go[$i_next_next] eq ';' - ) - ); - } + ); - #---------------------------------------------------------- - # Section 2: Now examine token at $imidr (left end of second - # line of pair) - #---------------------------------------------------------- - - # join lines identified above as capable of - # causing an outdented line with leading closing paren - if ($previous_outdentable_closing_paren) { - $forced_breakpoint_to_go[$imid] = 0; - } - - # do not recombine lines with leading &&, ||, or : - elsif ( $types_to_go[$imidr] =~ /^(:|\&\&|\|\|)$/ ) { - $leading_amp_count++; - next if $want_break_before{ $types_to_go[$imidr] }; - } - - # Identify and recombine a broken ?/: chain - elsif ( $types_to_go[$imidr] eq '?' ) { - - # indexes of line first tokens -- - # mm - line before previous line - # f - previous line - # <-- this line - # ff - next line - # fff - line after next - my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1; - my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1; - my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1; - my $seqno = $type_sequence_to_go[$imidr]; - my $f_ok = - ( $types_to_go[$if] eq ':' - && $type_sequence_to_go[$if] == - $seqno - TYPE_SEQUENCE_INCREMENT ); - my $mm_ok = - ( $imm >= 0 - && $types_to_go[$imm] eq ':' - && $type_sequence_to_go[$imm] == - $seqno - 2 * TYPE_SEQUENCE_INCREMENT ); - - my $ff_ok = - ( $iff > 0 - && $types_to_go[$iff] eq ':' - && $type_sequence_to_go[$iff] == $seqno ); - my $fff_ok = - ( $ifff > 0 - && $types_to_go[$ifff] eq ':' - && $type_sequence_to_go[$ifff] == - $seqno + TYPE_SEQUENCE_INCREMENT ); - - # we require that this '?' be part of a correct sequence - # of 3 in a row or else no recombination is done. - next - unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) ); - $forced_breakpoint_to_go[$imid] = 0; - } + # find token before last operator of previous line + my $iend_1_minus = $iend_1; + $iend_1_minus-- + if ( $iend_1_minus > $ibeg_1 ); + $iend_1_minus-- + if ( $types_to_go[$iend_1_minus] eq 'b' + && $iend_1_minus > $ibeg_1 ); + + my $short_term_follows = + ( $types_to_go[$iend_2] eq $types_to_go[$iend_1] + && $types_to_go[$iend_1_minus] =~ /^[in]$/ + && $iend_2 <= $ibeg_2 + 2 + && length( $tokens_to_go[$ibeg_2] ) < + $rOpts_short_concatenation_item_length ); - # do not recombine lines with leading '.' - elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) { - my $i_next_nonblank = $imidr + 1; - if ( $types_to_go[$i_next_nonblank] eq 'b' ) { - $i_next_nonblank++; + next + unless ( $number_follows || $short_term_follows ); } - next - unless ( + #---------------------------------------------------------- + # Section 2: Now examine token at $ibeg_2 (left end of second + # line of pair) + #---------------------------------------------------------- + + # join lines identified above as capable of + # causing an outdented line with leading closing paren + if ($previous_outdentable_closing_paren) { + $forced_breakpoint_to_go[$iend_1] = 0; + } + + # do not recombine lines with leading : + elsif ( $types_to_go[$ibeg_2] eq ':' ) { + $leading_amp_count++; + next if $want_break_before{ $types_to_go[$ibeg_2] }; + } + + # handle lines with leading &&, || + elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { + + $leading_amp_count++; + + # ok to recombine if it follows a ? or : + # and is followed by an open paren.. + my $ok = + ( $is_ternary{ $types_to_go[$ibeg_1] } + && $tokens_to_go[$iend_2] eq '(' ) + + # or is followed by a ? or : at same depth + # + # We are looking for something like this. We can + # recombine the && line with the line above to make the + # structure more clear: + # return + # exists $G->{Attr}->{V} + # && exists $G->{Attr}->{V}->{$u} + # ? %{ $G->{Attr}->{V}->{$u} } + # : (); + # + # We should probably leave something like this alone: + # return + # exists $G->{Attr}->{E} + # && exists $G->{Attr}->{E}->{$u} + # && exists $G->{Attr}->{E}->{$u}->{$v} + # ? %{ $G->{Attr}->{E}->{$u}->{$v} } + # : (); + # so that we either have all of the &&'s (or ||'s) + # on one line, as in the first example, or break at + # each one as in the second example. However, it + # sometimes makes things worse to check for this because + # it prevents multiple recombinations. So this is not done. + || ( $ibeg_3 >= 0 + && $is_ternary{ $types_to_go[$ibeg_3] } + && $nesting_depth_to_go[$ibeg_3] == + $nesting_depth_to_go[$ibeg_2] ); + + next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] }; + $forced_breakpoint_to_go[$iend_1] = 0; + + # tweak the bond strength to give this joint priority + # over ? and : + $bs_tweak = 0.25; + } + + # Identify and recombine a broken ?/: chain + elsif ( $types_to_go[$ibeg_2] eq '?' ) { + + # Do not recombine different levels + my $lev = $levels_to_go[$ibeg_2]; + next if ( $lev ne $levels_to_go[$ibeg_1] ); + + # Do not recombine a '?' if either next line or + # previous line does not start with a ':'. The reasons + # are that (1) no alignment of the ? will be possible + # and (2) the expression is somewhat complex, so the + # '?' is harder to see in the interior of the line. + my $follows_colon = + $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':'; + my $precedes_colon = + $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; + next unless ( $follows_colon || $precedes_colon ); + + # we will always combining a ? line following a : line + if ( !$follows_colon ) { + + # ...otherwise recombine only if it looks like a chain. + # we will just look at a few nearby lines to see if + # this looks like a chain. + my $local_count = 0; + foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { + $local_count++ + if $ii >= 0 + && $types_to_go[$ii] eq ':' + && $levels_to_go[$ii] == $lev; + } + next unless ( $local_count > 1 ); + } + $forced_breakpoint_to_go[$iend_1] = 0; + } + + # do not recombine lines with leading '.' + elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) { + my $i_next_nonblank = $ibeg_2 + 1; + if ( $types_to_go[$i_next_nonblank] eq 'b' ) { + $i_next_nonblank++; + } + + next + unless ( # ... unless there is just one and we can reduce # this to two lines if we do. For example, this @@ -15477,220 +16921,276 @@ sub recombine_breakpoints { # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' # . '$args .= $pat;' - ( - $n == 2 - && $n == $nmax - && $types_to_go[$if] ne $types_to_go[$imidr] - ) - - # ... or this would strand a short quote , like this - # . "some long qoute" - # . "\n"; + ( + $n == 2 + && $n == $nmax + && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] + ) - || ( $types_to_go[$i_next_nonblank] eq 'Q' - && $i_next_nonblank >= $il - 1 - && length( $tokens_to_go[$i_next_nonblank] ) < - $rOpts_short_concatenation_item_length ) - ); - } + # ... or this would strand a short quote , like this + # . "some long qoute" + # . "\n"; + || ( $types_to_go[$i_next_nonblank] eq 'Q' + && $i_next_nonblank >= $iend_2 - 1 + && length( $tokens_to_go[$i_next_nonblank] ) < + $rOpts_short_concatenation_item_length ) + ); + } - # handle leading keyword.. - elsif ( $types_to_go[$imidr] eq 'k' ) { + # handle leading keyword.. + elsif ( $types_to_go[$ibeg_2] eq 'k' ) { - # handle leading "and" and "or" - if ( $is_and_or{ $tokens_to_go[$imidr] } ) { + # handle leading "or" + if ( $tokens_to_go[$ibeg_2] eq 'or' ) { + next + unless ( + $this_line_is_semicolon_terminated + && ( + + # following 'if' or 'unless' or 'or' + $types_to_go[$ibeg_1] eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } + + # important: only combine a very simple or + # statement because the step below may have + # combined a trailing 'and' with this or, + # and we do not want to then combine + # everything together + && ( $iend_2 - $ibeg_2 <= 7 ) + ) + ); + } - # Decide if we will combine a single terminal 'and' and - # 'or' after an 'if' or 'unless'. We should consider the - # possible vertical alignment, and visual clutter. + # handle leading 'and' + elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { - # This looks best with the 'and' on the same - # line as the 'if': - # - # $a = 1 - # if $seconds and $nu < 2; - # - # But this looks better as shown: - # - # $a = 1 - # if !$this->{Parents}{$_} - # or $this->{Parents}{$_} eq $_; - # - # Eventually, it would be nice to look for - # similarities (such as 'this' or 'Parents'), but - # for now I'm using a simple rule that says that - # the resulting line length must not be more than - # half the maximum line length (making it 80/2 = - # 40 characters by default). - next - unless ( - $this_line_is_semicolon_terminated - && ( + # Decide if we will combine a single terminal 'and' + # after an 'if' or 'unless'. - # following 'if' or 'unless' - $types_to_go[$if] eq 'k' - && $is_if_unless{ $tokens_to_go[$if] } + # This looks best with the 'and' on the same + # line as the 'if': + # + # $a = 1 + # if $seconds and $nu < 2; + # + # But this looks better as shown: + # + # $a = 1 + # if !$this->{Parents}{$_} + # or $this->{Parents}{$_} eq $_; + # + next + unless ( + $this_line_is_semicolon_terminated + && ( - ) - ); - } + # following 'if' or 'unless' or 'or' + $types_to_go[$ibeg_1] eq 'k' + && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } + || $tokens_to_go[$ibeg_1] eq 'or' ) + ) + ); + } - # handle leading "if" and "unless" - elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) { + # handle leading "if" and "unless" + elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { - # FIXME: This is still experimental..may not be too useful - next - unless ( - $this_line_is_semicolon_terminated + # FIXME: This is still experimental..may not be too useful + next + unless ( + $this_line_is_semicolon_terminated - # previous line begins with 'and' or 'or' - && $types_to_go[$if] eq 'k' - && $is_and_or{ $tokens_to_go[$if] } + # previous line begins with 'and' or 'or' + && $types_to_go[$ibeg_1] eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_1] } - ); - } + ); + } - # handle all other leading keywords - else { + # handle all other leading keywords + else { - # keywords look best at start of lines, - # but combine things like "1 while" - unless ( $is_assignment{ $types_to_go[$imid] } ) { - next - if ( ( $types_to_go[$imid] ne 'k' ) - && ( $tokens_to_go[$imidr] ne 'while' ) ); + # keywords look best at start of lines, + # but combine things like "1 while" + unless ( $is_assignment{ $types_to_go[$iend_1] } ) { + next + if ( ( $types_to_go[$iend_1] ne 'k' ) + && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); + } } } - } - # similar treatment of && and || as above for 'and' and 'or': - # NOTE: This block of code is currently bypassed because - # of a previous block but is retained for possible future use. - elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) { + # similar treatment of && and || as above for 'and' and 'or': + # NOTE: This block of code is currently bypassed because + # of a previous block but is retained for possible future use. + elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { - # maybe looking at something like: - # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; + # maybe looking at something like: + # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; - next - unless ( - $this_line_is_semicolon_terminated - - # previous line begins with an 'if' or 'unless' keyword - && $types_to_go[$if] eq 'k' - && $is_if_unless{ $tokens_to_go[$if] } + next + unless ( + $this_line_is_semicolon_terminated - ); - } + # previous line begins with an 'if' or 'unless' keyword + && $types_to_go[$ibeg_1] eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } - # handle leading + - * / - elsif ( $types_to_go[$imidr] =~ /^[\+\-\*\/]$/ ) { - my $i_next_nonblank = $imidr + 1; - if ( $types_to_go[$i_next_nonblank] eq 'b' ) { - $i_next_nonblank++; + ); } - my $i_next_next = $i_next_nonblank + 1; - $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); + # handle leading + - * / + elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) { + my $i_next_nonblank = $ibeg_2 + 1; + if ( $types_to_go[$i_next_nonblank] eq 'b' ) { + $i_next_nonblank++; + } - next - unless ( + my $i_next_next = $i_next_nonblank + 1; + $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); - # unless there is just one and we can reduce - # this to two lines if we do. For example, this - ( - $n == 2 - && $n == $nmax - && $types_to_go[$if] ne $types_to_go[$imidr] - ) - - # do not strand numbers - || ( + my $is_number = ( $types_to_go[$i_next_nonblank] eq 'n' - && ( $i_next_nonblank >= $il - 1 + && ( $i_next_nonblank >= $iend_2 - 1 || $types_to_go[$i_next_next] eq ';' ) - ) - ); - } + ); - # handle line with leading = or similar - elsif ( $is_assignment{ $types_to_go[$imidr] } ) { - next unless $n == 1; - my $ifnmax = $$ri_first[$nmax]; - next - unless ( + my $iend_1_nonblank = + $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1; + my $iend_2_nonblank = + $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2; + + my $is_short_term = + ( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1] + && $types_to_go[$iend_2_nonblank] =~ /^[in]$/ + && $types_to_go[$iend_1_nonblank] =~ /^[in]$/ + && $iend_2_nonblank <= $ibeg_2 + 2 + && length( $tokens_to_go[$iend_2_nonblank] ) < + $rOpts_short_concatenation_item_length ); + + # Combine these lines if this line is a single + # number, or if it is a short term with same + # operator as the previous line. For example, in + # the following code we will combine all of the + # short terms $A, $B, $C, $D, $E, $F, together + # instead of leaving them one per line: + # my $time = + # $A * $B * $C * $D * $E * $F * + # ( 2. * $eps * $sigma * $area ) * + # ( 1. / $tcold**3 - 1. / $thot**3 ); + # This can be important in math-intensive code. + next + unless ( + $is_number + || $is_short_term - # unless we can reduce this to two lines - $nmax == 2 + # or if we can reduce this to two lines if we do. + || ( $n == 2 + && $n == $nmax + && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] ) + ); + } - # or three lines, the last with a leading semicolon - || ( $nmax == 3 && $types_to_go[$ifnmax] eq ';' ) + # handle line with leading = or similar + elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) { + next unless ( $n == 1 || $n == $nmax ); + next + unless ( - # or the next line ends with a here doc - || $types_to_go[$il] eq 'h' - ); - } + # unless we can reduce this to two lines + $nmax == 2 - #---------------------------------------------------------- - # Section 3: - # Combine the lines if we arrive here and it is possible - #---------------------------------------------------------- + # or three lines, the last with a leading semicolon + || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) - # honor hard breakpoints - next if ( $forced_breakpoint_to_go[$imid] > 0 ); + # or the next line ends with a here doc + || $types_to_go[$iend_2] eq 'h' - my $bs = $bond_strength_to_go[$imid]; + # or this is a short line ending in ; + || ( $n == $nmax && $this_line_is_semicolon_terminated ) + ); + $forced_breakpoint_to_go[$iend_1] = 0; + } - # combined line cannot be too long - next - if excess_line_length( $if, $il ) > 0; + #---------------------------------------------------------- + # Section 3: + # Combine the lines if we arrive here and it is possible + #---------------------------------------------------------- - # do not recombine if we would skip in indentation levels - if ( $n < $nmax ) { - my $if_next = $$ri_first[ $n + 1 ]; - next - if ( - $levels_to_go[$if] < $levels_to_go[$imidr] - && $levels_to_go[$imidr] < $levels_to_go[$if_next] + # honor hard breakpoints + next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); - # but an isolated 'if (' is undesirable - && !( - $n == 1 - && $imid - $if <= 2 - && $types_to_go[$if] eq 'k' - && $tokens_to_go[$if] eq 'if' - && $tokens_to_go[$imid] ne '(' - ) - ); - } + my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; - # honor no-break's - next if ( $bs == NO_BREAK ); + # combined line cannot be too long + my $excess = excess_line_length( $ibeg_1, $iend_2 ); + next if ( $excess > 0 ); - # remember the pair with the greatest bond strength - if ( !$n_best ) { - $n_best = $n; - $bs_best = $bs; - } - else { + # Require a few extra spaces before recombining lines if we are + # at an old breakpoint unless this is a simple list or terminal + # line. The goal is to avoid oscillating between two + # quasi-stable end states. For example this snippet caused + # problems: +## my $this = +## bless { +## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" +## }, +## $type; + next + if ( $old_breakpoint_to_go[$iend_1] + && !$this_line_is_semicolon_terminated + && $n < $nmax + && $excess + 4 > 0 + && $types_to_go[$iend_2] ne ',' ); + + # do not recombine if we would skip in indentation levels + if ( $n < $nmax ) { + my $if_next = $$ri_beg[ $n + 1 ]; + next + if ( + $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] + && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] + + # but an isolated 'if (' is undesirable + && !( + $n == 1 + && $iend_1 - $ibeg_1 <= 2 + && $types_to_go[$ibeg_1] eq 'k' + && $tokens_to_go[$ibeg_1] eq 'if' + && $tokens_to_go[$iend_1] ne '(' + ) + ); + } + + # honor no-break's + next if ( $bs == NO_BREAK ); - if ( $bs > $bs_best ) { + # remember the pair with the greatest bond strength + if ( !$n_best ) { $n_best = $n; $bs_best = $bs; } + else { - # we have 2 or more candidates, so need another pass - $more_to_do++; + if ( $bs > $bs_best ) { + $n_best = $n; + $bs_best = $bs; + } + } } - } - # recombine the pair with the greatest bond strength - if ($n_best) { - splice @$ri_first, $n_best, 1; - splice @$ri_last, $n_best - 1, 1; + # recombine the pair with the greatest bond strength + if ($n_best) { + splice @$ri_beg, $n_best, 1; + splice @$ri_end, $n_best - 1, 1; + + # keep going if we are still making progress + $more_to_do++; + } } + return ( $ri_beg, $ri_end ); } - return ( $ri_first, $ri_last ); -} +} # end recombine_breakpoints sub break_all_chain_tokens { @@ -15699,9 +17199,6 @@ sub break_all_chain_tokens { # statement. If we see a break at any one, break at all similar tokens # within the same container. # - # TODO: - # does not handle nested ?: operators correctly - # coordinate better with ?: logic in set_continuation_breaks my ( $ri_left, $ri_right ) = @_; my %saw_chain_type; @@ -15737,65 +17234,252 @@ sub break_all_chain_tokens { $count++; } } - return unless $count; + return unless $count; + + # now look for any interior tokens of the same types + $count = 0; + for my $n ( 0 .. $nmax ) { + my $il = $$ri_left[$n]; + my $ir = $$ri_right[$n]; + for ( my $i = $il + 1 ; $i < $ir ; $i++ ) { + my $type = $types_to_go[$i]; + $type = '+' if ( $type eq '-' ); + $type = '*' if ( $type eq '/' ); + if ( $saw_chain_type{$type} ) { + push @{ $interior_chain_type{$type} }, $i; + $count++; + } + } + } + return unless $count; + + # now make a list of all new break points + my @insert_list; + + # loop over all chain types + foreach my $type ( keys %saw_chain_type ) { + + # quit if just ONE continuation line with leading . For example-- + # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' + # . $contents; + last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); + + # loop over all interior chain tokens + foreach my $itest ( @{ $interior_chain_type{$type} } ) { + + # loop over all left end tokens of same type + if ( $left_chain_type{$type} ) { + next if $nobreak_to_go[ $itest - 1 ]; + foreach my $i ( @{ $left_chain_type{$type} } ) { + next unless in_same_container( $i, $itest ); + push @insert_list, $itest - 1; + + # Break at matching ? if this : is at a different level. + # For example, the ? before $THRf_DEAD in the following + # should get a break if its : gets a break. + # + # my $flags = + # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE + # : ( $_ & 4 ) ? $THRf_R_DETACHED + # : $THRf_R_JOINABLE; + if ( $type eq ':' + && $levels_to_go[$i] != $levels_to_go[$itest] ) + { + my $i_question = $mate_index_to_go[$itest]; + if ( $i_question > 0 ) { + push @insert_list, $i_question - 1; + } + } + last; + } + } + + # loop over all right end tokens of same type + if ( $right_chain_type{$type} ) { + next if $nobreak_to_go[$itest]; + foreach my $i ( @{ $right_chain_type{$type} } ) { + next unless in_same_container( $i, $itest ); + push @insert_list, $itest; + + # break at matching ? if this : is at a different level + if ( $type eq ':' + && $levels_to_go[$i] != $levels_to_go[$itest] ) + { + my $i_question = $mate_index_to_go[$itest]; + if ( $i_question >= 0 ) { + push @insert_list, $i_question; + } + } + last; + } + } + } + } + + # insert any new break points + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } +} + +sub break_equals { + + # Look for assignment operators that could use a breakpoint. + # For example, in the following snippet + # + # $HOME = $ENV{HOME} + # || $ENV{LOGDIR} + # || $pw[7] + # || die "no home directory for user $<"; + # + # we could break at the = to get this, which is a little nicer: + # $HOME = + # $ENV{HOME} + # || $ENV{LOGDIR} + # || $pw[7] + # || die "no home directory for user $<"; + # + # The logic here follows the logic in set_logical_padding, which + # will add the padding in the second line to improve alignment. + # + my ( $ri_left, $ri_right ) = @_; + my $nmax = @$ri_right - 1; + return unless ( $nmax >= 2 ); + + # scan the left ends of first two lines + my $tokbeg = ""; + my $depth_beg; + for my $n ( 1 .. 2 ) { + my $il = $$ri_left[$n]; + my $typel = $types_to_go[$il]; + my $tokenl = $tokens_to_go[$il]; + + my $has_leading_op = ( $tokenl =~ /^\w/ ) + ? $is_chain_operator{$tokenl} # + - * / : ? && || + : $is_chain_operator{$typel}; # and, or + return unless ($has_leading_op); + if ( $n > 1 ) { + return + unless ( $tokenl eq $tokbeg + && $nesting_depth_to_go[$il] eq $depth_beg ); + } + $tokbeg = $tokenl; + $depth_beg = $nesting_depth_to_go[$il]; + } + + # now look for any interior tokens of the same types + my $il = $$ri_left[0]; + my $ir = $$ri_right[0]; + + # now make a list of all new break points + my @insert_list; + for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { + my $type = $types_to_go[$i]; + if ( $is_assignment{$type} + && $nesting_depth_to_go[$i] eq $depth_beg ) + { + if ( $want_break_before{$type} ) { + push @insert_list, $i - 1; + } + else { + push @insert_list, $i; + } + } + } + + # Break after a 'return' followed by a chain of operators + # return ( $^O !~ /win32|dos/i ) + # && ( $^O ne 'VMS' ) + # && ( $^O ne 'OS2' ) + # && ( $^O ne 'MacOS' ); + # To give: + # return + # ( $^O !~ /win32|dos/i ) + # && ( $^O ne 'VMS' ) + # && ( $^O ne 'OS2' ) + # && ( $^O ne 'MacOS' ); + my $i = 0; + if ( $types_to_go[$i] eq 'k' + && $tokens_to_go[$i] eq 'return' + && $ir > $il + && $nesting_depth_to_go[$i] eq $depth_beg ) + { + push @insert_list, $i; + } - # now look for any interior tokens of the same types - $count = 0; - for my $n ( 0 .. $nmax ) { + return unless (@insert_list); + + # One final check... + # scan second and thrid lines and be sure there are no assignments + # we want to avoid breaking at an = to make something like this: + # unless ( $icon = + # $html_icons{"$type-$state"} + # or $icon = $html_icons{$type} + # or $icon = $html_icons{$state} ) + for my $n ( 1 .. 2 ) { my $il = $$ri_left[$n]; my $ir = $$ri_right[$n]; - for ( my $i = $il + 1 ; $i < $ir ; $i++ ) { + for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) { my $type = $types_to_go[$i]; - $type = '+' if ( $type eq '-' ); - $type = '*' if ( $type eq '/' ); - if ( $saw_chain_type{$type} ) { - push @{ $interior_chain_type{$type} }, $i; - $count++; - } + return + if ( $is_assignment{$type} + && $nesting_depth_to_go[$i] eq $depth_beg ); } } - return unless $count; - # now make a list of all new break points - my @insert_list; + # ok, insert any new break point + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } +} - # loop over all chain types - foreach my $type ( keys %saw_chain_type ) { +sub insert_final_breaks { - # quit if just ONE continuation line with leading . For example-- - # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' - # . $contents; - last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); + my ( $ri_left, $ri_right ) = @_; - # loop over all interior chain tokens - foreach my $itest ( @{ $interior_chain_type{$type} } ) { + my $nmax = @$ri_right - 1; - # loop over all left end tokens of same type - if ( $left_chain_type{$type} ) { - next if $nobreak_to_go[ $itest - 1 ]; - foreach my $i ( @{ $left_chain_type{$type} } ) { - next unless in_same_container( $i, $itest ); - push @insert_list, $itest - 1; + # scan the left and right end tokens of all lines + my $count = 0; + my $i_first_colon = -1; + for my $n ( 0 .. $nmax ) { + my $il = $$ri_left[$n]; + my $ir = $$ri_right[$n]; + my $typel = $types_to_go[$il]; + my $typer = $types_to_go[$ir]; + return if ( $typel eq '?' ); + return if ( $typer eq '?' ); + if ( $typel eq ':' ) { $i_first_colon = $il; last; } + elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } + } + + # For long ternary chains, + # if the first : we see has its # ? is in the interior + # of a preceding line, then see if there are any good + # breakpoints before the ?. + if ( $i_first_colon > 0 ) { + my $i_question = $mate_index_to_go[$i_first_colon]; + if ( $i_question > 0 ) { + my @insert_list; + for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { + my $token = $tokens_to_go[$ii]; + my $type = $types_to_go[$ii]; + + # For now, a good break is either a comma or a 'return'. + if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) + && in_same_container( $ii, $i_question ) ) + { + push @insert_list, $ii; last; } } - # loop over all right end tokens of same type - if ( $right_chain_type{$type} ) { - next if $nobreak_to_go[$itest]; - foreach my $i ( @{ $right_chain_type{$type} } ) { - next unless in_same_container( $i, $itest ); - push @insert_list, $itest; - last; - } + # insert any new break points + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); } } } - - # insert any new break points - if (@insert_list) { - insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } } sub in_same_container { @@ -15807,6 +17491,15 @@ sub in_same_container { my $depth = $nesting_depth_to_go[$i1]; return unless ( $nesting_depth_to_go[$i2] == $depth ); if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } + + ########################################################### + # This is potentially a very slow routine and not critical. + # For safety just give up for large differences. + # See test file 'infinite_loop.txt' + # TODO: replace this loop with a data structure + ########################################################### + return if ( $i2 - $i1 > 200 ); + for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) { next if ( $nesting_depth_to_go[$i] > $depth ); return if ( $nesting_depth_to_go[$i] < $depth ); @@ -15882,7 +17575,8 @@ sub set_continuation_breaks { # see if any ?/:'s are in order my $colons_in_order = 1; my $last_tok = ""; - my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ]; + my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ]; + my $colon_count = @colon_list; foreach (@colon_list) { if ( $_ eq $last_tok ) { $colons_in_order = 0; last } $last_tok = $_; @@ -15918,9 +17612,43 @@ sub set_continuation_breaks { my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; my $strength = $bond_strength_to_go[$i_test]; - my $must_break = 0; - # FIXME: TESTING: Might want to be able to break after these + # use old breaks as a tie-breaker. For example to + # prevent blinkers with -pbp in this code: + +##@keywords{ +## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/} +## = (); + + # At the same time try to prevent a leading * in this code + # with the default formatting: + # +## return +## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 ) +## * ( $x**( $a - 1 ) ) +## * ( ( 1 - $x )**( $b - 1 ) ); + + # reduce strength a bit to break ties at an old breakpoint ... + $strength -= $tiny_bias + if $old_breakpoint_to_go[$i_test] + + # which is a 'good' breakpoint, meaning ... + # we don't want to break before it + && !$want_break_before{$type} + + # and either we want to break before the next token + # or the next token is not short (i.e. not a '*', '/' etc.) + && $i_next_nonblank <= $imax + && ( + $want_break_before{$next_nonblank_type} + || ( $lengths_to_go[ $i_next_nonblank + 1 ] - + $lengths_to_go[$i_next_nonblank] > 2 ) + || $next_nonblank_type =~ /^[\(\[\{L]$/ + ); + + my $must_break = 0; + + # FIXME: Might want to be able to break after these # force an immediate break at certain operators # with lower level than the start of the line if ( @@ -15946,7 +17674,7 @@ sub set_continuation_breaks { # See similar logic in scan_list which catches instances # where a line is just something like ') {' || ( $line_count - && ( $token eq ')' ) + && ( $token eq ')' ) && ( $next_nonblank_type eq '{' ) && ($next_nonblank_block_type) && !$rOpts->{'opening-brace-always-on-right'} ) @@ -15985,6 +17713,8 @@ sub set_continuation_breaks { # Avoid a break which would strand a single punctuation # token. For example, we do not want to strand a leading # '.' which is followed by a long quoted string. + # But note that we do want to do this with -extrude (l=1) + # so please test any changes to this code on -extrude. if ( !$must_break && ( $i_test == $i_begin ) @@ -15995,7 +17725,7 @@ sub set_continuation_breaks { $leading_spaces + $lengths_to_go[ $i_test + 1 ] - $starting_sum - ) <= $rOpts_maximum_line_length + ) < $rOpts_maximum_line_length ) ) { @@ -16280,7 +18010,7 @@ sub set_continuation_breaks { } } } - return \@i_first, \@i_last; + return ( \@i_first, \@i_last, $colon_count ); } sub insert_additional_breaks { @@ -16312,6 +18042,9 @@ sub insert_additional_breaks { $i_l = $$ri_last[$line_number]; } + # Do not leave a blank at the end of a line; back up if necessary + if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } + my $i_break_right = $i_break_left + 1; if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ } @@ -16998,6 +18731,7 @@ use vars qw( $rOpts_entab_leading_whitespace $rOpts_valign + $rOpts_fixed_position_side_comment $rOpts_minimum_space_to_comment ); @@ -17052,6 +18786,8 @@ sub initialize { $rOpts_indent_columns = $rOpts->{'indent-columns'}; $rOpts_tabs = $rOpts->{'tabs'}; $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; + $rOpts_fixed_position_side_comment = + $rOpts->{'fixed-position-side-comment'}; $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; $rOpts_valign = $rOpts->{'valign'}; @@ -17270,7 +19006,7 @@ sub append_line { && $rvertical_tightness_flags->[2] == $cached_seqno ) { $rvertical_tightness_flags->[3] ||= 1; - $cached_line_valid ||= 1; + $cached_line_valid ||= 1; } } @@ -17599,8 +19335,10 @@ sub eliminate_old_fields { my $old_line = shift; my $maximum_field_index = $old_line->get_jmax(); + ############################################### # this line must have fewer fields return unless $maximum_field_index > $jmax; + ############################################### # Identify specific cases where field elimination is allowed: # case=1: both lines have comma-separated lists, and the first @@ -18005,7 +19743,7 @@ sub fix_terminal_else { # look for the opening brace after the else, and extrace the depth my $tok_brace = $rtokens->[0]; my $depth_brace; - if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; } + if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } # probably: "else # side_comment" else { return } @@ -18044,185 +19782,282 @@ sub fix_terminal_else { unless ( $rfields_old->[0] =~ /^case\s*$/ ); } -sub check_match { - - my $new_line = shift; - my $old_line = shift; - - # uses global variables: - # $previous_minimum_jmax_seen - # $maximum_jmax_seen - # $maximum_line_index - # $marginal_match - my $jmax = $new_line->get_jmax(); - my $maximum_field_index = $old_line->get_jmax(); - - # flush if this line has too many fields - if ( $jmax > $maximum_field_index ) { my_flush(); return } - - # flush if adding this line would make a non-monotonic field count - if ( - ( $maximum_field_index > $jmax ) # this has too few fields - && ( - ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic - || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) - ) - ) - { - my_flush(); - return; - } - - # otherwise append this line if everything matches - my $jmax_original_line = $new_line->get_jmax_original_line(); - my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); - my $rtokens = $new_line->get_rtokens(); - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - my $list_type = $new_line->get_list_type(); +{ # sub check_match + my %is_good_alignment; - my $group_list_type = $old_line->get_list_type(); - my $old_rpatterns = $old_line->get_rpatterns(); - my $old_rtokens = $old_line->get_rtokens(); + BEGIN { - my $jlimit = $jmax - 1; - if ( $maximum_field_index > $jmax ) { - $jlimit = $jmax_original_line; - --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); + # Vertically aligning on certain "good" tokens is usually okay + # so we can be less restrictive in marginal cases. + @_ = qw( { ? => = ); + push @_, (','); + @is_good_alignment{@_} = (1) x scalar(@_); } - my $everything_matches = 1; - - # common list types always match - unless ( ( $group_list_type && ( $list_type eq $group_list_type ) ) - || $is_hanging_side_comment ) - { + sub check_match { - my $leading_space_count = $new_line->get_leading_space_count(); - my $saw_equals = 0; - for my $j ( 0 .. $jlimit ) { - my $match = 1; + # See if the current line matches the current vertical alignment group. + # If not, flush the current group. + my $new_line = shift; + my $old_line = shift; - my $old_tok = $$old_rtokens[$j]; - my $new_tok = $$rtokens[$j]; + # uses global variables: + # $previous_minimum_jmax_seen + # $maximum_jmax_seen + # $maximum_line_index + # $marginal_match + my $jmax = $new_line->get_jmax(); + my $maximum_field_index = $old_line->get_jmax(); - # Dumb down the match AFTER an equals and - # also dumb down after seeing a ? ternary operator ... - # Everything after a + is the token which preceded the previous - # opening paren (container name). We won't require them to match. - if ( $saw_equals && $new_tok =~ /(.*)\+/ ) { - $new_tok = $1; - $old_tok =~ s/\+.*$//; - } + # flush if this line has too many fields + if ( $jmax > $maximum_field_index ) { goto NO_MATCH } - if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 } + # flush if adding this line would make a non-monotonic field count + if ( + ( $maximum_field_index > $jmax ) # this has too few fields + && ( + ( $previous_minimum_jmax_seen < + $jmax ) # and wouldn't be monotonic + || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) + ) + ) + { + goto NO_MATCH; + } + + # otherwise see if this line matches the current group + my $jmax_original_line = $new_line->get_jmax_original_line(); + my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); + my $rtokens = $new_line->get_rtokens(); + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + my $list_type = $new_line->get_list_type(); + + my $group_list_type = $old_line->get_list_type(); + my $old_rpatterns = $old_line->get_rpatterns(); + my $old_rtokens = $old_line->get_rtokens(); + + my $jlimit = $jmax - 1; + if ( $maximum_field_index > $jmax ) { + $jlimit = $jmax_original_line; + --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); + } + + # handle comma-separated lists .. + if ( $group_list_type && ( $list_type eq $group_list_type ) ) { + for my $j ( 0 .. $jlimit ) { + my $old_tok = $$old_rtokens[$j]; + next unless $old_tok; + my $new_tok = $$rtokens[$j]; + next unless $new_tok; + + # lists always match ... + # unless they would align any '=>'s with ','s + goto NO_MATCH + if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ + || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); + } + } + + # do detailed check for everything else except hanging side comments + elsif ( !$is_hanging_side_comment ) { + + my $leading_space_count = $new_line->get_leading_space_count(); + + my $max_pad = 0; + my $min_pad = 0; + my $saw_good_alignment; + + for my $j ( 0 .. $jlimit ) { + + my $old_tok = $$old_rtokens[$j]; + my $new_tok = $$rtokens[$j]; + + # Note on encoding used for alignment tokens: + # ------------------------------------------- + # Tokens are "decorated" with information which can help + # prevent unwanted alignments. Consider for example the + # following two lines: + # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); + # local ( $i, $f ) = &'bdiv( $xn, $xd ); + # There are three alignment tokens in each line, a comma, + # an =, and a comma. In the first line these three tokens + # are encoded as: + # ,4+local-18 =3 ,4+split-7 + # and in the second line they are encoded as + # ,4+local-18 =3 ,4+&'bdiv-8 + # Tokens always at least have token name and nesting + # depth. So in this example the ='s are at depth 3 and + # the ,'s are at depth 4. This prevents aligning tokens + # of different depths. Commas contain additional + # information, as follows: + # , {depth} + {container name} - {spaces to opening paren} + # This allows us to reject matching the rightmost commas + # in the above two lines, since they are for different + # function calls. This encoding is done in + # 'sub send_lines_to_vertical_aligner'. + + # Pick off actual token. + # Everything up to the first digit is the actual token. + my $alignment_token = $new_tok; + if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } + + # see if the decorated tokens match + my $tokens_match = $new_tok eq $old_tok + + # Exception for matching terminal : of ternary statement.. + # consider containers prefixed by ? and : a match + || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); + + # No match if the alignment tokens differ... + if ( !$tokens_match ) { + + # ...Unless this is a side comment + if ( + $j == $jlimit + + # and there is either at least one alignment token + # or this is a single item following a list. This + # latter rule is required for 'December' to join + # the following list: + # my (@months) = ( + # '', 'January', 'February', 'March', + # 'April', 'May', 'June', 'July', + # 'August', 'September', 'October', 'November', + # 'December' + # ); + # If it doesn't then the -lp formatting will fail. + && ( $j > 0 || $old_tok =~ /^,/ ) + ) + { + $marginal_match = 1 + if ( $marginal_match == 0 + && $maximum_line_index == 0 ); + last; + } - # we never match if the matching tokens differ - if ( $j < $jlimit - && $old_tok ne $new_tok ) - { - $match = 0; - } + goto NO_MATCH; + } - # otherwise, if patterns match, we always have a match. - # However, if patterns don't match, we have to be careful... - elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { + # Calculate amount of padding required to fit this in. + # $pad is the number of spaces by which we must increase + # the current field to squeeze in this field. + my $pad = + length( $$rfields[$j] ) - $old_line->current_field_width($j); + if ( $j == 0 ) { $pad += $leading_space_count; } - # We have to be very careful about aligning commas when the - # pattern's don't match, because it can be worse to create an - # alignment where none is needed than to omit one. The current - # rule: if we are within a matching sub call (indicated by '+' - # in the matching token), we'll allow a marginal match, but - # otherwise not. - # - # Here's an example where we'd like to align the '=' - # my $cfile = File::Spec->catfile( 't', 'callext.c' ); - # my $inc = File::Spec->catdir( 'Basic', 'Core' ); - # because the function names differ. - # Future alignment logic should make this unnecessary. - # - # Here's an example where the ','s are not contained in a call. - # The first line below should probably not match the next two: - # ( $a, $b ) = ( $b, $r ); - # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); - # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); - if ( $new_tok =~ /^,/ ) { - if ( $$rtokens[$j] =~ /[A-Za-z]/ ) { - $marginal_match = 1; - } - else { - $match = 0; - } + # remember max pads to limit marginal cases + if ( $alignment_token ne '#' ) { + if ( $pad > $max_pad ) { $max_pad = $pad } + if ( $pad < $min_pad ) { $min_pad = $pad } } - - # parens don't align well unless patterns match - elsif ( $new_tok =~ /^\(/ ) { - $match = 0; + if ( $is_good_alignment{$alignment_token} ) { + $saw_good_alignment = 1; } - # Handle an '=' alignment with different patterns to - # the left. - elsif ( $new_tok =~ /^=\d*$/ ) { + # If patterns don't match, we have to be careful... + if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { - $saw_equals = 1; + # flag this as a marginal match since patterns differ + $marginal_match = 1 + if ( $marginal_match == 0 && $maximum_line_index == 0 ); - # It is best to be a little restrictive when - # aligning '=' tokens. Here is an example of - # two lines that we will not align: - # my $variable=6; - # $bb=4; - # The problem is that one is a 'my' declaration, - # and the other isn't, so they're not very similar. - # We will filter these out by comparing the first - # letter of the pattern. This is crude, but works - # well enough. - if ( - substr( $$old_rpatterns[$j], 0, 1 ) ne - substr( $$rpatterns[$j], 0, 1 ) ) - { - $match = 0; + # We have to be very careful about aligning commas + # when the pattern's don't match, because it can be + # worse to create an alignment where none is needed + # than to omit one. Here's an example where the ','s + # are not in named continers. The first line below + # should not match the next two: + # ( $a, $b ) = ( $b, $r ); + # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); + # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); + if ( $alignment_token eq ',' ) { + + # do not align commas unless they are in named containers + goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); } - # If we pass that test, we'll call it a marginal match. - # Here is an example of a marginal match: - # $done{$$op} = 1; - # $op = compile_bblock($op); - # The left tokens are both identifiers, but - # one accesses a hash and the other doesn't. - # We'll let this be a tentative match and undo - # it later if we don't find more than 2 lines - # in the group. - elsif ( $maximum_line_index == 0 ) { - $marginal_match = 1; + # do not align parens unless patterns match; + # large ugly spaces can occur in math expressions. + elsif ( $alignment_token eq '(' ) { + + # But we can allow a match if the parens don't + # require any padding. + if ( $pad != 0 ) { goto NO_MATCH } } - } - } - # Don't let line with fewer fields increase column widths - # ( align3.t ) - if ( $maximum_field_index > $jmax ) { - my $pad = - length( $$rfields[$j] ) - $old_line->current_field_width($j); + # Handle an '=' alignment with different patterns to + # the left. + elsif ( $alignment_token eq '=' ) { + + # It is best to be a little restrictive when + # aligning '=' tokens. Here is an example of + # two lines that we will not align: + # my $variable=6; + # $bb=4; + # The problem is that one is a 'my' declaration, + # and the other isn't, so they're not very similar. + # We will filter these out by comparing the first + # letter of the pattern. This is crude, but works + # well enough. + if ( + substr( $$old_rpatterns[$j], 0, 1 ) ne + substr( $$rpatterns[$j], 0, 1 ) ) + { + goto NO_MATCH; + } - if ( $j == 0 ) { - $pad += $leading_space_count; + # If we pass that test, we'll call it a marginal match. + # Here is an example of a marginal match: + # $done{$$op} = 1; + # $op = compile_bblock($op); + # The left tokens are both identifiers, but + # one accesses a hash and the other doesn't. + # We'll let this be a tentative match and undo + # it later if we don't find more than 2 lines + # in the group. + elsif ( $maximum_line_index == 0 ) { + $marginal_match = + 2; # =2 prevents being undone below + } + } } - # TESTING: suspend this rule to allow last lines to join - if ( $pad > 0 ) { $match = 0; } - } - - unless ($match) { - $everything_matches = 0; - last; + # Don't let line with fewer fields increase column widths + # ( align3.t ) + if ( $maximum_field_index > $jmax ) { + + # Exception: suspend this rule to allow last lines to join + if ( $pad > 0 ) { goto NO_MATCH; } + } + } ## end for my $j ( 0 .. $jlimit) + + # Turn off the "marginal match" flag in some cases... + # A "marginal match" occurs when the alignment tokens agree + # but there are differences in the other tokens (patterns). + # If we leave the marginal match flag set, then the rule is that we + # will align only if there are more than two lines in the group. + # We will turn of the flag if we almost have a match + # and either we have seen a good alignment token or we + # just need a small pad (2 spaces) to fit. These rules are + # the result of experimentation. Tokens which misaligned by just + # one or two characters are annoying. On the other hand, + # large gaps to less important alignment tokens are also annoying. + if ( $marginal_match == 1 + && $jmax == $maximum_field_index + && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) + ) + { + $marginal_match = 0; } + ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; } - } - - if ( $maximum_field_index > $jmax ) { - - if ($everything_matches) { + # We have a match (even if marginal). + # If the current line has fewer fields than the current group + # but otherwise matches, copy the remaining group fields to + # make it a perfect match. + if ( $maximum_field_index > $jmax ) { my $comment = $$rfields[$jmax]; for $jmax ( $jlimit .. $maximum_field_index ) { $$rtokens[$jmax] = $$old_rtokens[$jmax]; @@ -18232,9 +20067,13 @@ sub check_match { $$rfields[$jmax] = $comment; $new_line->set_jmax($jmax); } - } + return; - my_flush() unless ($everything_matches); + NO_MATCH: + ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n"; + my_flush(); + return; + } } sub check_fit { @@ -18724,6 +20563,15 @@ sub write_vertically_aligned_line { : $rOpts_minimum_space_to_comment - 1; } + # if the -fpsc flag is set, move the side comment to the selected + # column if and only if it is possible, ignoring constraints on + # line length and minimum space to comment + if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) + { + my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; + if ( $newpad >= 0 ) { $pad = $newpad; } + } + # accumulate the padding if ( $pad > 0 ) { $total_pad_count += $pad; } @@ -19087,7 +20935,6 @@ sub entab_and_output { } else { - # REMOVE AFTER TESTING # shouldn't happen - program error counting whitespace # we'll skip entabbing warning( @@ -19117,7 +20964,6 @@ sub entab_and_output { } else { - # REMOVE AFTER TESTING # shouldn't happen - program error counting whitespace # we'll skip entabbing warning( @@ -19291,11 +21137,28 @@ sub want_blank_line { } } +sub require_blank_code_lines { + + # write out the requested number of blanks regardless of the value of -mbl + # unless -mbl=0. This allows extra blank lines to be written for subs and + # packages even with the default -mbl=1 + my $self = shift; + my $count = shift; + my $need = $count - $self->{_consecutive_blank_lines}; + my $rOpts = $self->{_rOpts}; + my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0; + for ( my $i = 0 ; $i < $need ; $i++ ) { + $self->write_blank_code_line($forced); + } +} + sub write_blank_code_line { - my $self = shift; - my $rOpts = $self->{_rOpts}; + my $self = shift; + my $forced = shift; + my $rOpts = $self->{_rOpts}; return - if ( $self->{_consecutive_blank_lines} >= + if (!$forced + && $self->{_consecutive_blank_lines} >= $rOpts->{'maximum-consecutive-blank-lines'} ); $self->{_consecutive_blank_lines}++; $self->{_consecutive_nonblank_lines} = 0; @@ -19497,7 +21360,7 @@ sub write_debug_entry { $pattern .= $$rtoken_type[$j]; } $reconstructed_original .= $$rtokens[$j]; - $block_str .= "($$rblock_type[$j])"; + $block_str .= "($$rblock_type[$j])"; $num = length( $$rtokens[$j] ); my $type_str = $$rtoken_type[$j]; @@ -19651,6 +21514,8 @@ use vars qw{ $square_bracket_depth @current_depth + @total_depth + $total_depth @nesting_sequence_number @current_sequence_number @paren_type @@ -19658,12 +21523,13 @@ use vars qw{ @paren_structural_type @brace_type @brace_structural_type - @brace_statement_type @brace_context @brace_package @square_bracket_type @square_bracket_structural_type @depth_array + @nested_ternary_flag + @nested_statement_type @starting_line_of_current_depth }; @@ -19728,6 +21594,7 @@ sub new { starting_level => undef, indent_columns => 4, tabs => 0, + entab_leading_space => undef, look_for_hash_bang => 0, trim_qw => 1, look_for_autoloader => 1, @@ -19781,6 +21648,7 @@ sub new { _starting_level => $args{starting_level}, _know_starting_level => defined( $args{starting_level} ), _tabs => $args{tabs}, + _entab_leading_space => $args{entab_leading_space}, _indent_columns => $args{indent_columns}, _look_for_hash_bang => $args{look_for_hash_bang}, _trim_qw => $args{trim_qw}, @@ -20320,10 +22188,10 @@ sub get_line { if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { complain("=cut while not in pod ignored\n"); $tokenizer_self->{_in_pod} = 0; - $line_of_tokens->{_line_type} = 'POD_STOP'; + $line_of_tokens->{_line_type} = 'POD_END'; } else { - $line_of_tokens->{_line_type} = 'POD_END'; + $line_of_tokens->{_line_type} = 'POD_START'; complain( "=cut starts a pod section .. this can fool pod utilities.\n" ); @@ -20402,7 +22270,7 @@ sub get_line { $line_of_tokens->{_line_type} = 'CODE'; # remember if we have seen any real code - if ( !$tokenizer_self->{_started_tokenizing} + if ( !$tokenizer_self->{_started_tokenizing} && $input_line !~ /^\s*$/ && $input_line !~ /^\s*#/ ) { @@ -20465,6 +22333,7 @@ sub find_starting_indentation_level { my $i = 0; my $structural_indentation_level = -1; # flag for find_indentation_level + # keep looking at lines until we find a hash bang or piece of code my $msg = ""; while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) @@ -20475,8 +22344,8 @@ sub find_starting_indentation_level { $starting_level = 0; last; } - next if ( $line =~ /^\s*#/ ); # must not be comment - next if ( $line =~ /^\s*$/ ); # must not be blank + next if ( $line =~ /^\s*#/ ); # skip past comments + next if ( $line =~ /^\s*$/ ); # skip past blank lines ( $starting_level, $msg ) = find_indentation_level( $line, $structural_indentation_level ); if ($msg) { write_logfile_entry("$msg") } @@ -20532,7 +22401,17 @@ sub find_indentation_level { $know_input_tabstr = 0; - if ( $tokenizer_self->{_tabs} ) { + # When -et=n is used for the output formatting, we will assume that + # tabs in the input formatting were also produced with -et=n. This may + # not be true, but it is the best guess because it will keep leading + # whitespace unchanged on repeated formatting on small pieces of code + # when -et=n is used. Thanks to Sam Kington for this patch. + if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) { + $leading_whitespace =~ s{^ (\t*) } + { " " x (length($1) * $tabsize) }xe; + $input_tabstr = " " x $tokenizer_self->{_indent_columns}; + } + elsif ( $tokenizer_self->{_tabs} ) { $input_tabstr = "\t"; if ( length($leading_whitespace) > 0 ) { if ( $leading_whitespace !~ /\t/ ) { @@ -20701,6 +22580,8 @@ sub prepare_for_a_new_file { $square_bracket_depth = 0; @current_depth[ 0 .. $#closing_brace_names ] = (0) x scalar @closing_brace_names; + $total_depth = 0; + @total_depth = (); @nesting_sequence_number[ 0 .. $#closing_brace_names ] = ( 0 .. $#closing_brace_names ); @current_sequence_number = (); @@ -20709,7 +22590,6 @@ sub prepare_for_a_new_file { $paren_structural_type[$brace_depth] = ''; $brace_type[$brace_depth] = ';'; # identify opening brace as code block $brace_structural_type[$brace_depth] = ''; - $brace_statement_type[$brace_depth] = ""; $brace_context[$brace_depth] = UNKNOWN_CONTEXT; $brace_package[$paren_depth] = $current_package; $square_bracket_type[$square_bracket_depth] = ''; @@ -20734,7 +22614,7 @@ sub prepare_for_a_new_file { $next_tok, $next_type, $peeked_ahead, $prototype, $rhere_target_list, $rtoken_map, $rtoken_type, $rtokens, $tok, - $type, $type_sequence, + $type, $type_sequence, $indent_flag, ); # TV2: refs to ARRAYS for processing one LINE @@ -20744,6 +22624,7 @@ sub prepare_for_a_new_file { my $routput_block_type = []; # types of code block my $routput_container_type = []; # paren types, such as if, elsif, .. my $routput_type_sequence = []; # nesting sequential number + my $routput_indent_flag = []; # # TV3: SCALARS for quote variables. These are initialized with a # subroutine call and continually updated as lines are processed. @@ -20753,7 +22634,7 @@ sub prepare_for_a_new_file { # TV4: SCALARS for multi-line identifiers and # statements. These are initialized with a subroutine call # and continually updated as lines are processed. - my ( $id_scan_state, $identifier, $want_paren, ); + my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); # TV5: SCALARS for tracking indentation level. # Initialized once and continually updated as lines are @@ -20796,9 +22677,10 @@ sub prepare_for_a_new_file { $allowed_quote_modifiers = ""; # TV4: - $id_scan_state = ''; - $identifier = ''; - $want_paren = ""; + $id_scan_state = ''; + $identifier = ''; + $want_paren = ""; + $indented_if_level = 0; # TV5: $nesting_token_string = ""; @@ -20834,13 +22716,13 @@ sub prepare_for_a_new_file { $next_tok, $next_type, $peeked_ahead, $prototype, $rhere_target_list, $rtoken_map, $rtoken_type, $rtokens, $tok, - $type, $type_sequence, + $type, $type_sequence, $indent_flag, ]; my $rTV2 = [ - $routput_token_list, $routput_token_type, - $routput_block_type, $routput_container_type, - $routput_type_sequence, + $routput_token_list, $routput_token_type, + $routput_block_type, $routput_container_type, + $routput_type_sequence, $routput_indent_flag, ]; my $rTV3 = [ @@ -20850,7 +22732,8 @@ sub prepare_for_a_new_file { $quoted_string_2, $allowed_quote_modifiers, ]; - my $rTV4 = [ $id_scan_state, $identifier, $want_paren, ]; + my $rTV4 = + [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; my $rTV5 = [ $nesting_token_string, $nesting_type_string, @@ -20884,13 +22767,13 @@ sub prepare_for_a_new_file { $next_tok, $next_type, $peeked_ahead, $prototype, $rhere_target_list, $rtoken_map, $rtoken_type, $rtokens, $tok, - $type, $type_sequence, + $type, $type_sequence, $indent_flag, ) = @{$rTV1}; ( - $routput_token_list, $routput_token_type, - $routput_block_type, $routput_container_type, - $routput_type_sequence, + $routput_token_list, $routput_token_type, + $routput_block_type, $routput_container_type, + $routput_type_sequence, $routput_type_sequence, ) = @{$rTV2}; ( @@ -20898,7 +22781,8 @@ sub prepare_for_a_new_file { $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ) = @{$rTV3}; - ( $id_scan_state, $identifier, $want_paren, ) = @{$rTV4}; + ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = + @{$rTV4}; ( $nesting_token_string, $nesting_type_string, @@ -20922,6 +22806,9 @@ sub prepare_for_a_new_file { } sub get_indentation_level { + + # patch to avoid reporting error if indented if is not terminated + if ($indented_if_level) { return $level_in_tokenizer - 1 } return $level_in_tokenizer; } @@ -20963,22 +22850,24 @@ sub prepare_for_a_new_file { # localize all package variables local ( - $tokenizer_self, $last_nonblank_token, - $last_nonblank_type, $last_nonblank_block_type, - $statement_type, $in_attribute_list, - $current_package, $context, - %is_constant, %is_user_function, - %user_function_prototype, %is_block_function, - %is_block_list_function, %saw_function_definition, - $brace_depth, $paren_depth, - $square_bracket_depth, @current_depth, - @nesting_sequence_number, @current_sequence_number, - @paren_type, @paren_semicolon_count, - @paren_structural_type, @brace_type, - @brace_structural_type, @brace_statement_type, - @brace_context, @brace_package, - @square_bracket_type, @square_bracket_structural_type, - @depth_array, @starting_line_of_current_depth, + $tokenizer_self, $last_nonblank_token, + $last_nonblank_type, $last_nonblank_block_type, + $statement_type, $in_attribute_list, + $current_package, $context, + %is_constant, %is_user_function, + %user_function_prototype, %is_block_function, + %is_block_list_function, %saw_function_definition, + $brace_depth, $paren_depth, + $square_bracket_depth, @current_depth, + @total_depth, $total_depth, + @nesting_sequence_number, @current_sequence_number, + @paren_type, @paren_semicolon_count, + @paren_structural_type, @brace_type, + @brace_structural_type, @brace_context, + @brace_package, @square_bracket_type, + @square_bracket_structural_type, @depth_array, + @starting_line_of_current_depth, @nested_ternary_flag, + @nested_statement_type, ); # save all lexical variables @@ -21035,7 +22924,7 @@ sub prepare_for_a_new_file { sub scan_identifier { ( $i, $tok, $type, $id_scan_state, $identifier ) = scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, - $max_token_index ); + $max_token_index, $expecting ); } sub scan_id { @@ -21236,7 +23125,7 @@ sub prepare_for_a_new_file { } ## end if ( $expecting == OPERATOR... } $paren_type[$paren_depth] = $container_type; - $type_sequence = + ( $type_sequence, $indent_flag ) = increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); # propagate types down through nested parens @@ -21285,7 +23174,7 @@ sub prepare_for_a_new_file { }, ')' => sub { - $type_sequence = + ( $type_sequence, $indent_flag ) = decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); if ( $paren_structural_type[$paren_depth] eq '{' ) { @@ -21380,7 +23269,7 @@ sub prepare_for_a_new_file { if ($is_pattern) { $in_quote = 1; $type = 'Q'; - $allowed_quote_modifiers = '[cgimosx]'; + $allowed_quote_modifiers = '[msixpodualgc]'; } else { # not a pattern; check for a /= token @@ -21506,13 +23395,12 @@ sub prepare_for_a_new_file { } } } - $brace_type[ ++$brace_depth ] = $block_type; - $brace_package[$brace_depth] = $current_package; - $type_sequence = - increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); + $brace_type[ ++$brace_depth ] = $block_type; + $brace_package[$brace_depth] = $current_package; $brace_structural_type[$brace_depth] = $type; $brace_context[$brace_depth] = $context; - $brace_statement_type[$brace_depth] = $statement_type; + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); }, '}' => sub { $block_type = $brace_type[$brace_depth]; @@ -21524,7 +23412,7 @@ sub prepare_for_a_new_file { # can happen on brace error (caught elsewhere) else { } - $type_sequence = + ( $type_sequence, $indent_flag ) = decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); if ( $brace_structural_type[$brace_depth] eq 'L' ) { @@ -21538,8 +23426,7 @@ sub prepare_for_a_new_file { $tok = $brace_type[$brace_depth]; } - $context = $brace_context[$brace_depth]; - $statement_type = $brace_statement_type[$brace_depth]; + $context = $brace_context[$brace_depth]; if ( $brace_depth > 0 ) { $brace_depth--; } }, '&' => sub { # maybe sub call? start looking @@ -21549,7 +23436,14 @@ sub prepare_for_a_new_file { # got mistaken as a q operator in an early version: # print BODY &q(<<'EOT'); if ( $expecting != OPERATOR ) { - scan_identifier(); + + # But only look for a sub call if we are expecting a term or + # if there is no existing space after the &. + # For example we probably don't want & as sub call here: + # Fcntl::S_IRUSR & $mode; + if ( $expecting == TERM || $next_type ne 'b' ) { + scan_identifier(); + } } else { } @@ -21561,6 +23455,12 @@ sub prepare_for_a_new_file { find_angle_operator_termination( $input_line, $i, $rtoken_map, $expecting, $max_token_index ); + if ( $type eq '<' && $expecting == TERM ) { + error_if_expecting_TERM(); + interrupt_logfile(); + warning("Unterminated <> operator?\n"); + resume_logfile(); + } } else { } @@ -21583,10 +23483,10 @@ sub prepare_for_a_new_file { if ($is_pattern) { $in_quote = 1; $type = 'Q'; - $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this + $allowed_quote_modifiers = '[msixpodualgc]'; } else { - $type_sequence = + ( $type_sequence, $indent_flag ) = increase_nesting_depth( QUESTION_COLON, $$rtoken_map[$i_tok] ); } @@ -21653,7 +23553,7 @@ sub prepare_for_a_new_file { # otherwise, it should be part of a ?/: operator else { - $type_sequence = + ( $type_sequence, $indent_flag ) = decrease_nesting_depth( QUESTION_COLON, $$rtoken_map[$i_tok] ); if ( $last_nonblank_token eq '?' ) { @@ -21694,7 +23594,7 @@ sub prepare_for_a_new_file { '[' => sub { $square_bracket_type[ ++$square_bracket_depth ] = $last_nonblank_token; - $type_sequence = + ( $type_sequence, $indent_flag ) = increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); # It may seem odd, but structural square brackets have @@ -21705,7 +23605,7 @@ sub prepare_for_a_new_file { $square_bracket_structural_type[$square_bracket_depth] = $type; }, ']' => sub { - $type_sequence = + ( $type_sequence, $indent_flag ) = decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) @@ -21719,9 +23619,20 @@ sub prepare_for_a_new_file { if ( ( $expecting != OPERATOR ) && $is_file_test_operator{$next_tok} ) { - $i++; - $tok .= $next_tok; - $type = 'F'; + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i + 1, $rtokens, + $max_token_index ); + + # check for a quoted word like "-w=>xx"; + # it is sufficient to just check for a following '=' + if ( $next_nonblank_token eq '=' ) { + $type = 'm'; + } + else { + $i++; + $tok .= $next_tok; + $type = 'F'; + } } elsif ( $expecting == TERM ) { my $number = scan_number(); @@ -21877,9 +23788,9 @@ sub prepare_for_a_new_file { # These block types terminate statements and do not need a trailing # semicolon - # patched for SWITCH/CASE: + # patched for SWITCH/CASE/ my %is_zero_continuation_block_type; - @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ; + @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; if elsif else unless while until for foreach switch case given when); @is_zero_continuation_block_type{@_} = (1) x scalar(@_); @@ -21930,12 +23841,19 @@ sub prepare_for_a_new_file { # ref: camel 3 p 147, # but perl may accept undocumented flags + # perl 5.10 adds 'p' (preserve) + # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these: + # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc + # s/PATTERN/REPLACEMENT/msixpodualgcer + # y/SEARCHLIST/REPLACEMENTLIST/cdsr + # tr/SEARCHLIST/REPLACEMENTLIST/cdsr + # qr/STRING/msixpodual my %quote_modifiers = ( - 's' => '[cegimosx]', - 'y' => '[cds]', - 'tr' => '[cds]', - 'm' => '[cgimosx]', - 'qr' => '[imosx]', + 's' => '[msixpodualgcer]', + 'y' => '[cdsr]', + 'tr' => '[cdsr]', + 'm' => '[msixpodualgc]', + 'qr' => '[msixpodual]', 'q' => "", 'qq' => "", 'qw' => "", @@ -22108,6 +24026,7 @@ sub prepare_for_a_new_file { $block_type = $last_nonblank_block_type; $container_type = $last_nonblank_container_type; $type_sequence = $last_nonblank_type_sequence; + $indent_flag = 0; $peeked_ahead = 0; # tokenization is done in two stages.. @@ -22134,6 +24053,7 @@ sub prepare_for_a_new_file { $routput_block_type->[$i] = ""; $routput_container_type->[$i] = ""; $routput_type_sequence->[$i] = ""; + $routput_indent_flag->[$i] = 0; } $i = -1; $i_tok = -1; @@ -22301,6 +24221,7 @@ EOM $routput_block_type->[$i_tok] = $block_type; $routput_container_type->[$i_tok] = $container_type; $routput_type_sequence->[$i_tok] = $type_sequence; + $routput_indent_flag->[$i_tok] = $indent_flag; } my $pre_tok = $$rtokens[$i]; # get the next pre-token my $pre_type = $$rtoken_type[$i]; # and type @@ -22309,6 +24230,7 @@ EOM $block_type = ""; # blank for all tokens except code block braces $container_type = ""; # blank for all tokens except some parens $type_sequence = ""; # blank for all tokens except ?/: + $indent_flag = 0; $prototype = ""; # blank for all tokens except user defined subs $i_tok = $i; @@ -22460,12 +24382,19 @@ EOM } } - # quote a bare word within braces..like xxx->{s}; note that we - # must be sure this is not a structural brace, to avoid - # mistaking {s} in the following for a quoted bare word: - # for(@[){s}bla}BLA} - if ( ( $last_nonblank_type eq 'L' ) - && ( $next_nonblank_token eq '}' ) ) + # quote a bare word within braces..like xxx->{s}; note that we + # must be sure this is not a structural brace, to avoid + # mistaking {s} in the following for a quoted bare word: + # for(@[){s}bla}BLA} + # Also treat q in something like var{-q} as a bare word, not qoute operator + if ( + $next_nonblank_token eq '}' + && ( + $last_nonblank_type eq 'L' + || ( $last_nonblank_type eq 'm' + && $last_last_nonblank_type eq 'L' ) + ) + ) { $type = 'w'; next; @@ -22562,9 +24491,15 @@ EOM if ($next_nonblank_token) { if ( $is_keyword{$next_nonblank_token} ) { - warning( + + # Assume qw is used as a quote and okay, as in: + # use constant qw{ DEBUG 0 }; + # Not worth trying to parse for just a warning + if ( $next_nonblank_token ne 'qw' ) { + warning( "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" - ); + ); + } } # FIXME: could check for error in which next token is @@ -22613,7 +24548,7 @@ EOM && label_ok() ) { - if ( $tok !~ /A-Z/ ) { + if ( $tok !~ /[A-Z]/ ) { push @{ $tokenizer_self->{_rlower_case_labels_at} }, $input_line_number; } @@ -22698,7 +24633,13 @@ EOM # note: ';' '{' and '}' in list above # because continues can follow bare blocks; # ':' is labeled block - warning("'$tok' should follow a block\n"); + # + ############################################ + # NOTE: This check has been deactivated because + # continue has an alternative usage for given/when + # blocks in perl 5.10 + ## warning("'$tok' should follow a block\n"); + ############################################ } } @@ -22707,6 +24648,27 @@ EOM elsif ( $tok eq 'when' || $tok eq 'case' ) { $statement_type = $tok; # next '{' is block } + + # + # indent trailing if/unless/while/until + # outdenting will be handled by later indentation loop +## DEACTIVATED: unfortunately this can cause some unwanted indentation like: +##$opt_o = 1 +## if !( +## $opt_b +## || $opt_c +## || $opt_d +## || $opt_f +## || $opt_i +## || $opt_l +## || $opt_o +## || $opt_x +## ); +## if ( $tok =~ /^(if|unless|while|until)$/ +## && $next_nonblank_token ne '(' ) +## { +## $indent_flag = 1; +## } } # check for inline label following @@ -22765,7 +24727,7 @@ EOM # not treated as keywords: if ( ( - $tok eq 'case' + $tok eq 'case' && $brace_type[$brace_depth] eq 'switch' ) || ( $tok eq 'when' @@ -22831,6 +24793,7 @@ EOM $routput_block_type->[$i_tok] = $block_type; $routput_container_type->[$i_tok] = $container_type; $routput_type_sequence->[$i_tok] = $type_sequence; + $routput_indent_flag->[$i_tok] = $indent_flag; } unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { @@ -22950,7 +24913,96 @@ EOM { # scan the list of pre-tokens indexes # self-checking for valid token types - my $type = $routput_token_type->[$i]; + my $type = $routput_token_type->[$i]; + my $forced_indentation_flag = $routput_indent_flag->[$i]; + + # See if we should undo the $forced_indentation_flag. + # Forced indentation after 'if', 'unless', 'while' and 'until' + # expressions without trailing parens is optional and doesn't + # always look good. It is usually okay for a trailing logical + # expression, but if the expression is a function call, code block, + # or some kind of list it puts in an unwanted extra indentation + # level which is hard to remove. + # + # Example where extra indentation looks ok: + # return 1 + # if $det_a < 0 and $det_b > 0 + # or $det_a > 0 and $det_b < 0; + # + # Example where extra indentation is not needed because + # the eval brace also provides indentation: + # print "not " if defined eval { + # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; + # }; + # + # The following rule works fairly well: + # Undo the flag if the end of this line, or start of the next + # line, is an opening container token or a comma. + # This almost always works, but if not after another pass it will + # be stable. + if ( $forced_indentation_flag && $type eq 'k' ) { + my $ixlast = -1; + my $ilast = $routput_token_list->[$ixlast]; + my $toklast = $routput_token_type->[$ilast]; + if ( $toklast eq '#' ) { + $ixlast--; + $ilast = $routput_token_list->[$ixlast]; + $toklast = $routput_token_type->[$ilast]; + } + if ( $toklast eq 'b' ) { + $ixlast--; + $ilast = $routput_token_list->[$ixlast]; + $toklast = $routput_token_type->[$ilast]; + } + if ( $toklast =~ /^[\{,]$/ ) { + $forced_indentation_flag = 0; + } + else { + ( $toklast, my $i_next ) = + find_next_nonblank_token( $max_token_index, $rtokens, + $max_token_index ); + if ( $toklast =~ /^[\{,]$/ ) { + $forced_indentation_flag = 0; + } + } + } + + # if we are already in an indented if, see if we should outdent + if ($indented_if_level) { + + # don't try to nest trailing if's - shouldn't happen + if ( $type eq 'k' ) { + $forced_indentation_flag = 0; + } + + # check for the normal case - outdenting at next ';' + elsif ( $type eq ';' ) { + if ( $level_in_tokenizer == $indented_if_level ) { + $forced_indentation_flag = -1; + $indented_if_level = 0; + } + } + + # handle case of missing semicolon + elsif ( $type eq '}' ) { + if ( $level_in_tokenizer == $indented_if_level ) { + $indented_if_level = 0; + + # TBD: This could be a subroutine call + $level_in_tokenizer--; + if ( @{$rslevel_stack} > 1 ) { + pop( @{$rslevel_stack} ); + } + if ( length($nesting_block_string) > 1 ) + { # true for valid script + chop $nesting_block_string; + chop $nesting_list_string; + } + + } + } + } + my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken $level_i = $level_in_tokenizer; @@ -22985,7 +25037,8 @@ EOM # Note: these are set so that the leading braces have a HIGHER # level than their CONTENTS, which is convenient for indentation # Also, define continuation indentation for each token. - if ( $type eq '{' || $type eq 'L' ) { + if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) + { # use environment before updating $container_environment = @@ -23052,13 +25105,36 @@ EOM push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); $level_in_tokenizer++; - if ( $routput_block_type->[$i] ) { - $nesting_block_flag = 1; - $nesting_block_string .= '1'; + if ($forced_indentation_flag) { + + # break BEFORE '?' when there is forced indentation + if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } + if ( $type eq 'k' ) { + $indented_if_level = $level_in_tokenizer; + } + + # do not change container environement here if we are not + # at a real list. Adding this check prevents "blinkers" + # often near 'unless" clauses, such as in the following + # code: +## next +## unless -e ( +## $archive = +## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" ) +## ); + + $nesting_block_string .= "$nesting_block_flag"; } else { - $nesting_block_flag = 0; - $nesting_block_string .= '0'; + + if ( $routput_block_type->[$i] ) { + $nesting_block_flag = 1; + $nesting_block_string .= '1'; + } + else { + $nesting_block_flag = 0; + $nesting_block_string .= '0'; + } } # we will use continuation indentation within containers @@ -23110,6 +25186,7 @@ EOM if ( !$routput_block_type->[$i] # patch: skip for BLOCK && ($in_statement_continuation) + && !( $forced_indentation_flag && $type eq ':' ) ) { $total_ci += $in_statement_continuation @@ -23120,7 +25197,10 @@ EOM $in_statement_continuation = 0; } - elsif ( $type eq '}' || $type eq 'R' ) { + elsif ($type eq '}' + || $type eq 'R' + || $forced_indentation_flag < 0 ) + { # only a nesting error in the script would prevent popping here if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } @@ -23159,10 +25239,11 @@ EOM # ...and include all block types except user subs with # block prototypes and these: (sort|grep|map|do|eval) -# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ +# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ elsif ( $is_zero_continuation_block_type{ - $routput_block_type->[$i] } ) + $routput_block_type->[$i] + } ) { $in_statement_continuation = 0; } @@ -23171,7 +25252,8 @@ EOM # /^(sort|grep|map|do|eval)$/ ) elsif ( $is_not_zero_continuation_block_type{ - $routput_block_type->[$i] } ) + $routput_block_type->[$i] + } ) { } @@ -23201,6 +25283,8 @@ EOM $in_statement_continuation = 1 if $routput_container_type->[$i] =~ /^[;,\{\}]$/; } + + elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } } # use environment after updating @@ -23552,7 +25636,7 @@ sub operator_expected { # patch for dor.t (defined or). if ( $tok eq '/' - && $next_type eq '/' + && $next_type eq '/' && $last_nonblank_token eq ']' ) { $op_expected = OPERATOR; @@ -23686,14 +25770,27 @@ sub code_block_type { # otherwise, look at previous token. This must be a code block if # it follows any of these: -# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ +# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ elsif ( $is_code_block_token{$last_nonblank_token} ) { - return $last_nonblank_token; + + # Bug Patch: Note that the opening brace after the 'if' in the following + # snippet is an anonymous hash ref and not a code block! + # print 'hi' if { x => 1, }->{x}; + # We can identify this situation because the last nonblank type + # will be a keyword (instead of a closing peren) + if ( $last_nonblank_token =~ /^(if|unless)$/ + && $last_nonblank_type eq 'k' ) + { + return ""; + } + else { + return $last_nonblank_token; + } } # or a sub definition elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) - && $last_nonblank_token =~ /^sub\b/ ) + && $last_nonblank_token =~ /^(sub|package)\b/ ) { return $last_nonblank_token; } @@ -23937,57 +26034,85 @@ sub is_non_structural_brace { # way. sub increase_nesting_depth { - my ( $a, $pos ) = @_; + my ( $aa, $pos ) = @_; # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, - # @current_sequence_number, @depth_array, @starting_line_of_current_depth - my $b; - $current_depth[$a]++; + # @current_sequence_number, @depth_array, @starting_line_of_current_depth, + # $statement_type + my $bb; + $current_depth[$aa]++; + $total_depth++; + $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; my $input_line_number = $tokenizer_self->{_last_line_number}; my $input_line = $tokenizer_self->{_line_text}; # Sequence numbers increment by number of items. This keeps # a unique set of numbers but still allows the relative location # of any type to be determined. - $nesting_sequence_number[$a] += scalar(@closing_brace_names); - my $seqno = $nesting_sequence_number[$a]; - $current_sequence_number[$a][ $current_depth[$a] ] = $seqno; + $nesting_sequence_number[$aa] += scalar(@closing_brace_names); + my $seqno = $nesting_sequence_number[$aa]; + $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; - $starting_line_of_current_depth[$a][ $current_depth[$a] ] = + $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = [ $input_line_number, $input_line, $pos ]; - for $b ( 0 .. $#closing_brace_names ) { - next if ( $b == $a ); - $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; + for $bb ( 0 .. $#closing_brace_names ) { + next if ( $bb == $aa ); + $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; + } + + # set a flag for indenting a nested ternary statement + my $indent = 0; + if ( $aa == QUESTION_COLON ) { + $nested_ternary_flag[ $current_depth[$aa] ] = 0; + if ( $current_depth[$aa] > 1 ) { + if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { + my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; + if ( $pdepth == $total_depth - 1 ) { + $indent = 1; + $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; + } + } + } } - return $seqno; + $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type; + $statement_type = ""; + return ( $seqno, $indent ); } sub decrease_nesting_depth { - my ( $a, $pos ) = @_; + my ( $aa, $pos ) = @_; # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, # @current_sequence_number, @depth_array, @starting_line_of_current_depth - my $b; + # $statement_type + my $bb; my $seqno = 0; my $input_line_number = $tokenizer_self->{_last_line_number}; my $input_line = $tokenizer_self->{_line_text}; - if ( $current_depth[$a] > 0 ) { + my $outdent = 0; + $total_depth--; + if ( $current_depth[$aa] > 0 ) { - $seqno = $current_sequence_number[$a][ $current_depth[$a] ]; + # set a flag for un-indenting after seeing a nested ternary statement + $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; + if ( $aa == QUESTION_COLON ) { + $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; + } + $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ]; - # check that any brace types $b contained within are balanced - for $b ( 0 .. $#closing_brace_names ) { - next if ( $b == $a ); + # check that any brace types $bb contained within are balanced + for $bb ( 0 .. $#closing_brace_names ) { + next if ( $bb == $aa ); - unless ( $depth_array[$a][$b][ $current_depth[$a] ] == - $current_depth[$b] ) + unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == + $current_depth[$bb] ) { my $diff = - $current_depth[$b] - - $depth_array[$a][$b][ $current_depth[$a] ]; + $current_depth[$bb] - + $depth_array[$aa][$bb][ $current_depth[$aa] ]; # don't whine too many times my $saw_brace_error = get_saw_brace_error(); @@ -24001,7 +26126,8 @@ sub decrease_nesting_depth { { interrupt_logfile(); my $rsl = - $starting_line_of_current_depth[$a][ $current_depth[$a] ]; + $starting_line_of_current_depth[$aa] + [ $current_depth[$aa] ]; my $sl = $$rsl[0]; my $rel = [ $input_line_number, $input_line, $pos ]; my $el = $$rel[0]; @@ -24015,17 +26141,17 @@ sub decrease_nesting_depth { } my $bname = ( $diff > 0 ) - ? $opening_brace_names[$b] - : $closing_brace_names[$b]; + ? $opening_brace_names[$bb] + : $closing_brace_names[$bb]; write_error_indicator_pair( @$rsl, '^' ); my $msg = <<"EOM"; -Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el +Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el EOM if ( $diff > 0 ) { my $rml = - $starting_line_of_current_depth[$b] - [ $current_depth[$b] ]; + $starting_line_of_current_depth[$bb] + [ $current_depth[$bb] ]; my $ml = $$rml[0]; $msg .= " The most recent un-matched $bname is on line $ml\n"; @@ -24038,35 +26164,36 @@ EOM increment_brace_error(); } } - $current_depth[$a]--; + $current_depth[$aa]--; } else { my $saw_brace_error = get_saw_brace_error(); if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { my $msg = <<"EOM"; -There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number +There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number EOM indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); } increment_brace_error(); } - return $seqno; + return ( $seqno, $outdent ); } sub check_final_nesting_depths { - my ($a); + my ($aa); # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth - for $a ( 0 .. $#closing_brace_names ) { + for $aa ( 0 .. $#closing_brace_names ) { - if ( $current_depth[$a] ) { - my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ]; + if ( $current_depth[$aa] ) { + my $rsl = + $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; my $sl = $$rsl[0]; my $msg = <<"EOM"; -Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a] -The most recent un-matched $opening_brace_names[$a] is on line $sl +Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] +The most recent un-matched $opening_brace_names[$aa] is on line $sl EOM indicate_error( $msg, @$rsl, '^' ); increment_brace_error(); @@ -24493,19 +26620,8 @@ sub scan_bare_identifier_do { # doesn't get in the way of good scripts. # Complain if a filehandle has any lower case - # letters. This is suggested good practice, but the - # main reason for this warning is that prior to - # release 20010328, perltidy incorrectly parsed a - # function call after a print/printf, with the - # result that a space got added before the opening - # paren, thereby converting the function name to a - # filehandle according to perl's weird rules. This - # will not usually generate a syntax error, so this - # is a potentially serious bug. By warning - # of filehandles with any lower case letters, - # followed by opening parens, we will help the user - # find almost all of these older errors. - # use 'sub_name' because something like + # letters. This is suggested good practice. + # Use 'sub_name' because something like # main::MYHANDLE is ok for filehandle if ( $sub_name =~ /[a-z]/ ) { @@ -24730,7 +26846,7 @@ sub do_scan_package { # check for error my ( $next_nonblank_token, $i_next ) = find_next_nonblank_token( $i, $rtokens, $max_token_index ); - if ( $next_nonblank_token !~ /^[;\}]$/ ) { + if ( $next_nonblank_token !~ /^[;\{\}]$/ ) { warning( "Unexpected '$next_nonblank_token' after package name '$tok'\n" ); @@ -24755,7 +26871,9 @@ sub scan_identifier_do { # USES GLOBAL VARIABLES: $context, $last_nonblank_token, # $last_nonblank_type - my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_; + my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, + $expecting ) + = @_; my $i_begin = $i; my $type = ''; my $tok_begin = $$rtokens[$i_begin]; @@ -25035,7 +27153,18 @@ sub scan_identifier_do { # punctuation variable? # testfile: cunningham4.pl - if ( $identifier eq '&' ) { + # + # We have to be careful here. If we are in an unknown state, + # we will reject the punctuation variable. In the following + # example the '&' is a binary opeator but we are in an unknown + # state because there is no sigil on 'Prima', so we don't + # know what it is. But it is a bad guess that + # '&~' is a punction variable. + # $self->{text}->{colorMap}->[ + # Prima::PodView::COLOR_CODE_FOREGROUND + # & ~tb::COLOR_INDEX ] = + # $sec->{ColorCode} + if ( $identifier eq '&' && $expecting ) { $identifier .= $tok; } else { @@ -25474,7 +27603,7 @@ sub pattern_expected { # -1 - no my ( $i, $rtokens, $max_token_index ) = @_; my $next_token = $$rtokens[ $i + 1 ]; - if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier + if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier my ( $next_nonblank_token, $i_next ) = find_next_nonblank_token( $i, $rtokens, $max_token_index ); @@ -26417,7 +28546,8 @@ BEGIN { # These tokens may precede a code block # patched for SWITCH/CASE - @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else + @_ = + qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else unless do while until eval for foreach map grep sort switch case given when); @is_code_block_token{@_} = (1) x scalar(@_); @@ -26442,6 +28572,7 @@ BEGIN { LE LT NE + UNITCHECK abs accept alarm @@ -26450,6 +28581,7 @@ BEGIN { bind binmode bless + break caller chdir chmod @@ -26837,6 +28969,8 @@ BEGIN { vec warn while + given + when ); @is_keyword_taking_list{@keyword_taking_list} = (1) x scalar(@keyword_taking_list); @@ -26849,7 +28983,7 @@ BEGIN { # __PACKAGE__ # ); - # The list of keywords was extracted from function 'keyword' in + # The list of keywords was originally extracted from function 'keyword' in # perl file toke.c version 5.005.03, using this utility, plus a # little editing: (file getkwd.pl): # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } @@ -26883,6 +29017,8 @@ Perl::Tidy - Parses and beautifies perl source formatter => $formatter, # callback object (see below) dump_options => $dump_options, dump_options_type => $dump_options_type, + prefilter => $prefilter_coderef, + postfilter => $postfilter_coderef, ); =head1 DESCRIPTION @@ -26898,7 +29034,7 @@ For example, the perltidy script is basically just this: Perl::Tidy::perltidy(); The module accepts input and output streams by a variety of methods. -The following list of parameters may be any of a the following: a +The following list of parameters may be any of the following: a filename, an ARRAY reference, a SCALAR reference, or an object with either a B or B method, as appropriate. @@ -26931,8 +29067,9 @@ close method will be called at the end of the stream. =item source -If the B parameter is given, it defines the source of the -input stream. +If the B parameter is given, it defines the source of the input stream. +If an input stream is defined with the B parameter then no other source +filenames may be specified in the @ARGV array or B parameter. =item destination @@ -26941,8 +29078,10 @@ file or memory location to receive output of perltidy. =item stderr -The B parameter allows the calling program to capture the output -to what would otherwise go to the standard error output device. +The B parameter allows the calling program to redirect to a file the +output of what would otherwise go to the standard error output device. Unlike +many other parameters, $stderr must be a file or file handle; it may not be a +reference to a SCALAR or ARRAY. =item perltidyrc @@ -27000,9 +29139,41 @@ If the B parameter is given, it must be the reference to a hash. This hash will receive all abbreviations used by Perl::Tidy. See the demo program F for example usage. +=item prefilter + +A code reference that will be applied to the source before tidying. It is +expected to take the full content as a string in its input, and output the +transformed content. + +=item postfilter + +A code reference that will be applied to the tidied result before outputting. +It is expected to take the full content as a string in its input, and output +the transformed content. + +Note: A convenient way to check the function of your custom prefilter and +postfilter code is to use the --notidy option, first with just the prefilter +and then with both the prefilter and postfilter. See also the file +B in the perltidy distribution. + =back -=head1 EXAMPLE +=head1 NOTES ON FORMATTING PARAMETERS + +Parameters which control formatting may be passed in several ways: in a +F<.perltidyrc> configuration file, in the B parameter, and in the +B parameter. + +The B<-syn> (B<--check-syntax>) flag may be used with all source and +destination streams except for standard input and output. However +data streams which are not associated with a filename will +be copied to a temporary file before being be passed to Perl. This +use of temporary files can cause somewhat confusing output from Perl. + +=head1 EXAMPLES + +The perltidy script itself is a simple example, and several +examples are given in the perltidy distribution. The following example passes perltidy a snippet as a reference to a string and receives the result back in a reference to @@ -27175,7 +29346,14 @@ to perltidy. =head1 VERSION -This man page documents Perl::Tidy version 20070424. +This man page documents Perl::Tidy version 20120701. + +=head1 LICENSE + +This package is free software; you can redistribute it and/or modify it +under the terms of the "GNU General Public License". + +Please refer to the file "COPYING" for details. =head1 AUTHOR