2 ############################################################
4 # perltidy - a perl script indenter and formatter
6 # Copyright (c) 2000-2014 by Steve Hancock
7 # Distributed under the GPL license agreement; see file COPYING
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License along
20 # with this program; if not, write to the Free Software Foundation, Inc.,
21 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 # For brief instructions, try 'perltidy -h'.
24 # For more complete documentation, try 'man perltidy'
25 # or visit http://perltidy.sourceforge.net
27 # This script is an example of the default style. It was formatted with:
31 # Code Contributions: See ChangeLog.html for a complete history.
32 # Michael Cartmell supplied code for adaptation to VMS and helped with
34 # Hugh S. Myers supplied sub streamhandle and the supporting code to
35 # create a Perl::Tidy module which can operate on strings, arrays, etc.
36 # Yves Orton supplied coding to help detect Windows versions.
37 # Axel Rose supplied a patch for MacPerl.
38 # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
39 # Dan Tyrell contributed a patch for binary I/O.
40 # Ueli Hugenschmidt contributed a patch for -fpsc
41 # Sam Kington supplied a patch to identify the initial indentation of
43 # jonathan swartz supplied patches for:
44 # * .../ pattern, which looks upwards from directory
45 # * --notidy, to be used in directories where we want to avoid
46 # accidentally tidying
47 # * prefilter and postfilter
50 # Many others have supplied key ideas, suggestions, and bug reports;
51 # see the CHANGES file.
53 ############################################################
56 use 5.004; # need IO::File from 5.004 or later
57 BEGIN { $^W = 1; } # turn on warnings
72 @ISA = qw( Exporter );
73 @EXPORT = qw( &perltidy );
79 use File::Temp qw(tempfile);
82 ( $VERSION = q($Id: Tidy.pm,v 1.74 2014/03/28 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
87 # given filename and mode (r or w), create an object which:
88 # has a 'getline' method if mode='r', and
89 # has a 'print' method if mode='w'.
90 # The objects also need a 'close' method.
92 # How the object is made:
94 # if $filename is: Make object using:
95 # ---------------- -----------------
96 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
98 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
99 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
101 # (check for 'print' method for 'w' mode)
102 # (check for 'getline' method for 'r' mode)
103 my $ref = ref( my $filename = shift );
110 if ( $ref eq 'ARRAY' ) {
111 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
113 elsif ( $ref eq 'SCALAR' ) {
114 $New = sub { Perl::Tidy::IOScalar->new(@_) };
118 # Accept an object with a getline method for reading. Note:
119 # IO::File is built-in and does not respond to the defined
120 # operator. If this causes trouble, the check can be
121 # skipped and we can just let it crash if there is no
123 if ( $mode =~ /[rR]/ ) {
124 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
125 $New = sub { $filename };
128 $New = sub { undef };
130 ------------------------------------------------------------------------
131 No 'getline' method is defined for object of class $ref
132 Please check your call to Perl::Tidy::perltidy. Trace follows.
133 ------------------------------------------------------------------------
138 # Accept an object with a print method for writing.
139 # See note above about IO::File
140 if ( $mode =~ /[wW]/ ) {
141 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
142 $New = sub { $filename };
145 $New = sub { undef };
147 ------------------------------------------------------------------------
148 No 'print' method is defined for object of class $ref
149 Please check your call to Perl::Tidy::perltidy. Trace follows.
150 ------------------------------------------------------------------------
159 if ( $filename eq '-' ) {
160 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
163 $New = sub { IO::File->new(@_) };
166 $fh = $New->( $filename, $mode )
167 or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
168 return $fh, ( $ref or $filename );
171 sub find_input_line_ending {
173 # Peek at a file and return first line ending character.
174 # Quietly return undef in case of any trouble.
175 my ($input_file) = @_;
178 # silently ignore input from object or stdin
179 if ( ref($input_file) || $input_file eq '-' ) {
182 open( INFILE, $input_file ) || return $ending;
186 read( INFILE, $buf, 1024 );
188 if ( $buf && $buf =~ /([\012\015]+)/ ) {
192 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
195 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
198 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
212 # concatenate a path and file basename
213 # returns undef in case of error
215 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
217 # use File::Spec if we can
218 unless ($missing_file_spec) {
219 return File::Spec->catfile(@_);
222 # Perl 5.004 systems may not have File::Spec so we'll make
223 # a simple try. We assume File::Basename is available.
224 # return undef if not successful.
226 my $path = join '/', @_;
227 my $test_file = $path . $name;
228 my ( $test_name, $test_path ) = fileparse($test_file);
229 return $test_file if ( $test_name eq $name );
230 return undef if ( $^O eq 'VMS' );
232 # this should work at least for Windows and Unix:
233 $test_file = $path . '/' . $name;
234 ( $test_name, $test_path ) = fileparse($test_file);
235 return $test_file if ( $test_name eq $name );
239 # Here is a map of the flow of data from the input source to the output
242 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
243 # input groups output
244 # lines tokens lines of lines lines
247 # The names correspond to the package names responsible for the unit processes.
249 # The overall process is controlled by the "main" package.
251 # LineSource is the stream of input lines
253 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
254 # if necessary. A token is any section of the input line which should be
255 # manipulated as a single entity during formatting. For example, a single
256 # ',' character is a token, and so is an entire side comment. It handles
257 # the complexities of Perl syntax, such as distinguishing between '<<' as
258 # a shift operator and as a here-document, or distinguishing between '/'
259 # as a divide symbol and as a pattern delimiter.
261 # Formatter inserts and deletes whitespace between tokens, and breaks
262 # sequences of tokens at appropriate points as output lines. It bases its
263 # decisions on the default rules as modified by any command-line options.
265 # VerticalAligner collects groups of lines together and tries to line up
266 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
268 # FileWriter simply writes lines to the output stream.
270 # The Logger package, not shown, records significant events and warning
271 # messages. It writes a .LOG file, which may be saved with a
272 # '-log' or a '-g' flag.
278 destination => undef,
285 dump_options => undef,
286 dump_options_type => undef,
287 dump_getopt_flags => undef,
288 dump_options_category => undef,
289 dump_options_range => undef,
290 dump_abbreviations => undef,
295 # don't overwrite callers ARGV
297 local *STDERR = *STDERR;
301 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
303 my @good_keys = sort keys %defaults;
304 @bad_keys = sort @bad_keys;
306 ------------------------------------------------------------------------
307 Unknown perltidy parameter : (@bad_keys)
308 perltidy only understands : (@good_keys)
309 ------------------------------------------------------------------------
314 my $get_hash_ref = sub {
316 my $hash_ref = $input_hash{$key};
317 if ( defined($hash_ref) ) {
318 unless ( ref($hash_ref) eq 'HASH' ) {
319 my $what = ref($hash_ref);
321 $what ? "but is ref to $what" : "but is not a reference";
323 ------------------------------------------------------------------------
324 error in call to perltidy:
325 -$key must be reference to HASH $but_is
326 ------------------------------------------------------------------------
333 %input_hash = ( %defaults, %input_hash );
334 my $argv = $input_hash{'argv'};
335 my $destination_stream = $input_hash{'destination'};
336 my $errorfile_stream = $input_hash{'errorfile'};
337 my $logfile_stream = $input_hash{'logfile'};
338 my $perltidyrc_stream = $input_hash{'perltidyrc'};
339 my $source_stream = $input_hash{'source'};
340 my $stderr_stream = $input_hash{'stderr'};
341 my $user_formatter = $input_hash{'formatter'};
342 my $prefilter = $input_hash{'prefilter'};
343 my $postfilter = $input_hash{'postfilter'};
345 if ($stderr_stream) {
346 ( $fh_stderr, my $stderr_file ) =
347 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
350 ------------------------------------------------------------------------
351 Unable to redirect STDERR to $stderr_stream
352 Please check value of -stderr in call to perltidy
353 ------------------------------------------------------------------------
358 $fh_stderr = *STDERR;
361 sub Warn ($) { $fh_stderr->print( $_[0] ); }
364 if ( $_[0] ) { goto ERROR_EXIT }
365 else { goto NORMAL_EXIT }
368 sub Die ($) { Warn $_[0]; Exit(1); }
370 # extract various dump parameters
371 my $dump_options_type = $input_hash{'dump_options_type'};
372 my $dump_options = $get_hash_ref->('dump_options');
373 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
374 my $dump_options_category = $get_hash_ref->('dump_options_category');
375 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
376 my $dump_options_range = $get_hash_ref->('dump_options_range');
378 # validate dump_options_type
379 if ( defined($dump_options) ) {
380 unless ( defined($dump_options_type) ) {
381 $dump_options_type = 'perltidyrc';
383 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
385 ------------------------------------------------------------------------
386 Please check value of -dump_options_type in call to perltidy;
387 saw: '$dump_options_type'
388 expecting: 'perltidyrc' or 'full'
389 ------------------------------------------------------------------------
395 $dump_options_type = "";
398 if ($user_formatter) {
400 # if the user defines a formatter, there is no output stream,
401 # but we need a null stream to keep coding simple
402 $destination_stream = Perl::Tidy::DevNull->new();
405 # see if ARGV is overridden
406 if ( defined($argv) ) {
408 my $rargv = ref $argv;
409 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
413 if ( $rargv eq 'ARRAY' ) {
418 ------------------------------------------------------------------------
419 Please check value of -argv in call to perltidy;
420 it must be a string or ref to ARRAY but is: $rargv
421 ------------------------------------------------------------------------
428 my ( $rargv, $msg ) = parse_args($argv);
431 Error parsing this string passed to to perltidy with 'argv':
439 my $rpending_complaint;
440 $$rpending_complaint = "";
441 my $rpending_logfile_message;
442 $$rpending_logfile_message = "";
444 my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
446 # VMS file names are restricted to a 40.40 format, so we append _tdy
447 # instead of .tdy, etc. (but see also sub check_vms_filename)
450 if ( $^O eq 'VMS' ) {
456 $dot_pattern = '\.'; # must escape for use in regex
459 #---------------------------------------------------------------
460 # get command line options
461 #---------------------------------------------------------------
463 $rOpts, $config_file, $rraw_options,
464 $saw_extrude, $saw_pbp, $roption_string,
465 $rexpansion, $roption_category, $roption_range
467 = process_command_line(
468 $perltidyrc_stream, $is_Windows, $Windows_type,
469 $rpending_complaint, $dump_options_type,
472 #---------------------------------------------------------------
473 # Handle requests to dump information
474 #---------------------------------------------------------------
476 # return or exit immediately after all dumps
479 # Getopt parameters and their flags
480 if ( defined($dump_getopt_flags) ) {
482 foreach my $op ( @{$roption_string} ) {
491 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
495 $dump_getopt_flags->{$opt} = $flag;
499 if ( defined($dump_options_category) ) {
501 %{$dump_options_category} = %{$roption_category};
504 if ( defined($dump_options_range) ) {
506 %{$dump_options_range} = %{$roption_range};
509 if ( defined($dump_abbreviations) ) {
511 %{$dump_abbreviations} = %{$rexpansion};
514 if ( defined($dump_options) ) {
516 %{$dump_options} = %{$rOpts};
519 Exit 0 if ($quit_now);
521 # make printable string of options for this run as possible diagnostic
522 my $readable_options = readable_options( $rOpts, $roption_string );
524 # dump from command line
525 if ( $rOpts->{'dump-options'} ) {
526 print STDOUT $readable_options;
530 #---------------------------------------------------------------
531 # check parameters and their interactions
532 #---------------------------------------------------------------
534 check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
536 if ($user_formatter) {
537 $rOpts->{'format'} = 'user';
540 # there must be one entry here for every possible format
541 my %default_file_extension = (
547 # be sure we have a valid output format
548 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
549 my $formats = join ' ',
550 sort map { "'" . $_ . "'" } keys %default_file_extension;
551 my $fmt = $rOpts->{'format'};
552 Die "-format='$fmt' but must be one of: $formats\n";
555 my $output_extension = make_extension( $rOpts->{'output-file-extension'},
556 $default_file_extension{ $rOpts->{'format'} }, $dot );
558 # If the backup extension contains a / character then the backup should
559 # be deleted when the -b option is used. On older versions of
560 # perltidy this will generate an error message due to an illegal
563 # A backup file will still be generated but will be deleted
564 # at the end. If -bext='/' then this extension will be
565 # the default 'bak'. Otherwise it will be whatever characters
566 # remains after all '/' characters are removed. For example:
567 # -bext extension slashes
571 # '/dev/null' devnull 2 (Currently not allowed)
572 my $bext = $rOpts->{'backup-file-extension'};
573 my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
575 # At present only one forward slash is allowed. In the future multiple
576 # slashes may be allowed to allow for other options
577 if ( $delete_backup > 1 ) {
578 Die "-bext=$bext contains more than one '/'\n";
581 my $backup_extension =
582 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
584 my $html_toc_extension =
585 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
587 my $html_src_extension =
588 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
590 # check for -b option;
591 # silently ignore unless beautify mode
592 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
593 && $rOpts->{'format'} eq 'tidy';
595 # Turn off -b with warnings in case of conflicts with other options.
596 # NOTE: Do this silently, without warnings, if there is a source or
597 # destination stream, or standard output is used. This is because the -b
598 # flag may have been in a .perltidyrc file and warnings break
599 # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014.
600 if ($in_place_modify) {
601 if ( $rOpts->{'standard-output'} ) {
602 ## my $msg = "Ignoring -b; you may not use -b and -st together";
603 ## $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
605 $in_place_modify = 0;
607 if ($destination_stream) {
608 ##Warn "Ignoring -b; you may not specify a destination stream and -b together\n";
609 $in_place_modify = 0;
611 if ( ref($source_stream) ) {
612 ##Warn "Ignoring -b; you may not specify a source array and -b together\n";
613 $in_place_modify = 0;
615 if ( $rOpts->{'outfile'} ) {
616 ##Warn "Ignoring -b; you may not use -b and -o together\n";
617 $in_place_modify = 0;
619 if ( defined( $rOpts->{'output-path'} ) ) {
620 ##Warn "Ignoring -b; you may not use -b and -opath together\n";
621 $in_place_modify = 0;
625 Perl::Tidy::Formatter::check_options($rOpts);
626 if ( $rOpts->{'format'} eq 'html' ) {
627 Perl::Tidy::HtmlWriter->check_options($rOpts);
630 # make the pattern of file extensions that we shouldn't touch
631 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
632 if ($output_extension) {
633 my $ext = quotemeta($output_extension);
634 $forbidden_file_extensions .= "|$ext";
636 if ( $in_place_modify && $backup_extension ) {
637 my $ext = quotemeta($backup_extension);
638 $forbidden_file_extensions .= "|$ext";
640 $forbidden_file_extensions .= ')$';
642 # Create a diagnostics object if requested;
643 # This is only useful for code development
644 my $diagnostics_object = undef;
645 if ( $rOpts->{'DIAGNOSTICS'} ) {
646 $diagnostics_object = Perl::Tidy::Diagnostics->new();
649 # no filenames should be given if input is from an array
650 if ($source_stream) {
653 "You may not specify any filenames when a source array is given\n";
656 # we'll stuff the source array into ARGV
657 unshift( @ARGV, $source_stream );
659 # No special treatment for source stream which is a filename.
660 # This will enable checks for binary files and other bad stuff.
661 $source_stream = undef unless ref($source_stream);
664 # use stdin by default if no source array and no args
666 unshift( @ARGV, '-' ) unless @ARGV;
669 #---------------------------------------------------------------
671 # main loop to process all files in argument list
672 #---------------------------------------------------------------
673 my $number_of_files = @ARGV;
674 my $formatter = undef;
675 my $tokenizer = undef;
676 while ( my $input_file = shift @ARGV ) {
678 my $input_file_permissions;
680 #---------------------------------------------------------------
681 # prepare this input stream
682 #---------------------------------------------------------------
683 if ($source_stream) {
684 $fileroot = "perltidy";
686 # If the source is from an array or string, then .LOG output
687 # is only possible if a logfile stream is specified. This prevents
688 # unexpected perltidy.LOG files.
689 if ( !defined($logfile_stream) ) {
690 $logfile_stream = Perl::Tidy::DevNull->new();
693 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
694 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
695 $in_place_modify = 0;
698 $fileroot = $input_file;
699 unless ( -e $input_file ) {
701 # file doesn't exist - check for a file glob
702 if ( $input_file =~ /([\?\*\[\{])/ ) {
704 # Windows shell may not remove quotes, so do it
705 my $input_file = $input_file;
706 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
707 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
708 my $pattern = fileglob_to_re($input_file);
710 if ( !$@ && opendir( DIR, './' ) ) {
712 grep { /$pattern/ && !-d $_ } readdir(DIR);
715 unshift @ARGV, @files;
720 Warn "skipping file: '$input_file': no matches found\n";
724 unless ( -f $input_file ) {
725 Warn "skipping file: $input_file: not a regular file\n";
729 # As a safety precaution, skip zero length files.
730 # If for example a source file got clobbered somehow,
731 # the old .tdy or .bak files might still exist so we
732 # shouldn't overwrite them with zero length files.
733 unless ( -s $input_file ) {
734 Warn "skipping file: $input_file: Zero size\n";
738 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
740 "skipping file: $input_file: Non-text (override with -f)\n";
744 # we should have a valid filename now
745 $fileroot = $input_file;
746 $input_file_permissions = ( stat $input_file )[2] & 07777;
748 if ( $^O eq 'VMS' ) {
749 ( $fileroot, $dot ) = check_vms_filename($fileroot);
752 # add option to change path here
753 if ( defined( $rOpts->{'output-path'} ) ) {
755 my ( $base, $old_path ) = fileparse($fileroot);
756 my $new_path = $rOpts->{'output-path'};
757 unless ( -d $new_path ) {
758 unless ( mkdir $new_path, 0777 ) {
759 Die "unable to create directory $new_path: $!\n";
762 my $path = $new_path;
763 $fileroot = catfile( $path, $base );
766 ------------------------------------------------------------------------
767 Problem combining $new_path and $base to make a filename; check -opath
768 ------------------------------------------------------------------------
774 # Skip files with same extension as the output files because
775 # this can lead to a messy situation with files like
776 # script.tdy.tdy.tdy ... or worse problems ... when you
777 # rerun perltidy over and over with wildcard input.
780 && ( $input_file =~ /$forbidden_file_extensions/o
781 || $input_file eq 'DIAGNOSTICS' )
784 Warn "skipping file: $input_file: wrong extension\n";
788 # the 'source_object' supplies a method to read the input file
790 Perl::Tidy::LineSource->new( $input_file, $rOpts,
791 $rpending_logfile_message );
792 next unless ($source_object);
794 # Prefilters and postfilters: The prefilter is a code reference
795 # that will be applied to the source before tidying, and the
796 # postfilter is a code reference to the result before outputting.
799 while ( my $line = $source_object->get_line() ) {
802 $buf = $prefilter->($buf);
804 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
805 $rpending_logfile_message );
808 # register this file name with the Diagnostics package
809 $diagnostics_object->set_input_file($input_file)
810 if $diagnostics_object;
812 #---------------------------------------------------------------
813 # prepare the output stream
814 #---------------------------------------------------------------
815 my $output_file = undef;
816 my $actual_output_extension;
818 if ( $rOpts->{'outfile'} ) {
820 if ( $number_of_files <= 1 ) {
822 if ( $rOpts->{'standard-output'} ) {
823 my $msg = "You may not use -o and -st together";
824 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
827 elsif ($destination_stream) {
829 "You may not specify a destination array and -o together\n";
831 elsif ( defined( $rOpts->{'output-path'} ) ) {
832 Die "You may not specify -o and -opath together\n";
834 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
835 Die "You may not specify -o and -oext together\n";
837 $output_file = $rOpts->{outfile};
839 # make sure user gives a file name after -o
840 if ( $output_file =~ /^-/ ) {
841 Die "You must specify a valid filename after -o\n";
844 # do not overwrite input file with -o
845 if ( defined($input_file_permissions)
846 && ( $output_file eq $input_file ) )
848 Die "Use 'perltidy -b $input_file' to modify in-place\n";
852 Die "You may not use -o with more than one input file\n";
855 elsif ( $rOpts->{'standard-output'} ) {
856 if ($destination_stream) {
858 "You may not specify a destination array and -st together\n";
859 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
864 if ( $number_of_files <= 1 ) {
867 Die "You may not use -st with more than one input file\n";
870 elsif ($destination_stream) {
871 $output_file = $destination_stream;
873 elsif ($source_stream) { # source but no destination goes to stdout
876 elsif ( $input_file eq '-' ) {
880 if ($in_place_modify) {
881 $output_file = IO::File->new_tmpfile()
882 or Die "cannot open temp file for -b option: $!\n";
885 $actual_output_extension = $output_extension;
886 $output_file = $fileroot . $output_extension;
890 # the 'sink_object' knows how to write the output file
891 my $tee_file = $fileroot . $dot . "TEE";
893 my $line_separator = $rOpts->{'output-line-ending'};
894 if ( $rOpts->{'preserve-line-endings'} ) {
895 $line_separator = find_input_line_ending($input_file);
898 # Eventually all I/O may be done with binmode, but for now it is
899 # only done when a user requests a particular line separator
900 # through the -ple or -ole flags
902 if ( defined($line_separator) ) { $binmode = 1 }
903 else { $line_separator = "\n" }
905 my ( $sink_object, $postfilter_buffer );
908 Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
909 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
913 Perl::Tidy::LineSink->new( $output_file, $tee_file,
914 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
917 #---------------------------------------------------------------
918 # initialize the error logger for this file
919 #---------------------------------------------------------------
920 my $warning_file = $fileroot . $dot . "ERR";
921 if ($errorfile_stream) { $warning_file = $errorfile_stream }
922 my $log_file = $fileroot . $dot . "LOG";
923 if ($logfile_stream) { $log_file = $logfile_stream }
926 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
927 $fh_stderr, $saw_extrude );
928 write_logfile_header(
929 $rOpts, $logger_object, $config_file,
930 $rraw_options, $Windows_type, $readable_options,
932 if ($$rpending_logfile_message) {
933 $logger_object->write_logfile_entry($$rpending_logfile_message);
935 if ($$rpending_complaint) {
936 $logger_object->complain($$rpending_complaint);
939 #---------------------------------------------------------------
940 # initialize the debug object, if any
941 #---------------------------------------------------------------
942 my $debugger_object = undef;
943 if ( $rOpts->{DEBUG} ) {
945 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
948 #---------------------------------------------------------------
949 # loop over iterations for one source stream
950 #---------------------------------------------------------------
952 # We will do a convergence test if 3 or more iterations are allowed.
953 # It would be pointless for fewer because we have to make at least
954 # two passes before we can see if we are converged, and the test
955 # would just slow things down.
956 my $max_iterations = $rOpts->{'iterations'};
957 my $convergence_log_message;
959 my $do_convergence_test = $max_iterations > 2;
960 if ($do_convergence_test) {
961 eval "use Digest::MD5 qw(md5_hex)";
962 $do_convergence_test = !$@;
964 # Trying to avoid problems with ancient versions of perl because
965 # I don't know in which version number utf8::encode was introduced.
966 eval { my $string = "perltidy"; utf8::encode($string) };
967 $do_convergence_test = $do_convergence_test && !$@;
970 # save objects to allow redirecting output during iterations
971 my $sink_object_final = $sink_object;
972 my $debugger_object_final = $debugger_object;
973 my $logger_object_final = $logger_object;
975 for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
977 # send output stream to temp buffers until last iteration
979 if ( $iter < $max_iterations ) {
981 Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
982 $line_separator, $rOpts, $rpending_logfile_message,
986 $sink_object = $sink_object_final;
989 # Save logger, debugger output only on pass 1 because:
990 # (1) line number references must be to the starting
991 # source, not an intermediate result, and
992 # (2) we need to know if there are errors so we can stop the
993 # iterations early if necessary.
995 $debugger_object = undef;
996 $logger_object = undef;
999 #------------------------------------------------------------
1000 # create a formatter for this file : html writer or
1002 #------------------------------------------------------------
1004 # we have to delete any old formatter because, for safety,
1005 # the formatter will check to see that there is only one.
1008 if ($user_formatter) {
1009 $formatter = $user_formatter;
1011 elsif ( $rOpts->{'format'} eq 'html' ) {
1013 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1014 $actual_output_extension, $html_toc_extension,
1015 $html_src_extension );
1017 elsif ( $rOpts->{'format'} eq 'tidy' ) {
1018 $formatter = Perl::Tidy::Formatter->new(
1019 logger_object => $logger_object,
1020 diagnostics_object => $diagnostics_object,
1021 sink_object => $sink_object,
1025 Die "I don't know how to do -format=$rOpts->{'format'}\n";
1028 unless ($formatter) {
1029 Die "Unable to continue with $rOpts->{'format'} formatting\n";
1032 #---------------------------------------------------------------
1033 # create the tokenizer for this file
1034 #---------------------------------------------------------------
1035 $tokenizer = undef; # must destroy old tokenizer
1036 $tokenizer = Perl::Tidy::Tokenizer->new(
1037 source_object => $source_object,
1038 logger_object => $logger_object,
1039 debugger_object => $debugger_object,
1040 diagnostics_object => $diagnostics_object,
1041 tabsize => $tabsize,
1043 starting_level => $rOpts->{'starting-indentation-level'},
1044 indent_columns => $rOpts->{'indent-columns'},
1045 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
1046 look_for_autoloader => $rOpts->{'look-for-autoloader'},
1047 look_for_selfloader => $rOpts->{'look-for-selfloader'},
1048 trim_qw => $rOpts->{'trim-qw'},
1050 continuation_indentation =>
1051 $rOpts->{'continuation-indentation'},
1052 outdent_labels => $rOpts->{'outdent-labels'},
1055 #---------------------------------------------------------------
1057 #---------------------------------------------------------------
1058 process_this_file( $tokenizer, $formatter );
1060 #---------------------------------------------------------------
1061 # close the input source and report errors
1062 #---------------------------------------------------------------
1063 $source_object->close_input_file();
1065 # line source for next iteration (if any) comes from the current
1066 # temporary output buffer
1067 if ( $iter < $max_iterations ) {
1069 $sink_object->close_output_file();
1071 Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1072 $rpending_logfile_message );
1074 # stop iterations if errors or converged
1075 my $stop_now = $logger_object->{_warning_count};
1077 $convergence_log_message = <<EOM;
1078 Stopping iterations because of errors.
1081 elsif ($do_convergence_test) {
1083 # Patch for [rt.cpan.org #88020]
1084 # Use utf8::encode since md5_hex() only operates on bytes.
1085 my $digest = md5_hex( utf8::encode($sink_buffer) );
1086 if ( !$saw_md5{$digest} ) {
1087 $saw_md5{$digest} = $iter;
1091 # Deja vu, stop iterating
1093 my $iterm = $iter - 1;
1094 if ( $saw_md5{$digest} != $iterm ) {
1096 # Blinking (oscillating) between two stable
1097 # end states. This has happened in the past
1098 # but at present there are no known instances.
1099 $convergence_log_message = <<EOM;
1100 Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1102 $diagnostics_object->write_diagnostics(
1103 $convergence_log_message)
1104 if $diagnostics_object;
1107 $convergence_log_message = <<EOM;
1108 Converged. Output for iteration $iter same as for iter $iterm.
1110 $diagnostics_object->write_diagnostics(
1111 $convergence_log_message)
1112 if $diagnostics_object && $iterm > 2;
1115 } ## end if ($do_convergence_test)
1119 # we are stopping the iterations early;
1120 # copy the output stream to its final destination
1121 $sink_object = $sink_object_final;
1122 while ( my $line = $source_object->get_line() ) {
1123 $sink_object->write_line($line);
1125 $source_object->close_input_file();
1128 } ## end if ( $iter < $max_iterations)
1129 } # end loop over iterations for one source file
1131 # restore objects which have been temporarily undefined
1132 # for second and higher iterations
1133 $debugger_object = $debugger_object_final;
1134 $logger_object = $logger_object_final;
1136 $logger_object->write_logfile_entry($convergence_log_message)
1137 if $convergence_log_message;
1139 #---------------------------------------------------------------
1140 # Perform any postfilter operation
1141 #---------------------------------------------------------------
1143 $sink_object->close_output_file();
1145 Perl::Tidy::LineSink->new( $output_file, $tee_file,
1146 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
1147 my $buf = $postfilter->($postfilter_buffer);
1149 Perl::Tidy::LineSource->new( \$buf, $rOpts,
1150 $rpending_logfile_message );
1151 while ( my $line = $source_object->get_line() ) {
1152 $sink_object->write_line($line);
1154 $source_object->close_input_file();
1157 # Save names of the input and output files for syntax check
1158 my $ifname = $input_file;
1159 my $ofname = $output_file;
1161 #---------------------------------------------------------------
1162 # handle the -b option (backup and modify in-place)
1163 #---------------------------------------------------------------
1164 if ($in_place_modify) {
1165 unless ( -f $input_file ) {
1167 # oh, oh, no real file to backup ..
1168 # shouldn't happen because of numerous preliminary checks
1170 "problem with -b backing up input file '$input_file': not a file\n";
1172 my $backup_name = $input_file . $backup_extension;
1173 if ( -f $backup_name ) {
1174 unlink($backup_name)
1176 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1179 # backup the input file
1180 # we use copy for symlinks, move for regular files
1181 if ( -l $input_file ) {
1182 File::Copy::copy( $input_file, $backup_name )
1183 or Die "File::Copy failed trying to backup source: $!";
1186 rename( $input_file, $backup_name )
1188 "problem renaming $input_file to $backup_name for -b option: $!\n";
1190 $ifname = $backup_name;
1192 # copy the output to the original input file
1193 # NOTE: it would be nice to just close $output_file and use
1194 # File::Copy::copy here, but in this case $output_file is the
1195 # handle of an open nameless temporary file so we would lose
1196 # everything if we closed it.
1197 seek( $output_file, 0, 0 )
1198 or Die "unable to rewind a temporary file for -b option: $!\n";
1199 my $fout = IO::File->new("> $input_file")
1201 "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1204 while ( $line = $output_file->getline() ) {
1205 $fout->print($line);
1208 $output_file = $input_file;
1209 $ofname = $input_file;
1212 #---------------------------------------------------------------
1213 # clean up and report errors
1214 #---------------------------------------------------------------
1215 $sink_object->close_output_file() if $sink_object;
1216 $debugger_object->close_debug_file() if $debugger_object;
1218 # set output file permissions
1219 if ( $output_file && -f $output_file && !-l $output_file ) {
1220 if ($input_file_permissions) {
1222 # give output script same permissions as input script, but
1223 # make it user-writable or else we can't run perltidy again.
1224 # Thus we retain whatever executable flags were set.
1225 if ( $rOpts->{'format'} eq 'tidy' ) {
1226 chmod( $input_file_permissions | 0600, $output_file );
1229 # else use default permissions for html and any other format
1233 #---------------------------------------------------------------
1234 # Do syntax check if requested and possible
1235 #---------------------------------------------------------------
1236 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
1238 && $rOpts->{'check-syntax'}
1243 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1246 #---------------------------------------------------------------
1247 # remove the original file for in-place modify as follows:
1248 # $delete_backup=0 never
1249 # $delete_backup=1 only if no errors
1250 # $delete_backup>1 always : NOT ALLOWED, too risky, see above
1251 #---------------------------------------------------------------
1252 if ( $in_place_modify
1255 && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1258 # As an added safety precaution, do not delete the source file
1259 # if its size has dropped from positive to zero, since this
1260 # could indicate a disaster of some kind, including a hardware
1261 # failure. Actually, this could happen if you had a file of
1262 # all comments (or pod) and deleted everything with -dac (-dap)
1264 if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1266 "output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1272 "unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1276 $logger_object->finish( $infile_syntax_ok, $formatter )
1278 } # end of main loop to process all files
1285 } # end of main program perltidy
1287 sub get_stream_as_named_file {
1289 # Return the name of a file containing a stream of data, creating
1290 # a temporary file if necessary.
1292 # $stream - the name of a file or stream
1294 # $fname = name of file if possible, or undef
1295 # $if_tmpfile = true if temp file, undef if not temp file
1297 # This routine is needed for passing actual files to Perl for
1303 if ( ref($stream) ) {
1304 my ( $fh_stream, $fh_name ) =
1305 Perl::Tidy::streamhandle( $stream, 'r' );
1307 my ( $fout, $tmpnam ) = tempfile();
1312 while ( my $line = $fh_stream->getline() ) {
1313 $fout->print($line);
1317 $fh_stream->close();
1320 elsif ( $stream ne '-' && -f $stream ) {
1324 return ( $fname, $is_tmpfile );
1327 sub fileglob_to_re {
1329 # modified (corrected) from version in find2perl
1331 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
1332 $x =~ s#\*#.*#g; # '*' -> '.*'
1333 $x =~ s#\?#.#g; # '?' -> '.'
1334 "^$x\\z"; # match whole word
1337 sub make_extension {
1339 # Make a file extension, including any leading '.' if necessary
1340 # The '.' may actually be an '_' under VMS
1341 my ( $extension, $default, $dot ) = @_;
1343 # Use the default if none specified
1344 $extension = $default unless ($extension);
1346 # Only extensions with these leading characters get a '.'
1347 # This rule gives the user some freedom
1348 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1349 $extension = $dot . $extension;
1354 sub write_logfile_header {
1356 $rOpts, $logger_object, $config_file,
1357 $rraw_options, $Windows_type, $readable_options
1359 $logger_object->write_logfile_entry(
1360 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1362 if ($Windows_type) {
1363 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1365 my $options_string = join( ' ', @$rraw_options );
1368 $logger_object->write_logfile_entry(
1369 "Found Configuration File >>> $config_file \n");
1371 $logger_object->write_logfile_entry(
1372 "Configuration and command line parameters for this run:\n");
1373 $logger_object->write_logfile_entry("$options_string\n");
1375 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1376 $rOpts->{'logfile'} = 1; # force logfile to be saved
1377 $logger_object->write_logfile_entry(
1378 "Final parameter set for this run\n");
1379 $logger_object->write_logfile_entry(
1380 "------------------------------------\n");
1382 $logger_object->write_logfile_entry($readable_options);
1384 $logger_object->write_logfile_entry(
1385 "------------------------------------\n");
1387 $logger_object->write_logfile_entry(
1388 "To find error messages search for 'WARNING' with your editor\n");
1391 sub generate_options {
1393 ######################################################################
1394 # Generate and return references to:
1395 # @option_string - the list of options to be passed to Getopt::Long
1396 # @defaults - the list of default options
1397 # %expansion - a hash showing how all abbreviations are expanded
1398 # %category - a hash giving the general category of each option
1399 # %option_range - a hash giving the valid ranges of certain options
1401 # Note: a few options are not documented in the man page and usage
1402 # message. This is because these are experimental or debug options and
1403 # may or may not be retained in future versions.
1405 # Here are the undocumented flags as far as I know. Any of them
1406 # may disappear at any time. They are mainly for fine-tuning
1409 # fll --> fuzzy-line-length # a trivial parameter which gets
1410 # turned off for the extrude option
1411 # which is mainly for debugging
1412 # scl --> short-concatenation-item-length # helps break at '.'
1413 # recombine # for debugging line breaks
1414 # valign # for debugging vertical alignment
1415 # I --> DIAGNOSTICS # for debugging
1416 ######################################################################
1418 # here is a summary of the Getopt codes:
1419 # <none> does not take an argument
1420 # =s takes a mandatory string
1421 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1422 # =i takes a mandatory integer
1423 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1424 # ! does not take an argument and may be negated
1425 # i.e., -foo and -nofoo are allowed
1426 # a double dash signals the end of the options list
1428 #---------------------------------------------------------------
1429 # Define the option string passed to GetOptions.
1430 #---------------------------------------------------------------
1432 my @option_string = ();
1434 my %option_category = ();
1435 my %option_range = ();
1436 my $rexpansion = \%expansion;
1438 # names of categories in manual
1439 # leading integers will allow sorting
1440 my @category_name = (
1442 '1. Basic formatting options',
1443 '2. Code indentation control',
1444 '3. Whitespace control',
1445 '4. Comment controls',
1446 '5. Linebreak controls',
1447 '6. Controlling list formatting',
1448 '7. Retaining or ignoring existing line breaks',
1449 '8. Blank line control',
1450 '9. Other controls',
1452 '11. pod2html options',
1453 '12. Controlling HTML properties',
1457 # These options are parsed directly by perltidy:
1460 # However, they are included in the option set so that they will
1461 # be seen in the options dump.
1463 # These long option names have no abbreviations or are treated specially
1464 @option_string = qw(
1474 my $category = 13; # Debugging
1475 foreach (@option_string) {
1476 my $opt = $_; # must avoid changing the actual flag
1478 $option_category{$opt} = $category_name[$category];
1481 $category = 11; # HTML
1482 $option_category{html} = $category_name[$category];
1484 # routine to install and check options
1485 my $add_option = sub {
1486 my ( $long_name, $short_name, $flag ) = @_;
1487 push @option_string, $long_name . $flag;
1488 $option_category{$long_name} = $category_name[$category];
1490 if ( $expansion{$short_name} ) {
1491 my $existing_name = $expansion{$short_name}[0];
1493 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1495 $expansion{$short_name} = [$long_name];
1496 if ( $flag eq '!' ) {
1497 my $nshort_name = 'n' . $short_name;
1498 my $nolong_name = 'no' . $long_name;
1499 if ( $expansion{$nshort_name} ) {
1500 my $existing_name = $expansion{$nshort_name}[0];
1502 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1504 $expansion{$nshort_name} = [$nolong_name];
1509 # Install long option names which have a simple abbreviation.
1510 # Options with code '!' get standard negation ('no' for long names,
1511 # 'n' for abbreviations). Categories follow the manual.
1513 ###########################
1514 $category = 0; # I/O_Control
1515 ###########################
1516 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1517 $add_option->( 'backup-file-extension', 'bext', '=s' );
1518 $add_option->( 'force-read-binary', 'f', '!' );
1519 $add_option->( 'format', 'fmt', '=s' );
1520 $add_option->( 'iterations', 'it', '=i' );
1521 $add_option->( 'logfile', 'log', '!' );
1522 $add_option->( 'logfile-gap', 'g', ':i' );
1523 $add_option->( 'outfile', 'o', '=s' );
1524 $add_option->( 'output-file-extension', 'oext', '=s' );
1525 $add_option->( 'output-path', 'opath', '=s' );
1526 $add_option->( 'profile', 'pro', '=s' );
1527 $add_option->( 'quiet', 'q', '!' );
1528 $add_option->( 'standard-error-output', 'se', '!' );
1529 $add_option->( 'standard-output', 'st', '!' );
1530 $add_option->( 'warning-output', 'w', '!' );
1532 # options which are both toggle switches and values moved here
1533 # to hide from tidyview (which does not show category 0 flags):
1534 # -ole moved here from category 1
1535 # -sil moved here from category 2
1536 $add_option->( 'output-line-ending', 'ole', '=s' );
1537 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1539 ########################################
1540 $category = 1; # Basic formatting options
1541 ########################################
1542 $add_option->( 'check-syntax', 'syn', '!' );
1543 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1544 $add_option->( 'indent-columns', 'i', '=i' );
1545 $add_option->( 'maximum-line-length', 'l', '=i' );
1546 $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
1547 $add_option->( 'whitespace-cycle', 'wc', '=i' );
1548 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1549 $add_option->( 'preserve-line-endings', 'ple', '!' );
1550 $add_option->( 'tabs', 't', '!' );
1551 $add_option->( 'default-tabsize', 'dt', '=i' );
1553 ########################################
1554 $category = 2; # Code indentation control
1555 ########################################
1556 $add_option->( 'continuation-indentation', 'ci', '=i' );
1557 $add_option->( 'line-up-parentheses', 'lp', '!' );
1558 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1559 $add_option->( 'outdent-keywords', 'okw', '!' );
1560 $add_option->( 'outdent-labels', 'ola', '!' );
1561 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1562 $add_option->( 'indent-closing-brace', 'icb', '!' );
1563 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1564 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1565 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1566 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1567 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1568 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1570 ########################################
1571 $category = 3; # Whitespace control
1572 ########################################
1573 $add_option->( 'add-semicolons', 'asc', '!' );
1574 $add_option->( 'add-whitespace', 'aws', '!' );
1575 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1576 $add_option->( 'brace-tightness', 'bt', '=i' );
1577 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1578 $add_option->( 'delete-semicolons', 'dsm', '!' );
1579 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1580 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1581 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1582 $add_option->( 'paren-tightness', 'pt', '=i' );
1583 $add_option->( 'space-after-keyword', 'sak', '=s' );
1584 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1585 $add_option->( 'space-function-paren', 'sfp', '!' );
1586 $add_option->( 'space-keyword-paren', 'skp', '!' );
1587 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1588 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1589 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1590 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1591 $add_option->( 'tight-secret-operators', 'tso', '!' );
1592 $add_option->( 'trim-qw', 'tqw', '!' );
1593 $add_option->( 'trim-pod', 'trp', '!' );
1594 $add_option->( 'want-left-space', 'wls', '=s' );
1595 $add_option->( 'want-right-space', 'wrs', '=s' );
1597 ########################################
1598 $category = 4; # Comment controls
1599 ########################################
1600 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1601 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1602 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1603 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1604 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1605 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1606 $add_option->( 'closing-side-comments', 'csc', '!' );
1607 $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
1608 $add_option->( 'format-skipping', 'fs', '!' );
1609 $add_option->( 'format-skipping-begin', 'fsb', '=s' );
1610 $add_option->( 'format-skipping-end', 'fse', '=s' );
1611 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1612 $add_option->( 'indent-block-comments', 'ibc', '!' );
1613 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1614 $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
1615 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1616 $add_option->( 'outdent-long-comments', 'olc', '!' );
1617 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1618 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1619 $add_option->( 'static-block-comments', 'sbc', '!' );
1620 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1621 $add_option->( 'static-side-comments', 'ssc', '!' );
1622 $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' );
1624 ########################################
1625 $category = 5; # Linebreak controls
1626 ########################################
1627 $add_option->( 'add-newlines', 'anl', '!' );
1628 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1629 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1630 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1631 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1632 $add_option->( 'cuddled-else', 'ce', '!' );
1633 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1634 $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
1635 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1636 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1637 $add_option->( 'opening-paren-right', 'opr', '!' );
1638 $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1639 $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
1640 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1641 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1642 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1643 $add_option->( 'stack-closing-block-brace', 'scbb', '!' );
1644 $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1645 $add_option->( 'stack-closing-paren', 'scp', '!' );
1646 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1647 $add_option->( 'stack-opening-block-brace', 'sobb', '!' );
1648 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1649 $add_option->( 'stack-opening-paren', 'sop', '!' );
1650 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1651 $add_option->( 'vertical-tightness', 'vt', '=i' );
1652 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1653 $add_option->( 'want-break-after', 'wba', '=s' );
1654 $add_option->( 'want-break-before', 'wbb', '=s' );
1655 $add_option->( 'break-after-all-operators', 'baao', '!' );
1656 $add_option->( 'break-before-all-operators', 'bbao', '!' );
1657 $add_option->( 'keep-interior-semicolons', 'kis', '!' );
1659 ########################################
1660 $category = 6; # Controlling list formatting
1661 ########################################
1662 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1663 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1664 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1666 ########################################
1667 $category = 7; # Retaining or ignoring existing line breaks
1668 ########################################
1669 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1670 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1671 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1672 $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
1673 $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1675 ########################################
1676 $category = 8; # Blank line control
1677 ########################################
1678 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1679 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1680 $add_option->( 'blank-lines-before-subs', 'blbs', '=i' );
1681 $add_option->( 'blank-lines-before-packages', 'blbp', '=i' );
1682 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1683 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1684 $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
1686 ########################################
1687 $category = 9; # Other controls
1688 ########################################
1689 $add_option->( 'delete-block-comments', 'dbc', '!' );
1690 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1691 $add_option->( 'delete-pod', 'dp', '!' );
1692 $add_option->( 'delete-side-comments', 'dsc', '!' );
1693 $add_option->( 'tee-block-comments', 'tbc', '!' );
1694 $add_option->( 'tee-pod', 'tp', '!' );
1695 $add_option->( 'tee-side-comments', 'tsc', '!' );
1696 $add_option->( 'look-for-autoloader', 'lal', '!' );
1697 $add_option->( 'look-for-hash-bang', 'x', '!' );
1698 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1699 $add_option->( 'pass-version-line', 'pvl', '!' );
1701 ########################################
1702 $category = 13; # Debugging
1703 ########################################
1704 $add_option->( 'DEBUG', 'D', '!' );
1705 $add_option->( 'DIAGNOSTICS', 'I', '!' );
1706 $add_option->( 'dump-defaults', 'ddf', '!' );
1707 $add_option->( 'dump-long-names', 'dln', '!' );
1708 $add_option->( 'dump-options', 'dop', '!' );
1709 $add_option->( 'dump-profile', 'dpro', '!' );
1710 $add_option->( 'dump-short-names', 'dsn', '!' );
1711 $add_option->( 'dump-token-types', 'dtt', '!' );
1712 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1713 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1714 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1715 $add_option->( 'help', 'h', '' );
1716 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1717 $add_option->( 'show-options', 'opt', '!' );
1718 $add_option->( 'version', 'v', '' );
1719 $add_option->( 'memoize', 'mem', '!' );
1721 #---------------------------------------------------------------------
1723 # The Perl::Tidy::HtmlWriter will add its own options to the string
1724 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1726 ########################################
1727 # Set categories 10, 11, 12
1728 ########################################
1729 # Based on their known order
1730 $category = 12; # HTML properties
1731 foreach my $opt (@option_string) {
1732 my $long_name = $opt;
1733 $long_name =~ s/(!|=.*|:.*)$//;
1734 unless ( defined( $option_category{$long_name} ) ) {
1735 if ( $long_name =~ /^html-linked/ ) {
1736 $category = 10; # HTML options
1738 elsif ( $long_name =~ /^pod2html/ ) {
1739 $category = 11; # Pod2html
1741 $option_category{$long_name} = $category_name[$category];
1745 #---------------------------------------------------------------
1746 # Assign valid ranges to certain options
1747 #---------------------------------------------------------------
1748 # In the future, these may be used to make preliminary checks
1749 # hash keys are long names
1750 # If key or value is undefined:
1751 # strings may have any value
1752 # integer ranges are >=0
1753 # If value is defined:
1754 # value is [qw(any valid words)] for strings
1755 # value is [min, max] for integers
1756 # if min is undefined, there is no lower limit
1757 # if max is undefined, there is no upper limit
1758 # Parameters not listed here have defaults
1760 'format' => [ 'tidy', 'html', 'user' ],
1761 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
1763 'block-brace-tightness' => [ 0, 2 ],
1764 'brace-tightness' => [ 0, 2 ],
1765 'paren-tightness' => [ 0, 2 ],
1766 'square-bracket-tightness' => [ 0, 2 ],
1768 'block-brace-vertical-tightness' => [ 0, 2 ],
1769 'brace-vertical-tightness' => [ 0, 2 ],
1770 'brace-vertical-tightness-closing' => [ 0, 2 ],
1771 'paren-vertical-tightness' => [ 0, 2 ],
1772 'paren-vertical-tightness-closing' => [ 0, 2 ],
1773 'square-bracket-vertical-tightness' => [ 0, 2 ],
1774 'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1775 'vertical-tightness' => [ 0, 2 ],
1776 'vertical-tightness-closing' => [ 0, 2 ],
1778 'closing-brace-indentation' => [ 0, 3 ],
1779 'closing-paren-indentation' => [ 0, 3 ],
1780 'closing-square-bracket-indentation' => [ 0, 3 ],
1781 'closing-token-indentation' => [ 0, 3 ],
1783 'closing-side-comment-else-flag' => [ 0, 2 ],
1784 'comma-arrow-breakpoints' => [ 0, 5 ],
1787 # Note: we could actually allow negative ci if someone really wants it:
1788 # $option_range{'continuation-indentation'} = [ undef, undef ];
1790 #---------------------------------------------------------------
1791 # Assign default values to the above options here, except
1792 # for 'outfile' and 'help'.
1793 # These settings should approximate the perlstyle(1) suggestions.
1794 #---------------------------------------------------------------
1799 blanks-before-blocks
1800 blanks-before-comments
1801 blank-lines-before-subs=1
1802 blank-lines-before-packages=1
1803 block-brace-tightness=0
1804 block-brace-vertical-tightness=0
1806 brace-vertical-tightness-closing=0
1807 brace-vertical-tightness=0
1808 break-at-old-logical-breakpoints
1809 break-at-old-ternary-breakpoints
1810 break-at-old-attribute-breakpoints
1811 break-at-old-keyword-breakpoints
1812 comma-arrow-breakpoints=5
1814 closing-side-comment-interval=6
1815 closing-side-comment-maximum-text=20
1816 closing-side-comment-else-flag=0
1817 closing-side-comments-balanced
1818 closing-paren-indentation=0
1819 closing-brace-indentation=0
1820 closing-square-bracket-indentation=0
1821 continuation-indentation=2
1825 hanging-side-comments
1826 indent-block-comments
1829 keep-old-blank-lines=1
1830 long-block-line-count=8
1833 maximum-consecutive-blank-lines=1
1834 maximum-fields-per-table=0
1835 maximum-line-length=80
1837 minimum-space-to-comment=4
1838 nobrace-left-and-indent
1840 nodelete-old-whitespace
1845 nostatic-side-comments
1850 outdent-long-comments
1852 paren-vertical-tightness-closing=0
1853 paren-vertical-tightness=0
1857 short-concatenation-item-length=8
1859 square-bracket-tightness=1
1860 square-bracket-vertical-tightness-closing=0
1861 square-bracket-vertical-tightness=0
1862 static-block-comments
1865 backup-file-extension=bak
1870 html-table-of-contents
1874 push @defaults, "perl-syntax-check-flags=-c -T";
1876 #---------------------------------------------------------------
1877 # Define abbreviations which will be expanded into the above primitives.
1878 # These may be defined recursively.
1879 #---------------------------------------------------------------
1882 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1883 'fnl' => [qw(freeze-newlines)],
1884 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1885 'fws' => [qw(freeze-whitespace)],
1886 'freeze-blank-lines' =>
1887 [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1888 'fbl' => [qw(freeze-blank-lines)],
1889 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1890 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1891 'nooutdent-long-lines' =>
1892 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1893 'noll' => [qw(nooutdent-long-lines)],
1894 'io' => [qw(indent-only)],
1895 'delete-all-comments' =>
1896 [qw(delete-block-comments delete-side-comments delete-pod)],
1897 'nodelete-all-comments' =>
1898 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1899 'dac' => [qw(delete-all-comments)],
1900 'ndac' => [qw(nodelete-all-comments)],
1901 'gnu' => [qw(gnu-style)],
1902 'pbp' => [qw(perl-best-practices)],
1903 'tee-all-comments' =>
1904 [qw(tee-block-comments tee-side-comments tee-pod)],
1905 'notee-all-comments' =>
1906 [qw(notee-block-comments notee-side-comments notee-pod)],
1907 'tac' => [qw(tee-all-comments)],
1908 'ntac' => [qw(notee-all-comments)],
1909 'html' => [qw(format=html)],
1910 'nhtml' => [qw(format=tidy)],
1911 'tidy' => [qw(format=tidy)],
1913 'swallow-optional-blank-lines' => [qw(kbl=0)],
1914 'noswallow-optional-blank-lines' => [qw(kbl=1)],
1915 'sob' => [qw(kbl=0)],
1916 'nsob' => [qw(kbl=1)],
1918 'break-after-comma-arrows' => [qw(cab=0)],
1919 'nobreak-after-comma-arrows' => [qw(cab=1)],
1920 'baa' => [qw(cab=0)],
1921 'nbaa' => [qw(cab=1)],
1923 'blanks-before-subs' => [qw(blbs=1 blbp=1)],
1924 'bbs' => [qw(blbs=1 blbp=1)],
1925 'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
1926 'nbbs' => [qw(blbs=0 blbp=0)],
1928 'break-at-old-trinary-breakpoints' => [qw(bot)],
1930 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1931 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1932 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1933 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1934 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1936 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1937 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1938 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1939 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1940 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1942 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1943 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1944 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1946 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1947 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1948 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1950 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1951 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1952 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1954 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1955 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1956 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1958 'otr' => [qw(opr ohbr osbr)],
1959 'opening-token-right' => [qw(opr ohbr osbr)],
1960 'notr' => [qw(nopr nohbr nosbr)],
1961 'noopening-token-right' => [qw(nopr nohbr nosbr)],
1963 'sot' => [qw(sop sohb sosb)],
1964 'nsot' => [qw(nsop nsohb nsosb)],
1965 'stack-opening-tokens' => [qw(sop sohb sosb)],
1966 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1968 'sct' => [qw(scp schb scsb)],
1969 'stack-closing-tokens' => => [qw(scp schb scsb)],
1970 'nsct' => [qw(nscp nschb nscsb)],
1971 'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1973 'sac' => [qw(sot sct)],
1974 'nsac' => [qw(nsot nsct)],
1975 'stack-all-containers' => [qw(sot sct)],
1976 'nostack-all-containers' => [qw(nsot nsct)],
1978 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
1979 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
1980 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
1981 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
1982 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
1983 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
1985 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)],
1986 'sobb' => [qw(bbvt=2 bbvtl=*)],
1987 'nostack-opening-block-brace' => [qw(bbvt=0)],
1988 'nsobb' => [qw(bbvt=0)],
1990 'converge' => [qw(it=4)],
1991 'noconverge' => [qw(it=1)],
1992 'conv' => [qw(it=4)],
1993 'nconv' => [qw(it=1)],
1995 # 'mangle' originally deleted pod and comments, but to keep it
1996 # reversible, it no longer does. But if you really want to
1997 # delete them, just use:
2000 # An interesting use for 'mangle' is to do this:
2001 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
2002 # which will form as many one-line blocks as possible
2007 keep-old-blank-lines=0
2009 delete-old-whitespace
2012 maximum-consecutive-blank-lines=0
2013 maximum-line-length=100000
2017 noblanks-before-blocks
2018 blank-lines-before-subs=0
2019 blank-lines-before-packages=0
2024 # 'extrude' originally deleted pod and comments, but to keep it
2025 # reversible, it no longer does. But if you really want to
2026 # delete them, just use
2029 # An interesting use for 'extrude' is to do this:
2030 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
2031 # which will break up all one-line blocks.
2038 delete-old-whitespace
2041 maximum-consecutive-blank-lines=0
2042 maximum-line-length=1
2045 noblanks-before-blocks
2046 blank-lines-before-subs=0
2047 blank-lines-before-packages=0
2054 # this style tries to follow the GNU Coding Standards (which do
2055 # not really apply to perl but which are followed by some perl
2059 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
2063 # Style suggested in Damian Conway's Perl Best Practices
2064 'perl-best-practices' => [
2065 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
2066 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
2069 # Additional styles can be added here
2072 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
2074 # Uncomment next line to dump all expansions for debugging:
2075 # dump_short_names(\%expansion);
2077 \@option_string, \@defaults, \%expansion,
2078 \%option_category, \%option_range
2081 } # end of generate_options
2083 # Memoize process_command_line. Given same @ARGV passed in, return same
2084 # values and same @ARGV back.
2085 # This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
2086 # up masontidy (https://metacpan.org/module/masontidy)
2088 my %process_command_line_cache;
2090 sub process_command_line {
2093 $perltidyrc_stream, $is_Windows, $Windows_type,
2094 $rpending_complaint, $dump_options_type
2097 my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
2099 my $cache_key = join( chr(28), @ARGV );
2100 if ( my $result = $process_command_line_cache{$cache_key} ) {
2101 my ( $argv, @retvals ) = @$result;
2106 my @retvals = _process_command_line(@_);
2107 $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
2108 if $retvals[0]->{'memoize'};
2113 return _process_command_line(@_);
2117 # (note the underscore here)
2118 sub _process_command_line {
2121 $perltidyrc_stream, $is_Windows, $Windows_type,
2122 $rpending_complaint, $dump_options_type
2128 $roption_string, $rdefaults, $rexpansion,
2129 $roption_category, $roption_range
2130 ) = generate_options();
2132 #---------------------------------------------------------------
2133 # set the defaults by passing the above list through GetOptions
2134 #---------------------------------------------------------------
2140 # do not load the defaults if we are just dumping perltidyrc
2141 unless ( $dump_options_type eq 'perltidyrc' ) {
2142 for $i (@$rdefaults) { push @ARGV, "--" . $i }
2145 # Patch to save users Getopt::Long configuration
2146 # and set to Getopt::Long defaults. Use eval to avoid
2147 # breaking old versions of Perl without these routines.
2149 eval { $glc = Getopt::Long::Configure() };
2151 eval { Getopt::Long::ConfigDefaults() };
2153 else { $glc = undef }
2155 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2156 Die "Programming Bug: error in setting default options";
2159 # Patch to put the previous Getopt::Long configuration back
2160 eval { Getopt::Long::Configure($glc) } if defined $glc;
2164 my @raw_options = ();
2165 my $config_file = "";
2166 my $saw_ignore_profile = 0;
2167 my $saw_extrude = 0;
2169 my $saw_dump_profile = 0;
2172 #---------------------------------------------------------------
2173 # Take a first look at the command-line parameters. Do as many
2174 # immediate dumps as possible, which can avoid confusion if the
2175 # perltidyrc file has an error.
2176 #---------------------------------------------------------------
2177 foreach $i (@ARGV) {
2180 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
2181 $saw_ignore_profile = 1;
2184 # note: this must come before -pro and -profile, below:
2185 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
2186 $saw_dump_profile = 1;
2188 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
2191 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
2195 # resolve <dir>/.../<file>, meaning look upwards from directory
2196 if ( defined($config_file) ) {
2197 if ( my ( $start_dir, $search_file ) =
2198 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2200 $start_dir = '.' if !$start_dir;
2201 $start_dir = Cwd::realpath($start_dir);
2202 if ( my $found_file =
2203 find_file_upwards( $start_dir, $search_file ) )
2205 $config_file = $found_file;
2209 unless ( -e $config_file ) {
2210 Warn "cannot find file given with -pro=$config_file: $!\n";
2214 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
2215 Die "usage: -pro=filename or --profile=filename, no spaces\n";
2217 elsif ( $i =~ /^-extrude$/ ) {
2220 elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) {
2223 elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
2227 elsif ( $i =~ /^-(version|v)$/ ) {
2231 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
2232 dump_defaults(@$rdefaults);
2235 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
2236 dump_long_names(@$roption_string);
2239 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
2240 dump_short_names($rexpansion);
2243 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
2244 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
2249 if ( $saw_dump_profile && $saw_ignore_profile ) {
2250 Warn "No profile to dump because of -npro\n";
2254 #---------------------------------------------------------------
2255 # read any .perltidyrc configuration file
2256 #---------------------------------------------------------------
2257 unless ($saw_ignore_profile) {
2259 # resolve possible conflict between $perltidyrc_stream passed
2260 # as call parameter to perltidy and -pro=filename on command
2262 if ($perltidyrc_stream) {
2265 Conflict: a perltidyrc configuration file was specified both as this
2266 perltidy call parameter: $perltidyrc_stream
2267 and with this -profile=$config_file.
2268 Using -profile=$config_file.
2272 $config_file = $perltidyrc_stream;
2276 # look for a config file if we don't have one yet
2277 my $rconfig_file_chatter;
2278 $$rconfig_file_chatter = "";
2280 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2281 $rpending_complaint )
2282 unless $config_file;
2284 # open any config file
2287 ( $fh_config, $config_file ) =
2288 Perl::Tidy::streamhandle( $config_file, 'r' );
2289 unless ($fh_config) {
2290 $$rconfig_file_chatter .=
2291 "# $config_file exists but cannot be opened\n";
2295 if ($saw_dump_profile) {
2296 dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
2302 my ( $rconfig_list, $death_message, $_saw_pbp ) =
2303 read_config_file( $fh_config, $config_file, $rexpansion );
2304 Die $death_message if ($death_message);
2305 $saw_pbp ||= $_saw_pbp;
2307 # process any .perltidyrc parameters right now so we can
2309 if (@$rconfig_list) {
2310 local @ARGV = @$rconfig_list;
2312 expand_command_abbreviations( $rexpansion, \@raw_options,
2315 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2317 "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
2320 # Anything left in this local @ARGV is an error and must be
2321 # invalid bare words from the configuration file. We cannot
2322 # check this earlier because bare words may have been valid
2323 # values for parameters. We had to wait for GetOptions to have
2327 my $str = "\'" . pop(@ARGV) . "\'";
2328 while ( my $param = pop(@ARGV) ) {
2329 if ( length($str) < 70 ) {
2330 $str .= ", '$param'";
2338 There are $count unrecognized values in the configuration file '$config_file':
2340 Use leading dashes for parameters. Use -npro to ignore this file.
2344 # Undo any options which cause premature exit. They are not
2345 # appropriate for a config file, and it could be hard to
2346 # diagnose the cause of the premature exit.
2355 dump-want-left-space
2356 dump-want-right-space
2364 if ( defined( $Opts{$_} ) ) {
2366 Warn "ignoring --$_ in config file: $config_file\n";
2373 #---------------------------------------------------------------
2374 # now process the command line parameters
2375 #---------------------------------------------------------------
2376 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2378 local $SIG{'__WARN__'} = sub { Warn $_[0] };
2379 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2380 Die "Error on command line; for help try 'perltidy -h'\n";
2384 \%Opts, $config_file, \@raw_options,
2385 $saw_extrude, $saw_pbp, $roption_string,
2386 $rexpansion, $roption_category, $roption_range
2388 } # end of process_command_line
2392 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2394 #---------------------------------------------------------------
2395 # check and handle any interactions among the basic options..
2396 #---------------------------------------------------------------
2398 # Since -vt, -vtc, and -cti are abbreviations, but under
2399 # msdos, an unquoted input parameter like vtc=1 will be
2400 # seen as 2 parameters, vtc and 1, so the abbreviations
2401 # won't be seen. Therefore, we will catch them here if
2404 if ( defined $rOpts->{'vertical-tightness'} ) {
2405 my $vt = $rOpts->{'vertical-tightness'};
2406 $rOpts->{'paren-vertical-tightness'} = $vt;
2407 $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2408 $rOpts->{'brace-vertical-tightness'} = $vt;
2411 if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2412 my $vtc = $rOpts->{'vertical-tightness-closing'};
2413 $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
2414 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2415 $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
2418 if ( defined $rOpts->{'closing-token-indentation'} ) {
2419 my $cti = $rOpts->{'closing-token-indentation'};
2420 $rOpts->{'closing-square-bracket-indentation'} = $cti;
2421 $rOpts->{'closing-brace-indentation'} = $cti;
2422 $rOpts->{'closing-paren-indentation'} = $cti;
2425 # In quiet mode, there is no log file and hence no way to report
2426 # results of syntax check, so don't do it.
2427 if ( $rOpts->{'quiet'} ) {
2428 $rOpts->{'check-syntax'} = 0;
2431 # can't check syntax if no output
2432 if ( $rOpts->{'format'} ne 'tidy' ) {
2433 $rOpts->{'check-syntax'} = 0;
2436 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2437 # wide variety of nasty problems on these systems, because they cannot
2438 # reliably run backticks. Don't even think about changing this!
2439 if ( $rOpts->{'check-syntax'}
2441 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2443 $rOpts->{'check-syntax'} = 0;
2446 # It's really a bad idea to check syntax as root unless you wrote
2447 # the script yourself. FIXME: not sure if this works with VMS
2448 unless ($is_Windows) {
2450 if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2451 $rOpts->{'check-syntax'} = 0;
2452 $$rpending_complaint .=
2453 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2457 # check iteration count and quietly fix if necessary:
2458 # - iterations option only applies to code beautification mode
2459 # - the convergence check should stop most runs on iteration 2, and
2460 # virtually all on iteration 3. But we'll allow up to 6.
2461 if ( $rOpts->{'format'} ne 'tidy' ) {
2462 $rOpts->{'iterations'} = 1;
2464 elsif ( defined( $rOpts->{'iterations'} ) ) {
2465 if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2466 elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 }
2469 $rOpts->{'iterations'} = 1;
2472 # check for reasonable number of blank lines and fix to avoid problems
2473 if ( $rOpts->{'blank-lines-before-subs'} ) {
2474 if ( $rOpts->{'blank-lines-before-subs'} < 0 ) {
2475 $rOpts->{'blank-lines-before-subs'} = 0;
2476 Warn "negative value of -blbs, setting 0\n";
2478 if ( $rOpts->{'blank-lines-before-subs'} > 100 ) {
2479 Warn "unreasonably large value of -blbs, reducing\n";
2480 $rOpts->{'blank-lines-before-subs'} = 100;
2483 if ( $rOpts->{'blank-lines-before-packages'} ) {
2484 if ( $rOpts->{'blank-lines-before-packages'} < 0 ) {
2485 Warn "negative value of -blbp, setting 0\n";
2486 $rOpts->{'blank-lines-before-packages'} = 0;
2488 if ( $rOpts->{'blank-lines-before-packages'} > 100 ) {
2489 Warn "unreasonably large value of -blbp, reducing\n";
2490 $rOpts->{'blank-lines-before-packages'} = 100;
2494 # setting a non-negative logfile gap causes logfile to be saved
2495 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2496 $rOpts->{'logfile'} = 1;
2499 # set short-cut flag when only indentation is to be done.
2500 # Note that the user may or may not have already set the
2502 if ( !$rOpts->{'add-whitespace'}
2503 && !$rOpts->{'delete-old-whitespace'}
2504 && !$rOpts->{'add-newlines'}
2505 && !$rOpts->{'delete-old-newlines'} )
2507 $rOpts->{'indent-only'} = 1;
2510 # -isbc implies -ibc
2511 if ( $rOpts->{'indent-spaced-block-comments'} ) {
2512 $rOpts->{'indent-block-comments'} = 1;
2515 # -bli flag implies -bl
2516 if ( $rOpts->{'brace-left-and-indent'} ) {
2517 $rOpts->{'opening-brace-on-new-line'} = 1;
2520 if ( $rOpts->{'opening-brace-always-on-right'}
2521 && $rOpts->{'opening-brace-on-new-line'} )
2524 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2525 'opening-brace-on-new-line' (-bl). Ignoring -bl.
2527 $rOpts->{'opening-brace-on-new-line'} = 0;
2530 # it simplifies things if -bl is 0 rather than undefined
2531 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2532 $rOpts->{'opening-brace-on-new-line'} = 0;
2535 # -sbl defaults to -bl if not defined
2536 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2537 $rOpts->{'opening-sub-brace-on-new-line'} =
2538 $rOpts->{'opening-brace-on-new-line'};
2541 if ( $rOpts->{'entab-leading-whitespace'} ) {
2542 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2543 Warn "-et=n must use a positive integer; ignoring -et\n";
2544 $rOpts->{'entab-leading-whitespace'} = undef;
2547 # entab leading whitespace has priority over the older 'tabs' option
2548 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2551 # set a default tabsize to be used in guessing the starting indentation
2552 # level if and only if this run does not use tabs and the old code does
2554 if ( $rOpts->{'default-tabsize'} ) {
2555 if ( $rOpts->{'default-tabsize'} < 0 ) {
2556 Warn "negative value of -dt, setting 0\n";
2557 $rOpts->{'default-tabsize'} = 0;
2559 if ( $rOpts->{'default-tabsize'} > 20 ) {
2560 Warn "unreasonably large value of -dt, reducing\n";
2561 $rOpts->{'default-tabsize'} = 20;
2565 $rOpts->{'default-tabsize'} = 8;
2568 # Define $tabsize, the number of spaces per tab for use in
2569 # guessing the indentation of source lines with leading tabs.
2570 # Assume same as for this run if tabs are used , otherwise assume
2571 # a default value, typically 8
2573 $rOpts->{'entab-leading-whitespace'}
2574 ? $rOpts->{'entab-leading-whitespace'}
2575 : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
2576 : $rOpts->{'default-tabsize'};
2580 sub find_file_upwards {
2581 my ( $search_dir, $search_file ) = @_;
2583 $search_dir =~ s{/+$}{};
2584 $search_file =~ s{^/+}{};
2587 my $try_path = "$search_dir/$search_file";
2588 if ( -f $try_path ) {
2591 elsif ( $search_dir eq '/' ) {
2595 $search_dir = dirname($search_dir);
2600 sub expand_command_abbreviations {
2602 # go through @ARGV and expand any abbreviations
2604 my ( $rexpansion, $rraw_options, $config_file ) = @_;
2607 # set a pass limit to prevent an infinite loop;
2608 # 10 should be plenty, but it may be increased to allow deeply
2609 # nested expansions.
2610 my $max_passes = 10;
2613 # keep looping until all expansions have been converted into actual
2615 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2617 my $abbrev_count = 0;
2619 # loop over each item in @ARGV..
2620 foreach $word (@ARGV) {
2622 # convert any leading 'no-' to just 'no'
2623 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2625 # if it is a dash flag (instead of a file name)..
2626 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2631 # save the raw input for debug output in case of circular refs
2632 if ( $pass_count == 0 ) {
2633 push( @$rraw_options, $word );
2636 # recombine abbreviation and flag, if necessary,
2637 # to allow abbreviations with arguments such as '-vt=1'
2638 if ( $rexpansion->{ $abr . $flags } ) {
2639 $abr = $abr . $flags;
2643 # if we see this dash item in the expansion hash..
2644 if ( $rexpansion->{$abr} ) {
2647 # stuff all of the words that it expands to into the
2648 # new arg list for the next pass
2649 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2650 next unless $abbrev; # for safety; shouldn't happen
2651 push( @new_argv, '--' . $abbrev . $flags );
2655 # not in expansion hash, must be actual long name
2657 push( @new_argv, $word );
2661 # not a dash item, so just save it for the next pass
2663 push( @new_argv, $word );
2665 } # end of this pass
2667 # update parameter list @ARGV to the new one
2669 last unless ( $abbrev_count > 0 );
2671 # make sure we are not in an infinite loop
2672 if ( $pass_count == $max_passes ) {
2675 I'm tired. We seem to be in an infinite loop trying to expand aliases.
2676 Here are the raw options;
2679 my $num = @new_argv;
2682 After $max_passes passes here is ARGV
2688 After $max_passes passes ARGV has $num entries
2694 Please check your configuration file $config_file for circular-references.
2695 To deactivate it, use -npro.
2700 Program bug - circular-references in the %expansion hash, probably due to
2701 a recent program change.
2704 } # end of check for circular references
2705 } # end of loop over all passes
2708 # Debug routine -- this will dump the expansion hash
2709 sub dump_short_names {
2710 my $rexpansion = shift;
2712 List of short names. This list shows how all abbreviations are
2713 translated into other abbreviations and, eventually, into long names.
2714 New abbreviations may be defined in a .perltidyrc file.
2715 For a list of all long names, use perltidy --dump-long-names (-dln).
2716 --------------------------------------------------------------------------
2718 foreach my $abbrev ( sort keys %$rexpansion ) {
2719 my @list = @{ $$rexpansion{$abbrev} };
2720 print STDOUT "$abbrev --> @list\n";
2724 sub check_vms_filename {
2726 # given a valid filename (the perltidy input file)
2727 # create a modified filename and separator character
2730 # Contributed by Michael Cartmell
2732 my ( $base, $path ) = fileparse( $_[0] );
2734 # remove explicit ; version
2735 $base =~ s/;-?\d*$//
2737 # remove explicit . version ie two dots in filename NB ^ escapes a dot
2738 or $base =~ s/( # begin capture $1
2739 (?:^|[^^])\. # match a dot not preceded by a caret
2740 (?: # followed by nothing
2742 .*[^^] # anything ending in a non caret
2745 \.-?\d*$ # match . version number
2748 # normalise filename, if there are no unescaped dots then append one
2749 $base .= '.' unless $base =~ /(?:^|[^^])\./;
2751 # if we don't already have an extension then we just append the extension
2752 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2753 return ( $path . $base, $separator );
2758 # TODO: are these more standard names?
2759 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2761 # Returns a string that determines what MS OS we are on.
2762 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2763 # Returns blank string if not an MS system.
2764 # Original code contributed by: Yves Orton
2765 # We need to know this to decide where to look for config files
2767 my $rpending_complaint = shift;
2769 return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2771 # Systems built from Perl source may not have Win32.pm
2772 # But probably have Win32::GetOSVersion() anyway so the
2773 # following line is not 'required':
2774 # return $os unless eval('require Win32');
2776 # Use the standard API call to determine the version
2777 my ( $undef, $major, $minor, $build, $id );
2778 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2781 # NAME ID MAJOR MINOR
2782 # Windows NT 4 2 4 0
2783 # Windows 2000 2 5 0
2785 # Windows Server 2003 2 5 2
2787 return "win32s" unless $id; # If id==0 then its a win32s box.
2788 $os = { # Magic numbers from MSDN
2789 # documentation of GetOSVersion
2796 0 => "2000", # or NT 4, see below
2803 # If $os is undefined, the above code is out of date. Suggested updates
2805 unless ( defined $os ) {
2807 $$rpending_complaint .= <<EOS;
2808 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2809 We won't be able to look for a system-wide config file.
2813 # Unfortunately the logic used for the various versions isn't so clever..
2814 # so we have to handle an outside case.
2815 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2820 ( $^O !~ /win32|dos/i )
2823 && ( $^O ne 'MacOS' );
2826 sub look_for_Windows {
2828 # determine Windows sub-type and location of
2829 # system-wide configuration files
2830 my $rpending_complaint = shift;
2831 my $is_Windows = ( $^O =~ /win32|dos/i );
2832 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
2833 return ( $is_Windows, $Windows_type );
2836 sub find_config_file {
2838 # look for a .perltidyrc configuration file
2839 # For Windows also look for a file named perltidy.ini
2840 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2841 $rpending_complaint ) = @_;
2843 $$rconfig_file_chatter .= "# Config file search...system reported as:";
2845 $$rconfig_file_chatter .= "Windows $Windows_type\n";
2848 $$rconfig_file_chatter .= " $^O\n";
2851 # sub to check file existence and record all tests
2852 my $exists_config_file = sub {
2853 my $config_file = shift;
2854 return 0 unless $config_file;
2855 $$rconfig_file_chatter .= "# Testing: $config_file\n";
2856 return -f $config_file;
2861 # look in current directory first
2862 $config_file = ".perltidyrc";
2863 return $config_file if $exists_config_file->($config_file);
2865 $config_file = "perltidy.ini";
2866 return $config_file if $exists_config_file->($config_file);
2869 # Default environment vars.
2870 my @envs = qw(PERLTIDY HOME);
2872 # Check the NT/2k/XP locations, first a local machine def, then a
2874 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2876 # Now go through the environment ...
2877 foreach my $var (@envs) {
2878 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2879 if ( defined( $ENV{$var} ) ) {
2880 $$rconfig_file_chatter .= " = $ENV{$var}\n";
2882 # test ENV{ PERLTIDY } as file:
2883 if ( $var eq 'PERLTIDY' ) {
2884 $config_file = "$ENV{$var}";
2885 return $config_file if $exists_config_file->($config_file);
2888 # test ENV as directory:
2889 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2890 return $config_file if $exists_config_file->($config_file);
2893 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2894 return $config_file if $exists_config_file->($config_file);
2898 $$rconfig_file_chatter .= "\n";
2902 # then look for a system-wide definition
2903 # where to look varies with OS
2906 if ($Windows_type) {
2907 my ( $os, $system, $allusers ) =
2908 Win_Config_Locs( $rpending_complaint, $Windows_type );
2910 # Check All Users directory, if there is one.
2911 # i.e. C:\Documents and Settings\User\perltidy.ini
2914 $config_file = catfile( $allusers, ".perltidyrc" );
2915 return $config_file if $exists_config_file->($config_file);
2917 $config_file = catfile( $allusers, "perltidy.ini" );
2918 return $config_file if $exists_config_file->($config_file);
2921 # Check system directory.
2922 # retain old code in case someone has been able to create
2923 # a file with a leading period.
2924 $config_file = catfile( $system, ".perltidyrc" );
2925 return $config_file if $exists_config_file->($config_file);
2927 $config_file = catfile( $system, "perltidy.ini" );
2928 return $config_file if $exists_config_file->($config_file);
2932 # Place to add customization code for other systems
2933 elsif ( $^O eq 'OS2' ) {
2935 elsif ( $^O eq 'MacOS' ) {
2937 elsif ( $^O eq 'VMS' ) {
2940 # Assume some kind of Unix
2943 $config_file = "/usr/local/etc/perltidyrc";
2944 return $config_file if $exists_config_file->($config_file);
2946 $config_file = "/etc/perltidyrc";
2947 return $config_file if $exists_config_file->($config_file);
2950 # Couldn't find a config file
2954 sub Win_Config_Locs {
2956 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2957 # or undef if its not a win32 OS. In list context returns OS, System
2958 # Directory, and All Users Directory. All Users will be empty on a
2959 # 9x/Me box. Contributed by: Yves Orton.
2961 my $rpending_complaint = shift;
2962 my $os = (@_) ? shift : Win_OS_Type();
2968 if ( $os =~ /9[58]|Me/ ) {
2969 $system = "C:/Windows";
2971 elsif ( $os =~ /NT|XP|200?/ ) {
2972 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2975 ? "C:/WinNT/profiles/All Users/"
2976 : "C:/Documents and Settings/All Users/";
2980 # This currently would only happen on a win32s computer. I don't have
2981 # one to test, so I am unsure how to proceed. Suggestions welcome!
2982 $$rpending_complaint .=
2983 "I dont know a sensible place to look for config files on an $os system.\n";
2986 return wantarray ? ( $os, $system, $allusers ) : $os;
2989 sub dump_config_file {
2991 my $config_file = shift;
2992 my $rconfig_file_chatter = shift;
2993 print STDOUT "$$rconfig_file_chatter";
2995 print STDOUT "# Dump of file: '$config_file'\n";
2996 while ( my $line = $fh->getline() ) { print STDOUT $line }
2997 eval { $fh->close() };
3000 print STDOUT "# ...no config file found\n";
3004 sub read_config_file {
3006 my ( $fh, $config_file, $rexpansion ) = @_;
3007 my @config_list = ();
3010 # file is bad if non-empty $death_message is returned
3011 my $death_message = "";
3015 while ( my $line = $fh->getline() ) {
3018 ( $line, $death_message ) =
3019 strip_comment( $line, $config_file, $line_no );
3020 last if ($death_message);
3022 $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
3025 # look for something of the general form
3032 if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) {
3033 ( $newname, $body ) = ( $2, $3, );
3037 if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) {
3041 # handle a new alias definition
3045 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
3050 if ( ${$rexpansion}{$name} ) {
3052 my @names = sort keys %$rexpansion;
3054 "Here is a list of all installed aliases\n(@names)\n"
3055 . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
3058 ${$rexpansion}{$name} = [];
3064 my ( $rbody_parts, $msg ) = parse_args($body);
3066 $death_message = <<EOM;
3067 Error reading file '$config_file' at line number $line_no.
3069 Please fix this line or use -npro to avoid reading this file
3076 # remove leading dashes if this is an alias
3077 foreach (@$rbody_parts) { s/^\-+//; }
3078 push @{ ${$rexpansion}{$name} }, @$rbody_parts;
3081 push( @config_list, @$rbody_parts );
3086 eval { $fh->close() };
3087 return ( \@config_list, $death_message, $saw_pbp );
3092 # Strip any comment from a command line
3093 my ( $instr, $config_file, $line_no ) = @_;
3096 # check for full-line comment
3097 if ( $instr =~ /^\s*#/ ) {
3098 return ( "", $msg );
3101 # nothing to do if no comments
3102 if ( $instr !~ /#/ ) {
3103 return ( $instr, $msg );
3106 # handle case of no quotes
3107 elsif ( $instr !~ /['"]/ ) {
3109 # We now require a space before the # of a side comment
3110 # this allows something like:
3112 # Otherwise, it would have to be quoted:
3114 $instr =~ s/\s+\#.*$//;
3115 return ( $instr, $msg );
3118 # handle comments and quotes
3120 my $quote_char = "";
3123 # looking for ending quote character
3125 if ( $instr =~ /\G($quote_char)/gc ) {
3129 elsif ( $instr =~ /\G(.)/gc ) {
3133 # error..we reached the end without seeing the ending quote char
3136 Error reading file $config_file at line number $line_no.
3137 Did not see ending quote character <$quote_char> in this text:
3139 Please fix this line or use -npro to avoid reading this file
3145 # accumulating characters and looking for start of a quoted string
3147 if ( $instr =~ /\G([\"\'])/gc ) {
3152 # Note: not yet enforcing the space-before-hash rule for side
3153 # comments if the parameter is quoted.
3154 elsif ( $instr =~ /\G#/gc ) {
3157 elsif ( $instr =~ /\G(.)/gc ) {
3165 return ( $outstr, $msg );
3170 # Parse a command string containing multiple string with possible
3171 # quotes, into individual commands. It might look like this, for example:
3173 # -wba=" + - " -some-thing -wbb='. && ||'
3175 # There is no need, at present, to handle escaped quote characters.
3176 # (They are not perltidy tokens, so needn't be in strings).
3179 my @body_parts = ();
3180 my $quote_char = "";
3185 # looking for ending quote character
3187 if ( $body =~ /\G($quote_char)/gc ) {
3190 elsif ( $body =~ /\G(.)/gc ) {
3194 # error..we reached the end without seeing the ending quote char
3196 if ( length($part) ) { push @body_parts, $part; }
3198 Did not see ending quote character <$quote_char> in this text:
3205 # accumulating characters and looking for start of a quoted string
3207 if ( $body =~ /\G([\"\'])/gc ) {
3210 elsif ( $body =~ /\G(\s+)/gc ) {
3211 if ( length($part) ) { push @body_parts, $part; }
3214 elsif ( $body =~ /\G(.)/gc ) {
3218 if ( length($part) ) { push @body_parts, $part; }
3223 return ( \@body_parts, $msg );
3226 sub dump_long_names {
3228 my @names = sort @_;
3230 # Command line long names (passed to GetOptions)
3231 #---------------------------------------------------------------
3232 # here is a summary of the Getopt codes:
3233 # <none> does not take an argument
3234 # =s takes a mandatory string
3235 # :s takes an optional string
3236 # =i takes a mandatory integer
3237 # :i takes an optional integer
3238 # ! does not take an argument and may be negated
3239 # i.e., -foo and -nofoo are allowed
3240 # a double dash signals the end of the options list
3242 #---------------------------------------------------------------
3245 foreach (@names) { print STDOUT "$_\n" }
3249 my @defaults = sort @_;
3250 print STDOUT "Default command line options:\n";
3251 foreach (@_) { print STDOUT "$_\n" }
3254 sub readable_options {
3256 # return options for this run as a string which could be
3257 # put in a perltidyrc file
3258 my ( $rOpts, $roption_string ) = @_;
3260 my $rGetopt_flags = \%Getopt_flags;
3261 my $readable_options = "# Final parameter set for this run.\n";
3262 $readable_options .=
3263 "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
3264 foreach my $opt ( @{$roption_string} ) {
3266 if ( $opt =~ /(.*)(!|=.*)$/ ) {
3270 if ( defined( $rOpts->{$opt} ) ) {
3271 $rGetopt_flags->{$opt} = $flag;
3274 foreach my $key ( sort keys %{$rOpts} ) {
3275 my $flag = $rGetopt_flags->{$key};
3276 my $value = $rOpts->{$key};
3280 if ( $flag =~ /^=/ ) {
3281 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
3282 $suffix = "=" . $value;
3284 elsif ( $flag =~ /^!/ ) {
3285 $prefix .= "no" unless ($value);
3290 $readable_options .=
3291 "# ERROR in dump_options: unrecognized flag $flag for $key\n";
3294 $readable_options .= $prefix . $key . $suffix . "\n";
3296 return $readable_options;
3300 print STDOUT <<"EOM";
3301 This is perltidy, v$VERSION
3303 Copyright 2000-2014, Steve Hancock
3305 Perltidy is free software and may be copied under the terms of the GNU
3306 General Public License, which is included in the distribution files.
3308 Complete documentation for perltidy can be found using 'man perltidy'
3309 or on the internet at http://perltidy.sourceforge.net.
3316 This is perltidy version $VERSION, a perl script indenter. Usage:
3318 perltidy [ options ] file1 file2 file3 ...
3319 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
3320 perltidy [ options ] file1 -o outfile
3321 perltidy [ options ] file1 -st >outfile
3322 perltidy [ options ] <infile >outfile
3324 Options have short and long forms. Short forms are shown; see
3325 man pages for long forms. Note: '=s' indicates a required string,
3326 and '=n' indicates a required integer.
3330 -o=file name of the output file (only if single input file)
3331 -oext=s change output extension from 'tdy' to s
3332 -opath=path change path to be 'path' for output files
3333 -b backup original to .bak and modify file in-place
3334 -bext=s change default backup extension from 'bak' to s
3335 -q deactivate error messages (for running under editor)
3336 -w include non-critical warning messages in the .ERR error output
3337 -syn run perl -c to check syntax (default under unix systems)
3338 -log save .LOG file, which has useful diagnostics
3339 -f force perltidy to read a binary file
3340 -g like -log but writes more detailed .LOG file, for debugging scripts
3341 -opt write the set of options actually used to a .LOG file
3342 -npro ignore .perltidyrc configuration command file
3343 -pro=file read configuration commands from file instead of .perltidyrc
3344 -st send output to standard output, STDOUT
3345 -se send all error output to standard error output, STDERR
3346 -v display version number to standard output and quit
3349 -i=n use n columns per indentation level (default n=4)
3350 -t tabs: use one tab character per indentation level, not recommeded
3351 -nt no tabs: use n spaces per indentation level (default)
3352 -et=n entab leading whitespace n spaces per tab; not recommended
3353 -io "indent only": just do indentation, no other formatting.
3354 -sil=n set starting indentation level to n; use if auto detection fails
3355 -ole=s specify output line ending (s=dos or win, mac, unix)
3356 -ple keep output line endings same as input (input must be filename)
3359 -fws freeze whitespace; this disables all whitespace changes
3360 and disables the following switches:
3361 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
3362 -bbt same as -bt but for code block braces; same as -bt if not given
3363 -bbvt block braces vertically tight; use with -bl or -bli
3364 -bbvtl=s make -bbvt to apply to selected list of block types
3365 -pt=n paren tightness (n=0, 1 or 2)
3366 -sbt=n square bracket tightness (n=0, 1, or 2)
3367 -bvt=n brace vertical tightness,
3368 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3369 -pvt=n paren vertical tightness (see -bvt for n)
3370 -sbvt=n square bracket vertical tightness (see -bvt for n)
3371 -bvtc=n closing brace vertical tightness:
3372 n=(0=open, 1=sometimes close, 2=always close)
3373 -pvtc=n closing paren vertical tightness, see -bvtc for n.
3374 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3375 -ci=n sets continuation indentation=n, default is n=2 spaces
3376 -lp line up parentheses, brackets, and non-BLOCK braces
3377 -sfs add space before semicolon in for( ; ; )
3378 -aws allow perltidy to add whitespace (default)
3379 -dws delete all old non-essential whitespace
3380 -icb indent closing brace of a code block
3381 -cti=n closing indentation of paren, square bracket, or non-block brace:
3382 n=0 none, =1 align with opening, =2 one full indentation level
3383 -icp equivalent to -cti=2
3384 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
3385 -wrs=s want space right of tokens in string;
3386 -sts put space before terminal semicolon of a statement
3387 -sak=s put space between keywords given in s and '(';
3388 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3391 -fnl freeze newlines; this disables all line break changes
3392 and disables the following switches:
3393 -anl add newlines; ok to introduce new line breaks
3394 -bbs add blank line before subs and packages
3395 -bbc add blank line before block comments
3396 -bbb add blank line between major blocks
3397 -kbl=n keep old blank lines? 0=no, 1=some, 2=all
3398 -mbl=n maximum consecutive blank lines to output (default=1)
3399 -ce cuddled else; use this style: '} else {'
3400 -dnl delete old newlines (default)
3401 -l=n maximum line length; default n=80
3402 -bl opening brace on new line
3403 -sbl opening sub brace on new line. value of -bl is used if not given.
3404 -bli opening brace on new line and indented
3405 -bar opening brace always on right, even for long clauses
3406 -vt=n vertical tightness (requires -lp); n controls break after opening
3407 token: 0=never 1=no break if next line balanced 2=no break
3408 -vtc=n vertical tightness of closing container; n controls if closing
3409 token starts new line: 0=always 1=not unless list 1=never
3410 -wba=s want break after tokens in string; i.e. wba=': .'
3411 -wbb=s want break before tokens in string
3413 Following Old Breakpoints
3414 -kis keep interior semicolons. Allows multiple statements per line.
3415 -boc break at old comma breaks: turns off all automatic list formatting
3416 -bol break at old logical breakpoints: or, and, ||, && (default)
3417 -bok break at old list keyword breakpoints such as map, sort (default)
3418 -bot break at old conditional (ternary ?:) operator breakpoints (default)
3419 -boa break at old attribute breakpoints
3420 -cab=n break at commas after a comma-arrow (=>):
3421 n=0 break at all commas after =>
3422 n=1 stable: break unless this breaks an existing one-line container
3423 n=2 break only if a one-line container cannot be formed
3424 n=3 do not treat commas after => specially at all
3427 -ibc indent block comments (default)
3428 -isbc indent spaced block comments; may indent unless no leading space
3429 -msc=n minimum desired spaces to side comment, default 4
3430 -fpsc=n fix position for side comments; default 0;
3431 -csc add or update closing side comments after closing BLOCK brace
3432 -dcsc delete closing side comments created by a -csc command
3433 -cscp=s change closing side comment prefix to be other than '## end'
3434 -cscl=s change closing side comment to apply to selected list of blocks
3435 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3436 -csct=n maximum number of columns of appended text, default n=20
3437 -cscw causes warning if old side comment is overwritten with -csc
3439 -sbc use 'static block comments' identified by leading '##' (default)
3440 -sbcp=s change static block comment identifier to be other than '##'
3441 -osbc outdent static block comments
3443 -ssc use 'static side comments' identified by leading '##' (default)
3444 -sscp=s change static side comment identifier to be other than '##'
3446 Delete selected text
3447 -dac delete all comments AND pod
3448 -dbc delete block comments
3449 -dsc delete side comments
3452 Send selected text to a '.TEE' file
3453 -tac tee all comments AND pod
3454 -tbc tee block comments
3455 -tsc tee side comments
3459 -olq outdent long quoted strings (default)
3460 -olc outdent a long block comment line
3461 -ola outdent statement labels
3462 -okw outdent control keywords (redo, next, last, goto, return)
3463 -okwl=s specify alternative keywords for -okw command
3466 -mft=n maximum fields per table; default n=40
3467 -x do not format lines before hash-bang line (i.e., for VMS)
3468 -asc allows perltidy to add a ';' when missing (default)
3469 -dsm allows perltidy to delete an unnecessary ';' (default)
3471 Combinations of other parameters
3472 -gnu attempt to follow GNU Coding Standards as applied to perl
3473 -mangle remove as many newlines as possible (but keep comments and pods)
3474 -extrude insert as many newlines as possible
3476 Dump and die, debugging
3477 -dop dump options used in this run to standard output and quit
3478 -ddf dump default options to standard output and quit
3479 -dsn dump all option short names to standard output and quit
3480 -dln dump option long names to standard output and quit
3481 -dpro dump whatever configuration file is in effect to standard output
3482 -dtt dump all token types to standard output and quit
3485 -html write an html file (see 'man perl2web' for many options)
3486 Note: when -html is used, no indentation or formatting are done.
3487 Hint: try perltidy -html -css=mystyle.css filename.pl
3488 and edit mystyle.css to change the appearance of filename.html.
3489 -nnn gives line numbers
3490 -pre only writes out <pre>..</pre> code section
3491 -toc places a table of contents to subs at the top (default)
3492 -pod passes pod text through pod2html (default)
3493 -frm write html as a frame (3 files)
3494 -text=s extra extension for table of contents if -frm, default='toc'
3495 -sext=s extra extension for file content if -frm, default='src'
3497 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3498 negates the long forms. For example, -nasc means don't add missing
3501 If you are unable to see this entire text, try "perltidy -h | more"
3502 For more detailed information, and additional options, try "man perltidy",
3503 or go to the perltidy home page at http://perltidy.sourceforge.net
3508 sub process_this_file {
3510 my ( $truth, $beauty ) = @_;
3512 # loop to process each line of this file
3513 while ( my $line_of_tokens = $truth->get_line() ) {
3514 $beauty->write_line($line_of_tokens);
3518 eval { $beauty->finish_formatting() };
3519 $truth->report_tokenization_errors();
3524 # Use 'perl -c' to make sure that we did not create bad syntax
3525 # This is a very good independent check for programming errors
3527 # Given names of the input and output files, ($istream, $ostream),
3528 # we do the following:
3529 # - check syntax of the input file
3530 # - if bad, all done (could be an incomplete code snippet)
3531 # - if infile syntax ok, then check syntax of the output file;
3532 # - if outfile syntax bad, issue warning; this implies a code bug!
3533 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3535 my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
3536 my $infile_syntax_ok = 0;
3537 my $line_of_dashes = '-' x 42 . "\n";
3539 my $flags = $rOpts->{'perl-syntax-check-flags'};
3541 # be sure we invoke perl with -c
3542 # note: perl will accept repeated flags like '-c -c'. It is safest
3543 # to append another -c than try to find an interior bundled c, as
3544 # in -Tc, because such a 'c' might be in a quoted string, for example.
3545 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3547 # be sure we invoke perl with -x if requested
3548 # same comments about repeated parameters applies
3549 if ( $rOpts->{'look-for-hash-bang'} ) {
3550 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3553 # this shouldn't happen unless a temporary file couldn't be made
3554 if ( $istream eq '-' ) {
3555 $logger_object->write_logfile_entry(
3556 "Cannot run perl -c on STDIN and STDOUT\n");
3557 return $infile_syntax_ok;
3560 $logger_object->write_logfile_entry(
3561 "checking input file syntax with perl $flags\n");
3563 # Not all operating systems/shells support redirection of the standard
3565 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3567 my ( $istream_filename, $perl_output ) =
3568 do_syntax_check( $istream, $flags, $error_redirection );
3569 $logger_object->write_logfile_entry(
3570 "Input stream passed to Perl as file $istream_filename\n");
3571 $logger_object->write_logfile_entry($line_of_dashes);
3572 $logger_object->write_logfile_entry("$perl_output\n");
3574 if ( $perl_output =~ /syntax\s*OK/ ) {
3575 $infile_syntax_ok = 1;
3576 $logger_object->write_logfile_entry($line_of_dashes);
3577 $logger_object->write_logfile_entry(
3578 "checking output file syntax with perl $flags ...\n");
3579 my ( $ostream_filename, $perl_output ) =
3580 do_syntax_check( $ostream, $flags, $error_redirection );
3581 $logger_object->write_logfile_entry(
3582 "Output stream passed to Perl as file $ostream_filename\n");
3583 $logger_object->write_logfile_entry($line_of_dashes);
3584 $logger_object->write_logfile_entry("$perl_output\n");
3586 unless ( $perl_output =~ /syntax\s*OK/ ) {
3587 $logger_object->write_logfile_entry($line_of_dashes);
3588 $logger_object->warning(
3589 "The output file has a syntax error when tested with perl $flags $ostream !\n"
3591 $logger_object->warning(
3592 "This implies an error in perltidy; the file $ostream is bad\n"
3594 $logger_object->report_definite_bug();
3596 # the perl version number will be helpful for diagnosing the problem
3597 $logger_object->write_logfile_entry(
3598 qx/perl -v $error_redirection/ . "\n" );
3603 # Only warn of perl -c syntax errors. Other messages,
3604 # such as missing modules, are too common. They can be
3605 # seen by running with perltidy -w
3606 $logger_object->complain("A syntax check using perl $flags\n");
3607 $logger_object->complain(
3608 "for the output in file $istream_filename gives:\n");
3609 $logger_object->complain($line_of_dashes);
3610 $logger_object->complain("$perl_output\n");
3611 $logger_object->complain($line_of_dashes);
3612 $infile_syntax_ok = -1;
3613 $logger_object->write_logfile_entry($line_of_dashes);
3614 $logger_object->write_logfile_entry(
3615 "The output file will not be checked because of input file problems\n"
3618 return $infile_syntax_ok;
3621 sub do_syntax_check {
3622 my ( $stream, $flags, $error_redirection ) = @_;
3624 # We need a named input file for executing perl
3625 my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
3627 # TODO: Need to add name of file to log somewhere
3628 # otherwise Perl output is hard to read
3629 if ( !$stream_filename ) { return $stream_filename, "" }
3631 # We have to quote the filename in case it has unusual characters
3632 # or spaces. Example: this filename #CM11.pm# gives trouble.
3633 my $quoted_stream_filename = '"' . $stream_filename . '"';
3635 # Under VMS something like -T will become -t (and an error) so we
3636 # will put quotes around the flags. Double quotes seem to work on
3637 # Unix/Windows/VMS, but this may not work on all systems. (Single
3638 # quotes do not work under Windows). It could become necessary to
3639 # put double quotes around each flag, such as: -"c" -"T"
3640 # We may eventually need some system-dependent coding here.
3641 $flags = '"' . $flags . '"';
3643 # now wish for luck...
3644 my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
3646 unlink $stream_filename if ($is_tmpfile);
3647 return $stream_filename, $msg;
3650 #####################################################################
3652 # This is a stripped down version of IO::Scalar
3653 # Given a reference to a scalar, it supplies either:
3654 # a getline method which reads lines (mode='r'), or
3655 # a print method which reads lines (mode='w')
3657 #####################################################################
3658 package Perl::Tidy::IOScalar;
3662 my ( $package, $rscalar, $mode ) = @_;
3663 my $ref = ref $rscalar;
3664 if ( $ref ne 'SCALAR' ) {
3666 ------------------------------------------------------------------------
3667 expecting ref to SCALAR but got ref to ($ref); trace follows:
3668 ------------------------------------------------------------------------
3672 if ( $mode eq 'w' ) {
3674 return bless [ $rscalar, $mode ], $package;
3676 elsif ( $mode eq 'r' ) {
3678 # Convert a scalar to an array.
3679 # This avoids looking for "\n" on each call to getline
3681 # NOTES: The -1 count is needed to avoid loss of trailing blank lines
3682 # (which might be important in a DATA section).
3684 if ( $rscalar && ${$rscalar} ) {
3685 @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
3687 # remove possible extra blank line introduced with split
3688 if ( @array && $array[-1] eq "\n" ) { pop @array }
3691 return bless [ \@array, $mode, $i_next ], $package;
3695 ------------------------------------------------------------------------
3696 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3697 ------------------------------------------------------------------------
3704 my $mode = $self->[1];
3705 if ( $mode ne 'r' ) {
3707 ------------------------------------------------------------------------
3708 getline call requires mode = 'r' but mode = ($mode); trace follows:
3709 ------------------------------------------------------------------------
3712 my $i = $self->[2]++;
3713 return $self->[0]->[$i];
3718 my $mode = $self->[1];
3719 if ( $mode ne 'w' ) {
3721 ------------------------------------------------------------------------
3722 print call requires mode = 'w' but mode = ($mode); trace follows:
3723 ------------------------------------------------------------------------
3726 ${ $self->[0] } .= $_[0];
3728 sub close { return }
3730 #####################################################################
3732 # This is a stripped down version of IO::ScalarArray
3733 # Given a reference to an array, it supplies either:
3734 # a getline method which reads lines (mode='r'), or
3735 # a print method which reads lines (mode='w')
3737 # NOTE: this routine assumes that there aren't any embedded
3738 # newlines within any of the array elements. There are no checks
3741 #####################################################################
3742 package Perl::Tidy::IOScalarArray;
3746 my ( $package, $rarray, $mode ) = @_;
3747 my $ref = ref $rarray;
3748 if ( $ref ne 'ARRAY' ) {
3750 ------------------------------------------------------------------------
3751 expecting ref to ARRAY but got ref to ($ref); trace follows:
3752 ------------------------------------------------------------------------
3756 if ( $mode eq 'w' ) {
3758 return bless [ $rarray, $mode ], $package;
3760 elsif ( $mode eq 'r' ) {
3762 return bless [ $rarray, $mode, $i_next ], $package;
3766 ------------------------------------------------------------------------
3767 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3768 ------------------------------------------------------------------------
3775 my $mode = $self->[1];
3776 if ( $mode ne 'r' ) {
3778 ------------------------------------------------------------------------
3779 getline requires mode = 'r' but mode = ($mode); trace follows:
3780 ------------------------------------------------------------------------
3783 my $i = $self->[2]++;
3784 return $self->[0]->[$i];
3789 my $mode = $self->[1];
3790 if ( $mode ne 'w' ) {
3792 ------------------------------------------------------------------------
3793 print requires mode = 'w' but mode = ($mode); trace follows:
3794 ------------------------------------------------------------------------
3797 push @{ $self->[0] }, $_[0];
3799 sub close { return }
3801 #####################################################################
3803 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3804 # which returns the next line to be parsed
3806 #####################################################################
3808 package Perl::Tidy::LineSource;
3812 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3814 my $input_line_ending;
3815 if ( $rOpts->{'preserve-line-endings'} ) {
3816 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3819 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3820 return undef unless $fh;
3822 # in order to check output syntax when standard output is used,
3823 # or when it is an object, we have to make a copy of the file
3824 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3827 # Turning off syntax check when input output is used.
3828 # The reason is that temporary files cause problems on
3830 $rOpts->{'check-syntax'} = 0;
3832 $$rpending_logfile_message .= <<EOM;
3833 Note: --syntax check will be skipped because standard input is used
3840 _filename => $input_file,
3841 _input_line_ending => $input_line_ending,
3842 _rinput_buffer => [],
3847 sub close_input_file {
3850 # Only close physical files, not STDIN and other objects
3851 my $filename = $self->{_filename};
3852 if ( $filename ne '-' && !ref $filename ) {
3853 eval { $self->{_fh}->close() };
3860 my $fh = $self->{_fh};
3861 my $rinput_buffer = $self->{_rinput_buffer};
3863 if ( scalar(@$rinput_buffer) ) {
3864 $line = shift @$rinput_buffer;
3867 $line = $fh->getline();
3869 # patch to read raw mac files under unix, dos
3870 # see if the first line has embedded \r's
3871 if ( $line && !$self->{_started} ) {
3872 if ( $line =~ /[\015][^\015\012]/ ) {
3874 # found one -- break the line up and store in a buffer
3875 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3876 my $count = @$rinput_buffer;
3877 $line = shift @$rinput_buffer;
3879 $self->{_started}++;
3885 #####################################################################
3887 # the Perl::Tidy::LineSink class supplies a write_line method for
3888 # actual file writing
3890 #####################################################################
3892 package Perl::Tidy::LineSink;
3896 my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3897 $rpending_logfile_message, $binmode )
3902 my $output_file_open = 0;
3904 if ( $rOpts->{'format'} eq 'tidy' ) {
3905 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3906 unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
3907 $output_file_open = 1;
3909 if ( ref($fh) eq 'IO::File' ) {
3912 if ( $output_file eq '-' ) { binmode STDOUT }
3916 # in order to check output syntax when standard output is used,
3917 # or when it is an object, we have to make a copy of the file
3918 if ( $output_file eq '-' || ref $output_file ) {
3919 if ( $rOpts->{'check-syntax'} ) {
3921 # Turning off syntax check when standard output is used.
3922 # The reason is that temporary files cause problems on
3924 $rOpts->{'check-syntax'} = 0;
3925 $$rpending_logfile_message .= <<EOM;
3926 Note: --syntax check will be skipped because standard output is used
3935 _output_file => $output_file,
3936 _output_file_open => $output_file_open,
3938 _tee_file => $tee_file,
3939 _tee_file_opened => 0,
3940 _line_separator => $line_separator,
3941 _binmode => $binmode,
3948 my $fh = $self->{_fh};
3950 my $output_file_open = $self->{_output_file_open};
3952 $_[0] .= $self->{_line_separator};
3954 $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3956 if ( $self->{_tee_flag} ) {
3957 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3958 my $fh_tee = $self->{_fh_tee};
3959 print $fh_tee $_[0];
3965 $self->{_tee_flag} = 1;
3970 $self->{_tee_flag} = 0;
3973 sub really_open_tee_file {
3975 my $tee_file = $self->{_tee_file};
3977 $fh_tee = IO::File->new(">$tee_file")
3978 or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
3979 binmode $fh_tee if $self->{_binmode};
3980 $self->{_tee_file_opened} = 1;
3981 $self->{_fh_tee} = $fh_tee;
3984 sub close_output_file {
3987 # Only close physical files, not STDOUT and other objects
3988 my $output_file = $self->{_output_file};
3989 if ( $output_file ne '-' && !ref $output_file ) {
3990 eval { $self->{_fh}->close() } if $self->{_output_file_open};
3992 $self->close_tee_file();
3995 sub close_tee_file {
3998 # Only close physical files, not STDOUT and other objects
3999 if ( $self->{_tee_file_opened} ) {
4000 my $tee_file = $self->{_tee_file};
4001 if ( $tee_file ne '-' && !ref $tee_file ) {
4002 eval { $self->{_fh_tee}->close() };
4003 $self->{_tee_file_opened} = 0;
4008 #####################################################################
4010 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
4011 # useful for program development.
4013 # Only one such file is created regardless of the number of input
4014 # files processed. This allows the results of processing many files
4015 # to be summarized in a single file.
4017 #####################################################################
4019 package Perl::Tidy::Diagnostics;
4025 _write_diagnostics_count => 0,
4026 _last_diagnostic_file => "",
4032 sub set_input_file {
4034 $self->{_input_file} = $_[0];
4037 # This is a diagnostic routine which is useful for program development.
4038 # Output from debug messages go to a file named DIAGNOSTICS, where
4039 # they are labeled by file and line. This allows many files to be
4040 # scanned at once for some particular condition of interest.
4041 sub write_diagnostics {
4044 unless ( $self->{_write_diagnostics_count} ) {
4045 open DIAGNOSTICS, ">DIAGNOSTICS"
4046 or death("couldn't open DIAGNOSTICS: $!\n");
4049 my $last_diagnostic_file = $self->{_last_diagnostic_file};
4050 my $input_file = $self->{_input_file};
4051 if ( $last_diagnostic_file ne $input_file ) {
4052 print DIAGNOSTICS "\nFILE:$input_file\n";
4054 $self->{_last_diagnostic_file} = $input_file;
4055 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
4056 print DIAGNOSTICS "$input_line_number:\t@_";
4057 $self->{_write_diagnostics_count}++;
4060 #####################################################################
4062 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
4064 #####################################################################
4066 package Perl::Tidy::Logger;
4071 my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_;
4073 my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
4075 # remove any old error output file if we might write a new one
4076 unless ( $fh_warnings || ref($warning_file) ) {
4077 if ( -e $warning_file ) { unlink($warning_file) }
4081 defined( $rOpts->{'logfile-gap'} )
4082 ? $rOpts->{'logfile-gap'}
4084 if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
4087 _log_file => $log_file,
4088 _logfile_gap => $logfile_gap,
4090 _fh_warnings => $fh_warnings,
4091 _last_input_line_written => 0,
4092 _at_end_of_file => 0,
4094 _block_log_output => 0,
4095 _line_of_tokens => undef,
4096 _output_line_number => undef,
4097 _wrote_line_information_string => 0,
4098 _wrote_column_headings => 0,
4099 _warning_file => $warning_file,
4100 _warning_count => 0,
4101 _complaint_count => 0,
4102 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
4103 _saw_brace_error => 0,
4104 _saw_extrude => $saw_extrude,
4105 _output_array => [],
4109 sub get_warning_count {
4111 return $self->{_warning_count};
4114 sub get_use_prefix {
4116 return $self->{_use_prefix};
4119 sub block_log_output {
4121 $self->{_block_log_output} = 1;
4124 sub unblock_log_output {
4126 $self->{_block_log_output} = 0;
4129 sub interrupt_logfile {
4131 $self->{_use_prefix} = 0;
4132 $self->warning("\n");
4133 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
4136 sub resume_logfile {
4138 $self->write_logfile_entry( '#' x 60 . "\n" );
4139 $self->{_use_prefix} = 1;
4142 sub we_are_at_the_last_line {
4144 unless ( $self->{_wrote_line_information_string} ) {
4145 $self->write_logfile_entry("Last line\n\n");
4147 $self->{_at_end_of_file} = 1;
4150 # record some stuff in case we go down in flames
4153 my ( $line_of_tokens, $output_line_number ) = @_;
4154 my $input_line = $line_of_tokens->{_line_text};
4155 my $input_line_number = $line_of_tokens->{_line_number};
4157 # save line information in case we have to write a logfile message
4158 $self->{_line_of_tokens} = $line_of_tokens;
4159 $self->{_output_line_number} = $output_line_number;
4160 $self->{_wrote_line_information_string} = 0;
4162 my $last_input_line_written = $self->{_last_input_line_written};
4163 my $rOpts = $self->{_rOpts};
4166 ( $input_line_number - $last_input_line_written ) >=
4167 $self->{_logfile_gap}
4169 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
4172 my $rlevels = $line_of_tokens->{_rlevels};
4173 my $structural_indentation_level = $$rlevels[0];
4174 $self->{_last_input_line_written} = $input_line_number;
4175 ( my $out_str = $input_line ) =~ s/^\s*//;
4178 $out_str = ( '.' x $structural_indentation_level ) . $out_str;
4180 if ( length($out_str) > 35 ) {
4181 $out_str = substr( $out_str, 0, 35 ) . " ....";
4183 $self->logfile_output( "", "$out_str\n" );
4187 sub write_logfile_entry {
4190 # add leading >>> to avoid confusing error messages and code
4191 $self->logfile_output( ">>>", "@_" );
4194 sub write_column_headings {
4197 $self->{_wrote_column_headings} = 1;
4198 my $routput_array = $self->{_output_array};
4199 push @{$routput_array}, <<EOM;
4200 The nesting depths in the table below are at the start of the lines.
4201 The indicated output line numbers are not always exact.
4202 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
4204 in:out indent c b nesting code + messages; (messages begin with >>>)
4205 lines levels i k (code begins with one '.' per indent level)
4206 ------ ----- - - -------- -------------------------------------------
4210 sub make_line_information_string {
4212 # make columns of information when a logfile message needs to go out
4214 my $line_of_tokens = $self->{_line_of_tokens};
4215 my $input_line_number = $line_of_tokens->{_line_number};
4216 my $line_information_string = "";
4217 if ($input_line_number) {
4219 my $output_line_number = $self->{_output_line_number};
4220 my $brace_depth = $line_of_tokens->{_curly_brace_depth};
4221 my $paren_depth = $line_of_tokens->{_paren_depth};
4222 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
4223 my $guessed_indentation_level =
4224 $line_of_tokens->{_guessed_indentation_level};
4225 my $rlevels = $line_of_tokens->{_rlevels};
4226 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
4227 my $rci_levels = $line_of_tokens->{_rci_levels};
4228 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
4230 my $structural_indentation_level = $$rlevels[0];
4232 $self->write_column_headings() unless $self->{_wrote_column_headings};
4234 # keep logfile columns aligned for scripts up to 999 lines;
4235 # for longer scripts it doesn't really matter
4236 my $extra_space = "";
4238 ( $input_line_number < 10 ) ? " "
4239 : ( $input_line_number < 100 ) ? " "
4242 ( $output_line_number < 10 ) ? " "
4243 : ( $output_line_number < 100 ) ? " "
4246 # there are 2 possible nesting strings:
4247 # the original which looks like this: (0 [1 {2
4248 # the new one, which looks like this: {{[
4249 # the new one is easier to read, and shows the order, but
4250 # could be arbitrarily long, so we use it unless it is too long
4251 my $nesting_string =
4252 "($paren_depth [$square_bracket_depth {$brace_depth";
4253 my $nesting_string_new = $$rnesting_tokens[0];
4255 my $ci_level = $$rci_levels[0];
4256 if ( $ci_level > 9 ) { $ci_level = '*' }
4257 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
4259 if ( length($nesting_string_new) <= 8 ) {
4261 $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
4263 $line_information_string =
4264 "L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
4266 return $line_information_string;
4269 sub logfile_output {
4271 my ( $prompt, $msg ) = @_;
4272 return if ( $self->{_block_log_output} );
4274 my $routput_array = $self->{_output_array};
4275 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
4276 push @{$routput_array}, "$msg";
4279 my $line_information_string = $self->make_line_information_string();
4280 $self->{_wrote_line_information_string} = 1;
4282 if ($line_information_string) {
4283 push @{$routput_array}, "$line_information_string $prompt$msg";
4286 push @{$routput_array}, "$msg";
4291 sub get_saw_brace_error {
4293 return $self->{_saw_brace_error};
4296 sub increment_brace_error {
4298 $self->{_saw_brace_error}++;
4303 use constant BRACE_WARNING_LIMIT => 10;
4304 my $saw_brace_error = $self->{_saw_brace_error};
4306 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
4310 $self->{_saw_brace_error} = $saw_brace_error;
4312 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
4313 $self->warning("No further warnings of this type will be given\n");
4319 # handle non-critical warning messages based on input flag
4321 my $rOpts = $self->{_rOpts};
4323 # these appear in .ERR output only if -w flag is used
4324 if ( $rOpts->{'warning-output'} ) {
4328 # otherwise, they go to the .LOG file
4330 $self->{_complaint_count}++;
4331 $self->write_logfile_entry(@_);
4337 # report errors to .ERR file (or stdout)
4339 use constant WARNING_LIMIT => 50;
4341 my $rOpts = $self->{_rOpts};
4342 unless ( $rOpts->{'quiet'} ) {
4344 my $warning_count = $self->{_warning_count};
4345 my $fh_warnings = $self->{_fh_warnings};
4346 if ( !$fh_warnings ) {
4347 my $warning_file = $self->{_warning_file};
4348 ( $fh_warnings, my $filename ) =
4349 Perl::Tidy::streamhandle( $warning_file, 'w' );
4350 $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
4351 Perl::Tidy::Warn "## Please see file $filename\n"
4352 unless ref($warning_file);
4353 $self->{_fh_warnings} = $fh_warnings;
4356 if ( $warning_count < WARNING_LIMIT ) {
4357 if ( $self->get_use_prefix() > 0 ) {
4358 my $input_line_number =
4359 Perl::Tidy::Tokenizer::get_input_line_number();
4360 if ( !defined($input_line_number) ) { $input_line_number = -1 }
4361 $fh_warnings->print("$input_line_number:\t@_");
4362 $self->write_logfile_entry("WARNING: @_");
4365 $fh_warnings->print(@_);
4366 $self->write_logfile_entry(@_);
4370 $self->{_warning_count} = $warning_count;
4372 if ( $warning_count == WARNING_LIMIT ) {
4373 $fh_warnings->print("No further warnings will be given\n");
4378 # programming bug codes:
4380 # 0 = maybe, not sure.
4382 sub report_possible_bug {
4384 my $saw_code_bug = $self->{_saw_code_bug};
4385 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4388 sub report_definite_bug {
4390 $self->{_saw_code_bug} = 1;
4393 sub ask_user_for_bug_report {
4396 my ( $infile_syntax_ok, $formatter ) = @_;
4397 my $saw_code_bug = $self->{_saw_code_bug};
4398 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4399 $self->warning(<<EOM);
4401 You may have encountered a code bug in perltidy. If you think so, and
4402 the problem is not listed in the BUGS file at
4403 http://perltidy.sourceforge.net, please report it so that it can be
4404 corrected. Include the smallest possible script which has the problem,
4405 along with the .LOG file. See the manual pages for contact information.
4410 elsif ( $saw_code_bug == 1 ) {
4411 if ( $self->{_saw_extrude} ) {
4412 $self->warning(<<EOM);
4414 You may have encountered a bug in perltidy. However, since you are using the
4415 -extrude option, the problem may be with perl or one of its modules, which have
4416 occasional problems with this type of file. If you believe that the
4417 problem is with perltidy, and the problem is not listed in the BUGS file at
4418 http://perltidy.sourceforge.net, please report it so that it can be corrected.
4419 Include the smallest possible script which has the problem, along with the .LOG
4420 file. See the manual pages for contact information.
4425 $self->warning(<<EOM);
4427 Oops, you seem to have encountered a bug in perltidy. Please check the
4428 BUGS file at http://perltidy.sourceforge.net. If the problem is not
4429 listed there, please report it so that it can be corrected. Include the
4430 smallest possible script which produces this message, along with the
4431 .LOG file if appropriate. See the manual pages for contact information.
4432 Your efforts are appreciated.
4435 my $added_semicolon_count = 0;
4437 $added_semicolon_count =
4438 $formatter->get_added_semicolon_count();
4440 if ( $added_semicolon_count > 0 ) {
4441 $self->warning(<<EOM);
4443 The log file shows that perltidy added $added_semicolon_count semicolons.
4444 Please rerun with -nasc to see if that is the cause of the syntax error. Even
4445 if that is the problem, please report it so that it can be fixed.
4455 # called after all formatting to summarize errors
4457 my ( $infile_syntax_ok, $formatter ) = @_;
4459 my $rOpts = $self->{_rOpts};
4460 my $warning_count = $self->{_warning_count};
4461 my $saw_code_bug = $self->{_saw_code_bug};
4464 ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4465 || $saw_code_bug == 1
4466 || $rOpts->{'logfile'};
4467 my $log_file = $self->{_log_file};
4468 if ($warning_count) {
4469 if ($save_logfile) {
4470 $self->block_log_output(); # avoid echoing this to the logfile
4472 "The logfile $log_file may contain useful information\n");
4473 $self->unblock_log_output();
4476 if ( $self->{_complaint_count} > 0 ) {
4478 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4482 if ( $self->{_saw_brace_error}
4483 && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
4485 $self->warning("To save a full .LOG file rerun with -g\n");
4488 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4490 if ($save_logfile) {
4491 my $log_file = $self->{_log_file};
4492 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4494 my $routput_array = $self->{_output_array};
4495 foreach ( @{$routput_array} ) { $fh->print($_) }
4496 if ( $log_file ne '-' && !ref $log_file ) {
4497 eval { $fh->close() };
4503 #####################################################################
4505 # The Perl::Tidy::DevNull class supplies a dummy print method
4507 #####################################################################
4509 package Perl::Tidy::DevNull;
4510 sub new { return bless {}, $_[0] }
4511 sub print { return }
4512 sub close { return }
4514 #####################################################################
4516 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4518 #####################################################################
4520 package Perl::Tidy::HtmlWriter;
4530 %short_to_long_names
4534 $missing_html_entities
4537 # replace unsafe characters with HTML entity representation if HTML::Entities
4539 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4543 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4544 $html_src_extension )
4547 my $html_file_opened = 0;
4549 ( $html_fh, my $html_filename ) =
4550 Perl::Tidy::streamhandle( $html_file, 'w' );
4552 Perl::Tidy::Warn("can't open $html_file: $!\n");
4555 $html_file_opened = 1;
4557 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4558 $input_file = "NONAME";
4561 # write the table of contents to a string
4563 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4566 my @pre_string_stack;
4567 if ( $rOpts->{'html-pre-only'} ) {
4569 # pre section goes directly to the output stream
4570 $html_pre_fh = $html_fh;
4571 $html_pre_fh->print( <<"PRE_END");
4577 # pre section go out to a temporary string
4579 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4580 push @pre_string_stack, \$pre_string;
4583 # pod text gets diverted if the 'pod2html' is used
4586 if ( $rOpts->{'pod2html'} ) {
4587 if ( $rOpts->{'html-pre-only'} ) {
4588 undef $rOpts->{'pod2html'};
4591 eval "use Pod::Html";
4594 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4595 undef $rOpts->{'pod2html'};
4598 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4605 if ( $rOpts->{'frames'} ) {
4606 unless ($extension) {
4608 "cannot use frames without a specified output extension; ignoring -frm\n";
4609 undef $rOpts->{'frames'};
4612 $toc_filename = $input_file . $html_toc_extension . $extension;
4613 $src_filename = $input_file . $html_src_extension . $extension;
4617 # ----------------------------------------------------------
4618 # Output is now directed as follows:
4619 # html_toc_fh <-- table of contents items
4620 # html_pre_fh <-- the <pre> section of formatted code, except:
4621 # html_pod_fh <-- pod goes here with the pod2html option
4622 # ----------------------------------------------------------
4624 my $title = $rOpts->{'title'};
4626 ( $title, my $path ) = fileparse($input_file);
4628 my $toc_item_count = 0;
4629 my $in_toc_package = "";
4632 _input_file => $input_file, # name of input file
4633 _title => $title, # title, unescaped
4634 _html_file => $html_file, # name of .html output file
4635 _toc_filename => $toc_filename, # for frames option
4636 _src_filename => $src_filename, # for frames option
4637 _html_file_opened => $html_file_opened, # a flag
4638 _html_fh => $html_fh, # the output stream
4639 _html_pre_fh => $html_pre_fh, # pre section goes here
4640 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
4641 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4642 _rpod_string => \$pod_string, # string holding pod
4643 _pod_cut_count => 0, # how many =cut's?
4644 _html_toc_fh => $html_toc_fh, # fh for table of contents
4645 _rtoc_string => \$toc_string, # string holding toc
4646 _rtoc_item_count => \$toc_item_count, # how many toc items
4647 _rin_toc_package => \$in_toc_package, # package name
4648 _rtoc_name_count => {}, # hash to track unique names
4649 _rpackage_stack => [], # stack to check for package
4651 _rlast_level => \$last_level, # brace indentation level
4657 # Add an item to the html table of contents.
4658 # This is called even if no table of contents is written,
4659 # because we still want to put the anchors in the <pre> text.
4660 # We are given an anchor name and its type; types are:
4661 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
4662 # There must be an 'EOF' call at the end to wrap things up.
4664 my ( $name, $type ) = @_;
4665 my $html_toc_fh = $self->{_html_toc_fh};
4666 my $html_pre_fh = $self->{_html_pre_fh};
4667 my $rtoc_name_count = $self->{_rtoc_name_count};
4668 my $rtoc_item_count = $self->{_rtoc_item_count};
4669 my $rlast_level = $self->{_rlast_level};
4670 my $rin_toc_package = $self->{_rin_toc_package};
4671 my $rpackage_stack = $self->{_rpackage_stack};
4673 # packages contain sublists of subs, so to avoid errors all package
4674 # items are written and finished with the following routines
4675 my $end_package_list = sub {
4676 if ($$rin_toc_package) {
4677 $html_toc_fh->print("</ul>\n</li>\n");
4678 $$rin_toc_package = "";
4682 my $start_package_list = sub {
4683 my ( $unique_name, $package ) = @_;
4684 if ($$rin_toc_package) { $end_package_list->() }
4685 $html_toc_fh->print(<<EOM);
4686 <li><a href=\"#$unique_name\">package $package</a>
4689 $$rin_toc_package = $package;
4692 # start the table of contents on the first item
4693 unless ($$rtoc_item_count) {
4695 # but just quit if we hit EOF without any other entries
4696 # in this case, there will be no toc
4697 return if ( $type eq 'EOF' );
4698 $html_toc_fh->print( <<"TOC_END");
4699 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4703 $$rtoc_item_count++;
4705 # make a unique anchor name for this location:
4706 # - packages get a 'package-' prefix
4707 # - subs use their names
4708 my $unique_name = $name;
4709 if ( $type eq 'package' ) { $unique_name = "package-$name" }
4711 # append '-1', '-2', etc if necessary to make unique; this will
4712 # be unique because subs and packages cannot have a '-'
4713 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4714 $unique_name .= "-$count";
4717 # - all names get terminal '-' if pod2html is used, to avoid
4718 # conflicts with anchor names created by pod2html
4719 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4721 # start/stop lists of subs
4722 if ( $type eq 'sub' ) {
4723 my $package = $rpackage_stack->[$$rlast_level];
4724 unless ($package) { $package = 'main' }
4726 # if we're already in a package/sub list, be sure its the right
4727 # package or else close it
4728 if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4729 $end_package_list->();
4732 # start a package/sub list if necessary
4733 unless ($$rin_toc_package) {
4734 $start_package_list->( $unique_name, $package );
4738 # now write an entry in the toc for this item
4739 if ( $type eq 'package' ) {
4740 $start_package_list->( $unique_name, $name );
4742 elsif ( $type eq 'sub' ) {
4743 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4746 $end_package_list->();
4747 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4750 # write the anchor in the <pre> section
4751 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4753 # end the table of contents, if any, on the end of file
4754 if ( $type eq 'EOF' ) {
4755 $html_toc_fh->print( <<"TOC_END");
4757 <!-- END CODE INDEX -->
4764 # This is the official list of tokens which may be identified by the
4765 # user. Long names are used as getopt keys. Short names are
4766 # convenient short abbreviations for specifying input. Short names
4767 # somewhat resemble token type characters, but are often different
4768 # because they may only be alphanumeric, to allow command line
4769 # input. Also, note that because of case insensitivity of html,
4770 # this table must be in a single case only (I've chosen to use all
4772 # When adding NEW_TOKENS: update this hash table
4773 # short names => long names
4774 %short_to_long_names = (
4784 'pu' => 'punctuation',
4785 'i' => 'identifier',
4787 'h' => 'here-doc-target',
4788 'hh' => 'here-doc-text',
4790 'sc' => 'semicolon',
4791 'm' => 'subroutine',
4795 # Now we have to map actual token types into one of the above short
4796 # names; any token types not mapped will get 'punctuation'
4799 # The values of this hash table correspond to the keys of the
4800 # previous hash table.
4801 # The keys of this hash table are token types and can be seen
4802 # by running with --dump-token-types (-dtt).
4804 # When adding NEW_TOKENS: update this hash table
4805 # $type => $short_name
4806 %token_short_names = (
4831 # These token types will all be called identifiers for now
4832 # FIXME: could separate user defined modules as separate type
4833 my @identifier = qw" i t U C Y Z G :: CORE::";
4834 @token_short_names{@identifier} = ('i') x scalar(@identifier);
4836 # These token types will be called 'structure'
4837 my @structure = qw" { } ";
4838 @token_short_names{@structure} = ('s') x scalar(@structure);
4840 # OLD NOTES: save for reference
4841 # Any of these could be added later if it would be useful.
4842 # For now, they will by default become punctuation
4843 # my @list = qw" L R [ ] ";
4844 # @token_long_names{@list} = ('non-structure') x scalar(@list);
4847 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4849 # @token_long_names{@list} = ('math') x scalar(@list);
4851 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
4852 # @token_long_names{@list} = ('bit') x scalar(@list);
4854 # my @list = qw" == != < > <= <=> ";
4855 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4857 # my @list = qw" && || ! &&= ||= //= ";
4858 # @token_long_names{@list} = ('logical') x scalar(@list);
4860 # my @list = qw" . .= =~ !~ x x= ";
4861 # @token_long_names{@list} = ('string-operators') x scalar(@list);
4864 # my @list = qw" .. -> <> ... \ ? ";
4865 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
4869 sub make_getopt_long_names {
4871 my ($rgetopt_names) = @_;
4872 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4873 push @$rgetopt_names, "html-color-$name=s";
4874 push @$rgetopt_names, "html-italic-$name!";
4875 push @$rgetopt_names, "html-bold-$name!";
4877 push @$rgetopt_names, "html-color-background=s";
4878 push @$rgetopt_names, "html-linked-style-sheet=s";
4879 push @$rgetopt_names, "nohtml-style-sheets";
4880 push @$rgetopt_names, "html-pre-only";
4881 push @$rgetopt_names, "html-line-numbers";
4882 push @$rgetopt_names, "html-entities!";
4883 push @$rgetopt_names, "stylesheet";
4884 push @$rgetopt_names, "html-table-of-contents!";
4885 push @$rgetopt_names, "pod2html!";
4886 push @$rgetopt_names, "frames!";
4887 push @$rgetopt_names, "html-toc-extension=s";
4888 push @$rgetopt_names, "html-src-extension=s";
4890 # Pod::Html parameters:
4891 push @$rgetopt_names, "backlink=s";
4892 push @$rgetopt_names, "cachedir=s";
4893 push @$rgetopt_names, "htmlroot=s";
4894 push @$rgetopt_names, "libpods=s";
4895 push @$rgetopt_names, "podpath=s";
4896 push @$rgetopt_names, "podroot=s";
4897 push @$rgetopt_names, "title=s";
4899 # Pod::Html parameters with leading 'pod' which will be removed
4900 # before the call to Pod::Html
4901 push @$rgetopt_names, "podquiet!";
4902 push @$rgetopt_names, "podverbose!";
4903 push @$rgetopt_names, "podrecurse!";
4904 push @$rgetopt_names, "podflush";
4905 push @$rgetopt_names, "podheader!";
4906 push @$rgetopt_names, "podindex!";
4909 sub make_abbreviated_names {
4911 # We're appending things like this to the expansion list:
4912 # 'hcc' => [qw(html-color-comment)],
4913 # 'hck' => [qw(html-color-keyword)],
4916 my ($rexpansion) = @_;
4918 # abbreviations for color/bold/italic properties
4919 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4920 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
4921 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
4922 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
4923 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4924 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4927 # abbreviations for all other html options
4928 ${$rexpansion}{"hcbg"} = ["html-color-background"];
4929 ${$rexpansion}{"pre"} = ["html-pre-only"];
4930 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
4931 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
4932 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
4933 ${$rexpansion}{"hent"} = ["html-entities"];
4934 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4935 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
4936 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
4937 ${$rexpansion}{"ss"} = ["stylesheet"];
4938 ${$rexpansion}{"pod"} = ["pod2html"];
4939 ${$rexpansion}{"npod"} = ["nopod2html"];
4940 ${$rexpansion}{"frm"} = ["frames"];
4941 ${$rexpansion}{"nfrm"} = ["noframes"];
4942 ${$rexpansion}{"text"} = ["html-toc-extension"];
4943 ${$rexpansion}{"sext"} = ["html-src-extension"];
4948 # This will be called once after options have been parsed
4952 # X11 color names for default settings that seemed to look ok
4953 # (these color names are only used for programming clarity; the hex
4954 # numbers are actually written)
4955 use constant ForestGreen => "#228B22";
4956 use constant SaddleBrown => "#8B4513";
4957 use constant magenta4 => "#8B008B";
4958 use constant IndianRed3 => "#CD5555";
4959 use constant DeepSkyBlue4 => "#00688B";
4960 use constant MediumOrchid3 => "#B452CD";
4961 use constant black => "#000000";
4962 use constant white => "#FFFFFF";
4963 use constant red => "#FF0000";
4965 # set default color, bold, italic properties
4966 # anything not listed here will be given the default (punctuation) color --
4967 # these types currently not listed and get default: ws pu s sc cm co p
4968 # When adding NEW_TOKENS: add an entry here if you don't want defaults
4970 # set_default_properties( $short_name, default_color, bold?, italic? );
4971 set_default_properties( 'c', ForestGreen, 0, 0 );
4972 set_default_properties( 'pd', ForestGreen, 0, 1 );
4973 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
4974 set_default_properties( 'q', IndianRed3, 0, 0 );
4975 set_default_properties( 'hh', IndianRed3, 0, 1 );
4976 set_default_properties( 'h', IndianRed3, 1, 0 );
4977 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
4978 set_default_properties( 'w', black, 0, 0 );
4979 set_default_properties( 'n', MediumOrchid3, 0, 0 );
4980 set_default_properties( 'v', MediumOrchid3, 0, 0 );
4981 set_default_properties( 'j', IndianRed3, 1, 0 );
4982 set_default_properties( 'm', red, 1, 0 );
4984 set_default_color( 'html-color-background', white );
4985 set_default_color( 'html-color-punctuation', black );
4987 # setup property lookup tables for tokens based on their short names
4988 # every token type has a short name, and will use these tables
4989 # to do the html markup
4990 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4991 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
4992 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
4993 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4996 # write style sheet to STDOUT and die if requested
4997 if ( defined( $rOpts->{'stylesheet'} ) ) {
4998 write_style_sheet_file('-');
5002 # make sure user gives a file name after -css
5003 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
5004 $css_linkname = $rOpts->{'html-linked-style-sheet'};
5005 if ( $css_linkname =~ /^-/ ) {
5006 Perl::Tidy::Die "You must specify a valid filename after -css\n";
5010 # check for conflict
5011 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
5012 $rOpts->{'nohtml-style-sheets'} = 0;
5013 warning("You can't specify both -css and -nss; -nss ignored\n");
5016 # write a style sheet file if necessary
5017 if ($css_linkname) {
5019 # if the selected filename exists, don't write, because user may
5020 # have done some work by hand to create it; use backup name instead
5021 # Also, this will avoid a potential disaster in which the user
5022 # forgets to specify the style sheet, like this:
5023 # perltidy -html -css myfile1.pl myfile2.pl
5024 # This would cause myfile1.pl to parsed as the style sheet by GetOpts
5025 my $css_filename = $css_linkname;
5026 unless ( -e $css_filename ) {
5027 write_style_sheet_file($css_filename);
5030 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
5033 sub write_style_sheet_file {
5035 my $css_filename = shift;
5037 unless ( $fh = IO::File->new("> $css_filename") ) {
5038 Perl::Tidy::Die "can't open $css_filename: $!\n";
5040 write_style_sheet_data($fh);
5041 eval { $fh->close };
5044 sub write_style_sheet_data {
5046 # write the style sheet data to an open file handle
5049 my $bg_color = $rOpts->{'html-color-background'};
5050 my $text_color = $rOpts->{'html-color-punctuation'};
5052 # pre-bgcolor is new, and may not be defined
5053 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
5054 $pre_bg_color = $bg_color unless $pre_bg_color;
5056 $fh->print(<<"EOM");
5057 /* default style sheet generated by perltidy */
5058 body {background: $bg_color; color: $text_color}
5059 pre { color: $text_color;
5060 background: $pre_bg_color;
5061 font-family: courier;
5066 foreach my $short_name ( sort keys %short_to_long_names ) {
5067 my $long_name = $short_to_long_names{$short_name};
5069 my $abbrev = '.' . $short_name;
5070 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
5071 my $color = $html_color{$short_name};
5072 if ( !defined($color) ) { $color = $text_color }
5073 $fh->print("$abbrev \{ color: $color;");
5075 if ( $html_bold{$short_name} ) {
5076 $fh->print(" font-weight:bold;");
5079 if ( $html_italic{$short_name} ) {
5080 $fh->print(" font-style:italic;");
5082 $fh->print("} /* $long_name */\n");
5086 sub set_default_color {
5088 # make sure that options hash $rOpts->{$key} contains a valid color
5089 my ( $key, $color ) = @_;
5090 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
5091 $rOpts->{$key} = check_RGB($color);
5096 # if color is a 6 digit hex RGB value, prepend a #, otherwise
5097 # assume that it is a valid ascii color name
5099 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
5103 sub set_default_properties {
5104 my ( $short_name, $color, $bold, $italic ) = @_;
5106 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
5108 $key = "html-bold-$short_to_long_names{$short_name}";
5109 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
5110 $key = "html-italic-$short_to_long_names{$short_name}";
5111 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
5116 # Use Pod::Html to process the pod and make the page
5117 # then merge the perltidy code sections into it.
5118 # return 1 if success, 0 otherwise
5120 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
5121 my $input_file = $self->{_input_file};
5122 my $title = $self->{_title};
5123 my $success_flag = 0;
5125 # don't try to use pod2html if no pod
5126 unless ($pod_string) {
5127 return $success_flag;
5130 # Pod::Html requires a real temporary filename
5131 my ( $fh_tmp, $tmpfile ) = tempfile();
5134 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5135 return $success_flag;
5138 #------------------------------------------------------------------
5139 # Warning: a temporary file is open; we have to clean up if
5140 # things go bad. From here on all returns should be by going to
5141 # RETURN so that the temporary file gets unlinked.
5142 #------------------------------------------------------------------
5144 # write the pod text to the temporary file
5145 $fh_tmp->print($pod_string);
5148 # Hand off the pod to pod2html.
5149 # Note that we can use the same temporary filename for input and output
5150 # because of the way pod2html works.
5154 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
5157 # Flags with string args:
5158 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
5159 # "podpath=s", "podroot=s"
5160 # Note: -css=s is handled by perltidy itself
5161 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
5162 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
5165 # Toggle switches; these have extra leading 'pod'
5166 # "header!", "index!", "recurse!", "quiet!", "verbose!"
5167 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
5168 my $kwd = $kw; # allows us to strip 'pod'
5169 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
5170 elsif ( defined( $rOpts->{$kw} ) ) {
5172 push @args, "--no$kwd";
5178 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
5180 # Must clean up if pod2html dies (it can);
5181 # Be careful not to overwrite callers __DIE__ routine
5182 local $SIG{__DIE__} = sub {
5183 unlink $tmpfile if -e $tmpfile;
5184 Perl::Tidy::Die $_[0];
5189 $fh_tmp = IO::File->new( $tmpfile, 'r' );
5192 # this error shouldn't happen ... we just used this filename
5194 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5198 my $html_fh = $self->{_html_fh};
5204 # This routine will write the html selectively and store the toc
5205 my $html_print = sub {
5207 $html_fh->print($_) unless ($no_print);
5208 if ($in_toc) { push @toc, $_ }
5212 # loop over lines of html output from pod2html and merge in
5213 # the necessary perltidy html sections
5214 my ( $saw_body, $saw_index, $saw_body_end );
5215 while ( my $line = $fh_tmp->getline() ) {
5217 if ( $line =~ /^\s*<html>\s*$/i ) {
5218 my $date = localtime;
5219 $html_print->("<!-- Generated by perltidy on $date -->\n");
5220 $html_print->($line);
5223 # Copy the perltidy css, if any, after <body> tag
5224 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
5226 $html_print->($css_string) if $css_string;
5227 $html_print->($line);
5229 # add a top anchor and heading
5230 $html_print->("<a name=\"-top-\"></a>\n");
5231 $title = escape_html($title);
5232 $html_print->("<h1>$title</h1>\n");
5235 # check for start of index, old pod2html
5236 # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
5237 # <!-- INDEX BEGIN -->
5241 # <!-- INDEX END -->
5243 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
5246 # when frames are used, an extra table of contents in the
5247 # contents panel is confusing, so don't print it
5248 $no_print = $rOpts->{'frames'}
5249 || !$rOpts->{'html-table-of-contents'};
5250 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5251 $html_print->($line);
5254 # check for start of index, new pod2html
5255 # After Pod::Html VERSION 1.15_02 it is delimited as:
5259 elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
5263 # when frames are used, an extra table of contents in the
5264 # contents panel is confusing, so don't print it
5265 $no_print = $rOpts->{'frames'}
5266 || !$rOpts->{'html-table-of-contents'};
5267 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5268 $html_print->($line);
5271 # Check for end of index, old pod2html
5272 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
5274 $html_print->($line);
5276 # Copy the perltidy toc, if any, after the Pod::Html toc
5278 $html_print->("<hr />\n") if $rOpts->{'frames'};
5279 $html_print->("<h2>Code Index:</h2>\n");
5280 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5281 $html_print->(@toc);
5287 # must track <ul> depth level for new pod2html
5288 elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
5290 $html_print->($line);
5293 # Check for end of index, for new pod2html
5294 elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
5296 $html_print->($line);
5298 # Copy the perltidy toc, if any, after the Pod::Html toc
5299 if ( $ul_level <= 0 ) {
5302 $html_print->("<hr />\n") if $rOpts->{'frames'};
5303 $html_print->("<h2>Code Index:</h2>\n");
5304 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5305 $html_print->(@toc);
5313 # Copy one perltidy section after each marker
5314 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
5316 $html_print->($1) if $1;
5318 # Intermingle code and pod sections if we saw multiple =cut's.
5319 if ( $self->{_pod_cut_count} > 1 ) {
5320 my $rpre_string = shift(@$rpre_string_stack);
5321 if ($$rpre_string) {
5322 $html_print->('<pre>');
5323 $html_print->($$rpre_string);
5324 $html_print->('</pre>');
5328 # shouldn't happen: we stored a string before writing
5331 "Problem merging html stream with pod2html; order may be wrong\n";
5333 $html_print->($line);
5336 # If didn't see multiple =cut lines, we'll put the pod out first
5337 # and then the code, because it's less confusing.
5340 # since we are not intermixing code and pod, we don't need
5341 # or want any <hr> lines which separated pod and code
5342 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
5346 # Copy any remaining code section before the </body> tag
5347 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
5349 if (@$rpre_string_stack) {
5350 unless ( $self->{_pod_cut_count} > 1 ) {
5351 $html_print->('<hr />');
5353 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
5354 $html_print->('<pre>');
5355 $html_print->($$rpre_string);
5356 $html_print->('</pre>');
5359 $html_print->($line);
5362 $html_print->($line);
5367 unless ($saw_body) {
5368 Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
5371 unless ($saw_body_end) {
5372 Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
5375 unless ($saw_index) {
5376 Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
5381 eval { $html_fh->close() };
5383 # note that we have to unlink tmpfile before making frames
5384 # because the tmpfile may be one of the names used for frames
5385 unlink $tmpfile if -e $tmpfile;
5386 if ( $success_flag && $rOpts->{'frames'} ) {
5387 $self->make_frame( \@toc );
5389 return $success_flag;
5394 # Make a frame with table of contents in the left panel
5395 # and the text in the right panel.
5397 # $html_filename contains the no-frames html output
5398 # $rtoc is a reference to an array with the table of contents
5401 my $input_file = $self->{_input_file};
5402 my $html_filename = $self->{_html_file};
5403 my $toc_filename = $self->{_toc_filename};
5404 my $src_filename = $self->{_src_filename};
5405 my $title = $self->{_title};
5406 $title = escape_html($title);
5408 # FUTURE input parameter:
5409 my $top_basename = "";
5411 # We need to produce 3 html files:
5412 # 1. - the table of contents
5413 # 2. - the contents (source code) itself
5414 # 3. - the frame which contains them
5416 # get basenames for relative links
5417 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5418 my ( $src_basename, $src_path ) = fileparse($src_filename);
5420 # 1. Make the table of contents panel, with appropriate changes
5421 # to the anchor names
5422 my $src_frame_name = 'SRC';
5424 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5427 # 2. The current .html filename is renamed to be the contents panel
5428 rename( $html_filename, $src_filename )
5429 or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
5431 # 3. Then use the original html filename for the frame
5433 $title, $html_filename, $top_basename,
5434 $toc_basename, $src_basename, $src_frame_name
5438 sub write_toc_html {
5440 # write a separate html table of contents file for frames
5441 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5442 my $fh = IO::File->new( $toc_filename, 'w' )
5443 or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
5447 <title>$title</title>
5450 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5454 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5455 $fh->print( join "", @$rtoc );
5464 sub write_frame_html {
5466 # write an html file to be the table of contents frame
5468 $title, $frame_filename, $top_basename,
5469 $toc_basename, $src_basename, $src_frame_name
5472 my $fh = IO::File->new( $frame_filename, 'w' )
5473 or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
5476 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5477 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5478 <?xml version="1.0" encoding="iso-8859-1" ?>
5479 <html xmlns="http://www.w3.org/1999/xhtml">
5481 <title>$title</title>
5485 # two left panels, one right, if master index file
5486 if ($top_basename) {
5488 <frameset cols="20%,80%">
5489 <frameset rows="30%,70%">
5490 <frame src = "$top_basename" />
5491 <frame src = "$toc_basename" />
5496 # one left panels, one right, if no master index file
5499 <frameset cols="20%,*">
5500 <frame src = "$toc_basename" />
5504 <frame src = "$src_basename" name = "$src_frame_name" />
5507 <p>If you see this message, you are using a non-frame-capable web client.</p>
5508 <p>This document contains:</p>
5510 <li><a href="$toc_basename">A table of contents</a></li>
5511 <li><a href="$src_basename">The source code</a></li>
5520 sub change_anchor_names {
5522 # add a filename and target to anchors
5523 # also return the first anchor
5524 my ( $rlines, $filename, $target ) = @_;
5526 foreach my $line (@$rlines) {
5528 # We're looking for lines like this:
5529 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5530 # ---- - -------- -----------------
5532 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5536 my $href = "$filename#$name";
5537 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5538 unless ($first_anchor) { $first_anchor = $href }
5541 return $first_anchor;
5544 sub close_html_file {
5546 return unless $self->{_html_file_opened};
5548 my $html_fh = $self->{_html_fh};
5549 my $rtoc_string = $self->{_rtoc_string};
5551 # There are 3 basic paths to html output...
5553 # ---------------------------------
5554 # Path 1: finish up if in -pre mode
5555 # ---------------------------------
5556 if ( $rOpts->{'html-pre-only'} ) {
5557 $html_fh->print( <<"PRE_END");
5560 eval { $html_fh->close() };
5565 $self->add_toc_item( 'EOF', 'EOF' );
5567 my $rpre_string_stack = $self->{_rpre_string_stack};
5569 # Patch to darken the <pre> background color in case of pod2html and
5570 # interleaved code/documentation. Otherwise, the distinction
5571 # between code and documentation is blurred.
5572 if ( $rOpts->{pod2html}
5573 && $self->{_pod_cut_count} >= 1
5574 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5576 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5579 # put the css or its link into a string, if used
5581 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5583 # use css linked to another file
5584 if ( $rOpts->{'html-linked-style-sheet'} ) {
5586 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5590 # use css embedded in this file
5591 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5592 $fh_css->print( <<'ENDCSS');
5593 <style type="text/css">
5596 write_style_sheet_data($fh_css);
5597 $fh_css->print( <<"ENDCSS");
5603 # -----------------------------------------------------------
5604 # path 2: use pod2html if requested
5605 # If we fail for some reason, continue on to path 3
5606 # -----------------------------------------------------------
5607 if ( $rOpts->{'pod2html'} ) {
5608 my $rpod_string = $self->{_rpod_string};
5609 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5610 $rpre_string_stack )
5614 # --------------------------------------------------
5615 # path 3: write code in html, with pod only in italics
5616 # --------------------------------------------------
5617 my $input_file = $self->{_input_file};
5618 my $title = escape_html($input_file);
5619 my $date = localtime;
5620 $html_fh->print( <<"HTML_START");
5621 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5622 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5623 <!-- Generated by perltidy on $date -->
5624 <html xmlns="http://www.w3.org/1999/xhtml">
5626 <title>$title</title>
5629 # output the css, if used
5631 $html_fh->print($css_string);
5632 $html_fh->print( <<"ENDCSS");
5639 $html_fh->print( <<"HTML_START");
5641 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5645 $html_fh->print("<a name=\"-top-\"></a>\n");
5646 $html_fh->print( <<"EOM");
5650 # copy the table of contents
5652 && !$rOpts->{'frames'}
5653 && $rOpts->{'html-table-of-contents'} )
5655 $html_fh->print($$rtoc_string);
5658 # copy the pre section(s)
5659 my $fname_comment = $input_file;
5660 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
5661 $html_fh->print( <<"END_PRE");
5663 <!-- contents of filename: $fname_comment -->
5667 foreach my $rpre_string (@$rpre_string_stack) {
5668 $html_fh->print($$rpre_string);
5671 # and finish the html page
5672 $html_fh->print( <<"HTML_END");
5677 eval { $html_fh->close() }; # could be object without close method
5679 if ( $rOpts->{'frames'} ) {
5680 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5681 $self->make_frame( \@toc );
5687 my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5688 my ( @colored_tokens, $j, $string, $type, $token, $level );
5689 my $rlast_level = $self->{_rlast_level};
5690 my $rpackage_stack = $self->{_rpackage_stack};
5692 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5693 $type = $$rtoken_type[$j];
5694 $token = $$rtokens[$j];
5695 $level = $$rlevels[$j];
5696 $level = 0 if ( $level < 0 );
5698 #-------------------------------------------------------
5699 # Update the package stack. The package stack is needed to keep
5700 # the toc correct because some packages may be declared within
5701 # blocks and go out of scope when we leave the block.
5702 #-------------------------------------------------------
5703 if ( $level > $$rlast_level ) {
5704 unless ( $rpackage_stack->[ $level - 1 ] ) {
5705 $rpackage_stack->[ $level - 1 ] = 'main';
5707 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5709 elsif ( $level < $$rlast_level ) {
5710 my $package = $rpackage_stack->[$level];
5711 unless ($package) { $package = 'main' }
5713 # if we change packages due to a nesting change, we
5714 # have to make an entry in the toc
5715 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5716 $self->add_toc_item( $package, 'package' );
5719 $$rlast_level = $level;
5721 #-------------------------------------------------------
5722 # Intercept a sub name here; split it
5723 # into keyword 'sub' and sub name; and add an
5725 #-------------------------------------------------------
5726 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5727 $token = $self->markup_html_element( $1, 'k' );
5728 push @colored_tokens, $token;
5732 # but don't include sub declarations in the toc;
5733 # these wlll have leading token types 'i;'
5734 my $signature = join "", @$rtoken_type;
5735 unless ( $signature =~ /^i;/ ) {
5736 my $subname = $token;
5737 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5738 $self->add_toc_item( $subname, 'sub' );
5742 #-------------------------------------------------------
5743 # Intercept a package name here; split it
5744 # into keyword 'package' and name; add to the toc,
5745 # and update the package stack
5746 #-------------------------------------------------------
5747 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5748 $token = $self->markup_html_element( $1, 'k' );
5749 push @colored_tokens, $token;
5752 $self->add_toc_item( "$token", 'package' );
5753 $rpackage_stack->[$level] = $token;
5756 $token = $self->markup_html_element( $token, $type );
5757 push @colored_tokens, $token;
5759 return ( \@colored_tokens );
5762 sub markup_html_element {
5764 my ( $token, $type ) = @_;
5766 return $token if ( $type eq 'b' ); # skip a blank token
5767 return $token if ( $token =~ /^\s*$/ ); # skip a blank line
5768 $token = escape_html($token);
5770 # get the short abbreviation for this token type
5771 my $short_name = $token_short_names{$type};
5772 if ( !defined($short_name) ) {
5773 $short_name = "pu"; # punctuation is default
5776 # handle style sheets..
5777 if ( !$rOpts->{'nohtml-style-sheets'} ) {
5778 if ( $short_name ne 'pu' ) {
5779 $token = qq(<span class="$short_name">) . $token . "</span>";
5783 # handle no style sheets..
5785 my $color = $html_color{$short_name};
5787 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5788 $token = qq(<font color="$color">) . $token . "</font>";
5790 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5791 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
5799 if ($missing_html_entities) {
5800 $token =~ s/\&/&/g;
5801 $token =~ s/\</</g;
5802 $token =~ s/\>/>/g;
5803 $token =~ s/\"/"/g;
5806 HTML::Entities::encode_entities($token);
5811 sub finish_formatting {
5813 # called after last line
5815 $self->close_html_file();
5822 return unless $self->{_html_file_opened};
5823 my $html_pre_fh = $self->{_html_pre_fh};
5824 my ($line_of_tokens) = @_;
5825 my $line_type = $line_of_tokens->{_line_type};
5826 my $input_line = $line_of_tokens->{_line_text};
5827 my $line_number = $line_of_tokens->{_line_number};
5830 # markup line of code..
5832 if ( $line_type eq 'CODE' ) {
5833 my $rtoken_type = $line_of_tokens->{_rtoken_type};
5834 my $rtokens = $line_of_tokens->{_rtokens};
5835 my $rlevels = $line_of_tokens->{_rlevels};
5837 if ( $input_line =~ /(^\s*)/ ) {
5843 my ($rcolored_tokens) =
5844 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5845 $html_line .= join '', @$rcolored_tokens;
5848 # markup line of non-code..
5851 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
5852 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
5853 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
5854 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5855 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
5856 elsif ( $line_type eq 'END_START' ) {
5857 $line_character = 'k';
5858 $self->add_toc_item( '__END__', '__END__' );
5860 elsif ( $line_type eq 'DATA_START' ) {
5861 $line_character = 'k';
5862 $self->add_toc_item( '__DATA__', '__DATA__' );
5864 elsif ( $line_type =~ /^POD/ ) {
5865 $line_character = 'P';
5866 if ( $rOpts->{'pod2html'} ) {
5867 my $html_pod_fh = $self->{_html_pod_fh};
5868 if ( $line_type eq 'POD_START' ) {
5870 my $rpre_string_stack = $self->{_rpre_string_stack};
5871 my $rpre_string = $rpre_string_stack->[-1];
5873 # if we have written any non-blank lines to the
5874 # current pre section, start writing to a new output
5876 if ( $$rpre_string =~ /\S/ ) {
5879 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5880 $self->{_html_pre_fh} = $html_pre_fh;
5881 push @$rpre_string_stack, \$pre_string;
5883 # leave a marker in the pod stream so we know
5884 # where to put the pre section we just
5886 my $for_html = '=for html'; # don't confuse pod utils
5887 $html_pod_fh->print(<<EOM);
5890 <!-- pERLTIDY sECTION -->
5895 # otherwise, just clear the current string and start
5899 $html_pod_fh->print("\n");
5902 $html_pod_fh->print( $input_line . "\n" );
5903 if ( $line_type eq 'POD_END' ) {
5904 $self->{_pod_cut_count}++;
5905 $html_pod_fh->print("\n");
5910 else { $line_character = 'Q' }
5911 $html_line = $self->markup_html_element( $input_line, $line_character );
5914 # add the line number if requested
5915 if ( $rOpts->{'html-line-numbers'} ) {
5917 ( $line_number < 10 ) ? " "
5918 : ( $line_number < 100 ) ? " "
5919 : ( $line_number < 1000 ) ? " "
5921 $html_line = $extra_space . $line_number . " " . $html_line;
5925 $html_pre_fh->print("$html_line\n");
5928 #####################################################################
5930 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5931 # line breaks to the token stream
5933 # WARNING: This is not a real class for speed reasons. Only one
5934 # Formatter may be used.
5936 #####################################################################
5938 package Perl::Tidy::Formatter;
5942 # Caution: these debug flags produce a lot of output
5943 # They should all be 0 except when debugging small scripts
5944 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
5945 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
5946 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
5947 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
5948 use constant FORMATTER_DEBUG_FLAG_CI => 0;
5949 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
5950 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
5951 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
5952 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5953 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
5954 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
5955 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
5956 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
5957 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
5959 my $debug_warning = sub {
5960 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
5963 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
5964 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
5965 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
5966 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
5967 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
5968 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
5969 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
5970 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
5971 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5972 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
5973 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
5974 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
5975 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
5976 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
5983 $max_gnu_stack_index
5984 $gnu_position_predictor
5985 $line_start_index_to_go
5986 $last_indentation_written
5987 $last_unadjusted_indentation
5989 $last_output_short_opening_token
5991 $saw_VERSION_in_this_file
5996 $gnu_sequence_number
5997 $last_output_indentation
6003 @type_sequence_to_go
6004 @container_environment_to_go
6005 @bond_strength_to_go
6006 @forced_breakpoint_to_go
6007 @token_lengths_to_go
6008 @summed_lengths_to_go
6010 @leading_spaces_to_go
6011 @reduced_spaces_to_go
6012 @matching_token_to_go
6014 @nesting_blocks_to_go
6016 @nesting_depth_to_go
6018 @old_breakpoint_to_go
6024 %saved_opening_indentation
6027 $comma_count_in_batch
6028 $old_line_count_in_batch
6029 $last_nonblank_index_to_go
6030 $last_nonblank_type_to_go
6031 $last_nonblank_token_to_go
6032 $last_last_nonblank_index_to_go
6033 $last_last_nonblank_type_to_go
6034 $last_last_nonblank_token_to_go
6035 @nonblank_lines_at_depth
6038 @whitespace_level_stack
6039 $whitespace_last_level
6041 $in_format_skipping_section
6042 $format_skipping_pattern_begin
6043 $format_skipping_pattern_end
6045 $forced_breakpoint_count
6046 $forced_breakpoint_undo_count
6047 @forced_breakpoint_undo_stack
6048 %postponed_breakpoint
6052 $first_embedded_tab_at
6053 $last_embedded_tab_at
6054 $deleted_semicolon_count
6055 $first_deleted_semicolon_at
6056 $last_deleted_semicolon_at
6057 $added_semicolon_count
6058 $first_added_semicolon_at
6059 $last_added_semicolon_at
6060 $first_tabbing_disagreement
6061 $last_tabbing_disagreement
6062 $in_tabbing_disagreement
6063 $tabbing_disagreement_count
6067 $last_line_leading_type
6068 $last_line_leading_level
6069 $last_last_line_leading_level
6072 %block_opening_line_number
6073 $csc_new_statement_ok
6076 $accumulating_text_for_block
6078 $rleading_block_if_elsif_text
6079 $leading_block_text_level
6080 $leading_block_text_length_exceeded
6081 $leading_block_text_line_length
6082 $leading_block_text_line_number
6083 $closing_side_comment_prefix_pattern
6084 $closing_side_comment_list_pattern
6086 $last_nonblank_token
6088 $last_last_nonblank_token
6089 $last_last_nonblank_type
6090 $last_nonblank_block_type
6093 %is_if_brace_follower
6094 %space_after_keyword
6097 %is_last_next_redo_return
6098 %is_other_brace_follower
6099 %is_else_brace_follower
6100 %is_anon_sub_brace_follower
6101 %is_anon_sub_1_brace_follower
6103 %is_sort_map_grep_eval
6104 %is_sort_map_grep_eval_do
6105 %is_block_without_semicolon
6110 %is_if_unless_and_or_last_next_redo_return
6116 $is_static_block_comment
6117 $index_start_one_line_block
6118 $semicolons_before_block_self_destruct
6119 $index_max_forced_break
6122 $vertical_aligner_object
6127 $last_line_had_side_comment
6130 $static_block_comment_pattern
6131 $static_side_comment_pattern
6132 %opening_vertical_tightness
6133 %closing_vertical_tightness
6134 %closing_token_indentation
6135 $some_closing_token_indentation
6137 %opening_token_right
6138 %stack_opening_token
6139 %stack_closing_token
6141 $block_brace_vertical_tightness_pattern
6144 $rOpts_add_whitespace
6145 $rOpts_block_brace_tightness
6146 $rOpts_block_brace_vertical_tightness
6147 $rOpts_brace_left_and_indent
6148 $rOpts_comma_arrow_breakpoints
6149 $rOpts_break_at_old_keyword_breakpoints
6150 $rOpts_break_at_old_comma_breakpoints
6151 $rOpts_break_at_old_logical_breakpoints
6152 $rOpts_break_at_old_ternary_breakpoints
6153 $rOpts_break_at_old_attribute_breakpoints
6154 $rOpts_closing_side_comment_else_flag
6155 $rOpts_closing_side_comment_maximum_text
6156 $rOpts_continuation_indentation
6158 $rOpts_delete_old_whitespace
6159 $rOpts_fuzzy_line_length
6160 $rOpts_indent_columns
6161 $rOpts_line_up_parentheses
6162 $rOpts_maximum_fields_per_table
6163 $rOpts_maximum_line_length
6164 $rOpts_variable_maximum_line_length
6165 $rOpts_short_concatenation_item_length
6166 $rOpts_keep_old_blank_lines
6167 $rOpts_ignore_old_breakpoints
6168 $rOpts_format_skipping
6169 $rOpts_space_function_paren
6170 $rOpts_space_keyword_paren
6171 $rOpts_keep_interior_semicolons
6172 $rOpts_ignore_side_comment_lengths
6173 $rOpts_stack_closing_block_brace
6174 $rOpts_whitespace_cycle
6175 $rOpts_tight_secret_operators
6179 %is_keyword_returning_list
6183 %right_bond_strength
6200 # default list of block types for which -bli would apply
6201 $bli_list_string = 'if else elsif unless while for foreach do : sub';
6204 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
6205 <= >= == =~ !~ != ++ -- /= x=
6207 @is_digraph{@_} = (1) x scalar(@_);
6209 @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
6210 @is_trigraph{@_} = (1) x scalar(@_);
6213 = **= += *= &= <<= &&=
6214 -= /= |= >>= ||= //=
6218 @is_assignment{@_} = (1) x scalar(@_);
6228 @is_keyword_returning_list{@_} = (1) x scalar(@_);
6230 @_ = qw(is if unless and or err last next redo return);
6231 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
6233 @_ = qw(last next redo return);
6234 @is_last_next_redo_return{@_} = (1) x scalar(@_);
6236 @_ = qw(sort map grep);
6237 @is_sort_map_grep{@_} = (1) x scalar(@_);
6239 @_ = qw(sort map grep eval);
6240 @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
6242 @_ = qw(sort map grep eval do);
6243 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
6246 @is_if_unless{@_} = (1) x scalar(@_);
6248 @_ = qw(and or err);
6249 @is_and_or{@_} = (1) x scalar(@_);
6251 # Identify certain operators which often occur in chains.
6252 # Note: the minus (-) causes a side effect of padding of the first line in
6253 # something like this (by sub set_logical_padding):
6254 # Checkbutton => 'Transmission checked',
6255 # -variable => \$TRANS
6256 # This usually improves appearance so it seems ok.
6257 @_ = qw(&& || and or : ? . + - * /);
6258 @is_chain_operator{@_} = (1) x scalar(@_);
6260 # We can remove semicolons after blocks preceded by these keywords
6262 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6263 unless while until for foreach given when default);
6264 @is_block_without_semicolon{@_} = (1) x scalar(@_);
6266 # 'L' is token for opening { at hash key
6268 @is_opening_type{@_} = (1) x scalar(@_);
6270 # 'R' is token for closing } at hash key
6272 @is_closing_type{@_} = (1) x scalar(@_);
6275 @is_opening_token{@_} = (1) x scalar(@_);
6278 @is_closing_token{@_} = (1) x scalar(@_);
6282 use constant WS_YES => 1;
6283 use constant WS_OPTIONAL => 0;
6284 use constant WS_NO => -1;
6286 # Token bond strengths.
6287 use constant NO_BREAK => 10000;
6288 use constant VERY_STRONG => 100;
6289 use constant STRONG => 2.1;
6290 use constant NOMINAL => 1.1;
6291 use constant WEAK => 0.8;
6292 use constant VERY_WEAK => 0.55;
6294 # values for testing indexes in output array
6295 use constant UNDEFINED_INDEX => -1;
6297 # Maximum number of little messages; probably need not be changed.
6298 use constant MAX_NAG_MESSAGES => 6;
6300 # increment between sequence numbers for each type
6301 # For example, ?: pairs might have numbers 7,11,15,...
6302 use constant TYPE_SEQUENCE_INCREMENT => 4;
6306 # methods to count instances
6308 sub get_count { $_count; }
6309 sub _increment_count { ++$_count }
6310 sub _decrement_count { --$_count }
6315 # trim leading and trailing whitespace from a string
6324 $max = ( $max < $_ ) ? $_ : $max;
6332 $min = ( $min > $_ ) ? $_ : $min;
6339 # given a string containing words separated by whitespace,
6340 # return the list of words
6345 return split( /\s+/, $str );
6348 # interface to Perl::Tidy::Logger routines
6350 if ($logger_object) {
6351 $logger_object->warning(@_);
6356 if ($logger_object) {
6357 $logger_object->complain(@_);
6361 sub write_logfile_entry {
6362 if ($logger_object) {
6363 $logger_object->write_logfile_entry(@_);
6368 if ($logger_object) {
6369 $logger_object->black_box(@_);
6373 sub report_definite_bug {
6374 if ($logger_object) {
6375 $logger_object->report_definite_bug();
6379 sub get_saw_brace_error {
6380 if ($logger_object) {
6381 $logger_object->get_saw_brace_error();
6385 sub we_are_at_the_last_line {
6386 if ($logger_object) {
6387 $logger_object->we_are_at_the_last_line();
6391 # interface to Perl::Tidy::Diagnostics routine
6392 sub write_diagnostics {
6394 if ($diagnostics_object) {
6395 $diagnostics_object->write_diagnostics(@_);
6399 sub get_added_semicolon_count {
6401 return $added_semicolon_count;
6405 $_[0]->_decrement_count();
6412 # we are given an object with a write_line() method to take lines
6414 sink_object => undef,
6415 diagnostics_object => undef,
6416 logger_object => undef,
6418 my %args = ( %defaults, @_ );
6420 $logger_object = $args{logger_object};
6421 $diagnostics_object = $args{diagnostics_object};
6423 # we create another object with a get_line() and peek_ahead() method
6424 my $sink_object = $args{sink_object};
6425 $file_writer_object =
6426 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6428 # initialize the leading whitespace stack to negative levels
6429 # so that we can never run off the end of the stack
6430 $gnu_position_predictor = 0; # where the current token is predicted to be
6431 $max_gnu_stack_index = 0;
6432 $max_gnu_item_index = -1;
6433 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6434 @gnu_item_list = ();
6435 $last_output_indentation = 0;
6436 $last_indentation_written = 0;
6437 $last_unadjusted_indentation = 0;
6438 $last_leading_token = "";
6439 $last_output_short_opening_token = 0;
6441 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6442 $saw_END_or_DATA_ = 0;
6444 @block_type_to_go = ();
6445 @type_sequence_to_go = ();
6446 @container_environment_to_go = ();
6447 @bond_strength_to_go = ();
6448 @forced_breakpoint_to_go = ();
6449 @summed_lengths_to_go = (); # line length to start of ith token
6450 @token_lengths_to_go = ();
6452 @matching_token_to_go = ();
6453 @mate_index_to_go = ();
6454 @nesting_blocks_to_go = ();
6455 @ci_levels_to_go = ();
6456 @nesting_depth_to_go = (0);
6457 @nobreak_to_go = ();
6458 @old_breakpoint_to_go = ();
6461 @leading_spaces_to_go = ();
6462 @reduced_spaces_to_go = ();
6466 @whitespace_level_stack = ();
6467 $whitespace_last_level = -1;
6470 @has_broken_sublist = ();
6471 @want_comma_break = ();
6474 $first_tabbing_disagreement = 0;
6475 $last_tabbing_disagreement = 0;
6476 $tabbing_disagreement_count = 0;
6477 $in_tabbing_disagreement = 0;
6478 $input_line_tabbing = undef;
6480 $last_line_type = "";
6481 $last_last_line_leading_level = 0;
6482 $last_line_leading_level = 0;
6483 $last_line_leading_type = '#';
6485 $last_nonblank_token = ';';
6486 $last_nonblank_type = ';';
6487 $last_last_nonblank_token = ';';
6488 $last_last_nonblank_type = ';';
6489 $last_nonblank_block_type = "";
6490 $last_output_level = 0;
6491 $looking_for_else = 0;
6492 $embedded_tab_count = 0;
6493 $first_embedded_tab_at = 0;
6494 $last_embedded_tab_at = 0;
6495 $deleted_semicolon_count = 0;
6496 $first_deleted_semicolon_at = 0;
6497 $last_deleted_semicolon_at = 0;
6498 $added_semicolon_count = 0;
6499 $first_added_semicolon_at = 0;
6500 $last_added_semicolon_at = 0;
6501 $last_line_had_side_comment = 0;
6502 $is_static_block_comment = 0;
6503 %postponed_breakpoint = ();
6505 # variables for adding side comments
6506 %block_leading_text = ();
6507 %block_opening_line_number = ();
6508 $csc_new_statement_ok = 1;
6509 %csc_block_label = ();
6511 %saved_opening_indentation = ();
6512 $in_format_skipping_section = 0;
6514 reset_block_text_accumulator();
6516 prepare_for_new_input_lines();
6518 $vertical_aligner_object =
6519 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6520 $logger_object, $diagnostics_object );
6522 if ( $rOpts->{'entab-leading-whitespace'} ) {
6523 write_logfile_entry(
6524 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6527 elsif ( $rOpts->{'tabs'} ) {
6528 write_logfile_entry("Indentation will be with a tab character\n");
6531 write_logfile_entry(
6532 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6535 # This was the start of a formatter referent, but object-oriented
6536 # coding has turned out to be too slow here.
6537 $formatter_self = {};
6539 bless $formatter_self, $class;
6541 # Safety check..this is not a class yet
6542 if ( _increment_count() > 1 ) {
6544 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6546 return $formatter_self;
6549 sub prepare_for_new_input_lines {
6551 $gnu_sequence_number++; # increment output batch counter
6552 %last_gnu_equals = ();
6553 %gnu_comma_count = ();
6554 %gnu_arrow_count = ();
6555 $line_start_index_to_go = 0;
6556 $max_gnu_item_index = UNDEFINED_INDEX;
6557 $index_max_forced_break = UNDEFINED_INDEX;
6558 $max_index_to_go = UNDEFINED_INDEX;
6559 $last_nonblank_index_to_go = UNDEFINED_INDEX;
6560 $last_nonblank_type_to_go = '';
6561 $last_nonblank_token_to_go = '';
6562 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6563 $last_last_nonblank_type_to_go = '';
6564 $last_last_nonblank_token_to_go = '';
6565 $forced_breakpoint_count = 0;
6566 $forced_breakpoint_undo_count = 0;
6567 $rbrace_follower = undef;
6568 $summed_lengths_to_go[0] = 0;
6569 $old_line_count_in_batch = 1;
6570 $comma_count_in_batch = 0;
6571 $starting_in_quote = 0;
6573 destroy_one_line_block();
6579 my ($line_of_tokens) = @_;
6581 my $line_type = $line_of_tokens->{_line_type};
6582 my $input_line = $line_of_tokens->{_line_text};
6584 if ( $rOpts->{notidy} ) {
6585 write_unindented_line($input_line);
6586 $last_line_type = $line_type;
6590 # _line_type codes are:
6591 # SYSTEM - system-specific code before hash-bang line
6592 # CODE - line of perl code (including comments)
6593 # POD_START - line starting pod, such as '=head'
6594 # POD - pod documentation text
6595 # POD_END - last line of pod section, '=cut'
6596 # HERE - text of here-document
6597 # HERE_END - last line of here-doc (target word)
6598 # FORMAT - format section
6599 # FORMAT_END - last line of format section, '.'
6600 # DATA_START - __DATA__ line
6601 # DATA - unidentified text following __DATA__
6602 # END_START - __END__ line
6603 # END - unidentified text following __END__
6604 # ERROR - we are in big trouble, probably not a perl script
6606 # put a blank line after an =cut which comes before __END__ and __DATA__
6607 # (required by podchecker)
6608 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6609 $file_writer_object->reset_consecutive_blank_lines();
6610 if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6613 # handle line of code..
6614 if ( $line_type eq 'CODE' ) {
6616 # let logger see all non-blank lines of code
6617 if ( $input_line !~ /^\s*$/ ) {
6618 my $output_line_number =
6619 $vertical_aligner_object->get_output_line_number();
6620 black_box( $line_of_tokens, $output_line_number );
6622 print_line_of_tokens($line_of_tokens);
6625 # handle line of non-code..
6631 if ( $line_type =~ /^POD/ ) {
6633 # Pod docs should have a preceding blank line. But stay
6634 # out of __END__ and __DATA__ sections, because
6635 # the user may be using this section for any purpose whatsoever
6636 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6637 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
6638 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
6640 && $line_type eq 'POD_START'
6641 && !$saw_END_or_DATA_ )
6647 # leave the blank counters in a predictable state
6648 # after __END__ or __DATA__
6649 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6650 $file_writer_object->reset_consecutive_blank_lines();
6651 $saw_END_or_DATA_ = 1;
6654 # write unindented non-code line
6655 if ( !$skip_line ) {
6656 if ($tee_line) { $file_writer_object->tee_on() }
6657 write_unindented_line($input_line);
6658 if ($tee_line) { $file_writer_object->tee_off() }
6661 $last_line_type = $line_type;
6664 sub create_one_line_block {
6665 $index_start_one_line_block = $_[0];
6666 $semicolons_before_block_self_destruct = $_[1];
6669 sub destroy_one_line_block {
6670 $index_start_one_line_block = UNDEFINED_INDEX;
6671 $semicolons_before_block_self_destruct = 0;
6674 sub leading_spaces_to_go {
6676 # return the number of indentation spaces for a token in the output stream;
6677 # these were previously stored by 'set_leading_whitespace'.
6680 if ( $ii < 0 ) { $ii = 0 }
6681 return get_SPACES( $leading_spaces_to_go[$ii] );
6687 # return the number of leading spaces associated with an indentation
6688 # variable $indentation is either a constant number of spaces or an object
6689 # with a get_SPACES method.
6690 my $indentation = shift;
6691 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6694 sub get_RECOVERABLE_SPACES {
6696 # return the number of spaces (+ means shift right, - means shift left)
6697 # that we would like to shift a group of lines with the same indentation
6698 # to get them to line up with their opening parens
6699 my $indentation = shift;
6700 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6703 sub get_AVAILABLE_SPACES_to_go {
6705 my $item = $leading_spaces_to_go[ $_[0] ];
6707 # return the number of available leading spaces associated with an
6708 # indentation variable. $indentation is either a constant number of
6709 # spaces or an object with a get_AVAILABLE_SPACES method.
6710 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6713 sub new_lp_indentation_item {
6715 # this is an interface to the IndentationItem class
6716 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6718 # A negative level implies not to store the item in the item_list
6720 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6722 my $item = Perl::Tidy::IndentationItem->new(
6724 $ci_level, $available_spaces,
6725 $index, $gnu_sequence_number,
6726 $align_paren, $max_gnu_stack_index,
6727 $line_start_index_to_go,
6730 if ( $level >= 0 ) {
6731 $gnu_item_list[$max_gnu_item_index] = $item;
6737 sub set_leading_whitespace {
6739 # This routine defines leading whitespace
6740 # given: the level and continuation_level of a token,
6741 # define: space count of leading string which would apply if it
6742 # were the first token of a new line.
6744 my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
6746 # Adjust levels if necessary to recycle whitespace:
6747 # given $level_abs, the absolute level
6748 # define $level, a possibly reduced level for whitespace
6749 my $level = $level_abs;
6750 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
6751 if ( $level_abs < $whitespace_last_level ) {
6752 pop(@whitespace_level_stack);
6754 if ( !@whitespace_level_stack ) {
6755 push @whitespace_level_stack, $level_abs;
6757 elsif ( $level_abs > $whitespace_last_level ) {
6758 $level = $whitespace_level_stack[-1] +
6759 ( $level_abs - $whitespace_last_level );
6762 # 1 Try to break at a block brace
6764 $level > $rOpts_whitespace_cycle
6765 && $last_nonblank_type eq '{'
6766 && $last_nonblank_token eq '{'
6769 # 2 Then either a brace or bracket
6770 || ( $level > $rOpts_whitespace_cycle + 1
6771 && $last_nonblank_token =~ /^[\{\[]$/ )
6773 # 3 Then a paren too
6774 || $level > $rOpts_whitespace_cycle + 2
6779 push @whitespace_level_stack, $level;
6781 $level = $whitespace_level_stack[-1];
6783 $whitespace_last_level = $level_abs;
6785 # modify for -bli, which adds one continuation indentation for
6787 if ( $rOpts_brace_left_and_indent
6788 && $max_index_to_go == 0
6789 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6794 # patch to avoid trouble when input file has negative indentation.
6795 # other logic should catch this error.
6796 if ( $level < 0 ) { $level = 0 }
6798 #-------------------------------------------
6799 # handle the standard indentation scheme
6800 #-------------------------------------------
6801 unless ($rOpts_line_up_parentheses) {
6803 $ci_level * $rOpts_continuation_indentation +
6804 $level * $rOpts_indent_columns;
6806 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6808 if ($in_continued_quote) {
6812 $leading_spaces_to_go[$max_index_to_go] = $space_count;
6813 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6817 #-------------------------------------------------------------
6818 # handle case of -lp indentation..
6819 #-------------------------------------------------------------
6821 # The continued_quote flag means that this is the first token of a
6822 # line, and it is the continuation of some kind of multi-line quote
6823 # or pattern. It requires special treatment because it must have no
6824 # added leading whitespace. So we create a special indentation item
6825 # which is not in the stack.
6826 if ($in_continued_quote) {
6827 my $space_count = 0;
6828 my $available_space = 0;
6829 $level = -1; # flag to prevent storing in item_list
6830 $leading_spaces_to_go[$max_index_to_go] =
6831 $reduced_spaces_to_go[$max_index_to_go] =
6832 new_lp_indentation_item( $space_count, $level, $ci_level,
6833 $available_space, 0 );
6837 # get the top state from the stack
6838 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6839 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6840 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6842 my $type = $types_to_go[$max_index_to_go];
6843 my $token = $tokens_to_go[$max_index_to_go];
6844 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6846 if ( $type eq '{' || $type eq '(' ) {
6848 $gnu_comma_count{ $total_depth + 1 } = 0;
6849 $gnu_arrow_count{ $total_depth + 1 } = 0;
6851 # If we come to an opening token after an '=' token of some type,
6852 # see if it would be helpful to 'break' after the '=' to save space
6853 my $last_equals = $last_gnu_equals{$total_depth};
6854 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6856 # find the position if we break at the '='
6857 my $i_test = $last_equals;
6858 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6861 ##my $too_close = ($i_test==$max_index_to_go-1);
6863 my $test_position = total_line_length( $i_test, $max_index_to_go );
6864 my $mll = maximum_line_length($i_test);
6868 # the equals is not just before an open paren (testing)
6871 # if we are beyond the midpoint
6872 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
6874 # or we are beyond the 1/4 point and there was an old
6875 # break at the equals
6877 $gnu_position_predictor >
6878 $mll - $rOpts_maximum_line_length * 3 / 4
6880 $old_breakpoint_to_go[$last_equals]
6881 || ( $last_equals > 0
6882 && $old_breakpoint_to_go[ $last_equals - 1 ] )
6883 || ( $last_equals > 1
6884 && $types_to_go[ $last_equals - 1 ] eq 'b'
6885 && $old_breakpoint_to_go[ $last_equals - 2 ] )
6891 # then make the switch -- note that we do not set a real
6892 # breakpoint here because we may not really need one; sub
6893 # scan_list will do that if necessary
6894 $line_start_index_to_go = $i_test + 1;
6895 $gnu_position_predictor = $test_position;
6901 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
6903 # Check for decreasing depth ..
6904 # Note that one token may have both decreasing and then increasing
6905 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
6906 # in this example we would first go back to (1,0) then up to (2,0)
6908 if ( $level < $current_level || $ci_level < $current_ci_level ) {
6910 # loop to find the first entry at or completely below this level
6911 my ( $lev, $ci_lev );
6913 if ($max_gnu_stack_index) {
6915 # save index of token which closes this level
6916 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6918 # Undo any extra indentation if we saw no commas
6919 my $available_spaces =
6920 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6922 my $comma_count = 0;
6923 my $arrow_count = 0;
6924 if ( $type eq '}' || $type eq ')' ) {
6925 $comma_count = $gnu_comma_count{$total_depth};
6926 $arrow_count = $gnu_arrow_count{$total_depth};
6927 $comma_count = 0 unless $comma_count;
6928 $arrow_count = 0 unless $arrow_count;
6930 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6931 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6933 if ( $available_spaces > 0 ) {
6935 if ( $comma_count <= 0 || $arrow_count > 0 ) {
6937 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6939 $gnu_stack[$max_gnu_stack_index]
6940 ->get_SEQUENCE_NUMBER();
6942 # Be sure this item was created in this batch. This
6943 # should be true because we delete any available
6944 # space from open items at the end of each batch.
6945 if ( $gnu_sequence_number != $seqno
6946 || $i > $max_gnu_item_index )
6949 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6951 report_definite_bug();
6955 if ( $arrow_count == 0 ) {
6957 ->permanently_decrease_AVAILABLE_SPACES(
6962 ->tentatively_decrease_AVAILABLE_SPACES(
6969 $j <= $max_gnu_item_index ;
6974 ->decrease_SPACES($available_spaces);
6981 --$max_gnu_stack_index;
6982 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6983 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6985 # stop when we reach a level at or below the current level
6986 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6988 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6989 $current_level = $lev;
6990 $current_ci_level = $ci_lev;
6995 # reached bottom of stack .. should never happen because
6996 # only negative levels can get here, and $level was forced
6997 # to be positive above.
7000 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
7002 report_definite_bug();
7008 # handle increasing depth
7009 if ( $level > $current_level || $ci_level > $current_ci_level ) {
7011 # Compute the standard incremental whitespace. This will be
7012 # the minimum incremental whitespace that will be used. This
7013 # choice results in a smooth transition between the gnu-style
7014 # and the standard style.
7015 my $standard_increment =
7016 ( $level - $current_level ) * $rOpts_indent_columns +
7017 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
7019 # Now we have to define how much extra incremental space
7020 # ("$available_space") we want. This extra space will be
7021 # reduced as necessary when long lines are encountered or when
7022 # it becomes clear that we do not have a good list.
7023 my $available_space = 0;
7024 my $align_paren = 0;
7027 # initialization on empty stack..
7028 if ( $max_gnu_stack_index == 0 ) {
7029 $space_count = $level * $rOpts_indent_columns;
7032 # if this is a BLOCK, add the standard increment
7033 elsif ($last_nonblank_block_type) {
7034 $space_count += $standard_increment;
7037 # if last nonblank token was not structural indentation,
7038 # just use standard increment
7039 elsif ( $last_nonblank_type ne '{' ) {
7040 $space_count += $standard_increment;
7043 # otherwise use the space to the first non-blank level change token
7046 $space_count = $gnu_position_predictor;
7048 my $min_gnu_indentation =
7049 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
7051 $available_space = $space_count - $min_gnu_indentation;
7052 if ( $available_space >= $standard_increment ) {
7053 $min_gnu_indentation += $standard_increment;
7055 elsif ( $available_space > 1 ) {
7056 $min_gnu_indentation += $available_space + 1;
7058 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
7059 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
7060 $min_gnu_indentation += 2;
7063 $min_gnu_indentation += 1;
7067 $min_gnu_indentation += $standard_increment;
7069 $available_space = $space_count - $min_gnu_indentation;
7071 if ( $available_space < 0 ) {
7072 $space_count = $min_gnu_indentation;
7073 $available_space = 0;
7078 # update state, but not on a blank token
7079 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
7081 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
7083 ++$max_gnu_stack_index;
7084 $gnu_stack[$max_gnu_stack_index] =
7085 new_lp_indentation_item( $space_count, $level, $ci_level,
7086 $available_space, $align_paren );
7088 # If the opening paren is beyond the half-line length, then
7089 # we will use the minimum (standard) indentation. This will
7090 # help avoid problems associated with running out of space
7091 # near the end of a line. As a result, in deeply nested
7092 # lists, there will be some indentations which are limited
7093 # to this minimum standard indentation. But the most deeply
7094 # nested container will still probably be able to shift its
7095 # parameters to the right for proper alignment, so in most
7096 # cases this will not be noticeable.
7097 if ( $available_space > 0 && $space_count > $halfway ) {
7098 $gnu_stack[$max_gnu_stack_index]
7099 ->tentatively_decrease_AVAILABLE_SPACES($available_space);
7104 # Count commas and look for non-list characters. Once we see a
7105 # non-list character, we give up and don't look for any more commas.
7106 if ( $type eq '=>' ) {
7107 $gnu_arrow_count{$total_depth}++;
7109 # tentatively treating '=>' like '=' for estimating breaks
7110 # TODO: this could use some experimentation
7111 $last_gnu_equals{$total_depth} = $max_index_to_go;
7114 elsif ( $type eq ',' ) {
7115 $gnu_comma_count{$total_depth}++;
7118 elsif ( $is_assignment{$type} ) {
7119 $last_gnu_equals{$total_depth} = $max_index_to_go;
7122 # this token might start a new line
7123 # if this is a non-blank..
7124 if ( $type ne 'b' ) {
7129 # this is the first nonblank token of the line
7130 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
7132 # or previous character was one of these:
7133 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
7135 # or previous character was opening and this does not close it
7136 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
7137 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
7139 # or this token is one of these:
7140 || $type =~ /^([\.]|\|\||\&\&)$/
7142 # or this is a closing structure
7143 || ( $last_nonblank_type_to_go eq '}'
7144 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
7146 # or previous token was keyword 'return'
7147 || ( $last_nonblank_type_to_go eq 'k'
7148 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
7150 # or starting a new line at certain keywords is fine
7152 && $is_if_unless_and_or_last_next_redo_return{$token} )
7154 # or this is after an assignment after a closing structure
7156 $is_assignment{$last_nonblank_type_to_go}
7158 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
7160 # and it is significantly to the right
7161 || $gnu_position_predictor > $halfway
7166 check_for_long_gnu_style_lines();
7167 $line_start_index_to_go = $max_index_to_go;
7169 # back up 1 token if we want to break before that type
7170 # otherwise, we may strand tokens like '?' or ':' on a line
7171 if ( $line_start_index_to_go > 0 ) {
7172 if ( $last_nonblank_type_to_go eq 'k' ) {
7174 if ( $want_break_before{$last_nonblank_token_to_go} ) {
7175 $line_start_index_to_go--;
7178 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
7179 $line_start_index_to_go--;
7185 # remember the predicted position of this token on the output line
7186 if ( $max_index_to_go > $line_start_index_to_go ) {
7187 $gnu_position_predictor =
7188 total_line_length( $line_start_index_to_go, $max_index_to_go );
7191 $gnu_position_predictor =
7192 $space_count + $token_lengths_to_go[$max_index_to_go];
7195 # store the indentation object for this token
7196 # this allows us to manipulate the leading whitespace
7197 # (in case we have to reduce indentation to fit a line) without
7198 # having to change any token values
7199 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
7200 $reduced_spaces_to_go[$max_index_to_go] =
7201 ( $max_gnu_stack_index > 0 && $ci_level )
7202 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
7203 : $gnu_stack[$max_gnu_stack_index];
7207 sub check_for_long_gnu_style_lines {
7209 # look at the current estimated maximum line length, and
7210 # remove some whitespace if it exceeds the desired maximum
7212 # this is only for the '-lp' style
7213 return unless ($rOpts_line_up_parentheses);
7215 # nothing can be done if no stack items defined for this line
7216 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7218 # see if we have exceeded the maximum desired line length
7219 # keep 2 extra free because they are needed in some cases
7220 # (result of trial-and-error testing)
7222 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
7224 return if ( $spaces_needed <= 0 );
7226 # We are over the limit, so try to remove a requested number of
7227 # spaces from leading whitespace. We are only allowed to remove
7228 # from whitespace items created on this batch, since others have
7229 # already been used and cannot be undone.
7230 my @candidates = ();
7233 # loop over all whitespace items created for the current batch
7234 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7235 my $item = $gnu_item_list[$i];
7237 # item must still be open to be a candidate (otherwise it
7238 # cannot influence the current token)
7239 next if ( $item->get_CLOSED() >= 0 );
7241 my $available_spaces = $item->get_AVAILABLE_SPACES();
7243 if ( $available_spaces > 0 ) {
7244 push( @candidates, [ $i, $available_spaces ] );
7248 return unless (@candidates);
7250 # sort by available whitespace so that we can remove whitespace
7251 # from the maximum available first
7252 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
7254 # keep removing whitespace until we are done or have no more
7256 foreach $candidate (@candidates) {
7257 my ( $i, $available_spaces ) = @{$candidate};
7258 my $deleted_spaces =
7259 ( $available_spaces > $spaces_needed )
7261 : $available_spaces;
7263 # remove the incremental space from this item
7264 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
7268 # update the leading whitespace of this item and all items
7269 # that came after it
7270 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
7272 my $old_spaces = $gnu_item_list[$i]->get_SPACES();
7273 if ( $old_spaces >= $deleted_spaces ) {
7274 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
7277 # shouldn't happen except for code bug:
7279 my $level = $gnu_item_list[$i_debug]->get_LEVEL();
7280 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
7281 my $old_level = $gnu_item_list[$i]->get_LEVEL();
7282 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
7284 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
7286 report_definite_bug();
7289 $gnu_position_predictor -= $deleted_spaces;
7290 $spaces_needed -= $deleted_spaces;
7291 last unless ( $spaces_needed > 0 );
7295 sub finish_lp_batch {
7297 # This routine is called once after each output stream batch is
7298 # finished to undo indentation for all incomplete -lp
7299 # indentation levels. It is too risky to leave a level open,
7300 # because then we can't backtrack in case of a long line to follow.
7301 # This means that comments and blank lines will disrupt this
7302 # indentation style. But the vertical aligner may be able to
7303 # get the space back if there are side comments.
7305 # this is only for the 'lp' style
7306 return unless ($rOpts_line_up_parentheses);
7308 # nothing can be done if no stack items defined for this line
7309 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7311 # loop over all whitespace items created for the current batch
7313 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7314 my $item = $gnu_item_list[$i];
7316 # only look for open items
7317 next if ( $item->get_CLOSED() >= 0 );
7319 # Tentatively remove all of the available space
7320 # (The vertical aligner will try to get it back later)
7321 my $available_spaces = $item->get_AVAILABLE_SPACES();
7322 if ( $available_spaces > 0 ) {
7324 # delete incremental space for this item
7326 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
7328 # Reduce the total indentation space of any nodes that follow
7329 # Note that any such nodes must necessarily be dependents
7331 foreach ( $i + 1 .. $max_gnu_item_index ) {
7332 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
7339 sub reduce_lp_indentation {
7341 # reduce the leading whitespace at token $i if possible by $spaces_needed
7342 # (a large value of $spaces_needed will remove all excess space)
7343 # NOTE: to be called from scan_list only for a sequence of tokens
7344 # contained between opening and closing parens/braces/brackets
7346 my ( $i, $spaces_wanted ) = @_;
7347 my $deleted_spaces = 0;
7349 my $item = $leading_spaces_to_go[$i];
7350 my $available_spaces = $item->get_AVAILABLE_SPACES();
7353 $available_spaces > 0
7354 && ( ( $spaces_wanted <= $available_spaces )
7355 || !$item->get_HAVE_CHILD() )
7359 # we'll remove these spaces, but mark them as recoverable
7361 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
7364 return $deleted_spaces;
7367 sub token_sequence_length {
7369 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
7370 # returns 0 if $ibeg > $iend (shouldn't happen)
7371 my ( $ibeg, $iend ) = @_;
7372 return 0 if ( $iend < 0 || $ibeg > $iend );
7373 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
7374 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
7377 sub total_line_length {
7379 # return length of a line of tokens ($ibeg .. $iend)
7380 my ( $ibeg, $iend ) = @_;
7381 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
7384 sub maximum_line_length_for_level {
7386 # return maximum line length for line starting with a given level
7387 my $maximum_line_length = $rOpts_maximum_line_length;
7389 # Modify if -vmll option is selected
7390 if ($rOpts_variable_maximum_line_length) {
7392 if ( $level < 0 ) { $level = 0 }
7393 $maximum_line_length += $level * $rOpts_indent_columns;
7395 return $maximum_line_length;
7398 sub maximum_line_length {
7400 # return maximum line length for line starting with the token at given index
7401 return maximum_line_length_for_level( $levels_to_go[ $_[0] ] );
7405 sub excess_line_length {
7407 # return number of characters by which a line of tokens ($ibeg..$iend)
7408 # exceeds the allowable line length.
7409 my ( $ibeg, $iend ) = @_;
7410 return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg);
7413 sub finish_formatting {
7415 # flush buffer and write any informative messages
7419 $file_writer_object->decrement_output_line_number()
7420 ; # fix up line number since it was incremented
7421 we_are_at_the_last_line();
7422 if ( $added_semicolon_count > 0 ) {
7423 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
7425 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
7426 write_logfile_entry("$added_semicolon_count $what added:\n");
7427 write_logfile_entry(
7428 " $first at input line $first_added_semicolon_at\n");
7430 if ( $added_semicolon_count > 1 ) {
7431 write_logfile_entry(
7432 " Last at input line $last_added_semicolon_at\n");
7434 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
7435 write_logfile_entry("\n");
7438 if ( $deleted_semicolon_count > 0 ) {
7439 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
7441 ( $deleted_semicolon_count > 1 )
7444 write_logfile_entry(
7445 "$deleted_semicolon_count unnecessary $what deleted:\n");
7446 write_logfile_entry(
7447 " $first at input line $first_deleted_semicolon_at\n");
7449 if ( $deleted_semicolon_count > 1 ) {
7450 write_logfile_entry(
7451 " Last at input line $last_deleted_semicolon_at\n");
7453 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
7454 write_logfile_entry("\n");
7457 if ( $embedded_tab_count > 0 ) {
7458 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
7460 ( $embedded_tab_count > 1 )
7461 ? "quotes or patterns"
7462 : "quote or pattern";
7463 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
7464 write_logfile_entry(
7465 "This means the display of this script could vary with device or software\n"
7467 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
7469 if ( $embedded_tab_count > 1 ) {
7470 write_logfile_entry(
7471 " Last at input line $last_embedded_tab_at\n");
7473 write_logfile_entry("\n");
7476 if ($first_tabbing_disagreement) {
7477 write_logfile_entry(
7478 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
7482 if ($in_tabbing_disagreement) {
7483 write_logfile_entry(
7484 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
7489 if ($last_tabbing_disagreement) {
7491 write_logfile_entry(
7492 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
7496 write_logfile_entry("No indentation disagreement seen\n");
7499 if ($first_tabbing_disagreement) {
7500 write_logfile_entry(
7501 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
7504 write_logfile_entry("\n");
7506 $vertical_aligner_object->report_anything_unusual();
7508 $file_writer_object->report_line_length_errors();
7513 # This routine is called to check the Opts hash after it is defined
7517 make_static_block_comment_pattern();
7518 make_static_side_comment_pattern();
7519 make_closing_side_comment_prefix();
7520 make_closing_side_comment_list_pattern();
7521 $format_skipping_pattern_begin =
7522 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7523 $format_skipping_pattern_end =
7524 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7526 # If closing side comments ARE selected, then we can safely
7527 # delete old closing side comments unless closing side comment
7528 # warnings are requested. This is a good idea because it will
7529 # eliminate any old csc's which fall below the line count threshold.
7530 # We cannot do this if warnings are turned on, though, because we
7531 # might delete some text which has been added. So that must
7532 # be handled when comments are created.
7533 if ( $rOpts->{'closing-side-comments'} ) {
7534 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7535 $rOpts->{'delete-closing-side-comments'} = 1;
7539 # If closing side comments ARE NOT selected, but warnings ARE
7540 # selected and we ARE DELETING csc's, then we will pretend to be
7541 # adding with a huge interval. This will force the comments to be
7542 # generated for comparison with the old comments, but not added.
7543 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7544 if ( $rOpts->{'delete-closing-side-comments'} ) {
7545 $rOpts->{'delete-closing-side-comments'} = 0;
7546 $rOpts->{'closing-side-comments'} = 1;
7547 $rOpts->{'closing-side-comment-interval'} = 100000000;
7552 make_block_brace_vertical_tightness_pattern();
7554 if ( $rOpts->{'line-up-parentheses'} ) {
7556 if ( $rOpts->{'indent-only'}
7557 || !$rOpts->{'add-newlines'}
7558 || !$rOpts->{'delete-old-newlines'} )
7560 Perl::Tidy::Warn <<EOM;
7561 -----------------------------------------------------------------------
7562 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7564 The -lp indentation logic requires that perltidy be able to coordinate
7565 arbitrarily large numbers of line breakpoints. This isn't possible
7566 with these flags. Sometimes an acceptable workaround is to use -wocb=3
7567 -----------------------------------------------------------------------
7569 $rOpts->{'line-up-parentheses'} = 0;
7573 # At present, tabs are not compatible with the line-up-parentheses style
7574 # (it would be possible to entab the total leading whitespace
7575 # just prior to writing the line, if desired).
7576 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7577 Perl::Tidy::Warn <<EOM;
7578 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
7580 $rOpts->{'tabs'} = 0;
7583 # Likewise, tabs are not compatible with outdenting..
7584 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7585 Perl::Tidy::Warn <<EOM;
7586 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7588 $rOpts->{'tabs'} = 0;
7591 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7592 Perl::Tidy::Warn <<EOM;
7593 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
7595 $rOpts->{'tabs'} = 0;
7598 if ( !$rOpts->{'space-for-semicolon'} ) {
7599 $want_left_space{'f'} = -1;
7602 if ( $rOpts->{'space-terminal-semicolon'} ) {
7603 $want_left_space{';'} = 1;
7606 # implement outdenting preferences for keywords
7607 %outdent_keyword = ();
7608 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7609 @_ = qw(next last redo goto return); # defaults
7612 # FUTURE: if not a keyword, assume that it is an identifier
7614 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7615 $outdent_keyword{$_} = 1;
7618 Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
7622 # implement user whitespace preferences
7623 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7624 @want_left_space{@_} = (1) x scalar(@_);
7627 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7628 @want_right_space{@_} = (1) x scalar(@_);
7631 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7632 @want_left_space{@_} = (-1) x scalar(@_);
7635 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7636 @want_right_space{@_} = (-1) x scalar(@_);
7638 if ( $rOpts->{'dump-want-left-space'} ) {
7639 dump_want_left_space(*STDOUT);
7643 if ( $rOpts->{'dump-want-right-space'} ) {
7644 dump_want_right_space(*STDOUT);
7648 # default keywords for which space is introduced before an opening paren
7649 # (at present, including them messes up vertical alignment)
7650 @_ = qw(my local our and or err eq ne if else elsif until
7651 unless while for foreach return switch case given when);
7652 @space_after_keyword{@_} = (1) x scalar(@_);
7654 # first remove any or all of these if desired
7655 if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7657 # -nsak='*' selects all the above keywords
7658 if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
7659 @space_after_keyword{@_} = (0) x scalar(@_);
7662 # then allow user to add to these defaults
7663 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7664 @space_after_keyword{@_} = (1) x scalar(@_);
7667 # implement user break preferences
7668 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7669 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7670 . : ? && || and or err xor
7673 my $break_after = sub {
7674 foreach my $tok (@_) {
7675 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
7676 my $lbs = $left_bond_strength{$tok};
7677 my $rbs = $right_bond_strength{$tok};
7678 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7679 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7685 my $break_before = sub {
7686 foreach my $tok (@_) {
7687 my $lbs = $left_bond_strength{$tok};
7688 my $rbs = $right_bond_strength{$tok};
7689 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7690 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7696 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7697 $break_before->(@all_operators)
7698 if ( $rOpts->{'break-before-all-operators'} );
7700 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7701 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7703 # make note if breaks are before certain key types
7704 %want_break_before = ();
7705 foreach my $tok ( @all_operators, ',' ) {
7706 $want_break_before{$tok} =
7707 $left_bond_strength{$tok} < $right_bond_strength{$tok};
7710 # Coordinate ?/: breaks, which must be similar
7711 if ( !$want_break_before{':'} ) {
7712 $want_break_before{'?'} = $want_break_before{':'};
7713 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7714 $left_bond_strength{'?'} = NO_BREAK;
7717 # Define here tokens which may follow the closing brace of a do statement
7718 # on the same line, as in:
7719 # } while ( $something);
7720 @_ = qw(until while unless if ; : );
7722 @is_do_follower{@_} = (1) x scalar(@_);
7724 # These tokens may follow the closing brace of an if or elsif block.
7725 # In other words, for cuddled else we want code to look like:
7726 # } elsif ( $something) {
7728 if ( $rOpts->{'cuddled-else'} ) {
7729 @_ = qw(else elsif);
7730 @is_if_brace_follower{@_} = (1) x scalar(@_);
7733 %is_if_brace_follower = ();
7736 # nothing can follow the closing curly of an else { } block:
7737 %is_else_brace_follower = ();
7739 # what can follow a multi-line anonymous sub definition closing curly:
7740 @_ = qw# ; : => or and && || ~~ !~~ ) #;
7742 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7744 # what can follow a one-line anonymous sub closing curly:
7745 # one-line anonymous subs also have ']' here...
7746 # see tk3.t and PP.pm
7747 @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
7749 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7751 # What can follow a closing curly of a block
7752 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7753 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7754 @_ = qw# ; : => or and && || ) #;
7757 # allow cuddled continue if cuddled else is specified
7758 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7760 @is_other_brace_follower{@_} = (1) x scalar(@_);
7762 $right_bond_strength{'{'} = WEAK;
7763 $left_bond_strength{'{'} = VERY_STRONG;
7765 # make -l=0 equal to -l=infinite
7766 if ( !$rOpts->{'maximum-line-length'} ) {
7767 $rOpts->{'maximum-line-length'} = 1000000;
7770 # make -lbl=0 equal to -lbl=infinite
7771 if ( !$rOpts->{'long-block-line-count'} ) {
7772 $rOpts->{'long-block-line-count'} = 1000000;
7775 my $ole = $rOpts->{'output-line-ending'};
7784 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7785 my $str = join " ", keys %endings;
7786 Perl::Tidy::Die <<EOM;
7787 Unrecognized line ending '$ole'; expecting one of: $str
7790 if ( $rOpts->{'preserve-line-endings'} ) {
7791 Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
7792 $rOpts->{'preserve-line-endings'} = undef;
7796 # hashes used to simplify setting whitespace
7798 '{' => $rOpts->{'brace-tightness'},
7799 '}' => $rOpts->{'brace-tightness'},
7800 '(' => $rOpts->{'paren-tightness'},
7801 ')' => $rOpts->{'paren-tightness'},
7802 '[' => $rOpts->{'square-bracket-tightness'},
7803 ']' => $rOpts->{'square-bracket-tightness'},
7812 # frequently used parameters
7813 $rOpts_add_newlines = $rOpts->{'add-newlines'};
7814 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
7815 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7816 $rOpts_block_brace_vertical_tightness =
7817 $rOpts->{'block-brace-vertical-tightness'};
7818 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
7819 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7820 $rOpts_break_at_old_ternary_breakpoints =
7821 $rOpts->{'break-at-old-ternary-breakpoints'};
7822 $rOpts_break_at_old_attribute_breakpoints =
7823 $rOpts->{'break-at-old-attribute-breakpoints'};
7824 $rOpts_break_at_old_comma_breakpoints =
7825 $rOpts->{'break-at-old-comma-breakpoints'};
7826 $rOpts_break_at_old_keyword_breakpoints =
7827 $rOpts->{'break-at-old-keyword-breakpoints'};
7828 $rOpts_break_at_old_logical_breakpoints =
7829 $rOpts->{'break-at-old-logical-breakpoints'};
7830 $rOpts_closing_side_comment_else_flag =
7831 $rOpts->{'closing-side-comment-else-flag'};
7832 $rOpts_closing_side_comment_maximum_text =
7833 $rOpts->{'closing-side-comment-maximum-text'};
7834 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7835 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
7836 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
7837 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
7838 $rOpts_indent_columns = $rOpts->{'indent-columns'};
7839 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
7840 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7841 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
7842 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
7844 $rOpts_variable_maximum_line_length =
7845 $rOpts->{'variable-maximum-line-length'};
7846 $rOpts_short_concatenation_item_length =
7847 $rOpts->{'short-concatenation-item-length'};
7849 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
7850 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
7851 $rOpts_format_skipping = $rOpts->{'format-skipping'};
7852 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
7853 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
7854 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7855 $rOpts_ignore_side_comment_lengths =
7856 $rOpts->{'ignore-side-comment-lengths'};
7858 # Note that both opening and closing tokens can access the opening
7859 # and closing flags of their container types.
7860 %opening_vertical_tightness = (
7861 '(' => $rOpts->{'paren-vertical-tightness'},
7862 '{' => $rOpts->{'brace-vertical-tightness'},
7863 '[' => $rOpts->{'square-bracket-vertical-tightness'},
7864 ')' => $rOpts->{'paren-vertical-tightness'},
7865 '}' => $rOpts->{'brace-vertical-tightness'},
7866 ']' => $rOpts->{'square-bracket-vertical-tightness'},
7869 %closing_vertical_tightness = (
7870 '(' => $rOpts->{'paren-vertical-tightness-closing'},
7871 '{' => $rOpts->{'brace-vertical-tightness-closing'},
7872 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7873 ')' => $rOpts->{'paren-vertical-tightness-closing'},
7874 '}' => $rOpts->{'brace-vertical-tightness-closing'},
7875 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7878 $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'};
7880 # assume flag for '>' same as ')' for closing qw quotes
7881 %closing_token_indentation = (
7882 ')' => $rOpts->{'closing-paren-indentation'},
7883 '}' => $rOpts->{'closing-brace-indentation'},
7884 ']' => $rOpts->{'closing-square-bracket-indentation'},
7885 '>' => $rOpts->{'closing-paren-indentation'},
7888 # flag indicating if any closing tokens are indented
7889 $some_closing_token_indentation =
7890 $rOpts->{'closing-paren-indentation'}
7891 || $rOpts->{'closing-brace-indentation'}
7892 || $rOpts->{'closing-square-bracket-indentation'}
7893 || $rOpts->{'indent-closing-brace'};
7895 %opening_token_right = (
7896 '(' => $rOpts->{'opening-paren-right'},
7897 '{' => $rOpts->{'opening-hash-brace-right'},
7898 '[' => $rOpts->{'opening-square-bracket-right'},
7901 %stack_opening_token = (
7902 '(' => $rOpts->{'stack-opening-paren'},
7903 '{' => $rOpts->{'stack-opening-hash-brace'},
7904 '[' => $rOpts->{'stack-opening-square-bracket'},
7907 %stack_closing_token = (
7908 ')' => $rOpts->{'stack-closing-paren'},
7909 '}' => $rOpts->{'stack-closing-hash-brace'},
7910 ']' => $rOpts->{'stack-closing-square-bracket'},
7912 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
7915 sub make_static_block_comment_pattern {
7917 # create the pattern used to identify static block comments
7918 $static_block_comment_pattern = '^\s*##';
7920 # allow the user to change it
7921 if ( $rOpts->{'static-block-comment-prefix'} ) {
7922 my $prefix = $rOpts->{'static-block-comment-prefix'};
7923 $prefix =~ s/^\s*//;
7924 my $pattern = $prefix;
7926 # user may give leading caret to force matching left comments only
7927 if ( $prefix !~ /^\^#/ ) {
7928 if ( $prefix !~ /^#/ ) {
7930 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7932 $pattern = '^\s*' . $prefix;
7934 eval "'##'=~/$pattern/";
7937 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7939 $static_block_comment_pattern = $pattern;
7943 sub make_format_skipping_pattern {
7944 my ( $opt_name, $default ) = @_;
7945 my $param = $rOpts->{$opt_name};
7946 unless ($param) { $param = $default }
7948 if ( $param !~ /^#/ ) {
7950 "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7952 my $pattern = '^' . $param . '\s';
7953 eval "'#'=~/$pattern/";
7956 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7961 sub make_closing_side_comment_list_pattern {
7963 # turn any input list into a regex for recognizing selected block types
7964 $closing_side_comment_list_pattern = '^\w+';
7965 if ( defined( $rOpts->{'closing-side-comment-list'} )
7966 && $rOpts->{'closing-side-comment-list'} )
7968 $closing_side_comment_list_pattern =
7969 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7973 sub make_bli_pattern {
7975 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7976 && $rOpts->{'brace-left-and-indent-list'} )
7978 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7981 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7984 sub make_block_brace_vertical_tightness_pattern {
7986 # turn any input list into a regex for recognizing selected block types
7987 $block_brace_vertical_tightness_pattern =
7988 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7989 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7990 && $rOpts->{'block-brace-vertical-tightness-list'} )
7992 $block_brace_vertical_tightness_pattern =
7993 make_block_pattern( '-bbvtl',
7994 $rOpts->{'block-brace-vertical-tightness-list'} );
7998 sub make_block_pattern {
8000 # given a string of block-type keywords, return a regex to match them
8001 # The only tricky part is that labels are indicated with a single ':'
8002 # and the 'sub' token text may have additional text after it (name of
8007 # input string: "if else elsif unless while for foreach do : sub";
8008 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
8010 my ( $abbrev, $string ) = @_;
8011 my @list = split_words($string);
8015 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
8018 if ( $i eq 'sub' ) {
8020 elsif ( $i eq ';' ) {
8023 elsif ( $i eq '{' ) {
8026 elsif ( $i eq ':' ) {
8027 push @words, '\w+:';
8029 elsif ( $i =~ /^\w/ ) {
8034 "unrecognized block type $i after $abbrev, ignoring\n";
8037 my $pattern = '(' . join( '|', @words ) . ')$';
8038 if ( $seen{'sub'} ) {
8039 $pattern = '(' . $pattern . '|sub)';
8041 $pattern = '^' . $pattern;
8045 sub make_static_side_comment_pattern {
8047 # create the pattern used to identify static side comments
8048 $static_side_comment_pattern = '^##';
8050 # allow the user to change it
8051 if ( $rOpts->{'static-side-comment-prefix'} ) {
8052 my $prefix = $rOpts->{'static-side-comment-prefix'};
8053 $prefix =~ s/^\s*//;
8054 my $pattern = '^' . $prefix;
8055 eval "'##'=~/$pattern/";
8058 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
8060 $static_side_comment_pattern = $pattern;
8064 sub make_closing_side_comment_prefix {
8066 # Be sure we have a valid closing side comment prefix
8067 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
8068 my $csc_prefix_pattern;
8069 if ( !defined($csc_prefix) ) {
8070 $csc_prefix = '## end';
8071 $csc_prefix_pattern = '^##\s+end';
8074 my $test_csc_prefix = $csc_prefix;
8075 if ( $test_csc_prefix !~ /^#/ ) {
8076 $test_csc_prefix = '#' . $test_csc_prefix;
8079 # make a regex to recognize the prefix
8080 my $test_csc_prefix_pattern = $test_csc_prefix;
8082 # escape any special characters
8083 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
8085 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
8087 # allow exact number of intermediate spaces to vary
8088 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
8090 # make sure we have a good pattern
8091 # if we fail this we probably have an error in escaping
8093 eval "'##'=~/$test_csc_prefix_pattern/";
8096 # shouldn't happen..must have screwed up escaping, above
8097 report_definite_bug();
8099 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
8101 # just warn and keep going with defaults
8102 Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
8104 "Using default -cscp instead; please check output\n";
8107 $csc_prefix = $test_csc_prefix;
8108 $csc_prefix_pattern = $test_csc_prefix_pattern;
8111 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
8112 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
8115 sub dump_want_left_space {
8119 These values are the main control of whitespace to the left of a token type;
8120 They may be altered with the -wls parameter.
8121 For a list of token types, use perltidy --dump-token-types (-dtt)
8122 1 means the token wants a space to its left
8123 -1 means the token does not want a space to its left
8124 ------------------------------------------------------------------------
8126 foreach ( sort keys %want_left_space ) {
8127 print $fh "$_\t$want_left_space{$_}\n";
8131 sub dump_want_right_space {
8135 These values are the main control of whitespace to the right of a token type;
8136 They may be altered with the -wrs parameter.
8137 For a list of token types, use perltidy --dump-token-types (-dtt)
8138 1 means the token wants a space to its right
8139 -1 means the token does not want a space to its right
8140 ------------------------------------------------------------------------
8142 foreach ( sort keys %want_right_space ) {
8143 print $fh "$_\t$want_right_space{$_}\n";
8147 { # begin is_essential_whitespace
8149 my %is_sort_grep_map;
8154 @_ = qw(sort grep map);
8155 @is_sort_grep_map{@_} = (1) x scalar(@_);
8157 @_ = qw(for foreach);
8158 @is_for_foreach{@_} = (1) x scalar(@_);
8162 sub is_essential_whitespace {
8164 # Essential whitespace means whitespace which cannot be safely deleted
8165 # without risking the introduction of a syntax error.
8166 # We are given three tokens and their types:
8167 # ($tokenl, $typel) is the token to the left of the space in question
8168 # ($tokenr, $typer) is the token to the right of the space in question
8169 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
8171 # This is a slow routine but is not needed too often except when -mangle
8174 # Note: This routine should almost never need to be changed. It is
8175 # for avoiding syntax problems rather than for formatting.
8176 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
8180 # never combine two bare words or numbers
8181 # examples: and ::ok(1)
8183 # for bla::bla:: abc
8184 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8185 # $input eq"quit" to make $inputeq"quit"
8186 # my $size=-s::SINK if $file; <==OK but we won't do it
8187 # don't join something like: for bla::bla:: abc
8188 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8189 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
8190 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
8192 # do not combine a number with a concatenation dot
8193 # example: pom.caputo:
8194 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
8195 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
8196 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
8198 # do not join a minus with a bare word, because you might form
8199 # a file test operator. Example from Complex.pm:
8200 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
8201 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
8203 # and something like this could become ambiguous without space
8205 # use constant III=>1;
8209 || ( ( $tokenl eq '-' )
8210 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
8212 # '= -' should not become =- or you will get a warning
8214 # || ($tokenr eq '-')
8216 # keep a space between a quote and a bareword to prevent the
8217 # bareword from becoming a quote modifier.
8218 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8220 # keep a space between a token ending in '$' and any word;
8221 # this caused trouble: "die @$ if $@"
8222 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
8223 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8225 # perl is very fussy about spaces before <<
8226 || ( $tokenr =~ /^\<\</ )
8228 # avoid combining tokens to create new meanings. Example:
8229 # $a+ +$b must not become $a++$b
8230 || ( $is_digraph{ $tokenl . $tokenr } )
8231 || ( $is_trigraph{ $tokenl . $tokenr } )
8233 # another example: do not combine these two &'s:
8234 # allow_options & &OPT_EXECCGI
8235 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
8237 # don't combine $$ or $# with any alphanumeric
8238 # (testfile mangle.t with --mangle)
8239 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
8241 # retain any space after possible filehandle
8242 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
8243 || ( $typel eq 'Z' )
8245 # Perl is sensitive to whitespace after the + here:
8246 # $b = xvals $a + 0.1 * yvals $a;
8247 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
8249 # keep paren separate in 'use Foo::Bar ()'
8253 && $tokenll eq 'use' )
8255 # keep any space between filehandle and paren:
8256 # file mangle.t with --mangle:
8257 || ( $typel eq 'Y' && $tokenr eq '(' )
8259 # retain any space after here doc operator ( hereerr.t)
8260 || ( $typel eq 'h' )
8262 # be careful with a space around ++ and --, to avoid ambiguity as to
8263 # which token it applies
8264 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
8265 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
8267 # need space after foreach my; for example, this will fail in
8268 # older versions of Perl:
8269 # foreach my$ft(@filetypes)...
8274 && $is_for_foreach{$tokenll}
8278 # must have space between grep and left paren; "grep(" will fail
8279 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
8281 # don't stick numbers next to left parens, as in:
8282 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
8283 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
8285 # We must be sure that a space between a ? and a quoted string
8286 # remains if the space before the ? remains. [Loca.pm, lockarea]
8288 # $b=join $comma ? ',' : ':', @_; # ok
8289 # $b=join $comma?',' : ':', @_; # ok!
8290 # $b=join $comma ?',' : ':', @_; # error!
8291 # Not really required:
8292 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
8294 # do not remove space between an '&' and a bare word because
8295 # it may turn into a function evaluation, like here
8296 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
8297 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
8298 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8300 # space stacked labels (TODO: check if really necessary)
8301 || ( $typel eq 'J' && $typer eq 'J' )
8303 ; # the value of this long logic sequence is the result we want
8309 my %secret_operators;
8310 my %is_leading_secret_token;
8314 # token lists for perl secret operators as compiled by Philippe Bruhat
8315 # at: https://metacpan.org/module/perlsecret
8316 %secret_operators = (
8317 'Goatse' => [qw#= ( ) =#], #=( )=
8318 'Venus1' => [qw#0 +#], # 0+
8319 'Venus2' => [qw#+ 0#], # +0
8320 'Enterprise' => [qw#) x ! !#], # ()x!!
8321 'Kite1' => [qw#~ ~ <>#], # ~~<>
8322 'Kite2' => [qw#~~ <>#], # ~~<>
8323 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
8326 # The following operators and constants are not included because they
8327 # are normally kept tight by perltidy:
8331 # Make a lookup table indexed by the first token of each operator:
8332 # first token => [list, list, ...]
8333 foreach my $value ( values(%secret_operators) ) {
8334 my $tok = $value->[0];
8335 push @{ $is_leading_secret_token{$tok} }, $value;
8339 sub secret_operator_whitespace {
8341 my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_;
8343 # Loop over all tokens in this line
8344 my ( $j, $token, $type );
8345 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8347 $token = $$rtokens[$j];
8348 $type = $$rtoken_type[$j];
8350 # Skip unless this token might start a secret operator
8351 next if ( $type eq 'b' );
8352 next unless ( $is_leading_secret_token{$token} );
8354 # Loop over all secret operators with this leading token
8355 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
8357 foreach my $tok ( @{$rpattern} ) {
8361 if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' );
8362 if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) {
8370 # set flags to prevent spaces within this operator
8371 for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) {
8372 $rwhite_space_flag->[$jj] = WS_NO;
8377 } ## End Loop over all operators
8378 } ## End loop over all tokens
8382 sub set_white_space_flag {
8384 # This routine examines each pair of nonblank tokens and
8385 # sets values for array @white_space_flag.
8387 # $white_space_flag[$j] is a flag indicating whether a white space
8388 # BEFORE token $j is needed, with the following values:
8390 # WS_NO = -1 do not want a space before token $j
8391 # WS_OPTIONAL= 0 optional space or $j is a whitespace
8392 # WS_YES = 1 want a space before token $j
8395 # The values for the first token will be defined based
8396 # upon the contents of the "to_go" output array.
8398 # Note: retain debug print statements because they are usually
8399 # required after adding new token types.
8403 # initialize these global hashes, which control the use of
8404 # whitespace around tokens:
8409 # %space_after_keyword
8411 # Many token types are identical to the tokens themselves.
8412 # See the tokenizer for a complete list. Here are some special types:
8414 # f = semicolon in for statement
8417 # Note that :: is excluded since it should be contained in an identifier
8418 # Note that '->' is excluded because it never gets space
8419 # parentheses and brackets are excluded since they are handled specially
8420 # curly braces are included but may be overridden by logic, such as
8423 # NEW_TOKENS: create a whitespace rule here. This can be as
8424 # simple as adding your new letter to @spaces_both_sides, for
8428 @is_opening_type{@_} = (1) x scalar(@_);
8431 @is_closing_type{@_} = (1) x scalar(@_);
8433 my @spaces_both_sides = qw"
8434 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
8435 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
8436 &&= ||= //= <=> A k f w F n C Y U G v
8439 my @spaces_left_side = qw"
8440 t ! ~ m p { \ h pp mm Z j
8442 push( @spaces_left_side, '#' ); # avoids warning message
8444 my @spaces_right_side = qw"
8445 ; } ) ] R J ++ -- **=
8447 push( @spaces_right_side, ',' ); # avoids warning message
8449 # Note that we are in a BEGIN block here. Later in processing
8450 # the values of %want_left_space and %want_right_space
8451 # may be overridden by any user settings specified by the
8452 # -wls and -wrs parameters. However the binary_whitespace_rules
8453 # are hardwired and have priority.
8454 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
8455 @want_right_space{@spaces_both_sides} =
8456 (1) x scalar(@spaces_both_sides);
8457 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
8458 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
8459 @want_left_space{@spaces_right_side} =
8460 (-1) x scalar(@spaces_right_side);
8461 @want_right_space{@spaces_right_side} =
8462 (1) x scalar(@spaces_right_side);
8463 $want_left_space{'->'} = WS_NO;
8464 $want_right_space{'->'} = WS_NO;
8465 $want_left_space{'**'} = WS_NO;
8466 $want_right_space{'**'} = WS_NO;
8467 $want_right_space{'CORE::'} = WS_NO;
8469 # These binary_ws_rules are hardwired and have priority over the above
8470 # settings. It would be nice to allow adjustment by the user,
8471 # but it would be complicated to specify.
8473 # hash type information must stay tightly bound
8475 $binary_ws_rules{'i'}{'L'} = WS_NO;
8476 $binary_ws_rules{'i'}{'{'} = WS_YES;
8477 $binary_ws_rules{'k'}{'{'} = WS_YES;
8478 $binary_ws_rules{'U'}{'{'} = WS_YES;
8479 $binary_ws_rules{'i'}{'['} = WS_NO;
8480 $binary_ws_rules{'R'}{'L'} = WS_NO;
8481 $binary_ws_rules{'R'}{'{'} = WS_NO;
8482 $binary_ws_rules{'t'}{'L'} = WS_NO;
8483 $binary_ws_rules{'t'}{'{'} = WS_NO;
8484 $binary_ws_rules{'}'}{'L'} = WS_NO;
8485 $binary_ws_rules{'}'}{'{'} = WS_NO;
8486 $binary_ws_rules{'$'}{'L'} = WS_NO;
8487 $binary_ws_rules{'$'}{'{'} = WS_NO;
8488 $binary_ws_rules{'@'}{'L'} = WS_NO;
8489 $binary_ws_rules{'@'}{'{'} = WS_NO;
8490 $binary_ws_rules{'='}{'L'} = WS_YES;
8491 $binary_ws_rules{'J'}{'J'} = WS_YES;
8493 # the following includes ') {'
8494 # as in : if ( xxx ) { yyy }
8495 $binary_ws_rules{']'}{'L'} = WS_NO;
8496 $binary_ws_rules{']'}{'{'} = WS_NO;
8497 $binary_ws_rules{')'}{'{'} = WS_YES;
8498 $binary_ws_rules{')'}{'['} = WS_NO;
8499 $binary_ws_rules{']'}{'['} = WS_NO;
8500 $binary_ws_rules{']'}{'{'} = WS_NO;
8501 $binary_ws_rules{'}'}{'['} = WS_NO;
8502 $binary_ws_rules{'R'}{'['} = WS_NO;
8504 $binary_ws_rules{']'}{'++'} = WS_NO;
8505 $binary_ws_rules{']'}{'--'} = WS_NO;
8506 $binary_ws_rules{')'}{'++'} = WS_NO;
8507 $binary_ws_rules{')'}{'--'} = WS_NO;
8509 $binary_ws_rules{'R'}{'++'} = WS_NO;
8510 $binary_ws_rules{'R'}{'--'} = WS_NO;
8512 $binary_ws_rules{'i'}{'Q'} = WS_YES;
8513 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
8515 # FIXME: we could to split 'i' into variables and functions
8516 # and have no space for functions but space for variables. For now,
8517 # I have a special patch in the special rules below
8518 $binary_ws_rules{'i'}{'('} = WS_NO;
8520 $binary_ws_rules{'w'}{'('} = WS_NO;
8521 $binary_ws_rules{'w'}{'{'} = WS_YES;
8522 } ## end BEGIN block
8524 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
8525 my ( $last_token, $last_type, $last_block_type, $token, $type,
8527 my (@white_space_flag);
8528 my $j_tight_closing_paren = -1;
8530 if ( $max_index_to_go >= 0 ) {
8531 $token = $tokens_to_go[$max_index_to_go];
8532 $type = $types_to_go[$max_index_to_go];
8533 $block_type = $block_type_to_go[$max_index_to_go];
8535 #---------------------------------------------------------------
8536 # Patch due to splitting of tokens with leading ->
8537 #---------------------------------------------------------------
8539 # This routine is dealing with the raw tokens from the tokenizer,
8540 # but to get started it needs the previous token, which will
8541 # have been stored in the '_to_go' arrays.
8543 # This patch avoids requiring two iterations to
8544 # converge for cases such as the following, where a paren
8545 # comes in on a line following a variable with leading arrow:
8546 # $self->{main}->add_content_defer_opening
8547 # ($name, $wmkf, $self->{attrs}, $self);
8548 # In this case when we see the opening paren on line 2 we need
8549 # to know if the last token on the previous line had an arrow,
8550 # but it has already been split off so we have to add it back
8551 # in to avoid getting an unwanted space before the paren.
8552 if ( $type =~ /^[wi]$/ ) {
8553 my $im = $iprev_to_go[$max_index_to_go];
8554 my $tm = ( $im >= 0 ) ? $types_to_go[$im] : "";
8555 if ( $tm eq '->' ) { $token = $tm . $token }
8558 #---------------------------------------------------------------
8559 # End patch due to splitting of tokens with leading ->
8560 #---------------------------------------------------------------
8570 # main loop over all tokens to define the whitespace flags
8571 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8573 if ( $$rtoken_type[$j] eq 'b' ) {
8574 $white_space_flag[$j] = WS_OPTIONAL;
8578 # set a default value, to be changed as needed
8580 $last_token = $token;
8582 $last_block_type = $block_type;
8583 $token = $$rtokens[$j];
8584 $type = $$rtoken_type[$j];
8585 $block_type = $$rblock_type[$j];
8587 #---------------------------------------------------------------
8588 # Whitespace Rules Section 1:
8589 # Handle space on the inside of opening braces.
8590 #---------------------------------------------------------------
8593 if ( $is_opening_type{$last_type} ) {
8595 $j_tight_closing_paren = -1;
8597 # let's keep empty matched braces together: () {} []
8599 if ( $token eq $matching_token{$last_token} ) {
8609 # we're considering the right of an opening brace
8610 # tightness = 0 means always pad inside with space
8611 # tightness = 1 means pad inside if "complex"
8612 # tightness = 2 means never pad inside with space
8615 if ( $last_type eq '{'
8616 && $last_token eq '{'
8617 && $last_block_type )
8619 $tightness = $rOpts_block_brace_tightness;
8621 else { $tightness = $tightness{$last_token} }
8623 #=============================================================
8624 # Patch for test problem fabrice_bug.pl
8625 # We must always avoid spaces around a bare word beginning
8627 # my $before = ${^PREMATCH};
8628 # Because all of the following cause an error in perl:
8629 # my $before = ${ ^PREMATCH };
8630 # my $before = ${ ^PREMATCH};
8631 # my $before = ${^PREMATCH };
8632 # So if brace tightness flag is -bt=0 we must temporarily reset
8633 # to bt=1. Note that here we must set tightness=1 and not 2 so
8634 # that the closing space
8635 # is also avoided (via the $j_tight_closing_paren flag in coding)
8636 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8638 #=============================================================
8640 if ( $tightness <= 0 ) {
8643 elsif ( $tightness > 1 ) {
8648 # Patch to count '-foo' as single token so that
8649 # each of $a{-foo} and $a{foo} and $a{'foo'} do
8650 # not get spaces with default formatting.
8654 && $last_token eq '{'
8655 && $$rtoken_type[ $j + 1 ] eq 'w' );
8657 # $j_next is where a closing token should be if
8658 # the container has a single token
8660 ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8663 my $tok_next = $$rtokens[$j_next];
8664 my $type_next = $$rtoken_type[$j_next];
8666 # for tightness = 1, if there is just one token
8667 # within the matching pair, we will keep it tight
8669 $tok_next eq $matching_token{$last_token}
8671 # but watch out for this: [ [ ] (misc.t)
8672 && $last_token ne $token
8676 # remember where to put the space for the closing paren
8677 $j_tight_closing_paren = $j_next;
8685 } # end setting space flag inside opening tokens
8687 if FORMATTER_DEBUG_FLAG_WHITE;
8689 #---------------------------------------------------------------
8690 # Whitespace Rules Section 2:
8691 # Handle space on inside of closing brace pairs.
8692 #---------------------------------------------------------------
8695 if ( $is_closing_type{$type} ) {
8697 if ( $j == $j_tight_closing_paren ) {
8699 $j_tight_closing_paren = -1;
8704 if ( !defined($ws) ) {
8707 if ( $type eq '}' && $token eq '}' && $block_type ) {
8708 $tightness = $rOpts_block_brace_tightness;
8710 else { $tightness = $tightness{$token} }
8712 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8715 } # end setting space flag inside closing tokens
8718 if FORMATTER_DEBUG_FLAG_WHITE;
8720 #---------------------------------------------------------------
8721 # Whitespace Rules Section 3:
8722 # Use the binary rule table.
8723 #---------------------------------------------------------------
8724 if ( !defined($ws) ) {
8725 $ws = $binary_ws_rules{$last_type}{$type};
8728 if FORMATTER_DEBUG_FLAG_WHITE;
8730 #---------------------------------------------------------------
8731 # Whitespace Rules Section 4:
8732 # Handle some special cases.
8733 #---------------------------------------------------------------
8734 if ( $token eq '(' ) {
8736 # This will have to be tweaked as tokenization changes.
8737 # We usually want a space at '} (', for example:
8738 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8741 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8742 # At present, the above & block is marked as type L/R so this case
8743 # won't go through here.
8744 if ( $last_type eq '}' ) { $ws = WS_YES }
8746 # NOTE: some older versions of Perl had occasional problems if
8747 # spaces are introduced between keywords or functions and opening
8748 # parens. So the default is not to do this except is certain
8749 # cases. The current Perl seems to tolerate spaces.
8751 # Space between keyword and '('
8752 elsif ( $last_type eq 'k' ) {
8754 unless ( $rOpts_space_keyword_paren
8755 || $space_after_keyword{$last_token} );
8758 # Space between function and '('
8759 # -----------------------------------------------------
8760 # 'w' and 'i' checks for something like:
8761 # myfun( &myfun( ->myfun(
8762 # -----------------------------------------------------
8763 elsif (( $last_type =~ /^[wUG]$/ )
8764 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8766 $ws = WS_NO unless ($rOpts_space_function_paren);
8769 # space between something like $i and ( in
8770 # for $i ( 0 .. 20 ) {
8771 # FIXME: eventually, type 'i' needs to be split into multiple
8772 # token types so this can be a hardwired rule.
8773 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8777 # allow constant function followed by '()' to retain no space
8778 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8783 # patch for SWITCH/CASE: make space at ']{' optional
8784 # since the '{' might begin a case or when block
8785 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8789 # keep space between 'sub' and '{' for anonymous sub definition
8790 if ( $type eq '{' ) {
8791 if ( $last_token eq 'sub' ) {
8795 # this is needed to avoid no space in '){'
8796 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8798 # avoid any space before the brace or bracket in something like
8799 # @opts{'a','b',...}
8800 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8805 elsif ( $type eq 'i' ) {
8807 # never a space before ->
8808 if ( $token =~ /^\-\>/ ) {
8813 # retain any space between '-' and bare word
8814 elsif ( $type eq 'w' || $type eq 'C' ) {
8815 $ws = WS_OPTIONAL if $last_type eq '-';
8817 # never a space before ->
8818 if ( $token =~ /^\-\>/ ) {
8823 # retain any space between '-' and bare word
8824 # example: avoid space between 'USER' and '-' here:
8825 # $myhash{USER-NAME}='steve';
8826 elsif ( $type eq 'm' || $type eq '-' ) {
8827 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8830 # always space before side comment
8831 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8833 # always preserver whatever space was used after a possible
8834 # filehandle (except _) or here doc operator
8837 && ( ( $last_type eq 'Z' && $last_token ne '_' )
8838 || $last_type eq 'h' )
8845 if FORMATTER_DEBUG_FLAG_WHITE;
8847 #---------------------------------------------------------------
8848 # Whitespace Rules Section 5:
8849 # Apply default rules not covered above.
8850 #---------------------------------------------------------------
8852 # If we fall through to here, look at the pre-defined hash tables for
8853 # the two tokens, and:
8854 # if (they are equal) use the common value
8855 # if (either is zero or undef) use the other
8856 # if (either is -1) use it
8870 if ( !defined($ws) ) {
8871 my $wl = $want_left_space{$type};
8872 my $wr = $want_right_space{$last_type};
8873 if ( !defined($wl) ) { $wl = 0 }
8874 if ( !defined($wr) ) { $wr = 0 }
8875 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8878 if ( !defined($ws) ) {
8881 "WS flag is undefined for tokens $last_token $token\n");
8884 # Treat newline as a whitespace. Otherwise, we might combine
8885 # 'Send' and '-recipients' here according to the above rules:
8886 # my $msg = new Fax::Send
8887 # -recipients => $to,
8889 if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8894 && ( $last_type !~ /^[Zh]$/ ) )
8897 # If this happens, we have a non-fatal but undesirable
8898 # hole in the above rules which should be patched.
8900 "WS flag is zero for tokens $last_token $token\n");
8902 $white_space_flag[$j] = $ws;
8904 FORMATTER_DEBUG_FLAG_WHITE && do {
8905 my $str = substr( $last_token, 0, 15 );
8906 $str .= ' ' x ( 16 - length($str) );
8907 if ( !defined($ws_1) ) { $ws_1 = "*" }
8908 if ( !defined($ws_2) ) { $ws_2 = "*" }
8909 if ( !defined($ws_3) ) { $ws_3 = "*" }
8910 if ( !defined($ws_4) ) { $ws_4 = "*" }
8912 "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8916 if ($rOpts_tight_secret_operators) {
8917 secret_operator_whitespace( $jmax, $rtokens, $rtoken_type,
8918 \@white_space_flag );
8921 return \@white_space_flag;
8922 } ## end sub set_white_space_flag
8924 { # begin print_line_of_tokens
8931 my $rcontainer_type;
8932 my $rcontainer_environment;
8935 my $rnesting_tokens;
8937 my $rnesting_blocks;
8940 my $guessed_indentation_level;
8942 # These local token variables are stored by store_token_to_go:
8945 my $container_environment;
8947 my $in_continued_quote;
8950 my $no_internal_newlines;
8956 # routine to pull the jth token from the line of tokens
8959 $token = $$rtokens[$j];
8960 $type = $$rtoken_type[$j];
8961 $block_type = $$rblock_type[$j];
8962 $container_type = $$rcontainer_type[$j];
8963 $container_environment = $$rcontainer_environment[$j];
8964 $type_sequence = $$rtype_sequence[$j];
8965 $level = $$rlevels[$j];
8966 $slevel = $$rslevels[$j];
8967 $nesting_blocks = $$rnesting_blocks[$j];
8968 $ci_level = $$rci_levels[$j];
8974 sub save_current_token {
8977 $block_type, $ci_level,
8978 $container_environment, $container_type,
8979 $in_continued_quote, $level,
8980 $nesting_blocks, $no_internal_newlines,
8982 $type, $type_sequence,
8986 sub restore_current_token {
8988 $block_type, $ci_level,
8989 $container_environment, $container_type,
8990 $in_continued_quote, $level,
8991 $nesting_blocks, $no_internal_newlines,
8993 $type, $type_sequence,
9000 # Returns the length of a token, given:
9001 # $token=text of the token
9003 # $not_first_token = should be TRUE if this is not the first token of
9004 # the line. It might the index of this token in an array. It is
9005 # used to test for a side comment vs a block comment.
9006 # Note: Eventually this should be the only routine determining the
9007 # length of a token in this package.
9008 my ( $token, $type, $not_first_token ) = @_;
9009 my $token_length = length($token);
9011 # We mark lengths of side comments as just 1 if we are
9012 # ignoring their lengths when setting line breaks.
9014 if ( $rOpts_ignore_side_comment_lengths
9017 return $token_length;
9022 # return length of ith token in @{$rtokens}
9024 return token_length( $$rtokens[$i], $$rtoken_type[$i], $i );
9027 # Routine to place the current token into the output stream.
9028 # Called once per output token.
9029 sub store_token_to_go {
9031 my $flag = $no_internal_newlines;
9032 if ( $_[0] ) { $flag = 1 }
9034 $tokens_to_go[ ++$max_index_to_go ] = $token;
9035 $types_to_go[$max_index_to_go] = $type;
9036 $nobreak_to_go[$max_index_to_go] = $flag;
9037 $old_breakpoint_to_go[$max_index_to_go] = 0;
9038 $forced_breakpoint_to_go[$max_index_to_go] = 0;
9039 $block_type_to_go[$max_index_to_go] = $block_type;
9040 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
9041 $container_environment_to_go[$max_index_to_go] = $container_environment;
9042 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
9043 $ci_levels_to_go[$max_index_to_go] = $ci_level;
9044 $mate_index_to_go[$max_index_to_go] = -1;
9045 $matching_token_to_go[$max_index_to_go] = '';
9046 $bond_strength_to_go[$max_index_to_go] = 0;
9048 # Note: negative levels are currently retained as a diagnostic so that
9049 # the 'final indentation level' is correctly reported for bad scripts.
9050 # But this means that every use of $level as an index must be checked.
9051 # If this becomes too much of a problem, we might give up and just clip
9053 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
9054 $levels_to_go[$max_index_to_go] = $level;
9055 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
9057 # link the non-blank tokens
9058 my $iprev = $max_index_to_go - 1;
9059 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
9060 $iprev_to_go[$max_index_to_go] = $iprev;
9061 $inext_to_go[$iprev] = $max_index_to_go
9062 if ( $iprev >= 0 && $type ne 'b' );
9063 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
9065 $token_lengths_to_go[$max_index_to_go] =
9066 token_length( $token, $type, $max_index_to_go );
9068 # We keep a running sum of token lengths from the start of this batch:
9069 # summed_lengths_to_go[$i] = total length to just before token $i
9070 # summed_lengths_to_go[$i+1] = total length to just after token $i
9071 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
9072 $summed_lengths_to_go[$max_index_to_go] +
9073 $token_lengths_to_go[$max_index_to_go];
9075 # Define the indentation that this token would have if it started
9076 # a new line. We have to do this now because we need to know this
9077 # when considering one-line blocks.
9078 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
9080 # remember previous nonblank tokens seen
9081 if ( $type ne 'b' ) {
9082 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
9083 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
9084 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
9085 $last_nonblank_index_to_go = $max_index_to_go;
9086 $last_nonblank_type_to_go = $type;
9087 $last_nonblank_token_to_go = $token;
9088 if ( $type eq ',' ) {
9089 $comma_count_in_batch++;
9093 FORMATTER_DEBUG_FLAG_STORE && do {
9094 my ( $a, $b, $c ) = caller();
9096 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
9100 sub insert_new_token_to_go {
9102 # insert a new token into the output stream. use same level as
9103 # previous token; assumes a character at max_index_to_go.
9104 save_current_token();
9105 ( $token, $type, $slevel, $no_internal_newlines ) = @_;
9107 if ( $max_index_to_go == UNDEFINED_INDEX ) {
9108 warning("code bug: bad call to insert_new_token_to_go\n");
9110 $level = $levels_to_go[$max_index_to_go];
9112 # FIXME: it seems to be necessary to use the next, rather than
9113 # previous, value of this variable when creating a new blank (align.t)
9114 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
9115 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
9116 $ci_level = $ci_levels_to_go[$max_index_to_go];
9117 $container_environment = $container_environment_to_go[$max_index_to_go];
9118 $in_continued_quote = 0;
9120 $type_sequence = "";
9121 store_token_to_go();
9122 restore_current_token();
9126 sub print_line_of_tokens {
9128 my $line_of_tokens = shift;
9130 # This routine is called once per input line to process all of
9131 # the tokens on that line. This is the first stage of
9134 # Full-line comments and blank lines may be processed immediately.
9136 # For normal lines of code, the tokens are stored one-by-one,
9137 # via calls to 'sub store_token_to_go', until a known line break
9138 # point is reached. Then, the batch of collected tokens is
9139 # passed along to 'sub output_line_to_go' for further
9140 # processing. This routine decides if there should be
9141 # whitespace between each pair of non-white tokens, so later
9142 # routines only need to decide on any additional line breaks.
9143 # Any whitespace is initially a single space character. Later,
9144 # the vertical aligner may expand that to be multiple space
9145 # characters if necessary for alignment.
9147 # extract input line number for error messages
9148 $input_line_number = $line_of_tokens->{_line_number};
9150 $rtoken_type = $line_of_tokens->{_rtoken_type};
9151 $rtokens = $line_of_tokens->{_rtokens};
9152 $rlevels = $line_of_tokens->{_rlevels};
9153 $rslevels = $line_of_tokens->{_rslevels};
9154 $rblock_type = $line_of_tokens->{_rblock_type};
9155 $rcontainer_type = $line_of_tokens->{_rcontainer_type};
9156 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
9157 $rtype_sequence = $line_of_tokens->{_rtype_sequence};
9158 $input_line = $line_of_tokens->{_line_text};
9159 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
9160 $rci_levels = $line_of_tokens->{_rci_levels};
9161 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
9163 $in_continued_quote = $starting_in_quote =
9164 $line_of_tokens->{_starting_in_quote};
9165 $in_quote = $line_of_tokens->{_ending_in_quote};
9166 $ending_in_quote = $in_quote;
9167 $guessed_indentation_level =
9168 $line_of_tokens->{_guessed_indentation_level};
9173 my $next_nonblank_token;
9174 my $next_nonblank_token_type;
9175 my $rwhite_space_flag;
9177 $jmax = @$rtokens - 1;
9179 $container_type = "";
9180 $container_environment = "";
9181 $type_sequence = "";
9182 $no_internal_newlines = 1 - $rOpts_add_newlines;
9183 $is_static_block_comment = 0;
9185 # Handle a continued quote..
9186 if ($in_continued_quote) {
9188 # A line which is entirely a quote or pattern must go out
9189 # verbatim. Note: the \n is contained in $input_line.
9191 if ( ( $input_line =~ "\t" ) ) {
9192 note_embedded_tab();
9194 write_unindented_line("$input_line");
9195 $last_line_had_side_comment = 0;
9200 # Write line verbatim if we are in a formatting skip section
9201 if ($in_format_skipping_section) {
9202 write_unindented_line("$input_line");
9203 $last_line_had_side_comment = 0;
9205 # Note: extra space appended to comment simplifies pattern matching
9207 && $$rtoken_type[0] eq '#'
9208 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
9210 $in_format_skipping_section = 0;
9211 write_logfile_entry("Exiting formatting skip section\n");
9212 $file_writer_object->reset_consecutive_blank_lines();
9217 # See if we are entering a formatting skip section
9218 if ( $rOpts_format_skipping
9220 && $$rtoken_type[0] eq '#'
9221 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
9224 $in_format_skipping_section = 1;
9225 write_logfile_entry("Entering formatting skip section\n");
9226 write_unindented_line("$input_line");
9227 $last_line_had_side_comment = 0;
9231 # delete trailing blank tokens
9232 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
9234 # Handle a blank line..
9237 # If keep-old-blank-lines is zero, we delete all
9238 # old blank lines and let the blank line rules generate any
9240 if ($rOpts_keep_old_blank_lines) {
9242 $file_writer_object->write_blank_code_line(
9243 $rOpts_keep_old_blank_lines == 2 );
9244 $last_line_leading_type = 'b';
9246 $last_line_had_side_comment = 0;
9250 # see if this is a static block comment (starts with ## by default)
9251 my $is_static_block_comment_without_leading_space = 0;
9253 && $$rtoken_type[0] eq '#'
9254 && $rOpts->{'static-block-comments'}
9255 && $input_line =~ /$static_block_comment_pattern/o )
9257 $is_static_block_comment = 1;
9258 $is_static_block_comment_without_leading_space =
9259 substr( $input_line, 0, 1 ) eq '#';
9262 # Check for comments which are line directives
9263 # Treat exactly as static block comments without leading space
9264 # reference: perlsyn, near end, section Plain Old Comments (Not!)
9265 # example: '# line 42 "new_filename.plx"'
9268 && $$rtoken_type[0] eq '#'
9269 && $input_line =~ /^\# \s*
9271 (?:\s("?)([^"]+)\2)? \s*
9275 $is_static_block_comment = 1;
9276 $is_static_block_comment_without_leading_space = 1;
9279 # create a hanging side comment if appropriate
9280 my $is_hanging_side_comment;
9283 && $$rtoken_type[0] eq '#' # only token is a comment
9284 && $last_line_had_side_comment # last line had side comment
9285 && $input_line =~ /^\s/ # there is some leading space
9286 && !$is_static_block_comment # do not make static comment hanging
9287 && $rOpts->{'hanging-side-comments'} # user is allowing
9288 # hanging side comments
9293 # We will insert an empty qw string at the start of the token list
9294 # to force this comment to be a side comment. The vertical aligner
9295 # should then line it up with the previous side comment.
9296 $is_hanging_side_comment = 1;
9297 unshift @$rtoken_type, 'q';
9298 unshift @$rtokens, '';
9299 unshift @$rlevels, $$rlevels[0];
9300 unshift @$rslevels, $$rslevels[0];
9301 unshift @$rblock_type, '';
9302 unshift @$rcontainer_type, '';
9303 unshift @$rcontainer_environment, '';
9304 unshift @$rtype_sequence, '';
9305 unshift @$rnesting_tokens, $$rnesting_tokens[0];
9306 unshift @$rci_levels, $$rci_levels[0];
9307 unshift @$rnesting_blocks, $$rnesting_blocks[0];
9311 # remember if this line has a side comment
9312 $last_line_had_side_comment =
9313 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
9315 # Handle a block (full-line) comment..
9316 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
9318 if ( $rOpts->{'delete-block-comments'} ) { return }
9320 if ( $rOpts->{'tee-block-comments'} ) {
9321 $file_writer_object->tee_on();
9324 destroy_one_line_block();
9325 output_line_to_go();
9327 # output a blank line before block comments
9329 # unless we follow a blank or comment line
9330 $last_line_leading_type !~ /^[#b]$/
9333 && $rOpts->{'blanks-before-comments'}
9335 # not if this is an empty comment line
9336 && $$rtokens[0] ne '#'
9338 # not after a short line ending in an opening token
9339 # because we already have space above this comment.
9340 # Note that the first comment in this if block, after
9341 # the 'if (', does not get a blank line because of this.
9342 && !$last_output_short_opening_token
9344 # never before static block comments
9345 && !$is_static_block_comment
9348 flush(); # switching to new output stream
9349 $file_writer_object->write_blank_code_line();
9350 $last_line_leading_type = 'b';
9353 # TRIM COMMENTS -- This could be turned off as a option
9354 $$rtokens[0] =~ s/\s*$//; # trim right end
9357 $rOpts->{'indent-block-comments'}
9358 && ( !$rOpts->{'indent-spaced-block-comments'}
9359 || $input_line =~ /^\s+/ )
9360 && !$is_static_block_comment_without_leading_space
9364 store_token_to_go();
9365 output_line_to_go();
9368 flush(); # switching to new output stream
9369 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
9370 $last_line_leading_type = '#';
9372 if ( $rOpts->{'tee-block-comments'} ) {
9373 $file_writer_object->tee_off();
9378 # compare input/output indentation except for continuation lines
9379 # (because they have an unknown amount of initial blank space)
9380 # and lines which are quotes (because they may have been outdented)
9381 # Note: this test is placed here because we know the continuation flag
9382 # at this point, which allows us to avoid non-meaningful checks.
9383 my $structural_indentation_level = $$rlevels[0];
9384 compare_indentation_levels( $guessed_indentation_level,
9385 $structural_indentation_level )
9386 unless ( $is_hanging_side_comment
9387 || $$rci_levels[0] > 0
9388 || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' );
9390 # Patch needed for MakeMaker. Do not break a statement
9391 # in which $VERSION may be calculated. See MakeMaker.pm;
9392 # this is based on the coding in it.
9393 # The first line of a file that matches this will be eval'd:
9394 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9396 # *VERSION = \'1.01';
9397 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
9398 # We will pass such a line straight through without breaking
9399 # it unless -npvl is used
9401 my $is_VERSION_statement = 0;
9404 !$saw_VERSION_in_this_file
9405 && $input_line =~ /VERSION/ # quick check to reject most lines
9406 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9409 $saw_VERSION_in_this_file = 1;
9410 $is_VERSION_statement = 1;
9411 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
9412 $no_internal_newlines = 1;
9415 # take care of indentation-only
9416 # NOTE: In previous versions we sent all qw lines out immediately here.
9417 # No longer doing this: also write a line which is entirely a 'qw' list
9418 # to allow stacking of opening and closing tokens. Note that interior
9419 # qw lines will still go out at the end of this routine.
9420 if ( $rOpts->{'indent-only'} ) {
9425 $token = $input_line;
9428 $container_type = "";
9429 $container_environment = "";
9430 $type_sequence = "";
9431 store_token_to_go();
9432 output_line_to_go();
9436 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
9437 push( @$rtoken_type, 'b', 'b' );
9438 ($rwhite_space_flag) =
9439 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
9441 # if the buffer hasn't been flushed, add a leading space if
9442 # necessary to keep essential whitespace. This is really only
9443 # necessary if we are squeezing out all ws.
9444 if ( $max_index_to_go >= 0 ) {
9446 $old_line_count_in_batch++;
9449 is_essential_whitespace(
9450 $last_last_nonblank_token,
9451 $last_last_nonblank_type,
9452 $tokens_to_go[$max_index_to_go],
9453 $types_to_go[$max_index_to_go],
9459 my $slevel = $$rslevels[0];
9460 insert_new_token_to_go( ' ', 'b', $slevel,
9461 $no_internal_newlines );
9465 # If we just saw the end of an elsif block, write nag message
9466 # if we do not see another elseif or an else.
9467 if ($looking_for_else) {
9469 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
9470 write_logfile_entry("(No else block)\n");
9472 $looking_for_else = 0;
9475 # This is a good place to kill incomplete one-line blocks
9476 if ( ( $semicolons_before_block_self_destruct == 0 )
9477 && ( $max_index_to_go >= 0 )
9478 && ( $types_to_go[$max_index_to_go] eq ';' )
9479 && ( $$rtokens[0] ne '}' ) )
9481 destroy_one_line_block();
9482 output_line_to_go();
9485 # loop to process the tokens one-by-one
9489 foreach $j ( 0 .. $jmax ) {
9491 # pull out the local values for this token
9494 if ( $type eq '#' ) {
9496 # trim trailing whitespace
9497 # (there is no option at present to prevent this)
9501 $rOpts->{'delete-side-comments'}
9503 # delete closing side comments if necessary
9504 || ( $rOpts->{'delete-closing-side-comments'}
9505 && $token =~ /$closing_side_comment_prefix_pattern/o
9506 && $last_nonblank_block_type =~
9507 /$closing_side_comment_list_pattern/o )
9510 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9511 unstore_token_to_go();
9517 # If we are continuing after seeing a right curly brace, flush
9518 # buffer unless we see what we are looking for, as in
9520 if ( $rbrace_follower && $type ne 'b' ) {
9522 unless ( $rbrace_follower->{$token} ) {
9523 output_line_to_go();
9525 $rbrace_follower = undef;
9528 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
9529 $next_nonblank_token = $$rtokens[$j_next];
9530 $next_nonblank_token_type = $$rtoken_type[$j_next];
9532 #--------------------------------------------------------
9533 # Start of section to patch token text
9534 #--------------------------------------------------------
9536 # Modify certain tokens here for whitespace
9537 # The following is not yet done, but could be:
9539 if ( $type =~ /^[wit]$/ ) {
9542 # change '$ var' to '$var' etc
9543 # '-> new' to '->new'
9544 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
9548 # Split identifiers with leading arrows, inserting blanks if
9549 # necessary. It is easier and safer here than in the
9550 # tokenizer. For example '->new' becomes two tokens, '->' and
9551 # 'new' with a possible blank between.
9553 # Note: there is a related patch in sub set_white_space_flag
9554 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
9555 my $token_save = $1;
9556 my $type_save = $type;
9558 # store a blank to left of arrow if necessary
9559 if ( $max_index_to_go >= 0
9560 && $types_to_go[$max_index_to_go] ne 'b'
9561 && $want_left_space{'->'} == WS_YES )
9563 insert_new_token_to_go( ' ', 'b', $slevel,
9564 $no_internal_newlines );
9567 # then store the arrow
9570 store_token_to_go();
9572 # then reset the current token to be the remainder,
9573 # and reset the whitespace flag according to the arrow
9574 $$rwhite_space_flag[$j] = $want_right_space{'->'};
9575 $token = $token_save;
9579 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
9581 # trim identifiers of trailing blanks which can occur
9582 # under some unusual circumstances, such as if the
9583 # identifier 'witch' has trailing blanks on input here:
9587 # () # prototype may be on new line ...
9589 if ( $type eq 'i' ) { $token =~ s/\s+$//g }
9592 # change 'LABEL :' to 'LABEL:'
9593 elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
9595 # patch to add space to something like "x10"
9596 # This avoids having to split this token in the pre-tokenizer
9597 elsif ( $type eq 'n' ) {
9598 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
9601 elsif ( $type eq 'Q' ) {
9602 note_embedded_tab() if ( $token =~ "\t" );
9604 # make note of something like '$var = s/xxx/yyy/;'
9605 # in case it should have been '$var =~ s/xxx/yyy/;'
9607 $token =~ /^(s|tr|y|m|\/)/
9608 && $last_nonblank_token =~ /^(=|==|!=)$/
9610 # preceded by simple scalar
9611 && $last_last_nonblank_type eq 'i'
9612 && $last_last_nonblank_token =~ /^\$/
9614 # followed by some kind of termination
9615 # (but give complaint if we can's see far enough ahead)
9616 && $next_nonblank_token =~ /^[; \)\}]$/
9618 # scalar is not declared
9620 $types_to_go[0] eq 'k'
9621 && $tokens_to_go[0] =~ /^(my|our|local)$/
9625 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
9627 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
9632 # trim blanks from right of qw quotes
9633 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
9634 elsif ( $type eq 'q' ) {
9636 note_embedded_tab() if ( $token =~ "\t" );
9639 #--------------------------------------------------------
9640 # End of section to patch token text
9641 #--------------------------------------------------------
9643 # insert any needed whitespace
9644 if ( ( $type ne 'b' )
9645 && ( $max_index_to_go >= 0 )
9646 && ( $types_to_go[$max_index_to_go] ne 'b' )
9647 && $rOpts_add_whitespace )
9649 my $ws = $$rwhite_space_flag[$j];
9652 insert_new_token_to_go( ' ', 'b', $slevel,
9653 $no_internal_newlines );
9657 # Do not allow breaks which would promote a side comment to a
9658 # block comment. In order to allow a break before an opening
9659 # or closing BLOCK, followed by a side comment, those sections
9660 # of code will handle this flag separately.
9661 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
9662 my $is_opening_BLOCK =
9666 && $block_type ne 't' );
9667 my $is_closing_BLOCK =
9671 && $block_type ne 't' );
9673 if ( $side_comment_follows
9674 && !$is_opening_BLOCK
9675 && !$is_closing_BLOCK )
9677 $no_internal_newlines = 1;
9680 # We're only going to handle breaking for code BLOCKS at this
9681 # (top) level. Other indentation breaks will be handled by
9682 # sub scan_list, which is better suited to dealing with them.
9683 if ($is_opening_BLOCK) {
9685 # Tentatively output this token. This is required before
9686 # calling starting_one_line_block. We may have to unstore
9687 # it, though, if we have to break before it.
9688 store_token_to_go($side_comment_follows);
9690 # Look ahead to see if we might form a one-line block
9692 starting_one_line_block( $j, $jmax, $level, $slevel,
9693 $ci_level, $rtokens, $rtoken_type, $rblock_type );
9694 clear_breakpoint_undo_stack();
9696 # to simplify the logic below, set a flag to indicate if
9697 # this opening brace is far from the keyword which introduces it
9698 my $keyword_on_same_line = 1;
9699 if ( ( $max_index_to_go >= 0 )
9700 && ( $last_nonblank_type eq ')' ) )
9702 if ( $block_type =~ /^(if|else|elsif)$/
9703 && ( $tokens_to_go[0] eq '}' )
9704 && $rOpts_cuddled_else )
9706 $keyword_on_same_line = 1;
9708 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
9710 $keyword_on_same_line = 0;
9714 # decide if user requested break before '{'
9717 # use -bl flag if not a sub block of any type
9718 $block_type !~ /^sub/
9719 ? $rOpts->{'opening-brace-on-new-line'}
9721 # use -sbl flag for a named sub block
9722 : $block_type !~ /^sub\W*$/
9723 ? $rOpts->{'opening-sub-brace-on-new-line'}
9725 # use -asbl flag for an anonymous sub block
9726 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9728 # Break before an opening '{' ...
9734 # and we were unable to start looking for a block,
9735 && $index_start_one_line_block == UNDEFINED_INDEX
9737 # or if it will not be on same line as its keyword, so that
9738 # it will be outdented (eval.t, overload.t), and the user
9739 # has not insisted on keeping it on the right
9740 || ( !$keyword_on_same_line
9741 && !$rOpts->{'opening-brace-always-on-right'} )
9746 # but only if allowed
9747 unless ($no_internal_newlines) {
9749 # since we already stored this token, we must unstore it
9750 unstore_token_to_go();
9752 # then output the line
9753 output_line_to_go();
9755 # and now store this token at the start of a new line
9756 store_token_to_go($side_comment_follows);
9760 # Now update for side comment
9761 if ($side_comment_follows) { $no_internal_newlines = 1 }
9763 # now output this line
9764 unless ($no_internal_newlines) {
9765 output_line_to_go();
9769 elsif ($is_closing_BLOCK) {
9771 # If there is a pending one-line block ..
9772 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9774 # we have to terminate it if..
9777 # it is too long (final length may be different from
9778 # initial estimate). note: must allow 1 space for this token
9779 excess_line_length( $index_start_one_line_block,
9780 $max_index_to_go ) >= 0
9782 # or if it has too many semicolons
9783 || ( $semicolons_before_block_self_destruct == 0
9784 && $last_nonblank_type ne ';' )
9787 destroy_one_line_block();
9791 # put a break before this closing curly brace if appropriate
9792 unless ( $no_internal_newlines
9793 || $index_start_one_line_block != UNDEFINED_INDEX )
9796 # add missing semicolon if ...
9797 # there are some tokens
9799 ( $max_index_to_go > 0 )
9801 # and we don't have one
9802 && ( $last_nonblank_type ne ';' )
9804 # patch until some block type issues are fixed:
9805 # Do not add semi-colon for block types '{',
9806 # '}', and ';' because we cannot be sure yet
9807 # that this is a block and not an anonymous
9808 # hash (blktype.t, blktype1.t)
9809 && ( $block_type !~ /^[\{\};]$/ )
9811 # patch: and do not add semi-colons for recently
9812 # added block types (see tmp/semicolon.t)
9814 /^(switch|case|given|when|default)$/ )
9816 # it seems best not to add semicolons in these
9817 # special block types: sort|map|grep
9818 && ( !$is_sort_map_grep{$block_type} )
9820 # and we are allowed to do so.
9821 && $rOpts->{'add-semicolons'}
9825 save_current_token();
9828 $level = $levels_to_go[$max_index_to_go];
9829 $slevel = $nesting_depth_to_go[$max_index_to_go];
9831 $nesting_blocks_to_go[$max_index_to_go];
9832 $ci_level = $ci_levels_to_go[$max_index_to_go];
9834 $container_type = "";
9835 $container_environment = "";
9836 $type_sequence = "";
9838 # Note - we remove any blank AFTER extracting its
9839 # parameters such as level, etc, above
9840 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9841 unstore_token_to_go();
9843 store_token_to_go();
9845 note_added_semicolon();
9846 restore_current_token();
9849 # then write out everything before this closing curly brace
9850 output_line_to_go();
9854 # Now update for side comment
9855 if ($side_comment_follows) { $no_internal_newlines = 1 }
9857 # store the closing curly brace
9858 store_token_to_go();
9860 # ok, we just stored a closing curly brace. Often, but
9861 # not always, we want to end the line immediately.
9862 # So now we have to check for special cases.
9864 # if this '}' successfully ends a one-line block..
9865 my $is_one_line_block = 0;
9867 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9869 # Remember the type of token just before the
9870 # opening brace. It would be more general to use
9871 # a stack, but this will work for one-line blocks.
9872 $is_one_line_block =
9873 $types_to_go[$index_start_one_line_block];
9875 # we have to actually make it by removing tentative
9876 # breaks that were set within it
9877 undo_forced_breakpoint_stack(0);
9878 set_nobreaks( $index_start_one_line_block,
9879 $max_index_to_go - 1 );
9881 # then re-initialize for the next one-line block
9882 destroy_one_line_block();
9884 # then decide if we want to break after the '}' ..
9885 # We will keep going to allow certain brace followers as in:
9886 # do { $ifclosed = 1; last } unless $losing;
9888 # But make a line break if the curly ends a
9889 # significant block:
9891 $is_block_without_semicolon{$block_type}
9893 # if needless semicolon follows we handle it later
9894 && $next_nonblank_token ne ';'
9897 output_line_to_go() unless ($no_internal_newlines);
9901 # set string indicating what we need to look for brace follower
9903 if ( $block_type eq 'do' ) {
9904 $rbrace_follower = \%is_do_follower;
9906 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9907 $rbrace_follower = \%is_if_brace_follower;
9909 elsif ( $block_type eq 'else' ) {
9910 $rbrace_follower = \%is_else_brace_follower;
9913 # added eval for borris.t
9914 elsif ($is_sort_map_grep_eval{$block_type}
9915 || $is_one_line_block eq 'G' )
9917 $rbrace_follower = undef;
9922 elsif ( $block_type =~ /^sub\W*$/ ) {
9924 if ($is_one_line_block) {
9925 $rbrace_follower = \%is_anon_sub_1_brace_follower;
9928 $rbrace_follower = \%is_anon_sub_brace_follower;
9932 # None of the above: specify what can follow a closing
9933 # brace of a block which is not an
9934 # if/elsif/else/do/sort/map/grep/eval
9936 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9938 $rbrace_follower = \%is_other_brace_follower;
9941 # See if an elsif block is followed by another elsif or else;
9943 if ( $block_type eq 'elsif' ) {
9945 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
9946 $looking_for_else = 1; # ok, check on next line
9950 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9951 write_logfile_entry("No else block :(\n");
9956 # keep going after certain block types (map,sort,grep,eval)
9957 # added eval for borris.t
9963 # if no more tokens, postpone decision until re-entring
9964 elsif ( ( $next_nonblank_token_type eq 'b' )
9965 && $rOpts_add_newlines )
9967 unless ($rbrace_follower) {
9968 output_line_to_go() unless ($no_internal_newlines);
9972 elsif ($rbrace_follower) {
9974 unless ( $rbrace_follower->{$next_nonblank_token} ) {
9975 output_line_to_go() unless ($no_internal_newlines);
9977 $rbrace_follower = undef;
9981 output_line_to_go() unless ($no_internal_newlines);
9984 } # end treatment of closing block token
9987 elsif ( $type eq ';' ) {
9989 # kill one-line blocks with too many semicolons
9990 $semicolons_before_block_self_destruct--;
9992 ( $semicolons_before_block_self_destruct < 0 )
9993 || ( $semicolons_before_block_self_destruct == 0
9994 && $next_nonblank_token_type !~ /^[b\}]$/ )
9997 destroy_one_line_block();
10000 # Remove unnecessary semicolons, but not after bare
10001 # blocks, where it could be unsafe if the brace is
10005 $last_nonblank_token eq '}'
10007 $is_block_without_semicolon{
10008 $last_nonblank_block_type}
10009 || $last_nonblank_block_type =~ /^sub\s+\w/
10010 || $last_nonblank_block_type =~ /^\w+:$/ )
10012 || $last_nonblank_type eq ';'
10017 $rOpts->{'delete-semicolons'}
10019 # don't delete ; before a # because it would promote it
10020 # to a block comment
10021 && ( $next_nonblank_token_type ne '#' )
10024 note_deleted_semicolon();
10025 output_line_to_go()
10026 unless ( $no_internal_newlines
10027 || $index_start_one_line_block != UNDEFINED_INDEX );
10031 write_logfile_entry("Extra ';'\n");
10034 store_token_to_go();
10036 output_line_to_go()
10037 unless ( $no_internal_newlines
10038 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
10039 || ( $next_nonblank_token eq '}' ) );
10043 # handle here_doc target string
10044 elsif ( $type eq 'h' ) {
10045 $no_internal_newlines =
10046 1; # no newlines after seeing here-target
10047 destroy_one_line_block();
10048 store_token_to_go();
10051 # handle all other token types
10054 # if this is a blank...
10055 if ( $type eq 'b' ) {
10057 # make it just one character
10058 $token = ' ' if $rOpts_add_whitespace;
10060 # delete it if unwanted by whitespace rules
10061 # or we are deleting all whitespace
10062 my $ws = $$rwhite_space_flag[ $j + 1 ];
10063 if ( ( defined($ws) && $ws == -1 )
10064 || $rOpts_delete_old_whitespace )
10067 # unless it might make a syntax error
10069 unless is_essential_whitespace(
10070 $last_last_nonblank_token,
10071 $last_last_nonblank_type,
10072 $tokens_to_go[$max_index_to_go],
10073 $types_to_go[$max_index_to_go],
10074 $$rtokens[ $j + 1 ],
10075 $$rtoken_type[ $j + 1 ]
10079 store_token_to_go();
10082 # remember two previous nonblank OUTPUT tokens
10083 if ( $type ne '#' && $type ne 'b' ) {
10084 $last_last_nonblank_token = $last_nonblank_token;
10085 $last_last_nonblank_type = $last_nonblank_type;
10086 $last_nonblank_token = $token;
10087 $last_nonblank_type = $type;
10088 $last_nonblank_block_type = $block_type;
10091 # unset the continued-quote flag since it only applies to the
10092 # first token, and we want to resume normal formatting if
10093 # there are additional tokens on the line
10094 $in_continued_quote = 0;
10096 } # end of loop over all tokens in this 'line_of_tokens'
10098 # we have to flush ..
10101 # if there is a side comment
10102 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
10104 # if this line ends in a quote
10105 # NOTE: This is critically important for insuring that quoted lines
10106 # do not get processed by things like -sot and -sct
10109 # if this is a VERSION statement
10110 || $is_VERSION_statement
10112 # to keep a label at the end of a line
10115 # if we are instructed to keep all old line breaks
10116 || !$rOpts->{'delete-old-newlines'}
10119 destroy_one_line_block();
10120 output_line_to_go();
10123 # mark old line breakpoints in current output stream
10124 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
10125 $old_breakpoint_to_go[$max_index_to_go] = 1;
10127 } ## end sub print_line_of_tokens
10128 } ## end block print_line_of_tokens
10130 # sub output_line_to_go sends one logical line of tokens on down the
10131 # pipeline to the VerticalAligner package, breaking the line into continuation
10132 # lines as necessary. The line of tokens is ready to go in the "to_go"
10134 sub output_line_to_go {
10136 # debug stuff; this routine can be called from many points
10137 FORMATTER_DEBUG_FLAG_OUTPUT && do {
10138 my ( $a, $b, $c ) = caller;
10140 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
10142 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
10143 write_diagnostics("$output_str\n");
10146 # just set a tentative breakpoint if we might be in a one-line block
10147 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10148 set_forced_breakpoint($max_index_to_go);
10152 my $cscw_block_comment;
10153 $cscw_block_comment = add_closing_side_comment()
10154 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
10156 my $comma_arrow_count_contained = match_opening_and_closing_tokens();
10158 # tell the -lp option we are outputting a batch so it can close
10159 # any unfinished items in its stack
10162 # If this line ends in a code block brace, set breaks at any
10163 # previous closing code block braces to breakup a chain of code
10164 # blocks on one line. This is very rare but can happen for
10165 # user-defined subs. For example we might be looking at this:
10166 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
10167 my $saw_good_break = 0; # flag to force breaks even if short line
10170 # looking for opening or closing block brace
10171 $block_type_to_go[$max_index_to_go]
10173 # but not one of these which are never duplicated on a line:
10174 # until|while|for|if|elsif|else
10175 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
10178 my $lev = $nesting_depth_to_go[$max_index_to_go];
10180 # Walk backwards from the end and
10181 # set break at any closing block braces at the same level.
10182 # But quit if we are not in a chain of blocks.
10183 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
10184 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
10185 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
10187 if ( $block_type_to_go[$i] ) {
10188 if ( $tokens_to_go[$i] eq '}' ) {
10189 set_forced_breakpoint($i);
10190 $saw_good_break = 1;
10194 # quit if we see anything besides words, function, blanks
10196 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
10201 my $imax = $max_index_to_go;
10203 # trim any blank tokens
10204 if ( $max_index_to_go >= 0 ) {
10205 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
10206 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
10209 # anything left to write?
10210 if ( $imin <= $imax ) {
10212 # add a blank line before certain key types but not after a comment
10213 if ( $last_line_leading_type !~ /^[#]/ ) {
10214 my $want_blank = 0;
10215 my $leading_token = $tokens_to_go[$imin];
10216 my $leading_type = $types_to_go[$imin];
10218 # blank lines before subs except declarations and one-liners
10219 # MCONVERSION LOCATION - for sub tokenization change
10220 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
10221 $want_blank = $rOpts->{'blank-lines-before-subs'}
10223 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10224 $imax ) !~ /^[\;\}]$/
10228 # break before all package declarations
10229 # MCONVERSION LOCATION - for tokenizaton change
10230 elsif ($leading_token =~ /^(package\s)/
10231 && $leading_type eq 'i' )
10233 $want_blank = $rOpts->{'blank-lines-before-packages'};
10236 # break before certain key blocks except one-liners
10237 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
10238 $want_blank = $rOpts->{'blank-lines-before-subs'}
10240 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10245 # Break before certain block types if we haven't had a
10246 # break at this level for a while. This is the
10247 # difficult decision..
10248 elsif ($leading_type eq 'k'
10249 && $last_line_leading_type ne 'b'
10250 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
10252 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
10253 if ( !defined($lc) ) { $lc = 0 }
10256 $rOpts->{'blanks-before-blocks'}
10257 && $lc >= $rOpts->{'long-block-line-count'}
10258 && $file_writer_object->get_consecutive_nonblank_lines() >=
10259 $rOpts->{'long-block-line-count'}
10261 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10268 # future: send blank line down normal path to VerticalAligner
10269 Perl::Tidy::VerticalAligner::flush();
10270 $file_writer_object->require_blank_code_lines($want_blank);
10274 # update blank line variables and count number of consecutive
10275 # non-blank, non-comment lines at this level
10276 $last_last_line_leading_level = $last_line_leading_level;
10277 $last_line_leading_level = $levels_to_go[$imin];
10278 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
10279 $last_line_leading_type = $types_to_go[$imin];
10280 if ( $last_line_leading_level == $last_last_line_leading_level
10281 && $last_line_leading_type ne 'b'
10282 && $last_line_leading_type ne '#'
10283 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
10285 $nonblank_lines_at_depth[$last_line_leading_level]++;
10288 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
10291 FORMATTER_DEBUG_FLAG_FLUSH && do {
10292 my ( $package, $file, $line ) = caller;
10294 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
10297 # add a couple of extra terminal blank tokens
10300 # set all forced breakpoints for good list formatting
10301 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
10305 || $old_line_count_in_batch > 1
10307 # must always call scan_list() with unbalanced batches because it
10308 # is maintaining some stacks
10309 || is_unbalanced_batch()
10311 # call scan_list if we might want to break at commas
10313 $comma_count_in_batch
10314 && ( $rOpts_maximum_fields_per_table > 0
10315 || $rOpts_comma_arrow_breakpoints == 0 )
10318 # call scan_list if user may want to break open some one-line
10320 || ( $comma_arrow_count_contained
10321 && $rOpts_comma_arrow_breakpoints != 3 )
10324 ## This caused problems in one version of perl for unknown reasons:
10325 ## $saw_good_break ||= scan_list();
10326 my $sgb = scan_list();
10327 $saw_good_break ||= $sgb;
10330 # let $ri_first and $ri_last be references to lists of
10331 # first and last tokens of line fragments to output..
10332 my ( $ri_first, $ri_last );
10334 # write a single line if..
10337 # we aren't allowed to add any newlines
10338 !$rOpts_add_newlines
10340 # or, we don't already have an interior breakpoint
10341 # and we didn't see a good breakpoint
10343 !$forced_breakpoint_count
10344 && !$saw_good_break
10346 # and this line is 'short'
10351 @$ri_first = ($imin);
10352 @$ri_last = ($imax);
10355 # otherwise use multiple lines
10358 ( $ri_first, $ri_last, my $colon_count ) =
10359 set_continuation_breaks($saw_good_break);
10361 break_all_chain_tokens( $ri_first, $ri_last );
10363 break_equals( $ri_first, $ri_last );
10365 # now we do a correction step to clean this up a bit
10366 # (The only time we would not do this is for debugging)
10367 if ( $rOpts->{'recombine'} ) {
10368 ( $ri_first, $ri_last ) =
10369 recombine_breakpoints( $ri_first, $ri_last );
10372 insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
10375 # do corrector step if -lp option is used
10376 my $do_not_pad = 0;
10377 if ($rOpts_line_up_parentheses) {
10378 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
10380 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
10382 prepare_for_new_input_lines();
10384 # output any new -cscw block comment
10385 if ($cscw_block_comment) {
10387 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10391 sub note_added_semicolon {
10392 $last_added_semicolon_at = $input_line_number;
10393 if ( $added_semicolon_count == 0 ) {
10394 $first_added_semicolon_at = $last_added_semicolon_at;
10396 $added_semicolon_count++;
10397 write_logfile_entry("Added ';' here\n");
10400 sub note_deleted_semicolon {
10401 $last_deleted_semicolon_at = $input_line_number;
10402 if ( $deleted_semicolon_count == 0 ) {
10403 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
10405 $deleted_semicolon_count++;
10406 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
10409 sub note_embedded_tab {
10410 $embedded_tab_count++;
10411 $last_embedded_tab_at = $input_line_number;
10412 if ( !$first_embedded_tab_at ) {
10413 $first_embedded_tab_at = $last_embedded_tab_at;
10416 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
10417 write_logfile_entry("Embedded tabs in quote or pattern\n");
10421 sub starting_one_line_block {
10423 # after seeing an opening curly brace, look for the closing brace
10424 # and see if the entire block will fit on a line. This routine is
10425 # not always right because it uses the old whitespace, so a check
10426 # is made later (at the closing brace) to make sure we really
10427 # have a one-line block. We have to do this preliminary check,
10428 # though, because otherwise we would always break at a semicolon
10429 # within a one-line block if the block contains multiple statements.
10431 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
10435 # kill any current block - we can only go 1 deep
10436 destroy_one_line_block();
10439 # 1=distance from start of block to opening brace exceeds line length
10444 # shouldn't happen: there must have been a prior call to
10445 # store_token_to_go to put the opening brace in the output stream
10446 if ( $max_index_to_go < 0 ) {
10447 warning("program bug: store_token_to_go called incorrectly\n");
10448 report_definite_bug();
10452 # cannot use one-line blocks with cuddled else/elsif lines
10453 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
10458 my $block_type = $$rblock_type[$j];
10460 # find the starting keyword for this block (such as 'if', 'else', ...)
10462 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
10463 $i_start = $max_index_to_go;
10466 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
10468 # For something like "if (xxx) {", the keyword "if" will be
10469 # just after the most recent break. This will be 0 unless
10470 # we have just killed a one-line block and are starting another.
10472 # Note: cannot use inext_index_to_go[] here because that array
10473 # is still being constructed.
10474 $i_start = $index_max_forced_break + 1;
10475 if ( $types_to_go[$i_start] eq 'b' ) {
10479 unless ( $tokens_to_go[$i_start] eq $block_type ) {
10484 # the previous nonblank token should start these block types
10485 elsif (( $last_last_nonblank_token_to_go eq $block_type )
10486 || ( $block_type =~ /^sub/ ) )
10488 $i_start = $last_last_nonblank_index_to_go;
10491 # patch for SWITCH/CASE to retain one-line case/when blocks
10492 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
10494 # Note: cannot use inext_index_to_go[] here because that array
10495 # is still being constructed.
10496 $i_start = $index_max_forced_break + 1;
10497 if ( $types_to_go[$i_start] eq 'b' ) {
10500 unless ( $tokens_to_go[$i_start] eq $block_type ) {
10509 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
10513 # see if length is too long to even start
10514 if ( $pos > maximum_line_length($i_start) ) {
10518 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
10520 # old whitespace could be arbitrarily large, so don't use it
10521 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
10522 else { $pos += rtoken_length($i) }
10524 # Return false result if we exceed the maximum line length,
10525 if ( $pos > maximum_line_length($i_start) ) {
10529 # or encounter another opening brace before finding the closing brace.
10530 elsif ($$rtokens[$i] eq '{'
10531 && $$rtoken_type[$i] eq '{'
10532 && $$rblock_type[$i] )
10537 # if we find our closing brace..
10538 elsif ($$rtokens[$i] eq '}'
10539 && $$rtoken_type[$i] eq '}'
10540 && $$rblock_type[$i] )
10543 # be sure any trailing comment also fits on the line
10545 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
10547 # Patch for one-line sort/map/grep/eval blocks with side comments:
10548 # We will ignore the side comment length for sort/map/grep/eval
10549 # because this can lead to statements which change every time
10550 # perltidy is run. Here is an example from Denis Moskowitz which
10551 # oscillates between these two states without this patch:
10554 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10558 ## $_->foo ne 'bar'
10559 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10563 # When the first line is input it gets broken apart by the main
10564 # line break logic in sub print_line_of_tokens.
10565 # When the second line is input it gets recombined by
10566 # print_line_of_tokens and passed to the output routines. The
10567 # output routines (set_continuation_breaks) do not break it apart
10568 # because the bond strengths are set to the highest possible value
10569 # for grep/map/eval/sort blocks, so the first version gets output.
10570 # It would be possible to fix this by changing bond strengths,
10571 # but they are high to prevent errors in older versions of perl.
10573 if ( $$rtoken_type[$i_nonblank] eq '#'
10574 && !$is_sort_map_grep{$block_type} )
10577 $pos += rtoken_length($i_nonblank);
10579 if ( $i_nonblank > $i + 1 ) {
10581 # source whitespace could be anything, assume
10582 # at least one space before the hash on output
10583 if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 }
10584 else { $pos += rtoken_length( $i + 1 ) }
10587 if ( $pos >= maximum_line_length($i_start) ) {
10592 # ok, it's a one-line block
10593 create_one_line_block( $i_start, 20 );
10597 # just keep going for other characters
10602 # Allow certain types of new one-line blocks to form by joining
10603 # input lines. These can be safely done, but for other block types,
10604 # we keep old one-line blocks but do not form new ones. It is not
10605 # always a good idea to make as many one-line blocks as possible,
10606 # so other types are not done. The user can always use -mangle.
10607 if ( $is_sort_map_grep_eval{$block_type} ) {
10608 create_one_line_block( $i_start, 1 );
10614 sub unstore_token_to_go {
10616 # remove most recent token from output stream
10617 if ( $max_index_to_go > 0 ) {
10618 $max_index_to_go--;
10621 $max_index_to_go = UNDEFINED_INDEX;
10626 sub want_blank_line {
10628 $file_writer_object->want_blank_line();
10631 sub write_unindented_line {
10633 $file_writer_object->write_line( $_[0] );
10638 # Undo continuation indentation in certain sequences
10639 # For example, we can undo continuation indentation in sort/map/grep chains
10640 # my $dat1 = pack( "n*",
10641 # map { $_, $lookup->{$_} }
10642 # sort { $a <=> $b }
10643 # grep { $lookup->{$_} ne $default } keys %$lookup );
10644 # To align the map/sort/grep keywords like this:
10645 # my $dat1 = pack( "n*",
10646 # map { $_, $lookup->{$_} }
10647 # sort { $a <=> $b }
10648 # grep { $lookup->{$_} ne $default } keys %$lookup );
10649 my ( $ri_first, $ri_last ) = @_;
10650 my ( $line_1, $line_2, $lev_last );
10651 my $this_line_is_semicolon_terminated;
10652 my $max_line = @$ri_first - 1;
10654 # looking at each line of this batch..
10655 # We are looking at leading tokens and looking for a sequence
10656 # all at the same level and higher level than enclosing lines.
10657 foreach my $line ( 0 .. $max_line ) {
10659 my $ibeg = $$ri_first[$line];
10660 my $lev = $levels_to_go[$ibeg];
10663 # if we have started a chain..
10666 # see if it continues..
10667 if ( $lev == $lev_last ) {
10668 if ( $types_to_go[$ibeg] eq 'k'
10669 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10672 # chain continues...
10673 # check for chain ending at end of a statement
10674 if ( $line == $max_line ) {
10676 # see of this line ends a statement
10677 my $iend = $$ri_last[$line];
10678 $this_line_is_semicolon_terminated =
10679 $types_to_go[$iend] eq ';'
10681 # with possible side comment
10682 || ( $types_to_go[$iend] eq '#'
10683 && $iend - $ibeg >= 2
10684 && $types_to_go[ $iend - 2 ] eq ';'
10685 && $types_to_go[ $iend - 1 ] eq 'b' );
10687 $line_2 = $line if ($this_line_is_semicolon_terminated);
10695 elsif ( $lev < $lev_last ) {
10697 # chain ends with previous line
10698 $line_2 = $line - 1;
10700 elsif ( $lev > $lev_last ) {
10706 # undo the continuation indentation if a chain ends
10707 if ( defined($line_2) && defined($line_1) ) {
10708 my $continuation_line_count = $line_2 - $line_1 + 1;
10709 @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10710 (0) x ($continuation_line_count);
10711 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10712 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
10717 # not in a chain yet..
10720 # look for start of a new sort/map/grep chain
10721 if ( $lev > $lev_last ) {
10722 if ( $types_to_go[$ibeg] eq 'k'
10723 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10736 # If there is a single, long parameter within parens, like this:
10738 # $self->command( "/msg "
10739 # . $infoline->chan
10740 # . " You said $1, but did you know that it's square was "
10741 # . $1 * $1 . " ?" );
10743 # we can remove the continuation indentation of the 2nd and higher lines
10744 # to achieve this effect, which is more pleasing:
10746 # $self->command("/msg "
10747 # . $infoline->chan
10748 # . " You said $1, but did you know that it's square was "
10749 # . $1 * $1 . " ?");
10751 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
10752 my $max_line = @$ri_first - 1;
10754 # must be multiple lines
10755 return unless $max_line > $line_open;
10757 my $lev_start = $levels_to_go[$i_start];
10758 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
10760 # see if all additional lines in this container have continuation
10763 my $line_1 = 1 + $line_open;
10764 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
10765 my $ibeg = $$ri_first[$n];
10766 my $iend = $$ri_last[$n];
10767 if ( $ibeg eq $closing_index ) { $n--; last }
10768 return if ( $lev_start != $levels_to_go[$ibeg] );
10769 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10770 last if ( $closing_index <= $iend );
10773 # we can reduce the indentation of all continuation lines
10774 my $continuation_line_count = $n - $line_open;
10775 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10776 (0) x ($continuation_line_count);
10777 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10778 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
10783 # insert $pad_spaces before token number $ipad
10784 my ( $ipad, $pad_spaces ) = @_;
10785 if ( $pad_spaces > 0 ) {
10786 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
10788 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
10789 $tokens_to_go[$ipad] = "";
10797 $token_lengths_to_go[$ipad] += $pad_spaces;
10798 for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) {
10799 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
10808 @_ = qw( + - * / );
10809 @is_math_op{@_} = (1) x scalar(@_);
10812 sub set_logical_padding {
10814 # Look at a batch of lines and see if extra padding can improve the
10815 # alignment when there are certain leading operators. Here is an
10816 # example, in which some extra space is introduced before
10817 # '( $year' to make it line up with the subsequent lines:
10819 # if ( ( $Year < 1601 )
10820 # || ( $Year > 2899 )
10821 # || ( $EndYear < 1601 )
10822 # || ( $EndYear > 2899 ) )
10824 # &Error_OutOfRange;
10827 my ( $ri_first, $ri_last ) = @_;
10828 my $max_line = @$ri_first - 1;
10830 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line,
10832 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10834 # looking at each line of this batch..
10835 foreach $line ( 0 .. $max_line - 1 ) {
10837 # see if the next line begins with a logical operator
10838 $ibeg = $$ri_first[$line];
10839 $iend = $$ri_last[$line];
10840 $ibeg_next = $$ri_first[ $line + 1 ];
10841 $tok_next = $tokens_to_go[$ibeg_next];
10842 $type_next = $types_to_go[$ibeg_next];
10844 $has_leading_op_next = ( $tok_next =~ /^\w/ )
10845 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
10846 : $is_chain_operator{$type_next}; # and, or
10848 next unless ($has_leading_op_next);
10850 # next line must not be at lesser depth
10852 if ( $nesting_depth_to_go[$ibeg] >
10853 $nesting_depth_to_go[$ibeg_next] );
10855 # identify the token in this line to be padded on the left
10858 # handle lines at same depth...
10859 if ( $nesting_depth_to_go[$ibeg] ==
10860 $nesting_depth_to_go[$ibeg_next] )
10863 # if this is not first line of the batch ...
10866 # and we have leading operator..
10867 next if $has_leading_op;
10869 # Introduce padding if..
10870 # 1. the previous line is at lesser depth, or
10871 # 2. the previous line ends in an assignment
10872 # 3. the previous line ends in a 'return'
10873 # 4. the previous line ends in a comma
10874 # Example 1: previous line at lesser depth
10875 # if ( ( $Year < 1601 ) # <- we are here but
10876 # || ( $Year > 2899 ) # list has not yet
10877 # || ( $EndYear < 1601 ) # collapsed vertically
10878 # || ( $EndYear > 2899 ) )
10881 # Example 2: previous line ending in assignment:
10883 # $year % 4 ? 0 # <- We are here
10884 # : $year % 100 ? 1
10885 # : $year % 400 ? 0
10888 # Example 3: previous line ending in comma:
10895 # be sure levels agree (do not indent after an indented 'if')
10897 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
10899 # allow padding on first line after a comma but only if:
10900 # (1) this is line 2 and
10901 # (2) there are at more than three lines and
10902 # (3) lines 3 and 4 have the same leading operator
10903 # These rules try to prevent padding within a long
10904 # comma-separated list.
10906 if ( $types_to_go[$iendm] eq ','
10910 my $ibeg_next_next = $$ri_first[ $line + 2 ];
10911 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
10912 $ok_comma = $tok_next_next eq $tok_next;
10917 $is_assignment{ $types_to_go[$iendm] }
10919 || ( $nesting_depth_to_go[$ibegm] <
10920 $nesting_depth_to_go[$ibeg] )
10921 || ( $types_to_go[$iendm] eq 'k'
10922 && $tokens_to_go[$iendm] eq 'return' )
10925 # we will add padding before the first token
10929 # for first line of the batch..
10932 # WARNING: Never indent if first line is starting in a
10933 # continued quote, which would change the quote.
10934 next if $starting_in_quote;
10936 # if this is text after closing '}'
10937 # then look for an interior token to pad
10938 if ( $types_to_go[$ibeg] eq '}' ) {
10942 # otherwise, we might pad if it looks really good
10945 # we might pad token $ibeg, so be sure that it
10946 # is at the same depth as the next line.
10948 if ( $nesting_depth_to_go[$ibeg] !=
10949 $nesting_depth_to_go[$ibeg_next] );
10951 # We can pad on line 1 of a statement if at least 3
10952 # lines will be aligned. Otherwise, it
10953 # can look very confusing.
10955 # We have to be careful not to pad if there are too few
10956 # lines. The current rule is:
10957 # (1) in general we require at least 3 consecutive lines
10958 # with the same leading chain operator token,
10959 # (2) but an exception is that we only require two lines
10960 # with leading colons if there are no more lines. For example,
10961 # the first $i in the following snippet would get padding
10962 # by the second rule:
10964 # $i == 1 ? ( "First", "Color" )
10965 # : $i == 2 ? ( "Then", "Rarity" )
10966 # : ( "Then", "Name" );
10968 if ( $max_line > 1 ) {
10969 my $leading_token = $tokens_to_go[$ibeg_next];
10972 # never indent line 1 of a '.' series because
10973 # previous line is most likely at same level.
10974 # TODO: we should also look at the leasing_spaces
10975 # of the last output line and skip if it is same
10977 next if ( $leading_token eq '.' );
10980 foreach my $l ( 2 .. 3 ) {
10981 last if ( $line + $l > $max_line );
10982 my $ibeg_next_next = $$ri_first[ $line + $l ];
10983 if ( $tokens_to_go[$ibeg_next_next] ne
10986 $tokens_differ = 1;
10991 next if ($tokens_differ);
10992 next if ( $count < 3 && $leading_token ne ':' );
11002 # find interior token to pad if necessary
11003 if ( !defined($ipad) ) {
11005 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
11007 # find any unclosed container
11009 unless ( $type_sequence_to_go[$i]
11010 && $mate_index_to_go[$i] > $iend );
11012 # find next nonblank token to pad
11013 $ipad = $inext_to_go[$i];
11014 last if ( $ipad > $iend );
11019 # We cannot pad a leading token at the lowest level because
11020 # it could cause a bug in which the starting indentation
11021 # level is guessed incorrectly each time the code is run
11022 # though perltidy, thus causing the code to march off to
11023 # the right. For example, the following snippet would have
11026 ## ov_method mycan( $package, '(""' ), $package
11027 ## or ov_method mycan( $package, '(0+' ), $package
11028 ## or ov_method mycan( $package, '(bool' ), $package
11029 ## or ov_method mycan( $package, '(nomethod' ), $package;
11031 # If this snippet is within a block this won't happen
11032 # unless the user just processes the snippet alone within
11033 # an editor. In that case either the user will see and
11034 # fix the problem or it will be corrected next time the
11035 # entire file is processed with perltidy.
11036 next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
11038 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
11039 ## IT DID MORE HARM THAN GOOD
11041 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
11044 ##? # do not put leading padding for just 2 lines of math
11045 ##? if ( $ipad == $ibeg
11047 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
11048 ##? && $is_math_op{$type_next}
11049 ##? && $line + 2 <= $max_line )
11051 ##? my $ibeg_next_next = $$ri_first[ $line + 2 ];
11052 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
11053 ##? next if !$is_math_op{$type_next_next};
11056 # next line must not be at greater depth
11057 my $iend_next = $$ri_last[ $line + 1 ];
11059 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
11060 $nesting_depth_to_go[$ipad] );
11062 # lines must be somewhat similar to be padded..
11063 my $inext_next = $inext_to_go[$ibeg_next];
11064 my $type = $types_to_go[$ipad];
11065 my $type_next = $types_to_go[ $ipad + 1 ];
11067 # see if there are multiple continuation lines
11068 my $logical_continuation_lines = 1;
11069 if ( $line + 2 <= $max_line ) {
11070 my $leading_token = $tokens_to_go[$ibeg_next];
11071 my $ibeg_next_next = $$ri_first[ $line + 2 ];
11072 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
11073 && $nesting_depth_to_go[$ibeg_next] eq
11074 $nesting_depth_to_go[$ibeg_next_next] )
11076 $logical_continuation_lines++;
11080 # see if leading types match
11081 my $types_match = $types_to_go[$inext_next] eq $type;
11082 my $matches_without_bang;
11084 # if first line has leading ! then compare the following token
11085 if ( !$types_match && $type eq '!' ) {
11086 $types_match = $matches_without_bang =
11087 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
11092 # either we have multiple continuation lines to follow
11093 # and we are not padding the first token
11094 ( $logical_continuation_lines > 1 && $ipad > 0 )
11102 # and keywords must match if keyword
11105 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
11111 #----------------------begin special checks--------------
11114 # A check is needed before we can make the pad.
11115 # If we are in a list with some long items, we want each
11116 # item to stand out. So in the following example, the
11117 # first line beginning with '$casefold->' would look good
11118 # padded to align with the next line, but then it
11119 # would be indented more than the last line, so we
11123 # $casefold->{code} eq '0041'
11124 # && $casefold->{status} eq 'C'
11125 # && $casefold->{mapping} eq '0061',
11130 # It would be faster, and almost as good, to use a comma
11131 # count, and not pad if comma_count > 1 and the previous
11132 # line did not end with a comma.
11136 my $ibg = $$ri_first[ $line + 1 ];
11137 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
11139 # just use simplified formula for leading spaces to avoid
11140 # needless sub calls
11141 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
11143 # look at each line beyond the next ..
11145 foreach $l ( $line + 2 .. $max_line ) {
11146 my $ibg = $$ri_first[$l];
11148 # quit looking at the end of this container
11150 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
11151 || ( $nesting_depth_to_go[$ibg] < $depth );
11153 # cannot do the pad if a later line would be
11155 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
11161 # don't pad if we end in a broken list
11162 if ( $l == $max_line ) {
11163 my $i2 = $$ri_last[$l];
11164 if ( $types_to_go[$i2] eq '#' ) {
11165 my $i1 = $$ri_first[$l];
11168 terminal_type( \@types_to_go, \@block_type_to_go,
11175 # a minus may introduce a quoted variable, and we will
11176 # add the pad only if this line begins with a bare word,
11177 # such as for the word 'Button' here:
11179 # Button => "Print letter \"~$_\"",
11180 # -command => [ sub { print "$_[0]\n" }, $_ ],
11181 # -accelerator => "Meta+$_"
11184 # On the other hand, if 'Button' is quoted, it looks best
11187 # 'Button' => "Print letter \"~$_\"",
11188 # -command => [ sub { print "$_[0]\n" }, $_ ],
11189 # -accelerator => "Meta+$_"
11191 if ( $types_to_go[$ibeg_next] eq 'm' ) {
11192 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
11195 next unless $ok_to_pad;
11197 #----------------------end special check---------------
11199 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
11200 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
11201 $pad_spaces = $length_2 - $length_1;
11203 # If the first line has a leading ! and the second does
11204 # not, then remove one space to try to align the next
11205 # leading characters, which are often the same. For example:
11207 # || $ts == $self->Holder
11208 # || $self->Holder->Type eq "Arena" )
11210 # This usually helps readability, but if there are subsequent
11211 # ! operators things will still get messed up. For example:
11213 # if ( !exists $Net::DNS::typesbyname{$qtype}
11214 # && exists $Net::DNS::classesbyname{$qtype}
11215 # && !exists $Net::DNS::classesbyname{$qclass}
11216 # && exists $Net::DNS::typesbyname{$qclass} )
11217 # We can't fix that.
11218 if ($matches_without_bang) { $pad_spaces-- }
11220 # make sure this won't change if -lp is used
11221 my $indentation_1 = $leading_spaces_to_go[$ibeg];
11222 if ( ref($indentation_1) ) {
11223 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
11224 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
11225 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 )
11232 # we might be able to handle a pad of -1 by removing a blank
11234 if ( $pad_spaces < 0 ) {
11236 if ( $pad_spaces == -1 ) {
11237 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
11239 pad_token( $ipad - 1, $pad_spaces );
11245 # now apply any padding for alignment
11246 if ( $ipad >= 0 && $pad_spaces ) {
11248 my $length_t = total_line_length( $ibeg, $iend );
11249 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
11251 pad_token( $ipad, $pad_spaces );
11259 $has_leading_op = $has_leading_op_next;
11260 } # end of loop over lines
11265 sub correct_lp_indentation {
11267 # When the -lp option is used, we need to make a last pass through
11268 # each line to correct the indentation positions in case they differ
11269 # from the predictions. This is necessary because perltidy uses a
11270 # predictor/corrector method for aligning with opening parens. The
11271 # predictor is usually good, but sometimes stumbles. The corrector
11272 # tries to patch things up once the actual opening paren locations
11274 my ( $ri_first, $ri_last ) = @_;
11275 my $do_not_pad = 0;
11277 # Note on flag '$do_not_pad':
11278 # We want to avoid a situation like this, where the aligner inserts
11279 # whitespace before the '=' to align it with a previous '=', because
11280 # otherwise the parens might become mis-aligned in a situation like
11281 # this, where the '=' has become aligned with the previous line,
11282 # pushing the opening '(' forward beyond where we want it.
11284 # $mkFloor::currentRoom = '';
11285 # $mkFloor::c_entry = $c->Entry(
11287 # -relief => 'sunken',
11291 # We leave it to the aligner to decide how to do this.
11293 # first remove continuation indentation if appropriate
11294 my $max_line = @$ri_first - 1;
11296 # looking at each line of this batch..
11297 my ( $ibeg, $iend );
11299 foreach $line ( 0 .. $max_line ) {
11300 $ibeg = $$ri_first[$line];
11301 $iend = $$ri_last[$line];
11303 # looking at each token in this output line..
11305 foreach $i ( $ibeg .. $iend ) {
11307 # How many space characters to place before this token
11308 # for special alignment. Actual padding is done in the
11311 # looking for next unvisited indentation item
11312 my $indentation = $leading_spaces_to_go[$i];
11313 if ( !$indentation->get_MARKED() ) {
11314 $indentation->set_MARKED(1);
11316 # looking for indentation item for which we are aligning
11317 # with parens, braces, and brackets
11318 next unless ( $indentation->get_ALIGN_PAREN() );
11320 # skip closed container on this line
11321 if ( $i > $ibeg ) {
11322 my $im = max( $ibeg, $iprev_to_go[$i] );
11323 if ( $type_sequence_to_go[$im]
11324 && $mate_index_to_go[$im] <= $iend )
11330 if ( $line == 1 && $i == $ibeg ) {
11334 # Ok, let's see what the error is and try to fix it
11336 my $predicted_pos = $indentation->get_SPACES();
11337 if ( $i > $ibeg ) {
11339 # token is mid-line - use length to previous token
11340 $actual_pos = total_line_length( $ibeg, $i - 1 );
11342 # for mid-line token, we must check to see if all
11343 # additional lines have continuation indentation,
11344 # and remove it if so. Otherwise, we do not get
11346 my $closing_index = $indentation->get_CLOSED();
11347 if ( $closing_index > $iend ) {
11348 my $ibeg_next = $$ri_first[ $line + 1 ];
11349 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
11350 undo_lp_ci( $line, $i, $closing_index, $ri_first,
11355 elsif ( $line > 0 ) {
11357 # handle case where token starts a new line;
11358 # use length of previous line
11359 my $ibegm = $$ri_first[ $line - 1 ];
11360 my $iendm = $$ri_last[ $line - 1 ];
11361 $actual_pos = total_line_length( $ibegm, $iendm );
11365 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
11369 # token is first character of first line of batch
11370 $actual_pos = $predicted_pos;
11373 my $move_right = $actual_pos - $predicted_pos;
11375 # done if no error to correct (gnu2.t)
11376 if ( $move_right == 0 ) {
11377 $indentation->set_RECOVERABLE_SPACES($move_right);
11381 # if we have not seen closure for this indentation in
11382 # this batch, we can only pass on a request to the
11384 my $closing_index = $indentation->get_CLOSED();
11386 if ( $closing_index < 0 ) {
11387 $indentation->set_RECOVERABLE_SPACES($move_right);
11391 # If necessary, look ahead to see if there is really any
11392 # leading whitespace dependent on this whitespace, and
11393 # also find the longest line using this whitespace.
11394 # Since it is always safe to move left if there are no
11395 # dependents, we only need to do this if we may have
11396 # dependent nodes or need to move right.
11398 my $right_margin = 0;
11399 my $have_child = $indentation->get_HAVE_CHILD();
11401 my %saw_indentation;
11402 my $line_count = 1;
11403 $saw_indentation{$indentation} = $indentation;
11405 if ( $have_child || $move_right > 0 ) {
11407 my $max_length = 0;
11408 if ( $i == $ibeg ) {
11409 $max_length = total_line_length( $ibeg, $iend );
11412 # look ahead at the rest of the lines of this batch..
11414 foreach $line_t ( $line + 1 .. $max_line ) {
11415 my $ibeg_t = $$ri_first[$line_t];
11416 my $iend_t = $$ri_last[$line_t];
11417 last if ( $closing_index <= $ibeg_t );
11419 # remember all different indentation objects
11420 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
11421 $saw_indentation{$indentation_t} = $indentation_t;
11424 # remember longest line in the group
11425 my $length_t = total_line_length( $ibeg_t, $iend_t );
11426 if ( $length_t > $max_length ) {
11427 $max_length = $length_t;
11430 $right_margin = maximum_line_length($ibeg) - $max_length;
11431 if ( $right_margin < 0 ) { $right_margin = 0 }
11434 my $first_line_comma_count =
11435 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
11436 my $comma_count = $indentation->get_COMMA_COUNT();
11437 my $arrow_count = $indentation->get_ARROW_COUNT();
11439 # This is a simple approximate test for vertical alignment:
11440 # if we broke just after an opening paren, brace, bracket,
11441 # and there are 2 or more commas in the first line,
11442 # and there are no '=>'s,
11443 # then we are probably vertically aligned. We could set
11444 # an exact flag in sub scan_list, but this is good
11446 my $indentation_count = keys %saw_indentation;
11447 my $is_vertically_aligned =
11449 && $first_line_comma_count > 1
11450 && $indentation_count == 1
11451 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
11453 # Make the move if possible ..
11456 # we can always move left
11459 # but we should only move right if we are sure it will
11460 # not spoil vertical alignment
11461 || ( $comma_count == 0 )
11462 || ( $comma_count > 0 && !$is_vertically_aligned )
11466 ( $move_right <= $right_margin )
11470 foreach ( keys %saw_indentation ) {
11471 $saw_indentation{$_}
11472 ->permanently_decrease_AVAILABLE_SPACES( -$move );
11476 # Otherwise, record what we want and the vertical aligner
11477 # will try to recover it.
11479 $indentation->set_RECOVERABLE_SPACES($move_right);
11484 return $do_not_pad;
11487 # flush is called to output any tokens in the pipeline, so that
11488 # an alternate source of lines can be written in the correct order
11491 destroy_one_line_block();
11492 output_line_to_go();
11493 Perl::Tidy::VerticalAligner::flush();
11496 sub reset_block_text_accumulator {
11498 # save text after 'if' and 'elsif' to append after 'else'
11499 if ($accumulating_text_for_block) {
11501 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
11502 push @{$rleading_block_if_elsif_text}, $leading_block_text;
11505 $accumulating_text_for_block = "";
11506 $leading_block_text = "";
11507 $leading_block_text_level = 0;
11508 $leading_block_text_length_exceeded = 0;
11509 $leading_block_text_line_number = 0;
11510 $leading_block_text_line_length = 0;
11513 sub set_block_text_accumulator {
11515 $accumulating_text_for_block = $tokens_to_go[$i];
11516 if ( $accumulating_text_for_block !~ /^els/ ) {
11517 $rleading_block_if_elsif_text = [];
11519 $leading_block_text = "";
11520 $leading_block_text_level = $levels_to_go[$i];
11521 $leading_block_text_line_number =
11522 $vertical_aligner_object->get_output_line_number();
11523 $leading_block_text_length_exceeded = 0;
11525 # this will contain the column number of the last character
11526 # of the closing side comment
11527 $leading_block_text_line_length =
11528 length($csc_last_label) +
11529 length($accumulating_text_for_block) +
11530 length( $rOpts->{'closing-side-comment-prefix'} ) +
11531 $leading_block_text_level * $rOpts_indent_columns + 3;
11534 sub accumulate_block_text {
11537 # accumulate leading text for -csc, ignoring any side comments
11538 if ( $accumulating_text_for_block
11539 && !$leading_block_text_length_exceeded
11540 && $types_to_go[$i] ne '#' )
11543 my $added_length = $token_lengths_to_go[$i];
11544 $added_length += 1 if $i == 0;
11545 my $new_line_length = $leading_block_text_line_length + $added_length;
11547 # we can add this text if we don't exceed some limits..
11550 # we must not have already exceeded the text length limit
11551 length($leading_block_text) <
11552 $rOpts_closing_side_comment_maximum_text
11555 # the new total line length must be below the line length limit
11556 # or the new length must be below the text length limit
11557 # (ie, we may allow one token to exceed the text length limit)
11560 maximum_line_length_for_level($leading_block_text_level)
11562 || length($leading_block_text) + $added_length <
11563 $rOpts_closing_side_comment_maximum_text
11566 # UNLESS: we are adding a closing paren before the brace we seek.
11567 # This is an attempt to avoid situations where the ... to be
11568 # added are longer than the omitted right paren, as in:
11570 # foreach my $item (@a_rather_long_variable_name_here) {
11572 # } ## end foreach my $item (@a_rather_long_variable_name_here...
11575 $tokens_to_go[$i] eq ')'
11578 $i + 1 <= $max_index_to_go
11579 && $block_type_to_go[ $i + 1 ] eq
11580 $accumulating_text_for_block
11582 || ( $i + 2 <= $max_index_to_go
11583 && $block_type_to_go[ $i + 2 ] eq
11584 $accumulating_text_for_block )
11590 # add an extra space at each newline
11591 if ( $i == 0 ) { $leading_block_text .= ' ' }
11593 # add the token text
11594 $leading_block_text .= $tokens_to_go[$i];
11595 $leading_block_text_line_length = $new_line_length;
11598 # show that text was truncated if necessary
11599 elsif ( $types_to_go[$i] ne 'b' ) {
11600 $leading_block_text_length_exceeded = 1;
11601 ## Please see file perltidy.ERR
11602 $leading_block_text .= '...';
11608 my %is_if_elsif_else_unless_while_until_for_foreach;
11612 # These block types may have text between the keyword and opening
11613 # curly. Note: 'else' does not, but must be included to allow trailing
11614 # if/elsif text to be appended.
11615 # patch for SWITCH/CASE: added 'case' and 'when'
11616 @_ = qw(if elsif else unless while until for foreach case when);
11617 @is_if_elsif_else_unless_while_until_for_foreach{@_} =
11621 sub accumulate_csc_text {
11623 # called once per output buffer when -csc is used. Accumulates
11624 # the text placed after certain closing block braces.
11625 # Defines and returns the following for this buffer:
11627 my $block_leading_text = ""; # the leading text of the last '}'
11628 my $rblock_leading_if_elsif_text;
11629 my $i_block_leading_text =
11630 -1; # index of token owning block_leading_text
11631 my $block_line_count = 100; # how many lines the block spans
11632 my $terminal_type = 'b'; # type of last nonblank token
11633 my $i_terminal = 0; # index of last nonblank token
11634 my $terminal_block_type = "";
11636 # update most recent statement label
11637 $csc_last_label = "" unless ($csc_last_label);
11638 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
11639 my $block_label = $csc_last_label;
11641 # Loop over all tokens of this batch
11642 for my $i ( 0 .. $max_index_to_go ) {
11643 my $type = $types_to_go[$i];
11644 my $block_type = $block_type_to_go[$i];
11645 my $token = $tokens_to_go[$i];
11647 # remember last nonblank token type
11648 if ( $type ne '#' && $type ne 'b' ) {
11649 $terminal_type = $type;
11650 $terminal_block_type = $block_type;
11654 my $type_sequence = $type_sequence_to_go[$i];
11655 if ( $block_type && $type_sequence ) {
11657 if ( $token eq '}' ) {
11659 # restore any leading text saved when we entered this block
11660 if ( defined( $block_leading_text{$type_sequence} ) ) {
11661 ( $block_leading_text, $rblock_leading_if_elsif_text )
11662 = @{ $block_leading_text{$type_sequence} };
11663 $i_block_leading_text = $i;
11664 delete $block_leading_text{$type_sequence};
11665 $rleading_block_if_elsif_text =
11666 $rblock_leading_if_elsif_text;
11669 if ( defined( $csc_block_label{$type_sequence} ) ) {
11670 $block_label = $csc_block_label{$type_sequence};
11671 delete $csc_block_label{$type_sequence};
11674 # if we run into a '}' then we probably started accumulating
11675 # at something like a trailing 'if' clause..no harm done.
11676 if ( $accumulating_text_for_block
11677 && $levels_to_go[$i] <= $leading_block_text_level )
11679 my $lev = $levels_to_go[$i];
11680 reset_block_text_accumulator();
11683 if ( defined( $block_opening_line_number{$type_sequence} ) )
11685 my $output_line_number =
11686 $vertical_aligner_object->get_output_line_number();
11687 $block_line_count =
11688 $output_line_number -
11689 $block_opening_line_number{$type_sequence} + 1;
11690 delete $block_opening_line_number{$type_sequence};
11694 # Error: block opening line undefined for this line..
11695 # This shouldn't be possible, but it is not a
11696 # significant problem.
11700 elsif ( $token eq '{' ) {
11703 $vertical_aligner_object->get_output_line_number();
11704 $block_opening_line_number{$type_sequence} = $line_number;
11706 # set a label for this block, except for
11707 # a bare block which already has the label
11708 # A label can only be used on the next {
11709 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
11710 $csc_block_label{$type_sequence} = $csc_last_label;
11711 $csc_last_label = "";
11713 if ( $accumulating_text_for_block
11714 && $levels_to_go[$i] == $leading_block_text_level )
11717 if ( $accumulating_text_for_block eq $block_type ) {
11719 # save any leading text before we enter this block
11720 $block_leading_text{$type_sequence} = [
11721 $leading_block_text,
11722 $rleading_block_if_elsif_text
11724 $block_opening_line_number{$type_sequence} =
11725 $leading_block_text_line_number;
11726 reset_block_text_accumulator();
11730 # shouldn't happen, but not a serious error.
11731 # We were accumulating -csc text for block type
11732 # $accumulating_text_for_block and unexpectedly
11733 # encountered a '{' for block type $block_type.
11740 && $csc_new_statement_ok
11741 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
11742 && $token =~ /$closing_side_comment_list_pattern/o )
11744 set_block_text_accumulator($i);
11748 # note: ignoring type 'q' because of tricks being played
11749 # with 'q' for hanging side comments
11750 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
11751 $csc_new_statement_ok =
11752 ( $block_type || $type eq 'J' || $type eq ';' );
11755 && $accumulating_text_for_block
11756 && $levels_to_go[$i] == $leading_block_text_level )
11758 reset_block_text_accumulator();
11761 accumulate_block_text($i);
11766 # Treat an 'else' block specially by adding preceding 'if' and
11767 # 'elsif' text. Otherwise, the 'end else' is not helpful,
11768 # especially for cuddled-else formatting.
11769 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
11770 $block_leading_text =
11771 make_else_csc_text( $i_terminal, $terminal_block_type,
11772 $block_leading_text, $rblock_leading_if_elsif_text );
11775 # if this line ends in a label then remember it for the next pass
11776 $csc_last_label = "";
11777 if ( $terminal_type eq 'J' ) {
11778 $csc_last_label = $tokens_to_go[$i_terminal];
11781 return ( $terminal_type, $i_terminal, $i_block_leading_text,
11782 $block_leading_text, $block_line_count, $block_label );
11786 sub make_else_csc_text {
11788 # create additional -csc text for an 'else' and optionally 'elsif',
11789 # depending on the value of switch
11790 # $rOpts_closing_side_comment_else_flag:
11792 # = 0 add 'if' text to trailing else
11793 # = 1 same as 0 plus:
11794 # add 'if' to 'elsif's if can fit in line length
11795 # add last 'elsif' to trailing else if can fit in one line
11796 # = 2 same as 1 but do not check if exceed line length
11798 # $rif_elsif_text = a reference to a list of all previous closing
11799 # side comments created for this if block
11801 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
11802 my $csc_text = $block_leading_text;
11804 if ( $block_type eq 'elsif'
11805 && $rOpts_closing_side_comment_else_flag == 0 )
11810 my $count = @{$rif_elsif_text};
11811 return $csc_text unless ($count);
11813 my $if_text = '[ if' . $rif_elsif_text->[0];
11815 # always show the leading 'if' text on 'else'
11816 if ( $block_type eq 'else' ) {
11817 $csc_text .= $if_text;
11820 # see if that's all
11821 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
11825 my $last_elsif_text = "";
11826 if ( $count > 1 ) {
11827 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
11828 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
11831 # tentatively append one more item
11832 my $saved_text = $csc_text;
11833 if ( $block_type eq 'else' ) {
11834 $csc_text .= $last_elsif_text;
11837 $csc_text .= ' ' . $if_text;
11840 # all done if no length checks requested
11841 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
11845 # undo it if line length exceeded
11847 length($csc_text) +
11848 length($block_type) +
11849 length( $rOpts->{'closing-side-comment-prefix'} ) +
11850 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
11851 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
11852 $csc_text = $saved_text;
11857 { # sub balance_csc_text
11872 sub balance_csc_text {
11874 # Append characters to balance a closing side comment so that editors
11875 # such as vim can correctly jump through code.
11877 # input = ## end foreach my $foo ( sort { $b ...
11878 # output = ## end foreach my $foo ( sort { $b ...})
11880 # NOTE: This routine does not currently filter out structures within
11881 # quoted text because the bounce algorithms in text editors do not
11882 # necessarily do this either (a version of vim was checked and
11883 # did not do this).
11885 # Some complex examples which will cause trouble for some editors:
11886 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
11887 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
11888 # if ( $1 eq '{' ) {
11889 # test file test1/braces.pl has many such examples.
11893 # loop to examine characters one-by-one, RIGHT to LEFT and
11894 # build a balancing ending, LEFT to RIGHT.
11895 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
11897 my $char = substr( $csc, $pos, 1 );
11899 # ignore everything except structural characters
11900 next unless ( $matching_char{$char} );
11902 # pop most recently appended character
11903 my $top = chop($csc);
11905 # push it back plus the mate to the newest character
11906 # unless they balance each other.
11907 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
11910 # return the balanced string
11915 sub add_closing_side_comment {
11917 # add closing side comments after closing block braces if -csc used
11918 my $cscw_block_comment;
11920 #---------------------------------------------------------------
11921 # Step 1: loop through all tokens of this line to accumulate
11922 # the text needed to create the closing side comments. Also see
11923 # how the line ends.
11924 #---------------------------------------------------------------
11926 my ( $terminal_type, $i_terminal, $i_block_leading_text,
11927 $block_leading_text, $block_line_count, $block_label )
11928 = accumulate_csc_text();
11930 #---------------------------------------------------------------
11931 # Step 2: make the closing side comment if this ends a block
11932 #---------------------------------------------------------------
11933 my $have_side_comment = $i_terminal != $max_index_to_go;
11935 # if this line might end in a block closure..
11937 $terminal_type eq '}'
11942 # the block is long enough
11943 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
11945 # or there is an existing comment to check
11946 || ( $have_side_comment
11947 && $rOpts->{'closing-side-comment-warnings'} )
11950 # .. and if this is one of the types of interest
11951 && $block_type_to_go[$i_terminal] =~
11952 /$closing_side_comment_list_pattern/o
11954 # .. but not an anonymous sub
11955 # These are not normally of interest, and their closing braces are
11956 # often followed by commas or semicolons anyway. This also avoids
11957 # possible erratic output due to line numbering inconsistencies
11958 # in the cases where their closing braces terminate a line.
11959 && $block_type_to_go[$i_terminal] ne 'sub'
11961 # ..and the corresponding opening brace must is not in this batch
11962 # (because we do not need to tag one-line blocks, although this
11963 # should also be caught with a positive -csci value)
11964 && $mate_index_to_go[$i_terminal] < 0
11969 # this is the last token (line doesn't have a side comment)
11970 !$have_side_comment
11972 # or the old side comment is a closing side comment
11973 || $tokens_to_go[$max_index_to_go] =~
11974 /$closing_side_comment_prefix_pattern/o
11979 # then make the closing side comment text
11980 if ($block_label) { $block_label .= " " }
11982 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
11984 # append any extra descriptive text collected above
11985 if ( $i_block_leading_text == $i_terminal ) {
11986 $token .= $block_leading_text;
11989 $token = balance_csc_text($token)
11990 if $rOpts->{'closing-side-comments-balanced'};
11992 $token =~ s/\s*$//; # trim any trailing whitespace
11994 # handle case of existing closing side comment
11995 if ($have_side_comment) {
11997 # warn if requested and tokens differ significantly
11998 if ( $rOpts->{'closing-side-comment-warnings'} ) {
11999 my $old_csc = $tokens_to_go[$max_index_to_go];
12000 my $new_csc = $token;
12001 $new_csc =~ s/\s+//g; # trim all whitespace
12002 $old_csc =~ s/\s+//g; # trim all whitespace
12003 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
12004 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
12005 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
12006 my $new_trailing_dots = $1;
12007 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
12009 # Patch to handle multiple closing side comments at
12010 # else and elsif's. These have become too complicated
12011 # to check, so if we see an indication of
12012 # '[ if' or '[ # elsif', then assume they were made
12014 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
12015 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
12017 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
12018 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
12021 # if old comment is contained in new comment,
12022 # only compare the common part.
12023 if ( length($new_csc) > length($old_csc) ) {
12024 $new_csc = substr( $new_csc, 0, length($old_csc) );
12027 # if the new comment is shorter and has been limited,
12028 # only compare the common part.
12029 if ( length($new_csc) < length($old_csc)
12030 && $new_trailing_dots )
12032 $old_csc = substr( $old_csc, 0, length($new_csc) );
12035 # any remaining difference?
12036 if ( $new_csc ne $old_csc ) {
12038 # just leave the old comment if we are below the threshold
12039 # for creating side comments
12040 if ( $block_line_count <
12041 $rOpts->{'closing-side-comment-interval'} )
12046 # otherwise we'll make a note of it
12050 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
12053 # save the old side comment in a new trailing block comment
12054 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
12057 $cscw_block_comment =
12058 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
12063 # No differences.. we can safely delete old comment if we
12064 # are below the threshold
12065 if ( $block_line_count <
12066 $rOpts->{'closing-side-comment-interval'} )
12069 unstore_token_to_go()
12070 if ( $types_to_go[$max_index_to_go] eq '#' );
12071 unstore_token_to_go()
12072 if ( $types_to_go[$max_index_to_go] eq 'b' );
12077 # switch to the new csc (unless we deleted it!)
12078 $tokens_to_go[$max_index_to_go] = $token if $token;
12081 # handle case of NO existing closing side comment
12084 # insert the new side comment into the output token stream
12086 my $block_type = '';
12087 my $type_sequence = '';
12088 my $container_environment =
12089 $container_environment_to_go[$max_index_to_go];
12090 my $level = $levels_to_go[$max_index_to_go];
12091 my $slevel = $nesting_depth_to_go[$max_index_to_go];
12092 my $no_internal_newlines = 0;
12094 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
12095 my $ci_level = $ci_levels_to_go[$max_index_to_go];
12096 my $in_continued_quote = 0;
12098 # first insert a blank token
12099 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
12101 # then the side comment
12102 insert_new_token_to_go( $token, $type, $slevel,
12103 $no_internal_newlines );
12106 return $cscw_block_comment;
12109 sub previous_nonblank_token {
12113 return "" if ( $im < 0 );
12114 if ( $types_to_go[$im] eq 'b' ) { $im--; }
12115 return "" if ( $im < 0 );
12116 $name = $tokens_to_go[$im];
12118 # prepend any sub name to an isolated -> to avoid unwanted alignments
12119 # [test case is test8/penco.pl]
12120 if ( $name eq '->' ) {
12122 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
12123 $name = $tokens_to_go[$im] . $name;
12129 sub send_lines_to_vertical_aligner {
12131 my ( $ri_first, $ri_last, $do_not_pad ) = @_;
12133 my $rindentation_list = [0]; # ref to indentations for each line
12135 # define the array @matching_token_to_go for the output tokens
12136 # which will be non-blank for each special token (such as =>)
12137 # for which alignment is required.
12138 set_vertical_alignment_markers( $ri_first, $ri_last );
12140 # flush if necessary to avoid unwanted alignment
12141 my $must_flush = 0;
12142 if ( @$ri_first > 1 ) {
12144 # flush before a long if statement
12145 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
12150 Perl::Tidy::VerticalAligner::flush();
12153 undo_ci( $ri_first, $ri_last );
12155 set_logical_padding( $ri_first, $ri_last );
12157 # loop to prepare each line for shipment
12158 my $n_last_line = @$ri_first - 1;
12160 for my $n ( 0 .. $n_last_line ) {
12161 my $ibeg = $$ri_first[$n];
12162 my $iend = $$ri_last[$n];
12164 my ( $rtokens, $rfields, $rpatterns ) =
12165 make_alignment_patterns( $ibeg, $iend );
12167 # Set flag to show how much level changes between this line
12168 # and the next line, if we have it.
12170 if ( $n < $n_last_line ) {
12171 my $ibegp = $$ri_first[ $n + 1 ];
12172 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
12175 my ( $indentation, $lev, $level_end, $terminal_type,
12176 $is_semicolon_terminated, $is_outdented_line )
12177 = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
12178 $ri_first, $ri_last, $rindentation_list, $ljump );
12180 # we will allow outdenting of long lines..
12181 my $outdent_long_lines = (
12183 # which are long quotes, if allowed
12184 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
12186 # which are long block comments, if allowed
12188 $types_to_go[$ibeg] eq '#'
12189 && $rOpts->{'outdent-long-comments'}
12191 # but not if this is a static block comment
12192 && !$is_static_block_comment
12197 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
12199 my $rvertical_tightness_flags =
12200 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
12201 $ri_first, $ri_last );
12203 # flush an outdented line to avoid any unwanted vertical alignment
12204 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12206 # Set a flag at the final ':' of a ternary chain to request
12207 # vertical alignment of the final term. Here is a
12208 # slightly complex example:
12210 # $self->{_text} = (
12212 # : $type eq 'item' ? "the $section entry"
12213 # : "the section on $section"
12217 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
12218 # : ' elsewhere in this document'
12221 my $is_terminal_ternary = 0;
12222 if ( $tokens_to_go[$ibeg] eq ':'
12223 || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
12225 my $last_leading_type = ":";
12227 my $iprev = $$ri_first[ $n - 1 ];
12228 $last_leading_type = $types_to_go[$iprev];
12230 if ( $terminal_type ne ';'
12231 && $n_last_line > $n
12232 && $level_end == $lev )
12234 my $inext = $$ri_first[ $n + 1 ];
12235 $level_end = $levels_to_go[$inext];
12236 $terminal_type = $types_to_go[$inext];
12239 $is_terminal_ternary = $last_leading_type eq ':'
12240 && ( ( $terminal_type eq ';' && $level_end <= $lev )
12241 || ( $terminal_type ne ':' && $level_end < $lev ) )
12243 # the terminal term must not contain any ternary terms, as in
12245 # $Is_MSWin32 ? ".\\echo$$"
12246 # : $Is_MacOS ? ":echo$$"
12247 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
12249 && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
12252 # send this new line down the pipe
12253 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
12254 Perl::Tidy::VerticalAligner::valign_input(
12261 $forced_breakpoint_to_go[$iend] || $in_comma_list,
12262 $outdent_long_lines,
12263 $is_terminal_ternary,
12264 $is_semicolon_terminated,
12266 $rvertical_tightness_flags,
12270 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
12272 # flush an outdented line to avoid any unwanted vertical alignment
12273 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12277 # Set flag indicating if this line ends in an opening
12278 # token and is very short, so that a blank line is not
12279 # needed if the subsequent line is a comment.
12280 # Examples of what we are looking for:
12286 $last_output_short_opening_token
12288 # line ends in opening token
12289 = $types_to_go[$iend] =~ /^[\{\(\[L]$/
12293 # line has either single opening token
12296 # or is a single token followed by opening token.
12297 # Note that sub identifiers have blanks like 'sub doit'
12298 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
12301 # and limit total to 10 character widths
12302 && token_sequence_length( $ibeg, $iend ) <= 10;
12304 } # end of loop to output each line
12306 # remember indentation of lines containing opening containers for
12307 # later use by sub set_adjusted_indentation
12308 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
12311 { # begin make_alignment_patterns
12313 my %block_type_map;
12318 # map related block names into a common name to
12320 %block_type_map = (
12331 # map certain keywords to the same 'if' class to align
12332 # long if/elsif sequences. [elsif.pl]
12338 'default' => 'given',
12339 'case' => 'switch',
12341 # treat an 'undef' similar to numbers and quotes
12346 sub make_alignment_patterns {
12348 # Here we do some important preliminary work for the
12349 # vertical aligner. We create three arrays for one
12350 # output line. These arrays contain strings that can
12351 # be tested by the vertical aligner to see if
12352 # consecutive lines can be aligned vertically.
12354 # The three arrays are indexed on the vertical
12355 # alignment fields and are:
12356 # @tokens - a list of any vertical alignment tokens for this line.
12357 # These are tokens, such as '=' '&&' '#' etc which
12358 # we want to might align vertically. These are
12359 # decorated with various information such as
12360 # nesting depth to prevent unwanted vertical
12361 # alignment matches.
12362 # @fields - the actual text of the line between the vertical alignment
12364 # @patterns - a modified list of token types, one for each alignment
12365 # field. These should normally each match before alignment is
12366 # allowed, even when the alignment tokens match.
12367 my ( $ibeg, $iend ) = @_;
12371 my $i_start = $ibeg;
12375 my @container_name = ("");
12376 my @multiple_comma_arrows = (undef);
12378 my $j = 0; # field index
12381 for $i ( $ibeg .. $iend ) {
12383 # Keep track of containers balanced on this line only.
12384 # These are used below to prevent unwanted cross-line alignments.
12385 # Unbalanced containers already avoid aligning across
12386 # container boundaries.
12387 if ( $tokens_to_go[$i] eq '(' ) {
12389 # if container is balanced on this line...
12390 my $i_mate = $mate_index_to_go[$i];
12391 if ( $i_mate > $i && $i_mate <= $iend ) {
12393 my $seqno = $type_sequence_to_go[$i];
12394 my $count = comma_arrow_count($seqno);
12395 $multiple_comma_arrows[$depth] = $count && $count > 1;
12397 # Append the previous token name to make the container name
12398 # more unique. This name will also be given to any commas
12399 # within this container, and it helps avoid undesirable
12400 # alignments of different types of containers.
12401 my $name = previous_nonblank_token($i);
12403 $container_name[$depth] = "+" . $name;
12405 # Make the container name even more unique if necessary.
12406 # If we are not vertically aligning this opening paren,
12407 # append a character count to avoid bad alignment because
12408 # it usually looks bad to align commas within containers
12409 # for which the opening parens do not align. Here
12410 # is an example very BAD alignment of commas (because
12411 # the atan2 functions are not all aligned):
12413 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
12414 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
12415 # $X * atan2( $X, 1 ) -
12416 # $Y * atan2( $Y, 1 );
12418 # On the other hand, it is usually okay to align commas if
12419 # opening parens align, such as:
12420 # glVertex3d( $cx + $s * $xs, $cy, $z );
12421 # glVertex3d( $cx, $cy + $s * $ys, $z );
12422 # glVertex3d( $cx - $s * $xs, $cy, $z );
12423 # glVertex3d( $cx, $cy - $s * $ys, $z );
12425 # To distinguish between these situations, we will
12426 # append the length of the line from the previous matching
12427 # token, or beginning of line, to the function name. This
12428 # will allow the vertical aligner to reject undesirable
12431 # if we are not aligning on this paren...
12432 if ( $matching_token_to_go[$i] eq '' ) {
12434 # Sum length from previous alignment, or start of line.
12436 ( $i_start == $ibeg )
12437 ? total_line_length( $i_start, $i - 1 )
12438 : token_sequence_length( $i_start, $i - 1 );
12440 # tack length onto the container name to make unique
12441 $container_name[$depth] .= "-" . $len;
12445 elsif ( $tokens_to_go[$i] eq ')' ) {
12446 $depth-- if $depth > 0;
12449 # if we find a new synchronization token, we are done with
12451 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
12453 my $tok = my $raw_tok = $matching_token_to_go[$i];
12455 # make separators in different nesting depths unique
12456 # by appending the nesting depth digit.
12457 if ( $raw_tok ne '#' ) {
12458 $tok .= "$nesting_depth_to_go[$i]";
12461 # also decorate commas with any container name to avoid
12462 # unwanted cross-line alignments.
12463 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
12464 if ( $container_name[$depth] ) {
12465 $tok .= $container_name[$depth];
12469 # Patch to avoid aligning leading and trailing if, unless.
12470 # Mark trailing if, unless statements with container names.
12471 # This makes them different from leading if, unless which
12472 # are not so marked at present. If we ever need to name
12473 # them too, we could use ci to distinguish them.
12474 # Example problem to avoid:
12475 # return ( 2, "DBERROR" )
12476 # if ( $retval == 2 );
12477 # if ( scalar @_ ) {
12478 # my ( $a, $b, $c, $d, $e, $f ) = @_;
12480 if ( $raw_tok eq '(' ) {
12481 my $ci = $ci_levels_to_go[$ibeg];
12482 if ( $container_name[$depth] =~ /^\+(if|unless)/
12485 $tok .= $container_name[$depth];
12489 # Decorate block braces with block types to avoid
12490 # unwanted alignments such as the following:
12491 # foreach ( @{$routput_array} ) { $fh->print($_) }
12492 # eval { $fh->close() };
12493 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
12494 my $block_type = $block_type_to_go[$i];
12496 # map certain related block types to allow
12497 # else blocks to align
12498 $block_type = $block_type_map{$block_type}
12499 if ( defined( $block_type_map{$block_type} ) );
12501 # remove sub names to allow one-line sub braces to align
12502 # regardless of name
12503 if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
12505 # allow all control-type blocks to align
12506 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
12508 $tok .= $block_type;
12511 # concatenate the text of the consecutive tokens to form
12514 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
12516 # store the alignment token for this field
12517 push( @tokens, $tok );
12519 # get ready for the next batch
12522 $patterns[$j] = "";
12525 # continue accumulating tokens
12526 # handle non-keywords..
12527 if ( $types_to_go[$i] ne 'k' ) {
12528 my $type = $types_to_go[$i];
12530 # Mark most things before arrows as a quote to
12531 # get them to line up. Testfile: mixed.pl.
12532 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
12533 my $next_type = $types_to_go[ $i + 1 ];
12534 my $i_next_nonblank =
12535 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12537 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
12540 # Patch to ignore leading minus before words,
12541 # by changing pattern 'mQ' into just 'Q',
12542 # so that we can align things like this:
12543 # Button => "Print letter \"~$_\"",
12544 # -command => [ sub { print "$_[0]\n" }, $_ ],
12545 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
12549 # patch to make numbers and quotes align
12550 if ( $type eq 'n' ) { $type = 'Q' }
12552 # patch to ignore any ! in patterns
12553 if ( $type eq '!' ) { $type = '' }
12555 $patterns[$j] .= $type;
12558 # for keywords we have to use the actual text
12561 my $tok = $tokens_to_go[$i];
12563 # but map certain keywords to a common string to allow
12565 $tok = $keyword_map{$tok}
12566 if ( defined( $keyword_map{$tok} ) );
12567 $patterns[$j] .= $tok;
12571 # done with this line .. join text of tokens to make the last field
12572 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
12573 return ( \@tokens, \@fields, \@patterns );
12576 } # end make_alignment_patterns
12578 { # begin unmatched_indexes
12580 # closure to keep track of unbalanced containers.
12581 # arrays shared by the routines in this block:
12582 my @unmatched_opening_indexes_in_this_batch;
12583 my @unmatched_closing_indexes_in_this_batch;
12584 my %comma_arrow_count;
12586 sub is_unbalanced_batch {
12587 @unmatched_opening_indexes_in_this_batch +
12588 @unmatched_closing_indexes_in_this_batch;
12591 sub comma_arrow_count {
12593 return $comma_arrow_count{$seqno};
12596 sub match_opening_and_closing_tokens {
12598 # Match up indexes of opening and closing braces, etc, in this batch.
12599 # This has to be done after all tokens are stored because unstoring
12600 # of tokens would otherwise cause trouble.
12602 @unmatched_opening_indexes_in_this_batch = ();
12603 @unmatched_closing_indexes_in_this_batch = ();
12604 %comma_arrow_count = ();
12605 my $comma_arrow_count_contained = 0;
12607 my ( $i, $i_mate, $token );
12608 foreach $i ( 0 .. $max_index_to_go ) {
12609 if ( $type_sequence_to_go[$i] ) {
12610 $token = $tokens_to_go[$i];
12611 if ( $token =~ /^[\(\[\{\?]$/ ) {
12612 push @unmatched_opening_indexes_in_this_batch, $i;
12614 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
12616 $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12617 if ( defined($i_mate) && $i_mate >= 0 ) {
12618 if ( $type_sequence_to_go[$i_mate] ==
12619 $type_sequence_to_go[$i] )
12621 $mate_index_to_go[$i] = $i_mate;
12622 $mate_index_to_go[$i_mate] = $i;
12623 my $seqno = $type_sequence_to_go[$i];
12624 if ( $comma_arrow_count{$seqno} ) {
12625 $comma_arrow_count_contained +=
12626 $comma_arrow_count{$seqno};
12630 push @unmatched_opening_indexes_in_this_batch,
12632 push @unmatched_closing_indexes_in_this_batch, $i;
12636 push @unmatched_closing_indexes_in_this_batch, $i;
12640 elsif ( $tokens_to_go[$i] eq '=>' ) {
12641 if (@unmatched_opening_indexes_in_this_batch) {
12642 my $j = $unmatched_opening_indexes_in_this_batch[-1];
12643 my $seqno = $type_sequence_to_go[$j];
12644 $comma_arrow_count{$seqno}++;
12648 return $comma_arrow_count_contained;
12651 sub save_opening_indentation {
12653 # This should be called after each batch of tokens is output. It
12654 # saves indentations of lines of all unmatched opening tokens.
12655 # These will be used by sub get_opening_indentation.
12657 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
12659 # we no longer need indentations of any saved indentations which
12660 # are unmatched closing tokens in this batch, because we will
12661 # never encounter them again. So we can delete them to keep
12662 # the hash size down.
12663 foreach (@unmatched_closing_indexes_in_this_batch) {
12664 my $seqno = $type_sequence_to_go[$_];
12665 delete $saved_opening_indentation{$seqno};
12668 # we need to save indentations of any unmatched opening tokens
12669 # in this batch because we may need them in a subsequent batch.
12670 foreach (@unmatched_opening_indexes_in_this_batch) {
12671 my $seqno = $type_sequence_to_go[$_];
12672 $saved_opening_indentation{$seqno} = [
12673 lookup_opening_indentation(
12674 $_, $ri_first, $ri_last, $rindentation_list
12679 } # end unmatched_indexes
12681 sub get_opening_indentation {
12683 # get the indentation of the line which output the opening token
12684 # corresponding to a given closing token in the current output batch.
12687 # $i_closing - index in this line of a closing token ')' '}' or ']'
12689 # $ri_first - reference to list of the first index $i for each output
12690 # line in this batch
12691 # $ri_last - reference to list of the last index $i for each output line
12693 # $rindentation_list - reference to a list containing the indentation
12694 # used for each line.
12697 # -the indentation of the line which contained the opening token
12698 # which matches the token at index $i_opening
12699 # -and its offset (number of columns) from the start of the line
12701 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
12703 # first, see if the opening token is in the current batch
12704 my $i_opening = $mate_index_to_go[$i_closing];
12705 my ( $indent, $offset, $is_leading, $exists );
12707 if ( $i_opening >= 0 ) {
12709 # it is..look up the indentation
12710 ( $indent, $offset, $is_leading ) =
12711 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
12712 $rindentation_list );
12715 # if not, it should have been stored in the hash by a previous batch
12717 my $seqno = $type_sequence_to_go[$i_closing];
12719 if ( $saved_opening_indentation{$seqno} ) {
12720 ( $indent, $offset, $is_leading ) =
12721 @{ $saved_opening_indentation{$seqno} };
12724 # some kind of serious error
12725 # (example is badfile.t)
12734 # if no sequence number it must be an unbalanced container
12742 return ( $indent, $offset, $is_leading, $exists );
12745 sub lookup_opening_indentation {
12747 # get the indentation of the line in the current output batch
12748 # which output a selected opening token
12751 # $i_opening - index of an opening token in the current output batch
12752 # whose line indentation we need
12753 # $ri_first - reference to list of the first index $i for each output
12754 # line in this batch
12755 # $ri_last - reference to list of the last index $i for each output line
12757 # $rindentation_list - reference to a list containing the indentation
12758 # used for each line. (NOTE: the first slot in
12759 # this list is the last returned line number, and this is
12760 # followed by the list of indentations).
12763 # -the indentation of the line which contained token $i_opening
12764 # -and its offset (number of columns) from the start of the line
12766 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
12768 my $nline = $rindentation_list->[0]; # line number of previous lookup
12770 # reset line location if necessary
12771 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
12773 # find the correct line
12774 unless ( $i_opening > $ri_last->[-1] ) {
12775 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
12778 # error - token index is out of bounds - shouldn't happen
12781 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
12783 report_definite_bug();
12784 $nline = $#{$ri_last};
12787 $rindentation_list->[0] =
12788 $nline; # save line number to start looking next call
12789 my $ibeg = $ri_start->[$nline];
12790 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
12791 my $is_leading = ( $ibeg == $i_opening );
12792 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
12796 my %is_if_elsif_else_unless_while_until_for_foreach;
12800 # These block types may have text between the keyword and opening
12801 # curly. Note: 'else' does not, but must be included to allow trailing
12802 # if/elsif text to be appended.
12803 # patch for SWITCH/CASE: added 'case' and 'when'
12804 @_ = qw(if elsif else unless while until for foreach case when);
12805 @is_if_elsif_else_unless_while_until_for_foreach{@_} =
12809 sub set_adjusted_indentation {
12811 # This routine has the final say regarding the actual indentation of
12812 # a line. It starts with the basic indentation which has been
12813 # defined for the leading token, and then takes into account any
12814 # options that the user has set regarding special indenting and
12817 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
12818 $rindentation_list, $level_jump )
12821 # we need to know the last token of this line
12822 my ( $terminal_type, $i_terminal ) =
12823 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
12825 my $is_outdented_line = 0;
12827 my $is_semicolon_terminated = $terminal_type eq ';'
12828 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
12830 ##########################################################
12831 # Section 1: set a flag and a default indentation
12833 # Most lines are indented according to the initial token.
12834 # But it is common to outdent to the level just after the
12835 # terminal token in certain cases...
12836 # adjust_indentation flag:
12837 # 0 - do not adjust
12839 # 2 - vertically align with opening token
12841 ##########################################################
12842 my $adjust_indentation = 0;
12843 my $default_adjust_indentation = $adjust_indentation;
12846 $opening_indentation, $opening_offset,
12847 $is_leading, $opening_exists
12850 # if we are at a closing token of some type..
12851 if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
12853 # get the indentation of the line containing the corresponding
12856 $opening_indentation, $opening_offset,
12857 $is_leading, $opening_exists
12859 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12860 $rindentation_list );
12862 # First set the default behavior:
12865 # default behavior is to outdent closing lines
12866 # of the form: "); }; ]; )->xxx;"
12867 $is_semicolon_terminated
12869 # and 'cuddled parens' of the form: ")->pack("
12871 $terminal_type eq '('
12872 && $types_to_go[$ibeg] eq ')'
12873 && ( $nesting_depth_to_go[$iend] + 1 ==
12874 $nesting_depth_to_go[$ibeg] )
12877 # and when the next line is at a lower indentation level
12878 # PATCH: and only if the style allows undoing continuation
12879 # for all closing token types. We should really wait until
12880 # the indentation of the next line is known and then make
12881 # a decision, but that would require another pass.
12882 || ( $level_jump < 0 && !$some_closing_token_indentation )
12885 $adjust_indentation = 1;
12888 # outdent something like '),'
12890 $terminal_type eq ','
12892 # allow just one character before the comma
12893 && $i_terminal == $ibeg + 1
12895 # require LIST environment; otherwise, we may outdent too much -
12896 # this can happen in calls without parentheses (overload.t);
12897 && $container_environment_to_go[$i_terminal] eq 'LIST'
12900 $adjust_indentation = 1;
12903 # undo continuation indentation of a terminal closing token if
12904 # it is the last token before a level decrease. This will allow
12905 # a closing token to line up with its opening counterpart, and
12906 # avoids a indentation jump larger than 1 level.
12907 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
12908 && $i_terminal == $ibeg )
12910 my $ci = $ci_levels_to_go[$ibeg];
12911 my $lev = $levels_to_go[$ibeg];
12912 my $next_type = $types_to_go[ $ibeg + 1 ];
12913 my $i_next_nonblank =
12914 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
12915 if ( $i_next_nonblank <= $max_index_to_go
12916 && $levels_to_go[$i_next_nonblank] < $lev )
12918 $adjust_indentation = 1;
12922 # YVES patch 1 of 2:
12923 # Undo ci of line with leading closing eval brace,
12924 # but not beyond the indention of the line with
12925 # the opening brace.
12926 if ( $block_type_to_go[$ibeg] eq 'eval'
12927 && !$rOpts->{'line-up-parentheses'}
12928 && !$rOpts->{'indent-closing-brace'} )
12931 $opening_indentation, $opening_offset,
12932 $is_leading, $opening_exists
12934 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12935 $rindentation_list );
12936 my $indentation = $leading_spaces_to_go[$ibeg];
12937 if ( defined($opening_indentation)
12938 && $indentation > $opening_indentation )
12940 $adjust_indentation = 1;
12944 $default_adjust_indentation = $adjust_indentation;
12946 # Now modify default behavior according to user request:
12947 # handle option to indent non-blocks of the form ); }; ];
12948 # But don't do special indentation to something like ')->pack('
12949 if ( !$block_type_to_go[$ibeg] ) {
12950 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
12952 if ( $i_terminal <= $ibeg + 1
12953 || $is_semicolon_terminated )
12955 $adjust_indentation = 2;
12958 $adjust_indentation = 0;
12961 elsif ( $cti == 2 ) {
12962 if ($is_semicolon_terminated) {
12963 $adjust_indentation = 3;
12966 $adjust_indentation = 0;
12969 elsif ( $cti == 3 ) {
12970 $adjust_indentation = 3;
12974 # handle option to indent blocks
12977 $rOpts->{'indent-closing-brace'}
12979 $i_terminal == $ibeg # isolated terminal '}'
12980 || $is_semicolon_terminated
12984 $adjust_indentation = 3;
12989 # if at ');', '};', '>;', and '];' of a terminal qw quote
12990 elsif ($$rpatterns[0] =~ /^qb*;$/
12991 && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
12993 if ( $closing_token_indentation{$1} == 0 ) {
12994 $adjust_indentation = 1;
12997 $adjust_indentation = 3;
13001 # if line begins with a ':', align it with any
13002 # previous line leading with corresponding ?
13003 elsif ( $types_to_go[$ibeg] eq ':' ) {
13005 $opening_indentation, $opening_offset,
13006 $is_leading, $opening_exists
13008 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13009 $rindentation_list );
13010 if ($is_leading) { $adjust_indentation = 2; }
13013 ##########################################################
13014 # Section 2: set indentation according to flag set above
13016 # Select the indentation object to define leading
13017 # whitespace. If we are outdenting something like '} } );'
13018 # then we want to use one level below the last token
13019 # ($i_terminal) in order to get it to fully outdent through
13021 ##########################################################
13024 my $level_end = $levels_to_go[$iend];
13026 if ( $adjust_indentation == 0 ) {
13027 $indentation = $leading_spaces_to_go[$ibeg];
13028 $lev = $levels_to_go[$ibeg];
13030 elsif ( $adjust_indentation == 1 ) {
13031 $indentation = $reduced_spaces_to_go[$i_terminal];
13032 $lev = $levels_to_go[$i_terminal];
13035 # handle indented closing token which aligns with opening token
13036 elsif ( $adjust_indentation == 2 ) {
13038 # handle option to align closing token with opening token
13039 $lev = $levels_to_go[$ibeg];
13041 # calculate spaces needed to align with opening token
13043 get_SPACES($opening_indentation) + $opening_offset;
13045 # Indent less than the previous line.
13047 # Problem: For -lp we don't exactly know what it was if there
13048 # were recoverable spaces sent to the aligner. A good solution
13049 # would be to force a flush of the vertical alignment buffer, so
13050 # that we would know. For now, this rule is used for -lp:
13052 # When the last line did not start with a closing token we will
13053 # be optimistic that the aligner will recover everything wanted.
13055 # This rule will prevent us from breaking a hierarchy of closing
13056 # tokens, and in a worst case will leave a closing paren too far
13057 # indented, but this is better than frequently leaving it not
13059 my $last_spaces = get_SPACES($last_indentation_written);
13060 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
13062 get_RECOVERABLE_SPACES($last_indentation_written);
13065 # reset the indentation to the new space count if it works
13066 # only options are all or none: nothing in-between looks good
13067 $lev = $levels_to_go[$ibeg];
13068 if ( $space_count < $last_spaces ) {
13069 if ($rOpts_line_up_parentheses) {
13070 my $lev = $levels_to_go[$ibeg];
13072 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13075 $indentation = $space_count;
13079 # revert to default if it doesn't work
13081 $space_count = leading_spaces_to_go($ibeg);
13082 if ( $default_adjust_indentation == 0 ) {
13083 $indentation = $leading_spaces_to_go[$ibeg];
13085 elsif ( $default_adjust_indentation == 1 ) {
13086 $indentation = $reduced_spaces_to_go[$i_terminal];
13087 $lev = $levels_to_go[$i_terminal];
13092 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
13095 # handle -icb (indented closing code block braces)
13096 # Updated method for indented block braces: indent one full level if
13097 # there is no continuation indentation. This will occur for major
13098 # structures such as sub, if, else, but not for things like map
13101 # Note: only code blocks without continuation indentation are
13102 # handled here (if, else, unless, ..). In the following snippet,
13103 # the terminal brace of the sort block will have continuation
13104 # indentation as shown so it will not be handled by the coding
13105 # here. We would have to undo the continuation indentation to do
13106 # this, but it probably looks ok as is. This is a possible future
13107 # update for semicolon terminated lines.
13109 # if ($sortby eq 'date' or $sortby eq 'size') {
13111 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
13116 if ( $block_type_to_go[$ibeg]
13117 && $ci_levels_to_go[$i_terminal] == 0 )
13119 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
13120 $indentation = $spaces + $rOpts_indent_columns;
13122 # NOTE: for -lp we could create a new indentation object, but
13123 # there is probably no need to do it
13126 # handle -icp and any -icb block braces which fall through above
13127 # test such as the 'sort' block mentioned above.
13130 # There are currently two ways to handle -icp...
13131 # One way is to use the indentation of the previous line:
13132 # $indentation = $last_indentation_written;
13134 # The other way is to use the indentation that the previous line
13135 # would have had if it hadn't been adjusted:
13136 $indentation = $last_unadjusted_indentation;
13138 # Current method: use the minimum of the two. This avoids
13139 # inconsistent indentation.
13140 if ( get_SPACES($last_indentation_written) <
13141 get_SPACES($indentation) )
13143 $indentation = $last_indentation_written;
13147 # use previous indentation but use own level
13148 # to cause list to be flushed properly
13149 $lev = $levels_to_go[$ibeg];
13152 # remember indentation except for multi-line quotes, which get
13154 unless ( $ibeg == 0 && $starting_in_quote ) {
13155 $last_indentation_written = $indentation;
13156 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
13157 $last_leading_token = $tokens_to_go[$ibeg];
13160 # be sure lines with leading closing tokens are not outdented more
13161 # than the line which contained the corresponding opening token.
13163 #############################################################
13164 # updated per bug report in alex_bug.pl: we must not
13165 # mess with the indentation of closing logical braces so
13166 # we must treat something like '} else {' as if it were
13167 # an isolated brace my $is_isolated_block_brace = (
13168 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
13169 #############################################################
13170 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
13171 && ( $iend == $ibeg
13172 || $is_if_elsif_else_unless_while_until_for_foreach{
13173 $block_type_to_go[$ibeg]
13176 # only do this for a ':; which is aligned with its leading '?'
13177 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
13178 if ( defined($opening_indentation)
13179 && !$is_isolated_block_brace
13180 && !$is_unaligned_colon )
13182 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
13183 $indentation = $opening_indentation;
13187 # remember the indentation of each line of this batch
13188 push @{$rindentation_list}, $indentation;
13190 # outdent lines with certain leading tokens...
13193 # must be first word of this batch
13199 # certain leading keywords if requested
13201 $rOpts->{'outdent-keywords'}
13202 && $types_to_go[$ibeg] eq 'k'
13203 && $outdent_keyword{ $tokens_to_go[$ibeg] }
13206 # or labels if requested
13207 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
13209 # or static block comments if requested
13210 || ( $types_to_go[$ibeg] eq '#'
13211 && $rOpts->{'outdent-static-block-comments'}
13212 && $is_static_block_comment )
13217 my $space_count = leading_spaces_to_go($ibeg);
13218 if ( $space_count > 0 ) {
13219 $space_count -= $rOpts_continuation_indentation;
13220 $is_outdented_line = 1;
13221 if ( $space_count < 0 ) { $space_count = 0 }
13223 # do not promote a spaced static block comment to non-spaced;
13224 # this is not normally necessary but could be for some
13225 # unusual user inputs (such as -ci = -i)
13226 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
13230 if ($rOpts_line_up_parentheses) {
13232 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13235 $indentation = $space_count;
13240 return ( $indentation, $lev, $level_end, $terminal_type,
13241 $is_semicolon_terminated, $is_outdented_line );
13245 sub set_vertical_tightness_flags {
13247 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
13249 # Define vertical tightness controls for the nth line of a batch.
13250 # We create an array of parameters which tell the vertical aligner
13251 # if we should combine this line with the next line to achieve the
13252 # desired vertical tightness. The array of parameters contains:
13254 # [0] type: 1=opening non-block 2=closing non-block
13255 # 3=opening block brace 4=closing block brace
13257 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
13258 # if closing: spaces of padding to use
13259 # [2] sequence number of container
13260 # [3] valid flag: do not append if this flag is false. Will be
13261 # true if appropriate -vt flag is set. Otherwise, Will be
13262 # made true only for 2 line container in parens with -lp
13264 # These flags are used by sub set_leading_whitespace in
13265 # the vertical aligner
13267 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
13269 #--------------------------------------------------------------
13270 # Vertical Tightness Flags Section 1:
13271 # Handle Lines 1 .. n-1 but not the last line
13272 # For non-BLOCK tokens, we will need to examine the next line
13273 # too, so we won't consider the last line.
13274 #--------------------------------------------------------------
13275 if ( $n < $n_last_line ) {
13277 #--------------------------------------------------------------
13278 # Vertical Tightness Flags Section 1a:
13279 # Look for Type 1, last token of this line is a non-block opening token
13280 #--------------------------------------------------------------
13281 my $ibeg_next = $$ri_first[ $n + 1 ];
13282 my $token_end = $tokens_to_go[$iend];
13283 my $iend_next = $$ri_last[ $n + 1 ];
13285 $type_sequence_to_go[$iend]
13286 && !$block_type_to_go[$iend]
13287 && $is_opening_token{$token_end}
13289 $opening_vertical_tightness{$token_end} > 0
13291 # allow 2-line method call to be closed up
13292 || ( $rOpts_line_up_parentheses
13293 && $token_end eq '('
13295 && $types_to_go[ $iend - 1 ] ne 'b' )
13300 # avoid multiple jumps in nesting depth in one line if
13302 my $ovt = $opening_vertical_tightness{$token_end};
13303 my $iend_next = $$ri_last[ $n + 1 ];
13306 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
13307 $nesting_depth_to_go[$ibeg_next] )
13311 # If -vt flag has not been set, mark this as invalid
13312 # and aligner will validate it if it sees the closing paren
13314 my $valid_flag = $ovt;
13315 @{$rvertical_tightness_flags} =
13316 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
13320 #--------------------------------------------------------------
13321 # Vertical Tightness Flags Section 1b:
13322 # Look for Type 2, first token of next line is a non-block closing
13323 # token .. and be sure this line does not have a side comment
13324 #--------------------------------------------------------------
13325 my $token_next = $tokens_to_go[$ibeg_next];
13326 if ( $type_sequence_to_go[$ibeg_next]
13327 && !$block_type_to_go[$ibeg_next]
13328 && $is_closing_token{$token_next}
13329 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
13331 my $ovt = $opening_vertical_tightness{$token_next};
13332 my $cvt = $closing_vertical_tightness{$token_next};
13335 # never append a trailing line like )->pack(
13336 # because it will throw off later alignment
13338 $nesting_depth_to_go[$ibeg_next] ==
13339 $nesting_depth_to_go[ $iend_next + 1 ] + 1
13344 $container_environment_to_go[$ibeg_next] ne 'LIST'
13348 # allow closing up 2-line method calls
13349 || ( $rOpts_line_up_parentheses
13350 && $token_next eq ')' )
13357 # decide which trailing closing tokens to append..
13359 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
13361 my $str = join( '',
13362 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
13364 # append closing token if followed by comment or ';'
13365 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
13369 my $valid_flag = $cvt;
13370 @{$rvertical_tightness_flags} = (
13372 $tightness{$token_next} == 2 ? 0 : 1,
13373 $type_sequence_to_go[$ibeg_next], $valid_flag,
13379 #--------------------------------------------------------------
13380 # Vertical Tightness Flags Section 1c:
13381 # Implement the Opening Token Right flag (Type 2)..
13382 # If requested, move an isolated trailing opening token to the end of
13383 # the previous line which ended in a comma. We could do this
13384 # in sub recombine_breakpoints but that would cause problems
13385 # with -lp formatting. The problem is that indentation will
13386 # quickly move far to the right in nested expressions. By
13387 # doing it after indentation has been set, we avoid changes
13388 # to the indentation. Actual movement of the token takes place
13389 # in sub valign_output_step_B.
13390 #--------------------------------------------------------------
13392 $opening_token_right{ $tokens_to_go[$ibeg_next] }
13394 # previous line is not opening
13395 # (use -sot to combine with it)
13396 && !$is_opening_token{$token_end}
13398 # previous line ended in one of these
13399 # (add other cases if necessary; '=>' and '.' are not necessary
13400 && !$block_type_to_go[$ibeg_next]
13402 # this is a line with just an opening token
13403 && ( $iend_next == $ibeg_next
13404 || $iend_next == $ibeg_next + 2
13405 && $types_to_go[$iend_next] eq '#' )
13407 # looks bad if we align vertically with the wrong container
13408 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
13411 my $valid_flag = 1;
13412 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13413 @{$rvertical_tightness_flags} =
13414 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
13417 #--------------------------------------------------------------
13418 # Vertical Tightness Flags Section 1d:
13419 # Stacking of opening and closing tokens (Type 2)
13420 #--------------------------------------------------------------
13422 my $token_beg_next = $tokens_to_go[$ibeg_next];
13424 # patch to make something like 'qw(' behave like an opening paren
13426 if ( $types_to_go[$ibeg_next] eq 'q' ) {
13427 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
13428 $token_beg_next = $1;
13432 if ( $is_closing_token{$token_end}
13433 && $is_closing_token{$token_beg_next} )
13435 $stackable = $stack_closing_token{$token_beg_next}
13436 unless ( $block_type_to_go[$ibeg_next] )
13437 ; # shouldn't happen; just checking
13439 elsif ($is_opening_token{$token_end}
13440 && $is_opening_token{$token_beg_next} )
13442 $stackable = $stack_opening_token{$token_beg_next}
13443 unless ( $block_type_to_go[$ibeg_next] )
13444 ; # shouldn't happen; just checking
13449 my $is_semicolon_terminated;
13450 if ( $n + 1 == $n_last_line ) {
13451 my ( $terminal_type, $i_terminal ) = terminal_type(
13452 \@types_to_go, \@block_type_to_go,
13453 $ibeg_next, $iend_next
13455 $is_semicolon_terminated = $terminal_type eq ';'
13456 && $nesting_depth_to_go[$iend_next] <
13457 $nesting_depth_to_go[$ibeg_next];
13460 # this must be a line with just an opening token
13461 # or end in a semicolon
13463 $is_semicolon_terminated
13464 || ( $iend_next == $ibeg_next
13465 || $iend_next == $ibeg_next + 2
13466 && $types_to_go[$iend_next] eq '#' )
13469 my $valid_flag = 1;
13470 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13471 @{$rvertical_tightness_flags} =
13472 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
13478 #--------------------------------------------------------------
13479 # Vertical Tightness Flags Section 2:
13480 # Handle type 3, opening block braces on last line of the batch
13481 # Check for a last line with isolated opening BLOCK curly
13482 #--------------------------------------------------------------
13483 elsif ($rOpts_block_brace_vertical_tightness
13485 && $types_to_go[$iend] eq '{'
13486 && $block_type_to_go[$iend] =~
13487 /$block_brace_vertical_tightness_pattern/o )
13489 @{$rvertical_tightness_flags} =
13490 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
13493 #--------------------------------------------------------------
13494 # Vertical Tightness Flags Section 3:
13495 # Handle type 4, a closing block brace on the last line of the batch Check
13496 # for a last line with isolated closing BLOCK curly
13497 #--------------------------------------------------------------
13498 elsif ($rOpts_stack_closing_block_brace
13500 && $block_type_to_go[$iend]
13501 && $types_to_go[$iend] eq '}' )
13503 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
13504 @{$rvertical_tightness_flags} =
13505 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
13508 # pack in the sequence numbers of the ends of this line
13509 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
13510 $rvertical_tightness_flags->[5] = get_seqno($iend);
13511 return $rvertical_tightness_flags;
13516 # get opening and closing sequence numbers of a token for the vertical
13517 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
13518 # to be treated somewhat like opening and closing tokens for stacking
13519 # tokens by the vertical aligner.
13521 my $seqno = $type_sequence_to_go[$ii];
13522 if ( $types_to_go[$ii] eq 'q' ) {
13525 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
13528 if ( !$ending_in_quote ) {
13529 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
13537 my %is_vertical_alignment_type;
13538 my %is_vertical_alignment_keyword;
13539 my %is_terminal_alignment_type;
13543 # Removed =~ from list to improve chances of alignment
13545 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
13546 { ? : => && || // ~~ !~~
13548 @is_vertical_alignment_type{@_} = (1) x scalar(@_);
13550 # only align these at end of line
13552 @is_terminal_alignment_type{@_} = (1) x scalar(@_);
13554 # eq and ne were removed from this list to improve alignment chances
13555 @_ = qw(if unless and or err for foreach while until);
13556 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
13559 sub set_vertical_alignment_markers {
13561 # This routine takes the first step toward vertical alignment of the
13562 # lines of output text. It looks for certain tokens which can serve as
13563 # vertical alignment markers (such as an '=').
13565 # Method: We look at each token $i in this output batch and set
13566 # $matching_token_to_go[$i] equal to those tokens at which we would
13567 # accept vertical alignment.
13569 # nothing to do if we aren't allowed to change whitespace
13570 if ( !$rOpts_add_whitespace ) {
13571 for my $i ( 0 .. $max_index_to_go ) {
13572 $matching_token_to_go[$i] = '';
13577 my ( $ri_first, $ri_last ) = @_;
13579 # remember the index of last nonblank token before any sidecomment
13580 my $i_terminal = $max_index_to_go;
13581 if ( $types_to_go[$i_terminal] eq '#' ) {
13582 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
13583 if ( $i_terminal > 0 ) { --$i_terminal }
13587 # look at each line of this batch..
13588 my $last_vertical_alignment_before_index;
13589 my $vert_last_nonblank_type;
13590 my $vert_last_nonblank_token;
13591 my $vert_last_nonblank_block_type;
13592 my $max_line = @$ri_first - 1;
13593 my ( $i, $type, $token, $block_type, $alignment_type );
13594 my ( $ibeg, $iend, $line );
13596 foreach $line ( 0 .. $max_line ) {
13597 $ibeg = $$ri_first[$line];
13598 $iend = $$ri_last[$line];
13599 $last_vertical_alignment_before_index = -1;
13600 $vert_last_nonblank_type = '';
13601 $vert_last_nonblank_token = '';
13602 $vert_last_nonblank_block_type = '';
13604 # look at each token in this output line..
13605 foreach $i ( $ibeg .. $iend ) {
13606 $alignment_type = '';
13607 $type = $types_to_go[$i];
13608 $block_type = $block_type_to_go[$i];
13609 $token = $tokens_to_go[$i];
13611 # check for flag indicating that we should not align
13613 if ( $matching_token_to_go[$i] ) {
13614 $matching_token_to_go[$i] = '';
13618 #--------------------------------------------------------
13619 # First see if we want to align BEFORE this token
13620 #--------------------------------------------------------
13622 # The first possible token that we can align before
13623 # is index 2 because: 1) it doesn't normally make sense to
13624 # align before the first token and 2) the second
13625 # token must be a blank if we are to align before
13627 if ( $i < $ibeg + 2 ) { }
13629 # must follow a blank token
13630 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
13632 # align a side comment --
13633 elsif ( $type eq '#' ) {
13637 # it is a static side comment
13639 $rOpts->{'static-side-comments'}
13640 && $token =~ /$static_side_comment_pattern/o
13643 # or a closing side comment
13644 || ( $vert_last_nonblank_block_type
13646 /$closing_side_comment_prefix_pattern/o )
13649 $alignment_type = $type;
13650 } ## Example of a static side comment
13653 # otherwise, do not align two in a row to create a
13655 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
13657 # align before one of these keywords
13658 # (within a line, since $i>1)
13659 elsif ( $type eq 'k' ) {
13661 # /^(if|unless|and|or|eq|ne)$/
13662 if ( $is_vertical_alignment_keyword{$token} ) {
13663 $alignment_type = $token;
13667 # align before one of these types..
13668 # Note: add '.' after new vertical aligner is operational
13669 elsif ( $is_vertical_alignment_type{$type} ) {
13670 $alignment_type = $token;
13672 # Do not align a terminal token. Although it might
13673 # occasionally look ok to do this, this has been found to be
13674 # a good general rule. The main problems are:
13675 # (1) that the terminal token (such as an = or :) might get
13676 # moved far to the right where it is hard to see because
13677 # nothing follows it, and
13678 # (2) doing so may prevent other good alignments.
13679 # Current exceptions are && and ||
13680 if ( $i == $iend || $i >= $i_terminal ) {
13681 $alignment_type = ""
13682 unless ( $is_terminal_alignment_type{$type} );
13685 # Do not align leading ': (' or '. ('. This would prevent
13686 # alignment in something like the following:
13688 # ( $input_line_number < 10 ) ? " "
13689 # : ( $input_line_number < 100 ) ? " "
13693 # ( $case_matters ? $accessor : " lc($accessor) " )
13694 # . ( $yesno ? " eq " : " ne " )
13695 if ( $i == $ibeg + 2
13696 && $types_to_go[$ibeg] =~ /^[\.\:]$/
13697 && $types_to_go[ $i - 1 ] eq 'b' )
13699 $alignment_type = "";
13702 # For a paren after keyword, only align something like this:
13704 # elsif ( $b ) { &b }
13705 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
13706 $alignment_type = ""
13707 unless $vert_last_nonblank_token =~
13708 /^(if|unless|elsif)$/;
13711 # be sure the alignment tokens are unique
13712 # This didn't work well: reason not determined
13713 # if ($token ne $type) {$alignment_type .= $type}
13716 # NOTE: This is deactivated because it causes the previous
13717 # if/elsif alignment to fail
13718 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
13719 #{ $alignment_type = $type; }
13721 if ($alignment_type) {
13722 $last_vertical_alignment_before_index = $i;
13725 #--------------------------------------------------------
13726 # Next see if we want to align AFTER the previous nonblank
13727 #--------------------------------------------------------
13729 # We want to line up ',' and interior ';' tokens, with the added
13730 # space AFTER these tokens. (Note: interior ';' is included
13731 # because it may occur in short blocks).
13734 # we haven't already set it
13737 # and its not the first token of the line
13740 # and it follows a blank
13741 && $types_to_go[ $i - 1 ] eq 'b'
13743 # and previous token IS one of these:
13744 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
13746 # and it's NOT one of these
13747 && ( $type !~ /^[b\#\)\]\}]$/ )
13749 # then go ahead and align
13753 $alignment_type = $vert_last_nonblank_type;
13756 #--------------------------------------------------------
13757 # then store the value
13758 #--------------------------------------------------------
13759 $matching_token_to_go[$i] = $alignment_type;
13760 if ( $type ne 'b' ) {
13761 $vert_last_nonblank_type = $type;
13762 $vert_last_nonblank_token = $token;
13763 $vert_last_nonblank_block_type = $block_type;
13770 sub terminal_type {
13772 # returns type of last token on this line (terminal token), as follows:
13773 # returns # for a full-line comment
13774 # returns ' ' for a blank line
13775 # otherwise returns final token type
13777 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
13779 # check for full-line comment..
13780 if ( $$rtype[$ibeg] eq '#' ) {
13781 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
13785 # start at end and walk backwards..
13786 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
13788 # skip past any side comment and blanks
13789 next if ( $$rtype[$i] eq 'b' );
13790 next if ( $$rtype[$i] eq '#' );
13792 # found it..make sure it is a BLOCK termination,
13793 # but hide a terminal } after sort/grep/map because it is not
13794 # necessarily the end of the line. (terminal.t)
13795 my $terminal_type = $$rtype[$i];
13797 $terminal_type eq '}'
13798 && ( !$$rblock_type[$i]
13799 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
13802 $terminal_type = 'b';
13804 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
13808 return wantarray ? ( ' ', $ibeg ) : ' ';
13812 { # set_bond_strengths
13814 my %is_good_keyword_breakpoint;
13815 my %is_lt_gt_le_ge;
13817 my %binary_bond_strength;
13824 sub bias_table_key {
13825 my ( $type, $token ) = @_;
13826 my $bias_table_key = $type;
13827 if ( $type eq 'k' ) {
13828 $bias_table_key = $token;
13829 if ( $token eq 'err' ) { $bias_table_key = 'or' }
13831 return $bias_table_key;
13834 sub set_bond_strengths {
13838 @_ = qw(if unless while until for foreach);
13839 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
13841 @_ = qw(lt gt le ge);
13842 @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
13844 # The decision about where to break a line depends upon a "bond
13845 # strength" between tokens. The LOWER the bond strength, the MORE
13846 # likely a break. A bond strength may be any value but to simplify
13847 # things there are several pre-defined strength levels:
13849 # NO_BREAK => 10000;
13850 # VERY_STRONG => 100;
13854 # VERY_WEAK => 0.55;
13856 # The strength values are based on trial-and-error, and need to be
13857 # tweaked occasionally to get desired results. Some comments:
13859 # 1. Only relative strengths are important. small differences
13860 # in strengths can make big formatting differences.
13861 # 2. Each indentation level adds one unit of bond strength.
13862 # 3. A value of NO_BREAK makes an unbreakable bond
13863 # 4. A value of VERY_WEAK is the strength of a ','
13864 # 5. Values below NOMINAL are considered ok break points.
13865 # 6. Values above NOMINAL are considered poor break points.
13867 # The bond strengths should roughly follow precedence order where
13868 # possible. If you make changes, please check the results very
13869 # carefully on a variety of scripts. Testing with the -extrude
13870 # options is particularly helpful in exercising all of the rules.
13872 # Wherever possible, bond strengths are defined in the following
13873 # tables. There are two main stages to setting bond strengths and
13874 # two types of tables:
13876 # The first stage involves looking at each token individually and
13877 # defining left and right bond strengths, according to if we want
13878 # to break to the left or right side, and how good a break point it
13879 # is. For example tokens like =, ||, && make good break points and
13880 # will have low strengths, but one might want to break on either
13881 # side to put them at the end of one line or beginning of the next.
13883 # The second stage involves looking at certain pairs of tokens and
13884 # defining a bond strength for that particular pair. This second
13885 # stage has priority.
13887 #---------------------------------------------------------------
13888 # Bond Strength BEGIN Section 1.
13889 # Set left and right bond strengths of individual tokens.
13890 #---------------------------------------------------------------
13892 # NOTE: NO_BREAK's set in this section first are HINTS which will
13893 # probably not be honored. Essential NO_BREAKS's should be set in
13894 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
13895 # of this subroutine.
13897 # Note that we are setting defaults in this section. The user
13898 # cannot change bond strengths but can cause the left and right
13899 # bond strengths of any token type to be swapped through the use of
13900 # the -wba and -wbb flags. In this way the user can determine if a
13901 # breakpoint token should appear at the end of one line or the
13902 # beginning of the next line.
13904 # The hash keys in this section are token types, plus the text of
13905 # certain keywords like 'or', 'and'.
13907 # no break around possible filehandle
13908 $left_bond_strength{'Z'} = NO_BREAK;
13909 $right_bond_strength{'Z'} = NO_BREAK;
13911 # never put a bare word on a new line:
13912 # example print (STDERR, "bla"); will fail with break after (
13913 $left_bond_strength{'w'} = NO_BREAK;
13915 # blanks always have infinite strength to force breaks after
13917 $right_bond_strength{'b'} = NO_BREAK;
13919 # try not to break on exponentation
13920 @_ = qw" ** .. ... <=> ";
13921 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13922 @right_bond_strength{@_} = (STRONG) x scalar(@_);
13924 # The comma-arrow has very low precedence but not a good break point
13925 $left_bond_strength{'=>'} = NO_BREAK;
13926 $right_bond_strength{'=>'} = NOMINAL;
13928 # ok to break after label
13929 $left_bond_strength{'J'} = NO_BREAK;
13930 $right_bond_strength{'J'} = NOMINAL;
13931 $left_bond_strength{'j'} = STRONG;
13932 $right_bond_strength{'j'} = STRONG;
13933 $left_bond_strength{'A'} = STRONG;
13934 $right_bond_strength{'A'} = STRONG;
13936 $left_bond_strength{'->'} = STRONG;
13937 $right_bond_strength{'->'} = VERY_STRONG;
13939 $left_bond_strength{'CORE::'} = NOMINAL;
13940 $right_bond_strength{'CORE::'} = NO_BREAK;
13942 # breaking AFTER modulus operator is ok:
13944 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13945 @right_bond_strength{@_} =
13946 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
13948 # Break AFTER math operators * and /
13950 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13951 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13953 # Break AFTER weakest math operators + and -
13954 # Make them weaker than * but a bit stronger than '.'
13956 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13957 @right_bond_strength{@_} =
13958 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
13960 # breaking BEFORE these is just ok:
13962 @right_bond_strength{@_} = (STRONG) x scalar(@_);
13963 @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
13965 # breaking before the string concatenation operator seems best
13966 # because it can be hard to see at the end of a line
13967 $right_bond_strength{'.'} = STRONG;
13968 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
13971 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13972 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13974 # make these a little weaker than nominal so that they get
13975 # favored for end-of-line characters
13976 @_ = qw"!= == =~ !~ ~~ !~~";
13977 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13978 @right_bond_strength{@_} =
13979 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
13981 # break AFTER these
13982 @_ = qw" < > | & >= <=";
13983 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
13984 @right_bond_strength{@_} =
13985 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
13987 # breaking either before or after a quote is ok
13988 # but bias for breaking before a quote
13989 $left_bond_strength{'Q'} = NOMINAL;
13990 $right_bond_strength{'Q'} = NOMINAL + 0.02;
13991 $left_bond_strength{'q'} = NOMINAL;
13992 $right_bond_strength{'q'} = NOMINAL;
13994 # starting a line with a keyword is usually ok
13995 $left_bond_strength{'k'} = NOMINAL;
13997 # we usually want to bond a keyword strongly to what immediately
13998 # follows, rather than leaving it stranded at the end of a line
13999 $right_bond_strength{'k'} = STRONG;
14001 $left_bond_strength{'G'} = NOMINAL;
14002 $right_bond_strength{'G'} = STRONG;
14004 # assignment operators
14006 = **= += *= &= <<= &&=
14007 -= /= |= >>= ||= //=
14012 # Default is to break AFTER various assignment operators
14013 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14014 @right_bond_strength{@_} =
14015 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
14017 # Default is to break BEFORE '&&' and '||' and '//'
14018 # set strength of '||' to same as '=' so that chains like
14019 # $a = $b || $c || $d will break before the first '||'
14020 $right_bond_strength{'||'} = NOMINAL;
14021 $left_bond_strength{'||'} = $right_bond_strength{'='};
14023 # same thing for '//'
14024 $right_bond_strength{'//'} = NOMINAL;
14025 $left_bond_strength{'//'} = $right_bond_strength{'='};
14027 # set strength of && a little higher than ||
14028 $right_bond_strength{'&&'} = NOMINAL;
14029 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
14031 $left_bond_strength{';'} = VERY_STRONG;
14032 $right_bond_strength{';'} = VERY_WEAK;
14033 $left_bond_strength{'f'} = VERY_STRONG;
14035 # make right strength of for ';' a little less than '='
14036 # to make for contents break after the ';' to avoid this:
14037 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
14038 # $number_of_fields )
14039 # and make it weaker than ',' and 'and' too
14040 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
14042 # The strengths of ?/: should be somewhere between
14043 # an '=' and a quote (NOMINAL),
14044 # make strength of ':' slightly less than '?' to help
14045 # break long chains of ? : after the colons
14046 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
14047 $right_bond_strength{':'} = NO_BREAK;
14048 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
14049 $right_bond_strength{'?'} = NO_BREAK;
14051 $left_bond_strength{','} = VERY_STRONG;
14052 $right_bond_strength{','} = VERY_WEAK;
14054 # remaining digraphs and trigraphs not defined above
14055 @_ = qw( :: <> ++ --);
14056 @left_bond_strength{@_} = (WEAK) x scalar(@_);
14057 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14059 # Set bond strengths of certain keywords
14060 # make 'or', 'err', 'and' slightly weaker than a ','
14061 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
14062 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
14063 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
14064 $left_bond_strength{'xor'} = NOMINAL;
14065 $right_bond_strength{'and'} = NOMINAL;
14066 $right_bond_strength{'or'} = NOMINAL;
14067 $right_bond_strength{'err'} = NOMINAL;
14068 $right_bond_strength{'xor'} = STRONG;
14070 #---------------------------------------------------------------
14071 # Bond Strength BEGIN Section 2.
14072 # Set binary rules for bond strengths between certain token types.
14073 #---------------------------------------------------------------
14075 # We have a little problem making tables which apply to the
14076 # container tokens. Here is a list of container tokens and
14079 # type tokens // meaning
14080 # { {, [, ( // indent
14081 # } }, ], ) // outdent
14082 # [ [ // left non-structural [ (enclosing an array index)
14083 # ] ] // right non-structural square bracket
14084 # ( ( // left non-structural paren
14085 # ) ) // right non-structural paren
14086 # L { // left non-structural curly brace (enclosing a key)
14087 # R } // right non-structural curly brace
14089 # Some rules apply to token types and some to just the token
14090 # itself. We solve the problem by combining type and token into a
14091 # new hash key for the container types.
14093 # If a rule applies to a token 'type' then we need to make rules
14094 # for each of these 'type.token' combinations:
14105 # If a rule applies to a token then we need to make rules for
14106 # these 'type.token' combinations:
14115 # allow long lines before final { in an if statement, as in:
14120 # Otherwise, the line before the { tends to be too short.
14122 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
14123 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
14125 # break on something like '} (', but keep this stronger than a ','
14126 # example is in 'howe.pl'
14127 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14128 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14130 # keep matrix and hash indices together
14131 # but make them a little below STRONG to allow breaking open
14132 # something like {'some-word'}{'some-very-long-word'} at the }{
14134 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14135 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14136 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14137 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14139 # increase strength to the point where a break in the following
14140 # will be after the opening paren rather than at the arrow:
14142 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
14144 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14145 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14146 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14147 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14148 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14149 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14151 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14152 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14153 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14154 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14156 #---------------------------------------------------------------
14157 # Binary NO_BREAK rules
14158 #---------------------------------------------------------------
14160 # use strict requires that bare word and => not be separated
14161 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
14162 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
14164 # Never break between a bareword and a following paren because
14165 # perl may give an error. For example, if a break is placed
14166 # between 'to_filehandle' and its '(' the following line will
14167 # give a syntax error [Carp.pm]: my( $no) =fileno(
14168 # to_filehandle( $in)) ;
14169 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
14170 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
14171 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
14172 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
14174 # use strict requires that bare word within braces not start new
14176 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
14178 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
14180 # use strict requires that bare word and => not be separated
14181 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
14183 # use strict does not allow separating type info from trailing { }
14184 # testfile is readmail.pl
14185 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
14186 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
14188 # As a defensive measure, do not break between a '(' and a
14189 # filehandle. In some cases, this can cause an error. For
14190 # example, the following program works:
14197 # But this program fails:
14205 # This is normally only a problem with the 'extrude' option
14206 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
14207 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
14209 # never break between sub name and opening paren
14210 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
14211 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
14213 # keep '}' together with ';'
14214 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
14216 # Breaking before a ++ can cause perl to guess wrong. For
14217 # example the following line will cause a syntax error
14218 # with -extrude if we break between '$i' and '++' [fixstyle2]
14219 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
14220 $nobreak_lhs{'++'} = NO_BREAK;
14222 # Do not break before a possible file handle
14223 $nobreak_lhs{'Z'} = NO_BREAK;
14225 # use strict hates bare words on any new line. For
14226 # example, a break before the underscore here provokes the
14227 # wrath of use strict:
14228 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
14229 $nobreak_rhs{'F'} = NO_BREAK;
14230 $nobreak_rhs{'CORE::'} = NO_BREAK;
14232 #---------------------------------------------------------------
14233 # Bond Strength BEGIN Section 3.
14234 # Define tables and values for applying a small bias to the above
14236 #---------------------------------------------------------------
14237 # Adding a small 'bias' to strengths is a simple way to make a line
14238 # break at the first of a sequence of identical terms. For
14239 # example, to force long string of conditional operators to break
14240 # with each line ending in a ':', we can add a small number to the
14241 # bond strength of each ':' (colon.t)
14242 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
14243 $delta_bias = 0.0001; # a very small strength level
14247 # patch-its always ok to break at end of line
14248 $nobreak_to_go[$max_index_to_go] = 0;
14250 # we start a new set of bias values for each line
14252 @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
14253 my $code_bias = -.01; # bias for closing block braces
14258 my $last_nonblank_type = $type;
14259 my $last_nonblank_token = $token;
14260 my $list_str = $left_bond_strength{'?'};
14262 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
14263 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
14266 # main loop to compute bond strengths between each pair of tokens
14267 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
14268 $last_type = $type;
14269 if ( $type ne 'b' ) {
14270 $last_nonblank_type = $type;
14271 $last_nonblank_token = $token;
14273 $type = $types_to_go[$i];
14275 # strength on both sides of a blank is the same
14276 if ( $type eq 'b' && $last_type ne 'b' ) {
14277 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
14281 $token = $tokens_to_go[$i];
14282 $block_type = $block_type_to_go[$i];
14284 $next_type = $types_to_go[$i_next];
14285 $next_token = $tokens_to_go[$i_next];
14286 $total_nesting_depth = $nesting_depth_to_go[$i_next];
14287 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
14288 $next_nonblank_type = $types_to_go[$i_next_nonblank];
14289 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14291 # We are computing the strength of the bond between the current
14292 # token and the NEXT token.
14294 #---------------------------------------------------------------
14295 # Bond Strength Section 1:
14296 # First Approximation.
14297 # Use minimum of individual left and right tabulated bond
14299 #---------------------------------------------------------------
14300 my $bsr = $right_bond_strength{$type};
14301 my $bsl = $left_bond_strength{$next_nonblank_type};
14303 # define right bond strengths of certain keywords
14304 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
14305 $bsr = $right_bond_strength{$token};
14307 elsif ( $token eq 'ne' or $token eq 'eq' ) {
14311 # set terminal bond strength to the nominal value
14312 # this will cause good preceding breaks to be retained
14313 if ( $i_next_nonblank > $max_index_to_go ) {
14317 # define right bond strengths of certain keywords
14318 if ( $next_nonblank_type eq 'k'
14319 && defined( $left_bond_strength{$next_nonblank_token} ) )
14321 $bsl = $left_bond_strength{$next_nonblank_token};
14323 elsif ($next_nonblank_token eq 'ne'
14324 or $next_nonblank_token eq 'eq' )
14328 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
14329 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
14332 # Use the minimum of the left and right strengths. Note: it might
14333 # seem that we would want to keep a NO_BREAK if either token has
14334 # this value. This didn't work, for example because in an arrow
14335 # list, it prevents the comma from separating from the following
14336 # bare word (which is probably quoted by its arrow). So necessary
14337 # NO_BREAK's have to be handled as special cases in the final
14339 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
14340 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
14341 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
14342 my $bond_str_1 = $bond_str;
14344 #---------------------------------------------------------------
14345 # Bond Strength Section 2:
14346 # Apply hardwired rules..
14347 #---------------------------------------------------------------
14349 # Patch to put terminal or clauses on a new line: Weaken the bond
14350 # at an || followed by die or similar keyword to make the terminal
14351 # or clause fall on a new line, like this:
14353 # my $class = shift
14354 # || die "Cannot add broadcast: No class identifier found";
14356 # Otherwise the break will be at the previous '=' since the || and
14357 # = have the same starting strength and the or is biased, like
14361 # shift || die "Cannot add broadcast: No class identifier found";
14363 # In any case if the user places a break at either the = or the ||
14364 # it should remain there.
14365 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
14366 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
14367 if ( $want_break_before{$token} && $i > 0 ) {
14368 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
14371 $bond_str -= $delta_bias;
14376 # good to break after end of code blocks
14377 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
14379 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
14380 $code_bias += $delta_bias;
14383 if ( $type eq 'k' ) {
14385 # allow certain control keywords to stand out
14386 if ( $next_nonblank_type eq 'k'
14387 && $is_last_next_redo_return{$token} )
14389 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
14392 # Don't break after keyword my. This is a quick fix for a
14393 # rare problem with perl. An example is this line from file
14396 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
14397 # $this->{'question'} ) )
14399 if ( $token eq 'my' ) {
14400 $bond_str = NO_BREAK;
14405 # good to break before 'if', 'unless', etc
14406 if ( $is_if_brace_follower{$next_nonblank_token} ) {
14407 $bond_str = VERY_WEAK;
14410 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
14412 # FIXME: needs more testing
14413 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
14414 $bond_str = $list_str if ( $bond_str > $list_str );
14417 # keywords like 'unless', 'if', etc, within statements
14419 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
14420 $bond_str = VERY_WEAK / 1.05;
14424 # try not to break before a comma-arrow
14425 elsif ( $next_nonblank_type eq '=>' ) {
14426 if ( $bond_str < STRONG ) { $bond_str = STRONG }
14429 #---------------------------------------------------------------
14430 # Additional hardwired NOBREAK rules
14431 #---------------------------------------------------------------
14433 # map1.t -- correct for a quirk in perl
14435 && $next_nonblank_type eq 'i'
14436 && $last_nonblank_type eq 'k'
14437 && $is_sort_map_grep{$last_nonblank_token} )
14439 # /^(sort|map|grep)$/ )
14441 $bond_str = NO_BREAK;
14444 # extrude.t: do not break before paren at:
14446 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
14447 $bond_str = NO_BREAK;
14450 # in older version of perl, use strict can cause problems with
14451 # breaks before bare words following opening parens. For example,
14452 # this will fail under older versions if a break is made between
14453 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
14454 # command"); close MAIL;
14455 if ( $type eq '{' ) {
14457 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
14459 # but it's fine to break if the word is followed by a '=>'
14460 # or if it is obviously a sub call
14461 my $i_next_next_nonblank = $i_next_nonblank + 1;
14462 my $next_next_type = $types_to_go[$i_next_next_nonblank];
14463 if ( $next_next_type eq 'b'
14464 && $i_next_nonblank < $max_index_to_go )
14466 $i_next_next_nonblank++;
14467 $next_next_type = $types_to_go[$i_next_next_nonblank];
14470 # We'll check for an old breakpoint and keep a leading
14471 # bareword if it was that way in the input file.
14472 # Presumably it was ok that way. For example, the
14473 # following would remain unchanged:
14476 # January, February, March, April,
14477 # May, June, July, August,
14478 # September, October, November, December,
14481 # This should be sufficient:
14483 !$old_breakpoint_to_go[$i]
14484 && ( $next_next_type eq ','
14485 || $next_next_type eq '}' )
14488 $bond_str = NO_BREAK;
14493 # Do not break between a possible filehandle and a ? or / and do
14494 # not introduce a break after it if there is no blank
14496 elsif ( $type eq 'Z' ) {
14501 # if there is no blank and we do not want one. Examples:
14502 # print $x++ # do not break after $x
14503 # print HTML"HELLO" # break ok after HTML
14506 && defined( $want_left_space{$next_type} )
14507 && $want_left_space{$next_type} == WS_NO
14510 # or we might be followed by the start of a quote
14511 || $next_nonblank_type =~ /^[\/\?]$/
14514 $bond_str = NO_BREAK;
14518 # Breaking before a ? before a quote can cause trouble if
14519 # they are not separated by a blank.
14520 # Example: a syntax error occurs if you break before the ? here
14521 # my$logic=join$all?' && ':' || ',@regexps;
14522 # From: Professional_Perl_Programming_Code/multifind.pl
14523 if ( $next_nonblank_type eq '?' ) {
14524 $bond_str = NO_BREAK
14525 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
14528 # Breaking before a . followed by a number
14529 # can cause trouble if there is no intervening space
14530 # Example: a syntax error occurs if you break before the .2 here
14531 # $str .= pack($endian.2, ensurrogate($ord));
14532 # From: perl58/Unicode.pm
14533 elsif ( $next_nonblank_type eq '.' ) {
14534 $bond_str = NO_BREAK
14535 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
14538 # patch to put cuddled elses back together when on multiple
14539 # lines, as in: } \n else \n { \n
14540 if ($rOpts_cuddled_else) {
14542 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
14543 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
14545 $bond_str = NO_BREAK;
14548 my $bond_str_2 = $bond_str;
14550 #---------------------------------------------------------------
14551 # End of hardwired rules
14552 #---------------------------------------------------------------
14554 #---------------------------------------------------------------
14555 # Bond Strength Section 3:
14556 # Apply table rules. These have priority over the above
14558 #---------------------------------------------------------------
14560 my $tabulated_bond_str;
14562 my $rtype = $next_nonblank_type;
14563 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
14564 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
14565 $rtype = $next_nonblank_type . $next_nonblank_token;
14568 if ( $binary_bond_strength{$ltype}{$rtype} ) {
14569 $bond_str = $binary_bond_strength{$ltype}{$rtype};
14570 $tabulated_bond_str = $bond_str;
14573 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
14574 $bond_str = NO_BREAK;
14575 $tabulated_bond_str = $bond_str;
14577 my $bond_str_3 = $bond_str;
14579 # If the hardwired rules conflict with the tabulated bond
14580 # strength then there is an inconsistency that should be fixed
14581 FORMATTER_DEBUG_FLAG_BOND_TABLES
14582 && $tabulated_bond_str
14584 && $bond_str_1 != $bond_str_2
14585 && $bond_str_2 != $tabulated_bond_str
14588 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
14591 #-----------------------------------------------------------------
14592 # Bond Strength Section 4:
14593 # Modify strengths of certain tokens which often occur in sequence
14594 # by adding a small bias to each one in turn so that the breaks
14595 # occur from left to right.
14597 # Note that we only changing strengths by small amounts here,
14598 # and usually increasing, so we should not be altering any NO_BREAKs.
14599 # Other routines which check for NO_BREAKs will use a tolerance
14600 # of one to avoid any problem.
14601 #-----------------------------------------------------------------
14603 # The bias tables use special keys
14604 my $left_key = bias_table_key( $type, $token );
14606 bias_table_key( $next_nonblank_type, $next_nonblank_token );
14608 # add any bias set by sub scan_list at old comma break points.
14609 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
14612 elsif ( defined( $bias{$left_key} ) ) {
14613 if ( !$want_break_before{$left_key} ) {
14614 $bias{$left_key} += $delta_bias;
14615 $bond_str += $bias{$left_key};
14620 if ( defined( $bias{$right_key} ) ) {
14621 if ( $want_break_before{$right_key} ) {
14623 # for leading '.' align all but 'short' quotes; the idea
14624 # is to not place something like "\n" on a single line.
14625 if ( $right_key eq '.' ) {
14627 $last_nonblank_type eq '.'
14630 $rOpts_short_concatenation_item_length )
14631 && ( $token !~ /^[\)\]\}]$/ )
14634 $bias{$right_key} += $delta_bias;
14638 $bias{$right_key} += $delta_bias;
14640 $bond_str += $bias{$right_key};
14643 my $bond_str_4 = $bond_str;
14645 #---------------------------------------------------------------
14646 # Bond Strength Section 5:
14647 # Fifth Approximation.
14648 # Take nesting depth into account by adding the nesting depth
14649 # to the bond strength.
14650 #---------------------------------------------------------------
14653 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
14654 if ( $total_nesting_depth > 0 ) {
14655 $strength = $bond_str + $total_nesting_depth;
14658 $strength = $bond_str;
14662 $strength = NO_BREAK;
14665 # always break after side comment
14666 if ( $type eq '#' ) { $strength = 0 }
14668 $bond_strength_to_go[$i] = $strength;
14670 FORMATTER_DEBUG_FLAG_BOND && do {
14671 my $str = substr( $token, 0, 15 );
14672 $str .= ' ' x ( 16 - length($str) );
14674 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
14677 } ## end sub set_bond_strengths
14680 sub pad_array_to_go {
14682 # to simplify coding in scan_list and set_bond_strengths, it helps
14683 # to create some extra blank tokens at the end of the arrays
14684 $tokens_to_go[ $max_index_to_go + 1 ] = '';
14685 $tokens_to_go[ $max_index_to_go + 2 ] = '';
14686 $types_to_go[ $max_index_to_go + 1 ] = 'b';
14687 $types_to_go[ $max_index_to_go + 2 ] = 'b';
14688 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
14689 $nesting_depth_to_go[$max_index_to_go];
14692 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
14693 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
14695 # shouldn't happen:
14696 unless ( get_saw_brace_error() ) {
14698 "Program bug in scan_list: hit nesting error which should have been caught\n"
14700 report_definite_bug();
14704 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
14709 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
14710 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
14714 { # begin scan_list
14717 $block_type, $current_depth,
14719 $i_last_nonblank_token, $last_colon_sequence_number,
14720 $last_nonblank_token, $last_nonblank_type,
14721 $last_nonblank_block_type, $last_old_breakpoint_count,
14722 $minimum_depth, $next_nonblank_block_type,
14723 $next_nonblank_token, $next_nonblank_type,
14724 $old_breakpoint_count, $starting_breakpoint_count,
14725 $starting_depth, $token,
14726 $type, $type_sequence,
14730 @breakpoint_stack, @breakpoint_undo_stack,
14731 @comma_index, @container_type,
14732 @identifier_count_stack, @index_before_arrow,
14733 @interrupted_list, @item_count_stack,
14734 @last_comma_index, @last_dot_index,
14735 @last_nonblank_type, @old_breakpoint_count_stack,
14736 @opening_structure_index_stack, @rfor_semicolon_list,
14737 @has_old_logical_breakpoints, @rand_or_list,
14741 # routine to define essential variables when we go 'up' to
14743 sub check_for_new_minimum_depth {
14745 if ( $depth < $minimum_depth ) {
14747 $minimum_depth = $depth;
14749 # these arrays need not retain values between calls
14750 $breakpoint_stack[$depth] = $starting_breakpoint_count;
14751 $container_type[$depth] = "";
14752 $identifier_count_stack[$depth] = 0;
14753 $index_before_arrow[$depth] = -1;
14754 $interrupted_list[$depth] = 1;
14755 $item_count_stack[$depth] = 0;
14756 $last_nonblank_type[$depth] = "";
14757 $opening_structure_index_stack[$depth] = -1;
14759 $breakpoint_undo_stack[$depth] = undef;
14760 $comma_index[$depth] = undef;
14761 $last_comma_index[$depth] = undef;
14762 $last_dot_index[$depth] = undef;
14763 $old_breakpoint_count_stack[$depth] = undef;
14764 $has_old_logical_breakpoints[$depth] = 0;
14765 $rand_or_list[$depth] = [];
14766 $rfor_semicolon_list[$depth] = [];
14767 $i_equals[$depth] = -1;
14769 # these arrays must retain values between calls
14770 if ( !defined( $has_broken_sublist[$depth] ) ) {
14771 $dont_align[$depth] = 0;
14772 $has_broken_sublist[$depth] = 0;
14773 $want_comma_break[$depth] = 0;
14778 # routine to decide which commas to break at within a container;
14780 # $bp_count = number of comma breakpoints set
14781 # $do_not_break_apart = a flag indicating if container need not
14783 sub set_comma_breakpoints {
14787 my $do_not_break_apart = 0;
14790 if ( $item_count_stack[$dd] ) {
14792 # handle commas not in containers...
14793 if ( $dont_align[$dd] ) {
14794 do_uncontained_comma_breaks($dd);
14797 # handle commas within containers...
14799 my $fbc = $forced_breakpoint_count;
14801 # always open comma lists not preceded by keywords,
14802 # barewords, identifiers (that is, anything that doesn't
14803 # look like a function call)
14804 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
14806 set_comma_breakpoints_do(
14808 $opening_structure_index_stack[$dd],
14810 $item_count_stack[$dd],
14811 $identifier_count_stack[$dd],
14813 $next_nonblank_type,
14814 $container_type[$dd],
14815 $interrupted_list[$dd],
14816 \$do_not_break_apart,
14819 $bp_count = $forced_breakpoint_count - $fbc;
14820 $do_not_break_apart = 0 if $must_break_open;
14823 return ( $bp_count, $do_not_break_apart );
14826 sub do_uncontained_comma_breaks {
14828 # Handle commas not in containers...
14829 # This is a catch-all routine for commas that we
14830 # don't know what to do with because the don't fall
14831 # within containers. We will bias the bond strength
14832 # to break at commas which ended lines in the input
14833 # file. This usually works better than just trying
14834 # to put as many items on a line as possible. A
14835 # downside is that if the input file is garbage it
14836 # won't work very well. However, the user can always
14837 # prevent following the old breakpoints with the
14841 my $old_comma_break_count = 0;
14842 foreach my $ii ( @{ $comma_index[$dd] } ) {
14843 if ( $old_breakpoint_to_go[$ii] ) {
14844 $old_comma_break_count++;
14845 $bond_strength_to_go[$ii] = $bias;
14847 # reduce bias magnitude to force breaks in order
14852 # Also put a break before the first comma if
14853 # (1) there was a break there in the input, and
14854 # (2) there was exactly one old break before the first comma break
14855 # (3) OLD: there are multiple old comma breaks
14856 # (3) NEW: there are one or more old comma breaks (see return example)
14858 # For example, we will follow the user and break after
14859 # 'print' in this snippet:
14861 # "conformability (Not the same dimension)\n",
14862 # "\t", $have, " is ", text_unit($hu), "\n",
14863 # "\t", $want, " is ", text_unit($wu), "\n",
14866 # Another example, just one comma, where we will break after
14869 # $x * cos($a) - $y * sin($a),
14870 # $x * sin($a) + $y * cos($a);
14872 # Breaking a print statement:
14874 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
14875 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
14877 # But we will not force a break after the opening paren here
14878 # (causes a blinker):
14879 # $heap->{stream}->set_output_filter(
14880 # poe::filter::reference->new('myotherfreezer') ),
14883 my $i_first_comma = $comma_index[$dd]->[0];
14884 if ( $old_breakpoint_to_go[$i_first_comma] ) {
14885 my $level_comma = $levels_to_go[$i_first_comma];
14888 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
14889 if ( $old_breakpoint_to_go[$ii] ) {
14891 last if ( $obp_count > 1 );
14893 if ( $levels_to_go[$ii] == $level_comma );
14897 # Changed rule from multiple old commas to just one here:
14898 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
14900 # Do not to break before an opening token because
14901 # it can lead to "blinkers".
14902 my $ibreakm = $ibreak;
14903 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
14904 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
14906 set_forced_breakpoint($ibreak);
14912 my %is_logical_container;
14915 @_ = qw# if elsif unless while and or err not && | || ? : ! #;
14916 @is_logical_container{@_} = (1) x scalar(@_);
14919 sub set_for_semicolon_breakpoints {
14921 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
14922 set_forced_breakpoint($_);
14926 sub set_logical_breakpoints {
14929 $item_count_stack[$dd] == 0
14930 && $is_logical_container{ $container_type[$dd] }
14932 || $has_old_logical_breakpoints[$dd]
14936 # Look for breaks in this order:
14939 foreach my $i ( 0 .. 3 ) {
14940 if ( $rand_or_list[$dd][$i] ) {
14941 foreach ( @{ $rand_or_list[$dd][$i] } ) {
14942 set_forced_breakpoint($_);
14945 # break at any 'if' and 'unless' too
14946 foreach ( @{ $rand_or_list[$dd][4] } ) {
14947 set_forced_breakpoint($_);
14949 $rand_or_list[$dd] = [];
14956 sub is_unbreakable_container {
14958 # never break a container of one of these types
14959 # because bad things can happen (map1.t)
14961 $is_sort_map_grep{ $container_type[$dd] };
14966 # This routine is responsible for setting line breaks for all lists,
14967 # so that hierarchical structure can be displayed and so that list
14968 # items can be vertically aligned. The output of this routine is
14969 # stored in the array @forced_breakpoint_to_go, which is used to set
14970 # final breakpoints.
14972 $starting_depth = $nesting_depth_to_go[0];
14975 $current_depth = $starting_depth;
14977 $last_colon_sequence_number = -1;
14978 $last_nonblank_token = ';';
14979 $last_nonblank_type = ';';
14980 $last_nonblank_block_type = ' ';
14981 $last_old_breakpoint_count = 0;
14982 $minimum_depth = $current_depth + 1; # forces update in check below
14983 $old_breakpoint_count = 0;
14984 $starting_breakpoint_count = $forced_breakpoint_count;
14987 $type_sequence = '';
14989 my $total_depth_variation = 0;
14990 my $i_old_assignment_break;
14991 my $depth_last = $starting_depth;
14993 check_for_new_minimum_depth($current_depth);
14995 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
14996 my $want_previous_breakpoint = -1;
14998 my $saw_good_breakpoint;
14999 my $i_line_end = -1;
15000 my $i_line_start = -1;
15002 # loop over all tokens in this batch
15003 while ( ++$i <= $max_index_to_go ) {
15004 if ( $type ne 'b' ) {
15005 $i_last_nonblank_token = $i - 1;
15006 $last_nonblank_type = $type;
15007 $last_nonblank_token = $token;
15008 $last_nonblank_block_type = $block_type;
15009 } ## end if ( $type ne 'b' )
15010 $type = $types_to_go[$i];
15011 $block_type = $block_type_to_go[$i];
15012 $token = $tokens_to_go[$i];
15013 $type_sequence = $type_sequence_to_go[$i];
15014 my $next_type = $types_to_go[ $i + 1 ];
15015 my $next_token = $tokens_to_go[ $i + 1 ];
15016 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
15017 $next_nonblank_type = $types_to_go[$i_next_nonblank];
15018 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15019 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15021 # set break if flag was set
15022 if ( $want_previous_breakpoint >= 0 ) {
15023 set_forced_breakpoint($want_previous_breakpoint);
15024 $want_previous_breakpoint = -1;
15027 $last_old_breakpoint_count = $old_breakpoint_count;
15028 if ( $old_breakpoint_to_go[$i] ) {
15030 $i_line_start = $i_next_nonblank;
15032 $old_breakpoint_count++;
15034 # Break before certain keywords if user broke there and
15035 # this is a 'safe' break point. The idea is to retain
15036 # any preferred breaks for sequential list operations,
15037 # like a schwartzian transform.
15038 if ($rOpts_break_at_old_keyword_breakpoints) {
15040 $next_nonblank_type eq 'k'
15041 && $is_keyword_returning_list{$next_nonblank_token}
15042 && ( $type =~ /^[=\)\]\}Riw]$/
15044 && $is_keyword_returning_list{$token} )
15048 # we actually have to set this break next time through
15049 # the loop because if we are at a closing token (such
15050 # as '}') which forms a one-line block, this break might
15052 $want_previous_breakpoint = $i;
15053 } ## end if ( $next_nonblank_type...)
15054 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
15056 # Break before attributes if user broke there
15057 if ($rOpts_break_at_old_attribute_breakpoints) {
15058 if ( $next_nonblank_type eq 'A' ) {
15059 $want_previous_breakpoint = $i;
15063 # remember an = break as possible good break point
15064 if ( $is_assignment{$type} ) {
15065 $i_old_assignment_break = $i;
15067 elsif ( $is_assignment{$next_nonblank_type} ) {
15068 $i_old_assignment_break = $i_next_nonblank;
15070 } ## end if ( $old_breakpoint_to_go...)
15071 next if ( $type eq 'b' );
15072 $depth = $nesting_depth_to_go[ $i + 1 ];
15074 $total_depth_variation += abs( $depth - $depth_last );
15075 $depth_last = $depth;
15077 # safety check - be sure we always break after a comment
15078 # Shouldn't happen .. an error here probably means that the
15079 # nobreak flag did not get turned off correctly during
15081 if ( $type eq '#' ) {
15082 if ( $i != $max_index_to_go ) {
15084 "Non-fatal program bug: backup logic needed to break after a comment\n"
15086 report_definite_bug();
15087 $nobreak_to_go[$i] = 0;
15088 set_forced_breakpoint($i);
15089 } ## end if ( $i != $max_index_to_go)
15090 } ## end if ( $type eq '#' )
15092 # Force breakpoints at certain tokens in long lines.
15093 # Note that such breakpoints will be undone later if these tokens
15094 # are fully contained within parens on a line.
15097 # break before a keyword within a line
15101 # if one of these keywords:
15102 && $token =~ /^(if|unless|while|until|for)$/
15104 # but do not break at something like '1 while'
15105 && ( $last_nonblank_type ne 'n' || $i > 2 )
15107 # and let keywords follow a closing 'do' brace
15108 && $last_nonblank_block_type ne 'do'
15113 # or container is broken (by side-comment, etc)
15114 || ( $next_nonblank_token eq '('
15115 && $mate_index_to_go[$i_next_nonblank] < $i )
15119 set_forced_breakpoint( $i - 1 );
15120 } ## end if ( $type eq 'k' && $i...)
15122 # remember locations of '||' and '&&' for possible breaks if we
15123 # decide this is a long logical expression.
15124 if ( $type eq '||' ) {
15125 push @{ $rand_or_list[$depth][2] }, $i;
15126 ++$has_old_logical_breakpoints[$depth]
15127 if ( ( $i == $i_line_start || $i == $i_line_end )
15128 && $rOpts_break_at_old_logical_breakpoints );
15129 } ## end if ( $type eq '||' )
15130 elsif ( $type eq '&&' ) {
15131 push @{ $rand_or_list[$depth][3] }, $i;
15132 ++$has_old_logical_breakpoints[$depth]
15133 if ( ( $i == $i_line_start || $i == $i_line_end )
15134 && $rOpts_break_at_old_logical_breakpoints );
15135 } ## end elsif ( $type eq '&&' )
15136 elsif ( $type eq 'f' ) {
15137 push @{ $rfor_semicolon_list[$depth] }, $i;
15139 elsif ( $type eq 'k' ) {
15140 if ( $token eq 'and' ) {
15141 push @{ $rand_or_list[$depth][1] }, $i;
15142 ++$has_old_logical_breakpoints[$depth]
15143 if ( ( $i == $i_line_start || $i == $i_line_end )
15144 && $rOpts_break_at_old_logical_breakpoints );
15145 } ## end if ( $token eq 'and' )
15147 # break immediately at 'or's which are probably not in a logical
15148 # block -- but we will break in logical breaks below so that
15149 # they do not add to the forced_breakpoint_count
15150 elsif ( $token eq 'or' ) {
15151 push @{ $rand_or_list[$depth][0] }, $i;
15152 ++$has_old_logical_breakpoints[$depth]
15153 if ( ( $i == $i_line_start || $i == $i_line_end )
15154 && $rOpts_break_at_old_logical_breakpoints );
15155 if ( $is_logical_container{ $container_type[$depth] } ) {
15158 if ($is_long_line) { set_forced_breakpoint($i) }
15159 elsif ( ( $i == $i_line_start || $i == $i_line_end )
15160 && $rOpts_break_at_old_logical_breakpoints )
15162 $saw_good_breakpoint = 1;
15164 } ## end else [ if ( $is_logical_container...)]
15165 } ## end elsif ( $token eq 'or' )
15166 elsif ( $token eq 'if' || $token eq 'unless' ) {
15167 push @{ $rand_or_list[$depth][4] }, $i;
15168 if ( ( $i == $i_line_start || $i == $i_line_end )
15169 && $rOpts_break_at_old_logical_breakpoints )
15171 set_forced_breakpoint($i);
15173 } ## end elsif ( $token eq 'if' ||...)
15174 } ## end elsif ( $type eq 'k' )
15175 elsif ( $is_assignment{$type} ) {
15176 $i_equals[$depth] = $i;
15179 if ($type_sequence) {
15181 # handle any postponed closing breakpoints
15182 if ( $token =~ /^[\)\]\}\:]$/ ) {
15183 if ( $type eq ':' ) {
15184 $last_colon_sequence_number = $type_sequence;
15186 # retain break at a ':' line break
15187 if ( ( $i == $i_line_start || $i == $i_line_end )
15188 && $rOpts_break_at_old_ternary_breakpoints )
15191 set_forced_breakpoint($i);
15193 # break at previous '='
15194 if ( $i_equals[$depth] > 0 ) {
15195 set_forced_breakpoint( $i_equals[$depth] );
15196 $i_equals[$depth] = -1;
15198 } ## end if ( ( $i == $i_line_start...))
15199 } ## end if ( $type eq ':' )
15200 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
15201 my $inc = ( $type eq ':' ) ? 0 : 1;
15202 set_forced_breakpoint( $i - $inc );
15203 delete $postponed_breakpoint{$type_sequence};
15205 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
15207 # set breaks at ?/: if they will get separated (and are
15208 # not a ?/: chain), or if the '?' is at the end of the
15210 elsif ( $token eq '?' ) {
15211 my $i_colon = $mate_index_to_go[$i];
15213 $i_colon <= 0 # the ':' is not in this batch
15214 || $i == 0 # this '?' is the first token of the line
15216 $max_index_to_go # or this '?' is the last token
15220 # don't break at a '?' if preceded by ':' on
15221 # this line of previous ?/: pair on this line.
15222 # This is an attempt to preserve a chain of ?/:
15223 # expressions (elsif2.t). And don't break if
15224 # this has a side comment.
15225 set_forced_breakpoint($i)
15227 $type_sequence == (
15228 $last_colon_sequence_number +
15229 TYPE_SEQUENCE_INCREMENT
15231 || $tokens_to_go[$max_index_to_go] eq '#'
15233 set_closing_breakpoint($i);
15234 } ## end if ( $i_colon <= 0 ||...)
15235 } ## end elsif ( $token eq '?' )
15236 } ## end if ($type_sequence)
15238 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
15240 #------------------------------------------------------------
15241 # Handle Increasing Depth..
15243 # prepare for a new list when depth increases
15244 # token $i is a '(','{', or '['
15245 #------------------------------------------------------------
15246 if ( $depth > $current_depth ) {
15248 $breakpoint_stack[$depth] = $forced_breakpoint_count;
15249 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
15250 $has_broken_sublist[$depth] = 0;
15251 $identifier_count_stack[$depth] = 0;
15252 $index_before_arrow[$depth] = -1;
15253 $interrupted_list[$depth] = 0;
15254 $item_count_stack[$depth] = 0;
15255 $last_comma_index[$depth] = undef;
15256 $last_dot_index[$depth] = undef;
15257 $last_nonblank_type[$depth] = $last_nonblank_type;
15258 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
15259 $opening_structure_index_stack[$depth] = $i;
15260 $rand_or_list[$depth] = [];
15261 $rfor_semicolon_list[$depth] = [];
15262 $i_equals[$depth] = -1;
15263 $want_comma_break[$depth] = 0;
15264 $container_type[$depth] =
15265 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
15266 ? $last_nonblank_token
15268 $has_old_logical_breakpoints[$depth] = 0;
15270 # if line ends here then signal closing token to break
15271 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
15273 set_closing_breakpoint($i);
15276 # Not all lists of values should be vertically aligned..
15277 $dont_align[$depth] =
15279 # code BLOCKS are handled at a higher level
15280 ( $block_type ne "" )
15282 # certain paren lists
15283 || ( $type eq '(' ) && (
15285 # it does not usually look good to align a list of
15286 # identifiers in a parameter list, as in:
15287 # my($var1, $var2, ...)
15288 # (This test should probably be refined, for now I'm just
15289 # testing for any keyword)
15290 ( $last_nonblank_type eq 'k' )
15292 # a trailing '(' usually indicates a non-list
15293 || ( $next_nonblank_type eq '(' )
15296 # patch to outdent opening brace of long if/for/..
15297 # statements (like this one). See similar coding in
15298 # set_continuation breaks. We have also catch it here for
15299 # short line fragments which otherwise will not go through
15300 # set_continuation_breaks.
15304 # if we have the ')' but not its '(' in this batch..
15305 && ( $last_nonblank_token eq ')' )
15306 && $mate_index_to_go[$i_last_nonblank_token] < 0
15308 # and user wants brace to left
15309 && !$rOpts->{'opening-brace-always-on-right'}
15311 && ( $type eq '{' ) # should be true
15312 && ( $token eq '{' ) # should be true
15315 set_forced_breakpoint( $i - 1 );
15316 } ## end if ( $block_type && ( ...))
15317 } ## end if ( $depth > $current_depth)
15319 #------------------------------------------------------------
15320 # Handle Decreasing Depth..
15322 # finish off any old list when depth decreases
15323 # token $i is a ')','}', or ']'
15324 #------------------------------------------------------------
15325 elsif ( $depth < $current_depth ) {
15327 check_for_new_minimum_depth($depth);
15329 # force all outer logical containers to break after we see on
15331 $has_old_logical_breakpoints[$depth] ||=
15332 $has_old_logical_breakpoints[$current_depth];
15334 # Patch to break between ') {' if the paren list is broken.
15335 # There is similar logic in set_continuation_breaks for
15336 # non-broken lists.
15338 && $next_nonblank_block_type
15339 && $interrupted_list[$current_depth]
15340 && $next_nonblank_type eq '{'
15341 && !$rOpts->{'opening-brace-always-on-right'} )
15343 set_forced_breakpoint($i);
15344 } ## end if ( $token eq ')' && ...
15346 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
15348 # set breaks at commas if necessary
15349 my ( $bp_count, $do_not_break_apart ) =
15350 set_comma_breakpoints($current_depth);
15352 my $i_opening = $opening_structure_index_stack[$current_depth];
15353 my $saw_opening_structure = ( $i_opening >= 0 );
15355 # this term is long if we had to break at interior commas..
15356 my $is_long_term = $bp_count > 0;
15358 # If this is a short container with one or more comma arrows,
15359 # then we will mark it as a long term to open it if requested.
15360 # $rOpts_comma_arrow_breakpoints =
15361 # 0 - open only if comma precedes closing brace
15362 # 1 - stable: except for one line blocks
15363 # 2 - try to form 1 line blocks
15365 # 4 - always open up if vt=0
15366 # 5 - stable: even for one line blocks if vt=0
15367 if ( !$is_long_term
15368 && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
15369 && $index_before_arrow[ $depth + 1 ] > 0
15370 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
15373 $is_long_term = $rOpts_comma_arrow_breakpoints == 4
15374 || ( $rOpts_comma_arrow_breakpoints == 0
15375 && $last_nonblank_token eq ',' )
15376 || ( $rOpts_comma_arrow_breakpoints == 5
15377 && $old_breakpoint_to_go[$i_opening] );
15378 } ## end if ( !$is_long_term &&...)
15380 # mark term as long if the length between opening and closing
15381 # parens exceeds allowed line length
15382 if ( !$is_long_term && $saw_opening_structure ) {
15383 my $i_opening_minus = find_token_starting_list($i_opening);
15385 # Note: we have to allow for one extra space after a
15386 # closing token so that we do not strand a comma or
15387 # semicolon, hence the '>=' here (oneline.t)
15389 excess_line_length( $i_opening_minus, $i ) >= 0;
15390 } ## end if ( !$is_long_term &&...)
15392 # We've set breaks after all comma-arrows. Now we have to
15393 # undo them if this can be a one-line block
15394 # (the only breakpoints set will be due to comma-arrows)
15397 # user doesn't require breaking after all comma-arrows
15398 ( $rOpts_comma_arrow_breakpoints != 0 )
15399 && ( $rOpts_comma_arrow_breakpoints != 4 )
15401 # and if the opening structure is in this batch
15402 && $saw_opening_structure
15404 # and either on the same old line
15406 $old_breakpoint_count_stack[$current_depth] ==
15407 $last_old_breakpoint_count
15409 # or user wants to form long blocks with arrows
15410 || $rOpts_comma_arrow_breakpoints == 2
15413 # and we made some breakpoints between the opening and closing
15414 && ( $breakpoint_undo_stack[$current_depth] <
15415 $forced_breakpoint_undo_count )
15417 # and this block is short enough to fit on one line
15418 # Note: use < because need 1 more space for possible comma
15423 undo_forced_breakpoint_stack(
15424 $breakpoint_undo_stack[$current_depth] );
15425 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
15427 # now see if we have any comma breakpoints left
15428 my $has_comma_breakpoints =
15429 ( $breakpoint_stack[$current_depth] !=
15430 $forced_breakpoint_count );
15432 # update broken-sublist flag of the outer container
15433 $has_broken_sublist[$depth] =
15434 $has_broken_sublist[$depth]
15435 || $has_broken_sublist[$current_depth]
15437 || $has_comma_breakpoints;
15439 # Having come to the closing ')', '}', or ']', now we have to decide if we
15440 # should 'open up' the structure by placing breaks at the opening and
15441 # closing containers. This is a tricky decision. Here are some of the
15442 # basic considerations:
15444 # -If this is a BLOCK container, then any breakpoints will have already
15445 # been set (and according to user preferences), so we need do nothing here.
15447 # -If we have a comma-separated list for which we can align the list items,
15448 # then we need to do so because otherwise the vertical aligner cannot
15449 # currently do the alignment.
15451 # -If this container does itself contain a container which has been broken
15452 # open, then it should be broken open to properly show the structure.
15454 # -If there is nothing to align, and no other reason to break apart,
15455 # then do not do it.
15457 # We will not break open the parens of a long but 'simple' logical expression.
15460 # This is an example of a simple logical expression and its formatting:
15462 # if ( $bigwasteofspace1 && $bigwasteofspace2
15463 # || $bigwasteofspace3 && $bigwasteofspace4 )
15465 # Most people would prefer this than the 'spacey' version:
15468 # $bigwasteofspace1 && $bigwasteofspace2
15469 # || $bigwasteofspace3 && $bigwasteofspace4
15472 # To illustrate the rules for breaking logical expressions, consider:
15476 # and ( exists $ids_excl_uc{$id_uc}
15477 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
15479 # This is on the verge of being difficult to read. The current default is to
15480 # open it up like this:
15485 # and ( exists $ids_excl_uc{$id_uc}
15486 # or grep $id_uc =~ /$_/, @ids_excl_uc )
15489 # This is a compromise which tries to avoid being too dense and to spacey.
15490 # A more spaced version would be:
15496 # exists $ids_excl_uc{$id_uc}
15497 # or grep $id_uc =~ /$_/, @ids_excl_uc
15501 # Some people might prefer the spacey version -- an option could be added. The
15502 # innermost expression contains a long block '( exists $ids_... ')'.
15504 # Here is how the logic goes: We will force a break at the 'or' that the
15505 # innermost expression contains, but we will not break apart its opening and
15506 # closing containers because (1) it contains no multi-line sub-containers itself,
15507 # and (2) there is no alignment to be gained by breaking it open like this
15510 # exists $ids_excl_uc{$id_uc}
15511 # or grep $id_uc =~ /$_/, @ids_excl_uc
15514 # (although this looks perfectly ok and might be good for long expressions). The
15515 # outer 'if' container, though, contains a broken sub-container, so it will be
15516 # broken open to avoid too much density. Also, since it contains no 'or's, there
15517 # will be a forced break at its 'and'.
15519 # set some flags telling something about this container..
15520 my $is_simple_logical_expression = 0;
15521 if ( $item_count_stack[$current_depth] == 0
15522 && $saw_opening_structure
15523 && $tokens_to_go[$i_opening] eq '('
15524 && $is_logical_container{ $container_type[$current_depth] }
15528 # This seems to be a simple logical expression with
15529 # no existing breakpoints. Set a flag to prevent
15531 if ( !$has_comma_breakpoints ) {
15532 $is_simple_logical_expression = 1;
15535 # This seems to be a simple logical expression with
15536 # breakpoints (broken sublists, for example). Break
15537 # at all 'or's and '||'s.
15539 set_logical_breakpoints($current_depth);
15541 } ## end if ( $item_count_stack...)
15544 && @{ $rfor_semicolon_list[$current_depth] } )
15546 set_for_semicolon_breakpoints($current_depth);
15548 # open up a long 'for' or 'foreach' container to allow
15549 # leading term alignment unless -lp is used.
15550 $has_comma_breakpoints = 1
15551 unless $rOpts_line_up_parentheses;
15552 } ## end if ( $is_long_term && ...)
15556 # breaks for code BLOCKS are handled at a higher level
15559 # we do not need to break at the top level of an 'if'
15561 && !$is_simple_logical_expression
15563 ## modification to keep ': (' containers vertically tight;
15564 ## but probably better to let user set -vt=1 to avoid
15565 ## inconsistency with other paren types
15566 ## && ($container_type[$current_depth] ne ':')
15568 # otherwise, we require one of these reasons for breaking:
15571 # - this term has forced line breaks
15572 $has_comma_breakpoints
15574 # - the opening container is separated from this batch
15575 # for some reason (comment, blank line, code block)
15576 # - this is a non-paren container spanning multiple lines
15577 || !$saw_opening_structure
15579 # - this is a long block contained in another breakable
15582 && $container_environment_to_go[$i_opening] ne
15588 # For -lp option, we must put a breakpoint before
15589 # the token which has been identified as starting
15590 # this indentation level. This is necessary for
15591 # proper alignment.
15592 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
15594 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
15595 if ( $i_opening + 1 < $max_index_to_go
15596 && $types_to_go[ $i_opening + 1 ] eq 'b' )
15598 $item = $leading_spaces_to_go[ $i_opening + 2 ];
15600 if ( defined($item) ) {
15601 my $i_start_2 = $item->get_STARTING_INDEX();
15603 defined($i_start_2)
15605 # we are breaking after an opening brace, paren,
15606 # so don't break before it too
15607 && $i_start_2 ne $i_opening
15611 # Only break for breakpoints at the same
15612 # indentation level as the opening paren
15613 my $test1 = $nesting_depth_to_go[$i_opening];
15614 my $test2 = $nesting_depth_to_go[$i_start_2];
15615 if ( $test2 == $test1 ) {
15616 set_forced_breakpoint( $i_start_2 - 1 );
15618 } ## end if ( defined($i_start_2...))
15619 } ## end if ( defined($item) )
15620 } ## end if ( $rOpts_line_up_parentheses...)
15622 # break after opening structure.
15623 # note: break before closing structure will be automatic
15624 if ( $minimum_depth <= $current_depth ) {
15626 set_forced_breakpoint($i_opening)
15627 unless ( $do_not_break_apart
15628 || is_unbreakable_container($current_depth) );
15630 # break at ',' of lower depth level before opening token
15631 if ( $last_comma_index[$depth] ) {
15632 set_forced_breakpoint( $last_comma_index[$depth] );
15635 # break at '.' of lower depth level before opening token
15636 if ( $last_dot_index[$depth] ) {
15637 set_forced_breakpoint( $last_dot_index[$depth] );
15640 # break before opening structure if preceded by another
15641 # closing structure and a comma. This is normally
15642 # done by the previous closing brace, but not
15643 # if it was a one-line block.
15644 if ( $i_opening > 2 ) {
15646 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
15650 if ( $types_to_go[$i_prev] eq ','
15651 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
15653 set_forced_breakpoint($i_prev);
15656 # also break before something like ':(' or '?('
15659 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
15661 my $token_prev = $tokens_to_go[$i_prev];
15662 if ( $want_break_before{$token_prev} ) {
15663 set_forced_breakpoint($i_prev);
15665 } ## end elsif ( $types_to_go[$i_prev...])
15666 } ## end if ( $i_opening > 2 )
15667 } ## end if ( $minimum_depth <=...)
15669 # break after comma following closing structure
15670 if ( $next_type eq ',' ) {
15671 set_forced_breakpoint( $i + 1 );
15674 # break before an '=' following closing structure
15676 $is_assignment{$next_nonblank_type}
15677 && ( $breakpoint_stack[$current_depth] !=
15678 $forced_breakpoint_count )
15681 set_forced_breakpoint($i);
15682 } ## end if ( $is_assignment{$next_nonblank_type...})
15684 # break at any comma before the opening structure Added
15685 # for -lp, but seems to be good in general. It isn't
15686 # obvious how far back to look; the '5' below seems to
15687 # work well and will catch the comma in something like
15688 # push @list, myfunc( $param, $param, ..
15690 my $icomma = $last_comma_index[$depth];
15691 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
15692 unless ( $forced_breakpoint_to_go[$icomma] ) {
15693 set_forced_breakpoint($icomma);
15696 } # end logic to open up a container
15698 # Break open a logical container open if it was already open
15699 elsif ($is_simple_logical_expression
15700 && $has_old_logical_breakpoints[$current_depth] )
15702 set_logical_breakpoints($current_depth);
15705 # Handle long container which does not get opened up
15706 elsif ($is_long_term) {
15708 # must set fake breakpoint to alert outer containers that
15710 set_fake_breakpoint();
15711 } ## end elsif ($is_long_term)
15713 } ## end elsif ( $depth < $current_depth)
15715 #------------------------------------------------------------
15716 # Handle this token
15717 #------------------------------------------------------------
15719 $current_depth = $depth;
15721 # handle comma-arrow
15722 if ( $type eq '=>' ) {
15723 next if ( $last_nonblank_type eq '=>' );
15724 next if $rOpts_break_at_old_comma_breakpoints;
15725 next if $rOpts_comma_arrow_breakpoints == 3;
15726 $want_comma_break[$depth] = 1;
15727 $index_before_arrow[$depth] = $i_last_nonblank_token;
15729 } ## end if ( $type eq '=>' )
15731 elsif ( $type eq '.' ) {
15732 $last_dot_index[$depth] = $i;
15735 # Turn off alignment if we are sure that this is not a list
15736 # environment. To be safe, we will do this if we see certain
15737 # non-list tokens, such as ';', and also the environment is
15738 # not a list. Note that '=' could be in any of the = operators
15739 # (lextest.t). We can't just use the reported environment
15740 # because it can be incorrect in some cases.
15741 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
15742 && $container_environment_to_go[$i] ne 'LIST' )
15744 $dont_align[$depth] = 1;
15745 $want_comma_break[$depth] = 0;
15746 $index_before_arrow[$depth] = -1;
15747 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
15749 # now just handle any commas
15750 next unless ( $type eq ',' );
15752 $last_dot_index[$depth] = undef;
15753 $last_comma_index[$depth] = $i;
15755 # break here if this comma follows a '=>'
15756 # but not if there is a side comment after the comma
15757 if ( $want_comma_break[$depth] ) {
15759 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
15760 if ($rOpts_comma_arrow_breakpoints) {
15761 $want_comma_break[$depth] = 0;
15762 ##$index_before_arrow[$depth] = -1;
15767 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15769 # break before the previous token if it looks safe
15770 # Example of something that we will not try to break before:
15771 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
15772 # Also we don't want to break at a binary operator (like +):
15776 # $y - $R, -fill => 'black',
15778 my $ibreak = $index_before_arrow[$depth] - 1;
15780 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
15782 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
15783 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
15784 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
15786 # don't break pointer calls, such as the following:
15787 # File::Spec->curdir => 1,
15788 # (This is tokenized as adjacent 'w' tokens)
15789 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
15791 # And don't break before a comma, as in the following:
15792 # ( LONGER_THAN,=> 1,
15793 # EIGHTY_CHARACTERS,=> 2,
15794 # CAUSES_FORMATTING,=> 3,
15797 # This example is for -tso but should be general rule
15798 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
15799 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
15801 set_forced_breakpoint($ibreak);
15803 } ## end if ( $types_to_go[$ibreak...])
15804 } ## end if ( $ibreak > 0 && $tokens_to_go...)
15806 $want_comma_break[$depth] = 0;
15807 $index_before_arrow[$depth] = -1;
15809 # handle list which mixes '=>'s and ','s:
15810 # treat any list items so far as an interrupted list
15811 $interrupted_list[$depth] = 1;
15813 } ## end if ( $want_comma_break...)
15815 # break after all commas above starting depth
15816 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
15817 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15821 # add this comma to the list..
15822 my $item_count = $item_count_stack[$depth];
15823 if ( $item_count == 0 ) {
15825 # but do not form a list with no opening structure
15828 # open INFILE_COPY, ">$input_file_copy"
15829 # or die ("very long message");
15831 if ( ( $opening_structure_index_stack[$depth] < 0 )
15832 && $container_environment_to_go[$i] eq 'BLOCK' )
15834 $dont_align[$depth] = 1;
15836 } ## end if ( $item_count == 0 )
15838 $comma_index[$depth][$item_count] = $i;
15839 ++$item_count_stack[$depth];
15840 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
15841 $identifier_count_stack[$depth]++;
15843 } ## end while ( ++$i <= $max_index_to_go)
15845 #-------------------------------------------
15846 # end of loop over all tokens in this batch
15847 #-------------------------------------------
15849 # set breaks for any unfinished lists ..
15850 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
15852 $interrupted_list[$dd] = 1;
15853 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
15854 set_comma_breakpoints($dd);
15855 set_logical_breakpoints($dd)
15856 if ( $has_old_logical_breakpoints[$dd] );
15857 set_for_semicolon_breakpoints($dd);
15859 # break open container...
15860 my $i_opening = $opening_structure_index_stack[$dd];
15861 set_forced_breakpoint($i_opening)
15863 is_unbreakable_container($dd)
15865 # Avoid a break which would place an isolated ' or "
15868 && $i_opening >= $max_index_to_go - 2
15869 && $token =~ /^['"]$/ )
15871 } ## end for ( my $dd = $current_depth...)
15873 # Return a flag indicating if the input file had some good breakpoints.
15874 # This flag will be used to force a break in a line shorter than the
15875 # allowed line length.
15876 if ( $has_old_logical_breakpoints[$current_depth] ) {
15877 $saw_good_breakpoint = 1;
15880 # A complex line with one break at an = has a good breakpoint.
15881 # This is not complex ($total_depth_variation=0):
15885 # This is complex ($total_depth_variation=6):
15887 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
15888 elsif ($i_old_assignment_break
15889 && $total_depth_variation > 4
15890 && $old_breakpoint_count == 1 )
15892 $saw_good_breakpoint = 1;
15893 } ## end elsif ( $i_old_assignment_break...)
15895 return $saw_good_breakpoint;
15896 } ## end sub scan_list
15899 sub find_token_starting_list {
15901 # When testing to see if a block will fit on one line, some
15902 # previous token(s) may also need to be on the line; particularly
15903 # if this is a sub call. So we will look back at least one
15904 # token. NOTE: This isn't perfect, but not critical, because
15905 # if we mis-identify a block, it will be wrapped and therefore
15906 # fixed the next time it is formatted.
15907 my $i_opening_paren = shift;
15908 my $i_opening_minus = $i_opening_paren;
15909 my $im1 = $i_opening_paren - 1;
15910 my $im2 = $i_opening_paren - 2;
15911 my $im3 = $i_opening_paren - 3;
15912 my $typem1 = $types_to_go[$im1];
15913 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
15914 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
15915 $i_opening_minus = $i_opening_paren;
15917 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
15918 $i_opening_minus = $im1 if $im1 >= 0;
15920 # walk back to improve length estimate
15921 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
15922 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
15923 $i_opening_minus = $j;
15925 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
15927 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
15928 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
15929 $i_opening_minus = $im2;
15931 return $i_opening_minus;
15934 { # begin set_comma_breakpoints_do
15936 my %is_keyword_with_special_leading_term;
15940 # These keywords have prototypes which allow a special leading item
15941 # followed by a list
15943 qw(formline grep kill map printf sprintf push chmod join pack unshift);
15944 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
15947 sub set_comma_breakpoints_do {
15949 # Given a list with some commas, set breakpoints at some of the
15950 # commas, if necessary, to make it easy to read. This list is
15953 $depth, $i_opening_paren, $i_closing_paren,
15954 $item_count, $identifier_count, $rcomma_index,
15955 $next_nonblank_type, $list_type, $interrupted,
15956 $rdo_not_break_apart, $must_break_open,
15959 # nothing to do if no commas seen
15960 return if ( $item_count < 1 );
15961 my $i_first_comma = $$rcomma_index[0];
15962 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
15963 my $i_last_comma = $i_true_last_comma;
15964 if ( $i_last_comma >= $max_index_to_go ) {
15965 $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
15966 return if ( $item_count < 1 );
15969 #---------------------------------------------------------------
15970 # find lengths of all items in the list to calculate page layout
15971 #---------------------------------------------------------------
15972 my $comma_count = $item_count;
15978 my @max_length = ( 0, 0 );
15979 my $first_term_length;
15980 my $i = $i_opening_paren;
15983 for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
15984 $is_odd = 1 - $is_odd;
15985 $i_prev_plus = $i + 1;
15986 $i = $$rcomma_index[$j];
15989 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
15991 ( $types_to_go[$i_prev_plus] eq 'b' )
15994 push @i_term_begin, $i_term_begin;
15995 push @i_term_end, $i_term_end;
15996 push @i_term_comma, $i;
15998 # note: currently adding 2 to all lengths (for comma and space)
16000 2 + token_sequence_length( $i_term_begin, $i_term_end );
16001 push @item_lengths, $length;
16004 $first_term_length = $length;
16008 if ( $length > $max_length[$is_odd] ) {
16009 $max_length[$is_odd] = $length;
16014 # now we have to make a distinction between the comma count and item
16015 # count, because the item count will be one greater than the comma
16016 # count if the last item is not terminated with a comma
16018 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
16019 ? $i_last_comma + 1
16022 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
16023 ? $i_closing_paren - 2
16024 : $i_closing_paren - 1;
16025 my $i_effective_last_comma = $i_last_comma;
16027 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
16029 if ( $last_item_length > 0 ) {
16031 # add 2 to length because other lengths include a comma and a blank
16032 $last_item_length += 2;
16033 push @item_lengths, $last_item_length;
16034 push @i_term_begin, $i_b + 1;
16035 push @i_term_end, $i_e;
16036 push @i_term_comma, undef;
16038 my $i_odd = $item_count % 2;
16040 if ( $last_item_length > $max_length[$i_odd] ) {
16041 $max_length[$i_odd] = $last_item_length;
16045 $i_effective_last_comma = $i_e + 1;
16047 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
16048 $identifier_count++;
16052 #---------------------------------------------------------------
16053 # End of length calculations
16054 #---------------------------------------------------------------
16056 #---------------------------------------------------------------
16057 # Compound List Rule 1:
16058 # Break at (almost) every comma for a list containing a broken
16059 # sublist. This has higher priority than the Interrupted List
16061 #---------------------------------------------------------------
16062 if ( $has_broken_sublist[$depth] ) {
16064 # Break at every comma except for a comma between two
16065 # simple, small terms. This prevents long vertical
16066 # columns of, say, just 0's.
16067 my $small_length = 10; # 2 + actual maximum length wanted
16069 # We'll insert a break in long runs of small terms to
16070 # allow alignment in uniform tables.
16071 my $skipped_count = 0;
16072 my $columns = table_columns_available($i_first_comma);
16073 my $fields = int( $columns / $small_length );
16074 if ( $rOpts_maximum_fields_per_table
16075 && $fields > $rOpts_maximum_fields_per_table )
16077 $fields = $rOpts_maximum_fields_per_table;
16079 my $max_skipped_count = $fields - 1;
16081 my $is_simple_last_term = 0;
16082 my $is_simple_next_term = 0;
16083 foreach my $j ( 0 .. $item_count ) {
16084 $is_simple_last_term = $is_simple_next_term;
16085 $is_simple_next_term = 0;
16086 if ( $j < $item_count
16087 && $i_term_end[$j] == $i_term_begin[$j]
16088 && $item_lengths[$j] <= $small_length )
16090 $is_simple_next_term = 1;
16093 if ( $is_simple_last_term
16094 && $is_simple_next_term
16095 && $skipped_count < $max_skipped_count )
16100 $skipped_count = 0;
16101 my $i = $i_term_comma[ $j - 1 ];
16102 last unless defined $i;
16103 set_forced_breakpoint($i);
16107 # always break at the last comma if this list is
16108 # interrupted; we wouldn't want to leave a terminal '{', for
16110 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
16114 #my ( $a, $b, $c ) = caller();
16115 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
16116 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
16117 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
16119 #---------------------------------------------------------------
16120 # Interrupted List Rule:
16121 # A list is forced to use old breakpoints if it was interrupted
16122 # by side comments or blank lines, or requested by user.
16123 #---------------------------------------------------------------
16124 if ( $rOpts_break_at_old_comma_breakpoints
16126 || $i_opening_paren < 0 )
16128 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
16132 #---------------------------------------------------------------
16133 # Looks like a list of items. We have to look at it and size it up.
16134 #---------------------------------------------------------------
16136 my $opening_token = $tokens_to_go[$i_opening_paren];
16137 my $opening_environment =
16138 $container_environment_to_go[$i_opening_paren];
16140 #-------------------------------------------------------------------
16141 # Return if this will fit on one line
16142 #-------------------------------------------------------------------
16144 my $i_opening_minus = find_token_starting_list($i_opening_paren);
16146 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
16148 #-------------------------------------------------------------------
16149 # Now we know that this block spans multiple lines; we have to set
16150 # at least one breakpoint -- real or fake -- as a signal to break
16151 # open any outer containers.
16152 #-------------------------------------------------------------------
16153 set_fake_breakpoint();
16155 # be sure we do not extend beyond the current list length
16156 if ( $i_effective_last_comma >= $max_index_to_go ) {
16157 $i_effective_last_comma = $max_index_to_go - 1;
16160 # Set a flag indicating if we need to break open to keep -lp
16161 # items aligned. This is necessary if any of the list terms
16162 # exceeds the available space after the '('.
16163 my $need_lp_break_open = $must_break_open;
16164 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
16165 my $columns_if_unbroken =
16166 maximum_line_length($i_opening_minus) -
16167 total_line_length( $i_opening_minus, $i_opening_paren );
16168 $need_lp_break_open =
16169 ( $max_length[0] > $columns_if_unbroken )
16170 || ( $max_length[1] > $columns_if_unbroken )
16171 || ( $first_term_length > $columns_if_unbroken );
16174 # Specify if the list must have an even number of fields or not.
16175 # It is generally safest to assume an even number, because the
16176 # list items might be a hash list. But if we can be sure that
16177 # it is not a hash, then we can allow an odd number for more
16179 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
16181 if ( $identifier_count >= $item_count - 1
16182 || $is_assignment{$next_nonblank_type}
16183 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
16189 # do we have a long first term which should be
16190 # left on a line by itself?
16191 my $use_separate_first_term = (
16192 $odd_or_even == 1 # only if we can use 1 field/line
16193 && $item_count > 3 # need several items
16194 && $first_term_length >
16195 2 * $max_length[0] - 2 # need long first term
16196 && $first_term_length >
16197 2 * $max_length[1] - 2 # need long first term
16200 # or do we know from the type of list that the first term should
16202 if ( !$use_separate_first_term ) {
16203 if ( $is_keyword_with_special_leading_term{$list_type} ) {
16204 $use_separate_first_term = 1;
16206 # should the container be broken open?
16207 if ( $item_count < 3 ) {
16208 if ( $i_first_comma - $i_opening_paren < 4 ) {
16209 $$rdo_not_break_apart = 1;
16212 elsif ($first_term_length < 20
16213 && $i_first_comma - $i_opening_paren < 4 )
16215 my $columns = table_columns_available($i_first_comma);
16216 if ( $first_term_length < $columns ) {
16217 $$rdo_not_break_apart = 1;
16224 if ($use_separate_first_term) {
16226 # ..set a break and update starting values
16227 $use_separate_first_term = 1;
16228 set_forced_breakpoint($i_first_comma);
16229 $i_opening_paren = $i_first_comma;
16230 $i_first_comma = $$rcomma_index[1];
16232 return if $comma_count == 1;
16233 shift @item_lengths;
16234 shift @i_term_begin;
16236 shift @i_term_comma;
16239 # if not, update the metrics to include the first term
16241 if ( $first_term_length > $max_length[0] ) {
16242 $max_length[0] = $first_term_length;
16246 # Field width parameters
16247 my $pair_width = ( $max_length[0] + $max_length[1] );
16249 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
16251 # Number of free columns across the page width for laying out tables
16252 my $columns = table_columns_available($i_first_comma);
16254 # Estimated maximum number of fields which fit this space
16255 # This will be our first guess
16256 my $number_of_fields_max =
16257 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
16259 my $number_of_fields = $number_of_fields_max;
16261 # Find the best-looking number of fields
16262 # and make this our second guess if possible
16263 my ( $number_of_fields_best, $ri_ragged_break_list,
16264 $new_identifier_count )
16265 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
16268 if ( $number_of_fields_best != 0
16269 && $number_of_fields_best < $number_of_fields_max )
16271 $number_of_fields = $number_of_fields_best;
16274 # ----------------------------------------------------------------------
16275 # If we are crowded and the -lp option is being used, try to
16276 # undo some indentation
16277 # ----------------------------------------------------------------------
16279 $rOpts_line_up_parentheses
16281 $number_of_fields == 0
16282 || ( $number_of_fields == 1
16283 && $number_of_fields != $number_of_fields_best )
16287 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
16288 if ( $available_spaces > 0 ) {
16290 my $spaces_wanted = $max_width - $columns; # for 1 field
16292 if ( $number_of_fields_best == 0 ) {
16293 $number_of_fields_best =
16294 get_maximum_fields_wanted( \@item_lengths );
16297 if ( $number_of_fields_best != 1 ) {
16298 my $spaces_wanted_2 =
16299 1 + $pair_width - $columns; # for 2 fields
16300 if ( $available_spaces > $spaces_wanted_2 ) {
16301 $spaces_wanted = $spaces_wanted_2;
16305 if ( $spaces_wanted > 0 ) {
16306 my $deleted_spaces =
16307 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
16310 if ( $deleted_spaces > 0 ) {
16311 $columns = table_columns_available($i_first_comma);
16312 $number_of_fields_max =
16313 maximum_number_of_fields( $columns, $odd_or_even,
16314 $max_width, $pair_width );
16315 $number_of_fields = $number_of_fields_max;
16317 if ( $number_of_fields_best == 1
16318 && $number_of_fields >= 1 )
16320 $number_of_fields = $number_of_fields_best;
16327 # try for one column if two won't work
16328 if ( $number_of_fields <= 0 ) {
16329 $number_of_fields = int( $columns / $max_width );
16332 # The user can place an upper bound on the number of fields,
16333 # which can be useful for doing maintenance on tables
16334 if ( $rOpts_maximum_fields_per_table
16335 && $number_of_fields > $rOpts_maximum_fields_per_table )
16337 $number_of_fields = $rOpts_maximum_fields_per_table;
16340 # How many columns (characters) and lines would this container take
16341 # if no additional whitespace were added?
16342 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
16343 $i_effective_last_comma + 1 );
16344 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
16345 my $packed_lines = 1 + int( $packed_columns / $columns );
16347 # are we an item contained in an outer list?
16348 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
16350 if ( $number_of_fields <= 0 ) {
16352 # #---------------------------------------------------------------
16353 # # We're in trouble. We can't find a single field width that works.
16354 # # There is no simple answer here; we may have a single long list
16356 # #---------------------------------------------------------------
16358 # In many cases, it may be best to not force a break if there is just one
16359 # comma, because the standard continuation break logic will do a better
16362 # In the common case that all but one of the terms can fit
16363 # on a single line, it may look better not to break open the
16364 # containing parens. Consider, for example
16368 # sort { $color_value{$::a} <=> $color_value{$::b}; }
16371 # which will look like this with the container broken:
16375 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
16378 # Here is an example of this rule for a long last term:
16380 # log_message( 0, 256, 128,
16381 # "Number of routes in adj-RIB-in to be considered: $peercount" );
16383 # And here is an example with a long first term:
16386 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
16387 # $r, $pu, $ps, $cu, $cs, $tt
16389 # if $style eq 'all';
16391 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
16392 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
16393 my $long_first_term =
16394 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
16396 # break at every comma ...
16399 # if requested by user or is best looking
16400 $number_of_fields_best == 1
16402 # or if this is a sublist of a larger list
16403 || $in_hierarchical_list
16405 # or if multiple commas and we don't have a long first or last
16407 || ( $comma_count > 1
16408 && !( $long_last_term || $long_first_term ) )
16411 foreach ( 0 .. $comma_count - 1 ) {
16412 set_forced_breakpoint( $$rcomma_index[$_] );
16415 elsif ($long_last_term) {
16417 set_forced_breakpoint($i_last_comma);
16418 $$rdo_not_break_apart = 1 unless $must_break_open;
16420 elsif ($long_first_term) {
16422 set_forced_breakpoint($i_first_comma);
16426 # let breaks be defined by default bond strength logic
16431 # --------------------------------------------------------
16432 # We have a tentative field count that seems to work.
16433 # How many lines will this require?
16434 # --------------------------------------------------------
16435 my $formatted_lines = $item_count / ($number_of_fields);
16436 if ( $formatted_lines != int $formatted_lines ) {
16437 $formatted_lines = 1 + int $formatted_lines;
16440 # So far we've been trying to fill out to the right margin. But
16441 # compact tables are easier to read, so let's see if we can use fewer
16442 # fields without increasing the number of lines.
16443 $number_of_fields =
16444 compactify_table( $item_count, $number_of_fields, $formatted_lines,
16447 # How many spaces across the page will we fill?
16448 my $columns_per_line =
16449 ( int $number_of_fields / 2 ) * $pair_width +
16450 ( $number_of_fields % 2 ) * $max_width;
16452 my $formatted_columns;
16454 if ( $number_of_fields > 1 ) {
16455 $formatted_columns =
16456 ( $pair_width * ( int( $item_count / 2 ) ) +
16457 ( $item_count % 2 ) * $max_width );
16460 $formatted_columns = $max_width * $item_count;
16462 if ( $formatted_columns < $packed_columns ) {
16463 $formatted_columns = $packed_columns;
16466 my $unused_columns = $formatted_columns - $packed_columns;
16468 # set some empirical parameters to help decide if we should try to
16469 # align; high sparsity does not look good, especially with few lines
16470 my $sparsity = ($unused_columns) / ($formatted_columns);
16471 my $max_allowed_sparsity =
16472 ( $item_count < 3 ) ? 0.1
16473 : ( $packed_lines == 1 ) ? 0.15
16474 : ( $packed_lines == 2 ) ? 0.4
16477 # Begin check for shortcut methods, which avoid treating a list
16478 # as a table for relatively small parenthesized lists. These
16479 # are usually easier to read if not formatted as tables.
16481 $packed_lines <= 2 # probably can fit in 2 lines
16482 && $item_count < 9 # doesn't have too many items
16483 && $opening_environment eq 'BLOCK' # not a sub-container
16484 && $opening_token eq '(' # is paren list
16488 # Shortcut method 1: for -lp and just one comma:
16489 # This is a no-brainer, just break at the comma.
16491 $rOpts_line_up_parentheses # -lp
16492 && $item_count == 2 # two items, one comma
16493 && !$must_break_open
16496 my $i_break = $$rcomma_index[0];
16497 set_forced_breakpoint($i_break);
16498 $$rdo_not_break_apart = 1;
16499 set_non_alignment_flags( $comma_count, $rcomma_index );
16504 # method 2 is for most small ragged lists which might look
16505 # best if not displayed as a table.
16507 ( $number_of_fields == 2 && $item_count == 3 )
16509 $new_identifier_count > 0 # isn't all quotes
16510 && $sparsity > 0.15
16511 ) # would be fairly spaced gaps if aligned
16515 my $break_count = set_ragged_breakpoints( \@i_term_comma,
16516 $ri_ragged_break_list );
16517 ++$break_count if ($use_separate_first_term);
16519 # NOTE: we should really use the true break count here,
16520 # which can be greater if there are large terms and
16521 # little space, but usually this will work well enough.
16522 unless ($must_break_open) {
16524 if ( $break_count <= 1 ) {
16525 $$rdo_not_break_apart = 1;
16527 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16529 $$rdo_not_break_apart = 1;
16532 set_non_alignment_flags( $comma_count, $rcomma_index );
16536 } # end shortcut methods
16540 FORMATTER_DEBUG_FLAG_SPARSE && do {
16542 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
16546 #---------------------------------------------------------------
16547 # Compound List Rule 2:
16548 # If this list is too long for one line, and it is an item of a
16549 # larger list, then we must format it, regardless of sparsity
16550 # (ian.t). One reason that we have to do this is to trigger
16551 # Compound List Rule 1, above, which causes breaks at all commas of
16552 # all outer lists. In this way, the structure will be properly
16554 #---------------------------------------------------------------
16556 # Decide if this list is too long for one line unless broken
16557 my $total_columns = table_columns_available($i_opening_paren);
16558 my $too_long = $packed_columns > $total_columns;
16560 # For a paren list, include the length of the token just before the
16561 # '(' because this is likely a sub call, and we would have to
16562 # include the sub name on the same line as the list. This is still
16563 # imprecise, but not too bad. (steve.t)
16564 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
16566 $too_long = excess_line_length( $i_opening_minus,
16567 $i_effective_last_comma + 1 ) > 0;
16570 # FIXME: For an item after a '=>', try to include the length of the
16571 # thing before the '=>'. This is crude and should be improved by
16572 # actually looking back token by token.
16573 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
16574 my $i_opening_minus = $i_opening_paren - 4;
16575 if ( $i_opening_minus >= 0 ) {
16576 $too_long = excess_line_length( $i_opening_minus,
16577 $i_effective_last_comma + 1 ) > 0;
16581 # Always break lists contained in '[' and '{' if too long for 1 line,
16582 # and always break lists which are too long and part of a more complex
16584 my $must_break_open_container = $must_break_open
16586 && ( $in_hierarchical_list || $opening_token ne '(' ) );
16588 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
16590 #---------------------------------------------------------------
16591 # The main decision:
16592 # Now decide if we will align the data into aligned columns. Do not
16593 # attempt to align columns if this is a tiny table or it would be
16594 # too spaced. It seems that the more packed lines we have, the
16595 # sparser the list that can be allowed and still look ok.
16596 #---------------------------------------------------------------
16598 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
16599 || ( $formatted_lines < 2 )
16600 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
16604 #---------------------------------------------------------------
16605 # too sparse: would look ugly if aligned in a table;
16606 #---------------------------------------------------------------
16608 # use old breakpoints if this is a 'big' list
16609 # FIXME: goal is to improve set_ragged_breakpoints so that
16610 # this is not necessary.
16611 if ( $packed_lines > 2 && $item_count > 10 ) {
16612 write_logfile_entry("List sparse: using old breakpoints\n");
16613 copy_old_breakpoints( $i_first_comma, $i_last_comma );
16616 # let the continuation logic handle it if 2 lines
16619 my $break_count = set_ragged_breakpoints( \@i_term_comma,
16620 $ri_ragged_break_list );
16621 ++$break_count if ($use_separate_first_term);
16623 unless ($must_break_open_container) {
16624 if ( $break_count <= 1 ) {
16625 $$rdo_not_break_apart = 1;
16627 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16629 $$rdo_not_break_apart = 1;
16632 set_non_alignment_flags( $comma_count, $rcomma_index );
16637 #---------------------------------------------------------------
16638 # go ahead and format as a table
16639 #---------------------------------------------------------------
16640 write_logfile_entry(
16641 "List: auto formatting with $number_of_fields fields/row\n");
16643 my $j_first_break =
16644 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
16647 my $j = $j_first_break ;
16648 $j < $comma_count ;
16649 $j += $number_of_fields
16652 my $i = $$rcomma_index[$j];
16653 set_forced_breakpoint($i);
16659 sub set_non_alignment_flags {
16661 # set flag which indicates that these commas should not be
16663 my ( $comma_count, $rcomma_index ) = @_;
16664 foreach ( 0 .. $comma_count - 1 ) {
16665 $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
16669 sub study_list_complexity {
16671 # Look for complex tables which should be formatted with one term per line.
16672 # Returns the following:
16674 # \@i_ragged_break_list = list of good breakpoints to avoid lines
16675 # which are hard to read
16676 # $number_of_fields_best = suggested number of fields based on
16677 # complexity; = 0 if any number may be used.
16679 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
16680 my $item_count = @{$ri_term_begin};
16681 my $complex_item_count = 0;
16682 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
16683 my $i_max = @{$ritem_lengths} - 1;
16684 ##my @item_complexity;
16686 my $i_last_last_break = -3;
16687 my $i_last_break = -2;
16688 my @i_ragged_break_list;
16690 my $definitely_complex = 30;
16691 my $definitely_simple = 12;
16692 my $quote_count = 0;
16694 for my $i ( 0 .. $i_max ) {
16695 my $ib = $ri_term_begin->[$i];
16696 my $ie = $ri_term_end->[$i];
16698 # define complexity: start with the actual term length
16699 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
16701 ##TBD: join types here and check for variations
16702 ##my $str=join "", @tokens_to_go[$ib..$ie];
16705 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
16709 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
16713 if ( $ib eq $ie ) {
16714 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
16715 $complex_item_count++;
16716 $weighted_length *= 2;
16722 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
16723 $complex_item_count++;
16724 $weighted_length *= 2;
16726 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
16727 $weighted_length += 4;
16731 # add weight for extra tokens.
16732 $weighted_length += 2 * ( $ie - $ib );
16734 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
16735 ## print "# COMPLEXITY:$weighted_length $BUB\n";
16737 ##push @item_complexity, $weighted_length;
16739 # now mark a ragged break after this item it if it is 'long and
16741 if ( $weighted_length >= $definitely_complex ) {
16743 # if we broke after the previous term
16744 # then break before it too
16745 if ( $i_last_break == $i - 1
16747 && $i_last_last_break != $i - 2 )
16750 ## FIXME: don't strand a small term
16751 pop @i_ragged_break_list;
16752 push @i_ragged_break_list, $i - 2;
16753 push @i_ragged_break_list, $i - 1;
16756 push @i_ragged_break_list, $i;
16757 $i_last_last_break = $i_last_break;
16758 $i_last_break = $i;
16761 # don't break before a small last term -- it will
16762 # not look good on a line by itself.
16763 elsif ($i == $i_max
16764 && $i_last_break == $i - 1
16765 && $weighted_length <= $definitely_simple )
16767 pop @i_ragged_break_list;
16771 my $identifier_count = $i_max + 1 - $quote_count;
16773 # Need more tuning here..
16774 if ( $max_width > 12
16775 && $complex_item_count > $item_count / 2
16776 && $number_of_fields_best != 2 )
16778 $number_of_fields_best = 1;
16781 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
16784 sub get_maximum_fields_wanted {
16786 # Not all tables look good with more than one field of items.
16787 # This routine looks at a table and decides if it should be
16788 # formatted with just one field or not.
16789 # This coding is still under development.
16790 my ($ritem_lengths) = @_;
16792 my $number_of_fields_best = 0;
16794 # For just a few items, we tentatively assume just 1 field.
16795 my $item_count = @{$ritem_lengths};
16796 if ( $item_count <= 5 ) {
16797 $number_of_fields_best = 1;
16800 # For larger tables, look at it both ways and see what looks best
16804 my @max_length = ( 0, 0 );
16805 my @last_length_2 = ( undef, undef );
16806 my @first_length_2 = ( undef, undef );
16807 my $last_length = undef;
16808 my $total_variation_1 = 0;
16809 my $total_variation_2 = 0;
16810 my @total_variation_2 = ( 0, 0 );
16811 for ( my $j = 0 ; $j < $item_count ; $j++ ) {
16813 $is_odd = 1 - $is_odd;
16814 my $length = $ritem_lengths->[$j];
16815 if ( $length > $max_length[$is_odd] ) {
16816 $max_length[$is_odd] = $length;
16819 if ( defined($last_length) ) {
16820 my $dl = abs( $length - $last_length );
16821 $total_variation_1 += $dl;
16823 $last_length = $length;
16825 my $ll = $last_length_2[$is_odd];
16826 if ( defined($ll) ) {
16827 my $dl = abs( $length - $ll );
16828 $total_variation_2[$is_odd] += $dl;
16831 $first_length_2[$is_odd] = $length;
16833 $last_length_2[$is_odd] = $length;
16835 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
16837 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
16838 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
16839 $number_of_fields_best = 1;
16842 return ($number_of_fields_best);
16845 sub table_columns_available {
16846 my $i_first_comma = shift;
16848 maximum_line_length($i_first_comma) -
16849 leading_spaces_to_go($i_first_comma);
16851 # Patch: the vertical formatter does not line up lines whose lengths
16852 # exactly equal the available line length because of allowances
16853 # that must be made for side comments. Therefore, the number of
16854 # available columns is reduced by 1 character.
16859 sub maximum_number_of_fields {
16861 # how many fields will fit in the available space?
16862 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
16863 my $max_pairs = int( $columns / $pair_width );
16864 my $number_of_fields = $max_pairs * 2;
16865 if ( $odd_or_even == 1
16866 && $max_pairs * $pair_width + $max_width <= $columns )
16868 $number_of_fields++;
16870 return $number_of_fields;
16873 sub compactify_table {
16875 # given a table with a certain number of fields and a certain number
16876 # of lines, see if reducing the number of fields will make it look
16878 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
16879 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
16883 $min_fields = $number_of_fields ;
16884 $min_fields >= $odd_or_even
16885 && $min_fields * $formatted_lines >= $item_count ;
16886 $min_fields -= $odd_or_even
16889 $number_of_fields = $min_fields;
16892 return $number_of_fields;
16895 sub set_ragged_breakpoints {
16897 # Set breakpoints in a list that cannot be formatted nicely as a
16899 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
16901 my $break_count = 0;
16902 foreach (@$ri_ragged_break_list) {
16903 my $j = $ri_term_comma->[$_];
16905 set_forced_breakpoint($j);
16909 return $break_count;
16912 sub copy_old_breakpoints {
16913 my ( $i_first_comma, $i_last_comma ) = @_;
16914 for my $i ( $i_first_comma .. $i_last_comma ) {
16915 if ( $old_breakpoint_to_go[$i] ) {
16916 set_forced_breakpoint($i);
16922 my ( $i, $j ) = @_;
16923 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
16925 FORMATTER_DEBUG_FLAG_NOBREAK && do {
16926 my ( $a, $b, $c ) = caller();
16928 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
16931 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
16934 # shouldn't happen; non-critical error
16936 FORMATTER_DEBUG_FLAG_NOBREAK && do {
16937 my ( $a, $b, $c ) = caller();
16939 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
16944 sub set_fake_breakpoint {
16946 # Just bump up the breakpoint count as a signal that there are breaks.
16947 # This is useful if we have breaks but may want to postpone deciding where
16949 $forced_breakpoint_count++;
16952 sub set_forced_breakpoint {
16955 return unless defined $i && $i >= 0;
16957 # when called with certain tokens, use bond strengths to decide
16958 # if we break before or after it
16959 my $token = $tokens_to_go[$i];
16961 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
16962 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
16965 # breaks are forced before 'if' and 'unless'
16966 elsif ( $is_if_unless{$token} ) { $i-- }
16968 if ( $i >= 0 && $i <= $max_index_to_go ) {
16969 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
16971 FORMATTER_DEBUG_FLAG_FORCE && do {
16972 my ( $a, $b, $c ) = caller();
16974 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
16977 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
16978 $forced_breakpoint_to_go[$i_nonblank] = 1;
16980 if ( $i_nonblank > $index_max_forced_break ) {
16981 $index_max_forced_break = $i_nonblank;
16983 $forced_breakpoint_count++;
16984 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
16987 # if we break at an opening container..break at the closing
16988 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
16989 set_closing_breakpoint($i_nonblank);
16995 sub clear_breakpoint_undo_stack {
16996 $forced_breakpoint_undo_count = 0;
16999 sub undo_forced_breakpoint_stack {
17001 my $i_start = shift;
17002 if ( $i_start < 0 ) {
17004 my ( $a, $b, $c ) = caller();
17006 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
17010 while ( $forced_breakpoint_undo_count > $i_start ) {
17012 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
17013 if ( $i >= 0 && $i <= $max_index_to_go ) {
17014 $forced_breakpoint_to_go[$i] = 0;
17015 $forced_breakpoint_count--;
17017 FORMATTER_DEBUG_FLAG_UNDOBP && do {
17018 my ( $a, $b, $c ) = caller();
17020 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
17024 # shouldn't happen, but not a critical error
17026 FORMATTER_DEBUG_FLAG_UNDOBP && do {
17027 my ( $a, $b, $c ) = caller();
17029 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
17035 { # begin recombine_breakpoints
17046 @is_amp_amp{@_} = (1) x scalar(@_);
17049 @is_ternary{@_} = (1) x scalar(@_);
17051 @_ = qw( + - * / );
17052 @is_math_op{@_} = (1) x scalar(@_);
17055 @is_plus_minus{@_} = (1) x scalar(@_);
17058 @is_mult_div{@_} = (1) x scalar(@_);
17061 sub recombine_breakpoints {
17063 # sub set_continuation_breaks is very liberal in setting line breaks
17064 # for long lines, always setting breaks at good breakpoints, even
17065 # when that creates small lines. Sometimes small line fragments
17066 # are produced which would look better if they were combined.
17067 # That's the task of this routine.
17069 # We are given indexes to the current lines:
17070 # $ri_beg = ref to array of BEGinning indexes of each line
17071 # $ri_end = ref to array of ENDing indexes of each line
17072 my ( $ri_beg, $ri_end ) = @_;
17074 # Make a list of all good joining tokens between the lines
17077 my $nmax = @$ri_end - 1;
17078 for my $n ( 1 .. $nmax ) {
17079 my $ibeg_1 = $$ri_beg[ $n - 1 ];
17080 my $iend_1 = $$ri_end[ $n - 1 ];
17081 my $iend_2 = $$ri_end[$n];
17082 my $ibeg_2 = $$ri_beg[$n];
17084 my ( $itok, $itokp, $itokm );
17086 foreach my $itest ( $iend_1, $ibeg_2 ) {
17087 my $type = $types_to_go[$itest];
17088 if ( $is_math_op{$type}
17089 || $is_amp_amp{$type}
17090 || $is_assignment{$type}
17096 $joint[$n] = [$itok];
17099 my $more_to_do = 1;
17101 # We keep looping over all of the lines of this batch
17102 # until there are no more possible recombinations
17103 my $nmax_last = @$ri_end;
17104 while ($more_to_do) {
17108 my $nmax = @$ri_end - 1;
17110 # Safety check for infinite loop
17111 unless ( $nmax < $nmax_last ) {
17113 # Shouldn't happen because splice below decreases nmax on each
17116 "Program bug-infinite loop in recombine breakpoints\n";
17118 $nmax_last = $nmax;
17120 my $previous_outdentable_closing_paren;
17121 my $leading_amp_count = 0;
17122 my $this_line_is_semicolon_terminated;
17124 # loop over all remaining lines in this batch
17125 for $n ( 1 .. $nmax ) {
17127 #----------------------------------------------------------
17128 # If we join the current pair of lines,
17129 # line $n-1 will become the left part of the joined line
17130 # line $n will become the right part of the joined line
17132 # Here are Indexes of the endpoint tokens of the two lines:
17134 # -----line $n-1--- | -----line $n-----
17135 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
17138 # We want to decide if we should remove the line break
17139 # between the tokens at $iend_1 and $ibeg_2
17141 # We will apply a number of ad-hoc tests to see if joining
17142 # here will look ok. The code will just issue a 'next'
17143 # command if the join doesn't look good. If we get through
17144 # the gauntlet of tests, the lines will be recombined.
17145 #----------------------------------------------------------
17147 # beginning and ending tokens of the lines we are working on
17148 my $ibeg_1 = $$ri_beg[ $n - 1 ];
17149 my $iend_1 = $$ri_end[ $n - 1 ];
17150 my $iend_2 = $$ri_end[$n];
17151 my $ibeg_2 = $$ri_beg[$n];
17152 my $ibeg_nmax = $$ri_beg[$nmax];
17154 my $type_iend_1 = $types_to_go[$iend_1];
17155 my $type_iend_2 = $types_to_go[$iend_2];
17156 my $type_ibeg_1 = $types_to_go[$ibeg_1];
17157 my $type_ibeg_2 = $types_to_go[$ibeg_2];
17159 # some beginning indexes of other lines, which may not exist
17160 my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
17161 my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
17162 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
17166 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
17167 # $nesting_depth_to_go[$ibeg_1] );
17169 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
17171 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
17174 # If line $n is the last line, we set some flags and
17175 # do any special checks for it
17176 if ( $n == $nmax ) {
17178 # a terminal '{' should stay where it is
17179 next if $type_ibeg_2 eq '{';
17181 # set flag if statement $n ends in ';'
17182 $this_line_is_semicolon_terminated = $type_iend_2 eq ';'
17184 # with possible side comment
17185 || ( $type_iend_2 eq '#'
17186 && $iend_2 - $ibeg_2 >= 2
17187 && $types_to_go[ $iend_2 - 2 ] eq ';'
17188 && $types_to_go[ $iend_2 - 1 ] eq 'b' );
17191 #----------------------------------------------------------
17192 # Recombine Section 1:
17193 # Examine the special token joining this line pair, if any.
17194 # Put as many tests in this section to avoid duplicate code and
17195 # to make formatting independent of whether breaks are to the
17196 # left or right of an operator.
17197 #----------------------------------------------------------
17199 my ($itok) = @{ $joint[$n] };
17202 # FIXME: Patch - may not be necessary
17204 $type_iend_1 eq 'b'
17209 $type_iend_2 eq 'b'
17214 my $type = $types_to_go[$itok];
17216 if ( $type eq ':' ) {
17218 # do not join at a colon unless it disobeys the break request
17219 if ( $itok eq $iend_1 ) {
17220 next unless $want_break_before{$type};
17223 $leading_amp_count++;
17224 next if $want_break_before{$type};
17228 # handle math operators + - * /
17229 elsif ( $is_math_op{$type} ) {
17231 # Combine these lines if this line is a single
17232 # number, or if it is a short term with same
17233 # operator as the previous line. For example, in
17234 # the following code we will combine all of the
17235 # short terms $A, $B, $C, $D, $E, $F, together
17236 # instead of leaving them one per line:
17238 # $A * $B * $C * $D * $E * $F *
17239 # ( 2. * $eps * $sigma * $area ) *
17240 # ( 1. / $tcold**3 - 1. / $thot**3 );
17242 # This can be important in math-intensive code.
17246 my $itokp = min( $inext_to_go[$itok], $iend_2 );
17247 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
17248 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
17249 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
17251 # check for a number on the right
17252 if ( $types_to_go[$itokp] eq 'n' ) {
17254 # ok if nothing else on right
17255 if ( $itokp == $iend_2 ) {
17260 # look one more token to right..
17261 # okay if math operator or some termination
17263 ( ( $itokpp == $iend_2 )
17264 && $is_math_op{ $types_to_go[$itokpp] } )
17265 || $types_to_go[$itokpp] =~ /^[#,;]$/;
17269 # check for a number on the left
17270 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
17272 # okay if nothing else to left
17273 if ( $itokm == $ibeg_1 ) {
17277 # otherwise look one more token to left
17280 # okay if math operator, comma, or assignment
17281 $good_combo = ( $itokmm == $ibeg_1 )
17282 && ( $is_math_op{ $types_to_go[$itokmm] }
17283 || $types_to_go[$itokmm] =~ /^[,]$/
17284 || $is_assignment{ $types_to_go[$itokmm] }
17289 # look for a single short token either side of the
17291 if ( !$good_combo ) {
17293 # Slight adjustment factor to make results
17294 # independent of break before or after operator in
17295 # long summed lists. (An operator and a space make
17297 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
17301 # numbers or id's on both sides of this joint
17302 $types_to_go[$itokp] =~ /^[in]$/
17303 && $types_to_go[$itokm] =~ /^[in]$/
17305 # one of the two lines must be short:
17308 # no more than 2 nonblank tokens right of
17313 && token_sequence_length( $itokp, $iend_2 )
17315 $rOpts_short_concatenation_item_length
17318 # no more than 2 nonblank tokens left of
17323 && token_sequence_length( $ibeg_1, $itokm )
17325 $rOpts_short_concatenation_item_length
17330 # keep pure terms; don't mix +- with */
17332 $is_plus_minus{$type}
17333 && ( $is_mult_div{ $types_to_go[$itokmm] }
17334 || $is_mult_div{ $types_to_go[$itokpp] } )
17337 $is_mult_div{$type}
17338 && ( $is_plus_minus{ $types_to_go[$itokmm] }
17339 || $is_plus_minus{ $types_to_go[$itokpp] } )
17345 # it is also good to combine if we can reduce to 2 lines
17346 if ( !$good_combo ) {
17348 # index on other line where same token would be in a
17351 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
17356 && $types_to_go[$iother] ne $type;
17359 next unless ($good_combo);
17363 elsif ( $is_amp_amp{$type} ) {
17367 elsif ( $is_assignment{$type} ) {
17369 } ## end assignment
17372 #----------------------------------------------------------
17373 # Recombine Section 2:
17374 # Examine token at $iend_1 (right end of first line of pair)
17375 #----------------------------------------------------------
17377 # an isolated '}' may join with a ';' terminated segment
17378 if ( $type_iend_1 eq '}' ) {
17380 # Check for cases where combining a semicolon terminated
17381 # statement with a previous isolated closing paren will
17382 # allow the combined line to be outdented. This is
17383 # generally a good move. For example, we can join up
17384 # the last two lines here:
17386 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17387 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17393 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17394 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17397 # which makes the parens line up.
17399 # Another example, from Joe Matarazzo, probably looks best
17400 # with the 'or' clause appended to the trailing paren:
17401 # $self->some_method(
17404 # ) or die "Some_method didn't work";
17406 # But we do not want to do this for something like the -lp
17407 # option where the paren is not outdentable because the
17408 # trailing clause will be far to the right.
17410 # The logic here is synchronized with the logic in sub
17411 # sub set_adjusted_indentation, which actually does
17414 $previous_outdentable_closing_paren =
17415 $this_line_is_semicolon_terminated
17417 # only one token on last line
17418 && $ibeg_1 == $iend_1
17420 # must be structural paren
17421 && $tokens_to_go[$iend_1] eq ')'
17423 # style must allow outdenting,
17424 && !$closing_token_indentation{')'}
17426 # only leading '&&', '||', and ':' if no others seen
17427 # (but note: our count made below could be wrong
17428 # due to intervening comments)
17429 && ( $leading_amp_count == 0
17430 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
17432 # but leading colons probably line up with a
17433 # previous colon or question (count could be wrong).
17434 && $type_ibeg_2 ne ':'
17436 # only one step in depth allowed. this line must not
17437 # begin with a ')' itself.
17438 && ( $nesting_depth_to_go[$iend_1] ==
17439 $nesting_depth_to_go[$iend_2] + 1 );
17441 # YVES patch 2 of 2:
17442 # Allow cuddled eval chains, like this:
17449 # This patch works together with a patch in
17450 # setting adjusted indentation (where the closing eval
17451 # brace is outdented if possible).
17452 # The problem is that an 'eval' block has continuation
17453 # indentation and it looks better to undo it in some
17454 # cases. If we do not use this patch we would get:
17462 # The alternative, for uncuddled style, is to create
17463 # a patch in set_adjusted_indentation which undoes
17464 # the indentation of a leading line like 'or do {'.
17465 # This doesn't work well with -icb through
17467 $block_type_to_go[$iend_1] eq 'eval'
17468 && !$rOpts->{'line-up-parentheses'}
17469 && !$rOpts->{'indent-closing-brace'}
17470 && $tokens_to_go[$iend_2] eq '{'
17472 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
17473 || ( $type_ibeg_2 eq 'k'
17474 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
17475 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
17479 $previous_outdentable_closing_paren ||= 1;
17484 $previous_outdentable_closing_paren
17486 # handle '.' and '?' specially below
17487 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
17492 # honor breaks at opening brace
17493 # Added to prevent recombining something like this:
17494 # } || eval { package main;
17495 elsif ( $type_iend_1 eq '{' ) {
17496 next if $forced_breakpoint_to_go[$iend_1];
17499 # do not recombine lines with ending &&, ||,
17500 elsif ( $is_amp_amp{$type_iend_1} ) {
17501 next unless $want_break_before{$type_iend_1};
17504 # Identify and recombine a broken ?/: chain
17505 elsif ( $type_iend_1 eq '?' ) {
17507 # Do not recombine different levels
17509 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
17511 # do not recombine unless next line ends in :
17512 next unless $type_iend_2 eq ':';
17515 # for lines ending in a comma...
17516 elsif ( $type_iend_1 eq ',' ) {
17518 # Do not recombine at comma which is following the
17520 # TODO: might be best to make a special flag
17521 next if ( $old_breakpoint_to_go[$iend_1] );
17523 # an isolated '},' may join with an identifier + ';'
17524 # this is useful for the class of a 'bless' statement (bless.t)
17525 if ( $type_ibeg_1 eq '}'
17526 && $type_ibeg_2 eq 'i' )
17529 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
17530 && ( $iend_2 == ( $ibeg_2 + 1 ) )
17531 && $this_line_is_semicolon_terminated );
17533 # override breakpoint
17534 $forced_breakpoint_to_go[$iend_1] = 0;
17540 # do not recombine after a comma unless this will leave
17542 next unless ( $n + 1 >= $nmax );
17544 # do not recombine if there is a change in indentation depth
17547 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
17549 # do not recombine a "complex expression" after a
17550 # comma. "complex" means no parens.
17552 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
17553 if ( $tokens_to_go[$ii] eq '(' ) {
17558 next if $saw_paren;
17563 elsif ( $type_iend_1 eq '(' ) {
17565 # No longer doing this
17568 elsif ( $type_iend_1 eq ')' ) {
17570 # No longer doing this
17573 # keep a terminal for-semicolon
17574 elsif ( $type_iend_1 eq 'f' ) {
17578 # if '=' at end of line ...
17579 elsif ( $is_assignment{$type_iend_1} ) {
17581 # keep break after = if it was in input stream
17582 # this helps prevent 'blinkers'
17583 next if $old_breakpoint_to_go[$iend_1]
17585 # don't strand an isolated '='
17586 && $iend_1 != $ibeg_1;
17588 my $is_short_quote =
17589 ( $type_ibeg_2 eq 'Q'
17590 && $ibeg_2 == $iend_2
17591 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
17592 $rOpts_short_concatenation_item_length );
17594 ( $type_ibeg_1 eq '?'
17595 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
17597 # always join an isolated '=', a short quote, or if this
17598 # will put ?/: at start of adjacent lines
17599 if ( $ibeg_1 != $iend_1
17600 && !$is_short_quote
17607 # unless we can reduce this to two lines
17610 # or three lines, the last with a leading semicolon
17611 || ( $nmax == $n + 2
17612 && $types_to_go[$ibeg_nmax] eq ';' )
17614 # or the next line ends with a here doc
17615 || $type_iend_2 eq 'h'
17617 # or the next line ends in an open paren or brace
17618 # and the break hasn't been forced [dima.t]
17619 || ( !$forced_breakpoint_to_go[$iend_1]
17620 && $type_iend_2 eq '{' )
17623 # do not recombine if the two lines might align well
17624 # this is a very approximate test for this
17626 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
17631 # Recombine if we can make two lines
17634 # -lp users often prefer this:
17635 # my $title = function($env, $env, $sysarea,
17636 # "bubba Borrower Entry");
17637 # so we will recombine if -lp is used we have
17639 && ( !$rOpts_line_up_parentheses
17640 || $type_iend_2 ne ',' )
17644 # otherwise, scan the rhs line up to last token for
17645 # complexity. Note that we are not counting the last
17646 # token in case it is an opening paren.
17648 my $depth = $nesting_depth_to_go[$ibeg_2];
17649 for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
17650 if ( $nesting_depth_to_go[$i] != $depth ) {
17652 last if ( $tv > 1 );
17654 $depth = $nesting_depth_to_go[$i];
17657 # ok to recombine if no level changes before last token
17660 # otherwise, do not recombine if more than two
17662 next if ( $tv > 1 );
17664 # check total complexity of the two adjacent lines
17665 # that will occur if we do this join
17667 ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
17668 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
17669 if ( $nesting_depth_to_go[$i] != $depth ) {
17671 last if ( $tv > 2 );
17673 $depth = $nesting_depth_to_go[$i];
17676 # do not recombine if total is more than 2 level changes
17677 next if ( $tv > 2 );
17682 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
17683 $forced_breakpoint_to_go[$iend_1] = 0;
17688 elsif ( $type_iend_1 eq 'k' ) {
17690 # make major control keywords stand out
17695 #/^(last|next|redo|return)$/
17696 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
17698 # but only if followed by multiple lines
17702 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
17704 unless $want_break_before{ $tokens_to_go[$iend_1] };
17708 #----------------------------------------------------------
17709 # Recombine Section 3:
17710 # Examine token at $ibeg_2 (left end of second line of pair)
17711 #----------------------------------------------------------
17713 # join lines identified above as capable of
17714 # causing an outdented line with leading closing paren
17715 # Note that we are skipping the rest of this section
17716 if ($previous_outdentable_closing_paren) {
17717 $forced_breakpoint_to_go[$iend_1] = 0;
17720 # handle lines with leading &&, ||
17721 elsif ( $is_amp_amp{$type_ibeg_2} ) {
17723 $leading_amp_count++;
17725 # ok to recombine if it follows a ? or :
17726 # and is followed by an open paren..
17728 ( $is_ternary{$type_ibeg_1}
17729 && $tokens_to_go[$iend_2] eq '(' )
17731 # or is followed by a ? or : at same depth
17733 # We are looking for something like this. We can
17734 # recombine the && line with the line above to make the
17735 # structure more clear:
17737 # exists $G->{Attr}->{V}
17738 # && exists $G->{Attr}->{V}->{$u}
17739 # ? %{ $G->{Attr}->{V}->{$u} }
17742 # We should probably leave something like this alone:
17744 # exists $G->{Attr}->{E}
17745 # && exists $G->{Attr}->{E}->{$u}
17746 # && exists $G->{Attr}->{E}->{$u}->{$v}
17747 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
17749 # so that we either have all of the &&'s (or ||'s)
17750 # on one line, as in the first example, or break at
17751 # each one as in the second example. However, it
17752 # sometimes makes things worse to check for this because
17753 # it prevents multiple recombinations. So this is not done.
17755 && $is_ternary{ $types_to_go[$ibeg_3] }
17756 && $nesting_depth_to_go[$ibeg_3] ==
17757 $nesting_depth_to_go[$ibeg_2] );
17759 next if !$ok && $want_break_before{$type_ibeg_2};
17760 $forced_breakpoint_to_go[$iend_1] = 0;
17762 # tweak the bond strength to give this joint priority
17767 # Identify and recombine a broken ?/: chain
17768 elsif ( $type_ibeg_2 eq '?' ) {
17770 # Do not recombine different levels
17771 my $lev = $levels_to_go[$ibeg_2];
17772 next if ( $lev ne $levels_to_go[$ibeg_1] );
17774 # Do not recombine a '?' if either next line or
17775 # previous line does not start with a ':'. The reasons
17776 # are that (1) no alignment of the ? will be possible
17777 # and (2) the expression is somewhat complex, so the
17778 # '?' is harder to see in the interior of the line.
17779 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
17780 my $precedes_colon =
17781 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
17782 next unless ( $follows_colon || $precedes_colon );
17784 # we will always combining a ? line following a : line
17785 if ( !$follows_colon ) {
17787 # ...otherwise recombine only if it looks like a chain.
17788 # we will just look at a few nearby lines to see if
17789 # this looks like a chain.
17790 my $local_count = 0;
17791 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
17794 && $types_to_go[$ii] eq ':'
17795 && $levels_to_go[$ii] == $lev;
17797 next unless ( $local_count > 1 );
17799 $forced_breakpoint_to_go[$iend_1] = 0;
17802 # do not recombine lines with leading '.'
17803 elsif ( $type_ibeg_2 eq '.' ) {
17804 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
17808 # ... unless there is just one and we can reduce
17809 # this to two lines if we do. For example, this
17813 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
17815 # looks better than this:
17816 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
17817 # . '$args .= $pat;'
17822 && $type_ibeg_1 ne $type_ibeg_2
17825 # ... or this would strand a short quote , like this
17826 # . "some long quote"
17829 || ( $types_to_go[$i_next_nonblank] eq 'Q'
17830 && $i_next_nonblank >= $iend_2 - 1
17831 && $token_lengths_to_go[$i_next_nonblank] <
17832 $rOpts_short_concatenation_item_length )
17836 # handle leading keyword..
17837 elsif ( $type_ibeg_2 eq 'k' ) {
17839 # handle leading "or"
17840 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
17843 $this_line_is_semicolon_terminated
17846 # following 'if' or 'unless' or 'or'
17847 $type_ibeg_1 eq 'k'
17848 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
17850 # important: only combine a very simple or
17851 # statement because the step below may have
17852 # combined a trailing 'and' with this or,
17853 # and we do not want to then combine
17854 # everything together
17855 && ( $iend_2 - $ibeg_2 <= 7 )
17859 $forced_breakpoint_to_go[$iend_1] = 0
17860 unless $old_breakpoint_to_go[$iend_1];
17863 # handle leading 'and'
17864 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
17866 # Decide if we will combine a single terminal 'and'
17867 # after an 'if' or 'unless'.
17869 # This looks best with the 'and' on the same
17870 # line as the 'if':
17873 # if $seconds and $nu < 2;
17875 # But this looks better as shown:
17878 # if !$this->{Parents}{$_}
17879 # or $this->{Parents}{$_} eq $_;
17883 $this_line_is_semicolon_terminated
17886 # following 'if' or 'unless' or 'or'
17887 $type_ibeg_1 eq 'k'
17888 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
17889 || $tokens_to_go[$ibeg_1] eq 'or' )
17894 # handle leading "if" and "unless"
17895 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
17897 # FIXME: This is still experimental..may not be too useful
17900 $this_line_is_semicolon_terminated
17902 # previous line begins with 'and' or 'or'
17903 && $type_ibeg_1 eq 'k'
17904 && $is_and_or{ $tokens_to_go[$ibeg_1] }
17909 # handle all other leading keywords
17912 # keywords look best at start of lines,
17913 # but combine things like "1 while"
17914 unless ( $is_assignment{$type_iend_1} ) {
17916 if ( ( $type_iend_1 ne 'k' )
17917 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
17922 # similar treatment of && and || as above for 'and' and 'or':
17923 # NOTE: This block of code is currently bypassed because
17924 # of a previous block but is retained for possible future use.
17925 elsif ( $is_amp_amp{$type_ibeg_2} ) {
17927 # maybe looking at something like:
17928 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
17932 $this_line_is_semicolon_terminated
17934 # previous line begins with an 'if' or 'unless' keyword
17935 && $type_ibeg_1 eq 'k'
17936 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
17941 # handle line with leading = or similar
17942 elsif ( $is_assignment{$type_ibeg_2} ) {
17943 next unless ( $n == 1 || $n == $nmax );
17944 next if $old_breakpoint_to_go[$iend_1];
17948 # unless we can reduce this to two lines
17951 # or three lines, the last with a leading semicolon
17952 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
17954 # or the next line ends with a here doc
17955 || $type_iend_2 eq 'h'
17957 # or this is a short line ending in ;
17958 || ( $n == $nmax && $this_line_is_semicolon_terminated )
17960 $forced_breakpoint_to_go[$iend_1] = 0;
17963 #----------------------------------------------------------
17964 # Recombine Section 4:
17965 # Combine the lines if we arrive here and it is possible
17966 #----------------------------------------------------------
17968 # honor hard breakpoints
17969 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
17971 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
17973 # combined line cannot be too long
17974 my $excess = excess_line_length( $ibeg_1, $iend_2 );
17975 next if ( $excess > 0 );
17977 # Require a few extra spaces before recombining lines if we are
17978 # at an old breakpoint unless this is a simple list or terminal
17979 # line. The goal is to avoid oscillating between two
17980 # quasi-stable end states. For example this snippet caused
17984 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
17988 if ( $old_breakpoint_to_go[$iend_1]
17989 && !$this_line_is_semicolon_terminated
17992 && $type_iend_2 ne ',' );
17994 # do not recombine if we would skip in indentation levels
17995 if ( $n < $nmax ) {
17996 my $if_next = $$ri_beg[ $n + 1 ];
17999 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
18000 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
18002 # but an isolated 'if (' is undesirable
18005 && $iend_1 - $ibeg_1 <= 2
18006 && $type_ibeg_1 eq 'k'
18007 && $tokens_to_go[$ibeg_1] eq 'if'
18008 && $tokens_to_go[$iend_1] ne '('
18014 next if ( $bs >= NO_BREAK - 1 );
18016 # remember the pair with the greatest bond strength
18023 if ( $bs > $bs_best ) {
18030 # recombine the pair with the greatest bond strength
18032 splice @$ri_beg, $n_best, 1;
18033 splice @$ri_end, $n_best - 1, 1;
18034 splice @joint, $n_best, 1;
18036 # keep going if we are still making progress
18040 return ( $ri_beg, $ri_end );
18042 } # end recombine_breakpoints
18044 sub break_all_chain_tokens {
18046 # scan the current breakpoints looking for breaks at certain "chain
18047 # operators" (. : && || + etc) which often occur repeatedly in a long
18048 # statement. If we see a break at any one, break at all similar tokens
18049 # within the same container.
18051 my ( $ri_left, $ri_right ) = @_;
18053 my %saw_chain_type;
18054 my %left_chain_type;
18055 my %right_chain_type;
18056 my %interior_chain_type;
18057 my $nmax = @$ri_right - 1;
18059 # scan the left and right end tokens of all lines
18061 for my $n ( 0 .. $nmax ) {
18062 my $il = $$ri_left[$n];
18063 my $ir = $$ri_right[$n];
18064 my $typel = $types_to_go[$il];
18065 my $typer = $types_to_go[$ir];
18066 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
18067 $typer = '+' if ( $typer eq '-' );
18068 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
18069 $typer = '*' if ( $typer eq '/' );
18070 my $tokenl = $tokens_to_go[$il];
18071 my $tokenr = $tokens_to_go[$ir];
18073 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
18074 next if ( $typel eq '?' );
18075 push @{ $left_chain_type{$typel} }, $il;
18076 $saw_chain_type{$typel} = 1;
18079 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
18080 next if ( $typer eq '?' );
18081 push @{ $right_chain_type{$typer} }, $ir;
18082 $saw_chain_type{$typer} = 1;
18086 return unless $count;
18088 # now look for any interior tokens of the same types
18090 for my $n ( 0 .. $nmax ) {
18091 my $il = $$ri_left[$n];
18092 my $ir = $$ri_right[$n];
18093 for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
18094 my $type = $types_to_go[$i];
18095 $type = '+' if ( $type eq '-' );
18096 $type = '*' if ( $type eq '/' );
18097 if ( $saw_chain_type{$type} ) {
18098 push @{ $interior_chain_type{$type} }, $i;
18103 return unless $count;
18105 # now make a list of all new break points
18108 # loop over all chain types
18109 foreach my $type ( keys %saw_chain_type ) {
18111 # quit if just ONE continuation line with leading . For example--
18112 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
18114 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
18116 # loop over all interior chain tokens
18117 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
18119 # loop over all left end tokens of same type
18120 if ( $left_chain_type{$type} ) {
18121 next if $nobreak_to_go[ $itest - 1 ];
18122 foreach my $i ( @{ $left_chain_type{$type} } ) {
18123 next unless in_same_container( $i, $itest );
18124 push @insert_list, $itest - 1;
18126 # Break at matching ? if this : is at a different level.
18127 # For example, the ? before $THRf_DEAD in the following
18128 # should get a break if its : gets a break.
18131 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
18132 # : ( $_ & 4 ) ? $THRf_R_DETACHED
18133 # : $THRf_R_JOINABLE;
18135 && $levels_to_go[$i] != $levels_to_go[$itest] )
18137 my $i_question = $mate_index_to_go[$itest];
18138 if ( $i_question > 0 ) {
18139 push @insert_list, $i_question - 1;
18146 # loop over all right end tokens of same type
18147 if ( $right_chain_type{$type} ) {
18148 next if $nobreak_to_go[$itest];
18149 foreach my $i ( @{ $right_chain_type{$type} } ) {
18150 next unless in_same_container( $i, $itest );
18151 push @insert_list, $itest;
18153 # break at matching ? if this : is at a different level
18155 && $levels_to_go[$i] != $levels_to_go[$itest] )
18157 my $i_question = $mate_index_to_go[$itest];
18158 if ( $i_question >= 0 ) {
18159 push @insert_list, $i_question;
18168 # insert any new break points
18169 if (@insert_list) {
18170 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18176 # Look for assignment operators that could use a breakpoint.
18177 # For example, in the following snippet
18179 # $HOME = $ENV{HOME}
18182 # || die "no home directory for user $<";
18184 # we could break at the = to get this, which is a little nicer:
18189 # || die "no home directory for user $<";
18191 # The logic here follows the logic in set_logical_padding, which
18192 # will add the padding in the second line to improve alignment.
18194 my ( $ri_left, $ri_right ) = @_;
18195 my $nmax = @$ri_right - 1;
18196 return unless ( $nmax >= 2 );
18198 # scan the left ends of first two lines
18201 for my $n ( 1 .. 2 ) {
18202 my $il = $$ri_left[$n];
18203 my $typel = $types_to_go[$il];
18204 my $tokenl = $tokens_to_go[$il];
18206 my $has_leading_op = ( $tokenl =~ /^\w/ )
18207 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
18208 : $is_chain_operator{$typel}; # and, or
18209 return unless ($has_leading_op);
18212 unless ( $tokenl eq $tokbeg
18213 && $nesting_depth_to_go[$il] eq $depth_beg );
18216 $depth_beg = $nesting_depth_to_go[$il];
18219 # now look for any interior tokens of the same types
18220 my $il = $$ri_left[0];
18221 my $ir = $$ri_right[0];
18223 # now make a list of all new break points
18225 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
18226 my $type = $types_to_go[$i];
18227 if ( $is_assignment{$type}
18228 && $nesting_depth_to_go[$i] eq $depth_beg )
18230 if ( $want_break_before{$type} ) {
18231 push @insert_list, $i - 1;
18234 push @insert_list, $i;
18239 # Break after a 'return' followed by a chain of operators
18240 # return ( $^O !~ /win32|dos/i )
18241 # && ( $^O ne 'VMS' )
18242 # && ( $^O ne 'OS2' )
18243 # && ( $^O ne 'MacOS' );
18246 # ( $^O !~ /win32|dos/i )
18247 # && ( $^O ne 'VMS' )
18248 # && ( $^O ne 'OS2' )
18249 # && ( $^O ne 'MacOS' );
18251 if ( $types_to_go[$i] eq 'k'
18252 && $tokens_to_go[$i] eq 'return'
18254 && $nesting_depth_to_go[$i] eq $depth_beg )
18256 push @insert_list, $i;
18259 return unless (@insert_list);
18261 # One final check...
18262 # scan second and third lines and be sure there are no assignments
18263 # we want to avoid breaking at an = to make something like this:
18265 # $html_icons{"$type-$state"}
18266 # or $icon = $html_icons{$type}
18267 # or $icon = $html_icons{$state} )
18268 for my $n ( 1 .. 2 ) {
18269 my $il = $$ri_left[$n];
18270 my $ir = $$ri_right[$n];
18271 for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
18272 my $type = $types_to_go[$i];
18274 if ( $is_assignment{$type}
18275 && $nesting_depth_to_go[$i] eq $depth_beg );
18279 # ok, insert any new break point
18280 if (@insert_list) {
18281 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18285 sub insert_final_breaks {
18287 my ( $ri_left, $ri_right ) = @_;
18289 my $nmax = @$ri_right - 1;
18291 # scan the left and right end tokens of all lines
18293 my $i_first_colon = -1;
18294 for my $n ( 0 .. $nmax ) {
18295 my $il = $$ri_left[$n];
18296 my $ir = $$ri_right[$n];
18297 my $typel = $types_to_go[$il];
18298 my $typer = $types_to_go[$ir];
18299 return if ( $typel eq '?' );
18300 return if ( $typer eq '?' );
18301 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
18302 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
18305 # For long ternary chains,
18306 # if the first : we see has its # ? is in the interior
18307 # of a preceding line, then see if there are any good
18308 # breakpoints before the ?.
18309 if ( $i_first_colon > 0 ) {
18310 my $i_question = $mate_index_to_go[$i_first_colon];
18311 if ( $i_question > 0 ) {
18313 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
18314 my $token = $tokens_to_go[$ii];
18315 my $type = $types_to_go[$ii];
18317 # For now, a good break is either a comma or a 'return'.
18318 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
18319 && in_same_container( $ii, $i_question ) )
18321 push @insert_list, $ii;
18326 # insert any new break points
18327 if (@insert_list) {
18328 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18334 sub in_same_container {
18336 # check to see if tokens at i1 and i2 are in the
18337 # same container, and not separated by a comma, ? or :
18338 my ( $i1, $i2 ) = @_;
18339 my $type = $types_to_go[$i1];
18340 my $depth = $nesting_depth_to_go[$i1];
18341 return unless ( $nesting_depth_to_go[$i2] == $depth );
18342 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
18344 ###########################################################
18345 # This is potentially a very slow routine and not critical.
18346 # For safety just give up for large differences.
18347 # See test file 'infinite_loop.txt'
18348 # TODO: replace this loop with a data structure
18349 ###########################################################
18350 return if ( $i2 - $i1 > 200 );
18352 for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
18353 next if ( $nesting_depth_to_go[$i] > $depth );
18354 return if ( $nesting_depth_to_go[$i] < $depth );
18356 my $tok = $tokens_to_go[$i];
18357 $tok = ',' if $tok eq '=>'; # treat => same as ,
18359 # Example: we would not want to break at any of these .'s
18360 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
18361 if ( $type ne ':' ) {
18362 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
18365 return if ( $tok =~ /^[\,]$/ );
18371 sub set_continuation_breaks {
18373 # Define an array of indexes for inserting newline characters to
18374 # keep the line lengths below the maximum desired length. There is
18375 # an implied break after the last token, so it need not be included.
18378 # This routine is part of series of routines which adjust line
18379 # lengths. It is only called if a statement is longer than the
18380 # maximum line length, or if a preliminary scanning located
18381 # desirable break points. Sub scan_list has already looked at
18382 # these tokens and set breakpoints (in array
18383 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
18384 # after commas, after opening parens, and before closing parens).
18385 # This routine will honor these breakpoints and also add additional
18386 # breakpoints as necessary to keep the line length below the maximum
18387 # requested. It bases its decision on where the 'bond strength' is
18390 # Output: returns references to the arrays:
18393 # which contain the indexes $i of the first and last tokens on each
18396 # In addition, the array:
18397 # $forced_breakpoint_to_go[$i]
18398 # may be updated to be =1 for any index $i after which there must be
18399 # a break. This signals later routines not to undo the breakpoint.
18401 my $saw_good_break = shift;
18402 my @i_first = (); # the first index to output
18403 my @i_last = (); # the last index to output
18404 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
18405 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
18407 set_bond_strengths();
18410 my $imax = $max_index_to_go;
18411 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
18412 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
18413 my $i_begin = $imin; # index for starting next iteration
18415 my $leading_spaces = leading_spaces_to_go($imin);
18416 my $line_count = 0;
18417 my $last_break_strength = NO_BREAK;
18418 my $i_last_break = -1;
18419 my $max_bias = 0.001;
18420 my $tiny_bias = 0.0001;
18421 my $leading_alignment_token = "";
18422 my $leading_alignment_type = "";
18424 # see if any ?/:'s are in order
18425 my $colons_in_order = 1;
18427 my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
18428 my $colon_count = @colon_list;
18429 foreach (@colon_list) {
18430 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
18434 # This is a sufficient but not necessary condition for colon chain
18435 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
18437 #-------------------------------------------------------
18438 # BEGINNING of main loop to set continuation breakpoints
18439 # Keep iterating until we reach the end
18440 #-------------------------------------------------------
18441 while ( $i_begin <= $imax ) {
18442 my $lowest_strength = NO_BREAK;
18443 my $starting_sum = $summed_lengths_to_go[$i_begin];
18446 my $lowest_next_token = '';
18447 my $lowest_next_type = 'b';
18448 my $i_lowest_next_nonblank = -1;
18450 #-------------------------------------------------------
18451 # BEGINNING of inner loop to find the best next breakpoint
18452 #-------------------------------------------------------
18453 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
18454 my $type = $types_to_go[$i_test];
18455 my $token = $tokens_to_go[$i_test];
18456 my $next_type = $types_to_go[ $i_test + 1 ];
18457 my $next_token = $tokens_to_go[ $i_test + 1 ];
18458 my $i_next_nonblank = $inext_to_go[$i_test];
18459 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
18460 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18461 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18462 my $strength = $bond_strength_to_go[$i_test];
18463 my $maximum_line_length = maximum_line_length($i_begin);
18465 # use old breaks as a tie-breaker. For example to
18466 # prevent blinkers with -pbp in this code:
18469 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
18472 # At the same time try to prevent a leading * in this code
18473 # with the default formatting:
18476 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
18477 ## * ( $x**( $a - 1 ) )
18478 ## * ( ( 1 - $x )**( $b - 1 ) );
18480 # reduce strength a bit to break ties at an old breakpoint ...
18482 $old_breakpoint_to_go[$i_test]
18484 # which is a 'good' breakpoint, meaning ...
18485 # we don't want to break before it
18486 && !$want_break_before{$type}
18488 # and either we want to break before the next token
18489 # or the next token is not short (i.e. not a '*', '/' etc.)
18490 && $i_next_nonblank <= $imax
18491 && ( $want_break_before{$next_nonblank_type}
18492 || $token_lengths_to_go[$i_next_nonblank] > 2
18493 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
18496 $strength -= $tiny_bias;
18499 # otherwise increase strength a bit if this token would be at the
18500 # maximum line length. This is necessary to avoid blinking
18501 # in the above example when the -iob flag is added.
18505 $summed_lengths_to_go[ $i_test + 1 ] -
18507 if ( $len >= $maximum_line_length ) {
18508 $strength += $tiny_bias;
18512 my $must_break = 0;
18514 # Force an immediate break at certain operators
18515 # with lower level than the start of the line,
18516 # unless we've already seen a better break.
18518 ##############################################
18519 # Note on an issue with a preceding ?
18520 ##############################################
18521 # We don't include a ? in the above list, but there may
18522 # be a break at a previous ? if the line is long.
18523 # Because of this we do not want to force a break if
18524 # there is a previous ? on this line. For now the best way
18525 # to do this is to not break if we have seen a lower strength
18526 # point, which is probably a ?.
18528 # Example of unwanted breaks we are avoiding at a '.' following a ?
18529 # from pod2html using perltidy -gnu:
18531 # ? "\n<A NAME=\""
18533 # . "\">\n$text</A>\n"
18534 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
18537 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
18538 || ( $next_nonblank_type eq 'k'
18539 && $next_nonblank_token =~ /^(and|or)$/ )
18541 && ( $nesting_depth_to_go[$i_begin] >
18542 $nesting_depth_to_go[$i_next_nonblank] )
18543 && ( $strength <= $lowest_strength )
18546 set_forced_breakpoint($i_next_nonblank);
18551 # Try to put a break where requested by scan_list
18552 $forced_breakpoint_to_go[$i_test]
18554 # break between ) { in a continued line so that the '{' can
18556 # See similar logic in scan_list which catches instances
18557 # where a line is just something like ') {'. We have to
18558 # be careful because the corresponding block keyword might
18559 # not be on the first line, such as 'for' here:
18563 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
18568 && ( $token eq ')' )
18569 && ( $next_nonblank_type eq '{' )
18570 && ($next_nonblank_block_type)
18571 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
18572 && !$rOpts->{'opening-brace-always-on-right'} )
18574 # There is an implied forced break at a terminal opening brace
18575 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
18579 # Forced breakpoints must sometimes be overridden, for example
18580 # because of a side comment causing a NO_BREAK. It is easier
18581 # to catch this here than when they are set.
18582 if ( $strength < NO_BREAK - 1 ) {
18583 $strength = $lowest_strength - $tiny_bias;
18588 # quit if a break here would put a good terminal token on
18589 # the next line and we already have a possible break
18592 && ( $next_nonblank_type =~ /^[\;\,]$/ )
18596 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
18598 ) > $maximum_line_length
18602 last if ( $i_lowest >= 0 );
18605 # Avoid a break which would strand a single punctuation
18606 # token. For example, we do not want to strand a leading
18607 # '.' which is followed by a long quoted string.
18608 # But note that we do want to do this with -extrude (l=1)
18609 # so please test any changes to this code on -extrude.
18612 && ( $i_test == $i_begin )
18613 && ( $i_test < $imax )
18614 && ( $token eq $type )
18618 $summed_lengths_to_go[ $i_test + 1 ] -
18620 ) < $maximum_line_length
18624 $i_test = min( $imax, $inext_to_go[$i_test] );
18628 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
18631 # break at previous best break if it would have produced
18632 # a leading alignment of certain common tokens, and it
18633 # is different from the latest candidate break
18635 if ($leading_alignment_type);
18637 # Force at least one breakpoint if old code had good
18638 # break It is only called if a breakpoint is required or
18639 # desired. This will probably need some adjustments
18640 # over time. A goal is to try to be sure that, if a new
18641 # side comment is introduced into formatted text, then
18642 # the same breakpoints will occur. scbreak.t
18645 $i_test == $imax # we are at the end
18646 && !$forced_breakpoint_count #
18647 && $saw_good_break # old line had good break
18648 && $type =~ /^[#;\{]$/ # and this line ends in
18649 # ';' or side comment
18650 && $i_last_break < 0 # and we haven't made a break
18651 && $i_lowest >= 0 # and we saw a possible break
18652 && $i_lowest < $imax - 1 # (but not just before this ;)
18653 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
18656 # Do not skip past an important break point in a short final
18657 # segment. For example, without this check we would miss the
18658 # break at the final / in the following code:
18661 # ( $tau * $mass_pellet * $q_0 *
18662 # ( 1. - exp( -$t_stop / $tau ) ) -
18663 # 4. * $pi * $factor * $k_ice *
18664 # ( $t_melt - $t_ice ) *
18667 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
18669 if ( $line_count > 2
18670 && $i_lowest < $i_test
18671 && $i_test > $imax - 2
18672 && $nesting_depth_to_go[$i_begin] >
18673 $nesting_depth_to_go[$i_lowest]
18674 && $lowest_strength < $last_break_strength - .5 * WEAK )
18676 # Make this break for math operators for now
18677 my $ir = $inext_to_go[$i_lowest];
18678 my $il = $iprev_to_go[$ir];
18680 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
18681 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
18684 # Update the minimum bond strength location
18685 $lowest_strength = $strength;
18686 $i_lowest = $i_test;
18687 $lowest_next_token = $next_nonblank_token;
18688 $lowest_next_type = $next_nonblank_type;
18689 $i_lowest_next_nonblank = $i_next_nonblank;
18690 last if $must_break;
18692 # set flags to remember if a break here will produce a
18693 # leading alignment of certain common tokens
18694 if ( $line_count > 0
18696 && ( $lowest_strength - $last_break_strength <= $max_bias )
18699 my $i_last_end = $iprev_to_go[$i_begin];
18700 my $tok_beg = $tokens_to_go[$i_begin];
18701 my $type_beg = $types_to_go[$i_begin];
18704 # check for leading alignment of certain tokens
18706 $tok_beg eq $next_nonblank_token
18707 && $is_chain_operator{$tok_beg}
18708 && ( $type_beg eq 'k'
18709 || $type_beg eq $tok_beg )
18710 && $nesting_depth_to_go[$i_begin] >=
18711 $nesting_depth_to_go[$i_next_nonblank]
18714 || ( $tokens_to_go[$i_last_end] eq $token
18715 && $is_chain_operator{$token}
18716 && ( $type eq 'k' || $type eq $token )
18717 && $nesting_depth_to_go[$i_last_end] >=
18718 $nesting_depth_to_go[$i_test] )
18721 $leading_alignment_token = $next_nonblank_token;
18722 $leading_alignment_type = $next_nonblank_type;
18727 my $too_long = ( $i_test >= $imax );
18728 if ( !$too_long ) {
18731 $summed_lengths_to_go[ $i_test + 2 ] -
18733 $too_long = $next_length > $maximum_line_length;
18735 # To prevent blinkers we will avoid leaving a token exactly at
18736 # the line length limit unless it is the last token or one of
18737 # several "good" types.
18739 # The following code was a blinker with -pbp before this
18741 ## $last_nonblank_token eq '('
18742 ## && $is_indirect_object_taker{ $paren_type
18743 ## [$paren_depth] }
18744 # The issue causing the problem is that if the
18745 # term [$paren_depth] gets broken across a line then
18746 # the whitespace routine doesn't see both opening and closing
18747 # brackets and will format like '[ $paren_depth ]'. This
18748 # leads to an oscillation in length depending if we break
18749 # before the closing bracket or not.
18751 && $i_test + 1 < $imax
18752 && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
18754 $too_long = $next_length >= $maximum_line_length;
18758 FORMATTER_DEBUG_FLAG_BREAK
18761 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
18762 my $i_testp2 = $i_test + 2;
18763 if ( $i_testp2 > $max_index_to_go + 1 ) {
18764 $i_testp2 = $max_index_to_go + 1;
18766 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
18767 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
18769 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
18772 # allow one extra terminal token after exceeding line length
18773 # if it would strand this token.
18774 if ( $rOpts_fuzzy_line_length
18776 && $i_lowest == $i_test
18777 && $token_lengths_to_go[$i_test] > 1
18778 && $next_nonblank_type =~ /^[\;\,]$/ )
18785 ( $i_test == $imax ) # we're done if no more tokens,
18787 ( $i_lowest >= 0 ) # or no more space and we have a break
18793 #-------------------------------------------------------
18794 # END of inner loop to find the best next breakpoint
18795 # Now decide exactly where to put the breakpoint
18796 #-------------------------------------------------------
18798 # it's always ok to break at imax if no other break was found
18799 if ( $i_lowest < 0 ) { $i_lowest = $imax }
18801 # semi-final index calculation
18802 my $i_next_nonblank = $inext_to_go[$i_lowest];
18803 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
18804 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18806 #-------------------------------------------------------
18807 # ?/: rule 1 : if a break here will separate a '?' on this
18808 # line from its closing ':', then break at the '?' instead.
18809 #-------------------------------------------------------
18811 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
18812 next unless ( $tokens_to_go[$i] eq '?' );
18814 # do not break if probable sequence of ?/: statements
18815 next if ($is_colon_chain);
18817 # do not break if statement is broken by side comment
18820 $tokens_to_go[$max_index_to_go] eq '#'
18821 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
18822 $max_index_to_go ) !~ /^[\;\}]$/
18825 # no break needed if matching : is also on the line
18827 if ( $mate_index_to_go[$i] >= 0
18828 && $mate_index_to_go[$i] <= $i_next_nonblank );
18831 if ( $want_break_before{'?'} ) { $i_lowest-- }
18835 #-------------------------------------------------------
18836 # END of inner loop to find the best next breakpoint:
18837 # Break the line after the token with index i=$i_lowest
18838 #-------------------------------------------------------
18840 # final index calculation
18841 $i_next_nonblank = $inext_to_go[$i_lowest];
18842 $next_nonblank_type = $types_to_go[$i_next_nonblank];
18843 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18845 FORMATTER_DEBUG_FLAG_BREAK
18847 "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
18849 #-------------------------------------------------------
18850 # ?/: rule 2 : if we break at a '?', then break at its ':'
18852 # Note: this rule is also in sub scan_list to handle a break
18853 # at the start and end of a line (in case breaks are dictated
18854 # by side comments).
18855 #-------------------------------------------------------
18856 if ( $next_nonblank_type eq '?' ) {
18857 set_closing_breakpoint($i_next_nonblank);
18859 elsif ( $types_to_go[$i_lowest] eq '?' ) {
18860 set_closing_breakpoint($i_lowest);
18863 #-------------------------------------------------------
18864 # ?/: rule 3 : if we break at a ':' then we save
18865 # its location for further work below. We may need to go
18866 # back and break at its '?'.
18867 #-------------------------------------------------------
18868 if ( $next_nonblank_type eq ':' ) {
18869 push @i_colon_breaks, $i_next_nonblank;
18871 elsif ( $types_to_go[$i_lowest] eq ':' ) {
18872 push @i_colon_breaks, $i_lowest;
18875 # here we should set breaks for all '?'/':' pairs which are
18876 # separated by this line
18880 # save this line segment, after trimming blanks at the ends
18882 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
18884 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
18886 # set a forced breakpoint at a container opening, if necessary, to
18887 # signal a break at a closing container. Excepting '(' for now.
18888 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
18889 && !$forced_breakpoint_to_go[$i_lowest] )
18891 set_closing_breakpoint($i_lowest);
18894 # get ready to go again
18895 $i_begin = $i_lowest + 1;
18896 $last_break_strength = $lowest_strength;
18897 $i_last_break = $i_lowest;
18898 $leading_alignment_token = "";
18899 $leading_alignment_type = "";
18900 $lowest_next_token = '';
18901 $lowest_next_type = 'b';
18903 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
18907 # update indentation size
18908 if ( $i_begin <= $imax ) {
18909 $leading_spaces = leading_spaces_to_go($i_begin);
18913 #-------------------------------------------------------
18914 # END of main loop to set continuation breakpoints
18915 # Now go back and make any necessary corrections
18916 #-------------------------------------------------------
18918 #-------------------------------------------------------
18919 # ?/: rule 4 -- if we broke at a ':', then break at
18920 # corresponding '?' unless this is a chain of ?: expressions
18921 #-------------------------------------------------------
18922 if (@i_colon_breaks) {
18924 # using a simple method for deciding if we are in a ?/: chain --
18925 # this is a chain if it has multiple ?/: pairs all in order;
18927 # Note that if line starts in a ':' we count that above as a break
18928 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
18930 unless ($is_chain) {
18931 my @insert_list = ();
18932 foreach (@i_colon_breaks) {
18933 my $i_question = $mate_index_to_go[$_];
18934 if ( $i_question >= 0 ) {
18935 if ( $want_break_before{'?'} ) {
18936 $i_question = $iprev_to_go[$i_question];
18939 if ( $i_question >= 0 ) {
18940 push @insert_list, $i_question;
18943 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
18947 return ( \@i_first, \@i_last, $colon_count );
18950 sub insert_additional_breaks {
18952 # this routine will add line breaks at requested locations after
18953 # sub set_continuation_breaks has made preliminary breaks.
18955 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
18958 my $line_number = 0;
18960 foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
18962 $i_f = $$ri_first[$line_number];
18963 $i_l = $$ri_last[$line_number];
18964 while ( $i_break_left >= $i_l ) {
18967 # shouldn't happen unless caller passes bad indexes
18968 if ( $line_number >= @$ri_last ) {
18970 "Non-fatal program bug: couldn't set break at $i_break_left\n"
18972 report_definite_bug();
18975 $i_f = $$ri_first[$line_number];
18976 $i_l = $$ri_last[$line_number];
18979 # Do not leave a blank at the end of a line; back up if necessary
18980 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
18982 my $i_break_right = $inext_to_go[$i_break_left];
18983 if ( $i_break_left >= $i_f
18984 && $i_break_left < $i_l
18985 && $i_break_right > $i_f
18986 && $i_break_right <= $i_l )
18988 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
18989 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
18994 sub set_closing_breakpoint {
18996 # set a breakpoint at a matching closing token
18997 # at present, this is only used to break at a ':' which matches a '?'
18998 my $i_break = shift;
19000 if ( $mate_index_to_go[$i_break] >= 0 ) {
19002 # CAUTION: infinite recursion possible here:
19003 # set_closing_breakpoint calls set_forced_breakpoint, and
19004 # set_forced_breakpoint call set_closing_breakpoint
19005 # ( test files attrib.t, BasicLyx.pm.html).
19006 # Don't reduce the '2' in the statement below
19007 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
19009 # break before } ] and ), but sub set_forced_breakpoint will decide
19010 # to break before or after a ? and :
19011 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
19012 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
19016 my $type_sequence = $type_sequence_to_go[$i_break];
19017 if ($type_sequence) {
19018 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
19019 $postponed_breakpoint{$type_sequence} = 1;
19024 sub compare_indentation_levels {
19026 # check to see if output line tabbing agrees with input line
19027 # this can be very useful for debugging a script which has an extra
19029 my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
19030 if ( $guessed_indentation_level ne $structural_indentation_level ) {
19031 $last_tabbing_disagreement = $input_line_number;
19033 if ($in_tabbing_disagreement) {
19036 $tabbing_disagreement_count++;
19038 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19039 write_logfile_entry(
19040 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
19043 $in_tabbing_disagreement = $input_line_number;
19044 $first_tabbing_disagreement = $in_tabbing_disagreement
19045 unless ($first_tabbing_disagreement);
19050 if ($in_tabbing_disagreement) {
19052 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19053 write_logfile_entry(
19054 "End indentation disagreement from input line $in_tabbing_disagreement\n"
19057 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
19058 write_logfile_entry(
19059 "No further tabbing disagreements will be noted\n");
19062 $in_tabbing_disagreement = 0;
19067 #####################################################################
19069 # the Perl::Tidy::IndentationItem class supplies items which contain
19070 # how much whitespace should be used at the start of a line
19072 #####################################################################
19074 package Perl::Tidy::IndentationItem;
19076 # Indexes for indentation items
19077 use constant SPACES => 0; # total leading white spaces
19078 use constant LEVEL => 1; # the indentation 'level'
19079 use constant CI_LEVEL => 2; # the 'continuation level'
19080 use constant AVAILABLE_SPACES => 3; # how many left spaces available
19082 use constant CLOSED => 4; # index where we saw closing '}'
19083 use constant COMMA_COUNT => 5; # how many commas at this level?
19084 use constant SEQUENCE_NUMBER => 6; # output batch number
19085 use constant INDEX => 7; # index in output batch list
19086 use constant HAVE_CHILD => 8; # any dependents?
19087 use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
19088 # we would like to move to get
19089 # alignment (negative if left)
19090 use constant ALIGN_PAREN => 10; # do we want to try to align
19091 # with an opening structure?
19092 use constant MARKED => 11; # if visited by corrector logic
19093 use constant STACK_DEPTH => 12; # indentation nesting depth
19094 use constant STARTING_INDEX => 13; # first token index of this level
19095 use constant ARROW_COUNT => 14; # how many =>'s
19099 # Create an 'indentation_item' which describes one level of leading
19100 # whitespace when the '-lp' indentation is used. We return
19101 # a reference to an anonymous array of associated variables.
19102 # See above constants for storage scheme.
19104 $class, $spaces, $level,
19105 $ci_level, $available_spaces, $index,
19106 $gnu_sequence_number, $align_paren, $stack_depth,
19110 my $arrow_count = 0;
19111 my $comma_count = 0;
19112 my $have_child = 0;
19113 my $want_right_spaces = 0;
19116 $spaces, $level, $ci_level,
19117 $available_spaces, $closed, $comma_count,
19118 $gnu_sequence_number, $index, $have_child,
19119 $want_right_spaces, $align_paren, $marked,
19120 $stack_depth, $starting_index, $arrow_count,
19124 sub permanently_decrease_AVAILABLE_SPACES {
19126 # make a permanent reduction in the available indentation spaces
19127 # at one indentation item. NOTE: if there are child nodes, their
19128 # total SPACES must be reduced by the caller.
19130 my ( $item, $spaces_needed ) = @_;
19131 my $available_spaces = $item->get_AVAILABLE_SPACES();
19132 my $deleted_spaces =
19133 ( $available_spaces > $spaces_needed )
19135 : $available_spaces;
19136 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19137 $item->decrease_SPACES($deleted_spaces);
19138 $item->set_RECOVERABLE_SPACES(0);
19140 return $deleted_spaces;
19143 sub tentatively_decrease_AVAILABLE_SPACES {
19145 # We are asked to tentatively delete $spaces_needed of indentation
19146 # for a indentation item. We may want to undo this later. NOTE: if
19147 # there are child nodes, their total SPACES must be reduced by the
19149 my ( $item, $spaces_needed ) = @_;
19150 my $available_spaces = $item->get_AVAILABLE_SPACES();
19151 my $deleted_spaces =
19152 ( $available_spaces > $spaces_needed )
19154 : $available_spaces;
19155 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19156 $item->decrease_SPACES($deleted_spaces);
19157 $item->increase_RECOVERABLE_SPACES($deleted_spaces);
19158 return $deleted_spaces;
19161 sub get_STACK_DEPTH {
19163 return $self->[STACK_DEPTH];
19168 return $self->[SPACES];
19173 return $self->[MARKED];
19177 my ( $self, $value ) = @_;
19178 if ( defined($value) ) {
19179 $self->[MARKED] = $value;
19181 return $self->[MARKED];
19184 sub get_AVAILABLE_SPACES {
19186 return $self->[AVAILABLE_SPACES];
19189 sub decrease_SPACES {
19190 my ( $self, $value ) = @_;
19191 if ( defined($value) ) {
19192 $self->[SPACES] -= $value;
19194 return $self->[SPACES];
19197 sub decrease_AVAILABLE_SPACES {
19198 my ( $self, $value ) = @_;
19199 if ( defined($value) ) {
19200 $self->[AVAILABLE_SPACES] -= $value;
19202 return $self->[AVAILABLE_SPACES];
19205 sub get_ALIGN_PAREN {
19207 return $self->[ALIGN_PAREN];
19210 sub get_RECOVERABLE_SPACES {
19212 return $self->[RECOVERABLE_SPACES];
19215 sub set_RECOVERABLE_SPACES {
19216 my ( $self, $value ) = @_;
19217 if ( defined($value) ) {
19218 $self->[RECOVERABLE_SPACES] = $value;
19220 return $self->[RECOVERABLE_SPACES];
19223 sub increase_RECOVERABLE_SPACES {
19224 my ( $self, $value ) = @_;
19225 if ( defined($value) ) {
19226 $self->[RECOVERABLE_SPACES] += $value;
19228 return $self->[RECOVERABLE_SPACES];
19233 return $self->[CI_LEVEL];
19238 return $self->[LEVEL];
19241 sub get_SEQUENCE_NUMBER {
19243 return $self->[SEQUENCE_NUMBER];
19248 return $self->[INDEX];
19251 sub get_STARTING_INDEX {
19253 return $self->[STARTING_INDEX];
19256 sub set_HAVE_CHILD {
19257 my ( $self, $value ) = @_;
19258 if ( defined($value) ) {
19259 $self->[HAVE_CHILD] = $value;
19261 return $self->[HAVE_CHILD];
19264 sub get_HAVE_CHILD {
19266 return $self->[HAVE_CHILD];
19269 sub set_ARROW_COUNT {
19270 my ( $self, $value ) = @_;
19271 if ( defined($value) ) {
19272 $self->[ARROW_COUNT] = $value;
19274 return $self->[ARROW_COUNT];
19277 sub get_ARROW_COUNT {
19279 return $self->[ARROW_COUNT];
19282 sub set_COMMA_COUNT {
19283 my ( $self, $value ) = @_;
19284 if ( defined($value) ) {
19285 $self->[COMMA_COUNT] = $value;
19287 return $self->[COMMA_COUNT];
19290 sub get_COMMA_COUNT {
19292 return $self->[COMMA_COUNT];
19296 my ( $self, $value ) = @_;
19297 if ( defined($value) ) {
19298 $self->[CLOSED] = $value;
19300 return $self->[CLOSED];
19305 return $self->[CLOSED];
19308 #####################################################################
19310 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
19311 # contain a single output line
19313 #####################################################################
19315 package Perl::Tidy::VerticalAligner::Line;
19322 use constant JMAX => 0;
19323 use constant JMAX_ORIGINAL_LINE => 1;
19324 use constant RTOKENS => 2;
19325 use constant RFIELDS => 3;
19326 use constant RPATTERNS => 4;
19327 use constant INDENTATION => 5;
19328 use constant LEADING_SPACE_COUNT => 6;
19329 use constant OUTDENT_LONG_LINES => 7;
19330 use constant LIST_TYPE => 8;
19331 use constant IS_HANGING_SIDE_COMMENT => 9;
19332 use constant RALIGNMENTS => 10;
19333 use constant MAXIMUM_LINE_LENGTH => 11;
19334 use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
19337 $_index_map{jmax} = JMAX;
19338 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
19339 $_index_map{rtokens} = RTOKENS;
19340 $_index_map{rfields} = RFIELDS;
19341 $_index_map{rpatterns} = RPATTERNS;
19342 $_index_map{indentation} = INDENTATION;
19343 $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
19344 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
19345 $_index_map{list_type} = LIST_TYPE;
19346 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
19347 $_index_map{ralignments} = RALIGNMENTS;
19348 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
19349 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
19351 my @_default_data = ();
19352 $_default_data[JMAX] = undef;
19353 $_default_data[JMAX_ORIGINAL_LINE] = undef;
19354 $_default_data[RTOKENS] = undef;
19355 $_default_data[RFIELDS] = undef;
19356 $_default_data[RPATTERNS] = undef;
19357 $_default_data[INDENTATION] = undef;
19358 $_default_data[LEADING_SPACE_COUNT] = undef;
19359 $_default_data[OUTDENT_LONG_LINES] = undef;
19360 $_default_data[LIST_TYPE] = undef;
19361 $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
19362 $_default_data[RALIGNMENTS] = [];
19363 $_default_data[MAXIMUM_LINE_LENGTH] = undef;
19364 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
19368 # methods to count object population
19370 sub get_count { $_count; }
19371 sub _increment_count { ++$_count }
19372 sub _decrement_count { --$_count }
19375 # Constructor may be called as a class method
19377 my ( $caller, %arg ) = @_;
19378 my $caller_is_obj = ref($caller);
19379 my $class = $caller_is_obj || $caller;
19381 my $self = bless [], $class;
19383 $self->[RALIGNMENTS] = [];
19386 foreach ( keys %_index_map ) {
19387 $index = $_index_map{$_};
19388 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19389 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
19390 else { $self->[$index] = $_default_data[$index] }
19393 $self->_increment_count();
19398 $_[0]->_decrement_count();
19401 sub get_jmax { $_[0]->[JMAX] }
19402 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
19403 sub get_rtokens { $_[0]->[RTOKENS] }
19404 sub get_rfields { $_[0]->[RFIELDS] }
19405 sub get_rpatterns { $_[0]->[RPATTERNS] }
19406 sub get_indentation { $_[0]->[INDENTATION] }
19407 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
19408 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
19409 sub get_list_type { $_[0]->[LIST_TYPE] }
19410 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
19411 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
19413 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
19414 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
19415 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
19416 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
19418 sub get_starting_column {
19419 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
19422 sub increment_column {
19423 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
19425 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
19427 sub current_field_width {
19431 return $self->get_column($j);
19434 return $self->get_column($j) - $self->get_column( $j - 1 );
19438 sub field_width_growth {
19441 return $self->get_column($j) - $self->get_starting_column($j);
19444 sub starting_field_width {
19448 return $self->get_starting_column($j);
19451 return $self->get_starting_column($j) -
19452 $self->get_starting_column( $j - 1 );
19456 sub increase_field_width {
19459 my ( $j, $pad ) = @_;
19460 my $jmax = $self->get_jmax();
19461 for my $k ( $j .. $jmax ) {
19462 $self->increment_column( $k, $pad );
19466 sub get_available_space_on_right {
19468 my $jmax = $self->get_jmax();
19469 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
19472 sub set_jmax { $_[0]->[JMAX] = $_[1] }
19473 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
19474 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
19475 sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
19476 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
19477 sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
19478 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
19479 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
19480 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
19481 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
19482 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
19486 #####################################################################
19488 # the Perl::Tidy::VerticalAligner::Alignment class holds information
19489 # on a single column being aligned
19491 #####################################################################
19492 package Perl::Tidy::VerticalAligner::Alignment;
19500 # Symbolic array indexes
19501 use constant COLUMN => 0; # the current column number
19502 use constant STARTING_COLUMN => 1; # column number when created
19503 use constant MATCHING_TOKEN => 2; # what token we are matching
19504 use constant STARTING_LINE => 3; # the line index of creation
19505 use constant ENDING_LINE => 4; # the most recent line to use it
19506 use constant SAVED_COLUMN => 5; # the most recent line to use it
19507 use constant SERIAL_NUMBER => 6; # unique number for this alignment
19508 # (just its index in an array)
19510 # Correspondence between variables and array indexes
19512 $_index_map{column} = COLUMN;
19513 $_index_map{starting_column} = STARTING_COLUMN;
19514 $_index_map{matching_token} = MATCHING_TOKEN;
19515 $_index_map{starting_line} = STARTING_LINE;
19516 $_index_map{ending_line} = ENDING_LINE;
19517 $_index_map{saved_column} = SAVED_COLUMN;
19518 $_index_map{serial_number} = SERIAL_NUMBER;
19520 my @_default_data = ();
19521 $_default_data[COLUMN] = undef;
19522 $_default_data[STARTING_COLUMN] = undef;
19523 $_default_data[MATCHING_TOKEN] = undef;
19524 $_default_data[STARTING_LINE] = undef;
19525 $_default_data[ENDING_LINE] = undef;
19526 $_default_data[SAVED_COLUMN] = undef;
19527 $_default_data[SERIAL_NUMBER] = undef;
19529 # class population count
19532 sub get_count { $_count; }
19533 sub _increment_count { ++$_count }
19534 sub _decrement_count { --$_count }
19539 my ( $caller, %arg ) = @_;
19540 my $caller_is_obj = ref($caller);
19541 my $class = $caller_is_obj || $caller;
19543 my $self = bless [], $class;
19545 foreach ( keys %_index_map ) {
19546 my $index = $_index_map{$_};
19547 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19548 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
19549 else { $self->[$index] = $_default_data[$index] }
19551 $self->_increment_count();
19556 $_[0]->_decrement_count();
19559 sub get_column { return $_[0]->[COLUMN] }
19560 sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
19561 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
19562 sub get_starting_line { return $_[0]->[STARTING_LINE] }
19563 sub get_ending_line { return $_[0]->[ENDING_LINE] }
19564 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
19566 sub set_column { $_[0]->[COLUMN] = $_[1] }
19567 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
19568 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
19569 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
19570 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
19571 sub increment_column { $_[0]->[COLUMN] += $_[1] }
19573 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
19574 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
19578 package Perl::Tidy::VerticalAligner;
19580 # The Perl::Tidy::VerticalAligner package collects output lines and
19581 # attempts to line up certain common tokens, such as => and #, which are
19582 # identified by the calling routine.
19584 # There are two main routines: valign_input and flush. Append acts as a
19585 # storage buffer, collecting lines into a group which can be vertically
19586 # aligned. When alignment is no longer possible or desirable, it dumps
19587 # the group to flush.
19589 # valign_input -----> flush
19597 # Caution: these debug flags produce a lot of output
19598 # They should all be 0 except when debugging small scripts
19600 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
19601 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
19602 use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
19603 use constant VALIGN_DEBUG_FLAG_TABS => 0;
19605 my $debug_warning = sub {
19606 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
19609 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
19610 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
19611 VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
19612 VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
19617 $vertical_aligner_self
19619 $maximum_alignment_index
19623 $previous_minimum_jmax_seen
19624 $previous_maximum_jmax_seen
19625 $maximum_line_index
19630 $last_level_written
19631 $last_leading_space_count
19635 $last_comment_column
19636 $last_side_comment_line_number
19637 $last_side_comment_length
19638 $last_side_comment_level
19639 $outdented_line_count
19640 $first_outdented_line_at
19641 $last_outdented_line_at
19642 $diagnostics_object
19644 $file_writer_object
19645 @side_comment_history
19646 $comment_leading_space_count
19647 $is_matching_terminal_line
19648 $consecutive_block_comments
19655 $cached_line_leading_space_count
19656 $cached_seqno_string
19658 $valign_buffer_filling
19662 $last_nonblank_seqno_string
19666 $rOpts_maximum_line_length
19667 $rOpts_variable_maximum_line_length
19668 $rOpts_continuation_indentation
19669 $rOpts_indent_columns
19671 $rOpts_entab_leading_whitespace
19674 $rOpts_fixed_position_side_comment
19675 $rOpts_minimum_space_to_comment
19683 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
19686 # variables describing the entire space group:
19687 $ralignment_list = [];
19689 $last_level_written = -1;
19690 $extra_indent_ok = 0; # can we move all lines to the right?
19691 $last_side_comment_length = 0;
19692 $maximum_jmax_seen = 0;
19693 $minimum_jmax_seen = 0;
19694 $previous_minimum_jmax_seen = 0;
19695 $previous_maximum_jmax_seen = 0;
19697 # variables describing each line of the group
19698 @group_lines = (); # list of all lines in group
19700 $outdented_line_count = 0;
19701 $first_outdented_line_at = 0;
19702 $last_outdented_line_at = 0;
19703 $last_side_comment_line_number = 0;
19704 $last_side_comment_level = -1;
19705 $is_matching_terminal_line = 0;
19707 # most recent 3 side comments; [ line number, column ]
19708 $side_comment_history[0] = [ -300, 0 ];
19709 $side_comment_history[1] = [ -200, 0 ];
19710 $side_comment_history[2] = [ -100, 0 ];
19712 # valign_output_step_B cache:
19713 $cached_line_text = "";
19714 $cached_line_type = 0;
19715 $cached_line_flag = 0;
19717 $cached_line_valid = 0;
19718 $cached_line_leading_space_count = 0;
19719 $cached_seqno_string = "";
19721 # string of sequence numbers joined together
19722 $seqno_string = "";
19723 $last_nonblank_seqno_string = "";
19725 # frequently used parameters
19726 $rOpts_indent_columns = $rOpts->{'indent-columns'};
19727 $rOpts_tabs = $rOpts->{'tabs'};
19728 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
19729 $rOpts_fixed_position_side_comment =
19730 $rOpts->{'fixed-position-side-comment'};
19731 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
19732 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
19733 $rOpts_variable_maximum_line_length =
19734 $rOpts->{'variable-maximum-line-length'};
19735 $rOpts_valign = $rOpts->{'valign'};
19737 $consecutive_block_comments = 0;
19738 forget_side_comment();
19740 initialize_for_new_group();
19742 $vertical_aligner_self = {};
19743 bless $vertical_aligner_self, $class;
19744 return $vertical_aligner_self;
19747 sub initialize_for_new_group {
19748 $maximum_line_index = -1; # lines in the current group
19749 $maximum_alignment_index = -1; # alignments in current group
19750 $zero_count = 0; # count consecutive lines without tokens
19751 $current_line = undef; # line being matched for alignment
19752 $group_maximum_gap = 0; # largest gap introduced
19754 $marginal_match = 0;
19755 $comment_leading_space_count = 0;
19756 $last_leading_space_count = 0;
19759 # interface to Perl::Tidy::Diagnostics routines
19760 sub write_diagnostics {
19761 if ($diagnostics_object) {
19762 $diagnostics_object->write_diagnostics(@_);
19766 # interface to Perl::Tidy::Logger routines
19768 if ($logger_object) {
19769 $logger_object->warning(@_);
19773 sub write_logfile_entry {
19774 if ($logger_object) {
19775 $logger_object->write_logfile_entry(@_);
19779 sub report_definite_bug {
19780 if ($logger_object) {
19781 $logger_object->report_definite_bug();
19787 # return the number of leading spaces associated with an indentation
19788 # variable $indentation is either a constant number of spaces or an
19789 # object with a get_SPACES method.
19790 my $indentation = shift;
19791 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
19794 sub get_RECOVERABLE_SPACES {
19796 # return the number of spaces (+ means shift right, - means shift left)
19797 # that we would like to shift a group of lines with the same indentation
19798 # to get them to line up with their opening parens
19799 my $indentation = shift;
19800 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
19803 sub get_STACK_DEPTH {
19805 my $indentation = shift;
19806 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
19809 sub make_alignment {
19810 my ( $col, $token ) = @_;
19812 # make one new alignment at column $col which aligns token $token
19813 ++$maximum_alignment_index;
19814 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
19816 starting_column => $col,
19817 matching_token => $token,
19818 starting_line => $maximum_line_index,
19819 ending_line => $maximum_line_index,
19820 serial_number => $maximum_alignment_index,
19822 $ralignment_list->[$maximum_alignment_index] = $alignment;
19826 sub dump_alignments {
19828 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
19829 for my $i ( 0 .. $maximum_alignment_index ) {
19830 my $column = $ralignment_list->[$i]->get_column();
19831 my $starting_column = $ralignment_list->[$i]->get_starting_column();
19832 my $matching_token = $ralignment_list->[$i]->get_matching_token();
19833 my $starting_line = $ralignment_list->[$i]->get_starting_line();
19834 my $ending_line = $ralignment_list->[$i]->get_ending_line();
19836 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
19840 sub save_alignment_columns {
19841 for my $i ( 0 .. $maximum_alignment_index ) {
19842 $ralignment_list->[$i]->save_column();
19846 sub restore_alignment_columns {
19847 for my $i ( 0 .. $maximum_alignment_index ) {
19848 $ralignment_list->[$i]->restore_column();
19852 sub forget_side_comment {
19853 $last_comment_column = 0;
19856 sub maximum_line_length_for_level {
19858 # return maximum line length for line starting with a given level
19859 my $maximum_line_length = $rOpts_maximum_line_length;
19860 if ($rOpts_variable_maximum_line_length) {
19862 if ( $level < 0 ) { $level = 0 }
19863 $maximum_line_length += $level * $rOpts_indent_columns;
19865 return $maximum_line_length;
19870 # Place one line in the current vertical group.
19872 # The input parameters are:
19873 # $level = indentation level of this line
19874 # $rfields = reference to array of fields
19875 # $rpatterns = reference to array of patterns, one per field
19876 # $rtokens = reference to array of tokens starting fields 1,2,..
19878 # Here is an example of what this package does. In this example,
19879 # we are trying to line up both the '=>' and the '#'.
19881 # '18' => 'grave', # \`
19882 # '19' => 'acute', # `'
19883 # '20' => 'caron', # \v
19884 # <-tabs-><f1-><--field 2 ---><-f3->
19887 # col1 col2 col3 col4
19889 # The calling routine has already broken the entire line into 3 fields as
19890 # indicated. (So the work of identifying promising common tokens has
19891 # already been done).
19893 # In this example, there will be 2 tokens being matched: '=>' and '#'.
19894 # They are the leading parts of fields 2 and 3, but we do need to know
19895 # what they are so that we can dump a group of lines when these tokens
19898 # The fields contain the actual characters of each field. The patterns
19899 # are like the fields, but they contain mainly token types instead
19900 # of tokens, so they have fewer characters. They are used to be
19901 # sure we are matching fields of similar type.
19903 # In this example, there will be 4 column indexes being adjusted. The
19904 # first one is always at zero. The interior columns are at the start of
19905 # the matching tokens, and the last one tracks the maximum line length.
19907 # Each time a new line comes in, it joins the current vertical
19908 # group if possible. Otherwise it causes the current group to be dumped
19909 # and a new group is started.
19911 # For each new group member, the column locations are increased, as
19912 # necessary, to make room for the new fields. When the group is finally
19913 # output, these column numbers are used to compute the amount of spaces of
19914 # padding needed for each field.
19916 # Programming note: the fields are assumed not to have any tab characters.
19917 # Tabs have been previously removed except for tabs in quoted strings and
19918 # side comments. Tabs in these fields can mess up the column counting.
19919 # The log file warns the user if there are any such tabs.
19922 $level, $level_end,
19923 $indentation, $rfields,
19924 $rtokens, $rpatterns,
19925 $is_forced_break, $outdent_long_lines,
19926 $is_terminal_ternary, $is_terminal_statement,
19927 $do_not_pad, $rvertical_tightness_flags,
19931 # number of fields is $jmax
19932 # number of tokens between fields is $jmax-1
19933 my $jmax = $#{$rfields};
19935 my $leading_space_count = get_SPACES($indentation);
19937 # set outdented flag to be sure we either align within statements or
19938 # across statement boundaries, but not both.
19939 my $is_outdented = $last_leading_space_count > $leading_space_count;
19940 $last_leading_space_count = $leading_space_count;
19942 # Patch: undo for hanging side comment
19943 my $is_hanging_side_comment =
19944 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
19945 $is_outdented = 0 if $is_hanging_side_comment;
19947 # Forget side comment alignment after seeing 2 or more block comments
19948 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
19949 if ($is_block_comment) {
19950 $consecutive_block_comments++;
19953 if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
19954 $consecutive_block_comments = 0;
19957 VALIGN_DEBUG_FLAG_APPEND0 && do {
19959 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
19962 # Validate cached line if necessary: If we can produce a container
19963 # with just 2 lines total by combining an existing cached opening
19964 # token with the closing token to follow, then we will mark both
19965 # cached flags as valid.
19966 if ($rvertical_tightness_flags) {
19967 if ( $maximum_line_index <= 0
19968 && $cached_line_type
19970 && $rvertical_tightness_flags->[2]
19971 && $rvertical_tightness_flags->[2] == $cached_seqno )
19973 $rvertical_tightness_flags->[3] ||= 1;
19974 $cached_line_valid ||= 1;
19978 # do not join an opening block brace with an unbalanced line
19979 # unless requested with a flag value of 2
19980 if ( $cached_line_type == 3
19981 && $maximum_line_index < 0
19982 && $cached_line_flag < 2
19983 && $level_jump != 0 )
19985 $cached_line_valid = 0;
19988 # patch until new aligner is finished
19989 if ($do_not_pad) { my_flush() }
19991 # shouldn't happen:
19992 if ( $level < 0 ) { $level = 0 }
19994 # do not align code across indentation level changes
19995 # or if vertical alignment is turned off for debugging
19996 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
19998 # we are allowed to shift a group of lines to the right if its
19999 # level is greater than the previous and next group
20001 ( $level < $group_level && $last_level_written < $group_level );
20005 # If we know that this line will get flushed out by itself because
20006 # of level changes, we can leave the extra_indent_ok flag set.
20007 # That way, if we get an external flush call, we will still be
20008 # able to do some -lp alignment if necessary.
20009 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
20011 $group_level = $level;
20013 # wait until after the above flush to get the leading space
20014 # count because it may have been changed if the -icp flag is in
20016 $leading_space_count = get_SPACES($indentation);
20020 # --------------------------------------------------------------------
20021 # Patch to collect outdentable block COMMENTS
20022 # --------------------------------------------------------------------
20023 my $is_blank_line = "";
20024 if ( $group_type eq 'COMMENT' ) {
20028 && $outdent_long_lines
20029 && $leading_space_count == $comment_leading_space_count
20034 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20042 # --------------------------------------------------------------------
20043 # add dummy fields for terminal ternary
20044 # --------------------------------------------------------------------
20045 my $j_terminal_match;
20046 if ( $is_terminal_ternary && $current_line ) {
20047 $j_terminal_match =
20048 fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
20049 $jmax = @{$rfields} - 1;
20052 # --------------------------------------------------------------------
20053 # add dummy fields for else statement
20054 # --------------------------------------------------------------------
20055 if ( $rfields->[0] =~ /^else\s*$/
20057 && $level_jump == 0 )
20059 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
20060 $jmax = @{$rfields} - 1;
20063 # --------------------------------------------------------------------
20064 # Step 1. Handle simple line of code with no fields to match.
20065 # --------------------------------------------------------------------
20066 if ( $jmax <= 0 ) {
20069 if ( $maximum_line_index >= 0
20070 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
20073 # flush the current group if it has some aligned columns..
20074 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
20076 # flush current group if we are just collecting side comments..
20079 # ...and we haven't seen a comment lately
20080 ( $zero_count > 3 )
20082 # ..or if this new line doesn't fit to the left of the comments
20083 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
20084 $group_lines[0]->get_column(0) )
20091 # patch to start new COMMENT group if this comment may be outdented
20092 if ( $is_block_comment
20093 && $outdent_long_lines
20094 && $maximum_line_index < 0 )
20096 $group_type = 'COMMENT';
20097 $comment_leading_space_count = $leading_space_count;
20098 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20102 # just write this line directly if no current group, no side comment,
20103 # and no space recovery is needed.
20104 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
20106 valign_output_step_B( $leading_space_count, $$rfields[0], 0,
20107 $outdent_long_lines, $rvertical_tightness_flags, $level );
20115 # programming check: (shouldn't happen)
20116 # an error here implies an incorrect call was made
20117 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
20119 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
20121 report_definite_bug();
20124 # --------------------------------------------------------------------
20125 # create an object to hold this line
20126 # --------------------------------------------------------------------
20127 my $new_line = new Perl::Tidy::VerticalAligner::Line(
20129 jmax_original_line => $jmax,
20130 rtokens => $rtokens,
20131 rfields => $rfields,
20132 rpatterns => $rpatterns,
20133 indentation => $indentation,
20134 leading_space_count => $leading_space_count,
20135 outdent_long_lines => $outdent_long_lines,
20137 is_hanging_side_comment => $is_hanging_side_comment,
20138 maximum_line_length => maximum_line_length_for_level($level),
20139 rvertical_tightness_flags => $rvertical_tightness_flags,
20142 # Initialize a global flag saying if the last line of the group should
20143 # match end of group and also terminate the group. There should be no
20144 # returns between here and where the flag is handled at the bottom.
20145 my $col_matching_terminal = 0;
20146 if ( defined($j_terminal_match) ) {
20148 # remember the column of the terminal ? or { to match with
20149 $col_matching_terminal = $current_line->get_column($j_terminal_match);
20151 # set global flag for sub decide_if_aligned
20152 $is_matching_terminal_line = 1;
20155 # --------------------------------------------------------------------
20156 # It simplifies things to create a zero length side comment
20158 # --------------------------------------------------------------------
20159 make_side_comment( $new_line, $level_end );
20161 # --------------------------------------------------------------------
20162 # Decide if this is a simple list of items.
20163 # There are 3 list types: none, comma, comma-arrow.
20164 # We use this below to be less restrictive in deciding what to align.
20165 # --------------------------------------------------------------------
20166 if ($is_forced_break) {
20167 decide_if_list($new_line);
20170 if ($current_line) {
20172 # --------------------------------------------------------------------
20173 # Allow hanging side comment to join current group, if any
20174 # This will help keep side comments aligned, because otherwise we
20175 # will have to start a new group, making alignment less likely.
20176 # --------------------------------------------------------------------
20177 join_hanging_comment( $new_line, $current_line )
20178 if $is_hanging_side_comment;
20180 # --------------------------------------------------------------------
20181 # If there is just one previous line, and it has more fields
20182 # than the new line, try to join fields together to get a match with
20183 # the new line. At the present time, only a single leading '=' is
20184 # allowed to be compressed out. This is useful in rare cases where
20185 # a table is forced to use old breakpoints because of side comments,
20186 # and the table starts out something like this:
20187 # my %MonthChars = ('0', 'Jan', # side comment
20190 # Eliminating the '=' field will allow the remaining fields to line up.
20191 # This situation does not occur if there are no side comments
20192 # because scan_list would put a break after the opening '('.
20193 # --------------------------------------------------------------------
20194 eliminate_old_fields( $new_line, $current_line );
20196 # --------------------------------------------------------------------
20197 # If the new line has more fields than the current group,
20198 # see if we can match the first fields and combine the remaining
20199 # fields of the new line.
20200 # --------------------------------------------------------------------
20201 eliminate_new_fields( $new_line, $current_line );
20203 # --------------------------------------------------------------------
20204 # Flush previous group unless all common tokens and patterns match..
20205 # --------------------------------------------------------------------
20206 check_match( $new_line, $current_line );
20208 # --------------------------------------------------------------------
20209 # See if there is space for this line in the current group (if any)
20210 # --------------------------------------------------------------------
20211 if ($current_line) {
20212 check_fit( $new_line, $current_line );
20216 # --------------------------------------------------------------------
20217 # Append this line to the current group (or start new group)
20218 # --------------------------------------------------------------------
20219 add_to_group($new_line);
20221 # Future update to allow this to vary:
20222 $current_line = $new_line if ( $maximum_line_index == 0 );
20224 # output this group if it ends in a terminal else or ternary line
20225 if ( defined($j_terminal_match) ) {
20227 # if there is only one line in the group (maybe due to failure to match
20228 # perfectly with previous lines), then align the ? or { of this
20229 # terminal line with the previous one unless that would make the line
20231 if ( $maximum_line_index == 0 ) {
20232 my $col_now = $current_line->get_column($j_terminal_match);
20233 my $pad = $col_matching_terminal - $col_now;
20234 my $padding_available =
20235 $current_line->get_available_space_on_right();
20236 if ( $pad > 0 && $pad <= $padding_available ) {
20237 $current_line->increase_field_width( $j_terminal_match, $pad );
20241 $is_matching_terminal_line = 0;
20244 # --------------------------------------------------------------------
20245 # Step 8. Some old debugging stuff
20246 # --------------------------------------------------------------------
20247 VALIGN_DEBUG_FLAG_APPEND && do {
20248 print STDOUT "APPEND fields:";
20249 dump_array(@$rfields);
20250 print STDOUT "APPEND tokens:";
20251 dump_array(@$rtokens);
20252 print STDOUT "APPEND patterns:";
20253 dump_array(@$rpatterns);
20260 sub join_hanging_comment {
20263 my $jmax = $line->get_jmax();
20264 return 0 unless $jmax == 1; # must be 2 fields
20265 my $rtokens = $line->get_rtokens();
20266 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
20267 my $rfields = $line->get_rfields();
20268 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
20269 my $old_line = shift;
20270 my $maximum_field_index = $old_line->get_jmax();
20272 unless $maximum_field_index > $jmax; # the current line has more fields
20273 my $rpatterns = $line->get_rpatterns();
20275 $line->set_is_hanging_side_comment(1);
20276 $jmax = $maximum_field_index;
20277 $line->set_jmax($jmax);
20278 $$rfields[$jmax] = $$rfields[1];
20279 $$rtokens[ $jmax - 1 ] = $$rtokens[0];
20280 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
20281 for ( my $j = 1 ; $j < $jmax ; $j++ ) {
20282 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
20283 $$rtokens[ $j - 1 ] = "";
20284 $$rpatterns[ $j - 1 ] = "";
20289 sub eliminate_old_fields {
20291 my $new_line = shift;
20292 my $jmax = $new_line->get_jmax();
20293 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
20294 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
20296 # there must be one previous line
20297 return unless ( $maximum_line_index == 0 );
20299 my $old_line = shift;
20300 my $maximum_field_index = $old_line->get_jmax();
20302 ###############################################
20303 # this line must have fewer fields
20304 return unless $maximum_field_index > $jmax;
20305 ###############################################
20307 # Identify specific cases where field elimination is allowed:
20308 # case=1: both lines have comma-separated lists, and the first
20309 # line has an equals
20310 # case=2: both lines have leading equals
20312 # case 1 is the default
20315 # See if case 2: both lines have leading '='
20316 # We'll require similar leading patterns in this case
20317 my $old_rtokens = $old_line->get_rtokens();
20318 my $rtokens = $new_line->get_rtokens();
20319 my $rpatterns = $new_line->get_rpatterns();
20320 my $old_rpatterns = $old_line->get_rpatterns();
20321 if ( $rtokens->[0] =~ /^=\d*$/
20322 && $old_rtokens->[0] eq $rtokens->[0]
20323 && $old_rpatterns->[0] eq $rpatterns->[0] )
20328 # not too many fewer fields in new line for case 1
20329 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
20331 # case 1 must have side comment
20332 my $old_rfields = $old_line->get_rfields();
20335 && length( $$old_rfields[$maximum_field_index] ) == 0 );
20337 my $rfields = $new_line->get_rfields();
20339 my $hid_equals = 0;
20341 my @new_alignments = ();
20342 my @new_fields = ();
20343 my @new_matching_patterns = ();
20344 my @new_matching_tokens = ();
20348 my $current_field = '';
20349 my $current_pattern = '';
20351 # loop over all old tokens
20353 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
20354 $current_field .= $$old_rfields[$k];
20355 $current_pattern .= $$old_rpatterns[$k];
20356 last if ( $j > $jmax - 1 );
20358 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
20360 $new_fields[$j] = $current_field;
20361 $new_matching_patterns[$j] = $current_pattern;
20362 $current_field = '';
20363 $current_pattern = '';
20364 $new_matching_tokens[$j] = $$old_rtokens[$k];
20365 $new_alignments[$j] = $old_line->get_alignment($k);
20370 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
20371 last if ( $case == 2 ); # avoid problems with stuff
20372 # like: $a=$b=$c=$d;
20376 if ( $in_match && $case == 1 )
20377 ; # disallow gaps in matching field types in case 1
20381 # Modify the current state if we are successful.
20382 # We must exactly reach the ends of both lists for success.
20383 if ( ( $j == $jmax )
20384 && ( $current_field eq '' )
20385 && ( $case != 1 || $hid_equals ) )
20387 $k = $maximum_field_index;
20388 $current_field .= $$old_rfields[$k];
20389 $current_pattern .= $$old_rpatterns[$k];
20390 $new_fields[$j] = $current_field;
20391 $new_matching_patterns[$j] = $current_pattern;
20393 $new_alignments[$j] = $old_line->get_alignment($k);
20394 $maximum_field_index = $j;
20396 $old_line->set_alignments(@new_alignments);
20397 $old_line->set_jmax($jmax);
20398 $old_line->set_rtokens( \@new_matching_tokens );
20399 $old_line->set_rfields( \@new_fields );
20400 $old_line->set_rpatterns( \@$rpatterns );
20404 # create an empty side comment if none exists
20405 sub make_side_comment {
20406 my $new_line = shift;
20407 my $level_end = shift;
20408 my $jmax = $new_line->get_jmax();
20409 my $rtokens = $new_line->get_rtokens();
20411 # if line does not have a side comment...
20412 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
20413 my $rfields = $new_line->get_rfields();
20414 my $rpatterns = $new_line->get_rpatterns();
20415 $$rtokens[$jmax] = '#';
20416 $$rfields[ ++$jmax ] = '';
20417 $$rpatterns[$jmax] = '#';
20418 $new_line->set_jmax($jmax);
20419 $new_line->set_jmax_original_line($jmax);
20422 # line has a side comment..
20425 # don't remember old side comment location for very long
20426 my $line_number = $vertical_aligner_self->get_output_line_number();
20427 my $rfields = $new_line->get_rfields();
20429 $line_number - $last_side_comment_line_number > 12
20431 # and don't remember comment location across block level changes
20432 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
20435 forget_side_comment();
20437 $last_side_comment_line_number = $line_number;
20438 $last_side_comment_level = $level_end;
20442 sub decide_if_list {
20446 # A list will be taken to be a line with a forced break in which all
20447 # of the field separators are commas or comma-arrows (except for the
20450 # List separator tokens are things like ',3' or '=>2',
20451 # where the trailing digit is the nesting depth. Allow braces
20452 # to allow nested list items.
20453 my $rtokens = $line->get_rtokens();
20454 my $test_token = $$rtokens[0];
20455 if ( $test_token =~ /^(\,|=>)/ ) {
20456 my $list_type = $test_token;
20457 my $jmax = $line->get_jmax();
20459 foreach ( 1 .. $jmax - 2 ) {
20460 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
20465 $line->set_list_type($list_type);
20469 sub eliminate_new_fields {
20471 return unless ( $maximum_line_index >= 0 );
20472 my ( $new_line, $old_line ) = @_;
20473 my $jmax = $new_line->get_jmax();
20475 my $old_rtokens = $old_line->get_rtokens();
20476 my $rtokens = $new_line->get_rtokens();
20477 my $is_assignment =
20478 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
20480 # must be monotonic variation
20481 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
20483 # must be more fields in the new line
20484 my $maximum_field_index = $old_line->get_jmax();
20485 return unless ( $maximum_field_index < $jmax );
20487 unless ($is_assignment) {
20489 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
20490 ; # only if monotonic
20492 # never combine fields of a comma list
20494 unless ( $maximum_field_index > 1 )
20495 && ( $new_line->get_list_type() !~ /^,/ );
20498 my $rfields = $new_line->get_rfields();
20499 my $rpatterns = $new_line->get_rpatterns();
20500 my $old_rpatterns = $old_line->get_rpatterns();
20502 # loop over all OLD tokens except comment and check match
20505 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
20506 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
20507 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
20514 # first tokens agree, so combine extra new tokens
20516 for $k ( $maximum_field_index .. $jmax - 1 ) {
20518 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
20519 $$rfields[$k] = "";
20520 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
20521 $$rpatterns[$k] = "";
20524 $$rtokens[ $maximum_field_index - 1 ] = '#';
20525 $$rfields[$maximum_field_index] = $$rfields[$jmax];
20526 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
20527 $jmax = $maximum_field_index;
20529 $new_line->set_jmax($jmax);
20532 sub fix_terminal_ternary {
20534 # Add empty fields as necessary to align a ternary term
20539 # : $year % 100 ? 1
20540 # : $year % 400 ? 0
20543 # returns 1 if the terminal item should be indented
20545 my ( $rfields, $rtokens, $rpatterns ) = @_;
20547 my $jmax = @{$rfields} - 1;
20548 my $old_line = $group_lines[$maximum_line_index];
20549 my $rfields_old = $old_line->get_rfields();
20551 my $rpatterns_old = $old_line->get_rpatterns();
20552 my $rtokens_old = $old_line->get_rtokens();
20553 my $maximum_field_index = $old_line->get_jmax();
20555 # look for the question mark after the :
20557 my $depth_question;
20559 for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
20560 my $tok = $rtokens_old->[$j];
20561 if ( $tok =~ /^\?(\d+)$/ ) {
20562 $depth_question = $1;
20564 # depth must be correct
20565 next unless ( $depth_question eq $group_level );
20568 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
20569 $pad = " " x length($1);
20572 return; # shouldn't happen
20577 return unless ( defined($jquestion) ); # shouldn't happen
20579 # Now splice the tokens and patterns of the previous line
20580 # into the else line to insure a match. Add empty fields
20582 my $jadd = $jquestion;
20584 # Work on copies of the actual arrays in case we have
20585 # to return due to an error
20586 my @fields = @{$rfields};
20587 my @patterns = @{$rpatterns};
20588 my @tokens = @{$rtokens};
20590 VALIGN_DEBUG_FLAG_TERNARY && do {
20592 print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
20593 print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
20594 print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
20595 print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
20596 print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
20597 print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
20600 # handle cases of leading colon on this line
20601 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
20603 my ( $colon, $therest ) = ( $1, $2 );
20605 # Handle sub-case of first field with leading colon plus additional code
20606 # This is the usual situation as at the '1' below:
20608 # : $year % 400 ? 0
20612 # Split the first field after the leading colon and insert padding.
20613 # Note that this padding will remain even if the terminal value goes
20614 # out on a separate line. This does not seem to look to bad, so no
20615 # mechanism has been included to undo it.
20616 my $field1 = shift @fields;
20617 unshift @fields, ( $colon, $pad . $therest );
20619 # change the leading pattern from : to ?
20620 return unless ( $patterns[0] =~ s/^\:/?/ );
20622 # install leading tokens and patterns of existing line
20623 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
20624 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20626 # insert appropriate number of empty fields
20627 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20630 # handle sub-case of first field just equal to leading colon.
20631 # This can happen for example in the example below where
20632 # the leading '(' would create a new alignment token
20633 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
20634 # : ( $mname = $name . '->' );
20637 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
20639 # prepend a leading ? onto the second pattern
20640 $patterns[1] = "?b" . $patterns[1];
20642 # pad the second field
20643 $fields[1] = $pad . $fields[1];
20645 # install leading tokens and patterns of existing line, replacing
20646 # leading token and inserting appropriate number of empty fields
20647 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
20648 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
20649 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20653 # Handle case of no leading colon on this line. This will
20654 # be the case when -wba=':' is used. For example,
20655 # $year % 400 ? 0 :
20659 # install leading tokens and patterns of existing line
20660 $patterns[0] = '?' . 'b' . $patterns[0];
20661 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
20662 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20664 # insert appropriate number of empty fields
20665 $jadd = $jquestion + 1;
20666 $fields[0] = $pad . $fields[0];
20667 splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
20670 VALIGN_DEBUG_FLAG_TERNARY && do {
20672 print STDOUT "MODIFIED TOKENS=<@tokens>\n";
20673 print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
20674 print STDOUT "MODIFIED FIELDS=<@fields>\n";
20677 # all ok .. update the arrays
20678 @{$rfields} = @fields;
20679 @{$rtokens} = @tokens;
20680 @{$rpatterns} = @patterns;
20682 # force a flush after this line
20686 sub fix_terminal_else {
20688 # Add empty fields as necessary to align a balanced terminal
20689 # else block to a previous if/elsif/unless block,
20692 # if ( 1 || $x ) { print "ok 13\n"; }
20693 # else { print "not ok 13\n"; }
20695 # returns 1 if the else block should be indented
20697 my ( $rfields, $rtokens, $rpatterns ) = @_;
20698 my $jmax = @{$rfields} - 1;
20699 return unless ( $jmax > 0 );
20701 # check for balanced else block following if/elsif/unless
20702 my $rfields_old = $current_line->get_rfields();
20704 # TBD: add handling for 'case'
20705 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
20707 # look for the opening brace after the else, and extract the depth
20708 my $tok_brace = $rtokens->[0];
20710 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
20712 # probably: "else # side_comment"
20715 my $rpatterns_old = $current_line->get_rpatterns();
20716 my $rtokens_old = $current_line->get_rtokens();
20717 my $maximum_field_index = $current_line->get_jmax();
20719 # be sure the previous if/elsif is followed by an opening paren
20721 my $tok_paren = '(' . $depth_brace;
20722 my $tok_test = $rtokens_old->[$jparen];
20723 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
20725 # Now find the opening block brace
20727 for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
20728 my $tok = $rtokens_old->[$j];
20729 if ( $tok eq $tok_brace ) {
20734 return unless ( defined($jbrace) ); # shouldn't happen
20736 # Now splice the tokens and patterns of the previous line
20737 # into the else line to insure a match. Add empty fields
20739 my $jadd = $jbrace - $jparen;
20740 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
20741 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
20742 splice( @{$rfields}, 1, 0, ('') x $jadd );
20744 # force a flush after this line if it does not follow a case
20746 unless ( $rfields_old->[0] =~ /^case\s*$/ );
20749 { # sub check_match
20750 my %is_good_alignment;
20754 # Vertically aligning on certain "good" tokens is usually okay
20755 # so we can be less restrictive in marginal cases.
20756 @_ = qw( { ? => = );
20758 @is_good_alignment{@_} = (1) x scalar(@_);
20763 # See if the current line matches the current vertical alignment group.
20764 # If not, flush the current group.
20765 my $new_line = shift;
20766 my $old_line = shift;
20768 # uses global variables:
20769 # $previous_minimum_jmax_seen
20770 # $maximum_jmax_seen
20771 # $maximum_line_index
20773 my $jmax = $new_line->get_jmax();
20774 my $maximum_field_index = $old_line->get_jmax();
20776 # flush if this line has too many fields
20777 if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
20779 # flush if adding this line would make a non-monotonic field count
20781 ( $maximum_field_index > $jmax ) # this has too few fields
20783 ( $previous_minimum_jmax_seen <
20784 $jmax ) # and wouldn't be monotonic
20785 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
20792 # otherwise see if this line matches the current group
20793 my $jmax_original_line = $new_line->get_jmax_original_line();
20794 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
20795 my $rtokens = $new_line->get_rtokens();
20796 my $rfields = $new_line->get_rfields();
20797 my $rpatterns = $new_line->get_rpatterns();
20798 my $list_type = $new_line->get_list_type();
20800 my $group_list_type = $old_line->get_list_type();
20801 my $old_rpatterns = $old_line->get_rpatterns();
20802 my $old_rtokens = $old_line->get_rtokens();
20804 my $jlimit = $jmax - 1;
20805 if ( $maximum_field_index > $jmax ) {
20806 $jlimit = $jmax_original_line;
20807 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
20810 # handle comma-separated lists ..
20811 if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
20812 for my $j ( 0 .. $jlimit ) {
20813 my $old_tok = $$old_rtokens[$j];
20814 next unless $old_tok;
20815 my $new_tok = $$rtokens[$j];
20816 next unless $new_tok;
20818 # lists always match ...
20819 # unless they would align any '=>'s with ','s
20821 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
20822 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
20826 # do detailed check for everything else except hanging side comments
20827 elsif ( !$is_hanging_side_comment ) {
20829 my $leading_space_count = $new_line->get_leading_space_count();
20833 my $saw_good_alignment;
20835 for my $j ( 0 .. $jlimit ) {
20837 my $old_tok = $$old_rtokens[$j];
20838 my $new_tok = $$rtokens[$j];
20840 # Note on encoding used for alignment tokens:
20841 # -------------------------------------------
20842 # Tokens are "decorated" with information which can help
20843 # prevent unwanted alignments. Consider for example the
20844 # following two lines:
20845 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
20846 # local ( $i, $f ) = &'bdiv( $xn, $xd );
20847 # There are three alignment tokens in each line, a comma,
20848 # an =, and a comma. In the first line these three tokens
20850 # ,4+local-18 =3 ,4+split-7
20851 # and in the second line they are encoded as
20852 # ,4+local-18 =3 ,4+&'bdiv-8
20853 # Tokens always at least have token name and nesting
20854 # depth. So in this example the ='s are at depth 3 and
20855 # the ,'s are at depth 4. This prevents aligning tokens
20856 # of different depths. Commas contain additional
20857 # information, as follows:
20858 # , {depth} + {container name} - {spaces to opening paren}
20859 # This allows us to reject matching the rightmost commas
20860 # in the above two lines, since they are for different
20861 # function calls. This encoding is done in
20862 # 'sub send_lines_to_vertical_aligner'.
20864 # Pick off actual token.
20865 # Everything up to the first digit is the actual token.
20866 my $alignment_token = $new_tok;
20867 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
20869 # see if the decorated tokens match
20870 my $tokens_match = $new_tok eq $old_tok
20872 # Exception for matching terminal : of ternary statement..
20873 # consider containers prefixed by ? and : a match
20874 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
20876 # No match if the alignment tokens differ...
20877 if ( !$tokens_match ) {
20879 # ...Unless this is a side comment
20883 # and there is either at least one alignment token
20884 # or this is a single item following a list. This
20885 # latter rule is required for 'December' to join
20886 # the following list:
20888 # '', 'January', 'February', 'March',
20889 # 'April', 'May', 'June', 'July',
20890 # 'August', 'September', 'October', 'November',
20893 # If it doesn't then the -lp formatting will fail.
20894 && ( $j > 0 || $old_tok =~ /^,/ )
20897 $marginal_match = 1
20898 if ( $marginal_match == 0
20899 && $maximum_line_index == 0 );
20906 # Calculate amount of padding required to fit this in.
20907 # $pad is the number of spaces by which we must increase
20908 # the current field to squeeze in this field.
20910 length( $$rfields[$j] ) - $old_line->current_field_width($j);
20911 if ( $j == 0 ) { $pad += $leading_space_count; }
20913 # remember max pads to limit marginal cases
20914 if ( $alignment_token ne '#' ) {
20915 if ( $pad > $max_pad ) { $max_pad = $pad }
20916 if ( $pad < $min_pad ) { $min_pad = $pad }
20918 if ( $is_good_alignment{$alignment_token} ) {
20919 $saw_good_alignment = 1;
20922 # If patterns don't match, we have to be careful...
20923 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
20925 # flag this as a marginal match since patterns differ
20926 $marginal_match = 1
20927 if ( $marginal_match == 0 && $maximum_line_index == 0 );
20929 # We have to be very careful about aligning commas
20930 # when the pattern's don't match, because it can be
20931 # worse to create an alignment where none is needed
20932 # than to omit one. Here's an example where the ','s
20933 # are not in named containers. The first line below
20934 # should not match the next two:
20935 # ( $a, $b ) = ( $b, $r );
20936 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
20937 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
20938 if ( $alignment_token eq ',' ) {
20940 # do not align commas unless they are in named containers
20941 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
20944 # do not align parens unless patterns match;
20945 # large ugly spaces can occur in math expressions.
20946 elsif ( $alignment_token eq '(' ) {
20948 # But we can allow a match if the parens don't
20949 # require any padding.
20950 if ( $pad != 0 ) { goto NO_MATCH }
20953 # Handle an '=' alignment with different patterns to
20955 elsif ( $alignment_token eq '=' ) {
20957 # It is best to be a little restrictive when
20958 # aligning '=' tokens. Here is an example of
20959 # two lines that we will not align:
20962 # The problem is that one is a 'my' declaration,
20963 # and the other isn't, so they're not very similar.
20964 # We will filter these out by comparing the first
20965 # letter of the pattern. This is crude, but works
20968 substr( $$old_rpatterns[$j], 0, 1 ) ne
20969 substr( $$rpatterns[$j], 0, 1 ) )
20974 # If we pass that test, we'll call it a marginal match.
20975 # Here is an example of a marginal match:
20977 # $op = compile_bblock($op);
20978 # The left tokens are both identifiers, but
20979 # one accesses a hash and the other doesn't.
20980 # We'll let this be a tentative match and undo
20981 # it later if we don't find more than 2 lines
20983 elsif ( $maximum_line_index == 0 ) {
20985 2; # =2 prevents being undone below
20990 # Don't let line with fewer fields increase column widths
20992 if ( $maximum_field_index > $jmax ) {
20994 # Exception: suspend this rule to allow last lines to join
20995 if ( $pad > 0 ) { goto NO_MATCH; }
20997 } ## end for my $j ( 0 .. $jlimit)
20999 # Turn off the "marginal match" flag in some cases...
21000 # A "marginal match" occurs when the alignment tokens agree
21001 # but there are differences in the other tokens (patterns).
21002 # If we leave the marginal match flag set, then the rule is that we
21003 # will align only if there are more than two lines in the group.
21004 # We will turn of the flag if we almost have a match
21005 # and either we have seen a good alignment token or we
21006 # just need a small pad (2 spaces) to fit. These rules are
21007 # the result of experimentation. Tokens which misaligned by just
21008 # one or two characters are annoying. On the other hand,
21009 # large gaps to less important alignment tokens are also annoying.
21010 if ( $marginal_match == 1
21011 && $jmax == $maximum_field_index
21012 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
21015 $marginal_match = 0;
21017 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
21020 # We have a match (even if marginal).
21021 # If the current line has fewer fields than the current group
21022 # but otherwise matches, copy the remaining group fields to
21023 # make it a perfect match.
21024 if ( $maximum_field_index > $jmax ) {
21025 my $comment = $$rfields[$jmax];
21026 for $jmax ( $jlimit .. $maximum_field_index ) {
21027 $$rtokens[$jmax] = $$old_rtokens[$jmax];
21028 $$rfields[ ++$jmax ] = '';
21029 $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
21031 $$rfields[$jmax] = $comment;
21032 $new_line->set_jmax($jmax);
21037 ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
21045 return unless ( $maximum_line_index >= 0 );
21046 my $new_line = shift;
21047 my $old_line = shift;
21049 my $jmax = $new_line->get_jmax();
21050 my $leading_space_count = $new_line->get_leading_space_count();
21051 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
21052 my $rtokens = $new_line->get_rtokens();
21053 my $rfields = $new_line->get_rfields();
21054 my $rpatterns = $new_line->get_rpatterns();
21056 my $group_list_type = $group_lines[0]->get_list_type();
21058 my $padding_so_far = 0;
21059 my $padding_available = $old_line->get_available_space_on_right();
21061 # save current columns in case this doesn't work
21062 save_alignment_columns();
21064 my ( $j, $pad, $eight );
21065 my $maximum_field_index = $old_line->get_jmax();
21066 for $j ( 0 .. $jmax ) {
21068 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
21071 $pad += $leading_space_count;
21074 # remember largest gap of the group, excluding gap to side comment
21076 && $group_maximum_gap < -$pad
21078 && $j < $jmax - 1 )
21080 $group_maximum_gap = -$pad;
21085 ## This patch helps sometimes, but it doesn't check to see if
21086 ## the line is too long even without the side comment. It needs
21088 ##don't let a long token with no trailing side comment push
21089 ##side comments out, or end a group. (sidecmt1.t)
21090 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
21092 # This line will need space; lets see if we want to accept it..
21095 # not if this won't fit
21096 ( $pad > $padding_available )
21098 # previously, there were upper bounds placed on padding here
21099 # (maximum_whitespace_columns), but they were not really helpful
21104 # revert to starting state then flush; things didn't work out
21105 restore_alignment_columns();
21110 # patch to avoid excessive gaps in previous lines,
21111 # due to a line of fewer fields.
21112 # return join( ".",
21113 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
21114 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
21115 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
21117 # looks ok, squeeze this field in
21118 $old_line->increase_field_width( $j, $pad );
21119 $padding_available -= $pad;
21121 # remember largest gap of the group, excluding gap to side comment
21122 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
21123 $group_maximum_gap = $pad;
21130 # The current line either starts a new alignment group or is
21131 # accepted into the current alignment group.
21132 my $new_line = shift;
21133 $group_lines[ ++$maximum_line_index ] = $new_line;
21135 # initialize field lengths if starting new group
21136 if ( $maximum_line_index == 0 ) {
21138 my $jmax = $new_line->get_jmax();
21139 my $rfields = $new_line->get_rfields();
21140 my $rtokens = $new_line->get_rtokens();
21142 my $col = $new_line->get_leading_space_count();
21144 for $j ( 0 .. $jmax ) {
21145 $col += length( $$rfields[$j] );
21147 # create initial alignments for the new group
21149 if ( $j < $jmax ) { $token = $$rtokens[$j] }
21150 my $alignment = make_alignment( $col, $token );
21151 $new_line->set_alignment( $j, $alignment );
21154 $maximum_jmax_seen = $jmax;
21155 $minimum_jmax_seen = $jmax;
21158 # use previous alignments otherwise
21160 my @new_alignments =
21161 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
21162 $new_line->set_alignments(@new_alignments);
21165 # remember group jmax extremes for next call to valign_input
21166 $previous_minimum_jmax_seen = $minimum_jmax_seen;
21167 $previous_maximum_jmax_seen = $maximum_jmax_seen;
21172 # debug routine to dump array contents
21174 print STDOUT "(@_)\n";
21177 # flush() sends the current Perl::Tidy::VerticalAligner group down the
21178 # pipeline to Perl::Tidy::FileWriter.
21180 # This is the external flush, which also empties the buffer and cache
21183 # the buffer must be emptied first, then any cached text
21184 dump_valign_buffer();
21186 if ( $maximum_line_index < 0 ) {
21187 if ($cached_line_type) {
21188 $seqno_string = $cached_seqno_string;
21189 valign_output_step_C( $cached_line_text,
21190 $cached_line_leading_space_count,
21191 $last_level_written );
21192 $cached_line_type = 0;
21193 $cached_line_text = "";
21194 $cached_seqno_string = "";
21202 sub reduce_valign_buffer_indentation {
21205 if ( $valign_buffer_filling && $diff ) {
21206 my $max_valign_buffer = @valign_buffer;
21207 for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) {
21208 my ( $line, $leading_space_count, $level ) =
21209 @{ $valign_buffer[$i] };
21210 my $ws = substr( $line, 0, $diff );
21211 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
21212 $line = substr( $line, $diff );
21214 if ( $leading_space_count >= $diff ) {
21215 $leading_space_count -= $diff;
21216 $level = level_change( $leading_space_count, $diff, $level );
21218 $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
21225 # compute decrease in level when we remove $diff spaces from the
21227 my ( $leading_space_count, $diff, $level ) = @_;
21228 if ($rOpts_indent_columns) {
21230 int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
21231 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
21232 $level -= ( $olev - $nlev );
21233 if ( $level < 0 ) { $level = 0 }
21238 sub dump_valign_buffer {
21239 if (@valign_buffer) {
21240 foreach (@valign_buffer) {
21241 valign_output_step_D( @{$_} );
21243 @valign_buffer = ();
21245 $valign_buffer_filling = "";
21248 # This is the internal flush, which leaves the cache intact
21251 return if ( $maximum_line_index < 0 );
21253 # handle a group of comment lines
21254 if ( $group_type eq 'COMMENT' ) {
21256 VALIGN_DEBUG_FLAG_APPEND0 && do {
21257 my ( $a, $b, $c ) = caller();
21259 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
21262 my $leading_space_count = $comment_leading_space_count;
21263 my $leading_string = get_leading_string($leading_space_count);
21265 # zero leading space count if any lines are too long
21266 my $max_excess = 0;
21267 for my $i ( 0 .. $maximum_line_index ) {
21268 my $str = $group_lines[$i];
21271 $leading_space_count -
21272 maximum_line_length_for_level($group_level);
21273 if ( $excess > $max_excess ) {
21274 $max_excess = $excess;
21278 if ( $max_excess > 0 ) {
21279 $leading_space_count -= $max_excess;
21280 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
21281 $last_outdented_line_at =
21282 $file_writer_object->get_output_line_number();
21283 unless ($outdented_line_count) {
21284 $first_outdented_line_at = $last_outdented_line_at;
21286 $outdented_line_count += ( $maximum_line_index + 1 );
21289 # write the group of lines
21290 my $outdent_long_lines = 0;
21291 for my $i ( 0 .. $maximum_line_index ) {
21292 valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
21293 $outdent_long_lines, "", $group_level );
21297 # handle a group of code lines
21300 VALIGN_DEBUG_FLAG_APPEND0 && do {
21301 my $group_list_type = $group_lines[0]->get_list_type();
21302 my ( $a, $b, $c ) = caller();
21303 my $maximum_field_index = $group_lines[0]->get_jmax();
21305 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
21309 # some small groups are best left unaligned
21310 my $do_not_align = decide_if_aligned();
21312 # optimize side comment location
21313 $do_not_align = adjust_side_comment($do_not_align);
21315 # recover spaces for -lp option if possible
21316 my $extra_leading_spaces = get_extra_leading_spaces();
21318 # all lines of this group have the same basic leading spacing
21319 my $group_leader_length = $group_lines[0]->get_leading_space_count();
21321 # add extra leading spaces if helpful
21322 my $min_ci_gap = improve_continuation_indentation( $do_not_align,
21323 $group_leader_length );
21325 # loop to output all lines
21326 for my $i ( 0 .. $maximum_line_index ) {
21327 my $line = $group_lines[$i];
21328 valign_output_step_A( $line, $min_ci_gap, $do_not_align,
21329 $group_leader_length, $extra_leading_spaces );
21332 initialize_for_new_group();
21335 sub decide_if_aligned {
21337 # Do not try to align two lines which are not really similar
21338 return unless $maximum_line_index == 1;
21339 return if ($is_matching_terminal_line);
21341 my $group_list_type = $group_lines[0]->get_list_type();
21343 my $do_not_align = (
21345 # always align lists
21350 # don't align if it was just a marginal match
21353 # don't align two lines with big gap
21354 || $group_maximum_gap > 12
21356 # or lines with differing number of alignment tokens
21357 # TODO: this could be improved. It occasionally rejects
21359 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
21363 # But try to convert them into a simple comment group if the first line
21364 # a has side comment
21365 my $rfields = $group_lines[0]->get_rfields();
21366 my $maximum_field_index = $group_lines[0]->get_jmax();
21368 && ( $maximum_line_index > 0 )
21369 && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
21374 return $do_not_align;
21377 sub adjust_side_comment {
21379 my $do_not_align = shift;
21381 # let's see if we can move the side comment field out a little
21382 # to improve readability (the last field is always a side comment field)
21383 my $have_side_comment = 0;
21384 my $first_side_comment_line = -1;
21385 my $maximum_field_index = $group_lines[0]->get_jmax();
21386 for my $i ( 0 .. $maximum_line_index ) {
21387 my $line = $group_lines[$i];
21389 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
21390 $have_side_comment = 1;
21391 $first_side_comment_line = $i;
21396 my $kmax = $maximum_field_index + 1;
21398 if ($have_side_comment) {
21400 my $line = $group_lines[0];
21402 # the maximum space without exceeding the line length:
21403 my $avail = $line->get_available_space_on_right();
21405 # try to use the previous comment column
21406 my $side_comment_column = $line->get_column( $kmax - 2 );
21407 my $move = $last_comment_column - $side_comment_column;
21409 ## my $sc_line0 = $side_comment_history[0]->[0];
21410 ## my $sc_col0 = $side_comment_history[0]->[1];
21411 ## my $sc_line1 = $side_comment_history[1]->[0];
21412 ## my $sc_col1 = $side_comment_history[1]->[1];
21413 ## my $sc_line2 = $side_comment_history[2]->[0];
21414 ## my $sc_col2 = $side_comment_history[2]->[1];
21416 ## # FUTURE UPDATES:
21417 ## # Be sure to ignore 'do not align' and '} # end comments'
21418 ## # Find first $move > 0 and $move <= $avail as follows:
21419 ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
21420 ## # 2. try sc_col2 if (line-sc_line2) < 12
21421 ## # 3. try min possible space, plus up to 8,
21422 ## # 4. try min possible space
21424 if ( $kmax > 0 && !$do_not_align ) {
21426 # but if this doesn't work, give up and use the minimum space
21427 if ( $move > $avail ) {
21428 $move = $rOpts_minimum_space_to_comment - 1;
21431 # but we want some minimum space to the comment
21432 my $min_move = $rOpts_minimum_space_to_comment - 1;
21434 && $last_side_comment_length > 0
21435 && ( $first_side_comment_line == 0 )
21436 && $group_level == $last_level_written )
21441 if ( $move < $min_move ) {
21445 # previously, an upper bound was placed on $move here,
21446 # (maximum_space_to_comment), but it was not helpful
21448 # don't exceed the available space
21449 if ( $move > $avail ) { $move = $avail }
21451 # we can only increase space, never decrease
21453 $line->increase_field_width( $maximum_field_index - 1, $move );
21456 # remember this column for the next group
21457 $last_comment_column = $line->get_column( $kmax - 2 );
21461 # try to at least line up the existing side comment location
21462 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
21463 $line->increase_field_width( $maximum_field_index - 1, $move );
21467 # reset side comment column if we can't align
21469 forget_side_comment();
21473 return $do_not_align;
21476 sub improve_continuation_indentation {
21477 my ( $do_not_align, $group_leader_length ) = @_;
21479 # See if we can increase the continuation indentation
21480 # to move all continuation lines closer to the next field
21481 # (unless it is a comment).
21483 # '$min_ci_gap'is the extra indentation that we may need to introduce.
21484 # We will only introduce this to fields which already have some ci.
21485 # Without this variable, we would occasionally get something like this
21488 # use overload '+' => \&plus,
21490 # '*' => \&multiply,
21493 # 'atan2' => \&atan2,
21495 # Whereas with this variable, we can shift variables over to get this:
21497 # use overload '+' => \&plus,
21499 # '*' => \&multiply,
21502 # 'atan2' => \&atan2,
21504 ## Deactivated####################
21505 # The trouble with this patch is that it may, for example,
21506 # move in some 'or's or ':'s, and leave some out, so that the
21507 # left edge alignment suffers.
21509 ###########################################
21511 my $maximum_field_index = $group_lines[0]->get_jmax();
21513 my $min_ci_gap = maximum_line_length_for_level($group_level);
21514 if ( $maximum_field_index > 1 && !$do_not_align ) {
21516 for my $i ( 0 .. $maximum_line_index ) {
21517 my $line = $group_lines[$i];
21518 my $leading_space_count = $line->get_leading_space_count();
21519 my $rfields = $line->get_rfields();
21522 $line->get_column(0) -
21523 $leading_space_count -
21524 length( $$rfields[0] );
21526 if ( $leading_space_count > $group_leader_length ) {
21527 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
21531 if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) {
21538 return $min_ci_gap;
21541 sub valign_output_step_A {
21543 ###############################################################
21544 # This is Step A in writing vertically aligned lines.
21545 # The line is prepared according to the alignments which have
21546 # been found and shipped to the next step.
21547 ###############################################################
21549 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
21550 $extra_leading_spaces )
21552 my $rfields = $line->get_rfields();
21553 my $leading_space_count = $line->get_leading_space_count();
21554 my $outdent_long_lines = $line->get_outdent_long_lines();
21555 my $maximum_field_index = $line->get_jmax();
21556 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
21558 # add any extra spaces
21559 if ( $leading_space_count > $group_leader_length ) {
21560 $leading_space_count += $min_ci_gap;
21563 my $str = $$rfields[0];
21565 # loop to concatenate all fields of this line and needed padding
21566 my $total_pad_count = 0;
21568 for $j ( 1 .. $maximum_field_index ) {
21570 # skip zero-length side comments
21572 if ( ( $j == $maximum_field_index )
21573 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
21576 # compute spaces of padding before this field
21577 my $col = $line->get_column( $j - 1 );
21578 $pad = $col - ( length($str) + $leading_space_count );
21580 if ($do_not_align) {
21582 ( $j < $maximum_field_index )
21584 : $rOpts_minimum_space_to_comment - 1;
21587 # if the -fpsc flag is set, move the side comment to the selected
21588 # column if and only if it is possible, ignoring constraints on
21589 # line length and minimum space to comment
21590 if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
21592 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
21593 if ( $newpad >= 0 ) { $pad = $newpad; }
21596 # accumulate the padding
21597 if ( $pad > 0 ) { $total_pad_count += $pad; }
21600 if ( !defined $$rfields[$j] ) {
21601 write_diagnostics("UNDEFined field at j=$j\n");
21604 # only add padding when we have a finite field;
21605 # this avoids extra terminal spaces if we have empty fields
21606 if ( length( $$rfields[$j] ) > 0 ) {
21607 $str .= ' ' x $total_pad_count;
21608 $total_pad_count = 0;
21609 $str .= $$rfields[$j];
21612 $total_pad_count = 0;
21615 # update side comment history buffer
21616 if ( $j == $maximum_field_index ) {
21617 my $lineno = $file_writer_object->get_output_line_number();
21618 shift @side_comment_history;
21619 push @side_comment_history, [ $lineno, $col ];
21623 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
21625 # ship this line off
21626 valign_output_step_B( $leading_space_count + $extra_leading_spaces,
21627 $str, $side_comment_length, $outdent_long_lines,
21628 $rvertical_tightness_flags, $group_level );
21631 sub get_extra_leading_spaces {
21633 #----------------------------------------------------------
21634 # Define any extra indentation space (for the -lp option).
21636 # If a list has side comments, sub scan_list must dump the
21637 # list before it sees everything. When this happens, it sets
21638 # the indentation to the standard scheme, but notes how
21639 # many spaces it would have liked to use. We may be able
21640 # to recover that space here in the event that all of the
21641 # lines of a list are back together again.
21642 #----------------------------------------------------------
21644 my $extra_leading_spaces = 0;
21645 if ($extra_indent_ok) {
21646 my $object = $group_lines[0]->get_indentation();
21647 if ( ref($object) ) {
21648 my $extra_indentation_spaces_wanted =
21649 get_RECOVERABLE_SPACES($object);
21651 # all indentation objects must be the same
21653 for $i ( 1 .. $maximum_line_index ) {
21654 if ( $object != $group_lines[$i]->get_indentation() ) {
21655 $extra_indentation_spaces_wanted = 0;
21660 if ($extra_indentation_spaces_wanted) {
21662 # the maximum space without exceeding the line length:
21663 my $avail = $group_lines[0]->get_available_space_on_right();
21664 $extra_leading_spaces =
21665 ( $avail > $extra_indentation_spaces_wanted )
21666 ? $extra_indentation_spaces_wanted
21669 # update the indentation object because with -icp the terminal
21670 # ');' will use the same adjustment.
21671 $object->permanently_decrease_AVAILABLE_SPACES(
21672 -$extra_leading_spaces );
21676 return $extra_leading_spaces;
21679 sub combine_fields {
21681 # combine all fields except for the comment field ( sidecmt.t )
21682 # Uses global variables:
21684 # $maximum_line_index
21686 my $maximum_field_index = $group_lines[0]->get_jmax();
21687 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
21688 my $line = $group_lines[$j];
21689 my $rfields = $line->get_rfields();
21690 foreach ( 1 .. $maximum_field_index - 1 ) {
21691 $$rfields[0] .= $$rfields[$_];
21693 $$rfields[1] = $$rfields[$maximum_field_index];
21695 $line->set_jmax(1);
21696 $line->set_column( 0, 0 );
21697 $line->set_column( 1, 0 );
21700 $maximum_field_index = 1;
21702 for $j ( 0 .. $maximum_line_index ) {
21703 my $line = $group_lines[$j];
21704 my $rfields = $line->get_rfields();
21705 for $k ( 0 .. $maximum_field_index ) {
21706 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
21708 $pad += $group_lines[$j]->get_leading_space_count();
21711 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
21717 sub get_output_line_number {
21719 # the output line number reported to a caller is the number of items
21720 # written plus the number of items in the buffer
21722 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
21725 sub valign_output_step_B {
21727 ###############################################################
21728 # This is Step B in writing vertically aligned lines.
21729 # Vertical tightness is applied according to preset flags.
21730 # In particular this routine handles stacking of opening
21731 # and closing tokens.
21732 ###############################################################
21734 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
21735 $rvertical_tightness_flags, $level )
21738 # handle outdenting of long lines:
21739 if ($outdent_long_lines) {
21742 $side_comment_length +
21743 $leading_space_count -
21744 maximum_line_length_for_level($level);
21745 if ( $excess > 0 ) {
21746 $leading_space_count = 0;
21747 $last_outdented_line_at =
21748 $file_writer_object->get_output_line_number();
21750 unless ($outdented_line_count) {
21751 $first_outdented_line_at = $last_outdented_line_at;
21753 $outdented_line_count++;
21757 # Make preliminary leading whitespace. It could get changed
21758 # later by entabbing, so we have to keep track of any changes
21759 # to the leading_space_count from here on.
21760 my $leading_string =
21761 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
21763 # Unpack any recombination data; it was packed by
21764 # sub send_lines_to_vertical_aligner. Contents:
21766 # [0] type: 1=opening non-block 2=closing non-block
21767 # 3=opening block brace 4=closing block brace
21768 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
21769 # if closing: spaces of padding to use
21770 # [2] sequence number of container
21771 # [3] valid flag: do not append if this flag is false
21773 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
21775 if ($rvertical_tightness_flags) {
21777 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
21779 ) = @{$rvertical_tightness_flags};
21782 $seqno_string = $seqno_end;
21784 # handle any cached line ..
21785 # either append this line to it or write it out
21786 if ( length($cached_line_text) ) {
21788 # Dump an invalid cached line
21789 if ( !$cached_line_valid ) {
21790 valign_output_step_C( $cached_line_text,
21791 $cached_line_leading_space_count,
21792 $last_level_written );
21795 # Handle cached line ending in OPENING tokens
21796 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
21798 my $gap = $leading_space_count - length($cached_line_text);
21800 # handle option of just one tight opening per line:
21801 if ( $cached_line_flag == 1 ) {
21802 if ( defined($open_or_close) && $open_or_close == 1 ) {
21807 if ( $gap >= 0 && defined($seqno_beg) ) {
21808 $leading_string = $cached_line_text . ' ' x $gap;
21809 $leading_space_count = $cached_line_leading_space_count;
21810 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
21811 $level = $last_level_written;
21814 valign_output_step_C( $cached_line_text,
21815 $cached_line_leading_space_count,
21816 $last_level_written );
21820 # Handle cached line ending in CLOSING tokens
21822 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
21825 # The new line must start with container
21828 # The container combination must be okay..
21831 # okay to combine like types
21832 ( $open_or_close == $cached_line_type )
21834 # closing block brace may append to non-block
21835 || ( $cached_line_type == 2 && $open_or_close == 4 )
21837 # something like ');'
21838 || ( !$open_or_close && $cached_line_type == 2 )
21842 # The combined line must fit
21844 length($test_line) <=
21845 maximum_line_length_for_level($last_level_written) )
21849 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
21851 # Patch to outdent closing tokens ending # in ');'
21852 # If we are joining a line like ');' to a previous stacked
21853 # set of closing tokens, then decide if we may outdent the
21854 # combined stack to the indentation of the ');'. Since we
21855 # should not normally outdent any of the other tokens more than
21856 # the indentation of the lines that contained them, we will
21857 # only do this if all of the corresponding opening
21858 # tokens were on the same line. This can happen with
21859 # -sot and -sct. For example, it is ok here:
21860 # __PACKAGE__->load_components( qw(
21865 # But, for example, we do not outdent in this example because
21866 # that would put the closing sub brace out farther than the
21867 # opening sub brace:
21869 # perltidy -sot -sct
21871 # '<Control-f>' => sub {
21873 # my $e = $c->XEvent;
21874 # itemsUnderArea $c;
21877 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
21879 # The way to tell this is if the stacked sequence numbers
21880 # of this output line are the reverse of the stacked
21881 # sequence numbers of the previous non-blank line of
21882 # sequence numbers. So we can join if the previous
21883 # nonblank string of tokens is the mirror image. For
21884 # example if stack )}] is 13:8:6 then we are looking for a
21885 # leading stack like [{( which is 6:8:13 We only need to
21886 # check the two ends, because the intermediate tokens must
21887 # fall in order. Note on speed: having to split on colons
21888 # and eliminate multiple colons might appear to be slow,
21889 # but it's not an issue because we almost never come
21890 # through here. In a typical file we don't.
21891 $seqno_string =~ s/^:+//;
21892 $last_nonblank_seqno_string =~ s/^:+//;
21893 $seqno_string =~ s/:+/:/g;
21894 $last_nonblank_seqno_string =~ s/:+/:/g;
21896 # how many spaces can we outdent?
21898 $cached_line_leading_space_count - $leading_space_count;
21900 && length($seqno_string)
21901 && length($last_nonblank_seqno_string) ==
21902 length($seqno_string) )
21905 ( split ':', $last_nonblank_seqno_string );
21906 my @seqno_now = ( split ':', $seqno_string );
21907 if ( $seqno_now[-1] == $seqno_last[0]
21908 && $seqno_now[0] == $seqno_last[-1] )
21912 # for absolute safety, be sure we only remove
21914 my $ws = substr( $test_line, 0, $diff );
21915 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
21917 $test_line = substr( $test_line, $diff );
21918 $cached_line_leading_space_count -= $diff;
21919 $last_level_written =
21921 $cached_line_leading_space_count,
21922 $diff, $last_level_written );
21923 reduce_valign_buffer_indentation($diff);
21926 # shouldn't happen, but not critical:
21928 ## ERROR transferring indentation here
21935 $leading_string = "";
21936 $leading_space_count = $cached_line_leading_space_count;
21937 $level = $last_level_written;
21940 valign_output_step_C( $cached_line_text,
21941 $cached_line_leading_space_count,
21942 $last_level_written );
21946 $cached_line_type = 0;
21947 $cached_line_text = "";
21949 # make the line to be written
21950 my $line = $leading_string . $str;
21952 # write or cache this line
21953 if ( !$open_or_close || $side_comment_length > 0 ) {
21954 valign_output_step_C( $line, $leading_space_count, $level );
21957 $cached_line_text = $line;
21958 $cached_line_type = $open_or_close;
21959 $cached_line_flag = $tightness_flag;
21960 $cached_seqno = $seqno;
21961 $cached_line_valid = $valid;
21962 $cached_line_leading_space_count = $leading_space_count;
21963 $cached_seqno_string = $seqno_string;
21966 $last_level_written = $level;
21967 $last_side_comment_length = $side_comment_length;
21968 $extra_indent_ok = 0;
21971 sub valign_output_step_C {
21973 ###############################################################
21974 # This is Step C in writing vertically aligned lines.
21975 # Lines are either stored in a buffer or passed along to the next step.
21976 # The reason for storing lines is that we may later want to reduce their
21977 # indentation when -sot and -sct are both used.
21978 ###############################################################
21981 # Dump any saved lines if we see a line with an unbalanced opening or
21983 dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
21985 # Either store or write this line
21986 if ($valign_buffer_filling) {
21987 push @valign_buffer, [@args];
21990 valign_output_step_D(@args);
21993 # For lines starting or ending with opening or closing tokens..
21994 if ($seqno_string) {
21995 $last_nonblank_seqno_string = $seqno_string;
21997 # Start storing lines when we see a line with multiple stacked opening
21999 if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) {
22000 $valign_buffer_filling = $seqno_string;
22005 sub valign_output_step_D {
22007 ###############################################################
22008 # This is Step D in writing vertically aligned lines.
22009 # Write one vertically aligned line of code to the output object.
22010 ###############################################################
22012 my ( $line, $leading_space_count, $level ) = @_;
22014 # The line is currently correct if there is no tabbing (recommended!)
22015 # We may have to lop off some leading spaces and replace with tabs.
22016 if ( $leading_space_count > 0 ) {
22018 # Nothing to do if no tabs
22019 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22020 || $rOpts_indent_columns <= 0 )
22026 # Handle entab option
22027 elsif ($rOpts_entab_leading_whitespace) {
22029 $leading_space_count % $rOpts_entab_leading_whitespace;
22031 int( $leading_space_count / $rOpts_entab_leading_whitespace );
22032 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
22033 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22034 substr( $line, 0, $leading_space_count ) = $leading_string;
22038 # shouldn't happen - program error counting whitespace
22040 VALIGN_DEBUG_FLAG_TABS
22042 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22047 # Handle option of one tab per level
22049 my $leading_string = ( "\t" x $level );
22051 $leading_space_count - $level * $rOpts_indent_columns;
22053 # shouldn't happen:
22054 if ( $space_count < 0 ) {
22056 # But it could be an outdented comment
22057 if ( $line !~ /^\s*#/ ) {
22058 VALIGN_DEBUG_FLAG_TABS
22060 "Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
22063 $leading_string = ( ' ' x $leading_space_count );
22066 $leading_string .= ( ' ' x $space_count );
22068 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22069 substr( $line, 0, $leading_space_count ) = $leading_string;
22073 # shouldn't happen - program error counting whitespace
22074 # we'll skip entabbing
22075 VALIGN_DEBUG_FLAG_TABS
22077 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22082 $file_writer_object->write_code_line( $line . "\n" );
22085 { # begin get_leading_string
22087 my @leading_string_cache;
22089 sub get_leading_string {
22091 # define the leading whitespace string for this line..
22092 my $leading_whitespace_count = shift;
22094 # Handle case of zero whitespace, which includes multi-line quotes
22095 # (which may have a finite level; this prevents tab problems)
22096 if ( $leading_whitespace_count <= 0 ) {
22100 # look for previous result
22101 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
22102 return $leading_string_cache[$leading_whitespace_count];
22105 # must compute a string for this number of spaces
22106 my $leading_string;
22108 # Handle simple case of no tabs
22109 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22110 || $rOpts_indent_columns <= 0 )
22112 $leading_string = ( ' ' x $leading_whitespace_count );
22115 # Handle entab option
22116 elsif ($rOpts_entab_leading_whitespace) {
22118 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
22119 my $tab_count = int(
22120 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
22121 $leading_string = "\t" x $tab_count . ' ' x $space_count;
22124 # Handle option of one tab per level
22126 $leading_string = ( "\t" x $group_level );
22128 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
22130 # shouldn't happen:
22131 if ( $space_count < 0 ) {
22132 VALIGN_DEBUG_FLAG_TABS
22134 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
22137 # -- skip entabbing
22138 $leading_string = ( ' ' x $leading_whitespace_count );
22141 $leading_string .= ( ' ' x $space_count );
22144 $leading_string_cache[$leading_whitespace_count] = $leading_string;
22145 return $leading_string;
22147 } # end get_leading_string
22149 sub report_anything_unusual {
22151 if ( $outdented_line_count > 0 ) {
22152 write_logfile_entry(
22153 "$outdented_line_count long lines were outdented:\n");
22154 write_logfile_entry(
22155 " First at output line $first_outdented_line_at\n");
22157 if ( $outdented_line_count > 1 ) {
22158 write_logfile_entry(
22159 " Last at output line $last_outdented_line_at\n");
22161 write_logfile_entry(
22162 " use -noll to prevent outdenting, -l=n to increase line length\n"
22164 write_logfile_entry("\n");
22168 #####################################################################
22170 # the Perl::Tidy::FileWriter class writes the output file
22172 #####################################################################
22174 package Perl::Tidy::FileWriter;
22176 # Maximum number of little messages; probably need not be changed.
22177 use constant MAX_NAG_MESSAGES => 6;
22179 sub write_logfile_entry {
22181 my $logger_object = $self->{_logger_object};
22182 if ($logger_object) {
22183 $logger_object->write_logfile_entry(@_);
22189 my ( $line_sink_object, $rOpts, $logger_object ) = @_;
22192 _line_sink_object => $line_sink_object,
22193 _logger_object => $logger_object,
22195 _output_line_number => 1,
22196 _consecutive_blank_lines => 0,
22197 _consecutive_nonblank_lines => 0,
22198 _first_line_length_error => 0,
22199 _max_line_length_error => 0,
22200 _last_line_length_error => 0,
22201 _first_line_length_error_at => 0,
22202 _max_line_length_error_at => 0,
22203 _last_line_length_error_at => 0,
22204 _line_length_error_count => 0,
22205 _max_output_line_length => 0,
22206 _max_output_line_length_at => 0,
22212 $self->{_line_sink_object}->tee_on();
22217 $self->{_line_sink_object}->tee_off();
22220 sub get_output_line_number {
22222 return $self->{_output_line_number};
22225 sub decrement_output_line_number {
22227 $self->{_output_line_number}--;
22230 sub get_consecutive_nonblank_lines {
22232 return $self->{_consecutive_nonblank_lines};
22235 sub reset_consecutive_blank_lines {
22237 $self->{_consecutive_blank_lines} = 0;
22240 sub want_blank_line {
22242 unless ( $self->{_consecutive_blank_lines} ) {
22243 $self->write_blank_code_line();
22247 sub require_blank_code_lines {
22249 # write out the requested number of blanks regardless of the value of -mbl
22250 # unless -mbl=0. This allows extra blank lines to be written for subs and
22251 # packages even with the default -mbl=1
22254 my $need = $count - $self->{_consecutive_blank_lines};
22255 my $rOpts = $self->{_rOpts};
22256 my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
22257 for ( my $i = 0 ; $i < $need ; $i++ ) {
22258 $self->write_blank_code_line($forced);
22262 sub write_blank_code_line {
22264 my $forced = shift;
22265 my $rOpts = $self->{_rOpts};
22268 && $self->{_consecutive_blank_lines} >=
22269 $rOpts->{'maximum-consecutive-blank-lines'} );
22270 $self->{_consecutive_blank_lines}++;
22271 $self->{_consecutive_nonblank_lines} = 0;
22272 $self->write_line("\n");
22275 sub write_code_line {
22279 if ( $a =~ /^\s*$/ ) {
22280 my $rOpts = $self->{_rOpts};
22282 if ( $self->{_consecutive_blank_lines} >=
22283 $rOpts->{'maximum-consecutive-blank-lines'} );
22284 $self->{_consecutive_blank_lines}++;
22285 $self->{_consecutive_nonblank_lines} = 0;
22288 $self->{_consecutive_blank_lines} = 0;
22289 $self->{_consecutive_nonblank_lines}++;
22291 $self->write_line($a);
22298 # TODO: go through and see if the test is necessary here
22299 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
22301 $self->{_line_sink_object}->write_line($a);
22303 # This calculation of excess line length ignores any internal tabs
22304 my $rOpts = $self->{_rOpts};
22305 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
22306 if ( $a =~ /^\t+/g ) {
22307 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
22310 # Note that we just incremented output line number to future value
22311 # so we must subtract 1 for current line number
22312 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
22313 $self->{_max_output_line_length} = length($a) - 1;
22314 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
22317 if ( $exceed > 0 ) {
22318 my $output_line_number = $self->{_output_line_number};
22319 $self->{_last_line_length_error} = $exceed;
22320 $self->{_last_line_length_error_at} = $output_line_number - 1;
22321 if ( $self->{_line_length_error_count} == 0 ) {
22322 $self->{_first_line_length_error} = $exceed;
22323 $self->{_first_line_length_error_at} = $output_line_number - 1;
22327 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
22329 $self->{_max_line_length_error} = $exceed;
22330 $self->{_max_line_length_error_at} = $output_line_number - 1;
22333 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
22334 $self->write_logfile_entry(
22335 "Line length exceeded by $exceed characters\n");
22337 $self->{_line_length_error_count}++;
22342 sub report_line_length_errors {
22344 my $rOpts = $self->{_rOpts};
22345 my $line_length_error_count = $self->{_line_length_error_count};
22346 if ( $line_length_error_count == 0 ) {
22347 $self->write_logfile_entry(
22348 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
22349 my $max_output_line_length = $self->{_max_output_line_length};
22350 my $max_output_line_length_at = $self->{_max_output_line_length_at};
22351 $self->write_logfile_entry(
22352 " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
22358 my $word = ( $line_length_error_count > 1 ) ? "s" : "";
22359 $self->write_logfile_entry(
22360 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
22363 $word = ( $line_length_error_count > 1 ) ? "First" : "";
22364 my $first_line_length_error = $self->{_first_line_length_error};
22365 my $first_line_length_error_at = $self->{_first_line_length_error_at};
22366 $self->write_logfile_entry(
22367 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
22370 if ( $line_length_error_count > 1 ) {
22371 my $max_line_length_error = $self->{_max_line_length_error};
22372 my $max_line_length_error_at = $self->{_max_line_length_error_at};
22373 my $last_line_length_error = $self->{_last_line_length_error};
22374 my $last_line_length_error_at = $self->{_last_line_length_error_at};
22375 $self->write_logfile_entry(
22376 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
22378 $self->write_logfile_entry(
22379 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
22385 #####################################################################
22387 # The Perl::Tidy::Debugger class shows line tokenization
22389 #####################################################################
22391 package Perl::Tidy::Debugger;
22395 my ( $class, $filename ) = @_;
22398 _debug_file => $filename,
22399 _debug_file_opened => 0,
22404 sub really_open_debug_file {
22407 my $debug_file = $self->{_debug_file};
22409 unless ( $fh = IO::File->new("> $debug_file") ) {
22410 Perl::Tidy::Warn("can't open $debug_file: $!\n");
22412 $self->{_debug_file_opened} = 1;
22413 $self->{_fh} = $fh;
22415 "Use -dump-token-types (-dtt) to get a list of token type codes\n";
22418 sub close_debug_file {
22421 my $fh = $self->{_fh};
22422 if ( $self->{_debug_file_opened} ) {
22424 eval { $self->{_fh}->close() };
22428 sub write_debug_entry {
22430 # This is a debug dump routine which may be modified as necessary
22431 # to dump tokens on a line-by-line basis. The output will be written
22432 # to the .DEBUG file when the -D flag is entered.
22434 my $line_of_tokens = shift;
22436 my $input_line = $line_of_tokens->{_line_text};
22437 my $rtoken_type = $line_of_tokens->{_rtoken_type};
22438 my $rtokens = $line_of_tokens->{_rtokens};
22439 my $rlevels = $line_of_tokens->{_rlevels};
22440 my $rslevels = $line_of_tokens->{_rslevels};
22441 my $rblock_type = $line_of_tokens->{_rblock_type};
22442 my $input_line_number = $line_of_tokens->{_line_number};
22443 my $line_type = $line_of_tokens->{_line_type};
22447 my $token_str = "$input_line_number: ";
22448 my $reconstructed_original = "$input_line_number: ";
22449 my $block_str = "$input_line_number: ";
22451 #$token_str .= "$line_type: ";
22452 #$reconstructed_original .= "$line_type: ";
22455 my @next_char = ( '"', '"' );
22457 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
22458 my $fh = $self->{_fh};
22460 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
22463 if ( $$rtoken_type[$j] eq 'k' ) {
22464 $pattern .= $$rtokens[$j];
22467 $pattern .= $$rtoken_type[$j];
22469 $reconstructed_original .= $$rtokens[$j];
22470 $block_str .= "($$rblock_type[$j])";
22471 $num = length( $$rtokens[$j] );
22472 my $type_str = $$rtoken_type[$j];
22474 # be sure there are no blank tokens (shouldn't happen)
22475 # This can only happen if a programming error has been made
22476 # because all valid tokens are non-blank
22477 if ( $type_str eq ' ' ) {
22478 print $fh "BLANK TOKEN on the next line\n";
22479 $type_str = $next_char[$i_next];
22480 $i_next = 1 - $i_next;
22483 if ( length($type_str) == 1 ) {
22484 $type_str = $type_str x $num;
22486 $token_str .= $type_str;
22489 # Write what you want here ...
22490 # print $fh "$input_line\n";
22491 # print $fh "$pattern\n";
22492 print $fh "$reconstructed_original\n";
22493 print $fh "$token_str\n";
22495 #print $fh "$block_str\n";
22498 #####################################################################
22500 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
22501 # method for returning the next line to be parsed, as well as a
22502 # 'peek_ahead()' method
22504 # The input parameter is an object with a 'get_line()' method
22505 # which returns the next line to be parsed
22507 #####################################################################
22509 package Perl::Tidy::LineBuffer;
22514 my $line_source_object = shift;
22517 _line_source_object => $line_source_object,
22518 _rlookahead_buffer => [],
22524 my $buffer_index = shift;
22526 my $line_source_object = $self->{_line_source_object};
22527 my $rlookahead_buffer = $self->{_rlookahead_buffer};
22528 if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
22529 $line = $$rlookahead_buffer[$buffer_index];
22532 $line = $line_source_object->get_line();
22533 push( @$rlookahead_buffer, $line );
22541 my $line_source_object = $self->{_line_source_object};
22542 my $rlookahead_buffer = $self->{_rlookahead_buffer};
22544 if ( scalar(@$rlookahead_buffer) ) {
22545 $line = shift @$rlookahead_buffer;
22548 $line = $line_source_object->get_line();
22553 ########################################################################
22555 # the Perl::Tidy::Tokenizer package is essentially a filter which
22556 # reads lines of perl source code from a source object and provides
22557 # corresponding tokenized lines through its get_line() method. Lines
22558 # flow from the source_object to the caller like this:
22560 # source_object --> LineBuffer_object --> Tokenizer --> calling routine
22561 # get_line() get_line() get_line() line_of_tokens
22563 # The source object can be any object with a get_line() method which
22564 # supplies one line (a character string) perl call.
22565 # The LineBuffer object is created by the Tokenizer.
22566 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
22567 # containing one tokenized line for each call to its get_line() method.
22569 # WARNING: This is not a real class yet. Only one tokenizer my be used.
22571 ########################################################################
22573 package Perl::Tidy::Tokenizer;
22577 # Caution: these debug flags produce a lot of output
22578 # They should all be 0 except when debugging small scripts
22580 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
22581 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
22582 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
22583 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
22584 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
22586 my $debug_warning = sub {
22587 print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
22590 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
22591 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
22592 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
22593 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
22594 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
22600 # PACKAGE VARIABLES for processing an entire FILE.
22604 $last_nonblank_token
22605 $last_nonblank_type
22606 $last_nonblank_block_type
22614 %user_function_prototype
22616 %is_block_list_function
22617 %saw_function_definition
22621 $square_bracket_depth
22626 @nesting_sequence_number
22627 @current_sequence_number
22629 @paren_semicolon_count
22630 @paren_structural_type
22632 @brace_structural_type
22635 @square_bracket_type
22636 @square_bracket_structural_type
22638 @nested_ternary_flag
22639 @nested_statement_type
22640 @starting_line_of_current_depth
22643 # GLOBAL CONSTANTS for routines in this package
22645 %is_indirect_object_taker
22647 %expecting_operator_token
22648 %expecting_operator_types
22649 %expecting_term_types
22650 %expecting_term_token
22652 %is_file_test_operator
22654 %is_valid_token_type
22656 %is_code_block_token
22658 @opening_brace_names
22659 @closing_brace_names
22660 %is_keyword_taking_list
22661 %is_q_qq_qw_qx_qr_s_y_tr_m
22664 # possible values of operator_expected()
22665 use constant TERM => -1;
22666 use constant UNKNOWN => 0;
22667 use constant OPERATOR => 1;
22669 # possible values of context
22670 use constant SCALAR_CONTEXT => -1;
22671 use constant UNKNOWN_CONTEXT => 0;
22672 use constant LIST_CONTEXT => 1;
22674 # Maximum number of little messages; probably need not be changed.
22675 use constant MAX_NAG_MESSAGES => 6;
22679 # methods to count instances
22681 sub get_count { $_count; }
22682 sub _increment_count { ++$_count }
22683 sub _decrement_count { --$_count }
22687 $_[0]->_decrement_count();
22694 # Note: 'tabs' and 'indent_columns' are temporary and should be
22697 source_object => undef,
22698 debugger_object => undef,
22699 diagnostics_object => undef,
22700 logger_object => undef,
22701 starting_level => undef,
22702 indent_columns => 4,
22704 look_for_hash_bang => 0,
22706 look_for_autoloader => 1,
22707 look_for_selfloader => 1,
22708 starting_line_number => 1,
22710 my %args = ( %defaults, @_ );
22712 # we are given an object with a get_line() method to supply source lines
22713 my $source_object = $args{source_object};
22715 # we create another object with a get_line() and peek_ahead() method
22716 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
22718 # Tokenizer state data is as follows:
22719 # _rhere_target_list reference to list of here-doc targets
22720 # _here_doc_target the target string for a here document
22721 # _here_quote_character the type of here-doc quoting (" ' ` or none)
22722 # to determine if interpolation is done
22723 # _quote_target character we seek if chasing a quote
22724 # _line_start_quote line where we started looking for a long quote
22725 # _in_here_doc flag indicating if we are in a here-doc
22726 # _in_pod flag set if we are in pod documentation
22727 # _in_error flag set if we saw severe error (binary in script)
22728 # _in_data flag set if we are in __DATA__ section
22729 # _in_end flag set if we are in __END__ section
22730 # _in_format flag set if we are in a format description
22731 # _in_attribute_list flag telling if we are looking for attributes
22732 # _in_quote flag telling if we are chasing a quote
22733 # _starting_level indentation level of first line
22734 # _line_buffer_object object with get_line() method to supply source code
22735 # _diagnostics_object place to write debugging information
22736 # _unexpected_error_count error count used to limit output
22737 # _lower_case_labels_at line numbers where lower case labels seen
22738 $tokenizer_self = {
22739 _rhere_target_list => [],
22741 _here_doc_target => "",
22742 _here_quote_character => "",
22748 _in_attribute_list => 0,
22750 _quote_target => "",
22751 _line_start_quote => -1,
22752 _starting_level => $args{starting_level},
22753 _know_starting_level => defined( $args{starting_level} ),
22754 _tabsize => $args{tabsize},
22755 _indent_columns => $args{indent_columns},
22756 _look_for_hash_bang => $args{look_for_hash_bang},
22757 _trim_qw => $args{trim_qw},
22758 _continuation_indentation => $args{continuation_indentation},
22759 _outdent_labels => $args{outdent_labels},
22760 _last_line_number => $args{starting_line_number} - 1,
22761 _saw_perl_dash_P => 0,
22762 _saw_perl_dash_w => 0,
22763 _saw_use_strict => 0,
22764 _saw_v_string => 0,
22765 _look_for_autoloader => $args{look_for_autoloader},
22766 _look_for_selfloader => $args{look_for_selfloader},
22767 _saw_autoloader => 0,
22768 _saw_selfloader => 0,
22769 _saw_hash_bang => 0,
22772 _saw_negative_indentation => 0,
22773 _started_tokenizing => 0,
22774 _line_buffer_object => $line_buffer_object,
22775 _debugger_object => $args{debugger_object},
22776 _diagnostics_object => $args{diagnostics_object},
22777 _logger_object => $args{logger_object},
22778 _unexpected_error_count => 0,
22779 _started_looking_for_here_target_at => 0,
22780 _nearly_matched_here_target_at => undef,
22782 _rlower_case_labels_at => undef,
22785 prepare_for_a_new_file();
22786 find_starting_indentation_level();
22788 bless $tokenizer_self, $class;
22790 # This is not a full class yet, so die if an attempt is made to
22791 # create more than one object.
22793 if ( _increment_count() > 1 ) {
22795 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
22798 return $tokenizer_self;
22802 # interface to Perl::Tidy::Logger routines
22804 my $logger_object = $tokenizer_self->{_logger_object};
22805 if ($logger_object) {
22806 $logger_object->warning(@_);
22811 my $logger_object = $tokenizer_self->{_logger_object};
22812 if ($logger_object) {
22813 $logger_object->complain(@_);
22817 sub write_logfile_entry {
22818 my $logger_object = $tokenizer_self->{_logger_object};
22819 if ($logger_object) {
22820 $logger_object->write_logfile_entry(@_);
22824 sub interrupt_logfile {
22825 my $logger_object = $tokenizer_self->{_logger_object};
22826 if ($logger_object) {
22827 $logger_object->interrupt_logfile();
22831 sub resume_logfile {
22832 my $logger_object = $tokenizer_self->{_logger_object};
22833 if ($logger_object) {
22834 $logger_object->resume_logfile();
22838 sub increment_brace_error {
22839 my $logger_object = $tokenizer_self->{_logger_object};
22840 if ($logger_object) {
22841 $logger_object->increment_brace_error();
22845 sub report_definite_bug {
22846 my $logger_object = $tokenizer_self->{_logger_object};
22847 if ($logger_object) {
22848 $logger_object->report_definite_bug();
22852 sub brace_warning {
22853 my $logger_object = $tokenizer_self->{_logger_object};
22854 if ($logger_object) {
22855 $logger_object->brace_warning(@_);
22859 sub get_saw_brace_error {
22860 my $logger_object = $tokenizer_self->{_logger_object};
22861 if ($logger_object) {
22862 $logger_object->get_saw_brace_error();
22869 # interface to Perl::Tidy::Diagnostics routines
22870 sub write_diagnostics {
22871 if ( $tokenizer_self->{_diagnostics_object} ) {
22872 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
22876 sub report_tokenization_errors {
22880 my $level = get_indentation_level();
22881 if ( $level != $tokenizer_self->{_starting_level} ) {
22882 warning("final indentation level: $level\n");
22885 check_final_nesting_depths();
22887 if ( $tokenizer_self->{_look_for_hash_bang}
22888 && !$tokenizer_self->{_saw_hash_bang} )
22891 "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
22894 if ( $tokenizer_self->{_in_format} ) {
22895 warning("hit EOF while in format description\n");
22898 if ( $tokenizer_self->{_in_pod} ) {
22900 # Just write log entry if this is after __END__ or __DATA__
22901 # because this happens to often, and it is not likely to be
22903 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
22904 write_logfile_entry(
22905 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
22911 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
22917 if ( $tokenizer_self->{_in_here_doc} ) {
22918 my $here_doc_target = $tokenizer_self->{_here_doc_target};
22919 my $started_looking_for_here_target_at =
22920 $tokenizer_self->{_started_looking_for_here_target_at};
22921 if ($here_doc_target) {
22923 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
22928 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
22931 my $nearly_matched_here_target_at =
22932 $tokenizer_self->{_nearly_matched_here_target_at};
22933 if ($nearly_matched_here_target_at) {
22935 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
22940 if ( $tokenizer_self->{_in_quote} ) {
22941 my $line_start_quote = $tokenizer_self->{_line_start_quote};
22942 my $quote_target = $tokenizer_self->{_quote_target};
22944 ( $tokenizer_self->{_in_attribute_list} )
22948 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
22952 unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
22953 if ( $] < 5.006 ) {
22954 write_logfile_entry("Suggest including '-w parameter'\n");
22957 write_logfile_entry("Suggest including 'use warnings;'\n");
22961 if ( $tokenizer_self->{_saw_perl_dash_P} ) {
22962 write_logfile_entry("Use of -P parameter for defines is discouraged\n");
22965 unless ( $tokenizer_self->{_saw_use_strict} ) {
22966 write_logfile_entry("Suggest including 'use strict;'\n");
22969 # it is suggested that labels have at least one upper case character
22970 # for legibility and to avoid code breakage as new keywords are introduced
22971 if ( $tokenizer_self->{_rlower_case_labels_at} ) {
22972 my @lower_case_labels_at =
22973 @{ $tokenizer_self->{_rlower_case_labels_at} };
22974 write_logfile_entry(
22975 "Suggest using upper case characters in label(s)\n");
22977 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
22981 sub report_v_string {
22983 # warn if this version can't handle v-strings
22985 unless ( $tokenizer_self->{_saw_v_string} ) {
22986 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
22988 if ( $] < 5.006 ) {
22990 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
22995 sub get_input_line_number {
22996 return $tokenizer_self->{_last_line_number};
22999 # returns the next tokenized line
23004 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
23005 # $square_bracket_depth, $paren_depth
23007 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
23008 $tokenizer_self->{_line_text} = $input_line;
23010 return undef unless ($input_line);
23012 my $input_line_number = ++$tokenizer_self->{_last_line_number};
23014 # Find and remove what characters terminate this line, including any
23016 my $input_line_separator = "";
23017 if ( chomp($input_line) ) { $input_line_separator = $/ }
23019 # TODO: what other characters should be included here?
23020 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
23021 $input_line_separator = $2 . $input_line_separator;
23024 # for backwards compatibility we keep the line text terminated with
23025 # a newline character
23026 $input_line .= "\n";
23027 $tokenizer_self->{_line_text} = $input_line; # update
23029 # create a data structure describing this line which will be
23030 # returned to the caller.
23032 # _line_type codes are:
23033 # SYSTEM - system-specific code before hash-bang line
23034 # CODE - line of perl code (including comments)
23035 # POD_START - line starting pod, such as '=head'
23036 # POD - pod documentation text
23037 # POD_END - last line of pod section, '=cut'
23038 # HERE - text of here-document
23039 # HERE_END - last line of here-doc (target word)
23040 # FORMAT - format section
23041 # FORMAT_END - last line of format section, '.'
23042 # DATA_START - __DATA__ line
23043 # DATA - unidentified text following __DATA__
23044 # END_START - __END__ line
23045 # END - unidentified text following __END__
23046 # ERROR - we are in big trouble, probably not a perl script
23049 # _curly_brace_depth - depth of curly braces at start of line
23050 # _square_bracket_depth - depth of square brackets at start of line
23051 # _paren_depth - depth of parens at start of line
23052 # _starting_in_quote - this line continues a multi-line quote
23053 # (so don't trim leading blanks!)
23054 # _ending_in_quote - this line ends in a multi-line quote
23055 # (so don't trim trailing blanks!)
23056 my $line_of_tokens = {
23057 _line_type => 'EOF',
23058 _line_text => $input_line,
23059 _line_number => $input_line_number,
23060 _rtoken_type => undef,
23063 _rslevels => undef,
23064 _rblock_type => undef,
23065 _rcontainer_type => undef,
23066 _rcontainer_environment => undef,
23067 _rtype_sequence => undef,
23068 _rnesting_tokens => undef,
23069 _rci_levels => undef,
23070 _rnesting_blocks => undef,
23071 _guessed_indentation_level => 0,
23072 _starting_in_quote => 0, # to be set by subroutine
23073 _ending_in_quote => 0,
23074 _curly_brace_depth => $brace_depth,
23075 _square_bracket_depth => $square_bracket_depth,
23076 _paren_depth => $paren_depth,
23077 _quote_character => '',
23080 # must print line unchanged if we are in a here document
23081 if ( $tokenizer_self->{_in_here_doc} ) {
23083 $line_of_tokens->{_line_type} = 'HERE';
23084 my $here_doc_target = $tokenizer_self->{_here_doc_target};
23085 my $here_quote_character = $tokenizer_self->{_here_quote_character};
23086 my $candidate_target = $input_line;
23087 chomp $candidate_target;
23088 if ( $candidate_target eq $here_doc_target ) {
23089 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23090 $line_of_tokens->{_line_type} = 'HERE_END';
23091 write_logfile_entry("Exiting HERE document $here_doc_target\n");
23093 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23094 if (@$rhere_target_list) { # there can be multiple here targets
23095 ( $here_doc_target, $here_quote_character ) =
23096 @{ shift @$rhere_target_list };
23097 $tokenizer_self->{_here_doc_target} = $here_doc_target;
23098 $tokenizer_self->{_here_quote_character} =
23099 $here_quote_character;
23100 write_logfile_entry(
23101 "Entering HERE document $here_doc_target\n");
23102 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23103 $tokenizer_self->{_started_looking_for_here_target_at} =
23104 $input_line_number;
23107 $tokenizer_self->{_in_here_doc} = 0;
23108 $tokenizer_self->{_here_doc_target} = "";
23109 $tokenizer_self->{_here_quote_character} = "";
23113 # check for error of extra whitespace
23114 # note for PERL6: leading whitespace is allowed
23116 $candidate_target =~ s/\s*$//;
23117 $candidate_target =~ s/^\s*//;
23118 if ( $candidate_target eq $here_doc_target ) {
23119 $tokenizer_self->{_nearly_matched_here_target_at} =
23120 $input_line_number;
23123 return $line_of_tokens;
23126 # must print line unchanged if we are in a format section
23127 elsif ( $tokenizer_self->{_in_format} ) {
23129 if ( $input_line =~ /^\.[\s#]*$/ ) {
23130 write_logfile_entry("Exiting format section\n");
23131 $tokenizer_self->{_in_format} = 0;
23132 $line_of_tokens->{_line_type} = 'FORMAT_END';
23135 $line_of_tokens->{_line_type} = 'FORMAT';
23137 return $line_of_tokens;
23140 # must print line unchanged if we are in pod documentation
23141 elsif ( $tokenizer_self->{_in_pod} ) {
23143 $line_of_tokens->{_line_type} = 'POD';
23144 if ( $input_line =~ /^=cut/ ) {
23145 $line_of_tokens->{_line_type} = 'POD_END';
23146 write_logfile_entry("Exiting POD section\n");
23147 $tokenizer_self->{_in_pod} = 0;
23149 if ( $input_line =~ /^\#\!.*perl\b/ ) {
23151 "Hash-bang in pod can cause older versions of perl to fail! \n"
23155 return $line_of_tokens;
23158 # must print line unchanged if we have seen a severe error (i.e., we
23159 # are seeing illegal tokens and cannot continue. Syntax errors do
23160 # not pass this route). Calling routine can decide what to do, but
23161 # the default can be to just pass all lines as if they were after __END__
23162 elsif ( $tokenizer_self->{_in_error} ) {
23163 $line_of_tokens->{_line_type} = 'ERROR';
23164 return $line_of_tokens;
23167 # print line unchanged if we are __DATA__ section
23168 elsif ( $tokenizer_self->{_in_data} ) {
23170 # ...but look for POD
23171 # Note that the _in_data and _in_end flags remain set
23172 # so that we return to that state after seeing the
23173 # end of a pod section
23174 if ( $input_line =~ /^=(?!cut)/ ) {
23175 $line_of_tokens->{_line_type} = 'POD_START';
23176 write_logfile_entry("Entering POD section\n");
23177 $tokenizer_self->{_in_pod} = 1;
23178 return $line_of_tokens;
23181 $line_of_tokens->{_line_type} = 'DATA';
23182 return $line_of_tokens;
23186 # print line unchanged if we are in __END__ section
23187 elsif ( $tokenizer_self->{_in_end} ) {
23189 # ...but look for POD
23190 # Note that the _in_data and _in_end flags remain set
23191 # so that we return to that state after seeing the
23192 # end of a pod section
23193 if ( $input_line =~ /^=(?!cut)/ ) {
23194 $line_of_tokens->{_line_type} = 'POD_START';
23195 write_logfile_entry("Entering POD section\n");
23196 $tokenizer_self->{_in_pod} = 1;
23197 return $line_of_tokens;
23200 $line_of_tokens->{_line_type} = 'END';
23201 return $line_of_tokens;
23205 # check for a hash-bang line if we haven't seen one
23206 if ( !$tokenizer_self->{_saw_hash_bang} ) {
23207 if ( $input_line =~ /^\#\!.*perl\b/ ) {
23208 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
23210 # check for -w and -P flags
23211 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
23212 $tokenizer_self->{_saw_perl_dash_P} = 1;
23215 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
23216 $tokenizer_self->{_saw_perl_dash_w} = 1;
23219 if ( ( $input_line_number > 1 )
23220 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
23223 # this is helpful for VMS systems; we may have accidentally
23224 # tokenized some DCL commands
23225 if ( $tokenizer_self->{_started_tokenizing} ) {
23227 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
23231 complain("Useless hash-bang after line 1\n");
23235 # Report the leading hash-bang as a system line
23236 # This will prevent -dac from deleting it
23238 $line_of_tokens->{_line_type} = 'SYSTEM';
23239 return $line_of_tokens;
23244 # wait for a hash-bang before parsing if the user invoked us with -x
23245 if ( $tokenizer_self->{_look_for_hash_bang}
23246 && !$tokenizer_self->{_saw_hash_bang} )
23248 $line_of_tokens->{_line_type} = 'SYSTEM';
23249 return $line_of_tokens;
23252 # a first line of the form ': #' will be marked as SYSTEM
23253 # since lines of this form may be used by tcsh
23254 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
23255 $line_of_tokens->{_line_type} = 'SYSTEM';
23256 return $line_of_tokens;
23259 # now we know that it is ok to tokenize the line...
23260 # the line tokenizer will modify any of these private variables:
23261 # _rhere_target_list
23268 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
23269 tokenize_this_line($line_of_tokens);
23271 # Now finish defining the return structure and return it
23272 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
23274 # handle severe error (binary data in script)
23275 if ( $tokenizer_self->{_in_error} ) {
23276 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
23277 warning("Giving up after error\n");
23278 $line_of_tokens->{_line_type} = 'ERROR';
23279 reset_indentation_level(0); # avoid error messages
23280 return $line_of_tokens;
23283 # handle start of pod documentation
23284 if ( $tokenizer_self->{_in_pod} ) {
23286 # This gets tricky..above a __DATA__ or __END__ section, perl
23287 # accepts '=cut' as the start of pod section. But afterwards,
23288 # only pod utilities see it and they may ignore an =cut without
23289 # leading =head. In any case, this isn't good.
23290 if ( $input_line =~ /^=cut\b/ ) {
23291 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
23292 complain("=cut while not in pod ignored\n");
23293 $tokenizer_self->{_in_pod} = 0;
23294 $line_of_tokens->{_line_type} = 'POD_END';
23297 $line_of_tokens->{_line_type} = 'POD_START';
23299 "=cut starts a pod section .. this can fool pod utilities.\n"
23301 write_logfile_entry("Entering POD section\n");
23306 $line_of_tokens->{_line_type} = 'POD_START';
23307 write_logfile_entry("Entering POD section\n");
23310 return $line_of_tokens;
23313 # update indentation levels for log messages
23314 if ( $input_line !~ /^\s*$/ ) {
23315 my $rlevels = $line_of_tokens->{_rlevels};
23316 $line_of_tokens->{_guessed_indentation_level} =
23317 guess_old_indentation_level($input_line);
23320 # see if this line contains here doc targets
23321 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23322 if (@$rhere_target_list) {
23324 my ( $here_doc_target, $here_quote_character ) =
23325 @{ shift @$rhere_target_list };
23326 $tokenizer_self->{_in_here_doc} = 1;
23327 $tokenizer_self->{_here_doc_target} = $here_doc_target;
23328 $tokenizer_self->{_here_quote_character} = $here_quote_character;
23329 write_logfile_entry("Entering HERE document $here_doc_target\n");
23330 $tokenizer_self->{_started_looking_for_here_target_at} =
23331 $input_line_number;
23334 # NOTE: __END__ and __DATA__ statements are written unformatted
23335 # because they can theoretically contain additional characters
23336 # which are not tokenized (and cannot be read with <DATA> either!).
23337 if ( $tokenizer_self->{_in_data} ) {
23338 $line_of_tokens->{_line_type} = 'DATA_START';
23339 write_logfile_entry("Starting __DATA__ section\n");
23340 $tokenizer_self->{_saw_data} = 1;
23342 # keep parsing after __DATA__ if use SelfLoader was seen
23343 if ( $tokenizer_self->{_saw_selfloader} ) {
23344 $tokenizer_self->{_in_data} = 0;
23345 write_logfile_entry(
23346 "SelfLoader seen, continuing; -nlsl deactivates\n");
23349 return $line_of_tokens;
23352 elsif ( $tokenizer_self->{_in_end} ) {
23353 $line_of_tokens->{_line_type} = 'END_START';
23354 write_logfile_entry("Starting __END__ section\n");
23355 $tokenizer_self->{_saw_end} = 1;
23357 # keep parsing after __END__ if use AutoLoader was seen
23358 if ( $tokenizer_self->{_saw_autoloader} ) {
23359 $tokenizer_self->{_in_end} = 0;
23360 write_logfile_entry(
23361 "AutoLoader seen, continuing; -nlal deactivates\n");
23363 return $line_of_tokens;
23366 # now, finally, we know that this line is type 'CODE'
23367 $line_of_tokens->{_line_type} = 'CODE';
23369 # remember if we have seen any real code
23370 if ( !$tokenizer_self->{_started_tokenizing}
23371 && $input_line !~ /^\s*$/
23372 && $input_line !~ /^\s*#/ )
23374 $tokenizer_self->{_started_tokenizing} = 1;
23377 if ( $tokenizer_self->{_debugger_object} ) {
23378 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
23381 # Note: if keyword 'format' occurs in this line code, it is still CODE
23382 # (keyword 'format' need not start a line)
23383 if ( $tokenizer_self->{_in_format} ) {
23384 write_logfile_entry("Entering format section\n");
23387 if ( $tokenizer_self->{_in_quote}
23388 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
23391 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
23393 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
23395 $tokenizer_self->{_line_start_quote} = $input_line_number;
23396 write_logfile_entry(
23397 "Start multi-line quote or pattern ending in $quote_target\n");
23400 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
23401 and !$tokenizer_self->{_in_quote} )
23403 $tokenizer_self->{_line_start_quote} = -1;
23404 write_logfile_entry("End of multi-line quote or pattern\n");
23407 # we are returning a line of CODE
23408 return $line_of_tokens;
23411 sub find_starting_indentation_level {
23413 # We need to find the indentation level of the first line of the
23414 # script being formatted. Often it will be zero for an entire file,
23415 # but if we are formatting a local block of code (within an editor for
23416 # example) it may not be zero. The user may specify this with the
23417 # -sil=n parameter but normally doesn't so we have to guess.
23419 # USES GLOBAL VARIABLES: $tokenizer_self
23420 my $starting_level = 0;
23422 # use value if given as parameter
23423 if ( $tokenizer_self->{_know_starting_level} ) {
23424 $starting_level = $tokenizer_self->{_starting_level};
23427 # if we know there is a hash_bang line, the level must be zero
23428 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
23429 $tokenizer_self->{_know_starting_level} = 1;
23432 # otherwise figure it out from the input file
23437 # keep looking at lines until we find a hash bang or piece of code
23440 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23443 # if first line is #! then assume starting level is zero
23444 if ( $i == 1 && $line =~ /^\#\!/ ) {
23445 $starting_level = 0;
23448 next if ( $line =~ /^\s*#/ ); # skip past comments
23449 next if ( $line =~ /^\s*$/ ); # skip past blank lines
23450 $starting_level = guess_old_indentation_level($line);
23453 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
23454 write_logfile_entry("$msg");
23456 $tokenizer_self->{_starting_level} = $starting_level;
23457 reset_indentation_level($starting_level);
23460 sub guess_old_indentation_level {
23463 # Guess the indentation level of an input line.
23465 # For the first line of code this result will define the starting
23466 # indentation level. It will mainly be non-zero when perltidy is applied
23467 # within an editor to a local block of code.
23469 # This is an impossible task in general because we can't know what tabs
23470 # meant for the old script and how many spaces were used for one
23471 # indentation level in the given input script. For example it may have
23472 # been previously formatted with -i=7 -et=3. But we can at least try to
23473 # make sure that perltidy guesses correctly if it is applied repeatedly to
23474 # a block of code within an editor, so that the block stays at the same
23475 # level when perltidy is applied repeatedly.
23477 # USES GLOBAL VARIABLES: $tokenizer_self
23480 # find leading tabs, spaces, and any statement label
23482 if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
23484 # If there are leading tabs, we use the tab scheme for this run, if
23485 # any, so that the code will remain stable when editing.
23486 if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
23488 if ($2) { $spaces += length($2) }
23490 # correct for outdented labels
23491 if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
23492 $spaces += $tokenizer_self->{_continuation_indentation};
23496 # compute indentation using the value of -i for this run.
23497 # If -i=0 is used for this run (which is possible) it doesn't matter
23498 # what we do here but we'll guess that the old run used 4 spaces per level.
23499 my $indent_columns = $tokenizer_self->{_indent_columns};
23500 $indent_columns = 4 if ( !$indent_columns );
23501 $level = int( $spaces / $indent_columns );
23505 # This is a currently unused debug routine
23506 sub dump_functions {
23510 foreach $pkg ( keys %is_user_function ) {
23511 print $fh "\nnon-constant subs in package $pkg\n";
23513 foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
23515 if ( $is_block_list_function{$pkg}{$sub} ) {
23516 $msg = 'block_list';
23519 if ( $is_block_function{$pkg}{$sub} ) {
23522 print $fh "$sub $msg\n";
23526 foreach $pkg ( keys %is_constant ) {
23527 print $fh "\nconstants and constant subs in package $pkg\n";
23529 foreach $sub ( keys %{ $is_constant{$pkg} } ) {
23530 print $fh "$sub\n";
23537 # count number of 1's in a string of 1's and 0's
23538 # example: ones_count("010101010101") gives 6
23539 return ( my $cis = $_[0] ) =~ tr/1/0/;
23542 sub prepare_for_a_new_file {
23544 # previous tokens needed to determine what to expect next
23545 $last_nonblank_token = ';'; # the only possible starting state which
23546 $last_nonblank_type = ';'; # will make a leading brace a code block
23547 $last_nonblank_block_type = '';
23549 # scalars for remembering statement types across multiple lines
23550 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
23551 $in_attribute_list = 0;
23553 # scalars for remembering where we are in the file
23554 $current_package = "main";
23555 $context = UNKNOWN_CONTEXT;
23557 # hashes used to remember function information
23558 %is_constant = (); # user-defined constants
23559 %is_user_function = (); # user-defined functions
23560 %user_function_prototype = (); # their prototypes
23561 %is_block_function = ();
23562 %is_block_list_function = ();
23563 %saw_function_definition = ();
23565 # variables used to track depths of various containers
23566 # and report nesting errors
23569 $square_bracket_depth = 0;
23570 @current_depth[ 0 .. $#closing_brace_names ] =
23571 (0) x scalar @closing_brace_names;
23574 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
23575 ( 0 .. $#closing_brace_names );
23576 @current_sequence_number = ();
23577 $paren_type[$paren_depth] = '';
23578 $paren_semicolon_count[$paren_depth] = 0;
23579 $paren_structural_type[$brace_depth] = '';
23580 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
23581 $brace_structural_type[$brace_depth] = '';
23582 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
23583 $brace_package[$paren_depth] = $current_package;
23584 $square_bracket_type[$square_bracket_depth] = '';
23585 $square_bracket_structural_type[$square_bracket_depth] = '';
23587 initialize_tokenizer_state();
23590 { # begin tokenize_this_line
23592 use constant BRACE => 0;
23593 use constant SQUARE_BRACKET => 1;
23594 use constant PAREN => 2;
23595 use constant QUESTION_COLON => 3;
23597 # TV1: scalars for processing one LINE.
23598 # Re-initialized on each entry to sub tokenize_this_line.
23600 $block_type, $container_type, $expecting,
23601 $i, $i_tok, $input_line,
23602 $input_line_number, $last_nonblank_i, $max_token_index,
23603 $next_tok, $next_type, $peeked_ahead,
23604 $prototype, $rhere_target_list, $rtoken_map,
23605 $rtoken_type, $rtokens, $tok,
23606 $type, $type_sequence, $indent_flag,
23609 # TV2: refs to ARRAYS for processing one LINE
23610 # Re-initialized on each call.
23611 my $routput_token_list = []; # stack of output token indexes
23612 my $routput_token_type = []; # token types
23613 my $routput_block_type = []; # types of code block
23614 my $routput_container_type = []; # paren types, such as if, elsif, ..
23615 my $routput_type_sequence = []; # nesting sequential number
23616 my $routput_indent_flag = []; #
23618 # TV3: SCALARS for quote variables. These are initialized with a
23619 # subroutine call and continually updated as lines are processed.
23620 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
23621 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
23623 # TV4: SCALARS for multi-line identifiers and
23624 # statements. These are initialized with a subroutine call
23625 # and continually updated as lines are processed.
23626 my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
23628 # TV5: SCALARS for tracking indentation level.
23629 # Initialized once and continually updated as lines are
23632 $nesting_token_string, $nesting_type_string,
23633 $nesting_block_string, $nesting_block_flag,
23634 $nesting_list_string, $nesting_list_flag,
23635 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23636 $in_statement_continuation, $level_in_tokenizer,
23637 $slevel_in_tokenizer, $rslevel_stack,
23640 # TV6: SCALARS for remembering several previous
23641 # tokens. Initialized once and continually updated as
23642 # lines are processed.
23644 $last_nonblank_container_type, $last_nonblank_type_sequence,
23645 $last_last_nonblank_token, $last_last_nonblank_type,
23646 $last_last_nonblank_block_type, $last_last_nonblank_container_type,
23647 $last_last_nonblank_type_sequence, $last_nonblank_prototype,
23650 # ----------------------------------------------------------------
23651 # beginning of tokenizer variable access and manipulation routines
23652 # ----------------------------------------------------------------
23654 sub initialize_tokenizer_state {
23656 # TV1: initialized on each call
23657 # TV2: initialized on each call
23661 $quote_character = "";
23664 $quoted_string_1 = "";
23665 $quoted_string_2 = "";
23666 $allowed_quote_modifiers = "";
23669 $id_scan_state = '';
23672 $indented_if_level = 0;
23675 $nesting_token_string = "";
23676 $nesting_type_string = "";
23677 $nesting_block_string = '1'; # initially in a block
23678 $nesting_block_flag = 1;
23679 $nesting_list_string = '0'; # initially not in a list
23680 $nesting_list_flag = 0; # initially not in a list
23681 $ci_string_in_tokenizer = "";
23682 $continuation_string_in_tokenizer = "0";
23683 $in_statement_continuation = 0;
23684 $level_in_tokenizer = 0;
23685 $slevel_in_tokenizer = 0;
23686 $rslevel_stack = [];
23689 $last_nonblank_container_type = '';
23690 $last_nonblank_type_sequence = '';
23691 $last_last_nonblank_token = ';';
23692 $last_last_nonblank_type = ';';
23693 $last_last_nonblank_block_type = '';
23694 $last_last_nonblank_container_type = '';
23695 $last_last_nonblank_type_sequence = '';
23696 $last_nonblank_prototype = "";
23699 sub save_tokenizer_state {
23702 $block_type, $container_type, $expecting,
23703 $i, $i_tok, $input_line,
23704 $input_line_number, $last_nonblank_i, $max_token_index,
23705 $next_tok, $next_type, $peeked_ahead,
23706 $prototype, $rhere_target_list, $rtoken_map,
23707 $rtoken_type, $rtokens, $tok,
23708 $type, $type_sequence, $indent_flag,
23712 $routput_token_list, $routput_token_type,
23713 $routput_block_type, $routput_container_type,
23714 $routput_type_sequence, $routput_indent_flag,
23718 $in_quote, $quote_type,
23719 $quote_character, $quote_pos,
23720 $quote_depth, $quoted_string_1,
23721 $quoted_string_2, $allowed_quote_modifiers,
23725 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
23728 $nesting_token_string, $nesting_type_string,
23729 $nesting_block_string, $nesting_block_flag,
23730 $nesting_list_string, $nesting_list_flag,
23731 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23732 $in_statement_continuation, $level_in_tokenizer,
23733 $slevel_in_tokenizer, $rslevel_stack,
23737 $last_nonblank_container_type,
23738 $last_nonblank_type_sequence,
23739 $last_last_nonblank_token,
23740 $last_last_nonblank_type,
23741 $last_last_nonblank_block_type,
23742 $last_last_nonblank_container_type,
23743 $last_last_nonblank_type_sequence,
23744 $last_nonblank_prototype,
23746 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
23749 sub restore_tokenizer_state {
23751 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
23753 $block_type, $container_type, $expecting,
23754 $i, $i_tok, $input_line,
23755 $input_line_number, $last_nonblank_i, $max_token_index,
23756 $next_tok, $next_type, $peeked_ahead,
23757 $prototype, $rhere_target_list, $rtoken_map,
23758 $rtoken_type, $rtokens, $tok,
23759 $type, $type_sequence, $indent_flag,
23763 $routput_token_list, $routput_token_type,
23764 $routput_block_type, $routput_container_type,
23765 $routput_type_sequence, $routput_type_sequence,
23769 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
23770 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
23773 ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
23777 $nesting_token_string, $nesting_type_string,
23778 $nesting_block_string, $nesting_block_flag,
23779 $nesting_list_string, $nesting_list_flag,
23780 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23781 $in_statement_continuation, $level_in_tokenizer,
23782 $slevel_in_tokenizer, $rslevel_stack,
23786 $last_nonblank_container_type,
23787 $last_nonblank_type_sequence,
23788 $last_last_nonblank_token,
23789 $last_last_nonblank_type,
23790 $last_last_nonblank_block_type,
23791 $last_last_nonblank_container_type,
23792 $last_last_nonblank_type_sequence,
23793 $last_nonblank_prototype,
23797 sub get_indentation_level {
23799 # patch to avoid reporting error if indented if is not terminated
23800 if ($indented_if_level) { return $level_in_tokenizer - 1 }
23801 return $level_in_tokenizer;
23804 sub reset_indentation_level {
23805 $level_in_tokenizer = $_[0];
23806 $slevel_in_tokenizer = $_[0];
23807 push @{$rslevel_stack}, $slevel_in_tokenizer;
23811 $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
23814 # ------------------------------------------------------------
23815 # end of tokenizer variable access and manipulation routines
23816 # ------------------------------------------------------------
23818 # ------------------------------------------------------------
23819 # beginning of various scanner interface routines
23820 # ------------------------------------------------------------
23821 sub scan_replacement_text {
23823 # check for here-docs in replacement text invoked by
23824 # a substitution operator with executable modifier 'e'.
23827 # $replacement_text
23829 # $rht = reference to any here-doc targets
23830 my ($replacement_text) = @_;
23833 return undef unless ( $replacement_text =~ /<</ );
23835 write_logfile_entry("scanning replacement text for here-doc targets\n");
23837 # save the logger object for error messages
23838 my $logger_object = $tokenizer_self->{_logger_object};
23840 # localize all package variables
23842 $tokenizer_self, $last_nonblank_token,
23843 $last_nonblank_type, $last_nonblank_block_type,
23844 $statement_type, $in_attribute_list,
23845 $current_package, $context,
23846 %is_constant, %is_user_function,
23847 %user_function_prototype, %is_block_function,
23848 %is_block_list_function, %saw_function_definition,
23849 $brace_depth, $paren_depth,
23850 $square_bracket_depth, @current_depth,
23851 @total_depth, $total_depth,
23852 @nesting_sequence_number, @current_sequence_number,
23853 @paren_type, @paren_semicolon_count,
23854 @paren_structural_type, @brace_type,
23855 @brace_structural_type, @brace_context,
23856 @brace_package, @square_bracket_type,
23857 @square_bracket_structural_type, @depth_array,
23858 @starting_line_of_current_depth, @nested_ternary_flag,
23859 @nested_statement_type,
23862 # save all lexical variables
23863 my $rstate = save_tokenizer_state();
23864 _decrement_count(); # avoid error check for multiple tokenizers
23866 # make a new tokenizer
23868 my $rpending_logfile_message;
23869 my $source_object =
23870 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
23871 $rpending_logfile_message );
23872 my $tokenizer = Perl::Tidy::Tokenizer->new(
23873 source_object => $source_object,
23874 logger_object => $logger_object,
23875 starting_line_number => $input_line_number,
23878 # scan the replacement text
23879 1 while ( $tokenizer->get_line() );
23881 # remove any here doc targets
23883 if ( $tokenizer_self->{_in_here_doc} ) {
23887 $tokenizer_self->{_here_doc_target},
23888 $tokenizer_self->{_here_quote_character}
23890 if ( $tokenizer_self->{_rhere_target_list} ) {
23891 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
23892 $tokenizer_self->{_rhere_target_list} = undef;
23894 $tokenizer_self->{_in_here_doc} = undef;
23897 # now its safe to report errors
23898 $tokenizer->report_tokenization_errors();
23900 # restore all tokenizer lexical variables
23901 restore_tokenizer_state($rstate);
23903 # return the here doc targets
23907 sub scan_bare_identifier {
23908 ( $i, $tok, $type, $prototype ) =
23909 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
23910 $rtoken_map, $max_token_index );
23913 sub scan_identifier {
23914 ( $i, $tok, $type, $id_scan_state, $identifier ) =
23915 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
23916 $max_token_index, $expecting );
23920 ( $i, $tok, $type, $id_scan_state ) =
23921 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
23922 $id_scan_state, $max_token_index );
23927 ( $i, $type, $number ) =
23928 scan_number_do( $input_line, $i, $rtoken_map, $type,
23929 $max_token_index );
23933 # a sub to warn if token found where term expected
23934 sub error_if_expecting_TERM {
23935 if ( $expecting == TERM ) {
23936 if ( $really_want_term{$last_nonblank_type} ) {
23937 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
23938 $rtoken_type, $input_line );
23944 # a sub to warn if token found where operator expected
23945 sub error_if_expecting_OPERATOR {
23946 if ( $expecting == OPERATOR ) {
23947 my $thing = defined $_[0] ? $_[0] : $tok;
23948 unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
23949 $rtoken_map, $rtoken_type, $input_line );
23950 if ( $i_tok == 0 ) {
23951 interrupt_logfile();
23952 warning("Missing ';' above?\n");
23959 # ------------------------------------------------------------
23960 # end scanner interfaces
23961 # ------------------------------------------------------------
23963 my %is_for_foreach;
23964 @_ = qw(for foreach);
23965 @is_for_foreach{@_} = (1) x scalar(@_);
23969 @is_my_our{@_} = (1) x scalar(@_);
23971 # These keywords may introduce blocks after parenthesized expressions,
23973 # keyword ( .... ) { BLOCK }
23974 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
23975 my %is_blocktype_with_paren;
23976 @_ = qw(if elsif unless while until for foreach switch case given when);
23977 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
23979 # ------------------------------------------------------------
23980 # begin hash of code for handling most token types
23981 # ------------------------------------------------------------
23982 my $tokenization_code = {
23984 # no special code for these types yet, but syntax checks
24019 error_if_expecting_TERM()
24020 if ( $expecting == TERM );
24023 error_if_expecting_TERM()
24024 if ( $expecting == TERM );
24028 # start looking for a scalar
24029 error_if_expecting_OPERATOR("Scalar")
24030 if ( $expecting == OPERATOR );
24033 if ( $identifier eq '$^W' ) {
24034 $tokenizer_self->{_saw_perl_dash_w} = 1;
24037 # Check for identifier in indirect object slot
24038 # (vorboard.pl, sort.t). Something like:
24039 # /^(print|printf|sort|exec|system)$/
24041 $is_indirect_object_taker{$last_nonblank_token}
24043 || ( ( $last_nonblank_token eq '(' )
24044 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
24045 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
24054 $paren_semicolon_count[$paren_depth] = 0;
24056 $container_type = $want_paren;
24060 $container_type = $last_nonblank_token;
24062 # We can check for a syntax error here of unexpected '(',
24063 # but this is going to get messy...
24065 $expecting == OPERATOR
24067 # be sure this is not a method call of the form
24068 # &method(...), $method->(..), &{method}(...),
24069 # $ref[2](list) is ok & short for $ref[2]->(list)
24070 # NOTE: at present, braces in something like &{ xxx }
24071 # are not marked as a block, we might have a method call
24072 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
24077 # ref: camel 3 p 703.
24078 if ( $last_last_nonblank_token eq 'do' ) {
24080 "do SUBROUTINE is deprecated; consider & or -> notation\n"
24085 # if this is an empty list, (), then it is not an
24086 # error; for example, we might have a constant pi and
24087 # invoke it with pi() or just pi;
24088 my ( $next_nonblank_token, $i_next ) =
24089 find_next_nonblank_token( $i, $rtokens,
24090 $max_token_index );
24091 if ( $next_nonblank_token ne ')' ) {
24093 error_if_expecting_OPERATOR('(');
24095 if ( $last_nonblank_type eq 'C' ) {
24097 "$last_nonblank_token has a void prototype\n";
24099 elsif ( $last_nonblank_type eq 'i' ) {
24101 && $last_nonblank_token =~ /^\$/ )
24104 "Do you mean '$last_nonblank_token->(' ?\n";
24108 interrupt_logfile();
24112 } ## end if ( $next_nonblank_token...
24113 } ## end else [ if ( $last_last_nonblank_token...
24114 } ## end if ( $expecting == OPERATOR...
24116 $paren_type[$paren_depth] = $container_type;
24117 ( $type_sequence, $indent_flag ) =
24118 increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24120 # propagate types down through nested parens
24121 # for example: the second paren in 'if ((' would be structural
24122 # since the first is.
24124 if ( $last_nonblank_token eq '(' ) {
24125 $type = $last_nonblank_type;
24128 # We exclude parens as structural after a ',' because it
24129 # causes subtle problems with continuation indentation for
24130 # something like this, where the first 'or' will not get
24135 # ( not defined $check )
24137 # or $check eq "new"
24138 # or $check eq "old",
24141 # Likewise, we exclude parens where a statement can start
24142 # because of problems with continuation indentation, like
24145 # ($firstline =~ /^#\!.*perl/)
24146 # and (print $File::Find::name, "\n")
24149 # (ref($usage_fref) =~ /CODE/)
24151 # : (&blast_usage, &blast_params, &blast_general_params);
24157 if ( $last_nonblank_type eq ')' ) {
24159 "Syntax error? found token '$last_nonblank_type' then '('\n"
24162 $paren_structural_type[$paren_depth] = $type;
24166 ( $type_sequence, $indent_flag ) =
24167 decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24169 if ( $paren_structural_type[$paren_depth] eq '{' ) {
24173 $container_type = $paren_type[$paren_depth];
24175 # /^(for|foreach)$/
24176 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
24177 my $num_sc = $paren_semicolon_count[$paren_depth];
24178 if ( $num_sc > 0 && $num_sc != 2 ) {
24179 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
24183 if ( $paren_depth > 0 ) { $paren_depth-- }
24186 if ( $last_nonblank_type eq ',' ) {
24187 complain("Repeated ','s \n");
24190 # patch for operator_expected: note if we are in the list (use.t)
24191 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
24192 ## FIXME: need to move this elsewhere, perhaps check after a '('
24193 ## elsif ($last_nonblank_token eq '(') {
24194 ## warning("Leading ','s illegal in some versions of perl\n");
24198 $context = UNKNOWN_CONTEXT;
24199 $statement_type = '';
24202 # /^(for|foreach)$/
24203 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
24204 { # mark ; in for loop
24206 # Be careful: we do not want a semicolon such as the
24207 # following to be included:
24209 # for (sort {strcoll($a,$b);} keys %investments) {
24211 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
24212 && $square_bracket_depth ==
24213 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
24217 $paren_semicolon_count[$paren_depth]++;
24223 error_if_expecting_OPERATOR("String")
24224 if ( $expecting == OPERATOR );
24227 $allowed_quote_modifiers = "";
24230 error_if_expecting_OPERATOR("String")
24231 if ( $expecting == OPERATOR );
24234 $allowed_quote_modifiers = "";
24237 error_if_expecting_OPERATOR("String")
24238 if ( $expecting == OPERATOR );
24241 $allowed_quote_modifiers = "";
24246 if ( $expecting == UNKNOWN ) { # indeterminate, must guess..
24248 ( $is_pattern, $msg ) =
24249 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
24250 $max_token_index );
24253 write_diagnostics("DIVIDE:$msg\n");
24254 write_logfile_entry($msg);
24257 else { $is_pattern = ( $expecting == TERM ) }
24262 $allowed_quote_modifiers = '[msixpodualgc]';
24264 else { # not a pattern; check for a /= token
24266 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
24272 #DEBUG - collecting info on what tokens follow a divide
24273 # for development of guessing algorithm
24274 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
24275 # #write_diagnostics( "DIVIDE? $input_line\n" );
24281 # if we just saw a ')', we will label this block with
24282 # its type. We need to do this to allow sub
24283 # code_block_type to determine if this brace starts a
24284 # code block or anonymous hash. (The type of a paren
24285 # pair is the preceding token, such as 'if', 'else',
24287 $container_type = "";
24289 # ATTRS: for a '{' following an attribute list, reset
24290 # things to look like we just saw the sub name
24291 if ( $statement_type =~ /^sub/ ) {
24292 $last_nonblank_token = $statement_type;
24293 $last_nonblank_type = 'i';
24294 $statement_type = "";
24297 # patch for SWITCH/CASE: hide these keywords from an immediately
24298 # following opening brace
24299 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
24300 && $statement_type eq $last_nonblank_token )
24302 $last_nonblank_token = ";";
24305 elsif ( $last_nonblank_token eq ')' ) {
24306 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
24308 # defensive move in case of a nesting error (pbug.t)
24309 # in which this ')' had no previous '('
24310 # this nesting error will have been caught
24311 if ( !defined($last_nonblank_token) ) {
24312 $last_nonblank_token = 'if';
24315 # check for syntax error here;
24316 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
24317 my $list = join( ' ', sort keys %is_blocktype_with_paren );
24319 "syntax error at ') {', didn't see one of: $list\n");
24323 # patch for paren-less for/foreach glitch, part 2.
24324 # see note below under 'qw'
24325 elsif ($last_nonblank_token eq 'qw'
24326 && $is_for_foreach{$want_paren} )
24328 $last_nonblank_token = $want_paren;
24329 if ( $last_last_nonblank_token eq $want_paren ) {
24331 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
24338 # now identify which of the three possible types of
24339 # curly braces we have: hash index container, anonymous
24340 # hash reference, or code block.
24342 # non-structural (hash index) curly brace pair
24343 # get marked 'L' and 'R'
24344 if ( is_non_structural_brace() ) {
24347 # patch for SWITCH/CASE:
24348 # allow paren-less identifier after 'when'
24349 # if the brace is preceded by a space
24350 if ( $statement_type eq 'when'
24351 && $last_nonblank_type eq 'i'
24352 && $last_last_nonblank_type eq 'k'
24353 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
24356 $block_type = $statement_type;
24360 # code and anonymous hash have the same type, '{', but are
24361 # distinguished by 'block_type',
24362 # which will be blank for an anonymous hash
24365 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
24366 $max_token_index );
24368 # remember a preceding smartmatch operator
24370 ##if ( $last_nonblank_type eq '~~' ) {
24371 ## $block_type = $last_nonblank_type;
24374 # patch to promote bareword type to function taking block
24376 && $last_nonblank_type eq 'w'
24377 && $last_nonblank_i >= 0 )
24379 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
24380 $routput_token_type->[$last_nonblank_i] = 'G';
24384 # patch for SWITCH/CASE: if we find a stray opening block brace
24385 # where we might accept a 'case' or 'when' block, then take it
24386 if ( $statement_type eq 'case'
24387 || $statement_type eq 'when' )
24389 if ( !$block_type || $block_type eq '}' ) {
24390 $block_type = $statement_type;
24394 $brace_type[ ++$brace_depth ] = $block_type;
24395 $brace_package[$brace_depth] = $current_package;
24396 $brace_structural_type[$brace_depth] = $type;
24397 $brace_context[$brace_depth] = $context;
24398 ( $type_sequence, $indent_flag ) =
24399 increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24402 $block_type = $brace_type[$brace_depth];
24403 if ($block_type) { $statement_type = '' }
24404 if ( defined( $brace_package[$brace_depth] ) ) {
24405 $current_package = $brace_package[$brace_depth];
24408 # can happen on brace error (caught elsewhere)
24411 ( $type_sequence, $indent_flag ) =
24412 decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24414 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
24418 # propagate type information for 'do' and 'eval' blocks, and also
24419 # for smartmatch operator. This is necessary to enable us to know
24420 # if an operator or term is expected next.
24422 ##if ( $is_block_operator{$block_type} || $block_type eq '~~' ) {
24423 if ( $is_block_operator{$block_type} ) {
24424 $tok = $block_type;
24427 $context = $brace_context[$brace_depth];
24428 if ( $brace_depth > 0 ) { $brace_depth--; }
24430 '&' => sub { # maybe sub call? start looking
24432 # We have to check for sub call unless we are sure we
24433 # are expecting an operator. This example from s2p
24434 # got mistaken as a q operator in an early version:
24435 # print BODY &q(<<'EOT');
24436 if ( $expecting != OPERATOR ) {
24438 # But only look for a sub call if we are expecting a term or
24439 # if there is no existing space after the &.
24440 # For example we probably don't want & as sub call here:
24441 # Fcntl::S_IRUSR & $mode;
24442 if ( $expecting == TERM || $next_type ne 'b' ) {
24449 '<' => sub { # angle operator or less than?
24451 if ( $expecting != OPERATOR ) {
24453 find_angle_operator_termination( $input_line, $i, $rtoken_map,
24454 $expecting, $max_token_index );
24456 if ( $type eq '<' && $expecting == TERM ) {
24457 error_if_expecting_TERM();
24458 interrupt_logfile();
24459 warning("Unterminated <> operator?\n");
24466 '?' => sub { # ?: conditional or starting pattern?
24470 if ( $expecting == UNKNOWN ) {
24473 ( $is_pattern, $msg ) =
24474 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
24475 $max_token_index );
24477 if ($msg) { write_logfile_entry($msg) }
24479 else { $is_pattern = ( $expecting == TERM ) }
24484 $allowed_quote_modifiers = '[msixpodualgc]';
24487 ( $type_sequence, $indent_flag ) =
24488 increase_nesting_depth( QUESTION_COLON,
24489 $$rtoken_map[$i_tok] );
24492 '*' => sub { # typeglob, or multiply?
24494 if ( $expecting == TERM ) {
24499 if ( $$rtokens[ $i + 1 ] eq '=' ) {
24504 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
24508 if ( $$rtokens[ $i + 1 ] eq '=' ) {
24516 '.' => sub { # what kind of . ?
24518 if ( $expecting != OPERATOR ) {
24520 if ( $type eq '.' ) {
24521 error_if_expecting_TERM()
24522 if ( $expecting == TERM );
24530 # if this is the first nonblank character, call it a label
24531 # since perl seems to just swallow it
24532 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
24536 # ATTRS: check for a ':' which introduces an attribute list
24537 # (this might eventually get its own token type)
24538 elsif ( $statement_type =~ /^sub/ ) {
24540 $in_attribute_list = 1;
24543 # check for scalar attribute, such as
24544 # my $foo : shared = 1;
24545 elsif ($is_my_our{$statement_type}
24546 && $current_depth[QUESTION_COLON] == 0 )
24549 $in_attribute_list = 1;
24552 # otherwise, it should be part of a ?/: operator
24554 ( $type_sequence, $indent_flag ) =
24555 decrease_nesting_depth( QUESTION_COLON,
24556 $$rtoken_map[$i_tok] );
24557 if ( $last_nonblank_token eq '?' ) {
24558 warning("Syntax error near ? :\n");
24562 '+' => sub { # what kind of plus?
24564 if ( $expecting == TERM ) {
24565 my $number = scan_number();
24567 # unary plus is safest assumption if not a number
24568 if ( !defined($number) ) { $type = 'p'; }
24570 elsif ( $expecting == OPERATOR ) {
24573 if ( $next_type eq 'w' ) { $type = 'p' }
24578 error_if_expecting_OPERATOR("Array")
24579 if ( $expecting == OPERATOR );
24582 '%' => sub { # hash or modulo?
24584 # first guess is hash if no following blank
24585 if ( $expecting == UNKNOWN ) {
24586 if ( $next_type ne 'b' ) { $expecting = TERM }
24588 if ( $expecting == TERM ) {
24593 $square_bracket_type[ ++$square_bracket_depth ] =
24594 $last_nonblank_token;
24595 ( $type_sequence, $indent_flag ) =
24596 increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24598 # It may seem odd, but structural square brackets have
24599 # type '{' and '}'. This simplifies the indentation logic.
24600 if ( !is_non_structural_brace() ) {
24603 $square_bracket_structural_type[$square_bracket_depth] = $type;
24606 ( $type_sequence, $indent_flag ) =
24607 decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24609 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
24614 # propagate type information for smartmatch operator. This is
24615 # necessary to enable us to know if an operator or term is expected
24617 if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
24618 $tok = $square_bracket_type[$square_bracket_depth];
24621 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
24623 '-' => sub { # what kind of minus?
24625 if ( ( $expecting != OPERATOR )
24626 && $is_file_test_operator{$next_tok} )
24628 my ( $next_nonblank_token, $i_next ) =
24629 find_next_nonblank_token( $i + 1, $rtokens,
24630 $max_token_index );
24632 # check for a quoted word like "-w=>xx";
24633 # it is sufficient to just check for a following '='
24634 if ( $next_nonblank_token eq '=' ) {
24643 elsif ( $expecting == TERM ) {
24644 my $number = scan_number();
24646 # maybe part of bareword token? unary is safest
24647 if ( !defined($number) ) { $type = 'm'; }
24650 elsif ( $expecting == OPERATOR ) {
24654 if ( $next_type eq 'w' ) {
24662 # check for special variables like ${^WARNING_BITS}
24663 if ( $expecting == TERM ) {
24665 # FIXME: this should work but will not catch errors
24666 # because we also have to be sure that previous token is
24667 # a type character ($,@,%).
24668 if ( $last_nonblank_token eq '{'
24669 && ( $next_tok =~ /^[A-Za-z_]/ ) )
24672 if ( $next_tok eq 'W' ) {
24673 $tokenizer_self->{_saw_perl_dash_w} = 1;
24675 $tok = $tok . $next_tok;
24681 unless ( error_if_expecting_TERM() ) {
24683 # Something like this is valid but strange:
24685 complain("The '^' seems unusual here\n");
24691 '::' => sub { # probably a sub call
24692 scan_bare_identifier();
24694 '<<' => sub { # maybe a here-doc?
24696 unless ( $i < $max_token_index )
24697 ; # here-doc not possible if end of line
24699 if ( $expecting != OPERATOR ) {
24700 my ( $found_target, $here_doc_target, $here_quote_character,
24703 $found_target, $here_doc_target, $here_quote_character, $i,
24706 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
24707 $max_token_index );
24709 if ($found_target) {
24710 push @{$rhere_target_list},
24711 [ $here_doc_target, $here_quote_character ];
24713 if ( length($here_doc_target) > 80 ) {
24714 my $truncated = substr( $here_doc_target, 0, 80 );
24715 complain("Long here-target: '$truncated' ...\n");
24717 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
24719 "Unconventional here-target: '$here_doc_target'\n"
24723 elsif ( $expecting == TERM ) {
24724 unless ($saw_error) {
24726 # shouldn't happen..
24727 warning("Program bug; didn't find here doc target\n");
24728 report_definite_bug();
24737 # if -> points to a bare word, we must scan for an identifier,
24738 # otherwise something like ->y would look like the y operator
24742 # type = 'pp' for pre-increment, '++' for post-increment
24744 if ( $expecting == TERM ) { $type = 'pp' }
24745 elsif ( $expecting == UNKNOWN ) {
24746 my ( $next_nonblank_token, $i_next ) =
24747 find_next_nonblank_token( $i, $rtokens, $max_token_index );
24748 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
24753 if ( $last_nonblank_type eq $tok ) {
24754 complain("Repeated '=>'s \n");
24757 # patch for operator_expected: note if we are in the list (use.t)
24758 # TODO: make version numbers a new token type
24759 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
24762 # type = 'mm' for pre-decrement, '--' for post-decrement
24765 if ( $expecting == TERM ) { $type = 'mm' }
24766 elsif ( $expecting == UNKNOWN ) {
24767 my ( $next_nonblank_token, $i_next ) =
24768 find_next_nonblank_token( $i, $rtokens, $max_token_index );
24769 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
24774 error_if_expecting_TERM()
24775 if ( $expecting == TERM );
24779 error_if_expecting_TERM()
24780 if ( $expecting == TERM );
24784 error_if_expecting_TERM()
24785 if ( $expecting == TERM );
24789 # ------------------------------------------------------------
24790 # end hash of code for handling individual token types
24791 # ------------------------------------------------------------
24793 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
24795 # These block types terminate statements and do not need a trailing
24797 # patched for SWITCH/CASE/
24798 my %is_zero_continuation_block_type;
24799 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
24800 if elsif else unless while until for foreach switch case given when);
24801 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
24803 my %is_not_zero_continuation_block_type;
24804 @_ = qw(sort grep map do eval);
24805 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
24807 my %is_logical_container;
24808 @_ = qw(if elsif unless while and or err not && ! || for foreach);
24809 @is_logical_container{@_} = (1) x scalar(@_);
24811 my %is_binary_type;
24813 @is_binary_type{@_} = (1) x scalar(@_);
24815 my %is_binary_keyword;
24816 @_ = qw(and or err eq ne cmp);
24817 @is_binary_keyword{@_} = (1) x scalar(@_);
24819 # 'L' is token for opening { at hash key
24820 my %is_opening_type;
24821 @_ = qw" L { ( [ ";
24822 @is_opening_type{@_} = (1) x scalar(@_);
24824 # 'R' is token for closing } at hash key
24825 my %is_closing_type;
24826 @_ = qw" R } ) ] ";
24827 @is_closing_type{@_} = (1) x scalar(@_);
24829 my %is_redo_last_next_goto;
24830 @_ = qw(redo last next goto);
24831 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
24833 my %is_use_require;
24834 @_ = qw(use require);
24835 @is_use_require{@_} = (1) x scalar(@_);
24837 my %is_sub_package;
24838 @_ = qw(sub package);
24839 @is_sub_package{@_} = (1) x scalar(@_);
24841 # This hash holds the hash key in $tokenizer_self for these keywords:
24842 my %is_format_END_DATA = (
24843 'format' => '_in_format',
24844 '__END__' => '_in_end',
24845 '__DATA__' => '_in_data',
24848 # ref: camel 3 p 147,
24849 # but perl may accept undocumented flags
24850 # perl 5.10 adds 'p' (preserve)
24851 # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these:
24852 # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc
24853 # s/PATTERN/REPLACEMENT/msixpodualgcer
24854 # y/SEARCHLIST/REPLACEMENTLIST/cdsr
24855 # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
24856 # qr/STRING/msixpodual
24857 my %quote_modifiers = (
24858 's' => '[msixpodualgcer]',
24861 'm' => '[msixpodualgc]',
24862 'qr' => '[msixpodual]',
24869 # table showing how many quoted things to look for after quote operator..
24870 # s, y, tr have 2 (pattern and replacement)
24871 # others have 1 (pattern only)
24872 my %quote_items = (
24884 sub tokenize_this_line {
24886 # This routine breaks a line of perl code into tokens which are of use in
24887 # indentation and reformatting. One of my goals has been to define tokens
24888 # such that a newline may be inserted between any pair of tokens without
24889 # changing or invalidating the program. This version comes close to this,
24890 # although there are necessarily a few exceptions which must be caught by
24891 # the formatter. Many of these involve the treatment of bare words.
24893 # The tokens and their types are returned in arrays. See previous
24894 # routine for their names.
24896 # See also the array "valid_token_types" in the BEGIN section for an
24899 # To simplify things, token types are either a single character, or they
24900 # are identical to the tokens themselves.
24902 # As a debugging aid, the -D flag creates a file containing a side-by-side
24903 # comparison of the input string and its tokenization for each line of a file.
24904 # This is an invaluable debugging aid.
24906 # In addition to tokens, and some associated quantities, the tokenizer
24907 # also returns flags indication any special line types. These include
24908 # quotes, here_docs, formats.
24910 # -----------------------------------------------------------------------
24912 # How to add NEW_TOKENS:
24914 # New token types will undoubtedly be needed in the future both to keep up
24915 # with changes in perl and to help adapt the tokenizer to other applications.
24917 # Here are some notes on the minimal steps. I wrote these notes while
24918 # adding the 'v' token type for v-strings, which are things like version
24919 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
24920 # can use your editor to search for the string "NEW_TOKENS" to find the
24921 # appropriate sections to change):
24923 # *. Try to talk somebody else into doing it! If not, ..
24925 # *. Make a backup of your current version in case things don't work out!
24927 # *. Think of a new, unused character for the token type, and add to
24928 # the array @valid_token_types in the BEGIN section of this package.
24929 # For example, I used 'v' for v-strings.
24931 # *. Implement coding to recognize the $type of the token in this routine.
24932 # This is the hardest part, and is best done by imitating or modifying
24933 # some of the existing coding. For example, to recognize v-strings, I
24934 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
24935 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
24937 # *. Update sub operator_expected. This update is critically important but
24938 # the coding is trivial. Look at the comments in that routine for help.
24939 # For v-strings, which should behave like numbers, I just added 'v' to the
24940 # regex used to handle numbers and strings (types 'n' and 'Q').
24942 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
24943 # Perl::Tidy::Formatter for breaking lines around this token type. You can
24944 # skip this step and take the default at first, then adjust later to get
24945 # desired results. For adding type 'v', I looked at sub bond_strength and
24946 # saw that number type 'n' was using default strengths, so I didn't do
24947 # anything. I may tune it up someday if I don't like the way line
24948 # breaks with v-strings look.
24950 # *. Implement a 'whitespace' rule in sub set_white_space_flag in
24951 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
24952 # and saw that type 'n' used spaces on both sides, so I just added 'v'
24953 # to the array @spaces_both_sides.
24955 # *. Update HtmlWriter package so that users can colorize the token as
24956 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
24957 # that package. For v-strings, I initially chose to use a default color
24958 # equal to the default for numbers, but it might be nice to change that
24961 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
24963 # *. Run lots and lots of debug tests. Start with special files designed
24964 # to test the new token type. Run with the -D flag to create a .DEBUG
24965 # file which shows the tokenization. When these work ok, test as many old
24966 # scripts as possible. Start with all of the '.t' files in the 'test'
24967 # directory of the distribution file. Compare .tdy output with previous
24968 # version and updated version to see the differences. Then include as
24969 # many more files as possible. My own technique has been to collect a huge
24970 # number of perl scripts (thousands!) into one directory and run perltidy
24971 # *, then run diff between the output of the previous version and the
24974 # *. For another example, search for the smartmatch operator '~~'
24975 # with your editor to see where updates were made for it.
24977 # -----------------------------------------------------------------------
24979 my $line_of_tokens = shift;
24980 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
24982 # patch while coding change is underway
24983 # make callers private data to allow access
24984 # $tokenizer_self = $caller_tokenizer_self;
24986 # extract line number for use in error messages
24987 $input_line_number = $line_of_tokens->{_line_number};
24989 # reinitialize for multi-line quote
24990 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
24992 # check for pod documentation
24993 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
24995 # must not be in multi-line quote
24996 # and must not be in an equation
24997 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
24999 $tokenizer_self->{_in_pod} = 1;
25004 $input_line = $untrimmed_input_line;
25008 # trim start of this line unless we are continuing a quoted line
25009 # do not trim end because we might end in a quote (test: deken4.pl)
25010 # Perl::Tidy::Formatter will delete needless trailing blanks
25011 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
25012 $input_line =~ s/^\s*//; # trim left end
25015 # update the copy of the line for use in error messages
25016 # This must be exactly what we give the pre_tokenizer
25017 $tokenizer_self->{_line_text} = $input_line;
25019 # re-initialize for the main loop
25020 $routput_token_list = []; # stack of output token indexes
25021 $routput_token_type = []; # token types
25022 $routput_block_type = []; # types of code block
25023 $routput_container_type = []; # paren types, such as if, elsif, ..
25024 $routput_type_sequence = []; # nesting sequential number
25026 $rhere_target_list = [];
25028 $tok = $last_nonblank_token;
25029 $type = $last_nonblank_type;
25030 $prototype = $last_nonblank_prototype;
25031 $last_nonblank_i = -1;
25032 $block_type = $last_nonblank_block_type;
25033 $container_type = $last_nonblank_container_type;
25034 $type_sequence = $last_nonblank_type_sequence;
25038 # tokenization is done in two stages..
25039 # stage 1 is a very simple pre-tokenization
25040 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
25042 # a little optimization for a full-line comment
25043 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
25044 $max_tokens_wanted = 1 # no use tokenizing a comment
25047 # start by breaking the line into pre-tokens
25048 ( $rtokens, $rtoken_map, $rtoken_type ) =
25049 pre_tokenize( $input_line, $max_tokens_wanted );
25051 $max_token_index = scalar(@$rtokens) - 1;
25052 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
25053 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
25054 push( @$rtoken_type, 'b', 'b', 'b' );
25056 # initialize for main loop
25057 for $i ( 0 .. $max_token_index + 3 ) {
25058 $routput_token_type->[$i] = "";
25059 $routput_block_type->[$i] = "";
25060 $routput_container_type->[$i] = "";
25061 $routput_type_sequence->[$i] = "";
25062 $routput_indent_flag->[$i] = 0;
25067 # ------------------------------------------------------------
25068 # begin main tokenization loop
25069 # ------------------------------------------------------------
25071 # we are looking at each pre-token of one line and combining them
25073 while ( ++$i <= $max_token_index ) {
25075 if ($in_quote) { # continue looking for end of a quote
25076 $type = $quote_type;
25078 unless ( @{$routput_token_list} )
25079 { # initialize if continuation line
25080 push( @{$routput_token_list}, $i );
25081 $routput_token_type->[$i] = $type;
25084 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
25086 # scan for the end of the quote or pattern
25088 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25089 $quoted_string_1, $quoted_string_2
25092 $i, $in_quote, $quote_character,
25093 $quote_pos, $quote_depth, $quoted_string_1,
25094 $quoted_string_2, $rtokens, $rtoken_map,
25098 # all done if we didn't find it
25099 last if ($in_quote);
25101 # save pattern and replacement text for rescanning
25102 my $qs1 = $quoted_string_1;
25103 my $qs2 = $quoted_string_2;
25105 # re-initialize for next search
25106 $quote_character = '';
25109 $quoted_string_1 = "";
25110 $quoted_string_2 = "";
25111 last if ( ++$i > $max_token_index );
25113 # look for any modifiers
25114 if ($allowed_quote_modifiers) {
25116 # check for exact quote modifiers
25117 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
25118 my $str = $$rtokens[$i];
25119 my $saw_modifier_e;
25120 while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
25121 my $pos = pos($str);
25122 my $char = substr( $str, $pos - 1, 1 );
25123 $saw_modifier_e ||= ( $char eq 'e' );
25126 # For an 'e' quote modifier we must scan the replacement
25127 # text for here-doc targets.
25128 if ($saw_modifier_e) {
25130 my $rht = scan_replacement_text($qs1);
25132 # Change type from 'Q' to 'h' for quotes with
25133 # here-doc targets so that the formatter (see sub
25134 # print_line_of_tokens) will not make any line
25135 # breaks after this point.
25137 push @{$rhere_target_list}, @{$rht};
25139 if ( $i_tok < 0 ) {
25140 my $ilast = $routput_token_list->[-1];
25141 $routput_token_type->[$ilast] = $type;
25146 if ( defined( pos($str) ) ) {
25149 if ( pos($str) == length($str) ) {
25150 last if ( ++$i > $max_token_index );
25153 # Looks like a joined quote modifier
25154 # and keyword, maybe something like
25155 # s/xxx/yyy/gefor @k=...
25156 # Example is "galgen.pl". Would have to split
25157 # the word and insert a new token in the
25158 # pre-token list. This is so rare that I haven't
25159 # done it. Will just issue a warning citation.
25161 # This error might also be triggered if my quote
25162 # modifier characters are incomplete
25166 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
25167 Please put a space between quote modifiers and trailing keywords.
25170 # print "token $$rtokens[$i]\n";
25171 # my $num = length($str) - pos($str);
25172 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
25173 # print "continuing with new token $$rtokens[$i]\n";
25175 # skipping past this token does least damage
25176 last if ( ++$i > $max_token_index );
25181 # example file: rokicki4.pl
25182 # This error might also be triggered if my quote
25183 # modifier characters are incomplete
25184 write_logfile_entry(
25185 "Note: found word $str at quote modifier location\n"
25191 $allowed_quote_modifiers = "";
25195 unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
25197 # try to catch some common errors
25198 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
25200 if ( $last_nonblank_token eq 'eq' ) {
25201 complain("Should 'eq' be '==' here ?\n");
25203 elsif ( $last_nonblank_token eq 'ne' ) {
25204 complain("Should 'ne' be '!=' here ?\n");
25208 $last_last_nonblank_token = $last_nonblank_token;
25209 $last_last_nonblank_type = $last_nonblank_type;
25210 $last_last_nonblank_block_type = $last_nonblank_block_type;
25211 $last_last_nonblank_container_type =
25212 $last_nonblank_container_type;
25213 $last_last_nonblank_type_sequence =
25214 $last_nonblank_type_sequence;
25215 $last_nonblank_token = $tok;
25216 $last_nonblank_type = $type;
25217 $last_nonblank_prototype = $prototype;
25218 $last_nonblank_block_type = $block_type;
25219 $last_nonblank_container_type = $container_type;
25220 $last_nonblank_type_sequence = $type_sequence;
25221 $last_nonblank_i = $i_tok;
25224 # store previous token type
25225 if ( $i_tok >= 0 ) {
25226 $routput_token_type->[$i_tok] = $type;
25227 $routput_block_type->[$i_tok] = $block_type;
25228 $routput_container_type->[$i_tok] = $container_type;
25229 $routput_type_sequence->[$i_tok] = $type_sequence;
25230 $routput_indent_flag->[$i_tok] = $indent_flag;
25232 my $pre_tok = $$rtokens[$i]; # get the next pre-token
25233 my $pre_type = $$rtoken_type[$i]; # and type
25235 $type = $pre_type; # to be modified as necessary
25236 $block_type = ""; # blank for all tokens except code block braces
25237 $container_type = ""; # blank for all tokens except some parens
25238 $type_sequence = ""; # blank for all tokens except ?/:
25240 $prototype = ""; # blank for all tokens except user defined subs
25243 # this pre-token will start an output token
25244 push( @{$routput_token_list}, $i_tok );
25246 # continue gathering identifier if necessary
25247 # but do not start on blanks and comments
25248 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
25250 if ( $id_scan_state =~ /^(sub|package)/ ) {
25257 last if ($id_scan_state);
25258 next if ( ( $i > 0 ) || $type );
25260 # didn't find any token; start over
25265 # handle whitespace tokens..
25266 next if ( $type eq 'b' );
25267 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
25268 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
25270 # Build larger tokens where possible, since we are not in a quote.
25272 # First try to assemble digraphs. The following tokens are
25273 # excluded and handled specially:
25274 # '/=' is excluded because the / might start a pattern.
25275 # 'x=' is excluded since it might be $x=, with $ on previous line
25276 # '**' and *= might be typeglobs of punctuation variables
25277 # I have allowed tokens starting with <, such as <=,
25278 # because I don't think these could be valid angle operators.
25279 # test file: storrs4.pl
25280 my $test_tok = $tok . $$rtokens[ $i + 1 ];
25281 my $combine_ok = $is_digraph{$test_tok};
25283 # check for special cases which cannot be combined
25286 # '//' must be defined_or operator if an operator is expected.
25287 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
25288 # could be migrated here for clarity
25289 if ( $test_tok eq '//' ) {
25290 my $next_type = $$rtokens[ $i + 1 ];
25292 operator_expected( $prev_type, $tok, $next_type );
25293 $combine_ok = 0 unless ( $expecting == OPERATOR );
25299 && ( $test_tok ne '/=' ) # might be pattern
25300 && ( $test_tok ne 'x=' ) # might be $x
25301 && ( $test_tok ne '**' ) # typeglob?
25302 && ( $test_tok ne '*=' ) # typeglob?
25308 # Now try to assemble trigraphs. Note that all possible
25309 # perl trigraphs can be constructed by appending a character
25311 $test_tok = $tok . $$rtokens[ $i + 1 ];
25313 if ( $is_trigraph{$test_tok} ) {
25320 $next_tok = $$rtokens[ $i + 1 ];
25321 $next_type = $$rtoken_type[ $i + 1 ];
25323 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
25326 $last_nonblank_token, $tok,
25327 $next_tok, $brace_depth,
25328 $brace_type[$brace_depth], $paren_depth,
25329 $paren_type[$paren_depth]
25331 print STDOUT "TOKENIZE:(@debug_list)\n";
25334 # turn off attribute list on first non-blank, non-bareword
25335 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
25337 ###############################################################
25338 # We have the next token, $tok.
25339 # Now we have to examine this token and decide what it is
25340 # and define its $type
25342 # section 1: bare words
25343 ###############################################################
25345 if ( $pre_type eq 'w' ) {
25346 $expecting = operator_expected( $prev_type, $tok, $next_type );
25347 my ( $next_nonblank_token, $i_next ) =
25348 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25350 # ATTRS: handle sub and variable attributes
25351 if ($in_attribute_list) {
25353 # treat bare word followed by open paren like qw(
25354 if ( $next_nonblank_token eq '(' ) {
25355 $in_quote = $quote_items{'q'};
25356 $allowed_quote_modifiers = $quote_modifiers{'q'};
25362 # handle bareword not followed by open paren
25369 # quote a word followed by => operator
25370 if ( $next_nonblank_token eq '=' ) {
25372 if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
25373 if ( $is_constant{$current_package}{$tok} ) {
25376 elsif ( $is_user_function{$current_package}{$tok} ) {
25379 $user_function_prototype{$current_package}{$tok};
25381 elsif ( $tok =~ /^v\d+$/ ) {
25383 report_v_string($tok);
25385 else { $type = 'w' }
25391 # quote a bare word within braces..like xxx->{s}; note that we
25392 # must be sure this is not a structural brace, to avoid
25393 # mistaking {s} in the following for a quoted bare word:
25394 # for(@[){s}bla}BLA}
25395 # Also treat q in something like var{-q} as a bare word, not qoute operator
25397 $next_nonblank_token eq '}'
25399 $last_nonblank_type eq 'L'
25400 || ( $last_nonblank_type eq 'm'
25401 && $last_last_nonblank_type eq 'L' )
25409 # a bare word immediately followed by :: is not a keyword;
25410 # use $tok_kw when testing for keywords to avoid a mistake
25412 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
25417 # handle operator x (now we know it isn't $x=)
25418 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
25419 if ( $tok eq 'x' ) {
25421 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
25431 # FIXME: Patch: mark something like x4 as an integer for now
25432 # It gets fixed downstream. This is easier than
25433 # splitting the pretoken.
25438 elsif ( $tok_kw eq 'CORE::' ) {
25439 $type = $tok = $tok_kw;
25442 elsif ( ( $tok eq 'strict' )
25443 and ( $last_nonblank_token eq 'use' ) )
25445 $tokenizer_self->{_saw_use_strict} = 1;
25446 scan_bare_identifier();
25449 elsif ( ( $tok eq 'warnings' )
25450 and ( $last_nonblank_token eq 'use' ) )
25452 $tokenizer_self->{_saw_perl_dash_w} = 1;
25454 # scan as identifier, so that we pick up something like:
25455 # use warnings::register
25456 scan_bare_identifier();
25460 $tok eq 'AutoLoader'
25461 && $tokenizer_self->{_look_for_autoloader}
25463 $last_nonblank_token eq 'use'
25465 # these regexes are from AutoSplit.pm, which we want
25467 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
25468 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
25472 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
25473 $tokenizer_self->{_saw_autoloader} = 1;
25474 $tokenizer_self->{_look_for_autoloader} = 0;
25475 scan_bare_identifier();
25479 $tok eq 'SelfLoader'
25480 && $tokenizer_self->{_look_for_selfloader}
25481 && ( $last_nonblank_token eq 'use'
25482 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
25483 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
25486 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
25487 $tokenizer_self->{_saw_selfloader} = 1;
25488 $tokenizer_self->{_look_for_selfloader} = 0;
25489 scan_bare_identifier();
25492 elsif ( ( $tok eq 'constant' )
25493 and ( $last_nonblank_token eq 'use' ) )
25495 scan_bare_identifier();
25496 my ( $next_nonblank_token, $i_next ) =
25497 find_next_nonblank_token( $i, $rtokens,
25498 $max_token_index );
25500 if ($next_nonblank_token) {
25502 if ( $is_keyword{$next_nonblank_token} ) {
25504 # Assume qw is used as a quote and okay, as in:
25505 # use constant qw{ DEBUG 0 };
25506 # Not worth trying to parse for just a warning
25508 # NOTE: This warning is deactivated because recent
25509 # versions of perl do not complain here, but
25510 # the coding is retained for reference.
25511 if ( 0 && $next_nonblank_token ne 'qw' ) {
25513 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
25518 # FIXME: could check for error in which next token is
25519 # not a word (number, punctuation, ..)
25521 $is_constant{$current_package}{$next_nonblank_token}
25527 # various quote operators
25528 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
25530 if ( $expecting == OPERATOR ) {
25532 # Be careful not to call an error for a qw quote
25533 # where a parenthesized list is allowed. For example,
25534 # it could also be a for/foreach construct such as
25536 # foreach my $key qw\Uno Due Tres Quadro\ {
25537 # print "Set $key\n";
25541 # Or it could be a function call.
25542 # NOTE: Braces in something like &{ xxx } are not
25543 # marked as a block, we might have a method call.
25544 # &method(...), $method->(..), &{method}(...),
25545 # $ref[2](list) is ok & short for $ref[2]->(list)
25547 # See notes in 'sub code_block_type' and
25548 # 'sub is_non_structural_brace'
25552 && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
25553 || $is_for_foreach{$want_paren} )
25556 error_if_expecting_OPERATOR();
25559 $in_quote = $quote_items{$tok};
25560 $allowed_quote_modifiers = $quote_modifiers{$tok};
25562 # All quote types are 'Q' except possibly qw quotes.
25563 # qw quotes are special in that they may generally be trimmed
25564 # of leading and trailing whitespace. So they are given a
25565 # separate type, 'q', unless requested otherwise.
25567 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
25570 $quote_type = $type;
25573 # check for a statement label
25575 ( $next_nonblank_token eq ':' )
25576 && ( $$rtokens[ $i_next + 1 ] ne ':' )
25577 && ( $i_next <= $max_token_index ) # colon on same line
25581 if ( $tok !~ /[A-Z]/ ) {
25582 push @{ $tokenizer_self->{_rlower_case_labels_at} },
25583 $input_line_number;
25591 # 'sub' || 'package'
25592 elsif ( $is_sub_package{$tok_kw} ) {
25593 error_if_expecting_OPERATOR()
25594 if ( $expecting == OPERATOR );
25598 # Note on token types for format, __DATA__, __END__:
25599 # It simplifies things to give these type ';', so that when we
25600 # start rescanning we will be expecting a token of type TERM.
25601 # We will switch to type 'k' before outputting the tokens.
25602 elsif ( $is_format_END_DATA{$tok_kw} ) {
25603 $type = ';'; # make tokenizer look for TERM next
25604 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
25608 elsif ( $is_keyword{$tok_kw} ) {
25611 # Since for and foreach may not be followed immediately
25612 # by an opening paren, we have to remember which keyword
25613 # is associated with the next '('
25614 if ( $is_for_foreach{$tok} ) {
25615 if ( new_statement_ok() ) {
25616 $want_paren = $tok;
25620 # recognize 'use' statements, which are special
25621 elsif ( $is_use_require{$tok} ) {
25622 $statement_type = $tok;
25623 error_if_expecting_OPERATOR()
25624 if ( $expecting == OPERATOR );
25627 # remember my and our to check for trailing ": shared"
25628 elsif ( $is_my_our{$tok} ) {
25629 $statement_type = $tok;
25632 # Check for misplaced 'elsif' and 'else', but allow isolated
25633 # else or elsif blocks to be formatted. This is indicated
25634 # by a last noblank token of ';'
25635 elsif ( $tok eq 'elsif' ) {
25636 if ( $last_nonblank_token ne ';'
25637 && $last_nonblank_block_type !~
25638 /^(if|elsif|unless)$/ )
25641 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
25645 elsif ( $tok eq 'else' ) {
25647 # patched for SWITCH/CASE
25648 if ( $last_nonblank_token ne ';'
25649 && $last_nonblank_block_type !~
25650 /^(if|elsif|unless|case|when)$/ )
25653 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
25657 elsif ( $tok eq 'continue' ) {
25658 if ( $last_nonblank_token ne ';'
25659 && $last_nonblank_block_type !~
25660 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
25663 # note: ';' '{' and '}' in list above
25664 # because continues can follow bare blocks;
25665 # ':' is labeled block
25667 ############################################
25668 # NOTE: This check has been deactivated because
25669 # continue has an alternative usage for given/when
25670 # blocks in perl 5.10
25671 ## warning("'$tok' should follow a block\n");
25672 ############################################
25676 # patch for SWITCH/CASE if 'case' and 'when are
25677 # treated as keywords.
25678 elsif ( $tok eq 'when' || $tok eq 'case' ) {
25679 $statement_type = $tok; # next '{' is block
25683 # indent trailing if/unless/while/until
25684 # outdenting will be handled by later indentation loop
25685 ## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
25697 ## if ( $tok =~ /^(if|unless|while|until)$/
25698 ## && $next_nonblank_token ne '(' )
25700 ## $indent_flag = 1;
25704 # check for inline label following
25705 # /^(redo|last|next|goto)$/
25706 elsif (( $last_nonblank_type eq 'k' )
25707 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
25713 # something else --
25716 scan_bare_identifier();
25717 if ( $type eq 'w' ) {
25719 if ( $expecting == OPERATOR ) {
25721 # don't complain about possible indirect object
25725 # sub new($) { ... }
25726 # $b = new A::; # calls A::new
25727 # $c = new A; # same thing but suspicious
25728 # This will call A::new but we have a 'new' in
25729 # main:: which looks like a constant.
25731 if ( $last_nonblank_type eq 'C' ) {
25732 if ( $tok !~ /::$/ ) {
25734 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
25735 Maybe indirectet object notation?
25740 error_if_expecting_OPERATOR("bareword");
25744 # mark bare words immediately followed by a paren as
25746 $next_tok = $$rtokens[ $i + 1 ];
25747 if ( $next_tok eq '(' ) {
25751 # underscore after file test operator is file handle
25752 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
25756 # patch for SWITCH/CASE if 'case' and 'when are
25757 # not treated as keywords:
25761 && $brace_type[$brace_depth] eq 'switch'
25763 || ( $tok eq 'when'
25764 && $brace_type[$brace_depth] eq 'given' )
25767 $statement_type = $tok; # next '{' is block
25768 $type = 'k'; # for keyword syntax coloring
25771 # patch for SWITCH/CASE if switch and given not keywords
25772 # Switch is not a perl 5 keyword, but we will gamble
25773 # and mark switch followed by paren as a keyword. This
25774 # is only necessary to get html syntax coloring nice,
25775 # and does not commit this as being a switch/case.
25776 if ( $next_nonblank_token eq '('
25777 && ( $tok eq 'switch' || $tok eq 'given' ) )
25779 $type = 'k'; # for keyword syntax coloring
25785 ###############################################################
25786 # section 2: strings of digits
25787 ###############################################################
25788 elsif ( $pre_type eq 'd' ) {
25789 $expecting = operator_expected( $prev_type, $tok, $next_type );
25790 error_if_expecting_OPERATOR("Number")
25791 if ( $expecting == OPERATOR );
25792 my $number = scan_number();
25793 if ( !defined($number) ) {
25795 # shouldn't happen - we should always get a number
25796 warning("non-number beginning with digit--program bug\n");
25797 report_definite_bug();
25801 ###############################################################
25802 # section 3: all other tokens
25803 ###############################################################
25806 last if ( $tok eq '#' );
25807 my $code = $tokenization_code->{$tok};
25810 operator_expected( $prev_type, $tok, $next_type );
25817 # -----------------------------
25818 # end of main tokenization loop
25819 # -----------------------------
25821 if ( $i_tok >= 0 ) {
25822 $routput_token_type->[$i_tok] = $type;
25823 $routput_block_type->[$i_tok] = $block_type;
25824 $routput_container_type->[$i_tok] = $container_type;
25825 $routput_type_sequence->[$i_tok] = $type_sequence;
25826 $routput_indent_flag->[$i_tok] = $indent_flag;
25829 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
25830 $last_last_nonblank_token = $last_nonblank_token;
25831 $last_last_nonblank_type = $last_nonblank_type;
25832 $last_last_nonblank_block_type = $last_nonblank_block_type;
25833 $last_last_nonblank_container_type = $last_nonblank_container_type;
25834 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
25835 $last_nonblank_token = $tok;
25836 $last_nonblank_type = $type;
25837 $last_nonblank_block_type = $block_type;
25838 $last_nonblank_container_type = $container_type;
25839 $last_nonblank_type_sequence = $type_sequence;
25840 $last_nonblank_prototype = $prototype;
25843 # reset indentation level if necessary at a sub or package
25844 # in an attempt to recover from a nesting error
25845 if ( $level_in_tokenizer < 0 ) {
25846 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
25847 reset_indentation_level(0);
25848 brace_warning("resetting level to 0 at $1 $2\n");
25852 # all done tokenizing this line ...
25853 # now prepare the final list of tokens and types
25855 my @token_type = (); # stack of output token types
25856 my @block_type = (); # stack of output code block types
25857 my @container_type = (); # stack of output code container types
25858 my @type_sequence = (); # stack of output type sequence numbers
25859 my @tokens = (); # output tokens
25860 my @levels = (); # structural brace levels of output tokens
25861 my @slevels = (); # secondary nesting levels of output tokens
25862 my @nesting_tokens = (); # string of tokens leading to this depth
25863 my @nesting_types = (); # string of token types leading to this depth
25864 my @nesting_blocks = (); # string of block types leading to this depth
25865 my @nesting_lists = (); # string of list types leading to this depth
25866 my @ci_string = (); # string needed to compute continuation indentation
25867 my @container_environment = (); # BLOCK or LIST
25868 my $container_environment = '';
25869 my $im = -1; # previous $i value
25871 my $ci_string_sum = ones_count($ci_string_in_tokenizer);
25873 # Computing Token Indentation
25875 # The final section of the tokenizer forms tokens and also computes
25876 # parameters needed to find indentation. It is much easier to do it
25877 # in the tokenizer than elsewhere. Here is a brief description of how
25878 # indentation is computed. Perl::Tidy computes indentation as the sum
25881 # (1) structural indentation, such as if/else/elsif blocks
25882 # (2) continuation indentation, such as long parameter call lists.
25884 # These are occasionally called primary and secondary indentation.
25886 # Structural indentation is introduced by tokens of type '{', although
25887 # the actual tokens might be '{', '(', or '['. Structural indentation
25888 # is of two types: BLOCK and non-BLOCK. Default structural indentation
25889 # is 4 characters if the standard indentation scheme is used.
25891 # Continuation indentation is introduced whenever a line at BLOCK level
25892 # is broken before its termination. Default continuation indentation
25893 # is 2 characters in the standard indentation scheme.
25895 # Both types of indentation may be nested arbitrarily deep and
25896 # interlaced. The distinction between the two is somewhat arbitrary.
25898 # For each token, we will define two variables which would apply if
25899 # the current statement were broken just before that token, so that
25900 # that token started a new line:
25902 # $level = the structural indentation level,
25903 # $ci_level = the continuation indentation level
25905 # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
25906 # assuming defaults. However, in some special cases it is customary
25907 # to modify $ci_level from this strict value.
25909 # The total structural indentation is easy to compute by adding and
25910 # subtracting 1 from a saved value as types '{' and '}' are seen. The
25911 # running value of this variable is $level_in_tokenizer.
25913 # The total continuation is much more difficult to compute, and requires
25914 # several variables. These variables are:
25916 # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
25917 # each indentation level, if there are intervening open secondary
25918 # structures just prior to that level.
25919 # $continuation_string_in_tokenizer = a string of 1's and 0's indicating
25920 # if the last token at that level is "continued", meaning that it
25921 # is not the first token of an expression.
25922 # $nesting_block_string = a string of 1's and 0's indicating, for each
25923 # indentation level, if the level is of type BLOCK or not.
25924 # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
25925 # $nesting_list_string = a string of 1's and 0's indicating, for each
25926 # indentation level, if it is appropriate for list formatting.
25927 # If so, continuation indentation is used to indent long list items.
25928 # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
25929 # @{$rslevel_stack} = a stack of total nesting depths at each
25930 # structural indentation level, where "total nesting depth" means
25931 # the nesting depth that would occur if every nesting token -- '{', '[',
25932 # and '(' -- , regardless of context, is used to compute a nesting
25935 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
25936 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
25938 my ( $ci_string_i, $level_i, $nesting_block_string_i,
25939 $nesting_list_string_i, $nesting_token_string_i,
25940 $nesting_type_string_i, );
25942 foreach $i ( @{$routput_token_list} )
25943 { # scan the list of pre-tokens indexes
25945 # self-checking for valid token types
25946 my $type = $routput_token_type->[$i];
25947 my $forced_indentation_flag = $routput_indent_flag->[$i];
25949 # See if we should undo the $forced_indentation_flag.
25950 # Forced indentation after 'if', 'unless', 'while' and 'until'
25951 # expressions without trailing parens is optional and doesn't
25952 # always look good. It is usually okay for a trailing logical
25953 # expression, but if the expression is a function call, code block,
25954 # or some kind of list it puts in an unwanted extra indentation
25955 # level which is hard to remove.
25957 # Example where extra indentation looks ok:
25959 # if $det_a < 0 and $det_b > 0
25960 # or $det_a > 0 and $det_b < 0;
25962 # Example where extra indentation is not needed because
25963 # the eval brace also provides indentation:
25964 # print "not " if defined eval {
25965 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
25968 # The following rule works fairly well:
25969 # Undo the flag if the end of this line, or start of the next
25970 # line, is an opening container token or a comma.
25971 # This almost always works, but if not after another pass it will
25973 if ( $forced_indentation_flag && $type eq 'k' ) {
25975 my $ilast = $routput_token_list->[$ixlast];
25976 my $toklast = $routput_token_type->[$ilast];
25977 if ( $toklast eq '#' ) {
25979 $ilast = $routput_token_list->[$ixlast];
25980 $toklast = $routput_token_type->[$ilast];
25982 if ( $toklast eq 'b' ) {
25984 $ilast = $routput_token_list->[$ixlast];
25985 $toklast = $routput_token_type->[$ilast];
25987 if ( $toklast =~ /^[\{,]$/ ) {
25988 $forced_indentation_flag = 0;
25991 ( $toklast, my $i_next ) =
25992 find_next_nonblank_token( $max_token_index, $rtokens,
25993 $max_token_index );
25994 if ( $toklast =~ /^[\{,]$/ ) {
25995 $forced_indentation_flag = 0;
26000 # if we are already in an indented if, see if we should outdent
26001 if ($indented_if_level) {
26003 # don't try to nest trailing if's - shouldn't happen
26004 if ( $type eq 'k' ) {
26005 $forced_indentation_flag = 0;
26008 # check for the normal case - outdenting at next ';'
26009 elsif ( $type eq ';' ) {
26010 if ( $level_in_tokenizer == $indented_if_level ) {
26011 $forced_indentation_flag = -1;
26012 $indented_if_level = 0;
26016 # handle case of missing semicolon
26017 elsif ( $type eq '}' ) {
26018 if ( $level_in_tokenizer == $indented_if_level ) {
26019 $indented_if_level = 0;
26021 # TBD: This could be a subroutine call
26022 $level_in_tokenizer--;
26023 if ( @{$rslevel_stack} > 1 ) {
26024 pop( @{$rslevel_stack} );
26026 if ( length($nesting_block_string) > 1 )
26027 { # true for valid script
26028 chop $nesting_block_string;
26029 chop $nesting_list_string;
26036 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
26037 $level_i = $level_in_tokenizer;
26039 # This can happen by running perltidy on non-scripts
26040 # although it could also be bug introduced by programming change.
26041 # Perl silently accepts a 032 (^Z) and takes it as the end
26042 if ( !$is_valid_token_type{$type} ) {
26043 my $val = ord($type);
26045 "unexpected character decimal $val ($type) in script\n");
26046 $tokenizer_self->{_in_error} = 1;
26049 # ----------------------------------------------------------------
26050 # TOKEN TYPE PATCHES
26051 # output __END__, __DATA__, and format as type 'k' instead of ';'
26052 # to make html colors correct, etc.
26053 my $fix_type = $type;
26054 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
26056 # output anonymous 'sub' as keyword
26057 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
26059 # -----------------------------------------------------------------
26061 $nesting_token_string_i = $nesting_token_string;
26062 $nesting_type_string_i = $nesting_type_string;
26063 $nesting_block_string_i = $nesting_block_string;
26064 $nesting_list_string_i = $nesting_list_string;
26066 # set primary indentation levels based on structural braces
26067 # Note: these are set so that the leading braces have a HIGHER
26068 # level than their CONTENTS, which is convenient for indentation
26069 # Also, define continuation indentation for each token.
26070 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
26073 # use environment before updating
26074 $container_environment =
26075 $nesting_block_flag ? 'BLOCK'
26076 : $nesting_list_flag ? 'LIST'
26079 # if the difference between total nesting levels is not 1,
26080 # there are intervening non-structural nesting types between
26081 # this '{' and the previous unclosed '{'
26082 my $intervening_secondary_structure = 0;
26083 if ( @{$rslevel_stack} ) {
26084 $intervening_secondary_structure =
26085 $slevel_in_tokenizer - $rslevel_stack->[-1];
26088 # Continuation Indentation
26090 # Having tried setting continuation indentation both in the formatter and
26091 # in the tokenizer, I can say that setting it in the tokenizer is much,
26092 # much easier. The formatter already has too much to do, and can't
26093 # make decisions on line breaks without knowing what 'ci' will be at
26094 # arbitrary locations.
26096 # But a problem with setting the continuation indentation (ci) here
26097 # in the tokenizer is that we do not know where line breaks will actually
26098 # be. As a result, we don't know if we should propagate continuation
26099 # indentation to higher levels of structure.
26101 # For nesting of only structural indentation, we never need to do this.
26102 # For example, in a long if statement, like this
26104 # if ( !$output_block_type[$i]
26105 # && ($in_statement_continuation) )
26110 # the second line has ci but we do normally give the lines within the BLOCK
26111 # any ci. This would be true if we had blocks nested arbitrarily deeply.
26113 # But consider something like this, where we have created a break after
26114 # an opening paren on line 1, and the paren is not (currently) a
26115 # structural indentation token:
26117 # my $file = $menubar->Menubutton(
26118 # qw/-text File -underline 0 -menuitems/ => [
26120 # Cascade => '~View',
26124 # The second line has ci, so it would seem reasonable to propagate it
26125 # down, giving the third line 1 ci + 1 indentation. This suggests the
26126 # following rule, which is currently used to propagating ci down: if there
26127 # are any non-structural opening parens (or brackets, or braces), before
26128 # an opening structural brace, then ci is propagated down, and otherwise
26129 # not. The variable $intervening_secondary_structure contains this
26130 # information for the current token, and the string
26131 # "$ci_string_in_tokenizer" is a stack of previous values of this
26134 # save the current states
26135 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
26136 $level_in_tokenizer++;
26138 if ($forced_indentation_flag) {
26140 # break BEFORE '?' when there is forced indentation
26141 if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
26142 if ( $type eq 'k' ) {
26143 $indented_if_level = $level_in_tokenizer;
26146 # do not change container environment here if we are not
26147 # at a real list. Adding this check prevents "blinkers"
26148 # often near 'unless" clauses, such as in the following
26153 ## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
26156 $nesting_block_string .= "$nesting_block_flag";
26160 if ( $routput_block_type->[$i] ) {
26161 $nesting_block_flag = 1;
26162 $nesting_block_string .= '1';
26165 $nesting_block_flag = 0;
26166 $nesting_block_string .= '0';
26170 # we will use continuation indentation within containers
26171 # which are not blocks and not logical expressions
26173 if ( !$routput_block_type->[$i] ) {
26175 # propagate flag down at nested open parens
26176 if ( $routput_container_type->[$i] eq '(' ) {
26177 $bit = 1 if $nesting_list_flag;
26180 # use list continuation if not a logical grouping
26181 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
26185 $is_logical_container{ $routput_container_type->[$i]
26189 $nesting_list_string .= $bit;
26190 $nesting_list_flag = $bit;
26192 $ci_string_in_tokenizer .=
26193 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
26194 $ci_string_sum = ones_count($ci_string_in_tokenizer);
26195 $continuation_string_in_tokenizer .=
26196 ( $in_statement_continuation > 0 ) ? '1' : '0';
26198 # Sometimes we want to give an opening brace continuation indentation,
26199 # and sometimes not. For code blocks, we don't do it, so that the leading
26200 # '{' gets outdented, like this:
26202 # if ( !$output_block_type[$i]
26203 # && ($in_statement_continuation) )
26206 # For other types, we will give them continuation indentation. For example,
26207 # here is how a list looks with the opening paren indented:
26210 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
26211 # [ "homer", "marge", "bart" ], );
26213 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
26215 my $total_ci = $ci_string_sum;
26217 !$routput_block_type->[$i] # patch: skip for BLOCK
26218 && ($in_statement_continuation)
26219 && !( $forced_indentation_flag && $type eq ':' )
26222 $total_ci += $in_statement_continuation
26223 unless ( $ci_string_in_tokenizer =~ /1$/ );
26226 $ci_string_i = $total_ci;
26227 $in_statement_continuation = 0;
26230 elsif ($type eq '}'
26232 || $forced_indentation_flag < 0 )
26235 # only a nesting error in the script would prevent popping here
26236 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
26238 $level_i = --$level_in_tokenizer;
26240 # restore previous level values
26241 if ( length($nesting_block_string) > 1 )
26242 { # true for valid script
26243 chop $nesting_block_string;
26244 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
26245 chop $nesting_list_string;
26246 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
26248 chop $ci_string_in_tokenizer;
26249 $ci_string_sum = ones_count($ci_string_in_tokenizer);
26251 $in_statement_continuation =
26252 chop $continuation_string_in_tokenizer;
26254 # zero continuation flag at terminal BLOCK '}' which
26255 # ends a statement.
26256 if ( $routput_block_type->[$i] ) {
26258 # ...These include non-anonymous subs
26259 # note: could be sub ::abc { or sub 'abc
26260 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
26262 # note: older versions of perl require the /gc modifier
26263 # here or else the \G does not work.
26264 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
26266 $in_statement_continuation = 0;
26270 # ...and include all block types except user subs with
26271 # block prototypes and these: (sort|grep|map|do|eval)
26272 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
26274 $is_zero_continuation_block_type{
26275 $routput_block_type->[$i]
26278 $in_statement_continuation = 0;
26281 # ..but these are not terminal types:
26282 # /^(sort|grep|map|do|eval)$/ )
26284 $is_not_zero_continuation_block_type{
26285 $routput_block_type->[$i]
26290 # ..and a block introduced by a label
26291 # /^\w+\s*:$/gc ) {
26292 elsif ( $routput_block_type->[$i] =~ /:$/ ) {
26293 $in_statement_continuation = 0;
26296 # user function with block prototype
26298 $in_statement_continuation = 0;
26302 # If we are in a list, then
26303 # we must set continuation indentation at the closing
26304 # paren of something like this (paren after $check):
26307 # ( not defined $check )
26309 # or $check eq "new"
26310 # or $check eq "old",
26312 elsif ( $tok eq ')' ) {
26313 $in_statement_continuation = 1
26314 if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
26317 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
26320 # use environment after updating
26321 $container_environment =
26322 $nesting_block_flag ? 'BLOCK'
26323 : $nesting_list_flag ? 'LIST'
26325 $ci_string_i = $ci_string_sum + $in_statement_continuation;
26326 $nesting_block_string_i = $nesting_block_string;
26327 $nesting_list_string_i = $nesting_list_string;
26330 # not a structural indentation type..
26333 $container_environment =
26334 $nesting_block_flag ? 'BLOCK'
26335 : $nesting_list_flag ? 'LIST'
26338 # zero the continuation indentation at certain tokens so
26339 # that they will be at the same level as its container. For
26340 # commas, this simplifies the -lp indentation logic, which
26341 # counts commas. For ?: it makes them stand out.
26342 if ($nesting_list_flag) {
26343 if ( $type =~ /^[,\?\:]$/ ) {
26344 $in_statement_continuation = 0;
26348 # be sure binary operators get continuation indentation
26350 $container_environment
26351 && ( $type eq 'k' && $is_binary_keyword{$tok}
26352 || $is_binary_type{$type} )
26355 $in_statement_continuation = 1;
26358 # continuation indentation is sum of any open ci from previous
26359 # levels plus the current level
26360 $ci_string_i = $ci_string_sum + $in_statement_continuation;
26362 # update continuation flag ...
26363 # if this isn't a blank or comment..
26364 if ( $type ne 'b' && $type ne '#' ) {
26366 # and we are in a BLOCK
26367 if ($nesting_block_flag) {
26369 # the next token after a ';' and label starts a new stmt
26370 if ( $type eq ';' || $type eq 'J' ) {
26371 $in_statement_continuation = 0;
26374 # otherwise, we are continuing the current statement
26376 $in_statement_continuation = 1;
26380 # if we are not in a BLOCK..
26383 # do not use continuation indentation if not list
26384 # environment (could be within if/elsif clause)
26385 if ( !$nesting_list_flag ) {
26386 $in_statement_continuation = 0;
26389 # otherwise, the next token after a ',' starts a new term
26390 elsif ( $type eq ',' ) {
26391 $in_statement_continuation = 0;
26394 # otherwise, we are continuing the current term
26396 $in_statement_continuation = 1;
26402 if ( $level_in_tokenizer < 0 ) {
26403 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
26404 $tokenizer_self->{_saw_negative_indentation} = 1;
26405 warning("Starting negative indentation\n");
26409 # set secondary nesting levels based on all containment token types
26410 # Note: these are set so that the nesting depth is the depth
26411 # of the PREVIOUS TOKEN, which is convenient for setting
26412 # the strength of token bonds
26413 my $slevel_i = $slevel_in_tokenizer;
26416 if ( $is_opening_type{$type} ) {
26417 $slevel_in_tokenizer++;
26418 $nesting_token_string .= $tok;
26419 $nesting_type_string .= $type;
26423 elsif ( $is_closing_type{$type} ) {
26424 $slevel_in_tokenizer--;
26425 my $char = chop $nesting_token_string;
26427 if ( $char ne $matching_start_token{$tok} ) {
26428 $nesting_token_string .= $char . $tok;
26429 $nesting_type_string .= $type;
26432 chop $nesting_type_string;
26436 push( @block_type, $routput_block_type->[$i] );
26437 push( @ci_string, $ci_string_i );
26438 push( @container_environment, $container_environment );
26439 push( @container_type, $routput_container_type->[$i] );
26440 push( @levels, $level_i );
26441 push( @nesting_tokens, $nesting_token_string_i );
26442 push( @nesting_types, $nesting_type_string_i );
26443 push( @slevels, $slevel_i );
26444 push( @token_type, $fix_type );
26445 push( @type_sequence, $routput_type_sequence->[$i] );
26446 push( @nesting_blocks, $nesting_block_string );
26447 push( @nesting_lists, $nesting_list_string );
26449 # now form the previous token
26452 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
26456 substr( $input_line, $$rtoken_map[$im], $num ) );
26462 $num = length($input_line) - $$rtoken_map[$im]; # make the last token
26464 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
26467 $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
26468 $tokenizer_self->{_in_quote} = $in_quote;
26469 $tokenizer_self->{_quote_target} =
26470 $in_quote ? matching_end_token($quote_character) : "";
26471 $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
26473 $line_of_tokens->{_rtoken_type} = \@token_type;
26474 $line_of_tokens->{_rtokens} = \@tokens;
26475 $line_of_tokens->{_rblock_type} = \@block_type;
26476 $line_of_tokens->{_rcontainer_type} = \@container_type;
26477 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
26478 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
26479 $line_of_tokens->{_rlevels} = \@levels;
26480 $line_of_tokens->{_rslevels} = \@slevels;
26481 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
26482 $line_of_tokens->{_rci_levels} = \@ci_string;
26483 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
26487 } # end tokenize_this_line
26489 #########i#############################################################
26490 # Tokenizer routines which assist in identifying token types
26491 #######################################################################
26493 sub operator_expected {
26495 # Many perl symbols have two or more meanings. For example, '<<'
26496 # can be a shift operator or a here-doc operator. The
26497 # interpretation of these symbols depends on the current state of
26498 # the tokenizer, which may either be expecting a term or an
26499 # operator. For this example, a << would be a shift if an operator
26500 # is expected, and a here-doc if a term is expected. This routine
26501 # is called to make this decision for any current token. It returns
26502 # one of three possible values:
26504 # OPERATOR - operator expected (or at least, not a term)
26505 # UNKNOWN - can't tell
26506 # TERM - a term is expected (or at least, not an operator)
26508 # The decision is based on what has been seen so far. This
26509 # information is stored in the "$last_nonblank_type" and
26510 # "$last_nonblank_token" variables. For example, if the
26511 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
26512 # if $last_nonblank_type is 'n' (numeric), we are expecting an
26515 # If a UNKNOWN is returned, the calling routine must guess. A major
26516 # goal of this tokenizer is to minimize the possibility of returning
26517 # UNKNOWN, because a wrong guess can spoil the formatting of a
26520 # adding NEW_TOKENS: it is critically important that this routine be
26521 # updated to allow it to determine if an operator or term is to be
26522 # expected after the new token. Doing this simply involves adding
26523 # the new token character to one of the regexes in this routine or
26524 # to one of the hash lists
26525 # that it uses, which are initialized in the BEGIN section.
26526 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
26529 my ( $prev_type, $tok, $next_type ) = @_;
26531 my $op_expected = UNKNOWN;
26533 ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
26535 # Note: function prototype is available for token type 'U' for future
26536 # program development. It contains the leading and trailing parens,
26537 # and no blanks. It might be used to eliminate token type 'C', for
26538 # example (prototype = '()'). Thus:
26539 # if ($last_nonblank_type eq 'U') {
26540 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
26543 # A possible filehandle (or object) requires some care...
26544 if ( $last_nonblank_type eq 'Z' ) {
26547 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
26548 $op_expected = UNKNOWN;
26551 # For possible file handle like "$a", Perl uses weird parsing rules.
26553 # print $a/2,"/hi"; - division
26554 # print $a / 2,"/hi"; - division
26555 # print $a/ 2,"/hi"; - division
26556 # print $a /2,"/hi"; - pattern (and error)!
26557 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
26558 $op_expected = TERM;
26561 # Note when an operation is being done where a
26562 # filehandle might be expected, since a change in whitespace
26563 # could change the interpretation of the statement.
26565 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
26566 complain("operator in print statement not recommended\n");
26567 $op_expected = OPERATOR;
26572 # Check for smartmatch operator before preceding brace or square bracket.
26573 # For example, at the ? after the ] in the following expressions we are
26574 # expecting an operator:
26576 # qr/3/ ~~ ['1234'] ? 1 : 0;
26577 # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
26578 elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
26579 $op_expected = OPERATOR;
26582 # handle something after 'do' and 'eval'
26583 elsif ( $is_block_operator{$last_nonblank_token} ) {
26585 # something like $a = eval "expression";
26587 if ( $last_nonblank_type eq 'k' ) {
26588 $op_expected = TERM; # expression or list mode following keyword
26591 # something like $a = do { BLOCK } / 2;
26592 # or this ? after a smartmatch anonynmous hash or array reference:
26593 # qr/3/ ~~ ['1234'] ? 1 : 0;
26596 $op_expected = OPERATOR; # block mode following }
26600 # handle bare word..
26601 elsif ( $last_nonblank_type eq 'w' ) {
26603 # unfortunately, we can't tell what type of token to expect next
26604 # after most bare words
26605 $op_expected = UNKNOWN;
26608 # operator, but not term possible after these types
26609 # Note: moved ')' from type to token because parens in list context
26610 # get marked as '{' '}' now. This is a minor glitch in the following:
26611 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
26613 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
26614 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
26616 $op_expected = OPERATOR;
26618 # in a 'use' statement, numbers and v-strings are not true
26619 # numbers, so to avoid incorrect error messages, we will
26620 # mark them as unknown for now (use.t)
26621 # TODO: it would be much nicer to create a new token V for VERSION
26622 # number in a use statement. Then this could be a check on type V
26623 # and related patches which change $statement_type for '=>'
26624 # and ',' could be removed. Further, it would clean things up to
26625 # scan the 'use' statement with a separate subroutine.
26626 if ( ( $statement_type eq 'use' )
26627 && ( $last_nonblank_type =~ /^[nv]$/ ) )
26629 $op_expected = UNKNOWN;
26632 # expecting VERSION or {} after package NAMESPACE
26633 elsif ($statement_type =~ /^package\b/
26634 && $last_nonblank_token =~ /^package\b/ )
26636 $op_expected = TERM;
26640 # no operator after many keywords, such as "die", "warn", etc
26641 elsif ( $expecting_term_token{$last_nonblank_token} ) {
26643 # patch for dor.t (defined or).
26644 # perl functions which may be unary operators
26645 # TODO: This list is incomplete, and these should be put
26648 && $next_type eq '/'
26649 && $last_nonblank_type eq 'k'
26650 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
26652 $op_expected = OPERATOR;
26655 $op_expected = TERM;
26659 # no operator after things like + - ** (i.e., other operators)
26660 elsif ( $expecting_term_types{$last_nonblank_type} ) {
26661 $op_expected = TERM;
26664 # a few operators, like "time", have an empty prototype () and so
26665 # take no parameters but produce a value to operate on
26666 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
26667 $op_expected = OPERATOR;
26670 # post-increment and decrement produce values to be operated on
26671 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
26672 $op_expected = OPERATOR;
26675 # no value to operate on after sub block
26676 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
26678 # a right brace here indicates the end of a simple block.
26679 # all non-structural right braces have type 'R'
26680 # all braces associated with block operator keywords have been given those
26681 # keywords as "last_nonblank_token" and caught above.
26682 # (This statement is order dependent, and must come after checking
26683 # $last_nonblank_token).
26684 elsif ( $last_nonblank_type eq '}' ) {
26686 # patch for dor.t (defined or).
26688 && $next_type eq '/'
26689 && $last_nonblank_token eq ']' )
26691 $op_expected = OPERATOR;
26694 $op_expected = TERM;
26698 # something else..what did I forget?
26701 # collecting diagnostics on unknown operator types..see what was missed
26702 $op_expected = UNKNOWN;
26704 "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
26708 TOKENIZER_DEBUG_FLAG_EXPECT && do {
26710 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
26712 return $op_expected;
26715 sub new_statement_ok {
26717 # return true if the current token can start a new statement
26718 # USES GLOBAL VARIABLES: $last_nonblank_type
26720 return label_ok() # a label would be ok here
26722 || $last_nonblank_type eq 'J'; # or we follow a label
26728 # Decide if a bare word followed by a colon here is a label
26729 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
26730 # $brace_depth, @brace_type
26732 # if it follows an opening or closing code block curly brace..
26733 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
26734 && $last_nonblank_type eq $last_nonblank_token )
26737 # it is a label if and only if the curly encloses a code block
26738 return $brace_type[$brace_depth];
26741 # otherwise, it is a label if and only if it follows a ';' (real or fake)
26744 return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
26748 sub code_block_type {
26750 # Decide if this is a block of code, and its type.
26751 # Must be called only when $type = $token = '{'
26752 # The problem is to distinguish between the start of a block of code
26753 # and the start of an anonymous hash reference
26754 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
26755 # to indicate the type of code block. (For example, 'last_nonblank_token'
26756 # might be 'if' for an if block, 'else' for an else block, etc).
26757 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
26758 # $last_nonblank_block_type, $brace_depth, @brace_type
26760 # handle case of multiple '{'s
26762 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
26764 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
26765 if ( $last_nonblank_token eq '{'
26766 && $last_nonblank_type eq $last_nonblank_token )
26769 # opening brace where a statement may appear is probably
26770 # a code block but might be and anonymous hash reference
26771 if ( $brace_type[$brace_depth] ) {
26772 return decide_if_code_block( $i, $rtokens, $rtoken_type,
26773 $max_token_index );
26776 # cannot start a code block within an anonymous hash
26782 elsif ( $last_nonblank_token eq ';' ) {
26784 # an opening brace where a statement may appear is probably
26785 # a code block but might be and anonymous hash reference
26786 return decide_if_code_block( $i, $rtokens, $rtoken_type,
26787 $max_token_index );
26790 # handle case of '}{'
26791 elsif ($last_nonblank_token eq '}'
26792 && $last_nonblank_type eq $last_nonblank_token )
26795 # a } { situation ...
26796 # could be hash reference after code block..(blktype1.t)
26797 if ($last_nonblank_block_type) {
26798 return decide_if_code_block( $i, $rtokens, $rtoken_type,
26799 $max_token_index );
26802 # must be a block if it follows a closing hash reference
26804 return $last_nonblank_token;
26808 # NOTE: braces after type characters start code blocks, but for
26809 # simplicity these are not identified as such. See also
26810 # sub is_non_structural_brace.
26812 ## elsif ( $last_nonblank_type eq 't' ) {
26813 ## return $last_nonblank_token;
26816 # brace after label:
26817 elsif ( $last_nonblank_type eq 'J' ) {
26818 return $last_nonblank_token;
26821 # otherwise, look at previous token. This must be a code block if
26822 # it follows any of these:
26823 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
26824 elsif ( $is_code_block_token{$last_nonblank_token} ) {
26826 # Bug Patch: Note that the opening brace after the 'if' in the following
26827 # snippet is an anonymous hash ref and not a code block!
26828 # print 'hi' if { x => 1, }->{x};
26829 # We can identify this situation because the last nonblank type
26830 # will be a keyword (instead of a closing peren)
26831 if ( $last_nonblank_token =~ /^(if|unless)$/
26832 && $last_nonblank_type eq 'k' )
26837 return $last_nonblank_token;
26841 # or a sub or package BLOCK
26842 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
26843 && $last_nonblank_token =~ /^(sub|package)\b/ )
26845 return $last_nonblank_token;
26848 elsif ( $statement_type =~ /^(sub|package)\b/ ) {
26849 return $statement_type;
26852 # user-defined subs with block parameters (like grep/map/eval)
26853 elsif ( $last_nonblank_type eq 'G' ) {
26854 return $last_nonblank_token;
26858 elsif ( $last_nonblank_type eq 'w' ) {
26859 return decide_if_code_block( $i, $rtokens, $rtoken_type,
26860 $max_token_index );
26863 # anything else must be anonymous hash reference
26869 sub decide_if_code_block {
26871 # USES GLOBAL VARIABLES: $last_nonblank_token
26872 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
26873 my ( $next_nonblank_token, $i_next ) =
26874 find_next_nonblank_token( $i, $rtokens, $max_token_index );
26876 # we are at a '{' where a statement may appear.
26877 # We must decide if this brace starts an anonymous hash or a code
26879 # return "" if anonymous hash, and $last_nonblank_token otherwise
26881 # initialize to be code BLOCK
26882 my $code_block_type = $last_nonblank_token;
26884 # Check for the common case of an empty anonymous hash reference:
26885 # Maybe something like sub { { } }
26886 if ( $next_nonblank_token eq '}' ) {
26887 $code_block_type = "";
26892 # To guess if this '{' is an anonymous hash reference, look ahead
26893 # and test as follows:
26895 # it is a hash reference if next come:
26896 # - a string or digit followed by a comma or =>
26897 # - bareword followed by =>
26898 # otherwise it is a code block
26900 # Examples of anonymous hash ref:
26904 # Examples of code blocks:
26905 # {1; print "hello\n", 1;}
26908 # We are only going to look ahead one more (nonblank/comment) line.
26909 # Strange formatting could cause a bad guess, but that's unlikely.
26910 my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
26911 my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
26912 my ( $rpre_tokens, $rpre_types ) =
26913 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
26914 # generous, and prevents
26916 # time in mangled files
26917 if ( defined($rpre_types) && @$rpre_types ) {
26918 push @pre_types, @$rpre_types;
26919 push @pre_tokens, @$rpre_tokens;
26922 # put a sentinel token to simplify stopping the search
26923 push @pre_types, '}';
26926 $jbeg = 1 if $pre_types[0] eq 'b';
26928 # first look for one of these
26930 # - bareword with leading -
26934 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
26936 # find the closing quote; don't worry about escapes
26937 my $quote_mark = $pre_types[$j];
26938 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
26939 if ( $pre_types[$k] eq $quote_mark ) {
26941 my $next = $pre_types[$j];
26946 elsif ( $pre_types[$j] eq 'd' ) {
26949 elsif ( $pre_types[$j] eq 'w' ) {
26950 unless ( $is_keyword{ $pre_tokens[$j] } ) {
26954 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
26957 if ( $j > $jbeg ) {
26959 $j++ if $pre_types[$j] eq 'b';
26961 # it's a hash ref if a comma or => follow next
26962 if ( $pre_types[$j] eq ','
26963 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
26965 $code_block_type = "";
26970 return $code_block_type;
26975 # report unexpected token type and show where it is
26976 # USES GLOBAL VARIABLES: $tokenizer_self
26977 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
26978 $rpretoken_type, $input_line )
26981 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
26982 my $msg = "found $found where $expecting expected";
26983 my $pos = $$rpretoken_map[$i_tok];
26984 interrupt_logfile();
26985 my $input_line_number = $tokenizer_self->{_last_line_number};
26986 my ( $offset, $numbered_line, $underline ) =
26987 make_numbered_line( $input_line_number, $input_line, $pos );
26988 $underline = write_on_underline( $underline, $pos - $offset, '^' );
26991 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
26992 my $pos_prev = $$rpretoken_map[$last_nonblank_i];
26994 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
26995 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
26998 $num = $pos - $pos_prev;
27000 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
27003 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
27004 $trailer = " (previous token underlined)";
27006 warning( $numbered_line . "\n" );
27007 warning( $underline . "\n" );
27008 warning( $msg . $trailer . "\n" );
27013 sub is_non_structural_brace {
27015 # Decide if a brace or bracket is structural or non-structural
27016 # by looking at the previous token and type
27017 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
27019 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
27020 # Tentatively deactivated because it caused the wrong operator expectation
27022 # $user = @vars[1] / 100;
27023 # Must update sub operator_expected before re-implementing.
27024 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
27028 # NOTE: braces after type characters start code blocks, but for
27029 # simplicity these are not identified as such. See also
27030 # sub code_block_type
27032 ##if ($last_nonblank_type eq 't') {return 0}
27034 # otherwise, it is non-structural if it is decorated
27035 # by type information.
27036 # For example, the '{' here is non-structural: ${xxx}
27038 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
27040 # or if we follow a hash or array closing curly brace or bracket
27041 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
27042 # because the first '}' would have been given type 'R'
27043 || $last_nonblank_type =~ /^([R\]])$/
27047 #########i#############################################################
27048 # Tokenizer routines for tracking container nesting depths
27049 #######################################################################
27051 # The following routines keep track of nesting depths of the nesting
27052 # types, ( [ { and ?. This is necessary for determining the indentation
27053 # level, and also for debugging programs. Not only do they keep track of
27054 # nesting depths of the individual brace types, but they check that each
27055 # of the other brace types is balanced within matching pairs. For
27056 # example, if the program sees this sequence:
27060 # then it can determine that there is an extra left paren somewhere
27061 # between the { and the }. And so on with every other possible
27062 # combination of outer and inner brace types. For another
27067 # which has an extra ] within the parens.
27069 # The brace types have indexes 0 .. 3 which are indexes into
27072 # The pair ? : are treated as just another nesting type, with ? acting
27073 # as the opening brace and : acting as the closing brace.
27077 # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
27079 # saves the nesting depth of brace type $b (where $b is either of the other
27080 # nesting types) when brace type $a enters a new depth. When this depth
27081 # decreases, a check is made that the current depth of brace types $b is
27082 # unchanged, or otherwise there must have been an error. This can
27083 # be very useful for localizing errors, particularly when perl runs to
27084 # the end of a large file (such as this one) and announces that there
27085 # is a problem somewhere.
27087 # A numerical sequence number is maintained for every nesting type,
27088 # so that each matching pair can be uniquely identified in a simple
27091 sub increase_nesting_depth {
27092 my ( $aa, $pos ) = @_;
27094 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27095 # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
27098 $current_depth[$aa]++;
27100 $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
27101 my $input_line_number = $tokenizer_self->{_last_line_number};
27102 my $input_line = $tokenizer_self->{_line_text};
27104 # Sequence numbers increment by number of items. This keeps
27105 # a unique set of numbers but still allows the relative location
27106 # of any type to be determined.
27107 $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
27108 my $seqno = $nesting_sequence_number[$aa];
27109 $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
27111 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
27112 [ $input_line_number, $input_line, $pos ];
27114 for $bb ( 0 .. $#closing_brace_names ) {
27115 next if ( $bb == $aa );
27116 $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
27119 # set a flag for indenting a nested ternary statement
27121 if ( $aa == QUESTION_COLON ) {
27122 $nested_ternary_flag[ $current_depth[$aa] ] = 0;
27123 if ( $current_depth[$aa] > 1 ) {
27124 if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
27125 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
27126 if ( $pdepth == $total_depth - 1 ) {
27128 $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
27133 $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
27134 $statement_type = "";
27135 return ( $seqno, $indent );
27138 sub decrease_nesting_depth {
27140 my ( $aa, $pos ) = @_;
27142 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27143 # @current_sequence_number, @depth_array, @starting_line_of_current_depth
27147 my $input_line_number = $tokenizer_self->{_last_line_number};
27148 my $input_line = $tokenizer_self->{_line_text};
27152 if ( $current_depth[$aa] > 0 ) {
27154 # set a flag for un-indenting after seeing a nested ternary statement
27155 $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
27156 if ( $aa == QUESTION_COLON ) {
27157 $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
27159 $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
27161 # check that any brace types $bb contained within are balanced
27162 for $bb ( 0 .. $#closing_brace_names ) {
27163 next if ( $bb == $aa );
27165 unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
27166 $current_depth[$bb] )
27169 $current_depth[$bb] -
27170 $depth_array[$aa][$bb][ $current_depth[$aa] ];
27172 # don't whine too many times
27173 my $saw_brace_error = get_saw_brace_error();
27175 $saw_brace_error <= MAX_NAG_MESSAGES
27177 # if too many closing types have occurred, we probably
27178 # already caught this error
27179 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
27182 interrupt_logfile();
27184 $starting_line_of_current_depth[$aa]
27185 [ $current_depth[$aa] ];
27187 my $rel = [ $input_line_number, $input_line, $pos ];
27191 if ( $diff == 1 || $diff == -1 ) {
27199 ? $opening_brace_names[$bb]
27200 : $closing_brace_names[$bb];
27201 write_error_indicator_pair( @$rsl, '^' );
27203 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
27208 $starting_line_of_current_depth[$bb]
27209 [ $current_depth[$bb] ];
27212 " The most recent un-matched $bname is on line $ml\n";
27213 write_error_indicator_pair( @$rml, '^' );
27215 write_error_indicator_pair( @$rel, '^' );
27219 increment_brace_error();
27222 $current_depth[$aa]--;
27226 my $saw_brace_error = get_saw_brace_error();
27227 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
27229 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
27231 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
27233 increment_brace_error();
27235 return ( $seqno, $outdent );
27238 sub check_final_nesting_depths {
27241 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
27243 for $aa ( 0 .. $#closing_brace_names ) {
27245 if ( $current_depth[$aa] ) {
27247 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
27250 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
27251 The most recent un-matched $opening_brace_names[$aa] is on line $sl
27253 indicate_error( $msg, @$rsl, '^' );
27254 increment_brace_error();
27259 #########i#############################################################
27260 # Tokenizer routines for looking ahead in input stream
27261 #######################################################################
27263 sub peek_ahead_for_n_nonblank_pre_tokens {
27265 # returns next n pretokens if they exist
27266 # returns undef's if hits eof without seeing any pretokens
27267 # USES GLOBAL VARIABLES: $tokenizer_self
27268 my $max_pretokens = shift;
27271 my ( $rpre_tokens, $rmap, $rpre_types );
27273 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27275 $line =~ s/^\s*//; # trim leading blanks
27276 next if ( length($line) <= 0 ); # skip blank
27277 next if ( $line =~ /^#/ ); # skip comment
27278 ( $rpre_tokens, $rmap, $rpre_types ) =
27279 pre_tokenize( $line, $max_pretokens );
27282 return ( $rpre_tokens, $rpre_types );
27285 # look ahead for next non-blank, non-comment line of code
27286 sub peek_ahead_for_nonblank_token {
27288 # USES GLOBAL VARIABLES: $tokenizer_self
27289 my ( $rtokens, $max_token_index ) = @_;
27293 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27295 $line =~ s/^\s*//; # trim leading blanks
27296 next if ( length($line) <= 0 ); # skip blank
27297 next if ( $line =~ /^#/ ); # skip comment
27298 my ( $rtok, $rmap, $rtype ) =
27299 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
27300 my $j = $max_token_index + 1;
27303 foreach $tok (@$rtok) {
27304 last if ( $tok =~ "\n" );
27305 $$rtokens[ ++$j ] = $tok;
27312 #########i#############################################################
27313 # Tokenizer guessing routines for ambiguous situations
27314 #######################################################################
27316 sub guess_if_pattern_or_conditional {
27318 # this routine is called when we have encountered a ? following an
27319 # unknown bareword, and we must decide if it starts a pattern or not
27320 # input parameters:
27321 # $i - token index of the ? starting possible pattern
27322 # output parameters:
27323 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
27324 # msg = a warning or diagnostic message
27325 # USES GLOBAL VARIABLES: $last_nonblank_token
27326 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27327 my $is_pattern = 0;
27328 my $msg = "guessing that ? after $last_nonblank_token starts a ";
27330 if ( $i >= $max_token_index ) {
27331 $msg .= "conditional (no end to pattern found on the line)\n";
27336 my $next_token = $$rtokens[$i]; # first token after ?
27338 # look for a possible ending ? on this line..
27340 my $quote_depth = 0;
27341 my $quote_character = '';
27345 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27348 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27349 $quote_pos, $quote_depth, $max_token_index );
27353 # we didn't find an ending ? on this line,
27354 # so we bias towards conditional
27356 $msg .= "conditional (no ending ? on this line)\n";
27358 # we found an ending ?, so we bias towards a pattern
27362 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27364 $msg .= "pattern (found ending ? and pattern expected)\n";
27367 $msg .= "pattern (uncertain, but found ending ?)\n";
27371 return ( $is_pattern, $msg );
27374 sub guess_if_pattern_or_division {
27376 # this routine is called when we have encountered a / following an
27377 # unknown bareword, and we must decide if it starts a pattern or is a
27379 # input parameters:
27380 # $i - token index of the / starting possible pattern
27381 # output parameters:
27382 # $is_pattern = 0 if probably division, =1 if probably a pattern
27383 # msg = a warning or diagnostic message
27384 # USES GLOBAL VARIABLES: $last_nonblank_token
27385 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27386 my $is_pattern = 0;
27387 my $msg = "guessing that / after $last_nonblank_token starts a ";
27389 if ( $i >= $max_token_index ) {
27390 $msg .= "division (no end to pattern found on the line)\n";
27394 my $divide_expected =
27395 numerator_expected( $i, $rtokens, $max_token_index );
27397 my $next_token = $$rtokens[$i]; # first token after slash
27399 # look for a possible ending / on this line..
27401 my $quote_depth = 0;
27402 my $quote_character = '';
27406 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27409 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27410 $quote_pos, $quote_depth, $max_token_index );
27414 # we didn't find an ending / on this line,
27415 # so we bias towards division
27416 if ( $divide_expected >= 0 ) {
27418 $msg .= "division (no ending / on this line)\n";
27421 $msg = "multi-line pattern (division not possible)\n";
27427 # we found an ending /, so we bias towards a pattern
27430 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27432 if ( $divide_expected >= 0 ) {
27434 if ( $i - $ibeg > 60 ) {
27435 $msg .= "division (matching / too distant)\n";
27439 $msg .= "pattern (but division possible too)\n";
27445 $msg .= "pattern (division not possible)\n";
27450 if ( $divide_expected >= 0 ) {
27452 $msg .= "division (pattern not possible)\n";
27457 "pattern (uncertain, but division would not work here)\n";
27462 return ( $is_pattern, $msg );
27465 # try to resolve here-doc vs. shift by looking ahead for
27466 # non-code or the end token (currently only looks for end token)
27467 # returns 1 if it is probably a here doc, 0 if not
27468 sub guess_if_here_doc {
27470 # This is how many lines we will search for a target as part of the
27471 # guessing strategy. It is a constant because there is probably
27472 # little reason to change it.
27473 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
27475 use constant HERE_DOC_WINDOW => 40;
27477 my $next_token = shift;
27478 my $here_doc_expected = 0;
27481 my $msg = "checking <<";
27483 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
27487 if ( $line =~ /^$next_token$/ ) {
27488 $msg .= " -- found target $next_token ahead $k lines\n";
27489 $here_doc_expected = 1; # got it
27492 last if ( $k >= HERE_DOC_WINDOW );
27495 unless ($here_doc_expected) {
27497 if ( !defined($line) ) {
27498 $here_doc_expected = -1; # hit eof without seeing target
27499 $msg .= " -- must be shift; target $next_token not in file\n";
27502 else { # still unsure..taking a wild guess
27504 if ( !$is_constant{$current_package}{$next_token} ) {
27505 $here_doc_expected = 1;
27507 " -- guessing it's a here-doc ($next_token not a constant)\n";
27511 " -- guessing it's a shift ($next_token is a constant)\n";
27515 write_logfile_entry($msg);
27516 return $here_doc_expected;
27519 #########i#############################################################
27520 # Tokenizer Routines for scanning identifiers and related items
27521 #######################################################################
27523 sub scan_bare_identifier_do {
27525 # this routine is called to scan a token starting with an alphanumeric
27526 # variable or package separator, :: or '.
27527 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
27528 # $last_nonblank_type,@paren_type, $paren_depth
27530 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
27534 my $package = undef;
27538 # we have to back up one pretoken at a :: since each : is one pretoken
27539 if ( $tok eq '::' ) { $i_beg-- }
27540 if ( $tok eq '->' ) { $i_beg-- }
27541 my $pos_beg = $$rtoken_map[$i_beg];
27542 pos($input_line) = $pos_beg;
27549 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
27551 my $pos = pos($input_line);
27552 my $numc = $pos - $pos_beg;
27553 $tok = substr( $input_line, $pos_beg, $numc );
27555 # type 'w' includes anything without leading type info
27556 # ($,%,@,*) including something like abc::def::ghi
27560 if ( defined($2) ) { $sub_name = $2; }
27561 if ( defined($1) ) {
27564 # patch: don't allow isolated package name which just ends
27565 # in the old style package separator (single quote). Example:
27567 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
27571 $package =~ s/\'/::/g;
27572 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
27573 $package =~ s/::$//;
27576 $package = $current_package;
27578 if ( $is_keyword{$tok} ) {
27583 # if it is a bareword..
27584 if ( $type eq 'w' ) {
27586 # check for v-string with leading 'v' type character
27587 # (This seems to have precedence over filehandle, type 'Y')
27588 if ( $tok =~ /^v\d[_\d]*$/ ) {
27590 # we only have the first part - something like 'v101' -
27592 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
27593 $pos = pos($input_line);
27594 $numc = $pos - $pos_beg;
27595 $tok = substr( $input_line, $pos_beg, $numc );
27599 # warn if this version can't handle v-strings
27600 report_v_string($tok);
27603 elsif ( $is_constant{$package}{$sub_name} ) {
27607 # bareword after sort has implied empty prototype; for example:
27608 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
27609 # This has priority over whatever the user has specified.
27610 elsif ($last_nonblank_token eq 'sort'
27611 && $last_nonblank_type eq 'k' )
27616 # Note: strangely, perl does not seem to really let you create
27617 # functions which act like eval and do, in the sense that eval
27618 # and do may have operators following the final }, but any operators
27619 # that you create with prototype (&) apparently do not allow
27620 # trailing operators, only terms. This seems strange.
27621 # If this ever changes, here is the update
27622 # to make perltidy behave accordingly:
27624 # elsif ( $is_block_function{$package}{$tok} ) {
27625 # $tok='eval'; # patch to do braces like eval - doesn't work
27628 # FIXME: This could become a separate type to allow for different
27630 elsif ( $is_block_function{$package}{$sub_name} ) {
27634 elsif ( $is_block_list_function{$package}{$sub_name} ) {
27637 elsif ( $is_user_function{$package}{$sub_name} ) {
27639 $prototype = $user_function_prototype{$package}{$sub_name};
27642 # check for indirect object
27645 # added 2001-03-27: must not be followed immediately by '('
27647 ( $input_line !~ m/\G\(/gc )
27652 # preceded by keyword like 'print', 'printf' and friends
27653 $is_indirect_object_taker{$last_nonblank_token}
27655 # or preceded by something like 'print(' or 'printf('
27657 ( $last_nonblank_token eq '(' )
27658 && $is_indirect_object_taker{ $paren_type[$paren_depth]
27666 # may not be indirect object unless followed by a space
27667 if ( $input_line =~ m/\G\s+/gc ) {
27671 # Perl's indirect object notation is a very bad
27672 # thing and can cause subtle bugs, especially for
27673 # beginning programmers. And I haven't even been
27674 # able to figure out a sane warning scheme which
27675 # doesn't get in the way of good scripts.
27677 # Complain if a filehandle has any lower case
27678 # letters. This is suggested good practice.
27679 # Use 'sub_name' because something like
27680 # main::MYHANDLE is ok for filehandle
27681 if ( $sub_name =~ /[a-z]/ ) {
27683 # could be bug caused by older perltidy if
27685 if ( $input_line =~ m/\G\s*\(/gc ) {
27687 "Caution: unknown word '$tok' in indirect object slot\n"
27693 # bareword not followed by a space -- may not be filehandle
27694 # (may be function call defined in a 'use' statement)
27701 # Now we must convert back from character position
27702 # to pre_token index.
27703 # I don't think an error flag can occur here ..but who knows
27706 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27708 warning("scan_bare_identifier: Possibly invalid tokenization\n");
27712 # no match but line not blank - could be syntax error
27713 # perl will take '::' alone without complaint
27717 # change this warning to log message if it becomes annoying
27718 warning("didn't find identifier after leading ::\n");
27720 return ( $i, $tok, $type, $prototype );
27725 # This is the new scanner and will eventually replace scan_identifier.
27726 # Only type 'sub' and 'package' are implemented.
27727 # Token types $ * % @ & -> are not yet implemented.
27729 # Scan identifier following a type token.
27730 # The type of call depends on $id_scan_state: $id_scan_state = ''
27731 # for starting call, in which case $tok must be the token defining
27734 # If the type token is the last nonblank token on the line, a value
27735 # of $id_scan_state = $tok is returned, indicating that further
27736 # calls must be made to get the identifier. If the type token is
27737 # not the last nonblank token on the line, the identifier is
27738 # scanned and handled and a value of '' is returned.
27739 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
27740 # $statement_type, $tokenizer_self
27742 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
27746 my ( $i_beg, $pos_beg );
27748 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
27749 #my ($a,$b,$c) = caller;
27750 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
27752 # on re-entry, start scanning at first token on the line
27753 if ($id_scan_state) {
27758 # on initial entry, start scanning just after type token
27761 $id_scan_state = $tok;
27765 # find $i_beg = index of next nonblank token,
27766 # and handle empty lines
27767 my $blank_line = 0;
27768 my $next_nonblank_token = $$rtokens[$i_beg];
27769 if ( $i_beg > $max_token_index ) {
27774 # only a '#' immediately after a '$' is not a comment
27775 if ( $next_nonblank_token eq '#' ) {
27776 unless ( $tok eq '$' ) {
27781 if ( $next_nonblank_token =~ /^\s/ ) {
27782 ( $next_nonblank_token, $i_beg ) =
27783 find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
27784 $max_token_index );
27785 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
27791 # handle non-blank line; identifier, if any, must follow
27792 unless ($blank_line) {
27794 if ( $id_scan_state eq 'sub' ) {
27795 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
27796 $input_line, $i, $i_beg,
27797 $tok, $type, $rtokens,
27798 $rtoken_map, $id_scan_state, $max_token_index
27802 elsif ( $id_scan_state eq 'package' ) {
27803 ( $i, $tok, $type ) =
27804 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
27805 $rtoken_map, $max_token_index );
27806 $id_scan_state = '';
27810 warning("invalid token in scan_id: $tok\n");
27811 $id_scan_state = '';
27815 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
27817 # shouldn't happen:
27819 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
27821 report_definite_bug();
27824 TOKENIZER_DEBUG_FLAG_NSCAN && do {
27826 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
27828 return ( $i, $tok, $type, $id_scan_state );
27831 sub check_prototype {
27832 my ( $proto, $package, $subname ) = @_;
27833 return unless ( defined($package) && defined($subname) );
27834 if ( defined($proto) ) {
27835 $proto =~ s/^\s*\(\s*//;
27836 $proto =~ s/\s*\)$//;
27838 $is_user_function{$package}{$subname} = 1;
27839 $user_function_prototype{$package}{$subname} = "($proto)";
27841 # prototypes containing '&' must be treated specially..
27842 if ( $proto =~ /\&/ ) {
27844 # right curly braces of prototypes ending in
27845 # '&' may be followed by an operator
27846 if ( $proto =~ /\&$/ ) {
27847 $is_block_function{$package}{$subname} = 1;
27850 # right curly braces of prototypes NOT ending in
27851 # '&' may NOT be followed by an operator
27852 elsif ( $proto !~ /\&$/ ) {
27853 $is_block_list_function{$package}{$subname} = 1;
27858 $is_constant{$package}{$subname} = 1;
27862 $is_user_function{$package}{$subname} = 1;
27866 sub do_scan_package {
27868 # do_scan_package parses a package name
27869 # it is called with $i_beg equal to the index of the first nonblank
27870 # token following a 'package' token.
27871 # USES GLOBAL VARIABLES: $current_package,
27873 # package NAMESPACE
27874 # package NAMESPACE VERSION
27875 # package NAMESPACE BLOCK
27876 # package NAMESPACE VERSION BLOCK
27878 # If VERSION is provided, package sets the $VERSION variable in the given
27879 # namespace to a version object with the VERSION provided. VERSION must be
27880 # a "strict" style version number as defined by the version module: a
27881 # positive decimal number (integer or decimal-fraction) without
27882 # exponentiation or else a dotted-decimal v-string with a leading 'v'
27883 # character and at least three components.
27884 # reference http://perldoc.perl.org/functions/package.html
27886 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
27889 my $package = undef;
27890 my $pos_beg = $$rtoken_map[$i_beg];
27891 pos($input_line) = $pos_beg;
27893 # handle non-blank line; package name, if any, must follow
27894 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
27896 $package = ( defined($1) && $1 ) ? $1 : 'main';
27897 $package =~ s/\'/::/g;
27898 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
27899 $package =~ s/::$//;
27900 my $pos = pos($input_line);
27901 my $numc = $pos - $pos_beg;
27902 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
27905 # Now we must convert back from character position
27906 # to pre_token index.
27907 # I don't think an error flag can occur here ..but ?
27910 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27911 if ($error) { warning("Possibly invalid package\n") }
27912 $current_package = $package;
27914 # we should now have package NAMESPACE
27915 # now expecting VERSION, BLOCK, or ; to follow ...
27916 # package NAMESPACE VERSION
27917 # package NAMESPACE BLOCK
27918 # package NAMESPACE VERSION BLOCK
27919 my ( $next_nonblank_token, $i_next ) =
27920 find_next_nonblank_token( $i, $rtokens, $max_token_index );
27922 # check that something recognizable follows, but do not parse.
27923 # A VERSION number will be parsed later as a number or v-string in the
27924 # normal way. What is important is to set the statement type if
27925 # everything looks okay so that the operator_expected() routine
27926 # knows that the number is in a package statement.
27927 # Examples of valid primitive tokens that might follow are:
27928 # 1235 . ; { } v3 v
27929 if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
27930 $statement_type = $tok;
27934 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
27939 # no match but line not blank --
27940 # could be a label with name package, like package: , for example.
27945 return ( $i, $tok, $type );
27948 sub scan_identifier_do {
27950 # This routine assembles tokens into identifiers. It maintains a
27951 # scan state, id_scan_state. It updates id_scan_state based upon
27952 # current id_scan_state and token, and returns an updated
27953 # id_scan_state and the next index after the identifier.
27954 # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
27955 # $last_nonblank_type
27957 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
27962 my $tok_begin = $$rtokens[$i_begin];
27963 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
27964 my $id_scan_state_begin = $id_scan_state;
27965 my $identifier_begin = $identifier;
27966 my $tok = $tok_begin;
27969 # these flags will be used to help figure out the type:
27970 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
27973 # allow old package separator (') except in 'use' statement
27974 my $allow_tick = ( $last_nonblank_token ne 'use' );
27976 # get started by defining a type and a state if necessary
27977 unless ($id_scan_state) {
27978 $context = UNKNOWN_CONTEXT;
27980 # fixup for digraph
27981 if ( $tok eq '>' ) {
27985 $identifier = $tok;
27987 if ( $tok eq '$' || $tok eq '*' ) {
27988 $id_scan_state = '$';
27989 $context = SCALAR_CONTEXT;
27991 elsif ( $tok eq '%' || $tok eq '@' ) {
27992 $id_scan_state = '$';
27993 $context = LIST_CONTEXT;
27995 elsif ( $tok eq '&' ) {
27996 $id_scan_state = '&';
27998 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
27999 $saw_alpha = 0; # 'sub' is considered type info here
28000 $id_scan_state = '$';
28001 $identifier .= ' '; # need a space to separate sub from sub name
28003 elsif ( $tok eq '::' ) {
28004 $id_scan_state = 'A';
28006 elsif ( $tok =~ /^[A-Za-z_]/ ) {
28007 $id_scan_state = ':';
28009 elsif ( $tok eq '->' ) {
28010 $id_scan_state = '$';
28015 my ( $a, $b, $c ) = caller;
28016 warning("Program Bug: scan_identifier given bad token = $tok \n");
28017 warning(" called from sub $a line: $c\n");
28018 report_definite_bug();
28020 $saw_type = !$saw_alpha;
28024 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
28027 # now loop to gather the identifier
28030 while ( $i < $max_token_index ) {
28031 $i_save = $i unless ( $tok =~ /^\s*$/ );
28032 $tok = $$rtokens[ ++$i ];
28034 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
28039 if ( $id_scan_state eq '$' ) { # starting variable name
28041 if ( $tok eq '$' ) {
28043 $identifier .= $tok;
28045 # we've got a punctuation variable if end of line (punct.t)
28046 if ( $i == $max_token_index ) {
28048 $id_scan_state = '';
28052 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
28054 $id_scan_state = ':'; # now need ::
28055 $identifier .= $tok;
28057 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
28059 $id_scan_state = ':'; # now need ::
28060 $identifier .= $tok;
28062 # Perl will accept leading digits in identifiers,
28063 # although they may not always produce useful results.
28064 # Something like $main::0 is ok. But this also works:
28066 # sub howdy::123::bubba{ print "bubba $54321!\n" }
28067 # howdy::123::bubba();
28070 elsif ( $tok =~ /^[0-9]/ ) { # numeric
28072 $id_scan_state = ':'; # now need ::
28073 $identifier .= $tok;
28075 elsif ( $tok eq '::' ) {
28076 $id_scan_state = 'A';
28077 $identifier .= $tok;
28079 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
28080 $identifier .= $tok; # keep same state, a $ could follow
28082 elsif ( $tok eq '{' ) {
28084 # check for something like ${#} or ${©}
28085 ##if ( $identifier eq '$'
28089 || $identifier eq '@'
28090 || $identifier eq '$#'
28092 && $i + 2 <= $max_token_index
28093 && $$rtokens[ $i + 2 ] eq '}'
28094 && $$rtokens[ $i + 1 ] !~ /[\s\w]/
28097 my $next2 = $$rtokens[ $i + 2 ];
28098 my $next1 = $$rtokens[ $i + 1 ];
28099 $identifier .= $tok . $next1 . $next2;
28101 $id_scan_state = '';
28105 # skip something like ${xxx} or ->{
28106 $id_scan_state = '';
28108 # if this is the first token of a line, any tokens for this
28109 # identifier have already been accumulated
28110 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
28115 # space ok after leading $ % * & @
28116 elsif ( $tok =~ /^\s*$/ ) {
28118 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
28120 if ( length($identifier) > 1 ) {
28121 $id_scan_state = '';
28123 $type = 'i'; # probably punctuation variable
28128 # spaces after $'s are common, and space after @
28129 # is harmless, so only complain about space
28130 # after other type characters. Space after $ and
28131 # @ will be removed in formatting. Report space
28132 # after % and * because they might indicate a
28133 # parsing error. In other words '% ' might be a
28134 # modulo operator. Delete this warning if it
28136 if ( $identifier !~ /^[\@\$]$/ ) {
28138 "Space in identifier, following $identifier\n";
28144 # space after '->' is ok
28146 elsif ( $tok eq '^' ) {
28148 # check for some special variables like $^W
28149 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28150 $identifier .= $tok;
28151 $id_scan_state = 'A';
28153 # Perl accepts '$^]' or '@^]', but
28154 # there must not be a space before the ']'.
28155 my $next1 = $$rtokens[ $i + 1 ];
28156 if ( $next1 eq ']' ) {
28158 $identifier .= $next1;
28159 $id_scan_state = "";
28164 $id_scan_state = '';
28167 else { # something else
28169 # check for various punctuation variables
28170 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28171 $identifier .= $tok;
28174 elsif ( $identifier eq '$#' ) {
28176 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
28178 # perl seems to allow just these: $#: $#- $#+
28179 elsif ( $tok =~ /^[\:\-\+]$/ ) {
28181 $identifier .= $tok;
28185 write_logfile_entry( 'Use of $# is deprecated' . "\n" );
28188 elsif ( $identifier eq '$$' ) {
28190 # perl does not allow references to punctuation
28191 # variables without braces. For example, this
28195 # You would have to use
28199 if ( $tok eq '{' ) { $type = 't' }
28200 else { $type = 'i' }
28202 elsif ( $identifier eq '->' ) {
28207 if ( length($identifier) == 1 ) { $identifier = ''; }
28209 $id_scan_state = '';
28213 elsif ( $id_scan_state eq '&' ) { # starting sub call?
28215 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
28216 $id_scan_state = ':'; # now need ::
28218 $identifier .= $tok;
28220 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
28221 $id_scan_state = ':'; # now need ::
28223 $identifier .= $tok;
28225 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28226 $id_scan_state = ':'; # now need ::
28228 $identifier .= $tok;
28230 elsif ( $tok =~ /^\s*$/ ) { # allow space
28232 elsif ( $tok eq '::' ) { # leading ::
28233 $id_scan_state = 'A'; # accept alpha next
28234 $identifier .= $tok;
28236 elsif ( $tok eq '{' ) {
28237 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
28239 $id_scan_state = '';
28244 # punctuation variable?
28245 # testfile: cunningham4.pl
28247 # We have to be careful here. If we are in an unknown state,
28248 # we will reject the punctuation variable. In the following
28249 # example the '&' is a binary operator but we are in an unknown
28250 # state because there is no sigil on 'Prima', so we don't
28251 # know what it is. But it is a bad guess that
28252 # '&~' is a function variable.
28253 # $self->{text}->{colorMap}->[
28254 # Prima::PodView::COLOR_CODE_FOREGROUND
28255 # & ~tb::COLOR_INDEX ] =
28256 # $sec->{ColorCode}
28257 if ( $identifier eq '&' && $expecting ) {
28258 $identifier .= $tok;
28265 $id_scan_state = '';
28269 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
28271 if ( $tok =~ /^[A-Za-z_]/ ) { # found it
28272 $identifier .= $tok;
28273 $id_scan_state = ':'; # now need ::
28276 elsif ( $tok eq "'" && $allow_tick ) {
28277 $identifier .= $tok;
28278 $id_scan_state = ':'; # now need ::
28281 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28282 $identifier .= $tok;
28283 $id_scan_state = ':'; # now need ::
28286 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28287 $id_scan_state = '(';
28288 $identifier .= $tok;
28290 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28291 $id_scan_state = ')';
28292 $identifier .= $tok;
28295 $id_scan_state = '';
28300 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
28302 if ( $tok eq '::' ) { # got it
28303 $identifier .= $tok;
28304 $id_scan_state = 'A'; # now require alpha
28306 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
28307 $identifier .= $tok;
28308 $id_scan_state = ':'; # now need ::
28311 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28312 $identifier .= $tok;
28313 $id_scan_state = ':'; # now need ::
28316 elsif ( $tok eq "'" && $allow_tick ) { # tick
28318 if ( $is_keyword{$identifier} ) {
28319 $id_scan_state = ''; # that's all
28323 $identifier .= $tok;
28326 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28327 $id_scan_state = '(';
28328 $identifier .= $tok;
28330 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28331 $id_scan_state = ')';
28332 $identifier .= $tok;
28335 $id_scan_state = ''; # that's all
28340 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
28342 if ( $tok eq '(' ) { # got it
28343 $identifier .= $tok;
28344 $id_scan_state = ')'; # now find the end of it
28346 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
28347 $identifier .= $tok;
28350 $id_scan_state = ''; # that's all - no prototype
28355 elsif ( $id_scan_state eq ')' ) { # looking for ) to end
28357 if ( $tok eq ')' ) { # got it
28358 $identifier .= $tok;
28359 $id_scan_state = ''; # all done
28362 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
28363 $identifier .= $tok;
28365 else { # probable error in script, but keep going
28366 warning("Unexpected '$tok' while seeking end of prototype\n");
28367 $identifier .= $tok;
28370 else { # can get here due to error in initialization
28371 $id_scan_state = '';
28377 if ( $id_scan_state eq ')' ) {
28378 warning("Hit end of line while seeking ) to end prototype\n");
28381 # once we enter the actual identifier, it may not extend beyond
28382 # the end of the current line
28383 if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
28384 $id_scan_state = '';
28386 if ( $i < 0 ) { $i = 0 }
28393 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
28396 else { $type = 'i' }
28398 elsif ( $identifier eq '->' ) {
28402 ( length($identifier) > 1 )
28404 # In something like '@$=' we have an identifier '@$'
28405 # In something like '$${' we have type '$$' (and only
28406 # part of an identifier)
28407 && !( $identifier =~ /\$$/ && $tok eq '{' )
28408 && ( $identifier !~ /^(sub |package )$/ )
28413 else { $type = 't' }
28415 elsif ($saw_alpha) {
28417 # type 'w' includes anything without leading type info
28418 # ($,%,@,*) including something like abc::def::ghi
28423 } # this can happen on a restart
28427 $tok = $identifier;
28428 if ($message) { write_logfile_entry($message) }
28435 TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
28436 my ( $a, $b, $c ) = caller;
28438 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
28440 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
28442 return ( $i, $tok, $type, $id_scan_state, $identifier );
28447 # saved package and subnames in case prototype is on separate line
28448 my ( $package_saved, $subname_saved );
28452 # do_scan_sub parses a sub name and prototype
28453 # it is called with $i_beg equal to the index of the first nonblank
28454 # token following a 'sub' token.
28456 # TODO: add future error checks to be sure we have a valid
28457 # sub name. For example, 'sub &doit' is wrong. Also, be sure
28458 # a name is given if and only if a non-anonymous sub is
28460 # USES GLOBAL VARS: $current_package, $last_nonblank_token,
28461 # $in_attribute_list, %saw_function_definition,
28465 $input_line, $i, $i_beg,
28466 $tok, $type, $rtokens,
28467 $rtoken_map, $id_scan_state, $max_token_index
28469 $id_scan_state = ""; # normally we get everything in one call
28470 my $subname = undef;
28471 my $package = undef;
28476 my $pos_beg = $$rtoken_map[$i_beg];
28477 pos($input_line) = $pos_beg;
28479 # sub NAME PROTO ATTRS
28481 $input_line =~ m/\G\s*
28482 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
28483 (\w+) # NAME - required
28484 (\s*\([^){]*\))? # PROTO - something in parens
28485 (\s*:)? # ATTRS - leading : of attribute list
28494 $package = ( defined($1) && $1 ) ? $1 : $current_package;
28495 $package =~ s/\'/::/g;
28496 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
28497 $package =~ s/::$//;
28498 my $pos = pos($input_line);
28499 my $numc = $pos - $pos_beg;
28500 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
28504 # Look for prototype/attributes not preceded on this line by subname;
28505 # This might be an anonymous sub with attributes,
28506 # or a prototype on a separate line from its sub name
28508 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
28509 (\s*:)? # ATTRS leading ':'
28518 # Handle prototype on separate line from subname
28519 if ($subname_saved) {
28520 $package = $package_saved;
28521 $subname = $subname_saved;
28522 $tok = $last_nonblank_token;
28529 # ATTRS: if there are attributes, back up and let the ':' be
28530 # found later by the scanner.
28531 my $pos = pos($input_line);
28533 $pos -= length($attrs);
28536 my $next_nonblank_token = $tok;
28538 # catch case of line with leading ATTR ':' after anonymous sub
28539 if ( $pos == $pos_beg && $tok eq ':' ) {
28541 $in_attribute_list = 1;
28544 # We must convert back from character position
28545 # to pre_token index.
28548 # I don't think an error flag can occur here ..but ?
28550 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
28551 $max_token_index );
28552 if ($error) { warning("Possibly invalid sub\n") }
28554 # check for multiple definitions of a sub
28555 ( $next_nonblank_token, my $i_next ) =
28556 find_next_nonblank_token_on_this_line( $i, $rtokens,
28557 $max_token_index );
28560 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
28561 { # skip blank or side comment
28562 my ( $rpre_tokens, $rpre_types ) =
28563 peek_ahead_for_n_nonblank_pre_tokens(1);
28564 if ( defined($rpre_tokens) && @$rpre_tokens ) {
28565 $next_nonblank_token = $rpre_tokens->[0];
28568 $next_nonblank_token = '}';
28571 $package_saved = "";
28572 $subname_saved = "";
28573 if ( $next_nonblank_token eq '{' ) {
28576 # Check for multiple definitions of a sub, but
28577 # it is ok to have multiple sub BEGIN, etc,
28578 # so we do not complain if name is all caps
28579 if ( $saw_function_definition{$package}{$subname}
28580 && $subname !~ /^[A-Z]+$/ )
28582 my $lno = $saw_function_definition{$package}{$subname};
28584 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
28587 $saw_function_definition{$package}{$subname} =
28588 $tokenizer_self->{_last_line_number};
28591 elsif ( $next_nonblank_token eq ';' ) {
28593 elsif ( $next_nonblank_token eq '}' ) {
28596 # ATTRS - if an attribute list follows, remember the name
28597 # of the sub so the next opening brace can be labeled.
28598 # Setting 'statement_type' causes any ':'s to introduce
28600 elsif ( $next_nonblank_token eq ':' ) {
28601 $statement_type = $tok;
28604 # see if PROTO follows on another line:
28605 elsif ( $next_nonblank_token eq '(' ) {
28606 if ( $attrs || $proto ) {
28608 "unexpected '(' after definition or declaration of sub '$subname'\n"
28612 $id_scan_state = 'sub'; # we must come back to get proto
28613 $statement_type = $tok;
28614 $package_saved = $package;
28615 $subname_saved = $subname;
28618 elsif ($next_nonblank_token) { # EOF technically ok
28620 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
28623 check_prototype( $proto, $package, $subname );
28626 # no match but line not blank
28629 return ( $i, $tok, $type, $id_scan_state );
28633 #########i###############################################################
28634 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
28635 #########################################################################
28637 sub find_next_nonblank_token {
28638 my ( $i, $rtokens, $max_token_index ) = @_;
28640 if ( $i >= $max_token_index ) {
28641 if ( !peeked_ahead() ) {
28644 peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
28647 my $next_nonblank_token = $$rtokens[ ++$i ];
28649 if ( $next_nonblank_token =~ /^\s*$/ ) {
28650 $next_nonblank_token = $$rtokens[ ++$i ];
28652 return ( $next_nonblank_token, $i );
28655 sub numerator_expected {
28657 # this is a filter for a possible numerator, in support of guessing
28658 # for the / pattern delimiter token.
28663 # Note: I am using the convention that variables ending in
28664 # _expected have these 3 possible values.
28665 my ( $i, $rtokens, $max_token_index ) = @_;
28666 my $next_token = $$rtokens[ $i + 1 ];
28667 if ( $next_token eq '=' ) { $i++; } # handle /=
28668 my ( $next_nonblank_token, $i_next ) =
28669 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28671 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
28676 if ( $next_nonblank_token =~ /^\s*$/ ) {
28685 sub pattern_expected {
28687 # This is the start of a filter for a possible pattern.
28688 # It looks at the token after a possible pattern and tries to
28689 # determine if that token could end a pattern.
28694 my ( $i, $rtokens, $max_token_index ) = @_;
28695 my $next_token = $$rtokens[ $i + 1 ];
28696 if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
28697 my ( $next_nonblank_token, $i_next ) =
28698 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28700 # list of tokens which may follow a pattern
28701 # (can probably be expanded)
28702 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
28708 if ( $next_nonblank_token =~ /^\s*$/ ) {
28717 sub find_next_nonblank_token_on_this_line {
28718 my ( $i, $rtokens, $max_token_index ) = @_;
28719 my $next_nonblank_token;
28721 if ( $i < $max_token_index ) {
28722 $next_nonblank_token = $$rtokens[ ++$i ];
28724 if ( $next_nonblank_token =~ /^\s*$/ ) {
28726 if ( $i < $max_token_index ) {
28727 $next_nonblank_token = $$rtokens[ ++$i ];
28732 $next_nonblank_token = "";
28734 return ( $next_nonblank_token, $i );
28737 sub find_angle_operator_termination {
28739 # We are looking at a '<' and want to know if it is an angle operator.
28740 # We are to return:
28741 # $i = pretoken index of ending '>' if found, current $i otherwise
28742 # $type = 'Q' if found, '>' otherwise
28743 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
28746 pos($input_line) = 1 + $$rtoken_map[$i];
28750 # we just have to find the next '>' if a term is expected
28751 if ( $expecting == TERM ) { $filter = '[\>]' }
28753 # we have to guess if we don't know what is expected
28754 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
28756 # shouldn't happen - we shouldn't be here if operator is expected
28757 else { warning("Program Bug in find_angle_operator_termination\n") }
28759 # To illustrate what we might be looking at, in case we are
28760 # guessing, here are some examples of valid angle operators
28767 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
28768 # <${PREFIX}*img*.$IMAGE_TYPE>
28769 # <img*.$IMAGE_TYPE>
28770 # <Timg*.$IMAGE_TYPE>
28771 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
28773 # Here are some examples of lines which do not have angle operators:
28774 # return undef unless $self->[2]++ < $#{$self->[1]};
28777 # the following line from dlister.pl caused trouble:
28778 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
28780 # If the '<' starts an angle operator, it must end on this line and
28781 # it must not have certain characters like ';' and '=' in it. I use
28782 # this to limit the testing. This filter should be improved if
28785 if ( $input_line =~ /($filter)/g ) {
28789 # We MAY have found an angle operator termination if we get
28790 # here, but we need to do more to be sure we haven't been
28792 my $pos = pos($input_line);
28794 my $pos_beg = $$rtoken_map[$i];
28795 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
28797 # Reject if the closing '>' follows a '-' as in:
28798 # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
28799 if ( $expecting eq UNKNOWN ) {
28800 my $check = substr( $input_line, $pos - 2, 1 );
28801 if ( $check eq '-' ) {
28802 return ( $i, $type );
28806 ######################################debug#####
28807 #write_diagnostics( "ANGLE? :$str\n");
28808 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
28809 ######################################debug#####
28813 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28815 # It may be possible that a quote ends midway in a pretoken.
28816 # If this happens, it may be necessary to split the pretoken.
28819 "Possible tokinization error..please check this line\n");
28820 report_possible_bug();
28823 # Now let's see where we stand....
28824 # OK if math op not possible
28825 if ( $expecting == TERM ) {
28828 # OK if there are no more than 2 pre-tokens inside
28829 # (not possible to write 2 token math between < and >)
28830 # This catches most common cases
28831 elsif ( $i <= $i_beg + 3 ) {
28832 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
28838 # Let's try a Brace Test: any braces inside must balance
28840 while ( $str =~ /\{/g ) { $br++ }
28841 while ( $str =~ /\}/g ) { $br-- }
28843 while ( $str =~ /\[/g ) { $sb++ }
28844 while ( $str =~ /\]/g ) { $sb-- }
28846 while ( $str =~ /\(/g ) { $pr++ }
28847 while ( $str =~ /\)/g ) { $pr-- }
28849 # if braces do not balance - not angle operator
28850 if ( $br || $sb || $pr ) {
28854 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
28857 # we should keep doing more checks here...to be continued
28858 # Tentatively accepting this as a valid angle operator.
28859 # There are lots more things that can be checked.
28862 "ANGLE-Guessing yes: $str expecting=$expecting\n");
28863 write_logfile_entry("Guessing angle operator here: $str\n");
28868 # didn't find ending >
28870 if ( $expecting == TERM ) {
28871 warning("No ending > for angle operator\n");
28875 return ( $i, $type );
28878 sub scan_number_do {
28880 # scan a number in any of the formats that Perl accepts
28881 # Underbars (_) are allowed in decimal numbers.
28882 # input parameters -
28883 # $input_line - the string to scan
28884 # $i - pre_token index to start scanning
28885 # $rtoken_map - reference to the pre_token map giving starting
28886 # character position in $input_line of token $i
28887 # output parameters -
28888 # $i - last pre_token index of the number just scanned
28889 # number - the number (characters); or undef if not a number
28891 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
28892 my $pos_beg = $$rtoken_map[$i];
28895 my $number = undef;
28896 my $type = $input_type;
28898 my $first_char = substr( $input_line, $pos_beg, 1 );
28900 # Look for bad starting characters; Shouldn't happen..
28901 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
28902 warning("Program bug - scan_number given character $first_char\n");
28903 report_definite_bug();
28904 return ( $i, $type, $number );
28907 # handle v-string without leading 'v' character ('Two Dot' rule)
28909 # TODO: v-strings may contain underscores
28910 pos($input_line) = $pos_beg;
28911 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
28912 $pos = pos($input_line);
28913 my $numc = $pos - $pos_beg;
28914 $number = substr( $input_line, $pos_beg, $numc );
28916 report_v_string($number);
28919 # handle octal, hex, binary
28920 if ( !defined($number) ) {
28921 pos($input_line) = $pos_beg;
28922 if ( $input_line =~
28923 /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
28925 $pos = pos($input_line);
28926 my $numc = $pos - $pos_beg;
28927 $number = substr( $input_line, $pos_beg, $numc );
28933 if ( !defined($number) ) {
28934 pos($input_line) = $pos_beg;
28936 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
28937 $pos = pos($input_line);
28939 # watch out for things like 0..40 which would give 0. by this;
28940 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
28941 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
28945 my $numc = $pos - $pos_beg;
28946 $number = substr( $input_line, $pos_beg, $numc );
28951 # filter out non-numbers like e + - . e2 .e3 +e6
28952 # the rule: at least one digit, and any 'e' must be preceded by a digit
28954 $number !~ /\d/ # no digits
28955 || ( $number =~ /^(.*)[eE]/
28956 && $1 !~ /\d/ ) # or no digits before the 'e'
28960 $type = $input_type;
28961 return ( $i, $type, $number );
28964 # Found a number; now we must convert back from character position
28965 # to pre_token index. An error here implies user syntax error.
28966 # An example would be an invalid octal number like '009'.
28969 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28970 if ($error) { warning("Possibly invalid number\n") }
28972 return ( $i, $type, $number );
28975 sub inverse_pretoken_map {
28977 # Starting with the current pre_token index $i, scan forward until
28978 # finding the index of the next pre_token whose position is $pos.
28979 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
28982 while ( ++$i <= $max_token_index ) {
28984 if ( $pos <= $$rtoken_map[$i] ) {
28986 # Let the calling routine handle errors in which we do not
28987 # land on a pre-token boundary. It can happen by running
28988 # perltidy on some non-perl scripts, for example.
28989 if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
28994 return ( $i, $error );
28997 sub find_here_doc {
28999 # find the target of a here document, if any
29000 # input parameters:
29001 # $i - token index of the second < of <<
29002 # ($i must be less than the last token index if this is called)
29003 # output parameters:
29004 # $found_target = 0 didn't find target; =1 found target
29005 # HERE_TARGET - the target string (may be empty string)
29006 # $i - unchanged if not here doc,
29007 # or index of the last token of the here target
29008 # $saw_error - flag noting unbalanced quote on here target
29009 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
29011 my $found_target = 0;
29012 my $here_doc_target = '';
29013 my $here_quote_character = '';
29015 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
29016 $next_token = $$rtokens[ $i + 1 ];
29018 # perl allows a backslash before the target string (heredoc.t)
29020 if ( $next_token eq '\\' ) {
29022 $next_token = $$rtokens[ $i + 2 ];
29025 ( $next_nonblank_token, $i_next_nonblank ) =
29026 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
29028 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
29031 my $quote_depth = 0;
29036 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
29039 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
29040 $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
29042 if ($in_quote) { # didn't find end of quote, so no target found
29044 if ( $expecting == TERM ) {
29046 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
29051 else { # found ending quote
29056 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
29057 $tokj = $$rtokens[$j];
29059 # we have to remove any backslash before the quote character
29060 # so that the here-doc-target exactly matches this string
29064 && $$rtokens[ $j + 1 ] eq $here_quote_character );
29065 $here_doc_target .= $tokj;
29070 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
29072 write_logfile_entry(
29073 "found blank here-target after <<; suggest using \"\"\n");
29076 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
29078 my $here_doc_expected;
29079 if ( $expecting == UNKNOWN ) {
29080 $here_doc_expected = guess_if_here_doc($next_token);
29083 $here_doc_expected = 1;
29086 if ($here_doc_expected) {
29088 $here_doc_target = $next_token;
29095 if ( $expecting == TERM ) {
29097 write_logfile_entry("Note: bare here-doc operator <<\n");
29104 # patch to neglect any prepended backslash
29105 if ( $found_target && $backslash ) { $i++ }
29107 return ( $found_target, $here_doc_target, $here_quote_character, $i,
29113 # follow (or continue following) quoted string(s)
29114 # $in_quote return code:
29115 # 0 - ok, found end
29116 # 1 - still must find end of quote whose target is $quote_character
29117 # 2 - still looking for end of first of two quotes
29119 # Returns updated strings:
29120 # $quoted_string_1 = quoted string seen while in_quote=1
29121 # $quoted_string_2 = quoted string seen while in_quote=2
29123 $i, $in_quote, $quote_character,
29124 $quote_pos, $quote_depth, $quoted_string_1,
29125 $quoted_string_2, $rtokens, $rtoken_map,
29129 my $in_quote_starting = $in_quote;
29132 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
29135 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29138 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
29139 $quote_pos, $quote_depth, $max_token_index );
29140 $quoted_string_2 .= $quoted_string;
29141 if ( $in_quote == 1 ) {
29142 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
29143 $quote_character = '';
29146 $quoted_string_2 .= "\n";
29150 if ( $in_quote == 1 ) { # one (more) quote to follow
29153 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29156 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
29157 $quote_pos, $quote_depth, $max_token_index );
29158 $quoted_string_1 .= $quoted_string;
29159 if ( $in_quote == 1 ) {
29160 $quoted_string_1 .= "\n";
29163 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29164 $quoted_string_1, $quoted_string_2 );
29167 sub follow_quoted_string {
29169 # scan for a specific token, skipping escaped characters
29170 # if the quote character is blank, use the first non-blank character
29171 # input parameters:
29172 # $rtokens = reference to the array of tokens
29173 # $i = the token index of the first character to search
29174 # $in_quote = number of quoted strings being followed
29175 # $beginning_tok = the starting quote character
29176 # $quote_pos = index to check next for alphanumeric delimiter
29177 # output parameters:
29178 # $i = the token index of the ending quote character
29179 # $in_quote = decremented if found end, unchanged if not
29180 # $beginning_tok = the starting quote character
29181 # $quote_pos = index to check next for alphanumeric delimiter
29182 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
29183 # $quoted_string = the text of the quote (without quotation tokens)
29184 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
29187 my ( $tok, $end_tok );
29188 my $i = $i_beg - 1;
29189 my $quoted_string = "";
29191 TOKENIZER_DEBUG_FLAG_QUOTE && do {
29193 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
29196 # get the corresponding end token
29197 if ( $beginning_tok !~ /^\s*$/ ) {
29198 $end_tok = matching_end_token($beginning_tok);
29201 # a blank token means we must find and use the first non-blank one
29203 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
29205 while ( $i < $max_token_index ) {
29206 $tok = $$rtokens[ ++$i ];
29208 if ( $tok !~ /^\s*$/ ) {
29210 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
29211 $i = $max_token_index;
29215 if ( length($tok) > 1 ) {
29216 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
29217 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
29220 $beginning_tok = $tok;
29223 $end_tok = matching_end_token($beginning_tok);
29229 $allow_quote_comments = 1;
29234 # There are two different loops which search for the ending quote
29235 # character. In the rare case of an alphanumeric quote delimiter, we
29236 # have to look through alphanumeric tokens character-by-character, since
29237 # the pre-tokenization process combines multiple alphanumeric
29238 # characters, whereas for a non-alphanumeric delimiter, only tokens of
29239 # length 1 can match.
29241 ###################################################################
29242 # Case 1 (rare): loop for case of alphanumeric quote delimiter..
29243 # "quote_pos" is the position the current word to begin searching
29244 ###################################################################
29245 if ( $beginning_tok =~ /\w/ ) {
29247 # Note this because it is not recommended practice except
29248 # for obfuscated perl contests
29249 if ( $in_quote == 1 ) {
29250 write_logfile_entry(
29251 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
29254 while ( $i < $max_token_index ) {
29256 if ( $quote_pos == 0 || ( $i < 0 ) ) {
29257 $tok = $$rtokens[ ++$i ];
29259 if ( $tok eq '\\' ) {
29261 # retain backslash unless it hides the end token
29262 $quoted_string .= $tok
29263 unless $$rtokens[ $i + 1 ] eq $end_tok;
29265 last if ( $i >= $max_token_index );
29266 $tok = $$rtokens[ ++$i ];
29269 my $old_pos = $quote_pos;
29271 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
29275 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
29277 if ( $quote_pos > 0 ) {
29280 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
29284 if ( $quote_depth == 0 ) {
29290 $quoted_string .= substr( $tok, $old_pos );
29295 ########################################################################
29296 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
29297 ########################################################################
29300 while ( $i < $max_token_index ) {
29301 $tok = $$rtokens[ ++$i ];
29303 if ( $tok eq $end_tok ) {
29306 if ( $quote_depth == 0 ) {
29311 elsif ( $tok eq $beginning_tok ) {
29314 elsif ( $tok eq '\\' ) {
29316 # retain backslash unless it hides the beginning or end token
29317 $tok = $$rtokens[ ++$i ];
29318 $quoted_string .= '\\'
29319 unless ( $tok eq $end_tok || $tok eq $beginning_tok );
29321 $quoted_string .= $tok;
29324 if ( $i > $max_token_index ) { $i = $max_token_index }
29325 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
29329 sub indicate_error {
29330 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
29331 interrupt_logfile();
29333 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
29337 sub write_error_indicator_pair {
29338 my ( $line_number, $input_line, $pos, $carrat ) = @_;
29339 my ( $offset, $numbered_line, $underline ) =
29340 make_numbered_line( $line_number, $input_line, $pos );
29341 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
29342 warning( $numbered_line . "\n" );
29343 $underline =~ s/\s*$//;
29344 warning( $underline . "\n" );
29347 sub make_numbered_line {
29349 # Given an input line, its line number, and a character position of
29350 # interest, create a string not longer than 80 characters of the form
29351 # $lineno: sub_string
29352 # such that the sub_string of $str contains the position of interest
29354 # Here is an example of what we want, in this case we add trailing
29355 # '...' because the line is long.
29357 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29359 # Here is another example, this time in which we used leading '...'
29360 # because of excessive length:
29362 # 2: ... er of the World Wide Web Consortium's
29364 # input parameters are:
29365 # $lineno = line number
29366 # $str = the text of the line
29367 # $pos = position of interest (the error) : 0 = first character
29370 # - $offset = an offset which corrects the position in case we only
29371 # display part of a line, such that $pos-$offset is the effective
29372 # position from the start of the displayed line.
29373 # - $numbered_line = the numbered line as above,
29374 # - $underline = a blank 'underline' which is all spaces with the same
29375 # number of characters as the numbered line.
29377 my ( $lineno, $str, $pos ) = @_;
29378 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
29379 my $excess = length($str) - $offset - 68;
29380 my $numc = ( $excess > 0 ) ? 68 : undef;
29382 if ( defined($numc) ) {
29383 if ( $offset == 0 ) {
29384 $str = substr( $str, $offset, $numc - 4 ) . " ...";
29387 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
29392 if ( $offset == 0 ) {
29395 $str = "... " . substr( $str, $offset + 4 );
29399 my $numbered_line = sprintf( "%d: ", $lineno );
29400 $offset -= length($numbered_line);
29401 $numbered_line .= $str;
29402 my $underline = " " x length($numbered_line);
29403 return ( $offset, $numbered_line, $underline );
29406 sub write_on_underline {
29408 # The "underline" is a string that shows where an error is; it starts
29409 # out as a string of blanks with the same length as the numbered line of
29410 # code above it, and we have to add marking to show where an error is.
29411 # In the example below, we want to write the string '--^' just below
29412 # the line of bad code:
29414 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29416 # We are given the current underline string, plus a position and a
29417 # string to write on it.
29419 # In the above example, there will be 2 calls to do this:
29420 # First call: $pos=19, pos_chr=^
29421 # Second call: $pos=16, pos_chr=---
29423 # This is a trivial thing to do with substr, but there is some
29426 my ( $underline, $pos, $pos_chr ) = @_;
29428 # check for error..shouldn't happen
29429 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
29432 my $excess = length($pos_chr) + $pos - length($underline);
29433 if ( $excess > 0 ) {
29434 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
29436 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
29437 return ($underline);
29442 # Break a string, $str, into a sequence of preliminary tokens. We
29443 # are interested in these types of tokens:
29444 # words (type='w'), example: 'max_tokens_wanted'
29445 # digits (type = 'd'), example: '0755'
29446 # whitespace (type = 'b'), example: ' '
29447 # any other single character (i.e. punct; type = the character itself).
29448 # We cannot do better than this yet because we might be in a quoted
29449 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
29451 my ( $str, $max_tokens_wanted ) = @_;
29453 # we return references to these 3 arrays:
29454 my @tokens = (); # array of the tokens themselves
29455 my @token_map = (0); # string position of start of each token
29456 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
29461 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
29464 # note that this must come before words!
29465 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
29468 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
29470 # single-character punctuation
29471 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
29475 return ( \@tokens, \@token_map, \@type );
29479 push @token_map, pos($str);
29481 } while ( --$max_tokens_wanted != 0 );
29483 return ( \@tokens, \@token_map, \@type );
29488 # this is an old debug routine
29489 my ( $rtokens, $rtoken_map ) = @_;
29490 my $num = scalar(@$rtokens);
29493 for ( $i = 0 ; $i < $num ; $i++ ) {
29494 my $len = length( $$rtokens[$i] );
29495 print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
29499 sub matching_end_token {
29501 # find closing character for a pattern
29502 my $beginning_token = shift;
29504 if ( $beginning_token eq '{' ) {
29507 elsif ( $beginning_token eq '[' ) {
29510 elsif ( $beginning_token eq '<' ) {
29513 elsif ( $beginning_token eq '(' ) {
29521 sub dump_token_types {
29525 # This should be the latest list of token types in use
29526 # adding NEW_TOKENS: add a comment here
29527 print $fh <<'END_OF_LIST';
29529 Here is a list of the token types currently used for lines of type 'CODE'.
29530 For the following tokens, the "type" of a token is just the token itself.
29532 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
29533 ( ) <= >= == =~ !~ != ++ -- /= x=
29534 ... **= <<= >>= &&= ||= //= <=>
29535 , + - / * | % ! x ~ = \ ? : . < > ^ &
29537 The following additional token types are defined:
29540 b blank (white space)
29541 { indent: opening structural curly brace or square bracket or paren
29542 (code block, anonymous hash reference, or anonymous array reference)
29543 } outdent: right structural curly brace or square bracket or paren
29544 [ left non-structural square bracket (enclosing an array index)
29545 ] right non-structural square bracket
29546 ( left non-structural paren (all but a list right of an =)
29547 ) right non-structural paren
29548 L left non-structural curly brace (enclosing a key)
29549 R right non-structural curly brace
29550 ; terminal semicolon
29551 f indicates a semicolon in a "for" statement
29552 h here_doc operator <<
29554 Q indicates a quote or pattern
29555 q indicates a qw quote block
29557 C user-defined constant or constant function (with void prototype = ())
29558 U user-defined function taking parameters
29559 G user-defined function taking block parameter (like grep/map/eval)
29560 M (unused, but reserved for subroutine definition name)
29561 P (unused, but -html uses it to label pod text)
29562 t type indicater such as %,$,@,*,&,sub
29563 w bare word (perhaps a subroutine call)
29564 i identifier of some type (with leading %, $, @, *, &, sub, -> )
29567 F a file test operator (like -e)
29569 Z identifier in indirect object slot: may be file handle, object
29570 J LABEL: code block label
29571 j LABEL after next, last, redo, goto
29574 pp pre-increment operator ++
29575 mm pre-decrement operator --
29576 A : used as attribute separator
29578 Here are the '_line_type' codes used internally:
29579 SYSTEM - system-specific code before hash-bang line
29580 CODE - line of perl code (including comments)
29581 POD_START - line starting pod, such as '=head'
29582 POD - pod documentation text
29583 POD_END - last line of pod section, '=cut'
29584 HERE - text of here-document
29585 HERE_END - last line of here-doc (target word)
29586 FORMAT - format section
29587 FORMAT_END - last line of format section, '.'
29588 DATA_START - __DATA__ line
29589 DATA - unidentified text following __DATA__
29590 END_START - __END__ line
29591 END - unidentified text following __END__
29592 ERROR - we are in big trouble, probably not a perl script
29598 # These names are used in error messages
29599 @opening_brace_names = qw# '{' '[' '(' '?' #;
29600 @closing_brace_names = qw# '}' ']' ')' ':' #;
29603 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
29604 <= >= == =~ !~ != ++ -- /= x= ~~
29606 @is_digraph{@digraphs} = (1) x scalar(@digraphs);
29608 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
29609 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
29611 # make a hash of all valid token types for self-checking the tokenizer
29612 # (adding NEW_TOKENS : select a new character and add to this list)
29613 my @valid_token_types = qw#
29614 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
29615 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
29617 push( @valid_token_types, @digraphs );
29618 push( @valid_token_types, @trigraphs );
29619 push( @valid_token_types, ( '#', ',', 'CORE::' ) );
29620 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
29622 # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
29623 my @file_test_operators =
29624 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
29625 @is_file_test_operator{@file_test_operators} =
29626 (1) x scalar(@file_test_operators);
29628 # these functions have prototypes of the form (&), so when they are
29629 # followed by a block, that block MAY BE followed by an operator.
29630 # Smartmatch operator ~~ may be followed by anonymous hash or array ref
29631 @_ = qw( do eval );
29632 @is_block_operator{@_} = (1) x scalar(@_);
29634 # these functions allow an identifier in the indirect object slot
29635 @_ = qw( print printf sort exec system say);
29636 @is_indirect_object_taker{@_} = (1) x scalar(@_);
29638 # These tokens may precede a code block
29639 # patched for SWITCH/CASE
29641 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
29642 unless do while until eval for foreach map grep sort
29643 switch case given when);
29644 @is_code_block_token{@_} = (1) x scalar(@_);
29646 # I'll build the list of keywords incrementally
29649 # keywords and tokens after which a value or pattern is expected,
29650 # but not an operator. In other words, these should consume terms
29651 # to their right, or at least they are not expected to be followed
29652 # immediately by operators.
29653 my @value_requestor = qw(
29874 # patched above for SWITCH/CASE given/when err say
29875 # 'err' is a fairly safe addition.
29876 # TODO: 'default' still needed if appropriate
29877 # 'use feature' seen, but perltidy works ok without it.
29878 # Concerned that 'default' could break code.
29879 push( @Keywords, @value_requestor );
29881 # These are treated the same but are not keywords:
29886 push( @value_requestor, @extra_vr );
29888 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
29890 # this list contains keywords which do not look for arguments,
29891 # so that they might be followed by an operator, or at least
29893 my @operator_requestor = qw(
29917 push( @Keywords, @operator_requestor );
29919 # These are treated the same but are not considered keywords:
29926 push( @operator_requestor, @extra_or );
29928 @expecting_operator_token{@operator_requestor} =
29929 (1) x scalar(@operator_requestor);
29931 # these token TYPES expect trailing operator but not a term
29932 # note: ++ and -- are post-increment and decrement, 'C' = constant
29933 my @operator_requestor_types = qw( ++ -- C <> q );
29934 @expecting_operator_types{@operator_requestor_types} =
29935 (1) x scalar(@operator_requestor_types);
29937 # these token TYPES consume values (terms)
29938 # note: pp and mm are pre-increment and decrement
29939 # f=semicolon in for, F=file test operator
29940 my @value_requestor_type = qw#
29941 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
29942 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
29943 <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
29944 f F pp mm Y p m U J G j >> << ^ t
29946 push( @value_requestor_type, ',' )
29947 ; # (perl doesn't like a ',' in a qw block)
29948 @expecting_term_types{@value_requestor_type} =
29949 (1) x scalar(@value_requestor_type);
29951 # Note: the following valid token types are not assigned here to
29952 # hashes requesting to be followed by values or terms, but are
29953 # instead currently hard-coded into sub operator_expected:
29954 # ) -> :: Q R Z ] b h i k n v w } #
29956 # For simple syntax checking, it is nice to have a list of operators which
29957 # will really be unhappy if not followed by a term. This includes most
29959 %really_want_term = %expecting_term_types;
29961 # with these exceptions...
29962 delete $really_want_term{'U'}; # user sub, depends on prototype
29963 delete $really_want_term{'F'}; # file test works on $_ if no following term
29964 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
29967 @_ = qw(q qq qw qx qr s y tr m);
29968 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
29970 # These keywords are handled specially in the tokenizer code:
29971 my @special_keywords = qw(
29987 push( @Keywords, @special_keywords );
29989 # Keywords after which list formatting may be used
29990 # WARNING: do not include |map|grep|eval or perl may die on
29991 # syntax errors (map1.t).
29992 my @keyword_taking_list = qw(
30066 @is_keyword_taking_list{@keyword_taking_list} =
30067 (1) x scalar(@keyword_taking_list);
30069 # These are not used in any way yet
30070 # my @unused_keywords = qw(
30076 # The list of keywords was originally extracted from function 'keyword' in
30077 # perl file toke.c version 5.005.03, using this utility, plus a
30078 # little editing: (file getkwd.pl):
30079 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
30080 # Add 'get' prefix where necessary, then split into the above lists.
30081 # This list should be updated as necessary.
30082 # The list should not contain these special variables:
30083 # ARGV DATA ENV SIG STDERR STDIN STDOUT
30086 @is_keyword{@Keywords} = (1) x scalar(@Keywords);