2 ############################################################
4 # perltidy - a perl script indenter and formatter
6 # Copyright (c) 2000-2013 by Steve Hancock
7 # Distributed under the GPL license agreement; see file COPYING
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License along
20 # with this program; if not, write to the Free Software Foundation, Inc.,
21 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 # For brief instructions, try 'perltidy -h'.
24 # For more complete documentation, try 'man perltidy'
25 # or visit http://perltidy.sourceforge.net
27 # This script is an example of the default style. It was formatted with:
31 # Code Contributions: See ChangeLog.html for a complete history.
32 # Michael Cartmell supplied code for adaptation to VMS and helped with
34 # Hugh S. Myers supplied sub streamhandle and the supporting code to
35 # create a Perl::Tidy module which can operate on strings, arrays, etc.
36 # Yves Orton supplied coding to help detect Windows versions.
37 # Axel Rose supplied a patch for MacPerl.
38 # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
39 # Dan Tyrell contributed a patch for binary I/O.
40 # Ueli Hugenschmidt contributed a patch for -fpsc
41 # Sam Kington supplied a patch to identify the initial indentation of
43 # jonathan swartz supplied patches for:
44 # * .../ pattern, which looks upwards from directory
45 # * --notidy, to be used in directories where we want to avoid
46 # accidentally tidying
47 # * prefilter and postfilter
50 # Many others have supplied key ideas, suggestions, and bug reports;
51 # see the CHANGES file.
53 ############################################################
56 use 5.004; # need IO::File from 5.004 or later
57 BEGIN { $^W = 1; } # turn on warnings
72 @ISA = qw( Exporter );
73 @EXPORT = qw( &perltidy );
81 ( $VERSION = q($Id: Tidy.pm,v 1.74 2013/09/22 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
86 # given filename and mode (r or w), create an object which:
87 # has a 'getline' method if mode='r', and
88 # has a 'print' method if mode='w'.
89 # The objects also need a 'close' method.
91 # How the object is made:
93 # if $filename is: Make object using:
94 # ---------------- -----------------
95 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
97 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
98 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
100 # (check for 'print' method for 'w' mode)
101 # (check for 'getline' method for 'r' mode)
102 my $ref = ref( my $filename = shift );
109 if ( $ref eq 'ARRAY' ) {
110 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
112 elsif ( $ref eq 'SCALAR' ) {
113 $New = sub { Perl::Tidy::IOScalar->new(@_) };
117 # Accept an object with a getline method for reading. Note:
118 # IO::File is built-in and does not respond to the defined
119 # operator. If this causes trouble, the check can be
120 # skipped and we can just let it crash if there is no
122 if ( $mode =~ /[rR]/ ) {
123 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
124 $New = sub { $filename };
127 $New = sub { undef };
129 ------------------------------------------------------------------------
130 No 'getline' method is defined for object of class $ref
131 Please check your call to Perl::Tidy::perltidy. Trace follows.
132 ------------------------------------------------------------------------
137 # Accept an object with a print method for writing.
138 # See note above about IO::File
139 if ( $mode =~ /[wW]/ ) {
140 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
141 $New = sub { $filename };
144 $New = sub { undef };
146 ------------------------------------------------------------------------
147 No 'print' method is defined for object of class $ref
148 Please check your call to Perl::Tidy::perltidy. Trace follows.
149 ------------------------------------------------------------------------
158 if ( $filename eq '-' ) {
159 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
162 $New = sub { IO::File->new(@_) };
165 $fh = $New->( $filename, $mode )
166 or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
167 return $fh, ( $ref or $filename );
170 sub find_input_line_ending {
172 # Peek at a file and return first line ending character.
173 # Quietly return undef in case of any trouble.
174 my ($input_file) = @_;
177 # silently ignore input from object or stdin
178 if ( ref($input_file) || $input_file eq '-' ) {
181 open( INFILE, $input_file ) || return $ending;
185 read( INFILE, $buf, 1024 );
187 if ( $buf && $buf =~ /([\012\015]+)/ ) {
191 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
194 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
197 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
211 # concatenate a path and file basename
212 # returns undef in case of error
214 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
216 # use File::Spec if we can
217 unless ($missing_file_spec) {
218 return File::Spec->catfile(@_);
221 # Perl 5.004 systems may not have File::Spec so we'll make
222 # a simple try. We assume File::Basename is available.
223 # return undef if not successful.
225 my $path = join '/', @_;
226 my $test_file = $path . $name;
227 my ( $test_name, $test_path ) = fileparse($test_file);
228 return $test_file if ( $test_name eq $name );
229 return undef if ( $^O eq 'VMS' );
231 # this should work at least for Windows and Unix:
232 $test_file = $path . '/' . $name;
233 ( $test_name, $test_path ) = fileparse($test_file);
234 return $test_file if ( $test_name eq $name );
238 sub make_temporary_filename {
240 # Make a temporary filename.
241 # The POSIX tmpnam() function has been unreliable for non-unix systems
242 # (at least for the win32 systems that I've tested), so use a pre-defined
243 # name for them. A disadvantage of this is that two perltidy
244 # runs in the same working directory may conflict. However, the chance of
245 # that is small and manageable by the user, especially on systems for which
246 # the POSIX tmpnam function doesn't work.
247 my $name = "perltidy.TMP";
248 if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
251 eval "use POSIX qw(tmpnam)";
252 if ($@) { return $name }
255 # just make a couple of tries before giving up and using the default
257 my $tmpname = tmpnam();
258 my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
268 # Here is a map of the flow of data from the input source to the output
271 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
272 # input groups output
273 # lines tokens lines of lines lines
276 # The names correspond to the package names responsible for the unit processes.
278 # The overall process is controlled by the "main" package.
280 # LineSource is the stream of input lines
282 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
283 # if necessary. A token is any section of the input line which should be
284 # manipulated as a single entity during formatting. For example, a single
285 # ',' character is a token, and so is an entire side comment. It handles
286 # the complexities of Perl syntax, such as distinguishing between '<<' as
287 # a shift operator and as a here-document, or distinguishing between '/'
288 # as a divide symbol and as a pattern delimiter.
290 # Formatter inserts and deletes whitespace between tokens, and breaks
291 # sequences of tokens at appropriate points as output lines. It bases its
292 # decisions on the default rules as modified by any command-line options.
294 # VerticalAligner collects groups of lines together and tries to line up
295 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
297 # FileWriter simply writes lines to the output stream.
299 # The Logger package, not shown, records significant events and warning
300 # messages. It writes a .LOG file, which may be saved with a
301 # '-log' or a '-g' flag.
307 destination => undef,
314 dump_options => undef,
315 dump_options_type => undef,
316 dump_getopt_flags => undef,
317 dump_options_category => undef,
318 dump_options_range => undef,
319 dump_abbreviations => undef,
324 # don't overwrite callers ARGV
326 local *STDERR = *STDERR;
330 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
332 my @good_keys = sort keys %defaults;
333 @bad_keys = sort @bad_keys;
335 ------------------------------------------------------------------------
336 Unknown perltidy parameter : (@bad_keys)
337 perltidy only understands : (@good_keys)
338 ------------------------------------------------------------------------
343 my $get_hash_ref = sub {
345 my $hash_ref = $input_hash{$key};
346 if ( defined($hash_ref) ) {
347 unless ( ref($hash_ref) eq 'HASH' ) {
348 my $what = ref($hash_ref);
350 $what ? "but is ref to $what" : "but is not a reference";
352 ------------------------------------------------------------------------
353 error in call to perltidy:
354 -$key must be reference to HASH $but_is
355 ------------------------------------------------------------------------
362 %input_hash = ( %defaults, %input_hash );
363 my $argv = $input_hash{'argv'};
364 my $destination_stream = $input_hash{'destination'};
365 my $errorfile_stream = $input_hash{'errorfile'};
366 my $logfile_stream = $input_hash{'logfile'};
367 my $perltidyrc_stream = $input_hash{'perltidyrc'};
368 my $source_stream = $input_hash{'source'};
369 my $stderr_stream = $input_hash{'stderr'};
370 my $user_formatter = $input_hash{'formatter'};
371 my $prefilter = $input_hash{'prefilter'};
372 my $postfilter = $input_hash{'postfilter'};
374 if ($stderr_stream) {
375 ( $fh_stderr, my $stderr_file ) =
376 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
379 ------------------------------------------------------------------------
380 Unable to redirect STDERR to $stderr_stream
381 Please check value of -stderr in call to perltidy
382 ------------------------------------------------------------------------
387 $fh_stderr = *STDERR;
390 sub Warn ($) { $fh_stderr->print( $_[0] ); }
393 if ( $_[0] ) { goto ERROR_EXIT }
394 else { goto NORMAL_EXIT }
397 sub Die ($) { Warn $_[0]; Exit(1); }
399 # extract various dump parameters
400 my $dump_options_type = $input_hash{'dump_options_type'};
401 my $dump_options = $get_hash_ref->('dump_options');
402 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
403 my $dump_options_category = $get_hash_ref->('dump_options_category');
404 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
405 my $dump_options_range = $get_hash_ref->('dump_options_range');
407 # validate dump_options_type
408 if ( defined($dump_options) ) {
409 unless ( defined($dump_options_type) ) {
410 $dump_options_type = 'perltidyrc';
412 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
414 ------------------------------------------------------------------------
415 Please check value of -dump_options_type in call to perltidy;
416 saw: '$dump_options_type'
417 expecting: 'perltidyrc' or 'full'
418 ------------------------------------------------------------------------
424 $dump_options_type = "";
427 if ($user_formatter) {
429 # if the user defines a formatter, there is no output stream,
430 # but we need a null stream to keep coding simple
431 $destination_stream = Perl::Tidy::DevNull->new();
434 # see if ARGV is overridden
435 if ( defined($argv) ) {
437 my $rargv = ref $argv;
438 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
442 if ( $rargv eq 'ARRAY' ) {
447 ------------------------------------------------------------------------
448 Please check value of -argv in call to perltidy;
449 it must be a string or ref to ARRAY but is: $rargv
450 ------------------------------------------------------------------------
457 my ( $rargv, $msg ) = parse_args($argv);
460 Error parsing this string passed to to perltidy with 'argv':
468 my $rpending_complaint;
469 $$rpending_complaint = "";
470 my $rpending_logfile_message;
471 $$rpending_logfile_message = "";
473 my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
475 # VMS file names are restricted to a 40.40 format, so we append _tdy
476 # instead of .tdy, etc. (but see also sub check_vms_filename)
479 if ( $^O eq 'VMS' ) {
485 $dot_pattern = '\.'; # must escape for use in regex
488 #---------------------------------------------------------------
489 # get command line options
490 #---------------------------------------------------------------
492 $rOpts, $config_file, $rraw_options,
493 $saw_extrude, $saw_pbp, $roption_string,
494 $rexpansion, $roption_category, $roption_range
496 = process_command_line(
497 $perltidyrc_stream, $is_Windows, $Windows_type,
498 $rpending_complaint, $dump_options_type,
501 #---------------------------------------------------------------
502 # Handle requests to dump information
503 #---------------------------------------------------------------
505 # return or exit immediately after all dumps
508 # Getopt parameters and their flags
509 if ( defined($dump_getopt_flags) ) {
511 foreach my $op ( @{$roption_string} ) {
520 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
524 $dump_getopt_flags->{$opt} = $flag;
528 if ( defined($dump_options_category) ) {
530 %{$dump_options_category} = %{$roption_category};
533 if ( defined($dump_options_range) ) {
535 %{$dump_options_range} = %{$roption_range};
538 if ( defined($dump_abbreviations) ) {
540 %{$dump_abbreviations} = %{$rexpansion};
543 if ( defined($dump_options) ) {
545 %{$dump_options} = %{$rOpts};
548 Exit 0 if ($quit_now);
550 # make printable string of options for this run as possible diagnostic
551 my $readable_options = readable_options( $rOpts, $roption_string );
553 # dump from command line
554 if ( $rOpts->{'dump-options'} ) {
555 print STDOUT $readable_options;
559 #---------------------------------------------------------------
560 # check parameters and their interactions
561 #---------------------------------------------------------------
563 check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
565 if ($user_formatter) {
566 $rOpts->{'format'} = 'user';
569 # there must be one entry here for every possible format
570 my %default_file_extension = (
576 # be sure we have a valid output format
577 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
578 my $formats = join ' ',
579 sort map { "'" . $_ . "'" } keys %default_file_extension;
580 my $fmt = $rOpts->{'format'};
581 Die "-format='$fmt' but must be one of: $formats\n";
584 my $output_extension = make_extension( $rOpts->{'output-file-extension'},
585 $default_file_extension{ $rOpts->{'format'} }, $dot );
587 # If the backup extension contains a / character then the backup should
588 # be deleted when the -b option is used. On older versions of
589 # perltidy this will generate an error message due to an illegal
592 # A backup file will still be generated but will be deleted
593 # at the end. If -bext='/' then this extension will be
594 # the default 'bak'. Otherwise it will be whatever characters
595 # remains after all '/' characters are removed. For example:
596 # -bext extension slashes
600 # '/dev/null' devnull 2 (Currently not allowed)
601 my $bext = $rOpts->{'backup-file-extension'};
602 my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
604 # At present only one forward slash is allowed. In the future multiple
605 # slashes may be allowed to allow for other options
606 if ( $delete_backup > 1 ) {
607 Die "-bext=$bext contains more than one '/'\n";
610 my $backup_extension =
611 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
613 my $html_toc_extension =
614 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
616 my $html_src_extension =
617 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
619 # check for -b option;
620 # silently ignore unless beautify mode
621 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
622 && $rOpts->{'format'} eq 'tidy';
624 # turn off -b with warnings in case of conflicts with other options
625 if ($in_place_modify) {
626 if ( $rOpts->{'standard-output'} ) {
627 my $msg = "Ignoring -b; you may not use -b and -st together";
628 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
630 $in_place_modify = 0;
632 if ($destination_stream) {
634 "Ignoring -b; you may not specify a destination stream and -b together\n";
635 $in_place_modify = 0;
637 if ( ref($source_stream) ) {
639 "Ignoring -b; you may not specify a source array and -b together\n";
640 $in_place_modify = 0;
642 if ( $rOpts->{'outfile'} ) {
643 Warn "Ignoring -b; you may not use -b and -o together\n";
644 $in_place_modify = 0;
646 if ( defined( $rOpts->{'output-path'} ) ) {
647 Warn "Ignoring -b; you may not use -b and -opath together\n";
648 $in_place_modify = 0;
652 Perl::Tidy::Formatter::check_options($rOpts);
653 if ( $rOpts->{'format'} eq 'html' ) {
654 Perl::Tidy::HtmlWriter->check_options($rOpts);
657 # make the pattern of file extensions that we shouldn't touch
658 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
659 if ($output_extension) {
660 my $ext = quotemeta($output_extension);
661 $forbidden_file_extensions .= "|$ext";
663 if ( $in_place_modify && $backup_extension ) {
664 my $ext = quotemeta($backup_extension);
665 $forbidden_file_extensions .= "|$ext";
667 $forbidden_file_extensions .= ')$';
669 # Create a diagnostics object if requested;
670 # This is only useful for code development
671 my $diagnostics_object = undef;
672 if ( $rOpts->{'DIAGNOSTICS'} ) {
673 $diagnostics_object = Perl::Tidy::Diagnostics->new();
676 # no filenames should be given if input is from an array
677 if ($source_stream) {
680 "You may not specify any filenames when a source array is given\n";
683 # we'll stuff the source array into ARGV
684 unshift( @ARGV, $source_stream );
686 # No special treatment for source stream which is a filename.
687 # This will enable checks for binary files and other bad stuff.
688 $source_stream = undef unless ref($source_stream);
691 # use stdin by default if no source array and no args
693 unshift( @ARGV, '-' ) unless @ARGV;
696 #---------------------------------------------------------------
698 # main loop to process all files in argument list
699 #---------------------------------------------------------------
700 my $number_of_files = @ARGV;
701 my $formatter = undef;
702 my $tokenizer = undef;
703 while ( my $input_file = shift @ARGV ) {
705 my $input_file_permissions;
707 #---------------------------------------------------------------
708 # prepare this input stream
709 #---------------------------------------------------------------
710 if ($source_stream) {
711 $fileroot = "perltidy";
713 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
714 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
715 $in_place_modify = 0;
718 $fileroot = $input_file;
719 unless ( -e $input_file ) {
721 # file doesn't exist - check for a file glob
722 if ( $input_file =~ /([\?\*\[\{])/ ) {
724 # Windows shell may not remove quotes, so do it
725 my $input_file = $input_file;
726 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
727 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
728 my $pattern = fileglob_to_re($input_file);
730 if ( !$@ && opendir( DIR, './' ) ) {
732 grep { /$pattern/ && !-d $_ } readdir(DIR);
735 unshift @ARGV, @files;
740 Warn "skipping file: '$input_file': no matches found\n";
744 unless ( -f $input_file ) {
745 Warn "skipping file: $input_file: not a regular file\n";
749 # As a safety precaution, skip zero length files.
750 # If for example a source file got clobbered somehow,
751 # the old .tdy or .bak files might still exist so we
752 # shouldn't overwrite them with zero length files.
753 unless ( -s $input_file ) {
754 Warn "skipping file: $input_file: Zero size\n";
758 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
760 "skipping file: $input_file: Non-text (override with -f)\n";
764 # we should have a valid filename now
765 $fileroot = $input_file;
766 $input_file_permissions = ( stat $input_file )[2] & 07777;
768 if ( $^O eq 'VMS' ) {
769 ( $fileroot, $dot ) = check_vms_filename($fileroot);
772 # add option to change path here
773 if ( defined( $rOpts->{'output-path'} ) ) {
775 my ( $base, $old_path ) = fileparse($fileroot);
776 my $new_path = $rOpts->{'output-path'};
777 unless ( -d $new_path ) {
778 unless ( mkdir $new_path, 0777 ) {
779 Die "unable to create directory $new_path: $!\n";
782 my $path = $new_path;
783 $fileroot = catfile( $path, $base );
786 ------------------------------------------------------------------------
787 Problem combining $new_path and $base to make a filename; check -opath
788 ------------------------------------------------------------------------
794 # Skip files with same extension as the output files because
795 # this can lead to a messy situation with files like
796 # script.tdy.tdy.tdy ... or worse problems ... when you
797 # rerun perltidy over and over with wildcard input.
800 && ( $input_file =~ /$forbidden_file_extensions/o
801 || $input_file eq 'DIAGNOSTICS' )
804 Warn "skipping file: $input_file: wrong extension\n";
808 # the 'source_object' supplies a method to read the input file
810 Perl::Tidy::LineSource->new( $input_file, $rOpts,
811 $rpending_logfile_message );
812 next unless ($source_object);
814 # Prefilters and postfilters: The prefilter is a code reference
815 # that will be applied to the source before tidying, and the
816 # postfilter is a code reference to the result before outputting.
819 while ( my $line = $source_object->get_line() ) {
822 $buf = $prefilter->($buf);
824 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
825 $rpending_logfile_message );
828 # register this file name with the Diagnostics package
829 $diagnostics_object->set_input_file($input_file)
830 if $diagnostics_object;
832 #---------------------------------------------------------------
833 # prepare the output stream
834 #---------------------------------------------------------------
835 my $output_file = undef;
836 my $actual_output_extension;
838 if ( $rOpts->{'outfile'} ) {
840 if ( $number_of_files <= 1 ) {
842 if ( $rOpts->{'standard-output'} ) {
843 my $msg = "You may not use -o and -st together";
844 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
847 elsif ($destination_stream) {
849 "You may not specify a destination array and -o together\n";
851 elsif ( defined( $rOpts->{'output-path'} ) ) {
852 Die "You may not specify -o and -opath together\n";
854 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
855 Die "You may not specify -o and -oext together\n";
857 $output_file = $rOpts->{outfile};
859 # make sure user gives a file name after -o
860 if ( $output_file =~ /^-/ ) {
861 Die "You must specify a valid filename after -o\n";
864 # do not overwrite input file with -o
865 if ( defined($input_file_permissions)
866 && ( $output_file eq $input_file ) )
868 Die "Use 'perltidy -b $input_file' to modify in-place\n";
872 Die "You may not use -o with more than one input file\n";
875 elsif ( $rOpts->{'standard-output'} ) {
876 if ($destination_stream) {
878 "You may not specify a destination array and -st together\n";
879 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
884 if ( $number_of_files <= 1 ) {
887 Die "You may not use -st with more than one input file\n";
890 elsif ($destination_stream) {
891 $output_file = $destination_stream;
893 elsif ($source_stream) { # source but no destination goes to stdout
896 elsif ( $input_file eq '-' ) {
900 if ($in_place_modify) {
901 $output_file = IO::File->new_tmpfile()
902 or Die "cannot open temp file for -b option: $!\n";
905 $actual_output_extension = $output_extension;
906 $output_file = $fileroot . $output_extension;
910 # the 'sink_object' knows how to write the output file
911 my $tee_file = $fileroot . $dot . "TEE";
913 my $line_separator = $rOpts->{'output-line-ending'};
914 if ( $rOpts->{'preserve-line-endings'} ) {
915 $line_separator = find_input_line_ending($input_file);
918 # Eventually all I/O may be done with binmode, but for now it is
919 # only done when a user requests a particular line separator
920 # through the -ple or -ole flags
922 if ( defined($line_separator) ) { $binmode = 1 }
923 else { $line_separator = "\n" }
925 my ( $sink_object, $postfilter_buffer );
928 Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
929 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
933 Perl::Tidy::LineSink->new( $output_file, $tee_file,
934 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
937 #---------------------------------------------------------------
938 # initialize the error logger for this file
939 #---------------------------------------------------------------
940 my $warning_file = $fileroot . $dot . "ERR";
941 if ($errorfile_stream) { $warning_file = $errorfile_stream }
942 my $log_file = $fileroot . $dot . "LOG";
943 if ($logfile_stream) { $log_file = $logfile_stream }
946 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
947 $fh_stderr, $saw_extrude );
948 write_logfile_header(
949 $rOpts, $logger_object, $config_file,
950 $rraw_options, $Windows_type, $readable_options,
952 if ($$rpending_logfile_message) {
953 $logger_object->write_logfile_entry($$rpending_logfile_message);
955 if ($$rpending_complaint) {
956 $logger_object->complain($$rpending_complaint);
959 #---------------------------------------------------------------
960 # initialize the debug object, if any
961 #---------------------------------------------------------------
962 my $debugger_object = undef;
963 if ( $rOpts->{DEBUG} ) {
965 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
968 #---------------------------------------------------------------
969 # loop over iterations for one source stream
970 #---------------------------------------------------------------
972 # We will do a convergence test if 3 or more iterations are allowed.
973 # It would be pointless for fewer because we have to make at least
974 # two passes before we can see if we are converged, and the test
975 # would just slow things down.
976 my $max_iterations = $rOpts->{'iterations'};
977 my $convergence_log_message;
979 my $do_convergence_test = $max_iterations > 2;
980 if ($do_convergence_test) {
981 eval "use Digest::MD5 qw(md5_hex)";
982 $do_convergence_test = !$@;
984 # Trying to avoid problems with ancient versions of perl because
985 # I don't know in which version number utf8::encode was introduced.
986 eval { my $string = "perltidy"; utf8::encode($string) };
987 $do_convergence_test = $do_convergence_test && !$@;
990 # save objects to allow redirecting output during iterations
991 my $sink_object_final = $sink_object;
992 my $debugger_object_final = $debugger_object;
993 my $logger_object_final = $logger_object;
995 for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
997 # send output stream to temp buffers until last iteration
999 if ( $iter < $max_iterations ) {
1001 Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
1002 $line_separator, $rOpts, $rpending_logfile_message,
1006 $sink_object = $sink_object_final;
1009 # Save logger, debugger output only on pass 1 because:
1010 # (1) line number references must be to the starting
1011 # source, not an intermediate result, and
1012 # (2) we need to know if there are errors so we can stop the
1013 # iterations early if necessary.
1015 $debugger_object = undef;
1016 $logger_object = undef;
1019 #------------------------------------------------------------
1020 # create a formatter for this file : html writer or
1022 #------------------------------------------------------------
1024 # we have to delete any old formatter because, for safety,
1025 # the formatter will check to see that there is only one.
1028 if ($user_formatter) {
1029 $formatter = $user_formatter;
1031 elsif ( $rOpts->{'format'} eq 'html' ) {
1033 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1034 $actual_output_extension, $html_toc_extension,
1035 $html_src_extension );
1037 elsif ( $rOpts->{'format'} eq 'tidy' ) {
1038 $formatter = Perl::Tidy::Formatter->new(
1039 logger_object => $logger_object,
1040 diagnostics_object => $diagnostics_object,
1041 sink_object => $sink_object,
1045 Die "I don't know how to do -format=$rOpts->{'format'}\n";
1048 unless ($formatter) {
1049 Die "Unable to continue with $rOpts->{'format'} formatting\n";
1052 #---------------------------------------------------------------
1053 # create the tokenizer for this file
1054 #---------------------------------------------------------------
1055 $tokenizer = undef; # must destroy old tokenizer
1056 $tokenizer = Perl::Tidy::Tokenizer->new(
1057 source_object => $source_object,
1058 logger_object => $logger_object,
1059 debugger_object => $debugger_object,
1060 diagnostics_object => $diagnostics_object,
1061 tabsize => $tabsize,
1063 starting_level => $rOpts->{'starting-indentation-level'},
1064 indent_columns => $rOpts->{'indent-columns'},
1065 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
1066 look_for_autoloader => $rOpts->{'look-for-autoloader'},
1067 look_for_selfloader => $rOpts->{'look-for-selfloader'},
1068 trim_qw => $rOpts->{'trim-qw'},
1070 continuation_indentation =>
1071 $rOpts->{'continuation-indentation'},
1072 outdent_labels => $rOpts->{'outdent-labels'},
1075 #---------------------------------------------------------------
1077 #---------------------------------------------------------------
1078 process_this_file( $tokenizer, $formatter );
1080 #---------------------------------------------------------------
1081 # close the input source and report errors
1082 #---------------------------------------------------------------
1083 $source_object->close_input_file();
1085 # line source for next iteration (if any) comes from the current
1086 # temporary output buffer
1087 if ( $iter < $max_iterations ) {
1089 $sink_object->close_output_file();
1091 Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1092 $rpending_logfile_message );
1094 # stop iterations if errors or converged
1095 my $stop_now = $logger_object->{_warning_count};
1097 $convergence_log_message = <<EOM;
1098 Stopping iterations because of errors.
1101 elsif ($do_convergence_test) {
1103 # Patch for [rt.cpan.org #88020]
1104 # Use utf8::encode since md5_hex() only operates on bytes.
1105 my $digest = md5_hex( utf8::encode($sink_buffer) );
1106 if ( !$saw_md5{$digest} ) {
1107 $saw_md5{$digest} = $iter;
1111 # Deja vu, stop iterating
1113 my $iterm = $iter - 1;
1114 if ( $saw_md5{$digest} != $iterm ) {
1116 # Blinking (oscillating) between two stable
1117 # end states. This has happened in the past
1118 # but at present there are no known instances.
1119 $convergence_log_message = <<EOM;
1120 Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1122 $diagnostics_object->write_diagnostics(
1123 $convergence_log_message)
1124 if $diagnostics_object;
1127 $convergence_log_message = <<EOM;
1128 Converged. Output for iteration $iter same as for iter $iterm.
1130 $diagnostics_object->write_diagnostics(
1131 $convergence_log_message)
1132 if $diagnostics_object && $iterm > 2;
1135 } ## end if ($do_convergence_test)
1139 # we are stopping the iterations early;
1140 # copy the output stream to its final destination
1141 $sink_object = $sink_object_final;
1142 while ( my $line = $source_object->get_line() ) {
1143 $sink_object->write_line($line);
1145 $source_object->close_input_file();
1148 } ## end if ( $iter < $max_iterations)
1149 } # end loop over iterations for one source file
1151 # restore objects which have been temporarily undefined
1152 # for second and higher iterations
1153 $debugger_object = $debugger_object_final;
1154 $logger_object = $logger_object_final;
1156 $logger_object->write_logfile_entry($convergence_log_message)
1157 if $convergence_log_message;
1159 #---------------------------------------------------------------
1160 # Perform any postfilter operation
1161 #---------------------------------------------------------------
1163 $sink_object->close_output_file();
1165 Perl::Tidy::LineSink->new( $output_file, $tee_file,
1166 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
1167 my $buf = $postfilter->($postfilter_buffer);
1169 Perl::Tidy::LineSource->new( \$buf, $rOpts,
1170 $rpending_logfile_message );
1171 while ( my $line = $source_object->get_line() ) {
1172 $sink_object->write_line($line);
1174 $source_object->close_input_file();
1177 # Save names of the input and output files for syntax check
1178 my $ifname = $input_file;
1179 my $ofname = $output_file;
1181 #---------------------------------------------------------------
1182 # handle the -b option (backup and modify in-place)
1183 #---------------------------------------------------------------
1184 if ($in_place_modify) {
1185 unless ( -f $input_file ) {
1187 # oh, oh, no real file to backup ..
1188 # shouldn't happen because of numerous preliminary checks
1190 "problem with -b backing up input file '$input_file': not a file\n";
1192 my $backup_name = $input_file . $backup_extension;
1193 if ( -f $backup_name ) {
1194 unlink($backup_name)
1196 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1199 # backup the input file
1200 # we use copy for symlinks, move for regular files
1201 if ( -l $input_file ) {
1202 File::Copy::copy( $input_file, $backup_name )
1203 or Die "File::Copy failed trying to backup source: $!";
1206 rename( $input_file, $backup_name )
1208 "problem renaming $input_file to $backup_name for -b option: $!\n";
1210 $ifname = $backup_name;
1212 # copy the output to the original input file
1213 # NOTE: it would be nice to just close $output_file and use
1214 # File::Copy::copy here, but in this case $output_file is the
1215 # handle of an open nameless temporary file so we would lose
1216 # everything if we closed it.
1217 seek( $output_file, 0, 0 )
1218 or Die "unable to rewind a temporary file for -b option: $!\n";
1219 my $fout = IO::File->new("> $input_file")
1221 "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1224 while ( $line = $output_file->getline() ) {
1225 $fout->print($line);
1228 $output_file = $input_file;
1229 $ofname = $input_file;
1232 #---------------------------------------------------------------
1233 # clean up and report errors
1234 #---------------------------------------------------------------
1235 $sink_object->close_output_file() if $sink_object;
1236 $debugger_object->close_debug_file() if $debugger_object;
1238 # set output file permissions
1239 if ( $output_file && -f $output_file && !-l $output_file ) {
1240 if ($input_file_permissions) {
1242 # give output script same permissions as input script, but
1243 # make it user-writable or else we can't run perltidy again.
1244 # Thus we retain whatever executable flags were set.
1245 if ( $rOpts->{'format'} eq 'tidy' ) {
1246 chmod( $input_file_permissions | 0600, $output_file );
1249 # else use default permissions for html and any other format
1253 #---------------------------------------------------------------
1254 # Do syntax check if requested and possible
1255 #---------------------------------------------------------------
1256 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
1258 && $rOpts->{'check-syntax'}
1263 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1266 #---------------------------------------------------------------
1267 # remove the original file for in-place modify as follows:
1268 # $delete_backup=0 never
1269 # $delete_backup=1 only if no errors
1270 # $delete_backup>1 always : NOT ALLOWED, too risky, see above
1271 #---------------------------------------------------------------
1272 if ( $in_place_modify
1275 && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1278 # As an added safety precaution, do not delete the source file
1279 # if its size has dropped from positive to zero, since this
1280 # could indicate a disaster of some kind, including a hardware
1281 # failure. Actually, this could happen if you had a file of
1282 # all comments (or pod) and deleted everything with -dac (-dap)
1284 if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1286 "output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1292 "unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1296 $logger_object->finish( $infile_syntax_ok, $formatter )
1298 } # end of main loop to process all files
1305 } # end of main program perltidy
1307 sub get_stream_as_named_file {
1309 # Return the name of a file containing a stream of data, creating
1310 # a temporary file if necessary.
1312 # $stream - the name of a file or stream
1314 # $fname = name of file if possible, or undef
1315 # $if_tmpfile = true if temp file, undef if not temp file
1317 # This routine is needed for passing actual files to Perl for
1323 if ( ref($stream) ) {
1324 my ( $fh_stream, $fh_name ) =
1325 Perl::Tidy::streamhandle( $stream, 'r' );
1327 my ( $fout, $tmpnam );
1329 # TODO: fix the tmpnam routine to return an open filehandle
1330 $tmpnam = Perl::Tidy::make_temporary_filename();
1331 $fout = IO::File->new( $tmpnam, 'w' );
1337 while ( my $line = $fh_stream->getline() ) {
1338 $fout->print($line);
1342 $fh_stream->close();
1345 elsif ( $stream ne '-' && -f $stream ) {
1349 return ( $fname, $is_tmpfile );
1352 sub fileglob_to_re {
1354 # modified (corrected) from version in find2perl
1356 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
1357 $x =~ s#\*#.*#g; # '*' -> '.*'
1358 $x =~ s#\?#.#g; # '?' -> '.'
1359 "^$x\\z"; # match whole word
1362 sub make_extension {
1364 # Make a file extension, including any leading '.' if necessary
1365 # The '.' may actually be an '_' under VMS
1366 my ( $extension, $default, $dot ) = @_;
1368 # Use the default if none specified
1369 $extension = $default unless ($extension);
1371 # Only extensions with these leading characters get a '.'
1372 # This rule gives the user some freedom
1373 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1374 $extension = $dot . $extension;
1379 sub write_logfile_header {
1381 $rOpts, $logger_object, $config_file,
1382 $rraw_options, $Windows_type, $readable_options
1384 $logger_object->write_logfile_entry(
1385 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1387 if ($Windows_type) {
1388 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1390 my $options_string = join( ' ', @$rraw_options );
1393 $logger_object->write_logfile_entry(
1394 "Found Configuration File >>> $config_file \n");
1396 $logger_object->write_logfile_entry(
1397 "Configuration and command line parameters for this run:\n");
1398 $logger_object->write_logfile_entry("$options_string\n");
1400 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1401 $rOpts->{'logfile'} = 1; # force logfile to be saved
1402 $logger_object->write_logfile_entry(
1403 "Final parameter set for this run\n");
1404 $logger_object->write_logfile_entry(
1405 "------------------------------------\n");
1407 $logger_object->write_logfile_entry($readable_options);
1409 $logger_object->write_logfile_entry(
1410 "------------------------------------\n");
1412 $logger_object->write_logfile_entry(
1413 "To find error messages search for 'WARNING' with your editor\n");
1416 sub generate_options {
1418 ######################################################################
1419 # Generate and return references to:
1420 # @option_string - the list of options to be passed to Getopt::Long
1421 # @defaults - the list of default options
1422 # %expansion - a hash showing how all abbreviations are expanded
1423 # %category - a hash giving the general category of each option
1424 # %option_range - a hash giving the valid ranges of certain options
1426 # Note: a few options are not documented in the man page and usage
1427 # message. This is because these are experimental or debug options and
1428 # may or may not be retained in future versions.
1430 # Here are the undocumented flags as far as I know. Any of them
1431 # may disappear at any time. They are mainly for fine-tuning
1434 # fll --> fuzzy-line-length # a trivial parameter which gets
1435 # turned off for the extrude option
1436 # which is mainly for debugging
1437 # scl --> short-concatenation-item-length # helps break at '.'
1438 # recombine # for debugging line breaks
1439 # valign # for debugging vertical alignment
1440 # I --> DIAGNOSTICS # for debugging
1441 ######################################################################
1443 # here is a summary of the Getopt codes:
1444 # <none> does not take an argument
1445 # =s takes a mandatory string
1446 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1447 # =i takes a mandatory integer
1448 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1449 # ! does not take an argument and may be negated
1450 # i.e., -foo and -nofoo are allowed
1451 # a double dash signals the end of the options list
1453 #---------------------------------------------------------------
1454 # Define the option string passed to GetOptions.
1455 #---------------------------------------------------------------
1457 my @option_string = ();
1459 my %option_category = ();
1460 my %option_range = ();
1461 my $rexpansion = \%expansion;
1463 # names of categories in manual
1464 # leading integers will allow sorting
1465 my @category_name = (
1467 '1. Basic formatting options',
1468 '2. Code indentation control',
1469 '3. Whitespace control',
1470 '4. Comment controls',
1471 '5. Linebreak controls',
1472 '6. Controlling list formatting',
1473 '7. Retaining or ignoring existing line breaks',
1474 '8. Blank line control',
1475 '9. Other controls',
1477 '11. pod2html options',
1478 '12. Controlling HTML properties',
1482 # These options are parsed directly by perltidy:
1485 # However, they are included in the option set so that they will
1486 # be seen in the options dump.
1488 # These long option names have no abbreviations or are treated specially
1489 @option_string = qw(
1499 my $category = 13; # Debugging
1500 foreach (@option_string) {
1501 my $opt = $_; # must avoid changing the actual flag
1503 $option_category{$opt} = $category_name[$category];
1506 $category = 11; # HTML
1507 $option_category{html} = $category_name[$category];
1509 # routine to install and check options
1510 my $add_option = sub {
1511 my ( $long_name, $short_name, $flag ) = @_;
1512 push @option_string, $long_name . $flag;
1513 $option_category{$long_name} = $category_name[$category];
1515 if ( $expansion{$short_name} ) {
1516 my $existing_name = $expansion{$short_name}[0];
1518 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1520 $expansion{$short_name} = [$long_name];
1521 if ( $flag eq '!' ) {
1522 my $nshort_name = 'n' . $short_name;
1523 my $nolong_name = 'no' . $long_name;
1524 if ( $expansion{$nshort_name} ) {
1525 my $existing_name = $expansion{$nshort_name}[0];
1527 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1529 $expansion{$nshort_name} = [$nolong_name];
1534 # Install long option names which have a simple abbreviation.
1535 # Options with code '!' get standard negation ('no' for long names,
1536 # 'n' for abbreviations). Categories follow the manual.
1538 ###########################
1539 $category = 0; # I/O_Control
1540 ###########################
1541 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1542 $add_option->( 'backup-file-extension', 'bext', '=s' );
1543 $add_option->( 'force-read-binary', 'f', '!' );
1544 $add_option->( 'format', 'fmt', '=s' );
1545 $add_option->( 'iterations', 'it', '=i' );
1546 $add_option->( 'logfile', 'log', '!' );
1547 $add_option->( 'logfile-gap', 'g', ':i' );
1548 $add_option->( 'outfile', 'o', '=s' );
1549 $add_option->( 'output-file-extension', 'oext', '=s' );
1550 $add_option->( 'output-path', 'opath', '=s' );
1551 $add_option->( 'profile', 'pro', '=s' );
1552 $add_option->( 'quiet', 'q', '!' );
1553 $add_option->( 'standard-error-output', 'se', '!' );
1554 $add_option->( 'standard-output', 'st', '!' );
1555 $add_option->( 'warning-output', 'w', '!' );
1557 # options which are both toggle switches and values moved here
1558 # to hide from tidyview (which does not show category 0 flags):
1559 # -ole moved here from category 1
1560 # -sil moved here from category 2
1561 $add_option->( 'output-line-ending', 'ole', '=s' );
1562 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1564 ########################################
1565 $category = 1; # Basic formatting options
1566 ########################################
1567 $add_option->( 'check-syntax', 'syn', '!' );
1568 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1569 $add_option->( 'indent-columns', 'i', '=i' );
1570 $add_option->( 'maximum-line-length', 'l', '=i' );
1571 $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
1572 $add_option->( 'whitespace-cycle', 'wc', '=i' );
1573 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1574 $add_option->( 'preserve-line-endings', 'ple', '!' );
1575 $add_option->( 'tabs', 't', '!' );
1576 $add_option->( 'default-tabsize', 'dt', '=i' );
1578 ########################################
1579 $category = 2; # Code indentation control
1580 ########################################
1581 $add_option->( 'continuation-indentation', 'ci', '=i' );
1582 $add_option->( 'line-up-parentheses', 'lp', '!' );
1583 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1584 $add_option->( 'outdent-keywords', 'okw', '!' );
1585 $add_option->( 'outdent-labels', 'ola', '!' );
1586 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1587 $add_option->( 'indent-closing-brace', 'icb', '!' );
1588 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1589 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1590 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1591 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1592 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1593 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1595 ########################################
1596 $category = 3; # Whitespace control
1597 ########################################
1598 $add_option->( 'add-semicolons', 'asc', '!' );
1599 $add_option->( 'add-whitespace', 'aws', '!' );
1600 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1601 $add_option->( 'brace-tightness', 'bt', '=i' );
1602 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1603 $add_option->( 'delete-semicolons', 'dsm', '!' );
1604 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1605 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1606 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1607 $add_option->( 'paren-tightness', 'pt', '=i' );
1608 $add_option->( 'space-after-keyword', 'sak', '=s' );
1609 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1610 $add_option->( 'space-function-paren', 'sfp', '!' );
1611 $add_option->( 'space-keyword-paren', 'skp', '!' );
1612 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1613 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1614 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1615 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1616 $add_option->( 'tight-secret-operators', 'tso', '!' );
1617 $add_option->( 'trim-qw', 'tqw', '!' );
1618 $add_option->( 'trim-pod', 'trp', '!' );
1619 $add_option->( 'want-left-space', 'wls', '=s' );
1620 $add_option->( 'want-right-space', 'wrs', '=s' );
1622 ########################################
1623 $category = 4; # Comment controls
1624 ########################################
1625 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1626 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1627 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1628 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1629 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1630 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1631 $add_option->( 'closing-side-comments', 'csc', '!' );
1632 $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
1633 $add_option->( 'format-skipping', 'fs', '!' );
1634 $add_option->( 'format-skipping-begin', 'fsb', '=s' );
1635 $add_option->( 'format-skipping-end', 'fse', '=s' );
1636 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1637 $add_option->( 'indent-block-comments', 'ibc', '!' );
1638 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1639 $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
1640 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1641 $add_option->( 'outdent-long-comments', 'olc', '!' );
1642 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1643 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1644 $add_option->( 'static-block-comments', 'sbc', '!' );
1645 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1646 $add_option->( 'static-side-comments', 'ssc', '!' );
1647 $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' );
1649 ########################################
1650 $category = 5; # Linebreak controls
1651 ########################################
1652 $add_option->( 'add-newlines', 'anl', '!' );
1653 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1654 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1655 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1656 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1657 $add_option->( 'cuddled-else', 'ce', '!' );
1658 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1659 $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
1660 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1661 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1662 $add_option->( 'opening-paren-right', 'opr', '!' );
1663 $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1664 $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
1665 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1666 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1667 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1668 $add_option->( 'stack-closing-block-brace', 'scbb', '!' );
1669 $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1670 $add_option->( 'stack-closing-paren', 'scp', '!' );
1671 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1672 $add_option->( 'stack-opening-block-brace', 'sobb', '!' );
1673 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1674 $add_option->( 'stack-opening-paren', 'sop', '!' );
1675 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1676 $add_option->( 'vertical-tightness', 'vt', '=i' );
1677 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1678 $add_option->( 'want-break-after', 'wba', '=s' );
1679 $add_option->( 'want-break-before', 'wbb', '=s' );
1680 $add_option->( 'break-after-all-operators', 'baao', '!' );
1681 $add_option->( 'break-before-all-operators', 'bbao', '!' );
1682 $add_option->( 'keep-interior-semicolons', 'kis', '!' );
1684 ########################################
1685 $category = 6; # Controlling list formatting
1686 ########################################
1687 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1688 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1689 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1691 ########################################
1692 $category = 7; # Retaining or ignoring existing line breaks
1693 ########################################
1694 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1695 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1696 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1697 $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
1698 $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1700 ########################################
1701 $category = 8; # Blank line control
1702 ########################################
1703 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1704 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1705 $add_option->( 'blank-lines-before-subs', 'blbs', '=i' );
1706 $add_option->( 'blank-lines-before-packages', 'blbp', '=i' );
1707 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1708 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1709 $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
1711 ########################################
1712 $category = 9; # Other controls
1713 ########################################
1714 $add_option->( 'delete-block-comments', 'dbc', '!' );
1715 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1716 $add_option->( 'delete-pod', 'dp', '!' );
1717 $add_option->( 'delete-side-comments', 'dsc', '!' );
1718 $add_option->( 'tee-block-comments', 'tbc', '!' );
1719 $add_option->( 'tee-pod', 'tp', '!' );
1720 $add_option->( 'tee-side-comments', 'tsc', '!' );
1721 $add_option->( 'look-for-autoloader', 'lal', '!' );
1722 $add_option->( 'look-for-hash-bang', 'x', '!' );
1723 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1724 $add_option->( 'pass-version-line', 'pvl', '!' );
1726 ########################################
1727 $category = 13; # Debugging
1728 ########################################
1729 $add_option->( 'DEBUG', 'D', '!' );
1730 $add_option->( 'DIAGNOSTICS', 'I', '!' );
1731 $add_option->( 'dump-defaults', 'ddf', '!' );
1732 $add_option->( 'dump-long-names', 'dln', '!' );
1733 $add_option->( 'dump-options', 'dop', '!' );
1734 $add_option->( 'dump-profile', 'dpro', '!' );
1735 $add_option->( 'dump-short-names', 'dsn', '!' );
1736 $add_option->( 'dump-token-types', 'dtt', '!' );
1737 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1738 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1739 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1740 $add_option->( 'help', 'h', '' );
1741 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1742 $add_option->( 'show-options', 'opt', '!' );
1743 $add_option->( 'version', 'v', '' );
1744 $add_option->( 'memoize', 'mem', '!' );
1746 #---------------------------------------------------------------------
1748 # The Perl::Tidy::HtmlWriter will add its own options to the string
1749 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1751 ########################################
1752 # Set categories 10, 11, 12
1753 ########################################
1754 # Based on their known order
1755 $category = 12; # HTML properties
1756 foreach my $opt (@option_string) {
1757 my $long_name = $opt;
1758 $long_name =~ s/(!|=.*|:.*)$//;
1759 unless ( defined( $option_category{$long_name} ) ) {
1760 if ( $long_name =~ /^html-linked/ ) {
1761 $category = 10; # HTML options
1763 elsif ( $long_name =~ /^pod2html/ ) {
1764 $category = 11; # Pod2html
1766 $option_category{$long_name} = $category_name[$category];
1770 #---------------------------------------------------------------
1771 # Assign valid ranges to certain options
1772 #---------------------------------------------------------------
1773 # In the future, these may be used to make preliminary checks
1774 # hash keys are long names
1775 # If key or value is undefined:
1776 # strings may have any value
1777 # integer ranges are >=0
1778 # If value is defined:
1779 # value is [qw(any valid words)] for strings
1780 # value is [min, max] for integers
1781 # if min is undefined, there is no lower limit
1782 # if max is undefined, there is no upper limit
1783 # Parameters not listed here have defaults
1785 'format' => [ 'tidy', 'html', 'user' ],
1786 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
1788 'block-brace-tightness' => [ 0, 2 ],
1789 'brace-tightness' => [ 0, 2 ],
1790 'paren-tightness' => [ 0, 2 ],
1791 'square-bracket-tightness' => [ 0, 2 ],
1793 'block-brace-vertical-tightness' => [ 0, 2 ],
1794 'brace-vertical-tightness' => [ 0, 2 ],
1795 'brace-vertical-tightness-closing' => [ 0, 2 ],
1796 'paren-vertical-tightness' => [ 0, 2 ],
1797 'paren-vertical-tightness-closing' => [ 0, 2 ],
1798 'square-bracket-vertical-tightness' => [ 0, 2 ],
1799 'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1800 'vertical-tightness' => [ 0, 2 ],
1801 'vertical-tightness-closing' => [ 0, 2 ],
1803 'closing-brace-indentation' => [ 0, 3 ],
1804 'closing-paren-indentation' => [ 0, 3 ],
1805 'closing-square-bracket-indentation' => [ 0, 3 ],
1806 'closing-token-indentation' => [ 0, 3 ],
1808 'closing-side-comment-else-flag' => [ 0, 2 ],
1809 'comma-arrow-breakpoints' => [ 0, 5 ],
1812 # Note: we could actually allow negative ci if someone really wants it:
1813 # $option_range{'continuation-indentation'} = [ undef, undef ];
1815 #---------------------------------------------------------------
1816 # Assign default values to the above options here, except
1817 # for 'outfile' and 'help'.
1818 # These settings should approximate the perlstyle(1) suggestions.
1819 #---------------------------------------------------------------
1824 blanks-before-blocks
1825 blanks-before-comments
1826 blank-lines-before-subs=1
1827 blank-lines-before-packages=1
1828 block-brace-tightness=0
1829 block-brace-vertical-tightness=0
1831 brace-vertical-tightness-closing=0
1832 brace-vertical-tightness=0
1833 break-at-old-logical-breakpoints
1834 break-at-old-ternary-breakpoints
1835 break-at-old-attribute-breakpoints
1836 break-at-old-keyword-breakpoints
1837 comma-arrow-breakpoints=5
1839 closing-side-comment-interval=6
1840 closing-side-comment-maximum-text=20
1841 closing-side-comment-else-flag=0
1842 closing-side-comments-balanced
1843 closing-paren-indentation=0
1844 closing-brace-indentation=0
1845 closing-square-bracket-indentation=0
1846 continuation-indentation=2
1850 hanging-side-comments
1851 indent-block-comments
1854 keep-old-blank-lines=1
1855 long-block-line-count=8
1858 maximum-consecutive-blank-lines=1
1859 maximum-fields-per-table=0
1860 maximum-line-length=80
1862 minimum-space-to-comment=4
1863 nobrace-left-and-indent
1865 nodelete-old-whitespace
1870 nostatic-side-comments
1875 outdent-long-comments
1877 paren-vertical-tightness-closing=0
1878 paren-vertical-tightness=0
1882 short-concatenation-item-length=8
1884 square-bracket-tightness=1
1885 square-bracket-vertical-tightness-closing=0
1886 square-bracket-vertical-tightness=0
1887 static-block-comments
1890 backup-file-extension=bak
1895 html-table-of-contents
1899 push @defaults, "perl-syntax-check-flags=-c -T";
1901 #---------------------------------------------------------------
1902 # Define abbreviations which will be expanded into the above primitives.
1903 # These may be defined recursively.
1904 #---------------------------------------------------------------
1907 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1908 'fnl' => [qw(freeze-newlines)],
1909 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1910 'fws' => [qw(freeze-whitespace)],
1911 'freeze-blank-lines' =>
1912 [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1913 'fbl' => [qw(freeze-blank-lines)],
1914 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1915 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1916 'nooutdent-long-lines' =>
1917 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1918 'noll' => [qw(nooutdent-long-lines)],
1919 'io' => [qw(indent-only)],
1920 'delete-all-comments' =>
1921 [qw(delete-block-comments delete-side-comments delete-pod)],
1922 'nodelete-all-comments' =>
1923 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1924 'dac' => [qw(delete-all-comments)],
1925 'ndac' => [qw(nodelete-all-comments)],
1926 'gnu' => [qw(gnu-style)],
1927 'pbp' => [qw(perl-best-practices)],
1928 'tee-all-comments' =>
1929 [qw(tee-block-comments tee-side-comments tee-pod)],
1930 'notee-all-comments' =>
1931 [qw(notee-block-comments notee-side-comments notee-pod)],
1932 'tac' => [qw(tee-all-comments)],
1933 'ntac' => [qw(notee-all-comments)],
1934 'html' => [qw(format=html)],
1935 'nhtml' => [qw(format=tidy)],
1936 'tidy' => [qw(format=tidy)],
1938 'swallow-optional-blank-lines' => [qw(kbl=0)],
1939 'noswallow-optional-blank-lines' => [qw(kbl=1)],
1940 'sob' => [qw(kbl=0)],
1941 'nsob' => [qw(kbl=1)],
1943 'break-after-comma-arrows' => [qw(cab=0)],
1944 'nobreak-after-comma-arrows' => [qw(cab=1)],
1945 'baa' => [qw(cab=0)],
1946 'nbaa' => [qw(cab=1)],
1948 'blanks-before-subs' => [qw(blbs=1 blbp=1)],
1949 'bbs' => [qw(blbs=1 blbp=1)],
1950 'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
1951 'nbbs' => [qw(blbs=0 blbp=0)],
1953 'break-at-old-trinary-breakpoints' => [qw(bot)],
1955 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1956 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1957 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1958 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1959 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1961 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1962 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1963 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1964 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1965 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1967 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1968 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1969 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1971 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1972 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1973 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1975 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1976 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1977 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1979 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1980 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1981 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1983 'otr' => [qw(opr ohbr osbr)],
1984 'opening-token-right' => [qw(opr ohbr osbr)],
1985 'notr' => [qw(nopr nohbr nosbr)],
1986 'noopening-token-right' => [qw(nopr nohbr nosbr)],
1988 'sot' => [qw(sop sohb sosb)],
1989 'nsot' => [qw(nsop nsohb nsosb)],
1990 'stack-opening-tokens' => [qw(sop sohb sosb)],
1991 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1993 'sct' => [qw(scp schb scsb)],
1994 'stack-closing-tokens' => => [qw(scp schb scsb)],
1995 'nsct' => [qw(nscp nschb nscsb)],
1996 'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1998 'sac' => [qw(sot sct)],
1999 'nsac' => [qw(nsot nsct)],
2000 'stack-all-containers' => [qw(sot sct)],
2001 'nostack-all-containers' => [qw(nsot nsct)],
2003 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2004 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2005 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2006 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2007 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2008 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2010 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)],
2011 'sobb' => [qw(bbvt=2 bbvtl=*)],
2012 'nostack-opening-block-brace' => [qw(bbvt=0)],
2013 'nsobb' => [qw(bbvt=0)],
2015 'converge' => [qw(it=4)],
2016 'noconverge' => [qw(it=1)],
2017 'conv' => [qw(it=4)],
2018 'nconv' => [qw(it=1)],
2020 # 'mangle' originally deleted pod and comments, but to keep it
2021 # reversible, it no longer does. But if you really want to
2022 # delete them, just use:
2025 # An interesting use for 'mangle' is to do this:
2026 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
2027 # which will form as many one-line blocks as possible
2032 keep-old-blank-lines=0
2034 delete-old-whitespace
2037 maximum-consecutive-blank-lines=0
2038 maximum-line-length=100000
2042 noblanks-before-blocks
2043 blank-lines-before-subs=0
2044 blank-lines-before-packages=0
2049 # 'extrude' originally deleted pod and comments, but to keep it
2050 # reversible, it no longer does. But if you really want to
2051 # delete them, just use
2054 # An interesting use for 'extrude' is to do this:
2055 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
2056 # which will break up all one-line blocks.
2063 delete-old-whitespace
2066 maximum-consecutive-blank-lines=0
2067 maximum-line-length=1
2070 noblanks-before-blocks
2071 blank-lines-before-subs=0
2072 blank-lines-before-packages=0
2079 # this style tries to follow the GNU Coding Standards (which do
2080 # not really apply to perl but which are followed by some perl
2084 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
2088 # Style suggested in Damian Conway's Perl Best Practices
2089 'perl-best-practices' => [
2090 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
2091 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
2094 # Additional styles can be added here
2097 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
2099 # Uncomment next line to dump all expansions for debugging:
2100 # dump_short_names(\%expansion);
2102 \@option_string, \@defaults, \%expansion,
2103 \%option_category, \%option_range
2106 } # end of generate_options
2108 # Memoize process_command_line. Given same @ARGV passed in, return same
2109 # values and same @ARGV back.
2110 # This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
2111 # up masontidy (https://metacpan.org/module/masontidy)
2113 my %process_command_line_cache;
2115 sub process_command_line {
2118 $perltidyrc_stream, $is_Windows, $Windows_type,
2119 $rpending_complaint, $dump_options_type
2122 my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
2124 my $cache_key = join( chr(28), @ARGV );
2125 if ( my $result = $process_command_line_cache{$cache_key} ) {
2126 my ( $argv, @retvals ) = @$result;
2131 my @retvals = _process_command_line(@_);
2132 $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
2133 if $retvals[0]->{'memoize'};
2138 return _process_command_line(@_);
2142 # (note the underscore here)
2143 sub _process_command_line {
2146 $perltidyrc_stream, $is_Windows, $Windows_type,
2147 $rpending_complaint, $dump_options_type
2153 $roption_string, $rdefaults, $rexpansion,
2154 $roption_category, $roption_range
2155 ) = generate_options();
2157 #---------------------------------------------------------------
2158 # set the defaults by passing the above list through GetOptions
2159 #---------------------------------------------------------------
2165 # do not load the defaults if we are just dumping perltidyrc
2166 unless ( $dump_options_type eq 'perltidyrc' ) {
2167 for $i (@$rdefaults) { push @ARGV, "--" . $i }
2170 # Patch to save users Getopt::Long configuration
2171 # and set to Getopt::Long defaults. Use eval to avoid
2172 # breaking old versions of Perl without these routines.
2174 eval { $glc = Getopt::Long::Configure() };
2176 eval { Getopt::Long::ConfigDefaults() };
2178 else { $glc = undef }
2180 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2181 Die "Programming Bug: error in setting default options";
2184 # Patch to put the previous Getopt::Long configuration back
2185 eval { Getopt::Long::Configure($glc) } if defined $glc;
2189 my @raw_options = ();
2190 my $config_file = "";
2191 my $saw_ignore_profile = 0;
2192 my $saw_extrude = 0;
2194 my $saw_dump_profile = 0;
2197 #---------------------------------------------------------------
2198 # Take a first look at the command-line parameters. Do as many
2199 # immediate dumps as possible, which can avoid confusion if the
2200 # perltidyrc file has an error.
2201 #---------------------------------------------------------------
2202 foreach $i (@ARGV) {
2205 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
2206 $saw_ignore_profile = 1;
2209 # note: this must come before -pro and -profile, below:
2210 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
2211 $saw_dump_profile = 1;
2213 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
2216 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
2220 # resolve <dir>/.../<file>, meaning look upwards from directory
2221 if ( defined($config_file) ) {
2222 if ( my ( $start_dir, $search_file ) =
2223 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2225 $start_dir = '.' if !$start_dir;
2226 $start_dir = Cwd::realpath($start_dir);
2227 if ( my $found_file =
2228 find_file_upwards( $start_dir, $search_file ) )
2230 $config_file = $found_file;
2234 unless ( -e $config_file ) {
2235 Warn "cannot find file given with -pro=$config_file: $!\n";
2239 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
2240 Die "usage: -pro=filename or --profile=filename, no spaces\n";
2242 elsif ( $i =~ /^-extrude$/ ) {
2245 elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) {
2248 elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
2252 elsif ( $i =~ /^-(version|v)$/ ) {
2256 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
2257 dump_defaults(@$rdefaults);
2260 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
2261 dump_long_names(@$roption_string);
2264 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
2265 dump_short_names($rexpansion);
2268 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
2269 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
2274 if ( $saw_dump_profile && $saw_ignore_profile ) {
2275 Warn "No profile to dump because of -npro\n";
2279 #---------------------------------------------------------------
2280 # read any .perltidyrc configuration file
2281 #---------------------------------------------------------------
2282 unless ($saw_ignore_profile) {
2284 # resolve possible conflict between $perltidyrc_stream passed
2285 # as call parameter to perltidy and -pro=filename on command
2287 if ($perltidyrc_stream) {
2290 Conflict: a perltidyrc configuration file was specified both as this
2291 perltidy call parameter: $perltidyrc_stream
2292 and with this -profile=$config_file.
2293 Using -profile=$config_file.
2297 $config_file = $perltidyrc_stream;
2301 # look for a config file if we don't have one yet
2302 my $rconfig_file_chatter;
2303 $$rconfig_file_chatter = "";
2305 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2306 $rpending_complaint )
2307 unless $config_file;
2309 # open any config file
2312 ( $fh_config, $config_file ) =
2313 Perl::Tidy::streamhandle( $config_file, 'r' );
2314 unless ($fh_config) {
2315 $$rconfig_file_chatter .=
2316 "# $config_file exists but cannot be opened\n";
2320 if ($saw_dump_profile) {
2321 dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
2327 my ( $rconfig_list, $death_message, $_saw_pbp ) =
2328 read_config_file( $fh_config, $config_file, $rexpansion );
2329 Die $death_message if ($death_message);
2330 $saw_pbp ||= $_saw_pbp;
2332 # process any .perltidyrc parameters right now so we can
2334 if (@$rconfig_list) {
2335 local @ARGV = @$rconfig_list;
2337 expand_command_abbreviations( $rexpansion, \@raw_options,
2340 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2342 "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
2345 # Anything left in this local @ARGV is an error and must be
2346 # invalid bare words from the configuration file. We cannot
2347 # check this earlier because bare words may have been valid
2348 # values for parameters. We had to wait for GetOptions to have
2352 my $str = "\'" . pop(@ARGV) . "\'";
2353 while ( my $param = pop(@ARGV) ) {
2354 if ( length($str) < 70 ) {
2355 $str .= ", '$param'";
2363 There are $count unrecognized values in the configuration file '$config_file':
2365 Use leading dashes for parameters. Use -npro to ignore this file.
2369 # Undo any options which cause premature exit. They are not
2370 # appropriate for a config file, and it could be hard to
2371 # diagnose the cause of the premature exit.
2380 dump-want-left-space
2381 dump-want-right-space
2389 if ( defined( $Opts{$_} ) ) {
2391 Warn "ignoring --$_ in config file: $config_file\n";
2398 #---------------------------------------------------------------
2399 # now process the command line parameters
2400 #---------------------------------------------------------------
2401 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2403 local $SIG{'__WARN__'} = sub { Warn $_[0] };
2404 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2405 Die "Error on command line; for help try 'perltidy -h'\n";
2409 \%Opts, $config_file, \@raw_options,
2410 $saw_extrude, $saw_pbp, $roption_string,
2411 $rexpansion, $roption_category, $roption_range
2413 } # end of process_command_line
2417 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2419 #---------------------------------------------------------------
2420 # check and handle any interactions among the basic options..
2421 #---------------------------------------------------------------
2423 # Since -vt, -vtc, and -cti are abbreviations, but under
2424 # msdos, an unquoted input parameter like vtc=1 will be
2425 # seen as 2 parameters, vtc and 1, so the abbreviations
2426 # won't be seen. Therefore, we will catch them here if
2429 if ( defined $rOpts->{'vertical-tightness'} ) {
2430 my $vt = $rOpts->{'vertical-tightness'};
2431 $rOpts->{'paren-vertical-tightness'} = $vt;
2432 $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2433 $rOpts->{'brace-vertical-tightness'} = $vt;
2436 if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2437 my $vtc = $rOpts->{'vertical-tightness-closing'};
2438 $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
2439 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2440 $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
2443 if ( defined $rOpts->{'closing-token-indentation'} ) {
2444 my $cti = $rOpts->{'closing-token-indentation'};
2445 $rOpts->{'closing-square-bracket-indentation'} = $cti;
2446 $rOpts->{'closing-brace-indentation'} = $cti;
2447 $rOpts->{'closing-paren-indentation'} = $cti;
2450 # In quiet mode, there is no log file and hence no way to report
2451 # results of syntax check, so don't do it.
2452 if ( $rOpts->{'quiet'} ) {
2453 $rOpts->{'check-syntax'} = 0;
2456 # can't check syntax if no output
2457 if ( $rOpts->{'format'} ne 'tidy' ) {
2458 $rOpts->{'check-syntax'} = 0;
2461 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2462 # wide variety of nasty problems on these systems, because they cannot
2463 # reliably run backticks. Don't even think about changing this!
2464 if ( $rOpts->{'check-syntax'}
2466 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2468 $rOpts->{'check-syntax'} = 0;
2471 # It's really a bad idea to check syntax as root unless you wrote
2472 # the script yourself. FIXME: not sure if this works with VMS
2473 unless ($is_Windows) {
2475 if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2476 $rOpts->{'check-syntax'} = 0;
2477 $$rpending_complaint .=
2478 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2482 # check iteration count and quietly fix if necessary:
2483 # - iterations option only applies to code beautification mode
2484 # - the convergence check should stop most runs on iteration 2, and
2485 # virtually all on iteration 3. But we'll allow up to 6.
2486 if ( $rOpts->{'format'} ne 'tidy' ) {
2487 $rOpts->{'iterations'} = 1;
2489 elsif ( defined( $rOpts->{'iterations'} ) ) {
2490 if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2491 elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 }
2494 $rOpts->{'iterations'} = 1;
2497 # check for reasonable number of blank lines and fix to avoid problems
2498 if ( $rOpts->{'blank-lines-before-subs'} ) {
2499 if ( $rOpts->{'blank-lines-before-subs'} < 0 ) {
2500 $rOpts->{'blank-lines-before-subs'} = 0;
2501 Warn "negative value of -blbs, setting 0\n";
2503 if ( $rOpts->{'blank-lines-before-subs'} > 100 ) {
2504 Warn "unreasonably large value of -blbs, reducing\n";
2505 $rOpts->{'blank-lines-before-subs'} = 100;
2508 if ( $rOpts->{'blank-lines-before-packages'} ) {
2509 if ( $rOpts->{'blank-lines-before-packages'} < 0 ) {
2510 Warn "negative value of -blbp, setting 0\n";
2511 $rOpts->{'blank-lines-before-packages'} = 0;
2513 if ( $rOpts->{'blank-lines-before-packages'} > 100 ) {
2514 Warn "unreasonably large value of -blbp, reducing\n";
2515 $rOpts->{'blank-lines-before-packages'} = 100;
2519 # see if user set a non-negative logfile-gap
2520 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2522 # a zero gap will be taken as a 1
2523 if ( $rOpts->{'logfile-gap'} == 0 ) {
2524 $rOpts->{'logfile-gap'} = 1;
2527 # setting a non-negative logfile gap causes logfile to be saved
2528 $rOpts->{'logfile'} = 1;
2531 # not setting logfile gap, or setting it negative, causes default of 50
2533 $rOpts->{'logfile-gap'} = 50;
2536 # set short-cut flag when only indentation is to be done.
2537 # Note that the user may or may not have already set the
2539 if ( !$rOpts->{'add-whitespace'}
2540 && !$rOpts->{'delete-old-whitespace'}
2541 && !$rOpts->{'add-newlines'}
2542 && !$rOpts->{'delete-old-newlines'} )
2544 $rOpts->{'indent-only'} = 1;
2547 # -isbc implies -ibc
2548 if ( $rOpts->{'indent-spaced-block-comments'} ) {
2549 $rOpts->{'indent-block-comments'} = 1;
2552 # -bli flag implies -bl
2553 if ( $rOpts->{'brace-left-and-indent'} ) {
2554 $rOpts->{'opening-brace-on-new-line'} = 1;
2557 if ( $rOpts->{'opening-brace-always-on-right'}
2558 && $rOpts->{'opening-brace-on-new-line'} )
2561 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2562 'opening-brace-on-new-line' (-bl). Ignoring -bl.
2564 $rOpts->{'opening-brace-on-new-line'} = 0;
2567 # it simplifies things if -bl is 0 rather than undefined
2568 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2569 $rOpts->{'opening-brace-on-new-line'} = 0;
2572 # -sbl defaults to -bl if not defined
2573 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2574 $rOpts->{'opening-sub-brace-on-new-line'} =
2575 $rOpts->{'opening-brace-on-new-line'};
2578 if ( $rOpts->{'entab-leading-whitespace'} ) {
2579 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2580 Warn "-et=n must use a positive integer; ignoring -et\n";
2581 $rOpts->{'entab-leading-whitespace'} = undef;
2584 # entab leading whitespace has priority over the older 'tabs' option
2585 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2588 # set a default tabsize to be used in guessing the starting indentation
2589 # level if and only if this run does not use tabs and the old code does
2591 if ( $rOpts->{'default-tabsize'} ) {
2592 if ( $rOpts->{'default-tabsize'} < 0 ) {
2593 Warn "negative value of -dt, setting 0\n";
2594 $rOpts->{'default-tabsize'} = 0;
2596 if ( $rOpts->{'default-tabsize'} > 20 ) {
2597 Warn "unreasonably large value of -dt, reducing\n";
2598 $rOpts->{'default-tabsize'} = 20;
2602 $rOpts->{'default-tabsize'} = 8;
2605 # Define $tabsize, the number of spaces per tab for use in
2606 # guessing the indentation of source lines with leading tabs.
2607 # Assume same as for this run if tabs are used , otherwise assume
2608 # a default value, typically 8
2610 $rOpts->{'entab-leading-whitespace'}
2611 ? $rOpts->{'entab-leading-whitespace'}
2612 : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
2613 : $rOpts->{'default-tabsize'};
2617 sub find_file_upwards {
2618 my ( $search_dir, $search_file ) = @_;
2620 $search_dir =~ s{/+$}{};
2621 $search_file =~ s{^/+}{};
2624 my $try_path = "$search_dir/$search_file";
2625 if ( -f $try_path ) {
2628 elsif ( $search_dir eq '/' ) {
2632 $search_dir = dirname($search_dir);
2637 sub expand_command_abbreviations {
2639 # go through @ARGV and expand any abbreviations
2641 my ( $rexpansion, $rraw_options, $config_file ) = @_;
2644 # set a pass limit to prevent an infinite loop;
2645 # 10 should be plenty, but it may be increased to allow deeply
2646 # nested expansions.
2647 my $max_passes = 10;
2650 # keep looping until all expansions have been converted into actual
2652 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2654 my $abbrev_count = 0;
2656 # loop over each item in @ARGV..
2657 foreach $word (@ARGV) {
2659 # convert any leading 'no-' to just 'no'
2660 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2662 # if it is a dash flag (instead of a file name)..
2663 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2668 # save the raw input for debug output in case of circular refs
2669 if ( $pass_count == 0 ) {
2670 push( @$rraw_options, $word );
2673 # recombine abbreviation and flag, if necessary,
2674 # to allow abbreviations with arguments such as '-vt=1'
2675 if ( $rexpansion->{ $abr . $flags } ) {
2676 $abr = $abr . $flags;
2680 # if we see this dash item in the expansion hash..
2681 if ( $rexpansion->{$abr} ) {
2684 # stuff all of the words that it expands to into the
2685 # new arg list for the next pass
2686 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2687 next unless $abbrev; # for safety; shouldn't happen
2688 push( @new_argv, '--' . $abbrev . $flags );
2692 # not in expansion hash, must be actual long name
2694 push( @new_argv, $word );
2698 # not a dash item, so just save it for the next pass
2700 push( @new_argv, $word );
2702 } # end of this pass
2704 # update parameter list @ARGV to the new one
2706 last unless ( $abbrev_count > 0 );
2708 # make sure we are not in an infinite loop
2709 if ( $pass_count == $max_passes ) {
2712 I'm tired. We seem to be in an infinite loop trying to expand aliases.
2713 Here are the raw options;
2716 my $num = @new_argv;
2719 After $max_passes passes here is ARGV
2725 After $max_passes passes ARGV has $num entries
2731 Please check your configuration file $config_file for circular-references.
2732 To deactivate it, use -npro.
2737 Program bug - circular-references in the %expansion hash, probably due to
2738 a recent program change.
2741 } # end of check for circular references
2742 } # end of loop over all passes
2745 # Debug routine -- this will dump the expansion hash
2746 sub dump_short_names {
2747 my $rexpansion = shift;
2749 List of short names. This list shows how all abbreviations are
2750 translated into other abbreviations and, eventually, into long names.
2751 New abbreviations may be defined in a .perltidyrc file.
2752 For a list of all long names, use perltidy --dump-long-names (-dln).
2753 --------------------------------------------------------------------------
2755 foreach my $abbrev ( sort keys %$rexpansion ) {
2756 my @list = @{ $$rexpansion{$abbrev} };
2757 print STDOUT "$abbrev --> @list\n";
2761 sub check_vms_filename {
2763 # given a valid filename (the perltidy input file)
2764 # create a modified filename and separator character
2767 # Contributed by Michael Cartmell
2769 my ( $base, $path ) = fileparse( $_[0] );
2771 # remove explicit ; version
2772 $base =~ s/;-?\d*$//
2774 # remove explicit . version ie two dots in filename NB ^ escapes a dot
2775 or $base =~ s/( # begin capture $1
2776 (?:^|[^^])\. # match a dot not preceded by a caret
2777 (?: # followed by nothing
2779 .*[^^] # anything ending in a non caret
2782 \.-?\d*$ # match . version number
2785 # normalise filename, if there are no unescaped dots then append one
2786 $base .= '.' unless $base =~ /(?:^|[^^])\./;
2788 # if we don't already have an extension then we just append the extension
2789 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2790 return ( $path . $base, $separator );
2795 # TODO: are these more standard names?
2796 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2798 # Returns a string that determines what MS OS we are on.
2799 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2800 # Returns blank string if not an MS system.
2801 # Original code contributed by: Yves Orton
2802 # We need to know this to decide where to look for config files
2804 my $rpending_complaint = shift;
2806 return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2808 # Systems built from Perl source may not have Win32.pm
2809 # But probably have Win32::GetOSVersion() anyway so the
2810 # following line is not 'required':
2811 # return $os unless eval('require Win32');
2813 # Use the standard API call to determine the version
2814 my ( $undef, $major, $minor, $build, $id );
2815 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2818 # NAME ID MAJOR MINOR
2819 # Windows NT 4 2 4 0
2820 # Windows 2000 2 5 0
2822 # Windows Server 2003 2 5 2
2824 return "win32s" unless $id; # If id==0 then its a win32s box.
2825 $os = { # Magic numbers from MSDN
2826 # documentation of GetOSVersion
2833 0 => "2000", # or NT 4, see below
2840 # If $os is undefined, the above code is out of date. Suggested updates
2842 unless ( defined $os ) {
2844 $$rpending_complaint .= <<EOS;
2845 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2846 We won't be able to look for a system-wide config file.
2850 # Unfortunately the logic used for the various versions isn't so clever..
2851 # so we have to handle an outside case.
2852 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2857 ( $^O !~ /win32|dos/i )
2860 && ( $^O ne 'MacOS' );
2863 sub look_for_Windows {
2865 # determine Windows sub-type and location of
2866 # system-wide configuration files
2867 my $rpending_complaint = shift;
2868 my $is_Windows = ( $^O =~ /win32|dos/i );
2869 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
2870 return ( $is_Windows, $Windows_type );
2873 sub find_config_file {
2875 # look for a .perltidyrc configuration file
2876 # For Windows also look for a file named perltidy.ini
2877 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2878 $rpending_complaint ) = @_;
2880 $$rconfig_file_chatter .= "# Config file search...system reported as:";
2882 $$rconfig_file_chatter .= "Windows $Windows_type\n";
2885 $$rconfig_file_chatter .= " $^O\n";
2888 # sub to check file existence and record all tests
2889 my $exists_config_file = sub {
2890 my $config_file = shift;
2891 return 0 unless $config_file;
2892 $$rconfig_file_chatter .= "# Testing: $config_file\n";
2893 return -f $config_file;
2898 # look in current directory first
2899 $config_file = ".perltidyrc";
2900 return $config_file if $exists_config_file->($config_file);
2902 $config_file = "perltidy.ini";
2903 return $config_file if $exists_config_file->($config_file);
2906 # Default environment vars.
2907 my @envs = qw(PERLTIDY HOME);
2909 # Check the NT/2k/XP locations, first a local machine def, then a
2911 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2913 # Now go through the environment ...
2914 foreach my $var (@envs) {
2915 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2916 if ( defined( $ENV{$var} ) ) {
2917 $$rconfig_file_chatter .= " = $ENV{$var}\n";
2919 # test ENV{ PERLTIDY } as file:
2920 if ( $var eq 'PERLTIDY' ) {
2921 $config_file = "$ENV{$var}";
2922 return $config_file if $exists_config_file->($config_file);
2925 # test ENV as directory:
2926 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2927 return $config_file if $exists_config_file->($config_file);
2930 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2931 return $config_file if $exists_config_file->($config_file);
2935 $$rconfig_file_chatter .= "\n";
2939 # then look for a system-wide definition
2940 # where to look varies with OS
2943 if ($Windows_type) {
2944 my ( $os, $system, $allusers ) =
2945 Win_Config_Locs( $rpending_complaint, $Windows_type );
2947 # Check All Users directory, if there is one.
2948 # i.e. C:\Documents and Settings\User\perltidy.ini
2951 $config_file = catfile( $allusers, ".perltidyrc" );
2952 return $config_file if $exists_config_file->($config_file);
2954 $config_file = catfile( $allusers, "perltidy.ini" );
2955 return $config_file if $exists_config_file->($config_file);
2958 # Check system directory.
2959 # retain old code in case someone has been able to create
2960 # a file with a leading period.
2961 $config_file = catfile( $system, ".perltidyrc" );
2962 return $config_file if $exists_config_file->($config_file);
2964 $config_file = catfile( $system, "perltidy.ini" );
2965 return $config_file if $exists_config_file->($config_file);
2969 # Place to add customization code for other systems
2970 elsif ( $^O eq 'OS2' ) {
2972 elsif ( $^O eq 'MacOS' ) {
2974 elsif ( $^O eq 'VMS' ) {
2977 # Assume some kind of Unix
2980 $config_file = "/usr/local/etc/perltidyrc";
2981 return $config_file if $exists_config_file->($config_file);
2983 $config_file = "/etc/perltidyrc";
2984 return $config_file if $exists_config_file->($config_file);
2987 # Couldn't find a config file
2991 sub Win_Config_Locs {
2993 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2994 # or undef if its not a win32 OS. In list context returns OS, System
2995 # Directory, and All Users Directory. All Users will be empty on a
2996 # 9x/Me box. Contributed by: Yves Orton.
2998 my $rpending_complaint = shift;
2999 my $os = (@_) ? shift : Win_OS_Type();
3005 if ( $os =~ /9[58]|Me/ ) {
3006 $system = "C:/Windows";
3008 elsif ( $os =~ /NT|XP|200?/ ) {
3009 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
3012 ? "C:/WinNT/profiles/All Users/"
3013 : "C:/Documents and Settings/All Users/";
3017 # This currently would only happen on a win32s computer. I don't have
3018 # one to test, so I am unsure how to proceed. Suggestions welcome!
3019 $$rpending_complaint .=
3020 "I dont know a sensible place to look for config files on an $os system.\n";
3023 return wantarray ? ( $os, $system, $allusers ) : $os;
3026 sub dump_config_file {
3028 my $config_file = shift;
3029 my $rconfig_file_chatter = shift;
3030 print STDOUT "$$rconfig_file_chatter";
3032 print STDOUT "# Dump of file: '$config_file'\n";
3033 while ( my $line = $fh->getline() ) { print STDOUT $line }
3034 eval { $fh->close() };
3037 print STDOUT "# ...no config file found\n";
3041 sub read_config_file {
3043 my ( $fh, $config_file, $rexpansion ) = @_;
3044 my @config_list = ();
3047 # file is bad if non-empty $death_message is returned
3048 my $death_message = "";
3052 while ( my $line = $fh->getline() ) {
3055 ( $line, $death_message ) =
3056 strip_comment( $line, $config_file, $line_no );
3057 last if ($death_message);
3059 $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
3062 # look for something of the general form
3069 if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) {
3070 ( $newname, $body ) = ( $2, $3, );
3074 if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) {
3078 # handle a new alias definition
3082 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
3087 if ( ${$rexpansion}{$name} ) {
3089 my @names = sort keys %$rexpansion;
3091 "Here is a list of all installed aliases\n(@names)\n"
3092 . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
3095 ${$rexpansion}{$name} = [];
3101 my ( $rbody_parts, $msg ) = parse_args($body);
3103 $death_message = <<EOM;
3104 Error reading file '$config_file' at line number $line_no.
3106 Please fix this line or use -npro to avoid reading this file
3113 # remove leading dashes if this is an alias
3114 foreach (@$rbody_parts) { s/^\-+//; }
3115 push @{ ${$rexpansion}{$name} }, @$rbody_parts;
3118 push( @config_list, @$rbody_parts );
3123 eval { $fh->close() };
3124 return ( \@config_list, $death_message, $saw_pbp );
3129 # Strip any comment from a command line
3130 my ( $instr, $config_file, $line_no ) = @_;
3133 # check for full-line comment
3134 if ( $instr =~ /^\s*#/ ) {
3135 return ( "", $msg );
3138 # nothing to do if no comments
3139 if ( $instr !~ /#/ ) {
3140 return ( $instr, $msg );
3143 # handle case of no quotes
3144 elsif ( $instr !~ /['"]/ ) {
3146 # We now require a space before the # of a side comment
3147 # this allows something like:
3149 # Otherwise, it would have to be quoted:
3151 $instr =~ s/\s+\#.*$//;
3152 return ( $instr, $msg );
3155 # handle comments and quotes
3157 my $quote_char = "";
3160 # looking for ending quote character
3162 if ( $instr =~ /\G($quote_char)/gc ) {
3166 elsif ( $instr =~ /\G(.)/gc ) {
3170 # error..we reached the end without seeing the ending quote char
3173 Error reading file $config_file at line number $line_no.
3174 Did not see ending quote character <$quote_char> in this text:
3176 Please fix this line or use -npro to avoid reading this file
3182 # accumulating characters and looking for start of a quoted string
3184 if ( $instr =~ /\G([\"\'])/gc ) {
3189 # Note: not yet enforcing the space-before-hash rule for side
3190 # comments if the parameter is quoted.
3191 elsif ( $instr =~ /\G#/gc ) {
3194 elsif ( $instr =~ /\G(.)/gc ) {
3202 return ( $outstr, $msg );
3207 # Parse a command string containing multiple string with possible
3208 # quotes, into individual commands. It might look like this, for example:
3210 # -wba=" + - " -some-thing -wbb='. && ||'
3212 # There is no need, at present, to handle escaped quote characters.
3213 # (They are not perltidy tokens, so needn't be in strings).
3216 my @body_parts = ();
3217 my $quote_char = "";
3222 # looking for ending quote character
3224 if ( $body =~ /\G($quote_char)/gc ) {
3227 elsif ( $body =~ /\G(.)/gc ) {
3231 # error..we reached the end without seeing the ending quote char
3233 if ( length($part) ) { push @body_parts, $part; }
3235 Did not see ending quote character <$quote_char> in this text:
3242 # accumulating characters and looking for start of a quoted string
3244 if ( $body =~ /\G([\"\'])/gc ) {
3247 elsif ( $body =~ /\G(\s+)/gc ) {
3248 if ( length($part) ) { push @body_parts, $part; }
3251 elsif ( $body =~ /\G(.)/gc ) {
3255 if ( length($part) ) { push @body_parts, $part; }
3260 return ( \@body_parts, $msg );
3263 sub dump_long_names {
3265 my @names = sort @_;
3267 # Command line long names (passed to GetOptions)
3268 #---------------------------------------------------------------
3269 # here is a summary of the Getopt codes:
3270 # <none> does not take an argument
3271 # =s takes a mandatory string
3272 # :s takes an optional string
3273 # =i takes a mandatory integer
3274 # :i takes an optional integer
3275 # ! does not take an argument and may be negated
3276 # i.e., -foo and -nofoo are allowed
3277 # a double dash signals the end of the options list
3279 #---------------------------------------------------------------
3282 foreach (@names) { print STDOUT "$_\n" }
3286 my @defaults = sort @_;
3287 print STDOUT "Default command line options:\n";
3288 foreach (@_) { print STDOUT "$_\n" }
3291 sub readable_options {
3293 # return options for this run as a string which could be
3294 # put in a perltidyrc file
3295 my ( $rOpts, $roption_string ) = @_;
3297 my $rGetopt_flags = \%Getopt_flags;
3298 my $readable_options = "# Final parameter set for this run.\n";
3299 $readable_options .=
3300 "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
3301 foreach my $opt ( @{$roption_string} ) {
3303 if ( $opt =~ /(.*)(!|=.*)$/ ) {
3307 if ( defined( $rOpts->{$opt} ) ) {
3308 $rGetopt_flags->{$opt} = $flag;
3311 foreach my $key ( sort keys %{$rOpts} ) {
3312 my $flag = $rGetopt_flags->{$key};
3313 my $value = $rOpts->{$key};
3317 if ( $flag =~ /^=/ ) {
3318 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
3319 $suffix = "=" . $value;
3321 elsif ( $flag =~ /^!/ ) {
3322 $prefix .= "no" unless ($value);
3327 $readable_options .=
3328 "# ERROR in dump_options: unrecognized flag $flag for $key\n";
3331 $readable_options .= $prefix . $key . $suffix . "\n";
3333 return $readable_options;
3337 print STDOUT <<"EOM";
3338 This is perltidy, v$VERSION
3340 Copyright 2000-2013, Steve Hancock
3342 Perltidy is free software and may be copied under the terms of the GNU
3343 General Public License, which is included in the distribution files.
3345 Complete documentation for perltidy can be found using 'man perltidy'
3346 or on the internet at http://perltidy.sourceforge.net.
3353 This is perltidy version $VERSION, a perl script indenter. Usage:
3355 perltidy [ options ] file1 file2 file3 ...
3356 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
3357 perltidy [ options ] file1 -o outfile
3358 perltidy [ options ] file1 -st >outfile
3359 perltidy [ options ] <infile >outfile
3361 Options have short and long forms. Short forms are shown; see
3362 man pages for long forms. Note: '=s' indicates a required string,
3363 and '=n' indicates a required integer.
3367 -o=file name of the output file (only if single input file)
3368 -oext=s change output extension from 'tdy' to s
3369 -opath=path change path to be 'path' for output files
3370 -b backup original to .bak and modify file in-place
3371 -bext=s change default backup extension from 'bak' to s
3372 -q deactivate error messages (for running under editor)
3373 -w include non-critical warning messages in the .ERR error output
3374 -syn run perl -c to check syntax (default under unix systems)
3375 -log save .LOG file, which has useful diagnostics
3376 -f force perltidy to read a binary file
3377 -g like -log but writes more detailed .LOG file, for debugging scripts
3378 -opt write the set of options actually used to a .LOG file
3379 -npro ignore .perltidyrc configuration command file
3380 -pro=file read configuration commands from file instead of .perltidyrc
3381 -st send output to standard output, STDOUT
3382 -se send all error output to standard error output, STDERR
3383 -v display version number to standard output and quit
3386 -i=n use n columns per indentation level (default n=4)
3387 -t tabs: use one tab character per indentation level, not recommeded
3388 -nt no tabs: use n spaces per indentation level (default)
3389 -et=n entab leading whitespace n spaces per tab; not recommended
3390 -io "indent only": just do indentation, no other formatting.
3391 -sil=n set starting indentation level to n; use if auto detection fails
3392 -ole=s specify output line ending (s=dos or win, mac, unix)
3393 -ple keep output line endings same as input (input must be filename)
3396 -fws freeze whitespace; this disables all whitespace changes
3397 and disables the following switches:
3398 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
3399 -bbt same as -bt but for code block braces; same as -bt if not given
3400 -bbvt block braces vertically tight; use with -bl or -bli
3401 -bbvtl=s make -bbvt to apply to selected list of block types
3402 -pt=n paren tightness (n=0, 1 or 2)
3403 -sbt=n square bracket tightness (n=0, 1, or 2)
3404 -bvt=n brace vertical tightness,
3405 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3406 -pvt=n paren vertical tightness (see -bvt for n)
3407 -sbvt=n square bracket vertical tightness (see -bvt for n)
3408 -bvtc=n closing brace vertical tightness:
3409 n=(0=open, 1=sometimes close, 2=always close)
3410 -pvtc=n closing paren vertical tightness, see -bvtc for n.
3411 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3412 -ci=n sets continuation indentation=n, default is n=2 spaces
3413 -lp line up parentheses, brackets, and non-BLOCK braces
3414 -sfs add space before semicolon in for( ; ; )
3415 -aws allow perltidy to add whitespace (default)
3416 -dws delete all old non-essential whitespace
3417 -icb indent closing brace of a code block
3418 -cti=n closing indentation of paren, square bracket, or non-block brace:
3419 n=0 none, =1 align with opening, =2 one full indentation level
3420 -icp equivalent to -cti=2
3421 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
3422 -wrs=s want space right of tokens in string;
3423 -sts put space before terminal semicolon of a statement
3424 -sak=s put space between keywords given in s and '(';
3425 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3428 -fnl freeze newlines; this disables all line break changes
3429 and disables the following switches:
3430 -anl add newlines; ok to introduce new line breaks
3431 -bbs add blank line before subs and packages
3432 -bbc add blank line before block comments
3433 -bbb add blank line between major blocks
3434 -kbl=n keep old blank lines? 0=no, 1=some, 2=all
3435 -mbl=n maximum consecutive blank lines to output (default=1)
3436 -ce cuddled else; use this style: '} else {'
3437 -dnl delete old newlines (default)
3438 -l=n maximum line length; default n=80
3439 -bl opening brace on new line
3440 -sbl opening sub brace on new line. value of -bl is used if not given.
3441 -bli opening brace on new line and indented
3442 -bar opening brace always on right, even for long clauses
3443 -vt=n vertical tightness (requires -lp); n controls break after opening
3444 token: 0=never 1=no break if next line balanced 2=no break
3445 -vtc=n vertical tightness of closing container; n controls if closing
3446 token starts new line: 0=always 1=not unless list 1=never
3447 -wba=s want break after tokens in string; i.e. wba=': .'
3448 -wbb=s want break before tokens in string
3450 Following Old Breakpoints
3451 -kis keep interior semicolons. Allows multiple statements per line.
3452 -boc break at old comma breaks: turns off all automatic list formatting
3453 -bol break at old logical breakpoints: or, and, ||, && (default)
3454 -bok break at old list keyword breakpoints such as map, sort (default)
3455 -bot break at old conditional (ternary ?:) operator breakpoints (default)
3456 -boa break at old attribute breakpoints
3457 -cab=n break at commas after a comma-arrow (=>):
3458 n=0 break at all commas after =>
3459 n=1 stable: break unless this breaks an existing one-line container
3460 n=2 break only if a one-line container cannot be formed
3461 n=3 do not treat commas after => specially at all
3464 -ibc indent block comments (default)
3465 -isbc indent spaced block comments; may indent unless no leading space
3466 -msc=n minimum desired spaces to side comment, default 4
3467 -fpsc=n fix position for side comments; default 0;
3468 -csc add or update closing side comments after closing BLOCK brace
3469 -dcsc delete closing side comments created by a -csc command
3470 -cscp=s change closing side comment prefix to be other than '## end'
3471 -cscl=s change closing side comment to apply to selected list of blocks
3472 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3473 -csct=n maximum number of columns of appended text, default n=20
3474 -cscw causes warning if old side comment is overwritten with -csc
3476 -sbc use 'static block comments' identified by leading '##' (default)
3477 -sbcp=s change static block comment identifier to be other than '##'
3478 -osbc outdent static block comments
3480 -ssc use 'static side comments' identified by leading '##' (default)
3481 -sscp=s change static side comment identifier to be other than '##'
3483 Delete selected text
3484 -dac delete all comments AND pod
3485 -dbc delete block comments
3486 -dsc delete side comments
3489 Send selected text to a '.TEE' file
3490 -tac tee all comments AND pod
3491 -tbc tee block comments
3492 -tsc tee side comments
3496 -olq outdent long quoted strings (default)
3497 -olc outdent a long block comment line
3498 -ola outdent statement labels
3499 -okw outdent control keywords (redo, next, last, goto, return)
3500 -okwl=s specify alternative keywords for -okw command
3503 -mft=n maximum fields per table; default n=40
3504 -x do not format lines before hash-bang line (i.e., for VMS)
3505 -asc allows perltidy to add a ';' when missing (default)
3506 -dsm allows perltidy to delete an unnecessary ';' (default)
3508 Combinations of other parameters
3509 -gnu attempt to follow GNU Coding Standards as applied to perl
3510 -mangle remove as many newlines as possible (but keep comments and pods)
3511 -extrude insert as many newlines as possible
3513 Dump and die, debugging
3514 -dop dump options used in this run to standard output and quit
3515 -ddf dump default options to standard output and quit
3516 -dsn dump all option short names to standard output and quit
3517 -dln dump option long names to standard output and quit
3518 -dpro dump whatever configuration file is in effect to standard output
3519 -dtt dump all token types to standard output and quit
3522 -html write an html file (see 'man perl2web' for many options)
3523 Note: when -html is used, no indentation or formatting are done.
3524 Hint: try perltidy -html -css=mystyle.css filename.pl
3525 and edit mystyle.css to change the appearance of filename.html.
3526 -nnn gives line numbers
3527 -pre only writes out <pre>..</pre> code section
3528 -toc places a table of contents to subs at the top (default)
3529 -pod passes pod text through pod2html (default)
3530 -frm write html as a frame (3 files)
3531 -text=s extra extension for table of contents if -frm, default='toc'
3532 -sext=s extra extension for file content if -frm, default='src'
3534 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3535 negates the long forms. For example, -nasc means don't add missing
3538 If you are unable to see this entire text, try "perltidy -h | more"
3539 For more detailed information, and additional options, try "man perltidy",
3540 or go to the perltidy home page at http://perltidy.sourceforge.net
3545 sub process_this_file {
3547 my ( $truth, $beauty ) = @_;
3549 # loop to process each line of this file
3550 while ( my $line_of_tokens = $truth->get_line() ) {
3551 $beauty->write_line($line_of_tokens);
3555 eval { $beauty->finish_formatting() };
3556 $truth->report_tokenization_errors();
3561 # Use 'perl -c' to make sure that we did not create bad syntax
3562 # This is a very good independent check for programming errors
3564 # Given names of the input and output files, ($istream, $ostream),
3565 # we do the following:
3566 # - check syntax of the input file
3567 # - if bad, all done (could be an incomplete code snippet)
3568 # - if infile syntax ok, then check syntax of the output file;
3569 # - if outfile syntax bad, issue warning; this implies a code bug!
3570 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3572 my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
3573 my $infile_syntax_ok = 0;
3574 my $line_of_dashes = '-' x 42 . "\n";
3576 my $flags = $rOpts->{'perl-syntax-check-flags'};
3578 # be sure we invoke perl with -c
3579 # note: perl will accept repeated flags like '-c -c'. It is safest
3580 # to append another -c than try to find an interior bundled c, as
3581 # in -Tc, because such a 'c' might be in a quoted string, for example.
3582 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3584 # be sure we invoke perl with -x if requested
3585 # same comments about repeated parameters applies
3586 if ( $rOpts->{'look-for-hash-bang'} ) {
3587 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3590 # this shouldn't happen unless a temporary file couldn't be made
3591 if ( $istream eq '-' ) {
3592 $logger_object->write_logfile_entry(
3593 "Cannot run perl -c on STDIN and STDOUT\n");
3594 return $infile_syntax_ok;
3597 $logger_object->write_logfile_entry(
3598 "checking input file syntax with perl $flags\n");
3600 # Not all operating systems/shells support redirection of the standard
3602 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3604 my ( $istream_filename, $perl_output ) =
3605 do_syntax_check( $istream, $flags, $error_redirection );
3606 $logger_object->write_logfile_entry(
3607 "Input stream passed to Perl as file $istream_filename\n");
3608 $logger_object->write_logfile_entry($line_of_dashes);
3609 $logger_object->write_logfile_entry("$perl_output\n");
3611 if ( $perl_output =~ /syntax\s*OK/ ) {
3612 $infile_syntax_ok = 1;
3613 $logger_object->write_logfile_entry($line_of_dashes);
3614 $logger_object->write_logfile_entry(
3615 "checking output file syntax with perl $flags ...\n");
3616 my ( $ostream_filename, $perl_output ) =
3617 do_syntax_check( $ostream, $flags, $error_redirection );
3618 $logger_object->write_logfile_entry(
3619 "Output stream passed to Perl as file $ostream_filename\n");
3620 $logger_object->write_logfile_entry($line_of_dashes);
3621 $logger_object->write_logfile_entry("$perl_output\n");
3623 unless ( $perl_output =~ /syntax\s*OK/ ) {
3624 $logger_object->write_logfile_entry($line_of_dashes);
3625 $logger_object->warning(
3626 "The output file has a syntax error when tested with perl $flags $ostream !\n"
3628 $logger_object->warning(
3629 "This implies an error in perltidy; the file $ostream is bad\n"
3631 $logger_object->report_definite_bug();
3633 # the perl version number will be helpful for diagnosing the problem
3634 $logger_object->write_logfile_entry(
3635 qx/perl -v $error_redirection/ . "\n" );
3640 # Only warn of perl -c syntax errors. Other messages,
3641 # such as missing modules, are too common. They can be
3642 # seen by running with perltidy -w
3643 $logger_object->complain("A syntax check using perl $flags\n");
3644 $logger_object->complain(
3645 "for the output in file $istream_filename gives:\n");
3646 $logger_object->complain($line_of_dashes);
3647 $logger_object->complain("$perl_output\n");
3648 $logger_object->complain($line_of_dashes);
3649 $infile_syntax_ok = -1;
3650 $logger_object->write_logfile_entry($line_of_dashes);
3651 $logger_object->write_logfile_entry(
3652 "The output file will not be checked because of input file problems\n"
3655 return $infile_syntax_ok;
3658 sub do_syntax_check {
3659 my ( $stream, $flags, $error_redirection ) = @_;
3661 # We need a named input file for executing perl
3662 my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
3664 # TODO: Need to add name of file to log somewhere
3665 # otherwise Perl output is hard to read
3666 if ( !$stream_filename ) { return $stream_filename, "" }
3668 # We have to quote the filename in case it has unusual characters
3669 # or spaces. Example: this filename #CM11.pm# gives trouble.
3670 my $quoted_stream_filename = '"' . $stream_filename . '"';
3672 # Under VMS something like -T will become -t (and an error) so we
3673 # will put quotes around the flags. Double quotes seem to work on
3674 # Unix/Windows/VMS, but this may not work on all systems. (Single
3675 # quotes do not work under Windows). It could become necessary to
3676 # put double quotes around each flag, such as: -"c" -"T"
3677 # We may eventually need some system-dependent coding here.
3678 $flags = '"' . $flags . '"';
3680 # now wish for luck...
3681 my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
3683 unlink $stream_filename if ($is_tmpfile);
3684 return $stream_filename, $msg;
3687 #####################################################################
3689 # This is a stripped down version of IO::Scalar
3690 # Given a reference to a scalar, it supplies either:
3691 # a getline method which reads lines (mode='r'), or
3692 # a print method which reads lines (mode='w')
3694 #####################################################################
3695 package Perl::Tidy::IOScalar;
3699 my ( $package, $rscalar, $mode ) = @_;
3700 my $ref = ref $rscalar;
3701 if ( $ref ne 'SCALAR' ) {
3703 ------------------------------------------------------------------------
3704 expecting ref to SCALAR but got ref to ($ref); trace follows:
3705 ------------------------------------------------------------------------
3709 if ( $mode eq 'w' ) {
3711 return bless [ $rscalar, $mode ], $package;
3713 elsif ( $mode eq 'r' ) {
3715 # Convert a scalar to an array.
3716 # This avoids looking for "\n" on each call to getline
3718 # NOTES: The -1 count is needed to avoid loss of trailing blank lines
3719 # (which might be important in a DATA section).
3721 if ( $rscalar && ${$rscalar} ) {
3722 @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
3724 # remove possible extra blank line introduced with split
3725 if ( @array && $array[-1] eq "\n" ) { pop @array }
3728 return bless [ \@array, $mode, $i_next ], $package;
3732 ------------------------------------------------------------------------
3733 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3734 ------------------------------------------------------------------------
3741 my $mode = $self->[1];
3742 if ( $mode ne 'r' ) {
3744 ------------------------------------------------------------------------
3745 getline call requires mode = 'r' but mode = ($mode); trace follows:
3746 ------------------------------------------------------------------------
3749 my $i = $self->[2]++;
3750 return $self->[0]->[$i];
3755 my $mode = $self->[1];
3756 if ( $mode ne 'w' ) {
3758 ------------------------------------------------------------------------
3759 print call requires mode = 'w' but mode = ($mode); trace follows:
3760 ------------------------------------------------------------------------
3763 ${ $self->[0] } .= $_[0];
3765 sub close { return }
3767 #####################################################################
3769 # This is a stripped down version of IO::ScalarArray
3770 # Given a reference to an array, it supplies either:
3771 # a getline method which reads lines (mode='r'), or
3772 # a print method which reads lines (mode='w')
3774 # NOTE: this routine assumes that there aren't any embedded
3775 # newlines within any of the array elements. There are no checks
3778 #####################################################################
3779 package Perl::Tidy::IOScalarArray;
3783 my ( $package, $rarray, $mode ) = @_;
3784 my $ref = ref $rarray;
3785 if ( $ref ne 'ARRAY' ) {
3787 ------------------------------------------------------------------------
3788 expecting ref to ARRAY but got ref to ($ref); trace follows:
3789 ------------------------------------------------------------------------
3793 if ( $mode eq 'w' ) {
3795 return bless [ $rarray, $mode ], $package;
3797 elsif ( $mode eq 'r' ) {
3799 return bless [ $rarray, $mode, $i_next ], $package;
3803 ------------------------------------------------------------------------
3804 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3805 ------------------------------------------------------------------------
3812 my $mode = $self->[1];
3813 if ( $mode ne 'r' ) {
3815 ------------------------------------------------------------------------
3816 getline requires mode = 'r' but mode = ($mode); trace follows:
3817 ------------------------------------------------------------------------
3820 my $i = $self->[2]++;
3821 return $self->[0]->[$i];
3826 my $mode = $self->[1];
3827 if ( $mode ne 'w' ) {
3829 ------------------------------------------------------------------------
3830 print requires mode = 'w' but mode = ($mode); trace follows:
3831 ------------------------------------------------------------------------
3834 push @{ $self->[0] }, $_[0];
3836 sub close { return }
3838 #####################################################################
3840 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3841 # which returns the next line to be parsed
3843 #####################################################################
3845 package Perl::Tidy::LineSource;
3849 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3851 my $input_line_ending;
3852 if ( $rOpts->{'preserve-line-endings'} ) {
3853 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3856 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3857 return undef unless $fh;
3859 # in order to check output syntax when standard output is used,
3860 # or when it is an object, we have to make a copy of the file
3861 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3864 # Turning off syntax check when input output is used.
3865 # The reason is that temporary files cause problems on
3867 $rOpts->{'check-syntax'} = 0;
3869 $$rpending_logfile_message .= <<EOM;
3870 Note: --syntax check will be skipped because standard input is used
3877 _filename => $input_file,
3878 _input_line_ending => $input_line_ending,
3879 _rinput_buffer => [],
3884 sub close_input_file {
3887 # Only close physical files, not STDIN and other objects
3888 my $filename = $self->{_filename};
3889 if ( $filename ne '-' && !ref $filename ) {
3890 eval { $self->{_fh}->close() };
3897 my $fh = $self->{_fh};
3898 my $rinput_buffer = $self->{_rinput_buffer};
3900 if ( scalar(@$rinput_buffer) ) {
3901 $line = shift @$rinput_buffer;
3904 $line = $fh->getline();
3906 # patch to read raw mac files under unix, dos
3907 # see if the first line has embedded \r's
3908 if ( $line && !$self->{_started} ) {
3909 if ( $line =~ /[\015][^\015\012]/ ) {
3911 # found one -- break the line up and store in a buffer
3912 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3913 my $count = @$rinput_buffer;
3914 $line = shift @$rinput_buffer;
3916 $self->{_started}++;
3922 #####################################################################
3924 # the Perl::Tidy::LineSink class supplies a write_line method for
3925 # actual file writing
3927 #####################################################################
3929 package Perl::Tidy::LineSink;
3933 my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3934 $rpending_logfile_message, $binmode )
3939 my $output_file_open = 0;
3941 if ( $rOpts->{'format'} eq 'tidy' ) {
3942 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3943 unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
3944 $output_file_open = 1;
3946 if ( ref($fh) eq 'IO::File' ) {
3949 if ( $output_file eq '-' ) { binmode STDOUT }
3953 # in order to check output syntax when standard output is used,
3954 # or when it is an object, we have to make a copy of the file
3955 if ( $output_file eq '-' || ref $output_file ) {
3956 if ( $rOpts->{'check-syntax'} ) {
3958 # Turning off syntax check when standard output is used.
3959 # The reason is that temporary files cause problems on
3961 $rOpts->{'check-syntax'} = 0;
3962 $$rpending_logfile_message .= <<EOM;
3963 Note: --syntax check will be skipped because standard output is used
3972 _output_file => $output_file,
3973 _output_file_open => $output_file_open,
3975 _tee_file => $tee_file,
3976 _tee_file_opened => 0,
3977 _line_separator => $line_separator,
3978 _binmode => $binmode,
3985 my $fh = $self->{_fh};
3987 my $output_file_open = $self->{_output_file_open};
3989 $_[0] .= $self->{_line_separator};
3991 $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3993 if ( $self->{_tee_flag} ) {
3994 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3995 my $fh_tee = $self->{_fh_tee};
3996 print $fh_tee $_[0];
4002 $self->{_tee_flag} = 1;
4007 $self->{_tee_flag} = 0;
4010 sub really_open_tee_file {
4012 my $tee_file = $self->{_tee_file};
4014 $fh_tee = IO::File->new(">$tee_file")
4015 or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
4016 binmode $fh_tee if $self->{_binmode};
4017 $self->{_tee_file_opened} = 1;
4018 $self->{_fh_tee} = $fh_tee;
4021 sub close_output_file {
4024 # Only close physical files, not STDOUT and other objects
4025 my $output_file = $self->{_output_file};
4026 if ( $output_file ne '-' && !ref $output_file ) {
4027 eval { $self->{_fh}->close() } if $self->{_output_file_open};
4029 $self->close_tee_file();
4032 sub close_tee_file {
4035 # Only close physical files, not STDOUT and other objects
4036 if ( $self->{_tee_file_opened} ) {
4037 my $tee_file = $self->{_tee_file};
4038 if ( $tee_file ne '-' && !ref $tee_file ) {
4039 eval { $self->{_fh_tee}->close() };
4040 $self->{_tee_file_opened} = 0;
4045 #####################################################################
4047 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
4048 # useful for program development.
4050 # Only one such file is created regardless of the number of input
4051 # files processed. This allows the results of processing many files
4052 # to be summarized in a single file.
4054 #####################################################################
4056 package Perl::Tidy::Diagnostics;
4062 _write_diagnostics_count => 0,
4063 _last_diagnostic_file => "",
4069 sub set_input_file {
4071 $self->{_input_file} = $_[0];
4074 # This is a diagnostic routine which is useful for program development.
4075 # Output from debug messages go to a file named DIAGNOSTICS, where
4076 # they are labeled by file and line. This allows many files to be
4077 # scanned at once for some particular condition of interest.
4078 sub write_diagnostics {
4081 unless ( $self->{_write_diagnostics_count} ) {
4082 open DIAGNOSTICS, ">DIAGNOSTICS"
4083 or death("couldn't open DIAGNOSTICS: $!\n");
4086 my $last_diagnostic_file = $self->{_last_diagnostic_file};
4087 my $input_file = $self->{_input_file};
4088 if ( $last_diagnostic_file ne $input_file ) {
4089 print DIAGNOSTICS "\nFILE:$input_file\n";
4091 $self->{_last_diagnostic_file} = $input_file;
4092 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
4093 print DIAGNOSTICS "$input_line_number:\t@_";
4094 $self->{_write_diagnostics_count}++;
4097 #####################################################################
4099 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
4101 #####################################################################
4103 package Perl::Tidy::Logger;
4108 my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_;
4110 my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
4112 # remove any old error output file if we might write a new one
4113 unless ( $fh_warnings || ref($warning_file) ) {
4114 if ( -e $warning_file ) { unlink($warning_file) }
4118 _log_file => $log_file,
4120 _fh_warnings => $fh_warnings,
4121 _last_input_line_written => 0,
4122 _at_end_of_file => 0,
4124 _block_log_output => 0,
4125 _line_of_tokens => undef,
4126 _output_line_number => undef,
4127 _wrote_line_information_string => 0,
4128 _wrote_column_headings => 0,
4129 _warning_file => $warning_file,
4130 _warning_count => 0,
4131 _complaint_count => 0,
4132 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
4133 _saw_brace_error => 0,
4134 _saw_extrude => $saw_extrude,
4135 _output_array => [],
4139 sub get_warning_count {
4141 return $self->{_warning_count};
4144 sub get_use_prefix {
4146 return $self->{_use_prefix};
4149 sub block_log_output {
4151 $self->{_block_log_output} = 1;
4154 sub unblock_log_output {
4156 $self->{_block_log_output} = 0;
4159 sub interrupt_logfile {
4161 $self->{_use_prefix} = 0;
4162 $self->warning("\n");
4163 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
4166 sub resume_logfile {
4168 $self->write_logfile_entry( '#' x 60 . "\n" );
4169 $self->{_use_prefix} = 1;
4172 sub we_are_at_the_last_line {
4174 unless ( $self->{_wrote_line_information_string} ) {
4175 $self->write_logfile_entry("Last line\n\n");
4177 $self->{_at_end_of_file} = 1;
4180 # record some stuff in case we go down in flames
4183 my ( $line_of_tokens, $output_line_number ) = @_;
4184 my $input_line = $line_of_tokens->{_line_text};
4185 my $input_line_number = $line_of_tokens->{_line_number};
4187 # save line information in case we have to write a logfile message
4188 $self->{_line_of_tokens} = $line_of_tokens;
4189 $self->{_output_line_number} = $output_line_number;
4190 $self->{_wrote_line_information_string} = 0;
4192 my $last_input_line_written = $self->{_last_input_line_written};
4193 my $rOpts = $self->{_rOpts};
4196 ( $input_line_number - $last_input_line_written ) >=
4197 $rOpts->{'logfile-gap'}
4199 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
4202 my $rlevels = $line_of_tokens->{_rlevels};
4203 my $structural_indentation_level = $$rlevels[0];
4204 $self->{_last_input_line_written} = $input_line_number;
4205 ( my $out_str = $input_line ) =~ s/^\s*//;
4208 $out_str = ( '.' x $structural_indentation_level ) . $out_str;
4210 if ( length($out_str) > 35 ) {
4211 $out_str = substr( $out_str, 0, 35 ) . " ....";
4213 $self->logfile_output( "", "$out_str\n" );
4217 sub write_logfile_entry {
4220 # add leading >>> to avoid confusing error messages and code
4221 $self->logfile_output( ">>>", "@_" );
4224 sub write_column_headings {
4227 $self->{_wrote_column_headings} = 1;
4228 my $routput_array = $self->{_output_array};
4229 push @{$routput_array}, <<EOM;
4230 The nesting depths in the table below are at the start of the lines.
4231 The indicated output line numbers are not always exact.
4232 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
4234 in:out indent c b nesting code + messages; (messages begin with >>>)
4235 lines levels i k (code begins with one '.' per indent level)
4236 ------ ----- - - -------- -------------------------------------------
4240 sub make_line_information_string {
4242 # make columns of information when a logfile message needs to go out
4244 my $line_of_tokens = $self->{_line_of_tokens};
4245 my $input_line_number = $line_of_tokens->{_line_number};
4246 my $line_information_string = "";
4247 if ($input_line_number) {
4249 my $output_line_number = $self->{_output_line_number};
4250 my $brace_depth = $line_of_tokens->{_curly_brace_depth};
4251 my $paren_depth = $line_of_tokens->{_paren_depth};
4252 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
4253 my $guessed_indentation_level =
4254 $line_of_tokens->{_guessed_indentation_level};
4255 my $rlevels = $line_of_tokens->{_rlevels};
4256 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
4257 my $rci_levels = $line_of_tokens->{_rci_levels};
4258 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
4260 my $structural_indentation_level = $$rlevels[0];
4262 $self->write_column_headings() unless $self->{_wrote_column_headings};
4264 # keep logfile columns aligned for scripts up to 999 lines;
4265 # for longer scripts it doesn't really matter
4266 my $extra_space = "";
4268 ( $input_line_number < 10 ) ? " "
4269 : ( $input_line_number < 100 ) ? " "
4272 ( $output_line_number < 10 ) ? " "
4273 : ( $output_line_number < 100 ) ? " "
4276 # there are 2 possible nesting strings:
4277 # the original which looks like this: (0 [1 {2
4278 # the new one, which looks like this: {{[
4279 # the new one is easier to read, and shows the order, but
4280 # could be arbitrarily long, so we use it unless it is too long
4281 my $nesting_string =
4282 "($paren_depth [$square_bracket_depth {$brace_depth";
4283 my $nesting_string_new = $$rnesting_tokens[0];
4285 my $ci_level = $$rci_levels[0];
4286 if ( $ci_level > 9 ) { $ci_level = '*' }
4287 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
4289 if ( length($nesting_string_new) <= 8 ) {
4291 $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
4293 $line_information_string =
4294 "L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
4296 return $line_information_string;
4299 sub logfile_output {
4301 my ( $prompt, $msg ) = @_;
4302 return if ( $self->{_block_log_output} );
4304 my $routput_array = $self->{_output_array};
4305 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
4306 push @{$routput_array}, "$msg";
4309 my $line_information_string = $self->make_line_information_string();
4310 $self->{_wrote_line_information_string} = 1;
4312 if ($line_information_string) {
4313 push @{$routput_array}, "$line_information_string $prompt$msg";
4316 push @{$routput_array}, "$msg";
4321 sub get_saw_brace_error {
4323 return $self->{_saw_brace_error};
4326 sub increment_brace_error {
4328 $self->{_saw_brace_error}++;
4333 use constant BRACE_WARNING_LIMIT => 10;
4334 my $saw_brace_error = $self->{_saw_brace_error};
4336 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
4340 $self->{_saw_brace_error} = $saw_brace_error;
4342 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
4343 $self->warning("No further warnings of this type will be given\n");
4349 # handle non-critical warning messages based on input flag
4351 my $rOpts = $self->{_rOpts};
4353 # these appear in .ERR output only if -w flag is used
4354 if ( $rOpts->{'warning-output'} ) {
4358 # otherwise, they go to the .LOG file
4360 $self->{_complaint_count}++;
4361 $self->write_logfile_entry(@_);
4367 # report errors to .ERR file (or stdout)
4369 use constant WARNING_LIMIT => 50;
4371 my $rOpts = $self->{_rOpts};
4372 unless ( $rOpts->{'quiet'} ) {
4374 my $warning_count = $self->{_warning_count};
4375 my $fh_warnings = $self->{_fh_warnings};
4376 if ( !$fh_warnings ) {
4377 my $warning_file = $self->{_warning_file};
4378 ( $fh_warnings, my $filename ) =
4379 Perl::Tidy::streamhandle( $warning_file, 'w' );
4380 $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
4381 Perl::Tidy::Warn "## Please see file $filename\n"
4382 unless ref($warning_file);
4383 $self->{_fh_warnings} = $fh_warnings;
4386 if ( $warning_count < WARNING_LIMIT ) {
4387 if ( $self->get_use_prefix() > 0 ) {
4388 my $input_line_number =
4389 Perl::Tidy::Tokenizer::get_input_line_number();
4390 $fh_warnings->print("$input_line_number:\t@_");
4391 $self->write_logfile_entry("WARNING: @_");
4394 $fh_warnings->print(@_);
4395 $self->write_logfile_entry(@_);
4399 $self->{_warning_count} = $warning_count;
4401 if ( $warning_count == WARNING_LIMIT ) {
4402 $fh_warnings->print("No further warnings will be given\n");
4407 # programming bug codes:
4409 # 0 = maybe, not sure.
4411 sub report_possible_bug {
4413 my $saw_code_bug = $self->{_saw_code_bug};
4414 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4417 sub report_definite_bug {
4419 $self->{_saw_code_bug} = 1;
4422 sub ask_user_for_bug_report {
4425 my ( $infile_syntax_ok, $formatter ) = @_;
4426 my $saw_code_bug = $self->{_saw_code_bug};
4427 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4428 $self->warning(<<EOM);
4430 You may have encountered a code bug in perltidy. If you think so, and
4431 the problem is not listed in the BUGS file at
4432 http://perltidy.sourceforge.net, please report it so that it can be
4433 corrected. Include the smallest possible script which has the problem,
4434 along with the .LOG file. See the manual pages for contact information.
4439 elsif ( $saw_code_bug == 1 ) {
4440 if ( $self->{_saw_extrude} ) {
4441 $self->warning(<<EOM);
4443 You may have encountered a bug in perltidy. However, since you are using the
4444 -extrude option, the problem may be with perl or one of its modules, which have
4445 occasional problems with this type of file. If you believe that the
4446 problem is with perltidy, and the problem is not listed in the BUGS file at
4447 http://perltidy.sourceforge.net, please report it so that it can be corrected.
4448 Include the smallest possible script which has the problem, along with the .LOG
4449 file. See the manual pages for contact information.
4454 $self->warning(<<EOM);
4456 Oops, you seem to have encountered a bug in perltidy. Please check the
4457 BUGS file at http://perltidy.sourceforge.net. If the problem is not
4458 listed there, please report it so that it can be corrected. Include the
4459 smallest possible script which produces this message, along with the
4460 .LOG file if appropriate. See the manual pages for contact information.
4461 Your efforts are appreciated.
4464 my $added_semicolon_count = 0;
4466 $added_semicolon_count =
4467 $formatter->get_added_semicolon_count();
4469 if ( $added_semicolon_count > 0 ) {
4470 $self->warning(<<EOM);
4472 The log file shows that perltidy added $added_semicolon_count semicolons.
4473 Please rerun with -nasc to see if that is the cause of the syntax error. Even
4474 if that is the problem, please report it so that it can be fixed.
4484 # called after all formatting to summarize errors
4486 my ( $infile_syntax_ok, $formatter ) = @_;
4488 my $rOpts = $self->{_rOpts};
4489 my $warning_count = $self->{_warning_count};
4490 my $saw_code_bug = $self->{_saw_code_bug};
4493 ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4494 || $saw_code_bug == 1
4495 || $rOpts->{'logfile'};
4496 my $log_file = $self->{_log_file};
4497 if ($warning_count) {
4498 if ($save_logfile) {
4499 $self->block_log_output(); # avoid echoing this to the logfile
4501 "The logfile $log_file may contain useful information\n");
4502 $self->unblock_log_output();
4505 if ( $self->{_complaint_count} > 0 ) {
4507 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4511 if ( $self->{_saw_brace_error}
4512 && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
4514 $self->warning("To save a full .LOG file rerun with -g\n");
4517 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4519 if ($save_logfile) {
4520 my $log_file = $self->{_log_file};
4521 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4523 my $routput_array = $self->{_output_array};
4524 foreach ( @{$routput_array} ) { $fh->print($_) }
4525 if ( $log_file ne '-' && !ref $log_file ) {
4526 eval { $fh->close() };
4532 #####################################################################
4534 # The Perl::Tidy::DevNull class supplies a dummy print method
4536 #####################################################################
4538 package Perl::Tidy::DevNull;
4539 sub new { return bless {}, $_[0] }
4540 sub print { return }
4541 sub close { return }
4543 #####################################################################
4545 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4547 #####################################################################
4549 package Perl::Tidy::HtmlWriter;
4559 %short_to_long_names
4563 $missing_html_entities
4566 # replace unsafe characters with HTML entity representation if HTML::Entities
4568 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4572 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4573 $html_src_extension )
4576 my $html_file_opened = 0;
4578 ( $html_fh, my $html_filename ) =
4579 Perl::Tidy::streamhandle( $html_file, 'w' );
4581 Perl::Tidy::Warn("can't open $html_file: $!\n");
4584 $html_file_opened = 1;
4586 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4587 $input_file = "NONAME";
4590 # write the table of contents to a string
4592 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4595 my @pre_string_stack;
4596 if ( $rOpts->{'html-pre-only'} ) {
4598 # pre section goes directly to the output stream
4599 $html_pre_fh = $html_fh;
4600 $html_pre_fh->print( <<"PRE_END");
4606 # pre section go out to a temporary string
4608 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4609 push @pre_string_stack, \$pre_string;
4612 # pod text gets diverted if the 'pod2html' is used
4615 if ( $rOpts->{'pod2html'} ) {
4616 if ( $rOpts->{'html-pre-only'} ) {
4617 undef $rOpts->{'pod2html'};
4620 eval "use Pod::Html";
4623 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4624 undef $rOpts->{'pod2html'};
4627 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4634 if ( $rOpts->{'frames'} ) {
4635 unless ($extension) {
4637 "cannot use frames without a specified output extension; ignoring -frm\n";
4638 undef $rOpts->{'frames'};
4641 $toc_filename = $input_file . $html_toc_extension . $extension;
4642 $src_filename = $input_file . $html_src_extension . $extension;
4646 # ----------------------------------------------------------
4647 # Output is now directed as follows:
4648 # html_toc_fh <-- table of contents items
4649 # html_pre_fh <-- the <pre> section of formatted code, except:
4650 # html_pod_fh <-- pod goes here with the pod2html option
4651 # ----------------------------------------------------------
4653 my $title = $rOpts->{'title'};
4655 ( $title, my $path ) = fileparse($input_file);
4657 my $toc_item_count = 0;
4658 my $in_toc_package = "";
4661 _input_file => $input_file, # name of input file
4662 _title => $title, # title, unescaped
4663 _html_file => $html_file, # name of .html output file
4664 _toc_filename => $toc_filename, # for frames option
4665 _src_filename => $src_filename, # for frames option
4666 _html_file_opened => $html_file_opened, # a flag
4667 _html_fh => $html_fh, # the output stream
4668 _html_pre_fh => $html_pre_fh, # pre section goes here
4669 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
4670 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4671 _rpod_string => \$pod_string, # string holding pod
4672 _pod_cut_count => 0, # how many =cut's?
4673 _html_toc_fh => $html_toc_fh, # fh for table of contents
4674 _rtoc_string => \$toc_string, # string holding toc
4675 _rtoc_item_count => \$toc_item_count, # how many toc items
4676 _rin_toc_package => \$in_toc_package, # package name
4677 _rtoc_name_count => {}, # hash to track unique names
4678 _rpackage_stack => [], # stack to check for package
4680 _rlast_level => \$last_level, # brace indentation level
4686 # Add an item to the html table of contents.
4687 # This is called even if no table of contents is written,
4688 # because we still want to put the anchors in the <pre> text.
4689 # We are given an anchor name and its type; types are:
4690 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
4691 # There must be an 'EOF' call at the end to wrap things up.
4693 my ( $name, $type ) = @_;
4694 my $html_toc_fh = $self->{_html_toc_fh};
4695 my $html_pre_fh = $self->{_html_pre_fh};
4696 my $rtoc_name_count = $self->{_rtoc_name_count};
4697 my $rtoc_item_count = $self->{_rtoc_item_count};
4698 my $rlast_level = $self->{_rlast_level};
4699 my $rin_toc_package = $self->{_rin_toc_package};
4700 my $rpackage_stack = $self->{_rpackage_stack};
4702 # packages contain sublists of subs, so to avoid errors all package
4703 # items are written and finished with the following routines
4704 my $end_package_list = sub {
4705 if ($$rin_toc_package) {
4706 $html_toc_fh->print("</ul>\n</li>\n");
4707 $$rin_toc_package = "";
4711 my $start_package_list = sub {
4712 my ( $unique_name, $package ) = @_;
4713 if ($$rin_toc_package) { $end_package_list->() }
4714 $html_toc_fh->print(<<EOM);
4715 <li><a href=\"#$unique_name\">package $package</a>
4718 $$rin_toc_package = $package;
4721 # start the table of contents on the first item
4722 unless ($$rtoc_item_count) {
4724 # but just quit if we hit EOF without any other entries
4725 # in this case, there will be no toc
4726 return if ( $type eq 'EOF' );
4727 $html_toc_fh->print( <<"TOC_END");
4728 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4732 $$rtoc_item_count++;
4734 # make a unique anchor name for this location:
4735 # - packages get a 'package-' prefix
4736 # - subs use their names
4737 my $unique_name = $name;
4738 if ( $type eq 'package' ) { $unique_name = "package-$name" }
4740 # append '-1', '-2', etc if necessary to make unique; this will
4741 # be unique because subs and packages cannot have a '-'
4742 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4743 $unique_name .= "-$count";
4746 # - all names get terminal '-' if pod2html is used, to avoid
4747 # conflicts with anchor names created by pod2html
4748 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4750 # start/stop lists of subs
4751 if ( $type eq 'sub' ) {
4752 my $package = $rpackage_stack->[$$rlast_level];
4753 unless ($package) { $package = 'main' }
4755 # if we're already in a package/sub list, be sure its the right
4756 # package or else close it
4757 if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4758 $end_package_list->();
4761 # start a package/sub list if necessary
4762 unless ($$rin_toc_package) {
4763 $start_package_list->( $unique_name, $package );
4767 # now write an entry in the toc for this item
4768 if ( $type eq 'package' ) {
4769 $start_package_list->( $unique_name, $name );
4771 elsif ( $type eq 'sub' ) {
4772 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4775 $end_package_list->();
4776 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4779 # write the anchor in the <pre> section
4780 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4782 # end the table of contents, if any, on the end of file
4783 if ( $type eq 'EOF' ) {
4784 $html_toc_fh->print( <<"TOC_END");
4786 <!-- END CODE INDEX -->
4793 # This is the official list of tokens which may be identified by the
4794 # user. Long names are used as getopt keys. Short names are
4795 # convenient short abbreviations for specifying input. Short names
4796 # somewhat resemble token type characters, but are often different
4797 # because they may only be alphanumeric, to allow command line
4798 # input. Also, note that because of case insensitivity of html,
4799 # this table must be in a single case only (I've chosen to use all
4801 # When adding NEW_TOKENS: update this hash table
4802 # short names => long names
4803 %short_to_long_names = (
4813 'pu' => 'punctuation',
4814 'i' => 'identifier',
4816 'h' => 'here-doc-target',
4817 'hh' => 'here-doc-text',
4819 'sc' => 'semicolon',
4820 'm' => 'subroutine',
4824 # Now we have to map actual token types into one of the above short
4825 # names; any token types not mapped will get 'punctuation'
4828 # The values of this hash table correspond to the keys of the
4829 # previous hash table.
4830 # The keys of this hash table are token types and can be seen
4831 # by running with --dump-token-types (-dtt).
4833 # When adding NEW_TOKENS: update this hash table
4834 # $type => $short_name
4835 %token_short_names = (
4860 # These token types will all be called identifiers for now
4861 # FIXME: could separate user defined modules as separate type
4862 my @identifier = qw" i t U C Y Z G :: CORE::";
4863 @token_short_names{@identifier} = ('i') x scalar(@identifier);
4865 # These token types will be called 'structure'
4866 my @structure = qw" { } ";
4867 @token_short_names{@structure} = ('s') x scalar(@structure);
4869 # OLD NOTES: save for reference
4870 # Any of these could be added later if it would be useful.
4871 # For now, they will by default become punctuation
4872 # my @list = qw" L R [ ] ";
4873 # @token_long_names{@list} = ('non-structure') x scalar(@list);
4876 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4878 # @token_long_names{@list} = ('math') x scalar(@list);
4880 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
4881 # @token_long_names{@list} = ('bit') x scalar(@list);
4883 # my @list = qw" == != < > <= <=> ";
4884 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4886 # my @list = qw" && || ! &&= ||= //= ";
4887 # @token_long_names{@list} = ('logical') x scalar(@list);
4889 # my @list = qw" . .= =~ !~ x x= ";
4890 # @token_long_names{@list} = ('string-operators') x scalar(@list);
4893 # my @list = qw" .. -> <> ... \ ? ";
4894 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
4898 sub make_getopt_long_names {
4900 my ($rgetopt_names) = @_;
4901 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4902 push @$rgetopt_names, "html-color-$name=s";
4903 push @$rgetopt_names, "html-italic-$name!";
4904 push @$rgetopt_names, "html-bold-$name!";
4906 push @$rgetopt_names, "html-color-background=s";
4907 push @$rgetopt_names, "html-linked-style-sheet=s";
4908 push @$rgetopt_names, "nohtml-style-sheets";
4909 push @$rgetopt_names, "html-pre-only";
4910 push @$rgetopt_names, "html-line-numbers";
4911 push @$rgetopt_names, "html-entities!";
4912 push @$rgetopt_names, "stylesheet";
4913 push @$rgetopt_names, "html-table-of-contents!";
4914 push @$rgetopt_names, "pod2html!";
4915 push @$rgetopt_names, "frames!";
4916 push @$rgetopt_names, "html-toc-extension=s";
4917 push @$rgetopt_names, "html-src-extension=s";
4919 # Pod::Html parameters:
4920 push @$rgetopt_names, "backlink=s";
4921 push @$rgetopt_names, "cachedir=s";
4922 push @$rgetopt_names, "htmlroot=s";
4923 push @$rgetopt_names, "libpods=s";
4924 push @$rgetopt_names, "podpath=s";
4925 push @$rgetopt_names, "podroot=s";
4926 push @$rgetopt_names, "title=s";
4928 # Pod::Html parameters with leading 'pod' which will be removed
4929 # before the call to Pod::Html
4930 push @$rgetopt_names, "podquiet!";
4931 push @$rgetopt_names, "podverbose!";
4932 push @$rgetopt_names, "podrecurse!";
4933 push @$rgetopt_names, "podflush";
4934 push @$rgetopt_names, "podheader!";
4935 push @$rgetopt_names, "podindex!";
4938 sub make_abbreviated_names {
4940 # We're appending things like this to the expansion list:
4941 # 'hcc' => [qw(html-color-comment)],
4942 # 'hck' => [qw(html-color-keyword)],
4945 my ($rexpansion) = @_;
4947 # abbreviations for color/bold/italic properties
4948 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4949 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
4950 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
4951 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
4952 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4953 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4956 # abbreviations for all other html options
4957 ${$rexpansion}{"hcbg"} = ["html-color-background"];
4958 ${$rexpansion}{"pre"} = ["html-pre-only"];
4959 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
4960 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
4961 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
4962 ${$rexpansion}{"hent"} = ["html-entities"];
4963 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4964 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
4965 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
4966 ${$rexpansion}{"ss"} = ["stylesheet"];
4967 ${$rexpansion}{"pod"} = ["pod2html"];
4968 ${$rexpansion}{"npod"} = ["nopod2html"];
4969 ${$rexpansion}{"frm"} = ["frames"];
4970 ${$rexpansion}{"nfrm"} = ["noframes"];
4971 ${$rexpansion}{"text"} = ["html-toc-extension"];
4972 ${$rexpansion}{"sext"} = ["html-src-extension"];
4977 # This will be called once after options have been parsed
4981 # X11 color names for default settings that seemed to look ok
4982 # (these color names are only used for programming clarity; the hex
4983 # numbers are actually written)
4984 use constant ForestGreen => "#228B22";
4985 use constant SaddleBrown => "#8B4513";
4986 use constant magenta4 => "#8B008B";
4987 use constant IndianRed3 => "#CD5555";
4988 use constant DeepSkyBlue4 => "#00688B";
4989 use constant MediumOrchid3 => "#B452CD";
4990 use constant black => "#000000";
4991 use constant white => "#FFFFFF";
4992 use constant red => "#FF0000";
4994 # set default color, bold, italic properties
4995 # anything not listed here will be given the default (punctuation) color --
4996 # these types currently not listed and get default: ws pu s sc cm co p
4997 # When adding NEW_TOKENS: add an entry here if you don't want defaults
4999 # set_default_properties( $short_name, default_color, bold?, italic? );
5000 set_default_properties( 'c', ForestGreen, 0, 0 );
5001 set_default_properties( 'pd', ForestGreen, 0, 1 );
5002 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
5003 set_default_properties( 'q', IndianRed3, 0, 0 );
5004 set_default_properties( 'hh', IndianRed3, 0, 1 );
5005 set_default_properties( 'h', IndianRed3, 1, 0 );
5006 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
5007 set_default_properties( 'w', black, 0, 0 );
5008 set_default_properties( 'n', MediumOrchid3, 0, 0 );
5009 set_default_properties( 'v', MediumOrchid3, 0, 0 );
5010 set_default_properties( 'j', IndianRed3, 1, 0 );
5011 set_default_properties( 'm', red, 1, 0 );
5013 set_default_color( 'html-color-background', white );
5014 set_default_color( 'html-color-punctuation', black );
5016 # setup property lookup tables for tokens based on their short names
5017 # every token type has a short name, and will use these tables
5018 # to do the html markup
5019 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
5020 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
5021 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
5022 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
5025 # write style sheet to STDOUT and die if requested
5026 if ( defined( $rOpts->{'stylesheet'} ) ) {
5027 write_style_sheet_file('-');
5031 # make sure user gives a file name after -css
5032 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
5033 $css_linkname = $rOpts->{'html-linked-style-sheet'};
5034 if ( $css_linkname =~ /^-/ ) {
5035 Perl::Tidy::Die "You must specify a valid filename after -css\n";
5039 # check for conflict
5040 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
5041 $rOpts->{'nohtml-style-sheets'} = 0;
5042 warning("You can't specify both -css and -nss; -nss ignored\n");
5045 # write a style sheet file if necessary
5046 if ($css_linkname) {
5048 # if the selected filename exists, don't write, because user may
5049 # have done some work by hand to create it; use backup name instead
5050 # Also, this will avoid a potential disaster in which the user
5051 # forgets to specify the style sheet, like this:
5052 # perltidy -html -css myfile1.pl myfile2.pl
5053 # This would cause myfile1.pl to parsed as the style sheet by GetOpts
5054 my $css_filename = $css_linkname;
5055 unless ( -e $css_filename ) {
5056 write_style_sheet_file($css_filename);
5059 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
5062 sub write_style_sheet_file {
5064 my $css_filename = shift;
5066 unless ( $fh = IO::File->new("> $css_filename") ) {
5067 Perl::Tidy::Die "can't open $css_filename: $!\n";
5069 write_style_sheet_data($fh);
5070 eval { $fh->close };
5073 sub write_style_sheet_data {
5075 # write the style sheet data to an open file handle
5078 my $bg_color = $rOpts->{'html-color-background'};
5079 my $text_color = $rOpts->{'html-color-punctuation'};
5081 # pre-bgcolor is new, and may not be defined
5082 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
5083 $pre_bg_color = $bg_color unless $pre_bg_color;
5085 $fh->print(<<"EOM");
5086 /* default style sheet generated by perltidy */
5087 body {background: $bg_color; color: $text_color}
5088 pre { color: $text_color;
5089 background: $pre_bg_color;
5090 font-family: courier;
5095 foreach my $short_name ( sort keys %short_to_long_names ) {
5096 my $long_name = $short_to_long_names{$short_name};
5098 my $abbrev = '.' . $short_name;
5099 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
5100 my $color = $html_color{$short_name};
5101 if ( !defined($color) ) { $color = $text_color }
5102 $fh->print("$abbrev \{ color: $color;");
5104 if ( $html_bold{$short_name} ) {
5105 $fh->print(" font-weight:bold;");
5108 if ( $html_italic{$short_name} ) {
5109 $fh->print(" font-style:italic;");
5111 $fh->print("} /* $long_name */\n");
5115 sub set_default_color {
5117 # make sure that options hash $rOpts->{$key} contains a valid color
5118 my ( $key, $color ) = @_;
5119 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
5120 $rOpts->{$key} = check_RGB($color);
5125 # if color is a 6 digit hex RGB value, prepend a #, otherwise
5126 # assume that it is a valid ascii color name
5128 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
5132 sub set_default_properties {
5133 my ( $short_name, $color, $bold, $italic ) = @_;
5135 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
5137 $key = "html-bold-$short_to_long_names{$short_name}";
5138 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
5139 $key = "html-italic-$short_to_long_names{$short_name}";
5140 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
5145 # Use Pod::Html to process the pod and make the page
5146 # then merge the perltidy code sections into it.
5147 # return 1 if success, 0 otherwise
5149 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
5150 my $input_file = $self->{_input_file};
5151 my $title = $self->{_title};
5152 my $success_flag = 0;
5154 # don't try to use pod2html if no pod
5155 unless ($pod_string) {
5156 return $success_flag;
5159 # Pod::Html requires a real temporary filename
5160 # If we are making a frame, we have a name available
5161 # Otherwise, we have to fine one
5163 if ( $rOpts->{'frames'} ) {
5164 $tmpfile = $self->{_toc_filename};
5167 $tmpfile = Perl::Tidy::make_temporary_filename();
5169 my $fh_tmp = IO::File->new( $tmpfile, 'w' );
5172 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5173 return $success_flag;
5176 #------------------------------------------------------------------
5177 # Warning: a temporary file is open; we have to clean up if
5178 # things go bad. From here on all returns should be by going to
5179 # RETURN so that the temporary file gets unlinked.
5180 #------------------------------------------------------------------
5182 # write the pod text to the temporary file
5183 $fh_tmp->print($pod_string);
5186 # Hand off the pod to pod2html.
5187 # Note that we can use the same temporary filename for input and output
5188 # because of the way pod2html works.
5192 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
5195 # Flags with string args:
5196 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
5197 # "podpath=s", "podroot=s"
5198 # Note: -css=s is handled by perltidy itself
5199 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
5200 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
5203 # Toggle switches; these have extra leading 'pod'
5204 # "header!", "index!", "recurse!", "quiet!", "verbose!"
5205 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
5206 my $kwd = $kw; # allows us to strip 'pod'
5207 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
5208 elsif ( defined( $rOpts->{$kw} ) ) {
5210 push @args, "--no$kwd";
5216 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
5218 # Must clean up if pod2html dies (it can);
5219 # Be careful not to overwrite callers __DIE__ routine
5220 local $SIG{__DIE__} = sub {
5221 unlink $tmpfile if -e $tmpfile;
5222 Perl::Tidy::Die $_[0];
5227 $fh_tmp = IO::File->new( $tmpfile, 'r' );
5230 # this error shouldn't happen ... we just used this filename
5232 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5236 my $html_fh = $self->{_html_fh};
5242 # This routine will write the html selectively and store the toc
5243 my $html_print = sub {
5245 $html_fh->print($_) unless ($no_print);
5246 if ($in_toc) { push @toc, $_ }
5250 # loop over lines of html output from pod2html and merge in
5251 # the necessary perltidy html sections
5252 my ( $saw_body, $saw_index, $saw_body_end );
5253 while ( my $line = $fh_tmp->getline() ) {
5255 if ( $line =~ /^\s*<html>\s*$/i ) {
5256 my $date = localtime;
5257 $html_print->("<!-- Generated by perltidy on $date -->\n");
5258 $html_print->($line);
5261 # Copy the perltidy css, if any, after <body> tag
5262 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
5264 $html_print->($css_string) if $css_string;
5265 $html_print->($line);
5267 # add a top anchor and heading
5268 $html_print->("<a name=\"-top-\"></a>\n");
5269 $title = escape_html($title);
5270 $html_print->("<h1>$title</h1>\n");
5273 # check for start of index, old pod2html
5274 # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
5275 # <!-- INDEX BEGIN -->
5279 # <!-- INDEX END -->
5281 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
5284 # when frames are used, an extra table of contents in the
5285 # contents panel is confusing, so don't print it
5286 $no_print = $rOpts->{'frames'}
5287 || !$rOpts->{'html-table-of-contents'};
5288 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5289 $html_print->($line);
5292 # check for start of index, new pod2html
5293 # After Pod::Html VERSION 1.15_02 it is delimited as:
5297 elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
5301 # when frames are used, an extra table of contents in the
5302 # contents panel is confusing, so don't print it
5303 $no_print = $rOpts->{'frames'}
5304 || !$rOpts->{'html-table-of-contents'};
5305 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5306 $html_print->($line);
5309 # Check for end of index, old pod2html
5310 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
5312 $html_print->($line);
5314 # Copy the perltidy toc, if any, after the Pod::Html toc
5316 $html_print->("<hr />\n") if $rOpts->{'frames'};
5317 $html_print->("<h2>Code Index:</h2>\n");
5318 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5319 $html_print->(@toc);
5325 # must track <ul> depth level for new pod2html
5326 elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
5328 $html_print->($line);
5331 # Check for end of index, for new pod2html
5332 elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
5334 $html_print->($line);
5336 # Copy the perltidy toc, if any, after the Pod::Html toc
5337 if ( $ul_level <= 0 ) {
5340 $html_print->("<hr />\n") if $rOpts->{'frames'};
5341 $html_print->("<h2>Code Index:</h2>\n");
5342 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5343 $html_print->(@toc);
5351 # Copy one perltidy section after each marker
5352 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
5354 $html_print->($1) if $1;
5356 # Intermingle code and pod sections if we saw multiple =cut's.
5357 if ( $self->{_pod_cut_count} > 1 ) {
5358 my $rpre_string = shift(@$rpre_string_stack);
5359 if ($$rpre_string) {
5360 $html_print->('<pre>');
5361 $html_print->($$rpre_string);
5362 $html_print->('</pre>');
5366 # shouldn't happen: we stored a string before writing
5369 "Problem merging html stream with pod2html; order may be wrong\n";
5371 $html_print->($line);
5374 # If didn't see multiple =cut lines, we'll put the pod out first
5375 # and then the code, because it's less confusing.
5378 # since we are not intermixing code and pod, we don't need
5379 # or want any <hr> lines which separated pod and code
5380 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
5384 # Copy any remaining code section before the </body> tag
5385 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
5387 if (@$rpre_string_stack) {
5388 unless ( $self->{_pod_cut_count} > 1 ) {
5389 $html_print->('<hr />');
5391 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
5392 $html_print->('<pre>');
5393 $html_print->($$rpre_string);
5394 $html_print->('</pre>');
5397 $html_print->($line);
5400 $html_print->($line);
5405 unless ($saw_body) {
5406 Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
5409 unless ($saw_body_end) {
5410 Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
5413 unless ($saw_index) {
5414 Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
5419 eval { $html_fh->close() };
5421 # note that we have to unlink tmpfile before making frames
5422 # because the tmpfile may be one of the names used for frames
5423 unlink $tmpfile if -e $tmpfile;
5424 if ( $success_flag && $rOpts->{'frames'} ) {
5425 $self->make_frame( \@toc );
5427 return $success_flag;
5432 # Make a frame with table of contents in the left panel
5433 # and the text in the right panel.
5435 # $html_filename contains the no-frames html output
5436 # $rtoc is a reference to an array with the table of contents
5439 my $input_file = $self->{_input_file};
5440 my $html_filename = $self->{_html_file};
5441 my $toc_filename = $self->{_toc_filename};
5442 my $src_filename = $self->{_src_filename};
5443 my $title = $self->{_title};
5444 $title = escape_html($title);
5446 # FUTURE input parameter:
5447 my $top_basename = "";
5449 # We need to produce 3 html files:
5450 # 1. - the table of contents
5451 # 2. - the contents (source code) itself
5452 # 3. - the frame which contains them
5454 # get basenames for relative links
5455 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5456 my ( $src_basename, $src_path ) = fileparse($src_filename);
5458 # 1. Make the table of contents panel, with appropriate changes
5459 # to the anchor names
5460 my $src_frame_name = 'SRC';
5462 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5465 # 2. The current .html filename is renamed to be the contents panel
5466 rename( $html_filename, $src_filename )
5467 or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
5469 # 3. Then use the original html filename for the frame
5471 $title, $html_filename, $top_basename,
5472 $toc_basename, $src_basename, $src_frame_name
5476 sub write_toc_html {
5478 # write a separate html table of contents file for frames
5479 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5480 my $fh = IO::File->new( $toc_filename, 'w' )
5481 or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
5485 <title>$title</title>
5488 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5492 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5493 $fh->print( join "", @$rtoc );
5502 sub write_frame_html {
5504 # write an html file to be the table of contents frame
5506 $title, $frame_filename, $top_basename,
5507 $toc_basename, $src_basename, $src_frame_name
5510 my $fh = IO::File->new( $frame_filename, 'w' )
5511 or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
5514 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5515 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5516 <?xml version="1.0" encoding="iso-8859-1" ?>
5517 <html xmlns="http://www.w3.org/1999/xhtml">
5519 <title>$title</title>
5523 # two left panels, one right, if master index file
5524 if ($top_basename) {
5526 <frameset cols="20%,80%">
5527 <frameset rows="30%,70%">
5528 <frame src = "$top_basename" />
5529 <frame src = "$toc_basename" />
5534 # one left panels, one right, if no master index file
5537 <frameset cols="20%,*">
5538 <frame src = "$toc_basename" />
5542 <frame src = "$src_basename" name = "$src_frame_name" />
5545 <p>If you see this message, you are using a non-frame-capable web client.</p>
5546 <p>This document contains:</p>
5548 <li><a href="$toc_basename">A table of contents</a></li>
5549 <li><a href="$src_basename">The source code</a></li>
5558 sub change_anchor_names {
5560 # add a filename and target to anchors
5561 # also return the first anchor
5562 my ( $rlines, $filename, $target ) = @_;
5564 foreach my $line (@$rlines) {
5566 # We're looking for lines like this:
5567 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5568 # ---- - -------- -----------------
5570 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5574 my $href = "$filename#$name";
5575 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5576 unless ($first_anchor) { $first_anchor = $href }
5579 return $first_anchor;
5582 sub close_html_file {
5584 return unless $self->{_html_file_opened};
5586 my $html_fh = $self->{_html_fh};
5587 my $rtoc_string = $self->{_rtoc_string};
5589 # There are 3 basic paths to html output...
5591 # ---------------------------------
5592 # Path 1: finish up if in -pre mode
5593 # ---------------------------------
5594 if ( $rOpts->{'html-pre-only'} ) {
5595 $html_fh->print( <<"PRE_END");
5598 eval { $html_fh->close() };
5603 $self->add_toc_item( 'EOF', 'EOF' );
5605 my $rpre_string_stack = $self->{_rpre_string_stack};
5607 # Patch to darken the <pre> background color in case of pod2html and
5608 # interleaved code/documentation. Otherwise, the distinction
5609 # between code and documentation is blurred.
5610 if ( $rOpts->{pod2html}
5611 && $self->{_pod_cut_count} >= 1
5612 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5614 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5617 # put the css or its link into a string, if used
5619 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5621 # use css linked to another file
5622 if ( $rOpts->{'html-linked-style-sheet'} ) {
5624 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5628 # use css embedded in this file
5629 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5630 $fh_css->print( <<'ENDCSS');
5631 <style type="text/css">
5634 write_style_sheet_data($fh_css);
5635 $fh_css->print( <<"ENDCSS");
5641 # -----------------------------------------------------------
5642 # path 2: use pod2html if requested
5643 # If we fail for some reason, continue on to path 3
5644 # -----------------------------------------------------------
5645 if ( $rOpts->{'pod2html'} ) {
5646 my $rpod_string = $self->{_rpod_string};
5647 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5648 $rpre_string_stack )
5652 # --------------------------------------------------
5653 # path 3: write code in html, with pod only in italics
5654 # --------------------------------------------------
5655 my $input_file = $self->{_input_file};
5656 my $title = escape_html($input_file);
5657 my $date = localtime;
5658 $html_fh->print( <<"HTML_START");
5659 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5660 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5661 <!-- Generated by perltidy on $date -->
5662 <html xmlns="http://www.w3.org/1999/xhtml">
5664 <title>$title</title>
5667 # output the css, if used
5669 $html_fh->print($css_string);
5670 $html_fh->print( <<"ENDCSS");
5677 $html_fh->print( <<"HTML_START");
5679 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5683 $html_fh->print("<a name=\"-top-\"></a>\n");
5684 $html_fh->print( <<"EOM");
5688 # copy the table of contents
5690 && !$rOpts->{'frames'}
5691 && $rOpts->{'html-table-of-contents'} )
5693 $html_fh->print($$rtoc_string);
5696 # copy the pre section(s)
5697 my $fname_comment = $input_file;
5698 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
5699 $html_fh->print( <<"END_PRE");
5701 <!-- contents of filename: $fname_comment -->
5705 foreach my $rpre_string (@$rpre_string_stack) {
5706 $html_fh->print($$rpre_string);
5709 # and finish the html page
5710 $html_fh->print( <<"HTML_END");
5715 eval { $html_fh->close() }; # could be object without close method
5717 if ( $rOpts->{'frames'} ) {
5718 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5719 $self->make_frame( \@toc );
5725 my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5726 my ( @colored_tokens, $j, $string, $type, $token, $level );
5727 my $rlast_level = $self->{_rlast_level};
5728 my $rpackage_stack = $self->{_rpackage_stack};
5730 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5731 $type = $$rtoken_type[$j];
5732 $token = $$rtokens[$j];
5733 $level = $$rlevels[$j];
5734 $level = 0 if ( $level < 0 );
5736 #-------------------------------------------------------
5737 # Update the package stack. The package stack is needed to keep
5738 # the toc correct because some packages may be declared within
5739 # blocks and go out of scope when we leave the block.
5740 #-------------------------------------------------------
5741 if ( $level > $$rlast_level ) {
5742 unless ( $rpackage_stack->[ $level - 1 ] ) {
5743 $rpackage_stack->[ $level - 1 ] = 'main';
5745 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5747 elsif ( $level < $$rlast_level ) {
5748 my $package = $rpackage_stack->[$level];
5749 unless ($package) { $package = 'main' }
5751 # if we change packages due to a nesting change, we
5752 # have to make an entry in the toc
5753 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5754 $self->add_toc_item( $package, 'package' );
5757 $$rlast_level = $level;
5759 #-------------------------------------------------------
5760 # Intercept a sub name here; split it
5761 # into keyword 'sub' and sub name; and add an
5763 #-------------------------------------------------------
5764 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5765 $token = $self->markup_html_element( $1, 'k' );
5766 push @colored_tokens, $token;
5770 # but don't include sub declarations in the toc;
5771 # these wlll have leading token types 'i;'
5772 my $signature = join "", @$rtoken_type;
5773 unless ( $signature =~ /^i;/ ) {
5774 my $subname = $token;
5775 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5776 $self->add_toc_item( $subname, 'sub' );
5780 #-------------------------------------------------------
5781 # Intercept a package name here; split it
5782 # into keyword 'package' and name; add to the toc,
5783 # and update the package stack
5784 #-------------------------------------------------------
5785 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5786 $token = $self->markup_html_element( $1, 'k' );
5787 push @colored_tokens, $token;
5790 $self->add_toc_item( "$token", 'package' );
5791 $rpackage_stack->[$level] = $token;
5794 $token = $self->markup_html_element( $token, $type );
5795 push @colored_tokens, $token;
5797 return ( \@colored_tokens );
5800 sub markup_html_element {
5802 my ( $token, $type ) = @_;
5804 return $token if ( $type eq 'b' ); # skip a blank token
5805 return $token if ( $token =~ /^\s*$/ ); # skip a blank line
5806 $token = escape_html($token);
5808 # get the short abbreviation for this token type
5809 my $short_name = $token_short_names{$type};
5810 if ( !defined($short_name) ) {
5811 $short_name = "pu"; # punctuation is default
5814 # handle style sheets..
5815 if ( !$rOpts->{'nohtml-style-sheets'} ) {
5816 if ( $short_name ne 'pu' ) {
5817 $token = qq(<span class="$short_name">) . $token . "</span>";
5821 # handle no style sheets..
5823 my $color = $html_color{$short_name};
5825 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5826 $token = qq(<font color="$color">) . $token . "</font>";
5828 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5829 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
5837 if ($missing_html_entities) {
5838 $token =~ s/\&/&/g;
5839 $token =~ s/\</</g;
5840 $token =~ s/\>/>/g;
5841 $token =~ s/\"/"/g;
5844 HTML::Entities::encode_entities($token);
5849 sub finish_formatting {
5851 # called after last line
5853 $self->close_html_file();
5860 return unless $self->{_html_file_opened};
5861 my $html_pre_fh = $self->{_html_pre_fh};
5862 my ($line_of_tokens) = @_;
5863 my $line_type = $line_of_tokens->{_line_type};
5864 my $input_line = $line_of_tokens->{_line_text};
5865 my $line_number = $line_of_tokens->{_line_number};
5868 # markup line of code..
5870 if ( $line_type eq 'CODE' ) {
5871 my $rtoken_type = $line_of_tokens->{_rtoken_type};
5872 my $rtokens = $line_of_tokens->{_rtokens};
5873 my $rlevels = $line_of_tokens->{_rlevels};
5875 if ( $input_line =~ /(^\s*)/ ) {
5881 my ($rcolored_tokens) =
5882 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5883 $html_line .= join '', @$rcolored_tokens;
5886 # markup line of non-code..
5889 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
5890 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
5891 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
5892 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5893 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
5894 elsif ( $line_type eq 'END_START' ) {
5895 $line_character = 'k';
5896 $self->add_toc_item( '__END__', '__END__' );
5898 elsif ( $line_type eq 'DATA_START' ) {
5899 $line_character = 'k';
5900 $self->add_toc_item( '__DATA__', '__DATA__' );
5902 elsif ( $line_type =~ /^POD/ ) {
5903 $line_character = 'P';
5904 if ( $rOpts->{'pod2html'} ) {
5905 my $html_pod_fh = $self->{_html_pod_fh};
5906 if ( $line_type eq 'POD_START' ) {
5908 my $rpre_string_stack = $self->{_rpre_string_stack};
5909 my $rpre_string = $rpre_string_stack->[-1];
5911 # if we have written any non-blank lines to the
5912 # current pre section, start writing to a new output
5914 if ( $$rpre_string =~ /\S/ ) {
5917 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5918 $self->{_html_pre_fh} = $html_pre_fh;
5919 push @$rpre_string_stack, \$pre_string;
5921 # leave a marker in the pod stream so we know
5922 # where to put the pre section we just
5924 my $for_html = '=for html'; # don't confuse pod utils
5925 $html_pod_fh->print(<<EOM);
5928 <!-- pERLTIDY sECTION -->
5933 # otherwise, just clear the current string and start
5937 $html_pod_fh->print("\n");
5940 $html_pod_fh->print( $input_line . "\n" );
5941 if ( $line_type eq 'POD_END' ) {
5942 $self->{_pod_cut_count}++;
5943 $html_pod_fh->print("\n");
5948 else { $line_character = 'Q' }
5949 $html_line = $self->markup_html_element( $input_line, $line_character );
5952 # add the line number if requested
5953 if ( $rOpts->{'html-line-numbers'} ) {
5955 ( $line_number < 10 ) ? " "
5956 : ( $line_number < 100 ) ? " "
5957 : ( $line_number < 1000 ) ? " "
5959 $html_line = $extra_space . $line_number . " " . $html_line;
5963 $html_pre_fh->print("$html_line\n");
5966 #####################################################################
5968 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5969 # line breaks to the token stream
5971 # WARNING: This is not a real class for speed reasons. Only one
5972 # Formatter may be used.
5974 #####################################################################
5976 package Perl::Tidy::Formatter;
5980 # Caution: these debug flags produce a lot of output
5981 # They should all be 0 except when debugging small scripts
5982 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
5983 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
5984 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
5985 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
5986 use constant FORMATTER_DEBUG_FLAG_CI => 0;
5987 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
5988 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
5989 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
5990 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5991 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
5992 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
5993 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
5994 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
5995 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
5997 my $debug_warning = sub {
5998 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
6001 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
6002 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
6003 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
6004 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
6005 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
6006 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
6007 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
6008 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
6009 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
6010 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
6011 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
6012 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
6013 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
6014 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
6021 $max_gnu_stack_index
6022 $gnu_position_predictor
6023 $line_start_index_to_go
6024 $last_indentation_written
6025 $last_unadjusted_indentation
6027 $last_output_short_opening_token
6029 $saw_VERSION_in_this_file
6034 $gnu_sequence_number
6035 $last_output_indentation
6041 @type_sequence_to_go
6042 @container_environment_to_go
6043 @bond_strength_to_go
6044 @forced_breakpoint_to_go
6045 @token_lengths_to_go
6046 @summed_lengths_to_go
6048 @leading_spaces_to_go
6049 @reduced_spaces_to_go
6050 @matching_token_to_go
6052 @nesting_blocks_to_go
6054 @nesting_depth_to_go
6056 @old_breakpoint_to_go
6062 %saved_opening_indentation
6065 $comma_count_in_batch
6066 $old_line_count_in_batch
6067 $last_nonblank_index_to_go
6068 $last_nonblank_type_to_go
6069 $last_nonblank_token_to_go
6070 $last_last_nonblank_index_to_go
6071 $last_last_nonblank_type_to_go
6072 $last_last_nonblank_token_to_go
6073 @nonblank_lines_at_depth
6076 @whitespace_level_stack
6077 $whitespace_last_level
6079 $in_format_skipping_section
6080 $format_skipping_pattern_begin
6081 $format_skipping_pattern_end
6083 $forced_breakpoint_count
6084 $forced_breakpoint_undo_count
6085 @forced_breakpoint_undo_stack
6086 %postponed_breakpoint
6090 $first_embedded_tab_at
6091 $last_embedded_tab_at
6092 $deleted_semicolon_count
6093 $first_deleted_semicolon_at
6094 $last_deleted_semicolon_at
6095 $added_semicolon_count
6096 $first_added_semicolon_at
6097 $last_added_semicolon_at
6098 $first_tabbing_disagreement
6099 $last_tabbing_disagreement
6100 $in_tabbing_disagreement
6101 $tabbing_disagreement_count
6105 $last_line_leading_type
6106 $last_line_leading_level
6107 $last_last_line_leading_level
6110 %block_opening_line_number
6111 $csc_new_statement_ok
6114 $accumulating_text_for_block
6116 $rleading_block_if_elsif_text
6117 $leading_block_text_level
6118 $leading_block_text_length_exceeded
6119 $leading_block_text_line_length
6120 $leading_block_text_line_number
6121 $closing_side_comment_prefix_pattern
6122 $closing_side_comment_list_pattern
6124 $last_nonblank_token
6126 $last_last_nonblank_token
6127 $last_last_nonblank_type
6128 $last_nonblank_block_type
6131 %is_if_brace_follower
6132 %space_after_keyword
6135 %is_last_next_redo_return
6136 %is_other_brace_follower
6137 %is_else_brace_follower
6138 %is_anon_sub_brace_follower
6139 %is_anon_sub_1_brace_follower
6141 %is_sort_map_grep_eval
6142 %is_sort_map_grep_eval_do
6143 %is_block_without_semicolon
6148 %is_if_unless_and_or_last_next_redo_return
6154 $is_static_block_comment
6155 $index_start_one_line_block
6156 $semicolons_before_block_self_destruct
6157 $index_max_forced_break
6160 $vertical_aligner_object
6165 $last_line_had_side_comment
6168 $static_block_comment_pattern
6169 $static_side_comment_pattern
6170 %opening_vertical_tightness
6171 %closing_vertical_tightness
6172 %closing_token_indentation
6173 $some_closing_token_indentation
6175 %opening_token_right
6176 %stack_opening_token
6177 %stack_closing_token
6179 $block_brace_vertical_tightness_pattern
6182 $rOpts_add_whitespace
6183 $rOpts_block_brace_tightness
6184 $rOpts_block_brace_vertical_tightness
6185 $rOpts_brace_left_and_indent
6186 $rOpts_comma_arrow_breakpoints
6187 $rOpts_break_at_old_keyword_breakpoints
6188 $rOpts_break_at_old_comma_breakpoints
6189 $rOpts_break_at_old_logical_breakpoints
6190 $rOpts_break_at_old_ternary_breakpoints
6191 $rOpts_break_at_old_attribute_breakpoints
6192 $rOpts_closing_side_comment_else_flag
6193 $rOpts_closing_side_comment_maximum_text
6194 $rOpts_continuation_indentation
6196 $rOpts_delete_old_whitespace
6197 $rOpts_fuzzy_line_length
6198 $rOpts_indent_columns
6199 $rOpts_line_up_parentheses
6200 $rOpts_maximum_fields_per_table
6201 $rOpts_maximum_line_length
6202 $rOpts_variable_maximum_line_length
6203 $rOpts_short_concatenation_item_length
6204 $rOpts_keep_old_blank_lines
6205 $rOpts_ignore_old_breakpoints
6206 $rOpts_format_skipping
6207 $rOpts_space_function_paren
6208 $rOpts_space_keyword_paren
6209 $rOpts_keep_interior_semicolons
6210 $rOpts_ignore_side_comment_lengths
6211 $rOpts_stack_closing_block_brace
6212 $rOpts_whitespace_cycle
6213 $rOpts_tight_secret_operators
6217 %is_keyword_returning_list
6221 %right_bond_strength
6238 # default list of block types for which -bli would apply
6239 $bli_list_string = 'if else elsif unless while for foreach do : sub';
6242 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
6243 <= >= == =~ !~ != ++ -- /= x=
6245 @is_digraph{@_} = (1) x scalar(@_);
6247 @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
6248 @is_trigraph{@_} = (1) x scalar(@_);
6251 = **= += *= &= <<= &&=
6252 -= /= |= >>= ||= //=
6256 @is_assignment{@_} = (1) x scalar(@_);
6266 @is_keyword_returning_list{@_} = (1) x scalar(@_);
6268 @_ = qw(is if unless and or err last next redo return);
6269 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
6271 @_ = qw(last next redo return);
6272 @is_last_next_redo_return{@_} = (1) x scalar(@_);
6274 @_ = qw(sort map grep);
6275 @is_sort_map_grep{@_} = (1) x scalar(@_);
6277 @_ = qw(sort map grep eval);
6278 @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
6280 @_ = qw(sort map grep eval do);
6281 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
6284 @is_if_unless{@_} = (1) x scalar(@_);
6286 @_ = qw(and or err);
6287 @is_and_or{@_} = (1) x scalar(@_);
6289 # Identify certain operators which often occur in chains.
6290 # Note: the minus (-) causes a side effect of padding of the first line in
6291 # something like this (by sub set_logical_padding):
6292 # Checkbutton => 'Transmission checked',
6293 # -variable => \$TRANS
6294 # This usually improves appearance so it seems ok.
6295 @_ = qw(&& || and or : ? . + - * /);
6296 @is_chain_operator{@_} = (1) x scalar(@_);
6298 # We can remove semicolons after blocks preceded by these keywords
6300 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6301 unless while until for foreach given when default);
6302 @is_block_without_semicolon{@_} = (1) x scalar(@_);
6304 # 'L' is token for opening { at hash key
6306 @is_opening_type{@_} = (1) x scalar(@_);
6308 # 'R' is token for closing } at hash key
6310 @is_closing_type{@_} = (1) x scalar(@_);
6313 @is_opening_token{@_} = (1) x scalar(@_);
6316 @is_closing_token{@_} = (1) x scalar(@_);
6320 use constant WS_YES => 1;
6321 use constant WS_OPTIONAL => 0;
6322 use constant WS_NO => -1;
6324 # Token bond strengths.
6325 use constant NO_BREAK => 10000;
6326 use constant VERY_STRONG => 100;
6327 use constant STRONG => 2.1;
6328 use constant NOMINAL => 1.1;
6329 use constant WEAK => 0.8;
6330 use constant VERY_WEAK => 0.55;
6332 # values for testing indexes in output array
6333 use constant UNDEFINED_INDEX => -1;
6335 # Maximum number of little messages; probably need not be changed.
6336 use constant MAX_NAG_MESSAGES => 6;
6338 # increment between sequence numbers for each type
6339 # For example, ?: pairs might have numbers 7,11,15,...
6340 use constant TYPE_SEQUENCE_INCREMENT => 4;
6344 # methods to count instances
6346 sub get_count { $_count; }
6347 sub _increment_count { ++$_count }
6348 sub _decrement_count { --$_count }
6353 # trim leading and trailing whitespace from a string
6362 $max = ( $max < $_ ) ? $_ : $max;
6370 $min = ( $min > $_ ) ? $_ : $min;
6377 # given a string containing words separated by whitespace,
6378 # return the list of words
6383 return split( /\s+/, $str );
6386 # interface to Perl::Tidy::Logger routines
6388 if ($logger_object) {
6389 $logger_object->warning(@_);
6394 if ($logger_object) {
6395 $logger_object->complain(@_);
6399 sub write_logfile_entry {
6400 if ($logger_object) {
6401 $logger_object->write_logfile_entry(@_);
6406 if ($logger_object) {
6407 $logger_object->black_box(@_);
6411 sub report_definite_bug {
6412 if ($logger_object) {
6413 $logger_object->report_definite_bug();
6417 sub get_saw_brace_error {
6418 if ($logger_object) {
6419 $logger_object->get_saw_brace_error();
6423 sub we_are_at_the_last_line {
6424 if ($logger_object) {
6425 $logger_object->we_are_at_the_last_line();
6429 # interface to Perl::Tidy::Diagnostics routine
6430 sub write_diagnostics {
6432 if ($diagnostics_object) {
6433 $diagnostics_object->write_diagnostics(@_);
6437 sub get_added_semicolon_count {
6439 return $added_semicolon_count;
6443 $_[0]->_decrement_count();
6450 # we are given an object with a write_line() method to take lines
6452 sink_object => undef,
6453 diagnostics_object => undef,
6454 logger_object => undef,
6456 my %args = ( %defaults, @_ );
6458 $logger_object = $args{logger_object};
6459 $diagnostics_object = $args{diagnostics_object};
6461 # we create another object with a get_line() and peek_ahead() method
6462 my $sink_object = $args{sink_object};
6463 $file_writer_object =
6464 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6466 # initialize the leading whitespace stack to negative levels
6467 # so that we can never run off the end of the stack
6468 $gnu_position_predictor = 0; # where the current token is predicted to be
6469 $max_gnu_stack_index = 0;
6470 $max_gnu_item_index = -1;
6471 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6472 @gnu_item_list = ();
6473 $last_output_indentation = 0;
6474 $last_indentation_written = 0;
6475 $last_unadjusted_indentation = 0;
6476 $last_leading_token = "";
6477 $last_output_short_opening_token = 0;
6479 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6480 $saw_END_or_DATA_ = 0;
6482 @block_type_to_go = ();
6483 @type_sequence_to_go = ();
6484 @container_environment_to_go = ();
6485 @bond_strength_to_go = ();
6486 @forced_breakpoint_to_go = ();
6487 @summed_lengths_to_go = (); # line length to start of ith token
6488 @token_lengths_to_go = ();
6490 @matching_token_to_go = ();
6491 @mate_index_to_go = ();
6492 @nesting_blocks_to_go = ();
6493 @ci_levels_to_go = ();
6494 @nesting_depth_to_go = (0);
6495 @nobreak_to_go = ();
6496 @old_breakpoint_to_go = ();
6499 @leading_spaces_to_go = ();
6500 @reduced_spaces_to_go = ();
6504 @whitespace_level_stack = ();
6505 $whitespace_last_level = -1;
6508 @has_broken_sublist = ();
6509 @want_comma_break = ();
6512 $first_tabbing_disagreement = 0;
6513 $last_tabbing_disagreement = 0;
6514 $tabbing_disagreement_count = 0;
6515 $in_tabbing_disagreement = 0;
6516 $input_line_tabbing = undef;
6518 $last_line_type = "";
6519 $last_last_line_leading_level = 0;
6520 $last_line_leading_level = 0;
6521 $last_line_leading_type = '#';
6523 $last_nonblank_token = ';';
6524 $last_nonblank_type = ';';
6525 $last_last_nonblank_token = ';';
6526 $last_last_nonblank_type = ';';
6527 $last_nonblank_block_type = "";
6528 $last_output_level = 0;
6529 $looking_for_else = 0;
6530 $embedded_tab_count = 0;
6531 $first_embedded_tab_at = 0;
6532 $last_embedded_tab_at = 0;
6533 $deleted_semicolon_count = 0;
6534 $first_deleted_semicolon_at = 0;
6535 $last_deleted_semicolon_at = 0;
6536 $added_semicolon_count = 0;
6537 $first_added_semicolon_at = 0;
6538 $last_added_semicolon_at = 0;
6539 $last_line_had_side_comment = 0;
6540 $is_static_block_comment = 0;
6541 %postponed_breakpoint = ();
6543 # variables for adding side comments
6544 %block_leading_text = ();
6545 %block_opening_line_number = ();
6546 $csc_new_statement_ok = 1;
6547 %csc_block_label = ();
6549 %saved_opening_indentation = ();
6550 $in_format_skipping_section = 0;
6552 reset_block_text_accumulator();
6554 prepare_for_new_input_lines();
6556 $vertical_aligner_object =
6557 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6558 $logger_object, $diagnostics_object );
6560 if ( $rOpts->{'entab-leading-whitespace'} ) {
6561 write_logfile_entry(
6562 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6565 elsif ( $rOpts->{'tabs'} ) {
6566 write_logfile_entry("Indentation will be with a tab character\n");
6569 write_logfile_entry(
6570 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6573 # This was the start of a formatter referent, but object-oriented
6574 # coding has turned out to be too slow here.
6575 $formatter_self = {};
6577 bless $formatter_self, $class;
6579 # Safety check..this is not a class yet
6580 if ( _increment_count() > 1 ) {
6582 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6584 return $formatter_self;
6587 sub prepare_for_new_input_lines {
6589 $gnu_sequence_number++; # increment output batch counter
6590 %last_gnu_equals = ();
6591 %gnu_comma_count = ();
6592 %gnu_arrow_count = ();
6593 $line_start_index_to_go = 0;
6594 $max_gnu_item_index = UNDEFINED_INDEX;
6595 $index_max_forced_break = UNDEFINED_INDEX;
6596 $max_index_to_go = UNDEFINED_INDEX;
6597 $last_nonblank_index_to_go = UNDEFINED_INDEX;
6598 $last_nonblank_type_to_go = '';
6599 $last_nonblank_token_to_go = '';
6600 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6601 $last_last_nonblank_type_to_go = '';
6602 $last_last_nonblank_token_to_go = '';
6603 $forced_breakpoint_count = 0;
6604 $forced_breakpoint_undo_count = 0;
6605 $rbrace_follower = undef;
6606 $summed_lengths_to_go[0] = 0;
6607 $old_line_count_in_batch = 1;
6608 $comma_count_in_batch = 0;
6609 $starting_in_quote = 0;
6611 destroy_one_line_block();
6617 my ($line_of_tokens) = @_;
6619 my $line_type = $line_of_tokens->{_line_type};
6620 my $input_line = $line_of_tokens->{_line_text};
6622 if ( $rOpts->{notidy} ) {
6623 write_unindented_line($input_line);
6624 $last_line_type = $line_type;
6628 # _line_type codes are:
6629 # SYSTEM - system-specific code before hash-bang line
6630 # CODE - line of perl code (including comments)
6631 # POD_START - line starting pod, such as '=head'
6632 # POD - pod documentation text
6633 # POD_END - last line of pod section, '=cut'
6634 # HERE - text of here-document
6635 # HERE_END - last line of here-doc (target word)
6636 # FORMAT - format section
6637 # FORMAT_END - last line of format section, '.'
6638 # DATA_START - __DATA__ line
6639 # DATA - unidentified text following __DATA__
6640 # END_START - __END__ line
6641 # END - unidentified text following __END__
6642 # ERROR - we are in big trouble, probably not a perl script
6644 # put a blank line after an =cut which comes before __END__ and __DATA__
6645 # (required by podchecker)
6646 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6647 $file_writer_object->reset_consecutive_blank_lines();
6648 if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6651 # handle line of code..
6652 if ( $line_type eq 'CODE' ) {
6654 # let logger see all non-blank lines of code
6655 if ( $input_line !~ /^\s*$/ ) {
6656 my $output_line_number =
6657 $vertical_aligner_object->get_output_line_number();
6658 black_box( $line_of_tokens, $output_line_number );
6660 print_line_of_tokens($line_of_tokens);
6663 # handle line of non-code..
6669 if ( $line_type =~ /^POD/ ) {
6671 # Pod docs should have a preceding blank line. But stay
6672 # out of __END__ and __DATA__ sections, because
6673 # the user may be using this section for any purpose whatsoever
6674 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6675 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
6676 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
6678 && $line_type eq 'POD_START'
6679 && !$saw_END_or_DATA_ )
6685 # leave the blank counters in a predictable state
6686 # after __END__ or __DATA__
6687 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6688 $file_writer_object->reset_consecutive_blank_lines();
6689 $saw_END_or_DATA_ = 1;
6692 # write unindented non-code line
6693 if ( !$skip_line ) {
6694 if ($tee_line) { $file_writer_object->tee_on() }
6695 write_unindented_line($input_line);
6696 if ($tee_line) { $file_writer_object->tee_off() }
6699 $last_line_type = $line_type;
6702 sub create_one_line_block {
6703 $index_start_one_line_block = $_[0];
6704 $semicolons_before_block_self_destruct = $_[1];
6707 sub destroy_one_line_block {
6708 $index_start_one_line_block = UNDEFINED_INDEX;
6709 $semicolons_before_block_self_destruct = 0;
6712 sub leading_spaces_to_go {
6714 # return the number of indentation spaces for a token in the output stream;
6715 # these were previously stored by 'set_leading_whitespace'.
6718 if ( $ii < 0 ) { $ii = 0 }
6719 return get_SPACES( $leading_spaces_to_go[$ii] );
6725 # return the number of leading spaces associated with an indentation
6726 # variable $indentation is either a constant number of spaces or an object
6727 # with a get_SPACES method.
6728 my $indentation = shift;
6729 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6732 sub get_RECOVERABLE_SPACES {
6734 # return the number of spaces (+ means shift right, - means shift left)
6735 # that we would like to shift a group of lines with the same indentation
6736 # to get them to line up with their opening parens
6737 my $indentation = shift;
6738 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6741 sub get_AVAILABLE_SPACES_to_go {
6743 my $item = $leading_spaces_to_go[ $_[0] ];
6745 # return the number of available leading spaces associated with an
6746 # indentation variable. $indentation is either a constant number of
6747 # spaces or an object with a get_AVAILABLE_SPACES method.
6748 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6751 sub new_lp_indentation_item {
6753 # this is an interface to the IndentationItem class
6754 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6756 # A negative level implies not to store the item in the item_list
6758 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6760 my $item = Perl::Tidy::IndentationItem->new(
6762 $ci_level, $available_spaces,
6763 $index, $gnu_sequence_number,
6764 $align_paren, $max_gnu_stack_index,
6765 $line_start_index_to_go,
6768 if ( $level >= 0 ) {
6769 $gnu_item_list[$max_gnu_item_index] = $item;
6775 sub set_leading_whitespace {
6777 # This routine defines leading whitespace
6778 # given: the level and continuation_level of a token,
6779 # define: space count of leading string which would apply if it
6780 # were the first token of a new line.
6782 my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
6784 # Adjust levels if necessary to recycle whitespace:
6785 # given $level_abs, the absolute level
6786 # define $level, a possibly reduced level for whitespace
6787 my $level = $level_abs;
6788 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
6789 if ( $level_abs < $whitespace_last_level ) {
6790 pop(@whitespace_level_stack);
6792 if ( !@whitespace_level_stack ) {
6793 push @whitespace_level_stack, $level_abs;
6795 elsif ( $level_abs > $whitespace_last_level ) {
6796 $level = $whitespace_level_stack[-1] +
6797 ( $level_abs - $whitespace_last_level );
6800 # 1 Try to break at a block brace
6802 $level > $rOpts_whitespace_cycle
6803 && $last_nonblank_type eq '{'
6804 && $last_nonblank_token eq '{'
6807 # 2 Then either a brace or bracket
6808 || ( $level > $rOpts_whitespace_cycle + 1
6809 && $last_nonblank_token =~ /^[\{\[]$/ )
6811 # 3 Then a paren too
6812 || $level > $rOpts_whitespace_cycle + 2
6817 push @whitespace_level_stack, $level;
6819 $level = $whitespace_level_stack[-1];
6821 $whitespace_last_level = $level_abs;
6823 # modify for -bli, which adds one continuation indentation for
6825 if ( $rOpts_brace_left_and_indent
6826 && $max_index_to_go == 0
6827 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6832 # patch to avoid trouble when input file has negative indentation.
6833 # other logic should catch this error.
6834 if ( $level < 0 ) { $level = 0 }
6836 #-------------------------------------------
6837 # handle the standard indentation scheme
6838 #-------------------------------------------
6839 unless ($rOpts_line_up_parentheses) {
6841 $ci_level * $rOpts_continuation_indentation +
6842 $level * $rOpts_indent_columns;
6844 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6846 if ($in_continued_quote) {
6850 $leading_spaces_to_go[$max_index_to_go] = $space_count;
6851 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6855 #-------------------------------------------------------------
6856 # handle case of -lp indentation..
6857 #-------------------------------------------------------------
6859 # The continued_quote flag means that this is the first token of a
6860 # line, and it is the continuation of some kind of multi-line quote
6861 # or pattern. It requires special treatment because it must have no
6862 # added leading whitespace. So we create a special indentation item
6863 # which is not in the stack.
6864 if ($in_continued_quote) {
6865 my $space_count = 0;
6866 my $available_space = 0;
6867 $level = -1; # flag to prevent storing in item_list
6868 $leading_spaces_to_go[$max_index_to_go] =
6869 $reduced_spaces_to_go[$max_index_to_go] =
6870 new_lp_indentation_item( $space_count, $level, $ci_level,
6871 $available_space, 0 );
6875 # get the top state from the stack
6876 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6877 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6878 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6880 my $type = $types_to_go[$max_index_to_go];
6881 my $token = $tokens_to_go[$max_index_to_go];
6882 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6884 if ( $type eq '{' || $type eq '(' ) {
6886 $gnu_comma_count{ $total_depth + 1 } = 0;
6887 $gnu_arrow_count{ $total_depth + 1 } = 0;
6889 # If we come to an opening token after an '=' token of some type,
6890 # see if it would be helpful to 'break' after the '=' to save space
6891 my $last_equals = $last_gnu_equals{$total_depth};
6892 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6894 # find the position if we break at the '='
6895 my $i_test = $last_equals;
6896 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6899 ##my $too_close = ($i_test==$max_index_to_go-1);
6901 my $test_position = total_line_length( $i_test, $max_index_to_go );
6902 my $mll = maximum_line_length($i_test);
6906 # the equals is not just before an open paren (testing)
6909 # if we are beyond the midpoint
6910 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
6912 # or we are beyond the 1/4 point and there was an old
6913 # break at the equals
6915 $gnu_position_predictor >
6916 $mll - $rOpts_maximum_line_length * 3 / 4
6918 $old_breakpoint_to_go[$last_equals]
6919 || ( $last_equals > 0
6920 && $old_breakpoint_to_go[ $last_equals - 1 ] )
6921 || ( $last_equals > 1
6922 && $types_to_go[ $last_equals - 1 ] eq 'b'
6923 && $old_breakpoint_to_go[ $last_equals - 2 ] )
6929 # then make the switch -- note that we do not set a real
6930 # breakpoint here because we may not really need one; sub
6931 # scan_list will do that if necessary
6932 $line_start_index_to_go = $i_test + 1;
6933 $gnu_position_predictor = $test_position;
6939 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
6941 # Check for decreasing depth ..
6942 # Note that one token may have both decreasing and then increasing
6943 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
6944 # in this example we would first go back to (1,0) then up to (2,0)
6946 if ( $level < $current_level || $ci_level < $current_ci_level ) {
6948 # loop to find the first entry at or completely below this level
6949 my ( $lev, $ci_lev );
6951 if ($max_gnu_stack_index) {
6953 # save index of token which closes this level
6954 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6956 # Undo any extra indentation if we saw no commas
6957 my $available_spaces =
6958 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6960 my $comma_count = 0;
6961 my $arrow_count = 0;
6962 if ( $type eq '}' || $type eq ')' ) {
6963 $comma_count = $gnu_comma_count{$total_depth};
6964 $arrow_count = $gnu_arrow_count{$total_depth};
6965 $comma_count = 0 unless $comma_count;
6966 $arrow_count = 0 unless $arrow_count;
6968 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6969 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6971 if ( $available_spaces > 0 ) {
6973 if ( $comma_count <= 0 || $arrow_count > 0 ) {
6975 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6977 $gnu_stack[$max_gnu_stack_index]
6978 ->get_SEQUENCE_NUMBER();
6980 # Be sure this item was created in this batch. This
6981 # should be true because we delete any available
6982 # space from open items at the end of each batch.
6983 if ( $gnu_sequence_number != $seqno
6984 || $i > $max_gnu_item_index )
6987 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6989 report_definite_bug();
6993 if ( $arrow_count == 0 ) {
6995 ->permanently_decrease_AVAILABLE_SPACES(
7000 ->tentatively_decrease_AVAILABLE_SPACES(
7007 $j <= $max_gnu_item_index ;
7012 ->decrease_SPACES($available_spaces);
7019 --$max_gnu_stack_index;
7020 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
7021 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
7023 # stop when we reach a level at or below the current level
7024 if ( $lev <= $level && $ci_lev <= $ci_level ) {
7026 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
7027 $current_level = $lev;
7028 $current_ci_level = $ci_lev;
7033 # reached bottom of stack .. should never happen because
7034 # only negative levels can get here, and $level was forced
7035 # to be positive above.
7038 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
7040 report_definite_bug();
7046 # handle increasing depth
7047 if ( $level > $current_level || $ci_level > $current_ci_level ) {
7049 # Compute the standard incremental whitespace. This will be
7050 # the minimum incremental whitespace that will be used. This
7051 # choice results in a smooth transition between the gnu-style
7052 # and the standard style.
7053 my $standard_increment =
7054 ( $level - $current_level ) * $rOpts_indent_columns +
7055 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
7057 # Now we have to define how much extra incremental space
7058 # ("$available_space") we want. This extra space will be
7059 # reduced as necessary when long lines are encountered or when
7060 # it becomes clear that we do not have a good list.
7061 my $available_space = 0;
7062 my $align_paren = 0;
7065 # initialization on empty stack..
7066 if ( $max_gnu_stack_index == 0 ) {
7067 $space_count = $level * $rOpts_indent_columns;
7070 # if this is a BLOCK, add the standard increment
7071 elsif ($last_nonblank_block_type) {
7072 $space_count += $standard_increment;
7075 # if last nonblank token was not structural indentation,
7076 # just use standard increment
7077 elsif ( $last_nonblank_type ne '{' ) {
7078 $space_count += $standard_increment;
7081 # otherwise use the space to the first non-blank level change token
7084 $space_count = $gnu_position_predictor;
7086 my $min_gnu_indentation =
7087 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
7089 $available_space = $space_count - $min_gnu_indentation;
7090 if ( $available_space >= $standard_increment ) {
7091 $min_gnu_indentation += $standard_increment;
7093 elsif ( $available_space > 1 ) {
7094 $min_gnu_indentation += $available_space + 1;
7096 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
7097 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
7098 $min_gnu_indentation += 2;
7101 $min_gnu_indentation += 1;
7105 $min_gnu_indentation += $standard_increment;
7107 $available_space = $space_count - $min_gnu_indentation;
7109 if ( $available_space < 0 ) {
7110 $space_count = $min_gnu_indentation;
7111 $available_space = 0;
7116 # update state, but not on a blank token
7117 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
7119 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
7121 ++$max_gnu_stack_index;
7122 $gnu_stack[$max_gnu_stack_index] =
7123 new_lp_indentation_item( $space_count, $level, $ci_level,
7124 $available_space, $align_paren );
7126 # If the opening paren is beyond the half-line length, then
7127 # we will use the minimum (standard) indentation. This will
7128 # help avoid problems associated with running out of space
7129 # near the end of a line. As a result, in deeply nested
7130 # lists, there will be some indentations which are limited
7131 # to this minimum standard indentation. But the most deeply
7132 # nested container will still probably be able to shift its
7133 # parameters to the right for proper alignment, so in most
7134 # cases this will not be noticeable.
7135 if ( $available_space > 0 && $space_count > $halfway ) {
7136 $gnu_stack[$max_gnu_stack_index]
7137 ->tentatively_decrease_AVAILABLE_SPACES($available_space);
7142 # Count commas and look for non-list characters. Once we see a
7143 # non-list character, we give up and don't look for any more commas.
7144 if ( $type eq '=>' ) {
7145 $gnu_arrow_count{$total_depth}++;
7147 # tentatively treating '=>' like '=' for estimating breaks
7148 # TODO: this could use some experimentation
7149 $last_gnu_equals{$total_depth} = $max_index_to_go;
7152 elsif ( $type eq ',' ) {
7153 $gnu_comma_count{$total_depth}++;
7156 elsif ( $is_assignment{$type} ) {
7157 $last_gnu_equals{$total_depth} = $max_index_to_go;
7160 # this token might start a new line
7161 # if this is a non-blank..
7162 if ( $type ne 'b' ) {
7167 # this is the first nonblank token of the line
7168 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
7170 # or previous character was one of these:
7171 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
7173 # or previous character was opening and this does not close it
7174 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
7175 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
7177 # or this token is one of these:
7178 || $type =~ /^([\.]|\|\||\&\&)$/
7180 # or this is a closing structure
7181 || ( $last_nonblank_type_to_go eq '}'
7182 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
7184 # or previous token was keyword 'return'
7185 || ( $last_nonblank_type_to_go eq 'k'
7186 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
7188 # or starting a new line at certain keywords is fine
7190 && $is_if_unless_and_or_last_next_redo_return{$token} )
7192 # or this is after an assignment after a closing structure
7194 $is_assignment{$last_nonblank_type_to_go}
7196 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
7198 # and it is significantly to the right
7199 || $gnu_position_predictor > $halfway
7204 check_for_long_gnu_style_lines();
7205 $line_start_index_to_go = $max_index_to_go;
7207 # back up 1 token if we want to break before that type
7208 # otherwise, we may strand tokens like '?' or ':' on a line
7209 if ( $line_start_index_to_go > 0 ) {
7210 if ( $last_nonblank_type_to_go eq 'k' ) {
7212 if ( $want_break_before{$last_nonblank_token_to_go} ) {
7213 $line_start_index_to_go--;
7216 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
7217 $line_start_index_to_go--;
7223 # remember the predicted position of this token on the output line
7224 if ( $max_index_to_go > $line_start_index_to_go ) {
7225 $gnu_position_predictor =
7226 total_line_length( $line_start_index_to_go, $max_index_to_go );
7229 $gnu_position_predictor =
7230 $space_count + $token_lengths_to_go[$max_index_to_go];
7233 # store the indentation object for this token
7234 # this allows us to manipulate the leading whitespace
7235 # (in case we have to reduce indentation to fit a line) without
7236 # having to change any token values
7237 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
7238 $reduced_spaces_to_go[$max_index_to_go] =
7239 ( $max_gnu_stack_index > 0 && $ci_level )
7240 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
7241 : $gnu_stack[$max_gnu_stack_index];
7245 sub check_for_long_gnu_style_lines {
7247 # look at the current estimated maximum line length, and
7248 # remove some whitespace if it exceeds the desired maximum
7250 # this is only for the '-lp' style
7251 return unless ($rOpts_line_up_parentheses);
7253 # nothing can be done if no stack items defined for this line
7254 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7256 # see if we have exceeded the maximum desired line length
7257 # keep 2 extra free because they are needed in some cases
7258 # (result of trial-and-error testing)
7260 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
7262 return if ( $spaces_needed <= 0 );
7264 # We are over the limit, so try to remove a requested number of
7265 # spaces from leading whitespace. We are only allowed to remove
7266 # from whitespace items created on this batch, since others have
7267 # already been used and cannot be undone.
7268 my @candidates = ();
7271 # loop over all whitespace items created for the current batch
7272 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7273 my $item = $gnu_item_list[$i];
7275 # item must still be open to be a candidate (otherwise it
7276 # cannot influence the current token)
7277 next if ( $item->get_CLOSED() >= 0 );
7279 my $available_spaces = $item->get_AVAILABLE_SPACES();
7281 if ( $available_spaces > 0 ) {
7282 push( @candidates, [ $i, $available_spaces ] );
7286 return unless (@candidates);
7288 # sort by available whitespace so that we can remove whitespace
7289 # from the maximum available first
7290 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
7292 # keep removing whitespace until we are done or have no more
7294 foreach $candidate (@candidates) {
7295 my ( $i, $available_spaces ) = @{$candidate};
7296 my $deleted_spaces =
7297 ( $available_spaces > $spaces_needed )
7299 : $available_spaces;
7301 # remove the incremental space from this item
7302 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
7306 # update the leading whitespace of this item and all items
7307 # that came after it
7308 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
7310 my $old_spaces = $gnu_item_list[$i]->get_SPACES();
7311 if ( $old_spaces >= $deleted_spaces ) {
7312 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
7315 # shouldn't happen except for code bug:
7317 my $level = $gnu_item_list[$i_debug]->get_LEVEL();
7318 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
7319 my $old_level = $gnu_item_list[$i]->get_LEVEL();
7320 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
7322 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
7324 report_definite_bug();
7327 $gnu_position_predictor -= $deleted_spaces;
7328 $spaces_needed -= $deleted_spaces;
7329 last unless ( $spaces_needed > 0 );
7333 sub finish_lp_batch {
7335 # This routine is called once after each output stream batch is
7336 # finished to undo indentation for all incomplete -lp
7337 # indentation levels. It is too risky to leave a level open,
7338 # because then we can't backtrack in case of a long line to follow.
7339 # This means that comments and blank lines will disrupt this
7340 # indentation style. But the vertical aligner may be able to
7341 # get the space back if there are side comments.
7343 # this is only for the 'lp' style
7344 return unless ($rOpts_line_up_parentheses);
7346 # nothing can be done if no stack items defined for this line
7347 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7349 # loop over all whitespace items created for the current batch
7351 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7352 my $item = $gnu_item_list[$i];
7354 # only look for open items
7355 next if ( $item->get_CLOSED() >= 0 );
7357 # Tentatively remove all of the available space
7358 # (The vertical aligner will try to get it back later)
7359 my $available_spaces = $item->get_AVAILABLE_SPACES();
7360 if ( $available_spaces > 0 ) {
7362 # delete incremental space for this item
7364 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
7366 # Reduce the total indentation space of any nodes that follow
7367 # Note that any such nodes must necessarily be dependents
7369 foreach ( $i + 1 .. $max_gnu_item_index ) {
7370 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
7377 sub reduce_lp_indentation {
7379 # reduce the leading whitespace at token $i if possible by $spaces_needed
7380 # (a large value of $spaces_needed will remove all excess space)
7381 # NOTE: to be called from scan_list only for a sequence of tokens
7382 # contained between opening and closing parens/braces/brackets
7384 my ( $i, $spaces_wanted ) = @_;
7385 my $deleted_spaces = 0;
7387 my $item = $leading_spaces_to_go[$i];
7388 my $available_spaces = $item->get_AVAILABLE_SPACES();
7391 $available_spaces > 0
7392 && ( ( $spaces_wanted <= $available_spaces )
7393 || !$item->get_HAVE_CHILD() )
7397 # we'll remove these spaces, but mark them as recoverable
7399 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
7402 return $deleted_spaces;
7405 sub token_sequence_length {
7407 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
7408 # returns 0 if $ibeg > $iend (shouldn't happen)
7409 my ( $ibeg, $iend ) = @_;
7410 return 0 if ( $iend < 0 || $ibeg > $iend );
7411 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
7412 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
7415 sub total_line_length {
7417 # return length of a line of tokens ($ibeg .. $iend)
7418 my ( $ibeg, $iend ) = @_;
7419 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
7422 sub maximum_line_length_for_level {
7424 # return maximum line length for line starting with a given level
7425 my $maximum_line_length = $rOpts_maximum_line_length;
7427 # Modify if -vmll option is selected
7428 if ($rOpts_variable_maximum_line_length) {
7430 if ( $level < 0 ) { $level = 0 }
7431 $maximum_line_length += $level * $rOpts_indent_columns;
7433 return $maximum_line_length;
7436 sub maximum_line_length {
7438 # return maximum line length for line starting with the token at given index
7439 return maximum_line_length_for_level( $levels_to_go[ $_[0] ] );
7443 sub excess_line_length {
7445 # return number of characters by which a line of tokens ($ibeg..$iend)
7446 # exceeds the allowable line length.
7447 my ( $ibeg, $iend ) = @_;
7448 return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg);
7451 sub finish_formatting {
7453 # flush buffer and write any informative messages
7457 $file_writer_object->decrement_output_line_number()
7458 ; # fix up line number since it was incremented
7459 we_are_at_the_last_line();
7460 if ( $added_semicolon_count > 0 ) {
7461 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
7463 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
7464 write_logfile_entry("$added_semicolon_count $what added:\n");
7465 write_logfile_entry(
7466 " $first at input line $first_added_semicolon_at\n");
7468 if ( $added_semicolon_count > 1 ) {
7469 write_logfile_entry(
7470 " Last at input line $last_added_semicolon_at\n");
7472 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
7473 write_logfile_entry("\n");
7476 if ( $deleted_semicolon_count > 0 ) {
7477 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
7479 ( $deleted_semicolon_count > 1 )
7482 write_logfile_entry(
7483 "$deleted_semicolon_count unnecessary $what deleted:\n");
7484 write_logfile_entry(
7485 " $first at input line $first_deleted_semicolon_at\n");
7487 if ( $deleted_semicolon_count > 1 ) {
7488 write_logfile_entry(
7489 " Last at input line $last_deleted_semicolon_at\n");
7491 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
7492 write_logfile_entry("\n");
7495 if ( $embedded_tab_count > 0 ) {
7496 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
7498 ( $embedded_tab_count > 1 )
7499 ? "quotes or patterns"
7500 : "quote or pattern";
7501 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
7502 write_logfile_entry(
7503 "This means the display of this script could vary with device or software\n"
7505 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
7507 if ( $embedded_tab_count > 1 ) {
7508 write_logfile_entry(
7509 " Last at input line $last_embedded_tab_at\n");
7511 write_logfile_entry("\n");
7514 if ($first_tabbing_disagreement) {
7515 write_logfile_entry(
7516 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
7520 if ($in_tabbing_disagreement) {
7521 write_logfile_entry(
7522 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
7527 if ($last_tabbing_disagreement) {
7529 write_logfile_entry(
7530 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
7534 write_logfile_entry("No indentation disagreement seen\n");
7537 if ($first_tabbing_disagreement) {
7538 write_logfile_entry(
7539 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
7542 write_logfile_entry("\n");
7544 $vertical_aligner_object->report_anything_unusual();
7546 $file_writer_object->report_line_length_errors();
7551 # This routine is called to check the Opts hash after it is defined
7555 make_static_block_comment_pattern();
7556 make_static_side_comment_pattern();
7557 make_closing_side_comment_prefix();
7558 make_closing_side_comment_list_pattern();
7559 $format_skipping_pattern_begin =
7560 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7561 $format_skipping_pattern_end =
7562 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7564 # If closing side comments ARE selected, then we can safely
7565 # delete old closing side comments unless closing side comment
7566 # warnings are requested. This is a good idea because it will
7567 # eliminate any old csc's which fall below the line count threshold.
7568 # We cannot do this if warnings are turned on, though, because we
7569 # might delete some text which has been added. So that must
7570 # be handled when comments are created.
7571 if ( $rOpts->{'closing-side-comments'} ) {
7572 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7573 $rOpts->{'delete-closing-side-comments'} = 1;
7577 # If closing side comments ARE NOT selected, but warnings ARE
7578 # selected and we ARE DELETING csc's, then we will pretend to be
7579 # adding with a huge interval. This will force the comments to be
7580 # generated for comparison with the old comments, but not added.
7581 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7582 if ( $rOpts->{'delete-closing-side-comments'} ) {
7583 $rOpts->{'delete-closing-side-comments'} = 0;
7584 $rOpts->{'closing-side-comments'} = 1;
7585 $rOpts->{'closing-side-comment-interval'} = 100000000;
7590 make_block_brace_vertical_tightness_pattern();
7592 if ( $rOpts->{'line-up-parentheses'} ) {
7594 if ( $rOpts->{'indent-only'}
7595 || !$rOpts->{'add-newlines'}
7596 || !$rOpts->{'delete-old-newlines'} )
7598 Perl::Tidy::Warn <<EOM;
7599 -----------------------------------------------------------------------
7600 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7602 The -lp indentation logic requires that perltidy be able to coordinate
7603 arbitrarily large numbers of line breakpoints. This isn't possible
7604 with these flags. Sometimes an acceptable workaround is to use -wocb=3
7605 -----------------------------------------------------------------------
7607 $rOpts->{'line-up-parentheses'} = 0;
7611 # At present, tabs are not compatible with the line-up-parentheses style
7612 # (it would be possible to entab the total leading whitespace
7613 # just prior to writing the line, if desired).
7614 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7615 Perl::Tidy::Warn <<EOM;
7616 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
7618 $rOpts->{'tabs'} = 0;
7621 # Likewise, tabs are not compatible with outdenting..
7622 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7623 Perl::Tidy::Warn <<EOM;
7624 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7626 $rOpts->{'tabs'} = 0;
7629 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7630 Perl::Tidy::Warn <<EOM;
7631 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
7633 $rOpts->{'tabs'} = 0;
7636 if ( !$rOpts->{'space-for-semicolon'} ) {
7637 $want_left_space{'f'} = -1;
7640 if ( $rOpts->{'space-terminal-semicolon'} ) {
7641 $want_left_space{';'} = 1;
7644 # implement outdenting preferences for keywords
7645 %outdent_keyword = ();
7646 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7647 @_ = qw(next last redo goto return); # defaults
7650 # FUTURE: if not a keyword, assume that it is an identifier
7652 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7653 $outdent_keyword{$_} = 1;
7656 Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
7660 # implement user whitespace preferences
7661 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7662 @want_left_space{@_} = (1) x scalar(@_);
7665 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7666 @want_right_space{@_} = (1) x scalar(@_);
7669 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7670 @want_left_space{@_} = (-1) x scalar(@_);
7673 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7674 @want_right_space{@_} = (-1) x scalar(@_);
7676 if ( $rOpts->{'dump-want-left-space'} ) {
7677 dump_want_left_space(*STDOUT);
7681 if ( $rOpts->{'dump-want-right-space'} ) {
7682 dump_want_right_space(*STDOUT);
7686 # default keywords for which space is introduced before an opening paren
7687 # (at present, including them messes up vertical alignment)
7688 @_ = qw(my local our and or err eq ne if else elsif until
7689 unless while for foreach return switch case given when);
7690 @space_after_keyword{@_} = (1) x scalar(@_);
7692 # first remove any or all of these if desired
7693 if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7695 # -nsak='*' selects all the above keywords
7696 if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
7697 @space_after_keyword{@_} = (0) x scalar(@_);
7700 # then allow user to add to these defaults
7701 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7702 @space_after_keyword{@_} = (1) x scalar(@_);
7705 # implement user break preferences
7706 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7707 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7708 . : ? && || and or err xor
7711 my $break_after = sub {
7712 foreach my $tok (@_) {
7713 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
7714 my $lbs = $left_bond_strength{$tok};
7715 my $rbs = $right_bond_strength{$tok};
7716 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7717 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7723 my $break_before = sub {
7724 foreach my $tok (@_) {
7725 my $lbs = $left_bond_strength{$tok};
7726 my $rbs = $right_bond_strength{$tok};
7727 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7728 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7734 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7735 $break_before->(@all_operators)
7736 if ( $rOpts->{'break-before-all-operators'} );
7738 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7739 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7741 # make note if breaks are before certain key types
7742 %want_break_before = ();
7743 foreach my $tok ( @all_operators, ',' ) {
7744 $want_break_before{$tok} =
7745 $left_bond_strength{$tok} < $right_bond_strength{$tok};
7748 # Coordinate ?/: breaks, which must be similar
7749 if ( !$want_break_before{':'} ) {
7750 $want_break_before{'?'} = $want_break_before{':'};
7751 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7752 $left_bond_strength{'?'} = NO_BREAK;
7755 # Define here tokens which may follow the closing brace of a do statement
7756 # on the same line, as in:
7757 # } while ( $something);
7758 @_ = qw(until while unless if ; : );
7760 @is_do_follower{@_} = (1) x scalar(@_);
7762 # These tokens may follow the closing brace of an if or elsif block.
7763 # In other words, for cuddled else we want code to look like:
7764 # } elsif ( $something) {
7766 if ( $rOpts->{'cuddled-else'} ) {
7767 @_ = qw(else elsif);
7768 @is_if_brace_follower{@_} = (1) x scalar(@_);
7771 %is_if_brace_follower = ();
7774 # nothing can follow the closing curly of an else { } block:
7775 %is_else_brace_follower = ();
7777 # what can follow a multi-line anonymous sub definition closing curly:
7778 @_ = qw# ; : => or and && || ~~ !~~ ) #;
7780 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7782 # what can follow a one-line anonymous sub closing curly:
7783 # one-line anonymous subs also have ']' here...
7784 # see tk3.t and PP.pm
7785 @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
7787 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7789 # What can follow a closing curly of a block
7790 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7791 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7792 @_ = qw# ; : => or and && || ) #;
7795 # allow cuddled continue if cuddled else is specified
7796 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7798 @is_other_brace_follower{@_} = (1) x scalar(@_);
7800 $right_bond_strength{'{'} = WEAK;
7801 $left_bond_strength{'{'} = VERY_STRONG;
7803 # make -l=0 equal to -l=infinite
7804 if ( !$rOpts->{'maximum-line-length'} ) {
7805 $rOpts->{'maximum-line-length'} = 1000000;
7808 # make -lbl=0 equal to -lbl=infinite
7809 if ( !$rOpts->{'long-block-line-count'} ) {
7810 $rOpts->{'long-block-line-count'} = 1000000;
7813 my $ole = $rOpts->{'output-line-ending'};
7822 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7823 my $str = join " ", keys %endings;
7824 Perl::Tidy::Die <<EOM;
7825 Unrecognized line ending '$ole'; expecting one of: $str
7828 if ( $rOpts->{'preserve-line-endings'} ) {
7829 Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
7830 $rOpts->{'preserve-line-endings'} = undef;
7834 # hashes used to simplify setting whitespace
7836 '{' => $rOpts->{'brace-tightness'},
7837 '}' => $rOpts->{'brace-tightness'},
7838 '(' => $rOpts->{'paren-tightness'},
7839 ')' => $rOpts->{'paren-tightness'},
7840 '[' => $rOpts->{'square-bracket-tightness'},
7841 ']' => $rOpts->{'square-bracket-tightness'},
7850 # frequently used parameters
7851 $rOpts_add_newlines = $rOpts->{'add-newlines'};
7852 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
7853 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7854 $rOpts_block_brace_vertical_tightness =
7855 $rOpts->{'block-brace-vertical-tightness'};
7856 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
7857 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7858 $rOpts_break_at_old_ternary_breakpoints =
7859 $rOpts->{'break-at-old-ternary-breakpoints'};
7860 $rOpts_break_at_old_attribute_breakpoints =
7861 $rOpts->{'break-at-old-attribute-breakpoints'};
7862 $rOpts_break_at_old_comma_breakpoints =
7863 $rOpts->{'break-at-old-comma-breakpoints'};
7864 $rOpts_break_at_old_keyword_breakpoints =
7865 $rOpts->{'break-at-old-keyword-breakpoints'};
7866 $rOpts_break_at_old_logical_breakpoints =
7867 $rOpts->{'break-at-old-logical-breakpoints'};
7868 $rOpts_closing_side_comment_else_flag =
7869 $rOpts->{'closing-side-comment-else-flag'};
7870 $rOpts_closing_side_comment_maximum_text =
7871 $rOpts->{'closing-side-comment-maximum-text'};
7872 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7873 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
7874 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
7875 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
7876 $rOpts_indent_columns = $rOpts->{'indent-columns'};
7877 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
7878 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7879 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
7880 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
7882 $rOpts_variable_maximum_line_length =
7883 $rOpts->{'variable-maximum-line-length'};
7884 $rOpts_short_concatenation_item_length =
7885 $rOpts->{'short-concatenation-item-length'};
7887 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
7888 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
7889 $rOpts_format_skipping = $rOpts->{'format-skipping'};
7890 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
7891 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
7892 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7893 $rOpts_ignore_side_comment_lengths =
7894 $rOpts->{'ignore-side-comment-lengths'};
7896 # Note that both opening and closing tokens can access the opening
7897 # and closing flags of their container types.
7898 %opening_vertical_tightness = (
7899 '(' => $rOpts->{'paren-vertical-tightness'},
7900 '{' => $rOpts->{'brace-vertical-tightness'},
7901 '[' => $rOpts->{'square-bracket-vertical-tightness'},
7902 ')' => $rOpts->{'paren-vertical-tightness'},
7903 '}' => $rOpts->{'brace-vertical-tightness'},
7904 ']' => $rOpts->{'square-bracket-vertical-tightness'},
7907 %closing_vertical_tightness = (
7908 '(' => $rOpts->{'paren-vertical-tightness-closing'},
7909 '{' => $rOpts->{'brace-vertical-tightness-closing'},
7910 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7911 ')' => $rOpts->{'paren-vertical-tightness-closing'},
7912 '}' => $rOpts->{'brace-vertical-tightness-closing'},
7913 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7916 $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'};
7918 # assume flag for '>' same as ')' for closing qw quotes
7919 %closing_token_indentation = (
7920 ')' => $rOpts->{'closing-paren-indentation'},
7921 '}' => $rOpts->{'closing-brace-indentation'},
7922 ']' => $rOpts->{'closing-square-bracket-indentation'},
7923 '>' => $rOpts->{'closing-paren-indentation'},
7926 # flag indicating if any closing tokens are indented
7927 $some_closing_token_indentation =
7928 $rOpts->{'closing-paren-indentation'}
7929 || $rOpts->{'closing-brace-indentation'}
7930 || $rOpts->{'closing-square-bracket-indentation'}
7931 || $rOpts->{'indent-closing-brace'};
7933 %opening_token_right = (
7934 '(' => $rOpts->{'opening-paren-right'},
7935 '{' => $rOpts->{'opening-hash-brace-right'},
7936 '[' => $rOpts->{'opening-square-bracket-right'},
7939 %stack_opening_token = (
7940 '(' => $rOpts->{'stack-opening-paren'},
7941 '{' => $rOpts->{'stack-opening-hash-brace'},
7942 '[' => $rOpts->{'stack-opening-square-bracket'},
7945 %stack_closing_token = (
7946 ')' => $rOpts->{'stack-closing-paren'},
7947 '}' => $rOpts->{'stack-closing-hash-brace'},
7948 ']' => $rOpts->{'stack-closing-square-bracket'},
7950 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
7953 sub make_static_block_comment_pattern {
7955 # create the pattern used to identify static block comments
7956 $static_block_comment_pattern = '^\s*##';
7958 # allow the user to change it
7959 if ( $rOpts->{'static-block-comment-prefix'} ) {
7960 my $prefix = $rOpts->{'static-block-comment-prefix'};
7961 $prefix =~ s/^\s*//;
7962 my $pattern = $prefix;
7964 # user may give leading caret to force matching left comments only
7965 if ( $prefix !~ /^\^#/ ) {
7966 if ( $prefix !~ /^#/ ) {
7968 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7970 $pattern = '^\s*' . $prefix;
7972 eval "'##'=~/$pattern/";
7975 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7977 $static_block_comment_pattern = $pattern;
7981 sub make_format_skipping_pattern {
7982 my ( $opt_name, $default ) = @_;
7983 my $param = $rOpts->{$opt_name};
7984 unless ($param) { $param = $default }
7986 if ( $param !~ /^#/ ) {
7988 "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7990 my $pattern = '^' . $param . '\s';
7991 eval "'#'=~/$pattern/";
7994 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7999 sub make_closing_side_comment_list_pattern {
8001 # turn any input list into a regex for recognizing selected block types
8002 $closing_side_comment_list_pattern = '^\w+';
8003 if ( defined( $rOpts->{'closing-side-comment-list'} )
8004 && $rOpts->{'closing-side-comment-list'} )
8006 $closing_side_comment_list_pattern =
8007 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
8011 sub make_bli_pattern {
8013 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
8014 && $rOpts->{'brace-left-and-indent-list'} )
8016 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
8019 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
8022 sub make_block_brace_vertical_tightness_pattern {
8024 # turn any input list into a regex for recognizing selected block types
8025 $block_brace_vertical_tightness_pattern =
8026 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
8027 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
8028 && $rOpts->{'block-brace-vertical-tightness-list'} )
8030 $block_brace_vertical_tightness_pattern =
8031 make_block_pattern( '-bbvtl',
8032 $rOpts->{'block-brace-vertical-tightness-list'} );
8036 sub make_block_pattern {
8038 # given a string of block-type keywords, return a regex to match them
8039 # The only tricky part is that labels are indicated with a single ':'
8040 # and the 'sub' token text may have additional text after it (name of
8045 # input string: "if else elsif unless while for foreach do : sub";
8046 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
8048 my ( $abbrev, $string ) = @_;
8049 my @list = split_words($string);
8053 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
8056 if ( $i eq 'sub' ) {
8058 elsif ( $i eq ';' ) {
8061 elsif ( $i eq '{' ) {
8064 elsif ( $i eq ':' ) {
8065 push @words, '\w+:';
8067 elsif ( $i =~ /^\w/ ) {
8072 "unrecognized block type $i after $abbrev, ignoring\n";
8075 my $pattern = '(' . join( '|', @words ) . ')$';
8076 if ( $seen{'sub'} ) {
8077 $pattern = '(' . $pattern . '|sub)';
8079 $pattern = '^' . $pattern;
8083 sub make_static_side_comment_pattern {
8085 # create the pattern used to identify static side comments
8086 $static_side_comment_pattern = '^##';
8088 # allow the user to change it
8089 if ( $rOpts->{'static-side-comment-prefix'} ) {
8090 my $prefix = $rOpts->{'static-side-comment-prefix'};
8091 $prefix =~ s/^\s*//;
8092 my $pattern = '^' . $prefix;
8093 eval "'##'=~/$pattern/";
8096 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
8098 $static_side_comment_pattern = $pattern;
8102 sub make_closing_side_comment_prefix {
8104 # Be sure we have a valid closing side comment prefix
8105 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
8106 my $csc_prefix_pattern;
8107 if ( !defined($csc_prefix) ) {
8108 $csc_prefix = '## end';
8109 $csc_prefix_pattern = '^##\s+end';
8112 my $test_csc_prefix = $csc_prefix;
8113 if ( $test_csc_prefix !~ /^#/ ) {
8114 $test_csc_prefix = '#' . $test_csc_prefix;
8117 # make a regex to recognize the prefix
8118 my $test_csc_prefix_pattern = $test_csc_prefix;
8120 # escape any special characters
8121 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
8123 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
8125 # allow exact number of intermediate spaces to vary
8126 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
8128 # make sure we have a good pattern
8129 # if we fail this we probably have an error in escaping
8131 eval "'##'=~/$test_csc_prefix_pattern/";
8134 # shouldn't happen..must have screwed up escaping, above
8135 report_definite_bug();
8137 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
8139 # just warn and keep going with defaults
8140 Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
8142 "Using default -cscp instead; please check output\n";
8145 $csc_prefix = $test_csc_prefix;
8146 $csc_prefix_pattern = $test_csc_prefix_pattern;
8149 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
8150 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
8153 sub dump_want_left_space {
8157 These values are the main control of whitespace to the left of a token type;
8158 They may be altered with the -wls parameter.
8159 For a list of token types, use perltidy --dump-token-types (-dtt)
8160 1 means the token wants a space to its left
8161 -1 means the token does not want a space to its left
8162 ------------------------------------------------------------------------
8164 foreach ( sort keys %want_left_space ) {
8165 print $fh "$_\t$want_left_space{$_}\n";
8169 sub dump_want_right_space {
8173 These values are the main control of whitespace to the right of a token type;
8174 They may be altered with the -wrs parameter.
8175 For a list of token types, use perltidy --dump-token-types (-dtt)
8176 1 means the token wants a space to its right
8177 -1 means the token does not want a space to its right
8178 ------------------------------------------------------------------------
8180 foreach ( sort keys %want_right_space ) {
8181 print $fh "$_\t$want_right_space{$_}\n";
8185 { # begin is_essential_whitespace
8187 my %is_sort_grep_map;
8192 @_ = qw(sort grep map);
8193 @is_sort_grep_map{@_} = (1) x scalar(@_);
8195 @_ = qw(for foreach);
8196 @is_for_foreach{@_} = (1) x scalar(@_);
8200 sub is_essential_whitespace {
8202 # Essential whitespace means whitespace which cannot be safely deleted
8203 # without risking the introduction of a syntax error.
8204 # We are given three tokens and their types:
8205 # ($tokenl, $typel) is the token to the left of the space in question
8206 # ($tokenr, $typer) is the token to the right of the space in question
8207 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
8209 # This is a slow routine but is not needed too often except when -mangle
8212 # Note: This routine should almost never need to be changed. It is
8213 # for avoiding syntax problems rather than for formatting.
8214 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
8218 # never combine two bare words or numbers
8219 # examples: and ::ok(1)
8221 # for bla::bla:: abc
8222 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8223 # $input eq"quit" to make $inputeq"quit"
8224 # my $size=-s::SINK if $file; <==OK but we won't do it
8225 # don't join something like: for bla::bla:: abc
8226 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8227 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
8228 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
8230 # do not combine a number with a concatenation dot
8231 # example: pom.caputo:
8232 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
8233 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
8234 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
8236 # do not join a minus with a bare word, because you might form
8237 # a file test operator. Example from Complex.pm:
8238 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
8239 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
8241 # and something like this could become ambiguous without space
8243 # use constant III=>1;
8247 || ( ( $tokenl eq '-' )
8248 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
8250 # '= -' should not become =- or you will get a warning
8252 # || ($tokenr eq '-')
8254 # keep a space between a quote and a bareword to prevent the
8255 # bareword from becoming a quote modifier.
8256 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8258 # keep a space between a token ending in '$' and any word;
8259 # this caused trouble: "die @$ if $@"
8260 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
8261 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8263 # perl is very fussy about spaces before <<
8264 || ( $tokenr =~ /^\<\</ )
8266 # avoid combining tokens to create new meanings. Example:
8267 # $a+ +$b must not become $a++$b
8268 || ( $is_digraph{ $tokenl . $tokenr } )
8269 || ( $is_trigraph{ $tokenl . $tokenr } )
8271 # another example: do not combine these two &'s:
8272 # allow_options & &OPT_EXECCGI
8273 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
8275 # don't combine $$ or $# with any alphanumeric
8276 # (testfile mangle.t with --mangle)
8277 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
8279 # retain any space after possible filehandle
8280 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
8281 || ( $typel eq 'Z' )
8283 # Perl is sensitive to whitespace after the + here:
8284 # $b = xvals $a + 0.1 * yvals $a;
8285 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
8287 # keep paren separate in 'use Foo::Bar ()'
8291 && $tokenll eq 'use' )
8293 # keep any space between filehandle and paren:
8294 # file mangle.t with --mangle:
8295 || ( $typel eq 'Y' && $tokenr eq '(' )
8297 # retain any space after here doc operator ( hereerr.t)
8298 || ( $typel eq 'h' )
8300 # be careful with a space around ++ and --, to avoid ambiguity as to
8301 # which token it applies
8302 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
8303 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
8305 # need space after foreach my; for example, this will fail in
8306 # older versions of Perl:
8307 # foreach my$ft(@filetypes)...
8312 && $is_for_foreach{$tokenll}
8316 # must have space between grep and left paren; "grep(" will fail
8317 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
8319 # don't stick numbers next to left parens, as in:
8320 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
8321 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
8323 # We must be sure that a space between a ? and a quoted string
8324 # remains if the space before the ? remains. [Loca.pm, lockarea]
8326 # $b=join $comma ? ',' : ':', @_; # ok
8327 # $b=join $comma?',' : ':', @_; # ok!
8328 # $b=join $comma ?',' : ':', @_; # error!
8329 # Not really required:
8330 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
8332 # do not remove space between an '&' and a bare word because
8333 # it may turn into a function evaluation, like here
8334 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
8335 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
8336 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8338 # space stacked labels (TODO: check if really necessary)
8339 || ( $typel eq 'J' && $typer eq 'J' )
8341 ; # the value of this long logic sequence is the result we want
8347 my %secret_operators;
8348 my %is_leading_secret_token;
8352 # token lists for perl secret operators as compiled by Philippe Bruhat
8353 # at: https://metacpan.org/module/perlsecret
8354 %secret_operators = (
8355 'Goatse' => [qw#= ( ) =#], #=( )=
8356 'Venus1' => [qw#0 +#], # 0+
8357 'Venus2' => [qw#+ 0#], # +0
8358 'Enterprise' => [qw#) x ! !#], # ()x!!
8359 'Kite1' => [qw#~ ~ <>#], # ~~<>
8360 'Kite2' => [qw#~~ <>#], # ~~<>
8361 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
8364 # The following operators and constants are not included because they
8365 # are normally kept tight by perltidy:
8369 # Make a lookup table indexed by the first token of each operator:
8370 # first token => [list, list, ...]
8371 foreach my $value ( values(%secret_operators) ) {
8372 my $tok = $value->[0];
8373 push @{ $is_leading_secret_token{$tok} }, $value;
8377 sub secret_operator_whitespace {
8379 my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_;
8381 # Loop over all tokens in this line
8382 my ( $j, $token, $type );
8383 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8385 $token = $$rtokens[$j];
8386 $type = $$rtoken_type[$j];
8388 # Skip unless this token might start a secret operator
8389 next if ( $type eq 'b' );
8390 next unless ( $is_leading_secret_token{$token} );
8392 # Loop over all secret operators with this leading token
8393 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
8395 foreach my $tok ( @{$rpattern} ) {
8399 if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' );
8400 if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) {
8408 # set flags to prevent spaces within this operator
8409 for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) {
8410 $rwhite_space_flag->[$jj] = WS_NO;
8415 } ## End Loop over all operators
8416 } ## End loop over all tokens
8420 sub set_white_space_flag {
8422 # This routine examines each pair of nonblank tokens and
8423 # sets values for array @white_space_flag.
8425 # $white_space_flag[$j] is a flag indicating whether a white space
8426 # BEFORE token $j is needed, with the following values:
8428 # WS_NO = -1 do not want a space before token $j
8429 # WS_OPTIONAL= 0 optional space or $j is a whitespace
8430 # WS_YES = 1 want a space before token $j
8433 # The values for the first token will be defined based
8434 # upon the contents of the "to_go" output array.
8436 # Note: retain debug print statements because they are usually
8437 # required after adding new token types.
8441 # initialize these global hashes, which control the use of
8442 # whitespace around tokens:
8447 # %space_after_keyword
8449 # Many token types are identical to the tokens themselves.
8450 # See the tokenizer for a complete list. Here are some special types:
8452 # f = semicolon in for statement
8455 # Note that :: is excluded since it should be contained in an identifier
8456 # Note that '->' is excluded because it never gets space
8457 # parentheses and brackets are excluded since they are handled specially
8458 # curly braces are included but may be overridden by logic, such as
8461 # NEW_TOKENS: create a whitespace rule here. This can be as
8462 # simple as adding your new letter to @spaces_both_sides, for
8466 @is_opening_type{@_} = (1) x scalar(@_);
8469 @is_closing_type{@_} = (1) x scalar(@_);
8471 my @spaces_both_sides = qw"
8472 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
8473 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
8474 &&= ||= //= <=> A k f w F n C Y U G v
8477 my @spaces_left_side = qw"
8478 t ! ~ m p { \ h pp mm Z j
8480 push( @spaces_left_side, '#' ); # avoids warning message
8482 my @spaces_right_side = qw"
8483 ; } ) ] R J ++ -- **=
8485 push( @spaces_right_side, ',' ); # avoids warning message
8487 # Note that we are in a BEGIN block here. Later in processing
8488 # the values of %want_left_space and %want_right_space
8489 # may be overridden by any user settings specified by the
8490 # -wls and -wrs parameters. However the binary_whitespace_rules
8491 # are hardwired and have priority.
8492 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
8493 @want_right_space{@spaces_both_sides} =
8494 (1) x scalar(@spaces_both_sides);
8495 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
8496 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
8497 @want_left_space{@spaces_right_side} =
8498 (-1) x scalar(@spaces_right_side);
8499 @want_right_space{@spaces_right_side} =
8500 (1) x scalar(@spaces_right_side);
8501 $want_left_space{'->'} = WS_NO;
8502 $want_right_space{'->'} = WS_NO;
8503 $want_left_space{'**'} = WS_NO;
8504 $want_right_space{'**'} = WS_NO;
8505 $want_right_space{'CORE::'} = WS_NO;
8507 # These binary_ws_rules are hardwired and have priority over the above
8508 # settings. It would be nice to allow adjustment by the user,
8509 # but it would be complicated to specify.
8511 # hash type information must stay tightly bound
8513 $binary_ws_rules{'i'}{'L'} = WS_NO;
8514 $binary_ws_rules{'i'}{'{'} = WS_YES;
8515 $binary_ws_rules{'k'}{'{'} = WS_YES;
8516 $binary_ws_rules{'U'}{'{'} = WS_YES;
8517 $binary_ws_rules{'i'}{'['} = WS_NO;
8518 $binary_ws_rules{'R'}{'L'} = WS_NO;
8519 $binary_ws_rules{'R'}{'{'} = WS_NO;
8520 $binary_ws_rules{'t'}{'L'} = WS_NO;
8521 $binary_ws_rules{'t'}{'{'} = WS_NO;
8522 $binary_ws_rules{'}'}{'L'} = WS_NO;
8523 $binary_ws_rules{'}'}{'{'} = WS_NO;
8524 $binary_ws_rules{'$'}{'L'} = WS_NO;
8525 $binary_ws_rules{'$'}{'{'} = WS_NO;
8526 $binary_ws_rules{'@'}{'L'} = WS_NO;
8527 $binary_ws_rules{'@'}{'{'} = WS_NO;
8528 $binary_ws_rules{'='}{'L'} = WS_YES;
8529 $binary_ws_rules{'J'}{'J'} = WS_YES;
8531 # the following includes ') {'
8532 # as in : if ( xxx ) { yyy }
8533 $binary_ws_rules{']'}{'L'} = WS_NO;
8534 $binary_ws_rules{']'}{'{'} = WS_NO;
8535 $binary_ws_rules{')'}{'{'} = WS_YES;
8536 $binary_ws_rules{')'}{'['} = WS_NO;
8537 $binary_ws_rules{']'}{'['} = WS_NO;
8538 $binary_ws_rules{']'}{'{'} = WS_NO;
8539 $binary_ws_rules{'}'}{'['} = WS_NO;
8540 $binary_ws_rules{'R'}{'['} = WS_NO;
8542 $binary_ws_rules{']'}{'++'} = WS_NO;
8543 $binary_ws_rules{']'}{'--'} = WS_NO;
8544 $binary_ws_rules{')'}{'++'} = WS_NO;
8545 $binary_ws_rules{')'}{'--'} = WS_NO;
8547 $binary_ws_rules{'R'}{'++'} = WS_NO;
8548 $binary_ws_rules{'R'}{'--'} = WS_NO;
8550 $binary_ws_rules{'i'}{'Q'} = WS_YES;
8551 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
8553 # FIXME: we could to split 'i' into variables and functions
8554 # and have no space for functions but space for variables. For now,
8555 # I have a special patch in the special rules below
8556 $binary_ws_rules{'i'}{'('} = WS_NO;
8558 $binary_ws_rules{'w'}{'('} = WS_NO;
8559 $binary_ws_rules{'w'}{'{'} = WS_YES;
8560 } ## end BEGIN block
8562 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
8563 my ( $last_token, $last_type, $last_block_type, $token, $type,
8565 my (@white_space_flag);
8566 my $j_tight_closing_paren = -1;
8568 if ( $max_index_to_go >= 0 ) {
8569 $token = $tokens_to_go[$max_index_to_go];
8570 $type = $types_to_go[$max_index_to_go];
8571 $block_type = $block_type_to_go[$max_index_to_go];
8573 #---------------------------------------------------------------
8574 # Patch due to splitting of tokens with leading ->
8575 #---------------------------------------------------------------
8577 # This routine is dealing with the raw tokens from the tokenizer,
8578 # but to get started it needs the previous token, which will
8579 # have been stored in the '_to_go' arrays.
8581 # This patch avoids requiring two iterations to
8582 # converge for cases such as the following, where a paren
8583 # comes in on a line following a variable with leading arrow:
8584 # $self->{main}->add_content_defer_opening
8585 # ($name, $wmkf, $self->{attrs}, $self);
8586 # In this case when we see the opening paren on line 2 we need
8587 # to know if the last token on the previous line had an arrow,
8588 # but it has already been split off so we have to add it back
8589 # in to avoid getting an unwanted space before the paren.
8590 if ( $type =~ /^[wi]$/ ) {
8591 my $im = $iprev_to_go[$max_index_to_go];
8592 my $tm = ( $im >= 0 ) ? $types_to_go[$im] : "";
8593 if ( $tm eq '->' ) { $token = $tm . $token }
8596 #---------------------------------------------------------------
8597 # End patch due to splitting of tokens with leading ->
8598 #---------------------------------------------------------------
8608 # main loop over all tokens to define the whitespace flags
8609 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8611 if ( $$rtoken_type[$j] eq 'b' ) {
8612 $white_space_flag[$j] = WS_OPTIONAL;
8616 # set a default value, to be changed as needed
8618 $last_token = $token;
8620 $last_block_type = $block_type;
8621 $token = $$rtokens[$j];
8622 $type = $$rtoken_type[$j];
8623 $block_type = $$rblock_type[$j];
8625 #---------------------------------------------------------------
8626 # Whitespace Rules Section 1:
8627 # Handle space on the inside of opening braces.
8628 #---------------------------------------------------------------
8631 if ( $is_opening_type{$last_type} ) {
8633 $j_tight_closing_paren = -1;
8635 # let's keep empty matched braces together: () {} []
8637 if ( $token eq $matching_token{$last_token} ) {
8647 # we're considering the right of an opening brace
8648 # tightness = 0 means always pad inside with space
8649 # tightness = 1 means pad inside if "complex"
8650 # tightness = 2 means never pad inside with space
8653 if ( $last_type eq '{'
8654 && $last_token eq '{'
8655 && $last_block_type )
8657 $tightness = $rOpts_block_brace_tightness;
8659 else { $tightness = $tightness{$last_token} }
8661 #=============================================================
8662 # Patch for test problem fabrice_bug.pl
8663 # We must always avoid spaces around a bare word beginning
8665 # my $before = ${^PREMATCH};
8666 # Because all of the following cause an error in perl:
8667 # my $before = ${ ^PREMATCH };
8668 # my $before = ${ ^PREMATCH};
8669 # my $before = ${^PREMATCH };
8670 # So if brace tightness flag is -bt=0 we must temporarily reset
8671 # to bt=1. Note that here we must set tightness=1 and not 2 so
8672 # that the closing space
8673 # is also avoided (via the $j_tight_closing_paren flag in coding)
8674 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8676 #=============================================================
8678 if ( $tightness <= 0 ) {
8681 elsif ( $tightness > 1 ) {
8686 # Patch to count '-foo' as single token so that
8687 # each of $a{-foo} and $a{foo} and $a{'foo'} do
8688 # not get spaces with default formatting.
8692 && $last_token eq '{'
8693 && $$rtoken_type[ $j + 1 ] eq 'w' );
8695 # $j_next is where a closing token should be if
8696 # the container has a single token
8698 ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8701 my $tok_next = $$rtokens[$j_next];
8702 my $type_next = $$rtoken_type[$j_next];
8704 # for tightness = 1, if there is just one token
8705 # within the matching pair, we will keep it tight
8707 $tok_next eq $matching_token{$last_token}
8709 # but watch out for this: [ [ ] (misc.t)
8710 && $last_token ne $token
8714 # remember where to put the space for the closing paren
8715 $j_tight_closing_paren = $j_next;
8723 } # end setting space flag inside opening tokens
8725 if FORMATTER_DEBUG_FLAG_WHITE;
8727 #---------------------------------------------------------------
8728 # Whitespace Rules Section 2:
8729 # Handle space on inside of closing brace pairs.
8730 #---------------------------------------------------------------
8733 if ( $is_closing_type{$type} ) {
8735 if ( $j == $j_tight_closing_paren ) {
8737 $j_tight_closing_paren = -1;
8742 if ( !defined($ws) ) {
8745 if ( $type eq '}' && $token eq '}' && $block_type ) {
8746 $tightness = $rOpts_block_brace_tightness;
8748 else { $tightness = $tightness{$token} }
8750 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8753 } # end setting space flag inside closing tokens
8756 if FORMATTER_DEBUG_FLAG_WHITE;
8758 #---------------------------------------------------------------
8759 # Whitespace Rules Section 3:
8760 # Use the binary rule table.
8761 #---------------------------------------------------------------
8762 if ( !defined($ws) ) {
8763 $ws = $binary_ws_rules{$last_type}{$type};
8766 if FORMATTER_DEBUG_FLAG_WHITE;
8768 #---------------------------------------------------------------
8769 # Whitespace Rules Section 4:
8770 # Handle some special cases.
8771 #---------------------------------------------------------------
8772 if ( $token eq '(' ) {
8774 # This will have to be tweaked as tokenization changes.
8775 # We usually want a space at '} (', for example:
8776 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8779 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8780 # At present, the above & block is marked as type L/R so this case
8781 # won't go through here.
8782 if ( $last_type eq '}' ) { $ws = WS_YES }
8784 # NOTE: some older versions of Perl had occasional problems if
8785 # spaces are introduced between keywords or functions and opening
8786 # parens. So the default is not to do this except is certain
8787 # cases. The current Perl seems to tolerate spaces.
8789 # Space between keyword and '('
8790 elsif ( $last_type eq 'k' ) {
8792 unless ( $rOpts_space_keyword_paren
8793 || $space_after_keyword{$last_token} );
8796 # Space between function and '('
8797 # -----------------------------------------------------
8798 # 'w' and 'i' checks for something like:
8799 # myfun( &myfun( ->myfun(
8800 # -----------------------------------------------------
8801 elsif (( $last_type =~ /^[wUG]$/ )
8802 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8804 $ws = WS_NO unless ($rOpts_space_function_paren);
8807 # space between something like $i and ( in
8808 # for $i ( 0 .. 20 ) {
8809 # FIXME: eventually, type 'i' needs to be split into multiple
8810 # token types so this can be a hardwired rule.
8811 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8815 # allow constant function followed by '()' to retain no space
8816 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8821 # patch for SWITCH/CASE: make space at ']{' optional
8822 # since the '{' might begin a case or when block
8823 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8827 # keep space between 'sub' and '{' for anonymous sub definition
8828 if ( $type eq '{' ) {
8829 if ( $last_token eq 'sub' ) {
8833 # this is needed to avoid no space in '){'
8834 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8836 # avoid any space before the brace or bracket in something like
8837 # @opts{'a','b',...}
8838 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8843 elsif ( $type eq 'i' ) {
8845 # never a space before ->
8846 if ( $token =~ /^\-\>/ ) {
8851 # retain any space between '-' and bare word
8852 elsif ( $type eq 'w' || $type eq 'C' ) {
8853 $ws = WS_OPTIONAL if $last_type eq '-';
8855 # never a space before ->
8856 if ( $token =~ /^\-\>/ ) {
8861 # retain any space between '-' and bare word
8862 # example: avoid space between 'USER' and '-' here:
8863 # $myhash{USER-NAME}='steve';
8864 elsif ( $type eq 'm' || $type eq '-' ) {
8865 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8868 # always space before side comment
8869 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8871 # always preserver whatever space was used after a possible
8872 # filehandle (except _) or here doc operator
8875 && ( ( $last_type eq 'Z' && $last_token ne '_' )
8876 || $last_type eq 'h' )
8883 if FORMATTER_DEBUG_FLAG_WHITE;
8885 #---------------------------------------------------------------
8886 # Whitespace Rules Section 5:
8887 # Apply default rules not covered above.
8888 #---------------------------------------------------------------
8890 # If we fall through to here, look at the pre-defined hash tables for
8891 # the two tokens, and:
8892 # if (they are equal) use the common value
8893 # if (either is zero or undef) use the other
8894 # if (either is -1) use it
8908 if ( !defined($ws) ) {
8909 my $wl = $want_left_space{$type};
8910 my $wr = $want_right_space{$last_type};
8911 if ( !defined($wl) ) { $wl = 0 }
8912 if ( !defined($wr) ) { $wr = 0 }
8913 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8916 if ( !defined($ws) ) {
8919 "WS flag is undefined for tokens $last_token $token\n");
8922 # Treat newline as a whitespace. Otherwise, we might combine
8923 # 'Send' and '-recipients' here according to the above rules:
8924 # my $msg = new Fax::Send
8925 # -recipients => $to,
8927 if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8932 && ( $last_type !~ /^[Zh]$/ ) )
8935 # If this happens, we have a non-fatal but undesirable
8936 # hole in the above rules which should be patched.
8938 "WS flag is zero for tokens $last_token $token\n");
8940 $white_space_flag[$j] = $ws;
8942 FORMATTER_DEBUG_FLAG_WHITE && do {
8943 my $str = substr( $last_token, 0, 15 );
8944 $str .= ' ' x ( 16 - length($str) );
8945 if ( !defined($ws_1) ) { $ws_1 = "*" }
8946 if ( !defined($ws_2) ) { $ws_2 = "*" }
8947 if ( !defined($ws_3) ) { $ws_3 = "*" }
8948 if ( !defined($ws_4) ) { $ws_4 = "*" }
8950 "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8954 if ($rOpts_tight_secret_operators) {
8955 secret_operator_whitespace( $jmax, $rtokens, $rtoken_type,
8956 \@white_space_flag );
8959 return \@white_space_flag;
8960 } ## end sub set_white_space_flag
8962 { # begin print_line_of_tokens
8969 my $rcontainer_type;
8970 my $rcontainer_environment;
8973 my $rnesting_tokens;
8975 my $rnesting_blocks;
8978 my $guessed_indentation_level;
8980 # These local token variables are stored by store_token_to_go:
8983 my $container_environment;
8985 my $in_continued_quote;
8988 my $no_internal_newlines;
8994 # routine to pull the jth token from the line of tokens
8997 $token = $$rtokens[$j];
8998 $type = $$rtoken_type[$j];
8999 $block_type = $$rblock_type[$j];
9000 $container_type = $$rcontainer_type[$j];
9001 $container_environment = $$rcontainer_environment[$j];
9002 $type_sequence = $$rtype_sequence[$j];
9003 $level = $$rlevels[$j];
9004 $slevel = $$rslevels[$j];
9005 $nesting_blocks = $$rnesting_blocks[$j];
9006 $ci_level = $$rci_levels[$j];
9012 sub save_current_token {
9015 $block_type, $ci_level,
9016 $container_environment, $container_type,
9017 $in_continued_quote, $level,
9018 $nesting_blocks, $no_internal_newlines,
9020 $type, $type_sequence,
9024 sub restore_current_token {
9026 $block_type, $ci_level,
9027 $container_environment, $container_type,
9028 $in_continued_quote, $level,
9029 $nesting_blocks, $no_internal_newlines,
9031 $type, $type_sequence,
9038 # Returns the length of a token, given:
9039 # $token=text of the token
9041 # $not_first_token = should be TRUE if this is not the first token of
9042 # the line. It might the index of this token in an array. It is
9043 # used to test for a side comment vs a block comment.
9044 # Note: Eventually this should be the only routine determining the
9045 # length of a token in this package.
9046 my ( $token, $type, $not_first_token ) = @_;
9047 my $token_length = length($token);
9049 # We mark lengths of side comments as just 1 if we are
9050 # ignoring their lengths when setting line breaks.
9052 if ( $rOpts_ignore_side_comment_lengths
9055 return $token_length;
9060 # return length of ith token in @{$rtokens}
9062 return token_length( $$rtokens[$i], $$rtoken_type[$i], $i );
9065 # Routine to place the current token into the output stream.
9066 # Called once per output token.
9067 sub store_token_to_go {
9069 my $flag = $no_internal_newlines;
9070 if ( $_[0] ) { $flag = 1 }
9072 $tokens_to_go[ ++$max_index_to_go ] = $token;
9073 $types_to_go[$max_index_to_go] = $type;
9074 $nobreak_to_go[$max_index_to_go] = $flag;
9075 $old_breakpoint_to_go[$max_index_to_go] = 0;
9076 $forced_breakpoint_to_go[$max_index_to_go] = 0;
9077 $block_type_to_go[$max_index_to_go] = $block_type;
9078 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
9079 $container_environment_to_go[$max_index_to_go] = $container_environment;
9080 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
9081 $ci_levels_to_go[$max_index_to_go] = $ci_level;
9082 $mate_index_to_go[$max_index_to_go] = -1;
9083 $matching_token_to_go[$max_index_to_go] = '';
9084 $bond_strength_to_go[$max_index_to_go] = 0;
9086 # Note: negative levels are currently retained as a diagnostic so that
9087 # the 'final indentation level' is correctly reported for bad scripts.
9088 # But this means that every use of $level as an index must be checked.
9089 # If this becomes too much of a problem, we might give up and just clip
9091 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
9092 $levels_to_go[$max_index_to_go] = $level;
9093 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
9095 # link the non-blank tokens
9096 my $iprev = $max_index_to_go - 1;
9097 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
9098 $iprev_to_go[$max_index_to_go] = $iprev;
9099 $inext_to_go[$iprev] = $max_index_to_go
9100 if ( $iprev >= 0 && $type ne 'b' );
9101 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
9103 $token_lengths_to_go[$max_index_to_go] =
9104 token_length( $token, $type, $max_index_to_go );
9106 # We keep a running sum of token lengths from the start of this batch:
9107 # summed_lengths_to_go[$i] = total length to just before token $i
9108 # summed_lengths_to_go[$i+1] = total length to just after token $i
9109 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
9110 $summed_lengths_to_go[$max_index_to_go] +
9111 $token_lengths_to_go[$max_index_to_go];
9113 # Define the indentation that this token would have if it started
9114 # a new line. We have to do this now because we need to know this
9115 # when considering one-line blocks.
9116 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
9118 # remember previous nonblank tokens seen
9119 if ( $type ne 'b' ) {
9120 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
9121 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
9122 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
9123 $last_nonblank_index_to_go = $max_index_to_go;
9124 $last_nonblank_type_to_go = $type;
9125 $last_nonblank_token_to_go = $token;
9126 if ( $type eq ',' ) {
9127 $comma_count_in_batch++;
9131 FORMATTER_DEBUG_FLAG_STORE && do {
9132 my ( $a, $b, $c ) = caller();
9134 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
9138 sub insert_new_token_to_go {
9140 # insert a new token into the output stream. use same level as
9141 # previous token; assumes a character at max_index_to_go.
9142 save_current_token();
9143 ( $token, $type, $slevel, $no_internal_newlines ) = @_;
9145 if ( $max_index_to_go == UNDEFINED_INDEX ) {
9146 warning("code bug: bad call to insert_new_token_to_go\n");
9148 $level = $levels_to_go[$max_index_to_go];
9150 # FIXME: it seems to be necessary to use the next, rather than
9151 # previous, value of this variable when creating a new blank (align.t)
9152 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
9153 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
9154 $ci_level = $ci_levels_to_go[$max_index_to_go];
9155 $container_environment = $container_environment_to_go[$max_index_to_go];
9156 $in_continued_quote = 0;
9158 $type_sequence = "";
9159 store_token_to_go();
9160 restore_current_token();
9164 sub print_line_of_tokens {
9166 my $line_of_tokens = shift;
9168 # This routine is called once per input line to process all of
9169 # the tokens on that line. This is the first stage of
9172 # Full-line comments and blank lines may be processed immediately.
9174 # For normal lines of code, the tokens are stored one-by-one,
9175 # via calls to 'sub store_token_to_go', until a known line break
9176 # point is reached. Then, the batch of collected tokens is
9177 # passed along to 'sub output_line_to_go' for further
9178 # processing. This routine decides if there should be
9179 # whitespace between each pair of non-white tokens, so later
9180 # routines only need to decide on any additional line breaks.
9181 # Any whitespace is initially a single space character. Later,
9182 # the vertical aligner may expand that to be multiple space
9183 # characters if necessary for alignment.
9185 # extract input line number for error messages
9186 $input_line_number = $line_of_tokens->{_line_number};
9188 $rtoken_type = $line_of_tokens->{_rtoken_type};
9189 $rtokens = $line_of_tokens->{_rtokens};
9190 $rlevels = $line_of_tokens->{_rlevels};
9191 $rslevels = $line_of_tokens->{_rslevels};
9192 $rblock_type = $line_of_tokens->{_rblock_type};
9193 $rcontainer_type = $line_of_tokens->{_rcontainer_type};
9194 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
9195 $rtype_sequence = $line_of_tokens->{_rtype_sequence};
9196 $input_line = $line_of_tokens->{_line_text};
9197 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
9198 $rci_levels = $line_of_tokens->{_rci_levels};
9199 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
9201 $in_continued_quote = $starting_in_quote =
9202 $line_of_tokens->{_starting_in_quote};
9203 $in_quote = $line_of_tokens->{_ending_in_quote};
9204 $ending_in_quote = $in_quote;
9205 $guessed_indentation_level =
9206 $line_of_tokens->{_guessed_indentation_level};
9211 my $next_nonblank_token;
9212 my $next_nonblank_token_type;
9213 my $rwhite_space_flag;
9215 $jmax = @$rtokens - 1;
9217 $container_type = "";
9218 $container_environment = "";
9219 $type_sequence = "";
9220 $no_internal_newlines = 1 - $rOpts_add_newlines;
9221 $is_static_block_comment = 0;
9223 # Handle a continued quote..
9224 if ($in_continued_quote) {
9226 # A line which is entirely a quote or pattern must go out
9227 # verbatim. Note: the \n is contained in $input_line.
9229 if ( ( $input_line =~ "\t" ) ) {
9230 note_embedded_tab();
9232 write_unindented_line("$input_line");
9233 $last_line_had_side_comment = 0;
9238 # Write line verbatim if we are in a formatting skip section
9239 if ($in_format_skipping_section) {
9240 write_unindented_line("$input_line");
9241 $last_line_had_side_comment = 0;
9243 # Note: extra space appended to comment simplifies pattern matching
9245 && $$rtoken_type[0] eq '#'
9246 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
9248 $in_format_skipping_section = 0;
9249 write_logfile_entry("Exiting formatting skip section\n");
9250 $file_writer_object->reset_consecutive_blank_lines();
9255 # See if we are entering a formatting skip section
9256 if ( $rOpts_format_skipping
9258 && $$rtoken_type[0] eq '#'
9259 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
9262 $in_format_skipping_section = 1;
9263 write_logfile_entry("Entering formatting skip section\n");
9264 write_unindented_line("$input_line");
9265 $last_line_had_side_comment = 0;
9269 # delete trailing blank tokens
9270 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
9272 # Handle a blank line..
9275 # If keep-old-blank-lines is zero, we delete all
9276 # old blank lines and let the blank line rules generate any
9278 if ($rOpts_keep_old_blank_lines) {
9280 $file_writer_object->write_blank_code_line(
9281 $rOpts_keep_old_blank_lines == 2 );
9282 $last_line_leading_type = 'b';
9284 $last_line_had_side_comment = 0;
9288 # see if this is a static block comment (starts with ## by default)
9289 my $is_static_block_comment_without_leading_space = 0;
9291 && $$rtoken_type[0] eq '#'
9292 && $rOpts->{'static-block-comments'}
9293 && $input_line =~ /$static_block_comment_pattern/o )
9295 $is_static_block_comment = 1;
9296 $is_static_block_comment_without_leading_space =
9297 substr( $input_line, 0, 1 ) eq '#';
9300 # Check for comments which are line directives
9301 # Treat exactly as static block comments without leading space
9302 # reference: perlsyn, near end, section Plain Old Comments (Not!)
9303 # example: '# line 42 "new_filename.plx"'
9306 && $$rtoken_type[0] eq '#'
9307 && $input_line =~ /^\# \s*
9309 (?:\s("?)([^"]+)\2)? \s*
9313 $is_static_block_comment = 1;
9314 $is_static_block_comment_without_leading_space = 1;
9317 # create a hanging side comment if appropriate
9318 my $is_hanging_side_comment;
9321 && $$rtoken_type[0] eq '#' # only token is a comment
9322 && $last_line_had_side_comment # last line had side comment
9323 && $input_line =~ /^\s/ # there is some leading space
9324 && !$is_static_block_comment # do not make static comment hanging
9325 && $rOpts->{'hanging-side-comments'} # user is allowing
9326 # hanging side comments
9331 # We will insert an empty qw string at the start of the token list
9332 # to force this comment to be a side comment. The vertical aligner
9333 # should then line it up with the previous side comment.
9334 $is_hanging_side_comment = 1;
9335 unshift @$rtoken_type, 'q';
9336 unshift @$rtokens, '';
9337 unshift @$rlevels, $$rlevels[0];
9338 unshift @$rslevels, $$rslevels[0];
9339 unshift @$rblock_type, '';
9340 unshift @$rcontainer_type, '';
9341 unshift @$rcontainer_environment, '';
9342 unshift @$rtype_sequence, '';
9343 unshift @$rnesting_tokens, $$rnesting_tokens[0];
9344 unshift @$rci_levels, $$rci_levels[0];
9345 unshift @$rnesting_blocks, $$rnesting_blocks[0];
9349 # remember if this line has a side comment
9350 $last_line_had_side_comment =
9351 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
9353 # Handle a block (full-line) comment..
9354 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
9356 if ( $rOpts->{'delete-block-comments'} ) { return }
9358 if ( $rOpts->{'tee-block-comments'} ) {
9359 $file_writer_object->tee_on();
9362 destroy_one_line_block();
9363 output_line_to_go();
9365 # output a blank line before block comments
9367 # unless we follow a blank or comment line
9368 $last_line_leading_type !~ /^[#b]$/
9371 && $rOpts->{'blanks-before-comments'}
9373 # not if this is an empty comment line
9374 && $$rtokens[0] ne '#'
9376 # not after a short line ending in an opening token
9377 # because we already have space above this comment.
9378 # Note that the first comment in this if block, after
9379 # the 'if (', does not get a blank line because of this.
9380 && !$last_output_short_opening_token
9382 # never before static block comments
9383 && !$is_static_block_comment
9386 flush(); # switching to new output stream
9387 $file_writer_object->write_blank_code_line();
9388 $last_line_leading_type = 'b';
9391 # TRIM COMMENTS -- This could be turned off as a option
9392 $$rtokens[0] =~ s/\s*$//; # trim right end
9395 $rOpts->{'indent-block-comments'}
9396 && ( !$rOpts->{'indent-spaced-block-comments'}
9397 || $input_line =~ /^\s+/ )
9398 && !$is_static_block_comment_without_leading_space
9402 store_token_to_go();
9403 output_line_to_go();
9406 flush(); # switching to new output stream
9407 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
9408 $last_line_leading_type = '#';
9410 if ( $rOpts->{'tee-block-comments'} ) {
9411 $file_writer_object->tee_off();
9416 # compare input/output indentation except for continuation lines
9417 # (because they have an unknown amount of initial blank space)
9418 # and lines which are quotes (because they may have been outdented)
9419 # Note: this test is placed here because we know the continuation flag
9420 # at this point, which allows us to avoid non-meaningful checks.
9421 my $structural_indentation_level = $$rlevels[0];
9422 compare_indentation_levels( $guessed_indentation_level,
9423 $structural_indentation_level )
9424 unless ( $is_hanging_side_comment
9425 || $$rci_levels[0] > 0
9426 || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' );
9428 # Patch needed for MakeMaker. Do not break a statement
9429 # in which $VERSION may be calculated. See MakeMaker.pm;
9430 # this is based on the coding in it.
9431 # The first line of a file that matches this will be eval'd:
9432 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9434 # *VERSION = \'1.01';
9435 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
9436 # We will pass such a line straight through without breaking
9437 # it unless -npvl is used
9439 my $is_VERSION_statement = 0;
9442 !$saw_VERSION_in_this_file
9443 && $input_line =~ /VERSION/ # quick check to reject most lines
9444 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9447 $saw_VERSION_in_this_file = 1;
9448 $is_VERSION_statement = 1;
9449 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
9450 $no_internal_newlines = 1;
9453 # take care of indentation-only
9454 # NOTE: In previous versions we sent all qw lines out immediately here.
9455 # No longer doing this: also write a line which is entirely a 'qw' list
9456 # to allow stacking of opening and closing tokens. Note that interior
9457 # qw lines will still go out at the end of this routine.
9458 if ( $rOpts->{'indent-only'} ) {
9463 $token = $input_line;
9466 $container_type = "";
9467 $container_environment = "";
9468 $type_sequence = "";
9469 store_token_to_go();
9470 output_line_to_go();
9474 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
9475 push( @$rtoken_type, 'b', 'b' );
9476 ($rwhite_space_flag) =
9477 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
9479 # if the buffer hasn't been flushed, add a leading space if
9480 # necessary to keep essential whitespace. This is really only
9481 # necessary if we are squeezing out all ws.
9482 if ( $max_index_to_go >= 0 ) {
9484 $old_line_count_in_batch++;
9487 is_essential_whitespace(
9488 $last_last_nonblank_token,
9489 $last_last_nonblank_type,
9490 $tokens_to_go[$max_index_to_go],
9491 $types_to_go[$max_index_to_go],
9497 my $slevel = $$rslevels[0];
9498 insert_new_token_to_go( ' ', 'b', $slevel,
9499 $no_internal_newlines );
9503 # If we just saw the end of an elsif block, write nag message
9504 # if we do not see another elseif or an else.
9505 if ($looking_for_else) {
9507 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
9508 write_logfile_entry("(No else block)\n");
9510 $looking_for_else = 0;
9513 # This is a good place to kill incomplete one-line blocks
9514 if ( ( $semicolons_before_block_self_destruct == 0 )
9515 && ( $max_index_to_go >= 0 )
9516 && ( $types_to_go[$max_index_to_go] eq ';' )
9517 && ( $$rtokens[0] ne '}' ) )
9519 destroy_one_line_block();
9520 output_line_to_go();
9523 # loop to process the tokens one-by-one
9527 foreach $j ( 0 .. $jmax ) {
9529 # pull out the local values for this token
9532 if ( $type eq '#' ) {
9534 # trim trailing whitespace
9535 # (there is no option at present to prevent this)
9539 $rOpts->{'delete-side-comments'}
9541 # delete closing side comments if necessary
9542 || ( $rOpts->{'delete-closing-side-comments'}
9543 && $token =~ /$closing_side_comment_prefix_pattern/o
9544 && $last_nonblank_block_type =~
9545 /$closing_side_comment_list_pattern/o )
9548 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9549 unstore_token_to_go();
9555 # If we are continuing after seeing a right curly brace, flush
9556 # buffer unless we see what we are looking for, as in
9558 if ( $rbrace_follower && $type ne 'b' ) {
9560 unless ( $rbrace_follower->{$token} ) {
9561 output_line_to_go();
9563 $rbrace_follower = undef;
9566 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
9567 $next_nonblank_token = $$rtokens[$j_next];
9568 $next_nonblank_token_type = $$rtoken_type[$j_next];
9570 #--------------------------------------------------------
9571 # Start of section to patch token text
9572 #--------------------------------------------------------
9574 # Modify certain tokens here for whitespace
9575 # The following is not yet done, but could be:
9577 if ( $type =~ /^[wit]$/ ) {
9580 # change '$ var' to '$var' etc
9581 # '-> new' to '->new'
9582 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
9586 # Split identifiers with leading arrows, inserting blanks if
9587 # necessary. It is easier and safer here than in the
9588 # tokenizer. For example '->new' becomes two tokens, '->' and
9589 # 'new' with a possible blank between.
9591 # Note: there is a related patch in sub set_white_space_flag
9592 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
9593 my $token_save = $1;
9594 my $type_save = $type;
9596 # store a blank to left of arrow if necessary
9597 if ( $max_index_to_go >= 0
9598 && $types_to_go[$max_index_to_go] ne 'b'
9599 && $want_left_space{'->'} == WS_YES )
9601 insert_new_token_to_go( ' ', 'b', $slevel,
9602 $no_internal_newlines );
9605 # then store the arrow
9608 store_token_to_go();
9610 # then reset the current token to be the remainder,
9611 # and reset the whitespace flag according to the arrow
9612 $$rwhite_space_flag[$j] = $want_right_space{'->'};
9613 $token = $token_save;
9617 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
9619 # trim identifiers of trailing blanks which can occur
9620 # under some unusual circumstances, such as if the
9621 # identifier 'witch' has trailing blanks on input here:
9625 # () # prototype may be on new line ...
9627 if ( $type eq 'i' ) { $token =~ s/\s+$//g }
9630 # change 'LABEL :' to 'LABEL:'
9631 elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
9633 # patch to add space to something like "x10"
9634 # This avoids having to split this token in the pre-tokenizer
9635 elsif ( $type eq 'n' ) {
9636 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
9639 elsif ( $type eq 'Q' ) {
9640 note_embedded_tab() if ( $token =~ "\t" );
9642 # make note of something like '$var = s/xxx/yyy/;'
9643 # in case it should have been '$var =~ s/xxx/yyy/;'
9645 $token =~ /^(s|tr|y|m|\/)/
9646 && $last_nonblank_token =~ /^(=|==|!=)$/
9648 # preceded by simple scalar
9649 && $last_last_nonblank_type eq 'i'
9650 && $last_last_nonblank_token =~ /^\$/
9652 # followed by some kind of termination
9653 # (but give complaint if we can's see far enough ahead)
9654 && $next_nonblank_token =~ /^[; \)\}]$/
9656 # scalar is not declared
9658 $types_to_go[0] eq 'k'
9659 && $tokens_to_go[0] =~ /^(my|our|local)$/
9663 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
9665 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
9670 # trim blanks from right of qw quotes
9671 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
9672 elsif ( $type eq 'q' ) {
9674 note_embedded_tab() if ( $token =~ "\t" );
9677 #--------------------------------------------------------
9678 # End of section to patch token text
9679 #--------------------------------------------------------
9681 # insert any needed whitespace
9682 if ( ( $type ne 'b' )
9683 && ( $max_index_to_go >= 0 )
9684 && ( $types_to_go[$max_index_to_go] ne 'b' )
9685 && $rOpts_add_whitespace )
9687 my $ws = $$rwhite_space_flag[$j];
9690 insert_new_token_to_go( ' ', 'b', $slevel,
9691 $no_internal_newlines );
9695 # Do not allow breaks which would promote a side comment to a
9696 # block comment. In order to allow a break before an opening
9697 # or closing BLOCK, followed by a side comment, those sections
9698 # of code will handle this flag separately.
9699 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
9700 my $is_opening_BLOCK =
9704 && $block_type ne 't' );
9705 my $is_closing_BLOCK =
9709 && $block_type ne 't' );
9711 if ( $side_comment_follows
9712 && !$is_opening_BLOCK
9713 && !$is_closing_BLOCK )
9715 $no_internal_newlines = 1;
9718 # We're only going to handle breaking for code BLOCKS at this
9719 # (top) level. Other indentation breaks will be handled by
9720 # sub scan_list, which is better suited to dealing with them.
9721 if ($is_opening_BLOCK) {
9723 # Tentatively output this token. This is required before
9724 # calling starting_one_line_block. We may have to unstore
9725 # it, though, if we have to break before it.
9726 store_token_to_go($side_comment_follows);
9728 # Look ahead to see if we might form a one-line block
9730 starting_one_line_block( $j, $jmax, $level, $slevel,
9731 $ci_level, $rtokens, $rtoken_type, $rblock_type );
9732 clear_breakpoint_undo_stack();
9734 # to simplify the logic below, set a flag to indicate if
9735 # this opening brace is far from the keyword which introduces it
9736 my $keyword_on_same_line = 1;
9737 if ( ( $max_index_to_go >= 0 )
9738 && ( $last_nonblank_type eq ')' ) )
9740 if ( $block_type =~ /^(if|else|elsif)$/
9741 && ( $tokens_to_go[0] eq '}' )
9742 && $rOpts_cuddled_else )
9744 $keyword_on_same_line = 1;
9746 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
9748 $keyword_on_same_line = 0;
9752 # decide if user requested break before '{'
9755 # use -bl flag if not a sub block of any type
9756 $block_type !~ /^sub/
9757 ? $rOpts->{'opening-brace-on-new-line'}
9759 # use -sbl flag for a named sub block
9760 : $block_type !~ /^sub\W*$/
9761 ? $rOpts->{'opening-sub-brace-on-new-line'}
9763 # use -asbl flag for an anonymous sub block
9764 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9766 # Break before an opening '{' ...
9772 # and we were unable to start looking for a block,
9773 && $index_start_one_line_block == UNDEFINED_INDEX
9775 # or if it will not be on same line as its keyword, so that
9776 # it will be outdented (eval.t, overload.t), and the user
9777 # has not insisted on keeping it on the right
9778 || ( !$keyword_on_same_line
9779 && !$rOpts->{'opening-brace-always-on-right'} )
9784 # but only if allowed
9785 unless ($no_internal_newlines) {
9787 # since we already stored this token, we must unstore it
9788 unstore_token_to_go();
9790 # then output the line
9791 output_line_to_go();
9793 # and now store this token at the start of a new line
9794 store_token_to_go($side_comment_follows);
9798 # Now update for side comment
9799 if ($side_comment_follows) { $no_internal_newlines = 1 }
9801 # now output this line
9802 unless ($no_internal_newlines) {
9803 output_line_to_go();
9807 elsif ($is_closing_BLOCK) {
9809 # If there is a pending one-line block ..
9810 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9812 # we have to terminate it if..
9815 # it is too long (final length may be different from
9816 # initial estimate). note: must allow 1 space for this token
9817 excess_line_length( $index_start_one_line_block,
9818 $max_index_to_go ) >= 0
9820 # or if it has too many semicolons
9821 || ( $semicolons_before_block_self_destruct == 0
9822 && $last_nonblank_type ne ';' )
9825 destroy_one_line_block();
9829 # put a break before this closing curly brace if appropriate
9830 unless ( $no_internal_newlines
9831 || $index_start_one_line_block != UNDEFINED_INDEX )
9834 # add missing semicolon if ...
9835 # there are some tokens
9837 ( $max_index_to_go > 0 )
9839 # and we don't have one
9840 && ( $last_nonblank_type ne ';' )
9842 # patch until some block type issues are fixed:
9843 # Do not add semi-colon for block types '{',
9844 # '}', and ';' because we cannot be sure yet
9845 # that this is a block and not an anonymous
9846 # hash (blktype.t, blktype1.t)
9847 && ( $block_type !~ /^[\{\};]$/ )
9849 # patch: and do not add semi-colons for recently
9850 # added block types (see tmp/semicolon.t)
9852 /^(switch|case|given|when|default)$/ )
9854 # it seems best not to add semicolons in these
9855 # special block types: sort|map|grep
9856 && ( !$is_sort_map_grep{$block_type} )
9858 # and we are allowed to do so.
9859 && $rOpts->{'add-semicolons'}
9863 save_current_token();
9866 $level = $levels_to_go[$max_index_to_go];
9867 $slevel = $nesting_depth_to_go[$max_index_to_go];
9869 $nesting_blocks_to_go[$max_index_to_go];
9870 $ci_level = $ci_levels_to_go[$max_index_to_go];
9872 $container_type = "";
9873 $container_environment = "";
9874 $type_sequence = "";
9876 # Note - we remove any blank AFTER extracting its
9877 # parameters such as level, etc, above
9878 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9879 unstore_token_to_go();
9881 store_token_to_go();
9883 note_added_semicolon();
9884 restore_current_token();
9887 # then write out everything before this closing curly brace
9888 output_line_to_go();
9892 # Now update for side comment
9893 if ($side_comment_follows) { $no_internal_newlines = 1 }
9895 # store the closing curly brace
9896 store_token_to_go();
9898 # ok, we just stored a closing curly brace. Often, but
9899 # not always, we want to end the line immediately.
9900 # So now we have to check for special cases.
9902 # if this '}' successfully ends a one-line block..
9903 my $is_one_line_block = 0;
9905 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9907 # Remember the type of token just before the
9908 # opening brace. It would be more general to use
9909 # a stack, but this will work for one-line blocks.
9910 $is_one_line_block =
9911 $types_to_go[$index_start_one_line_block];
9913 # we have to actually make it by removing tentative
9914 # breaks that were set within it
9915 undo_forced_breakpoint_stack(0);
9916 set_nobreaks( $index_start_one_line_block,
9917 $max_index_to_go - 1 );
9919 # then re-initialize for the next one-line block
9920 destroy_one_line_block();
9922 # then decide if we want to break after the '}' ..
9923 # We will keep going to allow certain brace followers as in:
9924 # do { $ifclosed = 1; last } unless $losing;
9926 # But make a line break if the curly ends a
9927 # significant block:
9929 $is_block_without_semicolon{$block_type}
9931 # if needless semicolon follows we handle it later
9932 && $next_nonblank_token ne ';'
9935 output_line_to_go() unless ($no_internal_newlines);
9939 # set string indicating what we need to look for brace follower
9941 if ( $block_type eq 'do' ) {
9942 $rbrace_follower = \%is_do_follower;
9944 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9945 $rbrace_follower = \%is_if_brace_follower;
9947 elsif ( $block_type eq 'else' ) {
9948 $rbrace_follower = \%is_else_brace_follower;
9951 # added eval for borris.t
9952 elsif ($is_sort_map_grep_eval{$block_type}
9953 || $is_one_line_block eq 'G' )
9955 $rbrace_follower = undef;
9960 elsif ( $block_type =~ /^sub\W*$/ ) {
9962 if ($is_one_line_block) {
9963 $rbrace_follower = \%is_anon_sub_1_brace_follower;
9966 $rbrace_follower = \%is_anon_sub_brace_follower;
9970 # None of the above: specify what can follow a closing
9971 # brace of a block which is not an
9972 # if/elsif/else/do/sort/map/grep/eval
9974 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9976 $rbrace_follower = \%is_other_brace_follower;
9979 # See if an elsif block is followed by another elsif or else;
9981 if ( $block_type eq 'elsif' ) {
9983 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
9984 $looking_for_else = 1; # ok, check on next line
9988 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9989 write_logfile_entry("No else block :(\n");
9994 # keep going after certain block types (map,sort,grep,eval)
9995 # added eval for borris.t
10001 # if no more tokens, postpone decision until re-entring
10002 elsif ( ( $next_nonblank_token_type eq 'b' )
10003 && $rOpts_add_newlines )
10005 unless ($rbrace_follower) {
10006 output_line_to_go() unless ($no_internal_newlines);
10010 elsif ($rbrace_follower) {
10012 unless ( $rbrace_follower->{$next_nonblank_token} ) {
10013 output_line_to_go() unless ($no_internal_newlines);
10015 $rbrace_follower = undef;
10019 output_line_to_go() unless ($no_internal_newlines);
10022 } # end treatment of closing block token
10025 elsif ( $type eq ';' ) {
10027 # kill one-line blocks with too many semicolons
10028 $semicolons_before_block_self_destruct--;
10030 ( $semicolons_before_block_self_destruct < 0 )
10031 || ( $semicolons_before_block_self_destruct == 0
10032 && $next_nonblank_token_type !~ /^[b\}]$/ )
10035 destroy_one_line_block();
10038 # Remove unnecessary semicolons, but not after bare
10039 # blocks, where it could be unsafe if the brace is
10043 $last_nonblank_token eq '}'
10045 $is_block_without_semicolon{
10046 $last_nonblank_block_type}
10047 || $last_nonblank_block_type =~ /^sub\s+\w/
10048 || $last_nonblank_block_type =~ /^\w+:$/ )
10050 || $last_nonblank_type eq ';'
10055 $rOpts->{'delete-semicolons'}
10057 # don't delete ; before a # because it would promote it
10058 # to a block comment
10059 && ( $next_nonblank_token_type ne '#' )
10062 note_deleted_semicolon();
10063 output_line_to_go()
10064 unless ( $no_internal_newlines
10065 || $index_start_one_line_block != UNDEFINED_INDEX );
10069 write_logfile_entry("Extra ';'\n");
10072 store_token_to_go();
10074 output_line_to_go()
10075 unless ( $no_internal_newlines
10076 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
10077 || ( $next_nonblank_token eq '}' ) );
10081 # handle here_doc target string
10082 elsif ( $type eq 'h' ) {
10083 $no_internal_newlines =
10084 1; # no newlines after seeing here-target
10085 destroy_one_line_block();
10086 store_token_to_go();
10089 # handle all other token types
10092 # if this is a blank...
10093 if ( $type eq 'b' ) {
10095 # make it just one character
10096 $token = ' ' if $rOpts_add_whitespace;
10098 # delete it if unwanted by whitespace rules
10099 # or we are deleting all whitespace
10100 my $ws = $$rwhite_space_flag[ $j + 1 ];
10101 if ( ( defined($ws) && $ws == -1 )
10102 || $rOpts_delete_old_whitespace )
10105 # unless it might make a syntax error
10107 unless is_essential_whitespace(
10108 $last_last_nonblank_token,
10109 $last_last_nonblank_type,
10110 $tokens_to_go[$max_index_to_go],
10111 $types_to_go[$max_index_to_go],
10112 $$rtokens[ $j + 1 ],
10113 $$rtoken_type[ $j + 1 ]
10117 store_token_to_go();
10120 # remember two previous nonblank OUTPUT tokens
10121 if ( $type ne '#' && $type ne 'b' ) {
10122 $last_last_nonblank_token = $last_nonblank_token;
10123 $last_last_nonblank_type = $last_nonblank_type;
10124 $last_nonblank_token = $token;
10125 $last_nonblank_type = $type;
10126 $last_nonblank_block_type = $block_type;
10129 # unset the continued-quote flag since it only applies to the
10130 # first token, and we want to resume normal formatting if
10131 # there are additional tokens on the line
10132 $in_continued_quote = 0;
10134 } # end of loop over all tokens in this 'line_of_tokens'
10136 # we have to flush ..
10139 # if there is a side comment
10140 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
10142 # if this line ends in a quote
10143 # NOTE: This is critically important for insuring that quoted lines
10144 # do not get processed by things like -sot and -sct
10147 # if this is a VERSION statement
10148 || $is_VERSION_statement
10150 # to keep a label at the end of a line
10153 # if we are instructed to keep all old line breaks
10154 || !$rOpts->{'delete-old-newlines'}
10157 destroy_one_line_block();
10158 output_line_to_go();
10161 # mark old line breakpoints in current output stream
10162 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
10163 $old_breakpoint_to_go[$max_index_to_go] = 1;
10165 } ## end sub print_line_of_tokens
10166 } ## end block print_line_of_tokens
10168 # sub output_line_to_go sends one logical line of tokens on down the
10169 # pipeline to the VerticalAligner package, breaking the line into continuation
10170 # lines as necessary. The line of tokens is ready to go in the "to_go"
10172 sub output_line_to_go {
10174 # debug stuff; this routine can be called from many points
10175 FORMATTER_DEBUG_FLAG_OUTPUT && do {
10176 my ( $a, $b, $c ) = caller;
10178 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
10180 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
10181 write_diagnostics("$output_str\n");
10184 # just set a tentative breakpoint if we might be in a one-line block
10185 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10186 set_forced_breakpoint($max_index_to_go);
10190 my $cscw_block_comment;
10191 $cscw_block_comment = add_closing_side_comment()
10192 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
10194 my $comma_arrow_count_contained = match_opening_and_closing_tokens();
10196 # tell the -lp option we are outputting a batch so it can close
10197 # any unfinished items in its stack
10200 # If this line ends in a code block brace, set breaks at any
10201 # previous closing code block braces to breakup a chain of code
10202 # blocks on one line. This is very rare but can happen for
10203 # user-defined subs. For example we might be looking at this:
10204 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
10205 my $saw_good_break = 0; # flag to force breaks even if short line
10208 # looking for opening or closing block brace
10209 $block_type_to_go[$max_index_to_go]
10211 # but not one of these which are never duplicated on a line:
10212 # until|while|for|if|elsif|else
10213 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
10216 my $lev = $nesting_depth_to_go[$max_index_to_go];
10218 # Walk backwards from the end and
10219 # set break at any closing block braces at the same level.
10220 # But quit if we are not in a chain of blocks.
10221 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
10222 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
10223 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
10225 if ( $block_type_to_go[$i] ) {
10226 if ( $tokens_to_go[$i] eq '}' ) {
10227 set_forced_breakpoint($i);
10228 $saw_good_break = 1;
10232 # quit if we see anything besides words, function, blanks
10234 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
10239 my $imax = $max_index_to_go;
10241 # trim any blank tokens
10242 if ( $max_index_to_go >= 0 ) {
10243 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
10244 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
10247 # anything left to write?
10248 if ( $imin <= $imax ) {
10250 # add a blank line before certain key types but not after a comment
10251 if ( $last_line_leading_type !~ /^[#]/ ) {
10252 my $want_blank = 0;
10253 my $leading_token = $tokens_to_go[$imin];
10254 my $leading_type = $types_to_go[$imin];
10256 # blank lines before subs except declarations and one-liners
10257 # MCONVERSION LOCATION - for sub tokenization change
10258 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
10259 $want_blank = $rOpts->{'blank-lines-before-subs'}
10261 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10262 $imax ) !~ /^[\;\}]$/
10266 # break before all package declarations
10267 # MCONVERSION LOCATION - for tokenizaton change
10268 elsif ($leading_token =~ /^(package\s)/
10269 && $leading_type eq 'i' )
10271 $want_blank = $rOpts->{'blank-lines-before-packages'};
10274 # break before certain key blocks except one-liners
10275 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
10276 $want_blank = $rOpts->{'blank-lines-before-subs'}
10278 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10283 # Break before certain block types if we haven't had a
10284 # break at this level for a while. This is the
10285 # difficult decision..
10286 elsif ($leading_type eq 'k'
10287 && $last_line_leading_type ne 'b'
10288 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
10290 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
10291 if ( !defined($lc) ) { $lc = 0 }
10294 $rOpts->{'blanks-before-blocks'}
10295 && $lc >= $rOpts->{'long-block-line-count'}
10296 && $file_writer_object->get_consecutive_nonblank_lines() >=
10297 $rOpts->{'long-block-line-count'}
10299 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10306 # future: send blank line down normal path to VerticalAligner
10307 Perl::Tidy::VerticalAligner::flush();
10308 $file_writer_object->require_blank_code_lines($want_blank);
10312 # update blank line variables and count number of consecutive
10313 # non-blank, non-comment lines at this level
10314 $last_last_line_leading_level = $last_line_leading_level;
10315 $last_line_leading_level = $levels_to_go[$imin];
10316 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
10317 $last_line_leading_type = $types_to_go[$imin];
10318 if ( $last_line_leading_level == $last_last_line_leading_level
10319 && $last_line_leading_type ne 'b'
10320 && $last_line_leading_type ne '#'
10321 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
10323 $nonblank_lines_at_depth[$last_line_leading_level]++;
10326 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
10329 FORMATTER_DEBUG_FLAG_FLUSH && do {
10330 my ( $package, $file, $line ) = caller;
10332 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
10335 # add a couple of extra terminal blank tokens
10338 # set all forced breakpoints for good list formatting
10339 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
10343 || $old_line_count_in_batch > 1
10345 # must always call scan_list() with unbalanced batches because it
10346 # is maintaining some stacks
10347 || is_unbalanced_batch()
10349 # call scan_list if we might want to break at commas
10351 $comma_count_in_batch
10352 && ( $rOpts_maximum_fields_per_table > 0
10353 || $rOpts_comma_arrow_breakpoints == 0 )
10356 # call scan_list if user may want to break open some one-line
10358 || ( $comma_arrow_count_contained
10359 && $rOpts_comma_arrow_breakpoints != 3 )
10362 ## This caused problems in one version of perl for unknown reasons:
10363 ## $saw_good_break ||= scan_list();
10364 my $sgb = scan_list();
10365 $saw_good_break ||= $sgb;
10368 # let $ri_first and $ri_last be references to lists of
10369 # first and last tokens of line fragments to output..
10370 my ( $ri_first, $ri_last );
10372 # write a single line if..
10375 # we aren't allowed to add any newlines
10376 !$rOpts_add_newlines
10378 # or, we don't already have an interior breakpoint
10379 # and we didn't see a good breakpoint
10381 !$forced_breakpoint_count
10382 && !$saw_good_break
10384 # and this line is 'short'
10389 @$ri_first = ($imin);
10390 @$ri_last = ($imax);
10393 # otherwise use multiple lines
10396 ( $ri_first, $ri_last, my $colon_count ) =
10397 set_continuation_breaks($saw_good_break);
10399 break_all_chain_tokens( $ri_first, $ri_last );
10401 break_equals( $ri_first, $ri_last );
10403 # now we do a correction step to clean this up a bit
10404 # (The only time we would not do this is for debugging)
10405 if ( $rOpts->{'recombine'} ) {
10406 ( $ri_first, $ri_last ) =
10407 recombine_breakpoints( $ri_first, $ri_last );
10410 insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
10413 # do corrector step if -lp option is used
10414 my $do_not_pad = 0;
10415 if ($rOpts_line_up_parentheses) {
10416 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
10418 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
10420 prepare_for_new_input_lines();
10422 # output any new -cscw block comment
10423 if ($cscw_block_comment) {
10425 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10429 sub note_added_semicolon {
10430 $last_added_semicolon_at = $input_line_number;
10431 if ( $added_semicolon_count == 0 ) {
10432 $first_added_semicolon_at = $last_added_semicolon_at;
10434 $added_semicolon_count++;
10435 write_logfile_entry("Added ';' here\n");
10438 sub note_deleted_semicolon {
10439 $last_deleted_semicolon_at = $input_line_number;
10440 if ( $deleted_semicolon_count == 0 ) {
10441 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
10443 $deleted_semicolon_count++;
10444 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
10447 sub note_embedded_tab {
10448 $embedded_tab_count++;
10449 $last_embedded_tab_at = $input_line_number;
10450 if ( !$first_embedded_tab_at ) {
10451 $first_embedded_tab_at = $last_embedded_tab_at;
10454 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
10455 write_logfile_entry("Embedded tabs in quote or pattern\n");
10459 sub starting_one_line_block {
10461 # after seeing an opening curly brace, look for the closing brace
10462 # and see if the entire block will fit on a line. This routine is
10463 # not always right because it uses the old whitespace, so a check
10464 # is made later (at the closing brace) to make sure we really
10465 # have a one-line block. We have to do this preliminary check,
10466 # though, because otherwise we would always break at a semicolon
10467 # within a one-line block if the block contains multiple statements.
10469 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
10473 # kill any current block - we can only go 1 deep
10474 destroy_one_line_block();
10477 # 1=distance from start of block to opening brace exceeds line length
10482 # shouldn't happen: there must have been a prior call to
10483 # store_token_to_go to put the opening brace in the output stream
10484 if ( $max_index_to_go < 0 ) {
10485 warning("program bug: store_token_to_go called incorrectly\n");
10486 report_definite_bug();
10490 # cannot use one-line blocks with cuddled else/elsif lines
10491 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
10496 my $block_type = $$rblock_type[$j];
10498 # find the starting keyword for this block (such as 'if', 'else', ...)
10500 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
10501 $i_start = $max_index_to_go;
10504 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
10506 # For something like "if (xxx) {", the keyword "if" will be
10507 # just after the most recent break. This will be 0 unless
10508 # we have just killed a one-line block and are starting another.
10510 # Note: cannot use inext_index_to_go[] here because that array
10511 # is still being constructed.
10512 $i_start = $index_max_forced_break + 1;
10513 if ( $types_to_go[$i_start] eq 'b' ) {
10517 unless ( $tokens_to_go[$i_start] eq $block_type ) {
10522 # the previous nonblank token should start these block types
10523 elsif (( $last_last_nonblank_token_to_go eq $block_type )
10524 || ( $block_type =~ /^sub/ ) )
10526 $i_start = $last_last_nonblank_index_to_go;
10529 # patch for SWITCH/CASE to retain one-line case/when blocks
10530 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
10532 # Note: cannot use inext_index_to_go[] here because that array
10533 # is still being constructed.
10534 $i_start = $index_max_forced_break + 1;
10535 if ( $types_to_go[$i_start] eq 'b' ) {
10538 unless ( $tokens_to_go[$i_start] eq $block_type ) {
10547 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
10551 # see if length is too long to even start
10552 if ( $pos > maximum_line_length($i_start) ) {
10556 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
10558 # old whitespace could be arbitrarily large, so don't use it
10559 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
10560 else { $pos += rtoken_length($i) }
10562 # Return false result if we exceed the maximum line length,
10563 if ( $pos > maximum_line_length($i_start) ) {
10567 # or encounter another opening brace before finding the closing brace.
10568 elsif ($$rtokens[$i] eq '{'
10569 && $$rtoken_type[$i] eq '{'
10570 && $$rblock_type[$i] )
10575 # if we find our closing brace..
10576 elsif ($$rtokens[$i] eq '}'
10577 && $$rtoken_type[$i] eq '}'
10578 && $$rblock_type[$i] )
10581 # be sure any trailing comment also fits on the line
10583 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
10585 # Patch for one-line sort/map/grep/eval blocks with side comments:
10586 # We will ignore the side comment length for sort/map/grep/eval
10587 # because this can lead to statements which change every time
10588 # perltidy is run. Here is an example from Denis Moskowitz which
10589 # oscillates between these two states without this patch:
10592 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10596 ## $_->foo ne 'bar'
10597 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10601 # When the first line is input it gets broken apart by the main
10602 # line break logic in sub print_line_of_tokens.
10603 # When the second line is input it gets recombined by
10604 # print_line_of_tokens and passed to the output routines. The
10605 # output routines (set_continuation_breaks) do not break it apart
10606 # because the bond strengths are set to the highest possible value
10607 # for grep/map/eval/sort blocks, so the first version gets output.
10608 # It would be possible to fix this by changing bond strengths,
10609 # but they are high to prevent errors in older versions of perl.
10611 if ( $$rtoken_type[$i_nonblank] eq '#'
10612 && !$is_sort_map_grep{$block_type} )
10615 $pos += rtoken_length($i_nonblank);
10617 if ( $i_nonblank > $i + 1 ) {
10619 # source whitespace could be anything, assume
10620 # at least one space before the hash on output
10621 if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 }
10622 else { $pos += rtoken_length( $i + 1 ) }
10625 if ( $pos >= maximum_line_length($i_start) ) {
10630 # ok, it's a one-line block
10631 create_one_line_block( $i_start, 20 );
10635 # just keep going for other characters
10640 # Allow certain types of new one-line blocks to form by joining
10641 # input lines. These can be safely done, but for other block types,
10642 # we keep old one-line blocks but do not form new ones. It is not
10643 # always a good idea to make as many one-line blocks as possible,
10644 # so other types are not done. The user can always use -mangle.
10645 if ( $is_sort_map_grep_eval{$block_type} ) {
10646 create_one_line_block( $i_start, 1 );
10652 sub unstore_token_to_go {
10654 # remove most recent token from output stream
10655 if ( $max_index_to_go > 0 ) {
10656 $max_index_to_go--;
10659 $max_index_to_go = UNDEFINED_INDEX;
10664 sub want_blank_line {
10666 $file_writer_object->want_blank_line();
10669 sub write_unindented_line {
10671 $file_writer_object->write_line( $_[0] );
10676 # Undo continuation indentation in certain sequences
10677 # For example, we can undo continuation indentation in sort/map/grep chains
10678 # my $dat1 = pack( "n*",
10679 # map { $_, $lookup->{$_} }
10680 # sort { $a <=> $b }
10681 # grep { $lookup->{$_} ne $default } keys %$lookup );
10682 # To align the map/sort/grep keywords like this:
10683 # my $dat1 = pack( "n*",
10684 # map { $_, $lookup->{$_} }
10685 # sort { $a <=> $b }
10686 # grep { $lookup->{$_} ne $default } keys %$lookup );
10687 my ( $ri_first, $ri_last ) = @_;
10688 my ( $line_1, $line_2, $lev_last );
10689 my $this_line_is_semicolon_terminated;
10690 my $max_line = @$ri_first - 1;
10692 # looking at each line of this batch..
10693 # We are looking at leading tokens and looking for a sequence
10694 # all at the same level and higher level than enclosing lines.
10695 foreach my $line ( 0 .. $max_line ) {
10697 my $ibeg = $$ri_first[$line];
10698 my $lev = $levels_to_go[$ibeg];
10701 # if we have started a chain..
10704 # see if it continues..
10705 if ( $lev == $lev_last ) {
10706 if ( $types_to_go[$ibeg] eq 'k'
10707 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10710 # chain continues...
10711 # check for chain ending at end of a statement
10712 if ( $line == $max_line ) {
10714 # see of this line ends a statement
10715 my $iend = $$ri_last[$line];
10716 $this_line_is_semicolon_terminated =
10717 $types_to_go[$iend] eq ';'
10719 # with possible side comment
10720 || ( $types_to_go[$iend] eq '#'
10721 && $iend - $ibeg >= 2
10722 && $types_to_go[ $iend - 2 ] eq ';'
10723 && $types_to_go[ $iend - 1 ] eq 'b' );
10725 $line_2 = $line if ($this_line_is_semicolon_terminated);
10733 elsif ( $lev < $lev_last ) {
10735 # chain ends with previous line
10736 $line_2 = $line - 1;
10738 elsif ( $lev > $lev_last ) {
10744 # undo the continuation indentation if a chain ends
10745 if ( defined($line_2) && defined($line_1) ) {
10746 my $continuation_line_count = $line_2 - $line_1 + 1;
10747 @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10748 (0) x ($continuation_line_count);
10749 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10750 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
10755 # not in a chain yet..
10758 # look for start of a new sort/map/grep chain
10759 if ( $lev > $lev_last ) {
10760 if ( $types_to_go[$ibeg] eq 'k'
10761 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10774 # If there is a single, long parameter within parens, like this:
10776 # $self->command( "/msg "
10777 # . $infoline->chan
10778 # . " You said $1, but did you know that it's square was "
10779 # . $1 * $1 . " ?" );
10781 # we can remove the continuation indentation of the 2nd and higher lines
10782 # to achieve this effect, which is more pleasing:
10784 # $self->command("/msg "
10785 # . $infoline->chan
10786 # . " You said $1, but did you know that it's square was "
10787 # . $1 * $1 . " ?");
10789 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
10790 my $max_line = @$ri_first - 1;
10792 # must be multiple lines
10793 return unless $max_line > $line_open;
10795 my $lev_start = $levels_to_go[$i_start];
10796 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
10798 # see if all additional lines in this container have continuation
10801 my $line_1 = 1 + $line_open;
10802 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
10803 my $ibeg = $$ri_first[$n];
10804 my $iend = $$ri_last[$n];
10805 if ( $ibeg eq $closing_index ) { $n--; last }
10806 return if ( $lev_start != $levels_to_go[$ibeg] );
10807 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10808 last if ( $closing_index <= $iend );
10811 # we can reduce the indentation of all continuation lines
10812 my $continuation_line_count = $n - $line_open;
10813 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10814 (0) x ($continuation_line_count);
10815 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10816 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
10821 # insert $pad_spaces before token number $ipad
10822 my ( $ipad, $pad_spaces ) = @_;
10823 if ( $pad_spaces > 0 ) {
10824 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
10826 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
10827 $tokens_to_go[$ipad] = "";
10835 $token_lengths_to_go[$ipad] += $pad_spaces;
10836 for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) {
10837 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
10846 @_ = qw( + - * / );
10847 @is_math_op{@_} = (1) x scalar(@_);
10850 sub set_logical_padding {
10852 # Look at a batch of lines and see if extra padding can improve the
10853 # alignment when there are certain leading operators. Here is an
10854 # example, in which some extra space is introduced before
10855 # '( $year' to make it line up with the subsequent lines:
10857 # if ( ( $Year < 1601 )
10858 # || ( $Year > 2899 )
10859 # || ( $EndYear < 1601 )
10860 # || ( $EndYear > 2899 ) )
10862 # &Error_OutOfRange;
10865 my ( $ri_first, $ri_last ) = @_;
10866 my $max_line = @$ri_first - 1;
10868 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line,
10870 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10872 # looking at each line of this batch..
10873 foreach $line ( 0 .. $max_line - 1 ) {
10875 # see if the next line begins with a logical operator
10876 $ibeg = $$ri_first[$line];
10877 $iend = $$ri_last[$line];
10878 $ibeg_next = $$ri_first[ $line + 1 ];
10879 $tok_next = $tokens_to_go[$ibeg_next];
10880 $type_next = $types_to_go[$ibeg_next];
10882 $has_leading_op_next = ( $tok_next =~ /^\w/ )
10883 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
10884 : $is_chain_operator{$type_next}; # and, or
10886 next unless ($has_leading_op_next);
10888 # next line must not be at lesser depth
10890 if ( $nesting_depth_to_go[$ibeg] >
10891 $nesting_depth_to_go[$ibeg_next] );
10893 # identify the token in this line to be padded on the left
10896 # handle lines at same depth...
10897 if ( $nesting_depth_to_go[$ibeg] ==
10898 $nesting_depth_to_go[$ibeg_next] )
10901 # if this is not first line of the batch ...
10904 # and we have leading operator..
10905 next if $has_leading_op;
10907 # Introduce padding if..
10908 # 1. the previous line is at lesser depth, or
10909 # 2. the previous line ends in an assignment
10910 # 3. the previous line ends in a 'return'
10911 # 4. the previous line ends in a comma
10912 # Example 1: previous line at lesser depth
10913 # if ( ( $Year < 1601 ) # <- we are here but
10914 # || ( $Year > 2899 ) # list has not yet
10915 # || ( $EndYear < 1601 ) # collapsed vertically
10916 # || ( $EndYear > 2899 ) )
10919 # Example 2: previous line ending in assignment:
10921 # $year % 4 ? 0 # <- We are here
10922 # : $year % 100 ? 1
10923 # : $year % 400 ? 0
10926 # Example 3: previous line ending in comma:
10933 # be sure levels agree (do not indent after an indented 'if')
10935 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
10937 # allow padding on first line after a comma but only if:
10938 # (1) this is line 2 and
10939 # (2) there are at more than three lines and
10940 # (3) lines 3 and 4 have the same leading operator
10941 # These rules try to prevent padding within a long
10942 # comma-separated list.
10944 if ( $types_to_go[$iendm] eq ','
10948 my $ibeg_next_next = $$ri_first[ $line + 2 ];
10949 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
10950 $ok_comma = $tok_next_next eq $tok_next;
10955 $is_assignment{ $types_to_go[$iendm] }
10957 || ( $nesting_depth_to_go[$ibegm] <
10958 $nesting_depth_to_go[$ibeg] )
10959 || ( $types_to_go[$iendm] eq 'k'
10960 && $tokens_to_go[$iendm] eq 'return' )
10963 # we will add padding before the first token
10967 # for first line of the batch..
10970 # WARNING: Never indent if first line is starting in a
10971 # continued quote, which would change the quote.
10972 next if $starting_in_quote;
10974 # if this is text after closing '}'
10975 # then look for an interior token to pad
10976 if ( $types_to_go[$ibeg] eq '}' ) {
10980 # otherwise, we might pad if it looks really good
10983 # we might pad token $ibeg, so be sure that it
10984 # is at the same depth as the next line.
10986 if ( $nesting_depth_to_go[$ibeg] !=
10987 $nesting_depth_to_go[$ibeg_next] );
10989 # We can pad on line 1 of a statement if at least 3
10990 # lines will be aligned. Otherwise, it
10991 # can look very confusing.
10993 # We have to be careful not to pad if there are too few
10994 # lines. The current rule is:
10995 # (1) in general we require at least 3 consecutive lines
10996 # with the same leading chain operator token,
10997 # (2) but an exception is that we only require two lines
10998 # with leading colons if there are no more lines. For example,
10999 # the first $i in the following snippet would get padding
11000 # by the second rule:
11002 # $i == 1 ? ( "First", "Color" )
11003 # : $i == 2 ? ( "Then", "Rarity" )
11004 # : ( "Then", "Name" );
11006 if ( $max_line > 1 ) {
11007 my $leading_token = $tokens_to_go[$ibeg_next];
11010 # never indent line 1 of a '.' series because
11011 # previous line is most likely at same level.
11012 # TODO: we should also look at the leasing_spaces
11013 # of the last output line and skip if it is same
11015 next if ( $leading_token eq '.' );
11018 foreach my $l ( 2 .. 3 ) {
11019 last if ( $line + $l > $max_line );
11020 my $ibeg_next_next = $$ri_first[ $line + $l ];
11021 if ( $tokens_to_go[$ibeg_next_next] ne
11024 $tokens_differ = 1;
11029 next if ($tokens_differ);
11030 next if ( $count < 3 && $leading_token ne ':' );
11040 # find interior token to pad if necessary
11041 if ( !defined($ipad) ) {
11043 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
11045 # find any unclosed container
11047 unless ( $type_sequence_to_go[$i]
11048 && $mate_index_to_go[$i] > $iend );
11050 # find next nonblank token to pad
11051 $ipad = $inext_to_go[$i];
11052 last if ( $ipad > $iend );
11057 # We cannot pad a leading token at the lowest level because
11058 # it could cause a bug in which the starting indentation
11059 # level is guessed incorrectly each time the code is run
11060 # though perltidy, thus causing the code to march off to
11061 # the right. For example, the following snippet would have
11064 ## ov_method mycan( $package, '(""' ), $package
11065 ## or ov_method mycan( $package, '(0+' ), $package
11066 ## or ov_method mycan( $package, '(bool' ), $package
11067 ## or ov_method mycan( $package, '(nomethod' ), $package;
11069 # If this snippet is within a block this won't happen
11070 # unless the user just processes the snippet alone within
11071 # an editor. In that case either the user will see and
11072 # fix the problem or it will be corrected next time the
11073 # entire file is processed with perltidy.
11074 next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
11076 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
11077 ## IT DID MORE HARM THAN GOOD
11079 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
11082 ##? # do not put leading padding for just 2 lines of math
11083 ##? if ( $ipad == $ibeg
11085 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
11086 ##? && $is_math_op{$type_next}
11087 ##? && $line + 2 <= $max_line )
11089 ##? my $ibeg_next_next = $$ri_first[ $line + 2 ];
11090 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
11091 ##? next if !$is_math_op{$type_next_next};
11094 # next line must not be at greater depth
11095 my $iend_next = $$ri_last[ $line + 1 ];
11097 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
11098 $nesting_depth_to_go[$ipad] );
11100 # lines must be somewhat similar to be padded..
11101 my $inext_next = $inext_to_go[$ibeg_next];
11102 my $type = $types_to_go[$ipad];
11103 my $type_next = $types_to_go[ $ipad + 1 ];
11105 # see if there are multiple continuation lines
11106 my $logical_continuation_lines = 1;
11107 if ( $line + 2 <= $max_line ) {
11108 my $leading_token = $tokens_to_go[$ibeg_next];
11109 my $ibeg_next_next = $$ri_first[ $line + 2 ];
11110 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
11111 && $nesting_depth_to_go[$ibeg_next] eq
11112 $nesting_depth_to_go[$ibeg_next_next] )
11114 $logical_continuation_lines++;
11118 # see if leading types match
11119 my $types_match = $types_to_go[$inext_next] eq $type;
11120 my $matches_without_bang;
11122 # if first line has leading ! then compare the following token
11123 if ( !$types_match && $type eq '!' ) {
11124 $types_match = $matches_without_bang =
11125 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
11130 # either we have multiple continuation lines to follow
11131 # and we are not padding the first token
11132 ( $logical_continuation_lines > 1 && $ipad > 0 )
11140 # and keywords must match if keyword
11143 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
11149 #----------------------begin special checks--------------
11152 # A check is needed before we can make the pad.
11153 # If we are in a list with some long items, we want each
11154 # item to stand out. So in the following example, the
11155 # first line beginning with '$casefold->' would look good
11156 # padded to align with the next line, but then it
11157 # would be indented more than the last line, so we
11161 # $casefold->{code} eq '0041'
11162 # && $casefold->{status} eq 'C'
11163 # && $casefold->{mapping} eq '0061',
11168 # It would be faster, and almost as good, to use a comma
11169 # count, and not pad if comma_count > 1 and the previous
11170 # line did not end with a comma.
11174 my $ibg = $$ri_first[ $line + 1 ];
11175 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
11177 # just use simplified formula for leading spaces to avoid
11178 # needless sub calls
11179 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
11181 # look at each line beyond the next ..
11183 foreach $l ( $line + 2 .. $max_line ) {
11184 my $ibg = $$ri_first[$l];
11186 # quit looking at the end of this container
11188 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
11189 || ( $nesting_depth_to_go[$ibg] < $depth );
11191 # cannot do the pad if a later line would be
11193 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
11199 # don't pad if we end in a broken list
11200 if ( $l == $max_line ) {
11201 my $i2 = $$ri_last[$l];
11202 if ( $types_to_go[$i2] eq '#' ) {
11203 my $i1 = $$ri_first[$l];
11206 terminal_type( \@types_to_go, \@block_type_to_go,
11213 # a minus may introduce a quoted variable, and we will
11214 # add the pad only if this line begins with a bare word,
11215 # such as for the word 'Button' here:
11217 # Button => "Print letter \"~$_\"",
11218 # -command => [ sub { print "$_[0]\n" }, $_ ],
11219 # -accelerator => "Meta+$_"
11222 # On the other hand, if 'Button' is quoted, it looks best
11225 # 'Button' => "Print letter \"~$_\"",
11226 # -command => [ sub { print "$_[0]\n" }, $_ ],
11227 # -accelerator => "Meta+$_"
11229 if ( $types_to_go[$ibeg_next] eq 'm' ) {
11230 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
11233 next unless $ok_to_pad;
11235 #----------------------end special check---------------
11237 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
11238 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
11239 $pad_spaces = $length_2 - $length_1;
11241 # If the first line has a leading ! and the second does
11242 # not, then remove one space to try to align the next
11243 # leading characters, which are often the same. For example:
11245 # || $ts == $self->Holder
11246 # || $self->Holder->Type eq "Arena" )
11248 # This usually helps readability, but if there are subsequent
11249 # ! operators things will still get messed up. For example:
11251 # if ( !exists $Net::DNS::typesbyname{$qtype}
11252 # && exists $Net::DNS::classesbyname{$qtype}
11253 # && !exists $Net::DNS::classesbyname{$qclass}
11254 # && exists $Net::DNS::typesbyname{$qclass} )
11255 # We can't fix that.
11256 if ($matches_without_bang) { $pad_spaces-- }
11258 # make sure this won't change if -lp is used
11259 my $indentation_1 = $leading_spaces_to_go[$ibeg];
11260 if ( ref($indentation_1) ) {
11261 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
11262 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
11263 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 )
11270 # we might be able to handle a pad of -1 by removing a blank
11272 if ( $pad_spaces < 0 ) {
11274 if ( $pad_spaces == -1 ) {
11275 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
11277 pad_token( $ipad - 1, $pad_spaces );
11283 # now apply any padding for alignment
11284 if ( $ipad >= 0 && $pad_spaces ) {
11286 my $length_t = total_line_length( $ibeg, $iend );
11287 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
11289 pad_token( $ipad, $pad_spaces );
11297 $has_leading_op = $has_leading_op_next;
11298 } # end of loop over lines
11303 sub correct_lp_indentation {
11305 # When the -lp option is used, we need to make a last pass through
11306 # each line to correct the indentation positions in case they differ
11307 # from the predictions. This is necessary because perltidy uses a
11308 # predictor/corrector method for aligning with opening parens. The
11309 # predictor is usually good, but sometimes stumbles. The corrector
11310 # tries to patch things up once the actual opening paren locations
11312 my ( $ri_first, $ri_last ) = @_;
11313 my $do_not_pad = 0;
11315 # Note on flag '$do_not_pad':
11316 # We want to avoid a situation like this, where the aligner inserts
11317 # whitespace before the '=' to align it with a previous '=', because
11318 # otherwise the parens might become mis-aligned in a situation like
11319 # this, where the '=' has become aligned with the previous line,
11320 # pushing the opening '(' forward beyond where we want it.
11322 # $mkFloor::currentRoom = '';
11323 # $mkFloor::c_entry = $c->Entry(
11325 # -relief => 'sunken',
11329 # We leave it to the aligner to decide how to do this.
11331 # first remove continuation indentation if appropriate
11332 my $max_line = @$ri_first - 1;
11334 # looking at each line of this batch..
11335 my ( $ibeg, $iend );
11337 foreach $line ( 0 .. $max_line ) {
11338 $ibeg = $$ri_first[$line];
11339 $iend = $$ri_last[$line];
11341 # looking at each token in this output line..
11343 foreach $i ( $ibeg .. $iend ) {
11345 # How many space characters to place before this token
11346 # for special alignment. Actual padding is done in the
11349 # looking for next unvisited indentation item
11350 my $indentation = $leading_spaces_to_go[$i];
11351 if ( !$indentation->get_MARKED() ) {
11352 $indentation->set_MARKED(1);
11354 # looking for indentation item for which we are aligning
11355 # with parens, braces, and brackets
11356 next unless ( $indentation->get_ALIGN_PAREN() );
11358 # skip closed container on this line
11359 if ( $i > $ibeg ) {
11360 my $im = max( $ibeg, $iprev_to_go[$i] );
11361 if ( $type_sequence_to_go[$im]
11362 && $mate_index_to_go[$im] <= $iend )
11368 if ( $line == 1 && $i == $ibeg ) {
11372 # Ok, let's see what the error is and try to fix it
11374 my $predicted_pos = $indentation->get_SPACES();
11375 if ( $i > $ibeg ) {
11377 # token is mid-line - use length to previous token
11378 $actual_pos = total_line_length( $ibeg, $i - 1 );
11380 # for mid-line token, we must check to see if all
11381 # additional lines have continuation indentation,
11382 # and remove it if so. Otherwise, we do not get
11384 my $closing_index = $indentation->get_CLOSED();
11385 if ( $closing_index > $iend ) {
11386 my $ibeg_next = $$ri_first[ $line + 1 ];
11387 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
11388 undo_lp_ci( $line, $i, $closing_index, $ri_first,
11393 elsif ( $line > 0 ) {
11395 # handle case where token starts a new line;
11396 # use length of previous line
11397 my $ibegm = $$ri_first[ $line - 1 ];
11398 my $iendm = $$ri_last[ $line - 1 ];
11399 $actual_pos = total_line_length( $ibegm, $iendm );
11403 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
11407 # token is first character of first line of batch
11408 $actual_pos = $predicted_pos;
11411 my $move_right = $actual_pos - $predicted_pos;
11413 # done if no error to correct (gnu2.t)
11414 if ( $move_right == 0 ) {
11415 $indentation->set_RECOVERABLE_SPACES($move_right);
11419 # if we have not seen closure for this indentation in
11420 # this batch, we can only pass on a request to the
11422 my $closing_index = $indentation->get_CLOSED();
11424 if ( $closing_index < 0 ) {
11425 $indentation->set_RECOVERABLE_SPACES($move_right);
11429 # If necessary, look ahead to see if there is really any
11430 # leading whitespace dependent on this whitespace, and
11431 # also find the longest line using this whitespace.
11432 # Since it is always safe to move left if there are no
11433 # dependents, we only need to do this if we may have
11434 # dependent nodes or need to move right.
11436 my $right_margin = 0;
11437 my $have_child = $indentation->get_HAVE_CHILD();
11439 my %saw_indentation;
11440 my $line_count = 1;
11441 $saw_indentation{$indentation} = $indentation;
11443 if ( $have_child || $move_right > 0 ) {
11445 my $max_length = 0;
11446 if ( $i == $ibeg ) {
11447 $max_length = total_line_length( $ibeg, $iend );
11450 # look ahead at the rest of the lines of this batch..
11452 foreach $line_t ( $line + 1 .. $max_line ) {
11453 my $ibeg_t = $$ri_first[$line_t];
11454 my $iend_t = $$ri_last[$line_t];
11455 last if ( $closing_index <= $ibeg_t );
11457 # remember all different indentation objects
11458 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
11459 $saw_indentation{$indentation_t} = $indentation_t;
11462 # remember longest line in the group
11463 my $length_t = total_line_length( $ibeg_t, $iend_t );
11464 if ( $length_t > $max_length ) {
11465 $max_length = $length_t;
11468 $right_margin = maximum_line_length($ibeg) - $max_length;
11469 if ( $right_margin < 0 ) { $right_margin = 0 }
11472 my $first_line_comma_count =
11473 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
11474 my $comma_count = $indentation->get_COMMA_COUNT();
11475 my $arrow_count = $indentation->get_ARROW_COUNT();
11477 # This is a simple approximate test for vertical alignment:
11478 # if we broke just after an opening paren, brace, bracket,
11479 # and there are 2 or more commas in the first line,
11480 # and there are no '=>'s,
11481 # then we are probably vertically aligned. We could set
11482 # an exact flag in sub scan_list, but this is good
11484 my $indentation_count = keys %saw_indentation;
11485 my $is_vertically_aligned =
11487 && $first_line_comma_count > 1
11488 && $indentation_count == 1
11489 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
11491 # Make the move if possible ..
11494 # we can always move left
11497 # but we should only move right if we are sure it will
11498 # not spoil vertical alignment
11499 || ( $comma_count == 0 )
11500 || ( $comma_count > 0 && !$is_vertically_aligned )
11504 ( $move_right <= $right_margin )
11508 foreach ( keys %saw_indentation ) {
11509 $saw_indentation{$_}
11510 ->permanently_decrease_AVAILABLE_SPACES( -$move );
11514 # Otherwise, record what we want and the vertical aligner
11515 # will try to recover it.
11517 $indentation->set_RECOVERABLE_SPACES($move_right);
11522 return $do_not_pad;
11525 # flush is called to output any tokens in the pipeline, so that
11526 # an alternate source of lines can be written in the correct order
11529 destroy_one_line_block();
11530 output_line_to_go();
11531 Perl::Tidy::VerticalAligner::flush();
11534 sub reset_block_text_accumulator {
11536 # save text after 'if' and 'elsif' to append after 'else'
11537 if ($accumulating_text_for_block) {
11539 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
11540 push @{$rleading_block_if_elsif_text}, $leading_block_text;
11543 $accumulating_text_for_block = "";
11544 $leading_block_text = "";
11545 $leading_block_text_level = 0;
11546 $leading_block_text_length_exceeded = 0;
11547 $leading_block_text_line_number = 0;
11548 $leading_block_text_line_length = 0;
11551 sub set_block_text_accumulator {
11553 $accumulating_text_for_block = $tokens_to_go[$i];
11554 if ( $accumulating_text_for_block !~ /^els/ ) {
11555 $rleading_block_if_elsif_text = [];
11557 $leading_block_text = "";
11558 $leading_block_text_level = $levels_to_go[$i];
11559 $leading_block_text_line_number =
11560 $vertical_aligner_object->get_output_line_number();
11561 $leading_block_text_length_exceeded = 0;
11563 # this will contain the column number of the last character
11564 # of the closing side comment
11565 $leading_block_text_line_length =
11566 length($csc_last_label) +
11567 length($accumulating_text_for_block) +
11568 length( $rOpts->{'closing-side-comment-prefix'} ) +
11569 $leading_block_text_level * $rOpts_indent_columns + 3;
11572 sub accumulate_block_text {
11575 # accumulate leading text for -csc, ignoring any side comments
11576 if ( $accumulating_text_for_block
11577 && !$leading_block_text_length_exceeded
11578 && $types_to_go[$i] ne '#' )
11581 my $added_length = $token_lengths_to_go[$i];
11582 $added_length += 1 if $i == 0;
11583 my $new_line_length = $leading_block_text_line_length + $added_length;
11585 # we can add this text if we don't exceed some limits..
11588 # we must not have already exceeded the text length limit
11589 length($leading_block_text) <
11590 $rOpts_closing_side_comment_maximum_text
11593 # the new total line length must be below the line length limit
11594 # or the new length must be below the text length limit
11595 # (ie, we may allow one token to exceed the text length limit)
11598 maximum_line_length_for_level($leading_block_text_level)
11600 || length($leading_block_text) + $added_length <
11601 $rOpts_closing_side_comment_maximum_text
11604 # UNLESS: we are adding a closing paren before the brace we seek.
11605 # This is an attempt to avoid situations where the ... to be
11606 # added are longer than the omitted right paren, as in:
11608 # foreach my $item (@a_rather_long_variable_name_here) {
11610 # } ## end foreach my $item (@a_rather_long_variable_name_here...
11613 $tokens_to_go[$i] eq ')'
11616 $i + 1 <= $max_index_to_go
11617 && $block_type_to_go[ $i + 1 ] eq
11618 $accumulating_text_for_block
11620 || ( $i + 2 <= $max_index_to_go
11621 && $block_type_to_go[ $i + 2 ] eq
11622 $accumulating_text_for_block )
11628 # add an extra space at each newline
11629 if ( $i == 0 ) { $leading_block_text .= ' ' }
11631 # add the token text
11632 $leading_block_text .= $tokens_to_go[$i];
11633 $leading_block_text_line_length = $new_line_length;
11636 # show that text was truncated if necessary
11637 elsif ( $types_to_go[$i] ne 'b' ) {
11638 $leading_block_text_length_exceeded = 1;
11639 ## Please see file perltidy.ERR
11640 $leading_block_text .= '...';
11646 my %is_if_elsif_else_unless_while_until_for_foreach;
11650 # These block types may have text between the keyword and opening
11651 # curly. Note: 'else' does not, but must be included to allow trailing
11652 # if/elsif text to be appended.
11653 # patch for SWITCH/CASE: added 'case' and 'when'
11654 @_ = qw(if elsif else unless while until for foreach case when);
11655 @is_if_elsif_else_unless_while_until_for_foreach{@_} =
11659 sub accumulate_csc_text {
11661 # called once per output buffer when -csc is used. Accumulates
11662 # the text placed after certain closing block braces.
11663 # Defines and returns the following for this buffer:
11665 my $block_leading_text = ""; # the leading text of the last '}'
11666 my $rblock_leading_if_elsif_text;
11667 my $i_block_leading_text =
11668 -1; # index of token owning block_leading_text
11669 my $block_line_count = 100; # how many lines the block spans
11670 my $terminal_type = 'b'; # type of last nonblank token
11671 my $i_terminal = 0; # index of last nonblank token
11672 my $terminal_block_type = "";
11674 # update most recent statement label
11675 $csc_last_label = "" unless ($csc_last_label);
11676 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
11677 my $block_label = $csc_last_label;
11679 # Loop over all tokens of this batch
11680 for my $i ( 0 .. $max_index_to_go ) {
11681 my $type = $types_to_go[$i];
11682 my $block_type = $block_type_to_go[$i];
11683 my $token = $tokens_to_go[$i];
11685 # remember last nonblank token type
11686 if ( $type ne '#' && $type ne 'b' ) {
11687 $terminal_type = $type;
11688 $terminal_block_type = $block_type;
11692 my $type_sequence = $type_sequence_to_go[$i];
11693 if ( $block_type && $type_sequence ) {
11695 if ( $token eq '}' ) {
11697 # restore any leading text saved when we entered this block
11698 if ( defined( $block_leading_text{$type_sequence} ) ) {
11699 ( $block_leading_text, $rblock_leading_if_elsif_text )
11700 = @{ $block_leading_text{$type_sequence} };
11701 $i_block_leading_text = $i;
11702 delete $block_leading_text{$type_sequence};
11703 $rleading_block_if_elsif_text =
11704 $rblock_leading_if_elsif_text;
11707 if ( defined( $csc_block_label{$type_sequence} ) ) {
11708 $block_label = $csc_block_label{$type_sequence};
11709 delete $csc_block_label{$type_sequence};
11712 # if we run into a '}' then we probably started accumulating
11713 # at something like a trailing 'if' clause..no harm done.
11714 if ( $accumulating_text_for_block
11715 && $levels_to_go[$i] <= $leading_block_text_level )
11717 my $lev = $levels_to_go[$i];
11718 reset_block_text_accumulator();
11721 if ( defined( $block_opening_line_number{$type_sequence} ) )
11723 my $output_line_number =
11724 $vertical_aligner_object->get_output_line_number();
11725 $block_line_count =
11726 $output_line_number -
11727 $block_opening_line_number{$type_sequence} + 1;
11728 delete $block_opening_line_number{$type_sequence};
11732 # Error: block opening line undefined for this line..
11733 # This shouldn't be possible, but it is not a
11734 # significant problem.
11738 elsif ( $token eq '{' ) {
11741 $vertical_aligner_object->get_output_line_number();
11742 $block_opening_line_number{$type_sequence} = $line_number;
11744 # set a label for this block, except for
11745 # a bare block which already has the label
11746 # A label can only be used on the next {
11747 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
11748 $csc_block_label{$type_sequence} = $csc_last_label;
11749 $csc_last_label = "";
11751 if ( $accumulating_text_for_block
11752 && $levels_to_go[$i] == $leading_block_text_level )
11755 if ( $accumulating_text_for_block eq $block_type ) {
11757 # save any leading text before we enter this block
11758 $block_leading_text{$type_sequence} = [
11759 $leading_block_text,
11760 $rleading_block_if_elsif_text
11762 $block_opening_line_number{$type_sequence} =
11763 $leading_block_text_line_number;
11764 reset_block_text_accumulator();
11768 # shouldn't happen, but not a serious error.
11769 # We were accumulating -csc text for block type
11770 # $accumulating_text_for_block and unexpectedly
11771 # encountered a '{' for block type $block_type.
11778 && $csc_new_statement_ok
11779 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
11780 && $token =~ /$closing_side_comment_list_pattern/o )
11782 set_block_text_accumulator($i);
11786 # note: ignoring type 'q' because of tricks being played
11787 # with 'q' for hanging side comments
11788 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
11789 $csc_new_statement_ok =
11790 ( $block_type || $type eq 'J' || $type eq ';' );
11793 && $accumulating_text_for_block
11794 && $levels_to_go[$i] == $leading_block_text_level )
11796 reset_block_text_accumulator();
11799 accumulate_block_text($i);
11804 # Treat an 'else' block specially by adding preceding 'if' and
11805 # 'elsif' text. Otherwise, the 'end else' is not helpful,
11806 # especially for cuddled-else formatting.
11807 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
11808 $block_leading_text =
11809 make_else_csc_text( $i_terminal, $terminal_block_type,
11810 $block_leading_text, $rblock_leading_if_elsif_text );
11813 # if this line ends in a label then remember it for the next pass
11814 $csc_last_label = "";
11815 if ( $terminal_type eq 'J' ) {
11816 $csc_last_label = $tokens_to_go[$i_terminal];
11819 return ( $terminal_type, $i_terminal, $i_block_leading_text,
11820 $block_leading_text, $block_line_count, $block_label );
11824 sub make_else_csc_text {
11826 # create additional -csc text for an 'else' and optionally 'elsif',
11827 # depending on the value of switch
11828 # $rOpts_closing_side_comment_else_flag:
11830 # = 0 add 'if' text to trailing else
11831 # = 1 same as 0 plus:
11832 # add 'if' to 'elsif's if can fit in line length
11833 # add last 'elsif' to trailing else if can fit in one line
11834 # = 2 same as 1 but do not check if exceed line length
11836 # $rif_elsif_text = a reference to a list of all previous closing
11837 # side comments created for this if block
11839 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
11840 my $csc_text = $block_leading_text;
11842 if ( $block_type eq 'elsif'
11843 && $rOpts_closing_side_comment_else_flag == 0 )
11848 my $count = @{$rif_elsif_text};
11849 return $csc_text unless ($count);
11851 my $if_text = '[ if' . $rif_elsif_text->[0];
11853 # always show the leading 'if' text on 'else'
11854 if ( $block_type eq 'else' ) {
11855 $csc_text .= $if_text;
11858 # see if that's all
11859 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
11863 my $last_elsif_text = "";
11864 if ( $count > 1 ) {
11865 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
11866 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
11869 # tentatively append one more item
11870 my $saved_text = $csc_text;
11871 if ( $block_type eq 'else' ) {
11872 $csc_text .= $last_elsif_text;
11875 $csc_text .= ' ' . $if_text;
11878 # all done if no length checks requested
11879 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
11883 # undo it if line length exceeded
11885 length($csc_text) +
11886 length($block_type) +
11887 length( $rOpts->{'closing-side-comment-prefix'} ) +
11888 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
11889 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
11890 $csc_text = $saved_text;
11895 { # sub balance_csc_text
11910 sub balance_csc_text {
11912 # Append characters to balance a closing side comment so that editors
11913 # such as vim can correctly jump through code.
11915 # input = ## end foreach my $foo ( sort { $b ...
11916 # output = ## end foreach my $foo ( sort { $b ...})
11918 # NOTE: This routine does not currently filter out structures within
11919 # quoted text because the bounce algorithms in text editors do not
11920 # necessarily do this either (a version of vim was checked and
11921 # did not do this).
11923 # Some complex examples which will cause trouble for some editors:
11924 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
11925 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
11926 # if ( $1 eq '{' ) {
11927 # test file test1/braces.pl has many such examples.
11931 # loop to examine characters one-by-one, RIGHT to LEFT and
11932 # build a balancing ending, LEFT to RIGHT.
11933 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
11935 my $char = substr( $csc, $pos, 1 );
11937 # ignore everything except structural characters
11938 next unless ( $matching_char{$char} );
11940 # pop most recently appended character
11941 my $top = chop($csc);
11943 # push it back plus the mate to the newest character
11944 # unless they balance each other.
11945 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
11948 # return the balanced string
11953 sub add_closing_side_comment {
11955 # add closing side comments after closing block braces if -csc used
11956 my $cscw_block_comment;
11958 #---------------------------------------------------------------
11959 # Step 1: loop through all tokens of this line to accumulate
11960 # the text needed to create the closing side comments. Also see
11961 # how the line ends.
11962 #---------------------------------------------------------------
11964 my ( $terminal_type, $i_terminal, $i_block_leading_text,
11965 $block_leading_text, $block_line_count, $block_label )
11966 = accumulate_csc_text();
11968 #---------------------------------------------------------------
11969 # Step 2: make the closing side comment if this ends a block
11970 #---------------------------------------------------------------
11971 my $have_side_comment = $i_terminal != $max_index_to_go;
11973 # if this line might end in a block closure..
11975 $terminal_type eq '}'
11980 # the block is long enough
11981 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
11983 # or there is an existing comment to check
11984 || ( $have_side_comment
11985 && $rOpts->{'closing-side-comment-warnings'} )
11988 # .. and if this is one of the types of interest
11989 && $block_type_to_go[$i_terminal] =~
11990 /$closing_side_comment_list_pattern/o
11992 # .. but not an anonymous sub
11993 # These are not normally of interest, and their closing braces are
11994 # often followed by commas or semicolons anyway. This also avoids
11995 # possible erratic output due to line numbering inconsistencies
11996 # in the cases where their closing braces terminate a line.
11997 && $block_type_to_go[$i_terminal] ne 'sub'
11999 # ..and the corresponding opening brace must is not in this batch
12000 # (because we do not need to tag one-line blocks, although this
12001 # should also be caught with a positive -csci value)
12002 && $mate_index_to_go[$i_terminal] < 0
12007 # this is the last token (line doesn't have a side comment)
12008 !$have_side_comment
12010 # or the old side comment is a closing side comment
12011 || $tokens_to_go[$max_index_to_go] =~
12012 /$closing_side_comment_prefix_pattern/o
12017 # then make the closing side comment text
12018 if ($block_label) { $block_label .= " " }
12020 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
12022 # append any extra descriptive text collected above
12023 if ( $i_block_leading_text == $i_terminal ) {
12024 $token .= $block_leading_text;
12027 $token = balance_csc_text($token)
12028 if $rOpts->{'closing-side-comments-balanced'};
12030 $token =~ s/\s*$//; # trim any trailing whitespace
12032 # handle case of existing closing side comment
12033 if ($have_side_comment) {
12035 # warn if requested and tokens differ significantly
12036 if ( $rOpts->{'closing-side-comment-warnings'} ) {
12037 my $old_csc = $tokens_to_go[$max_index_to_go];
12038 my $new_csc = $token;
12039 $new_csc =~ s/\s+//g; # trim all whitespace
12040 $old_csc =~ s/\s+//g; # trim all whitespace
12041 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
12042 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
12043 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
12044 my $new_trailing_dots = $1;
12045 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
12047 # Patch to handle multiple closing side comments at
12048 # else and elsif's. These have become too complicated
12049 # to check, so if we see an indication of
12050 # '[ if' or '[ # elsif', then assume they were made
12052 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
12053 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
12055 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
12056 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
12059 # if old comment is contained in new comment,
12060 # only compare the common part.
12061 if ( length($new_csc) > length($old_csc) ) {
12062 $new_csc = substr( $new_csc, 0, length($old_csc) );
12065 # if the new comment is shorter and has been limited,
12066 # only compare the common part.
12067 if ( length($new_csc) < length($old_csc)
12068 && $new_trailing_dots )
12070 $old_csc = substr( $old_csc, 0, length($new_csc) );
12073 # any remaining difference?
12074 if ( $new_csc ne $old_csc ) {
12076 # just leave the old comment if we are below the threshold
12077 # for creating side comments
12078 if ( $block_line_count <
12079 $rOpts->{'closing-side-comment-interval'} )
12084 # otherwise we'll make a note of it
12088 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
12091 # save the old side comment in a new trailing block comment
12092 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
12095 $cscw_block_comment =
12096 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
12101 # No differences.. we can safely delete old comment if we
12102 # are below the threshold
12103 if ( $block_line_count <
12104 $rOpts->{'closing-side-comment-interval'} )
12107 unstore_token_to_go()
12108 if ( $types_to_go[$max_index_to_go] eq '#' );
12109 unstore_token_to_go()
12110 if ( $types_to_go[$max_index_to_go] eq 'b' );
12115 # switch to the new csc (unless we deleted it!)
12116 $tokens_to_go[$max_index_to_go] = $token if $token;
12119 # handle case of NO existing closing side comment
12122 # insert the new side comment into the output token stream
12124 my $block_type = '';
12125 my $type_sequence = '';
12126 my $container_environment =
12127 $container_environment_to_go[$max_index_to_go];
12128 my $level = $levels_to_go[$max_index_to_go];
12129 my $slevel = $nesting_depth_to_go[$max_index_to_go];
12130 my $no_internal_newlines = 0;
12132 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
12133 my $ci_level = $ci_levels_to_go[$max_index_to_go];
12134 my $in_continued_quote = 0;
12136 # first insert a blank token
12137 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
12139 # then the side comment
12140 insert_new_token_to_go( $token, $type, $slevel,
12141 $no_internal_newlines );
12144 return $cscw_block_comment;
12147 sub previous_nonblank_token {
12151 return "" if ( $im < 0 );
12152 if ( $types_to_go[$im] eq 'b' ) { $im--; }
12153 return "" if ( $im < 0 );
12154 $name = $tokens_to_go[$im];
12156 # prepend any sub name to an isolated -> to avoid unwanted alignments
12157 # [test case is test8/penco.pl]
12158 if ( $name eq '->' ) {
12160 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
12161 $name = $tokens_to_go[$im] . $name;
12167 sub send_lines_to_vertical_aligner {
12169 my ( $ri_first, $ri_last, $do_not_pad ) = @_;
12171 my $rindentation_list = [0]; # ref to indentations for each line
12173 # define the array @matching_token_to_go for the output tokens
12174 # which will be non-blank for each special token (such as =>)
12175 # for which alignment is required.
12176 set_vertical_alignment_markers( $ri_first, $ri_last );
12178 # flush if necessary to avoid unwanted alignment
12179 my $must_flush = 0;
12180 if ( @$ri_first > 1 ) {
12182 # flush before a long if statement
12183 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
12188 Perl::Tidy::VerticalAligner::flush();
12191 undo_ci( $ri_first, $ri_last );
12193 set_logical_padding( $ri_first, $ri_last );
12195 # loop to prepare each line for shipment
12196 my $n_last_line = @$ri_first - 1;
12198 for my $n ( 0 .. $n_last_line ) {
12199 my $ibeg = $$ri_first[$n];
12200 my $iend = $$ri_last[$n];
12202 my ( $rtokens, $rfields, $rpatterns ) =
12203 make_alignment_patterns( $ibeg, $iend );
12205 # Set flag to show how much level changes between this line
12206 # and the next line, if we have it.
12208 if ( $n < $n_last_line ) {
12209 my $ibegp = $$ri_first[ $n + 1 ];
12210 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
12213 my ( $indentation, $lev, $level_end, $terminal_type,
12214 $is_semicolon_terminated, $is_outdented_line )
12215 = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
12216 $ri_first, $ri_last, $rindentation_list, $ljump );
12218 # we will allow outdenting of long lines..
12219 my $outdent_long_lines = (
12221 # which are long quotes, if allowed
12222 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
12224 # which are long block comments, if allowed
12226 $types_to_go[$ibeg] eq '#'
12227 && $rOpts->{'outdent-long-comments'}
12229 # but not if this is a static block comment
12230 && !$is_static_block_comment
12235 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
12237 my $rvertical_tightness_flags =
12238 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
12239 $ri_first, $ri_last );
12241 # flush an outdented line to avoid any unwanted vertical alignment
12242 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12244 # Set a flag at the final ':' of a ternary chain to request
12245 # vertical alignment of the final term. Here is a
12246 # slightly complex example:
12248 # $self->{_text} = (
12250 # : $type eq 'item' ? "the $section entry"
12251 # : "the section on $section"
12255 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
12256 # : ' elsewhere in this document'
12259 my $is_terminal_ternary = 0;
12260 if ( $tokens_to_go[$ibeg] eq ':'
12261 || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
12263 my $last_leading_type = ":";
12265 my $iprev = $$ri_first[ $n - 1 ];
12266 $last_leading_type = $types_to_go[$iprev];
12268 if ( $terminal_type ne ';'
12269 && $n_last_line > $n
12270 && $level_end == $lev )
12272 my $inext = $$ri_first[ $n + 1 ];
12273 $level_end = $levels_to_go[$inext];
12274 $terminal_type = $types_to_go[$inext];
12277 $is_terminal_ternary = $last_leading_type eq ':'
12278 && ( ( $terminal_type eq ';' && $level_end <= $lev )
12279 || ( $terminal_type ne ':' && $level_end < $lev ) )
12281 # the terminal term must not contain any ternary terms, as in
12283 # $Is_MSWin32 ? ".\\echo$$"
12284 # : $Is_MacOS ? ":echo$$"
12285 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
12287 && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
12290 # send this new line down the pipe
12291 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
12292 Perl::Tidy::VerticalAligner::valign_input(
12299 $forced_breakpoint_to_go[$iend] || $in_comma_list,
12300 $outdent_long_lines,
12301 $is_terminal_ternary,
12302 $is_semicolon_terminated,
12304 $rvertical_tightness_flags,
12308 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
12310 # flush an outdented line to avoid any unwanted vertical alignment
12311 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12315 # Set flag indicating if this line ends in an opening
12316 # token and is very short, so that a blank line is not
12317 # needed if the subsequent line is a comment.
12318 # Examples of what we are looking for:
12324 $last_output_short_opening_token
12326 # line ends in opening token
12327 = $types_to_go[$iend] =~ /^[\{\(\[L]$/
12331 # line has either single opening token
12334 # or is a single token followed by opening token.
12335 # Note that sub identifiers have blanks like 'sub doit'
12336 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
12339 # and limit total to 10 character widths
12340 && token_sequence_length( $ibeg, $iend ) <= 10;
12342 } # end of loop to output each line
12344 # remember indentation of lines containing opening containers for
12345 # later use by sub set_adjusted_indentation
12346 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
12349 { # begin make_alignment_patterns
12351 my %block_type_map;
12356 # map related block names into a common name to
12358 %block_type_map = (
12369 # map certain keywords to the same 'if' class to align
12370 # long if/elsif sequences. [elsif.pl]
12376 'default' => 'given',
12377 'case' => 'switch',
12379 # treat an 'undef' similar to numbers and quotes
12384 sub make_alignment_patterns {
12386 # Here we do some important preliminary work for the
12387 # vertical aligner. We create three arrays for one
12388 # output line. These arrays contain strings that can
12389 # be tested by the vertical aligner to see if
12390 # consecutive lines can be aligned vertically.
12392 # The three arrays are indexed on the vertical
12393 # alignment fields and are:
12394 # @tokens - a list of any vertical alignment tokens for this line.
12395 # These are tokens, such as '=' '&&' '#' etc which
12396 # we want to might align vertically. These are
12397 # decorated with various information such as
12398 # nesting depth to prevent unwanted vertical
12399 # alignment matches.
12400 # @fields - the actual text of the line between the vertical alignment
12402 # @patterns - a modified list of token types, one for each alignment
12403 # field. These should normally each match before alignment is
12404 # allowed, even when the alignment tokens match.
12405 my ( $ibeg, $iend ) = @_;
12409 my $i_start = $ibeg;
12413 my @container_name = ("");
12414 my @multiple_comma_arrows = (undef);
12416 my $j = 0; # field index
12419 for $i ( $ibeg .. $iend ) {
12421 # Keep track of containers balanced on this line only.
12422 # These are used below to prevent unwanted cross-line alignments.
12423 # Unbalanced containers already avoid aligning across
12424 # container boundaries.
12425 if ( $tokens_to_go[$i] eq '(' ) {
12427 # if container is balanced on this line...
12428 my $i_mate = $mate_index_to_go[$i];
12429 if ( $i_mate > $i && $i_mate <= $iend ) {
12431 my $seqno = $type_sequence_to_go[$i];
12432 my $count = comma_arrow_count($seqno);
12433 $multiple_comma_arrows[$depth] = $count && $count > 1;
12435 # Append the previous token name to make the container name
12436 # more unique. This name will also be given to any commas
12437 # within this container, and it helps avoid undesirable
12438 # alignments of different types of containers.
12439 my $name = previous_nonblank_token($i);
12441 $container_name[$depth] = "+" . $name;
12443 # Make the container name even more unique if necessary.
12444 # If we are not vertically aligning this opening paren,
12445 # append a character count to avoid bad alignment because
12446 # it usually looks bad to align commas within containers
12447 # for which the opening parens do not align. Here
12448 # is an example very BAD alignment of commas (because
12449 # the atan2 functions are not all aligned):
12451 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
12452 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
12453 # $X * atan2( $X, 1 ) -
12454 # $Y * atan2( $Y, 1 );
12456 # On the other hand, it is usually okay to align commas if
12457 # opening parens align, such as:
12458 # glVertex3d( $cx + $s * $xs, $cy, $z );
12459 # glVertex3d( $cx, $cy + $s * $ys, $z );
12460 # glVertex3d( $cx - $s * $xs, $cy, $z );
12461 # glVertex3d( $cx, $cy - $s * $ys, $z );
12463 # To distinguish between these situations, we will
12464 # append the length of the line from the previous matching
12465 # token, or beginning of line, to the function name. This
12466 # will allow the vertical aligner to reject undesirable
12469 # if we are not aligning on this paren...
12470 if ( $matching_token_to_go[$i] eq '' ) {
12472 # Sum length from previous alignment, or start of line.
12474 ( $i_start == $ibeg )
12475 ? total_line_length( $i_start, $i - 1 )
12476 : token_sequence_length( $i_start, $i - 1 );
12478 # tack length onto the container name to make unique
12479 $container_name[$depth] .= "-" . $len;
12483 elsif ( $tokens_to_go[$i] eq ')' ) {
12484 $depth-- if $depth > 0;
12487 # if we find a new synchronization token, we are done with
12489 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
12491 my $tok = my $raw_tok = $matching_token_to_go[$i];
12493 # make separators in different nesting depths unique
12494 # by appending the nesting depth digit.
12495 if ( $raw_tok ne '#' ) {
12496 $tok .= "$nesting_depth_to_go[$i]";
12499 # also decorate commas with any container name to avoid
12500 # unwanted cross-line alignments.
12501 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
12502 if ( $container_name[$depth] ) {
12503 $tok .= $container_name[$depth];
12507 # Patch to avoid aligning leading and trailing if, unless.
12508 # Mark trailing if, unless statements with container names.
12509 # This makes them different from leading if, unless which
12510 # are not so marked at present. If we ever need to name
12511 # them too, we could use ci to distinguish them.
12512 # Example problem to avoid:
12513 # return ( 2, "DBERROR" )
12514 # if ( $retval == 2 );
12515 # if ( scalar @_ ) {
12516 # my ( $a, $b, $c, $d, $e, $f ) = @_;
12518 if ( $raw_tok eq '(' ) {
12519 my $ci = $ci_levels_to_go[$ibeg];
12520 if ( $container_name[$depth] =~ /^\+(if|unless)/
12523 $tok .= $container_name[$depth];
12527 # Decorate block braces with block types to avoid
12528 # unwanted alignments such as the following:
12529 # foreach ( @{$routput_array} ) { $fh->print($_) }
12530 # eval { $fh->close() };
12531 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
12532 my $block_type = $block_type_to_go[$i];
12534 # map certain related block types to allow
12535 # else blocks to align
12536 $block_type = $block_type_map{$block_type}
12537 if ( defined( $block_type_map{$block_type} ) );
12539 # remove sub names to allow one-line sub braces to align
12540 # regardless of name
12541 if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
12543 # allow all control-type blocks to align
12544 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
12546 $tok .= $block_type;
12549 # concatenate the text of the consecutive tokens to form
12552 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
12554 # store the alignment token for this field
12555 push( @tokens, $tok );
12557 # get ready for the next batch
12560 $patterns[$j] = "";
12563 # continue accumulating tokens
12564 # handle non-keywords..
12565 if ( $types_to_go[$i] ne 'k' ) {
12566 my $type = $types_to_go[$i];
12568 # Mark most things before arrows as a quote to
12569 # get them to line up. Testfile: mixed.pl.
12570 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
12571 my $next_type = $types_to_go[ $i + 1 ];
12572 my $i_next_nonblank =
12573 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12575 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
12578 # Patch to ignore leading minus before words,
12579 # by changing pattern 'mQ' into just 'Q',
12580 # so that we can align things like this:
12581 # Button => "Print letter \"~$_\"",
12582 # -command => [ sub { print "$_[0]\n" }, $_ ],
12583 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
12587 # patch to make numbers and quotes align
12588 if ( $type eq 'n' ) { $type = 'Q' }
12590 # patch to ignore any ! in patterns
12591 if ( $type eq '!' ) { $type = '' }
12593 $patterns[$j] .= $type;
12596 # for keywords we have to use the actual text
12599 my $tok = $tokens_to_go[$i];
12601 # but map certain keywords to a common string to allow
12603 $tok = $keyword_map{$tok}
12604 if ( defined( $keyword_map{$tok} ) );
12605 $patterns[$j] .= $tok;
12609 # done with this line .. join text of tokens to make the last field
12610 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
12611 return ( \@tokens, \@fields, \@patterns );
12614 } # end make_alignment_patterns
12616 { # begin unmatched_indexes
12618 # closure to keep track of unbalanced containers.
12619 # arrays shared by the routines in this block:
12620 my @unmatched_opening_indexes_in_this_batch;
12621 my @unmatched_closing_indexes_in_this_batch;
12622 my %comma_arrow_count;
12624 sub is_unbalanced_batch {
12625 @unmatched_opening_indexes_in_this_batch +
12626 @unmatched_closing_indexes_in_this_batch;
12629 sub comma_arrow_count {
12631 return $comma_arrow_count{$seqno};
12634 sub match_opening_and_closing_tokens {
12636 # Match up indexes of opening and closing braces, etc, in this batch.
12637 # This has to be done after all tokens are stored because unstoring
12638 # of tokens would otherwise cause trouble.
12640 @unmatched_opening_indexes_in_this_batch = ();
12641 @unmatched_closing_indexes_in_this_batch = ();
12642 %comma_arrow_count = ();
12643 my $comma_arrow_count_contained = 0;
12645 my ( $i, $i_mate, $token );
12646 foreach $i ( 0 .. $max_index_to_go ) {
12647 if ( $type_sequence_to_go[$i] ) {
12648 $token = $tokens_to_go[$i];
12649 if ( $token =~ /^[\(\[\{\?]$/ ) {
12650 push @unmatched_opening_indexes_in_this_batch, $i;
12652 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
12654 $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12655 if ( defined($i_mate) && $i_mate >= 0 ) {
12656 if ( $type_sequence_to_go[$i_mate] ==
12657 $type_sequence_to_go[$i] )
12659 $mate_index_to_go[$i] = $i_mate;
12660 $mate_index_to_go[$i_mate] = $i;
12661 my $seqno = $type_sequence_to_go[$i];
12662 if ( $comma_arrow_count{$seqno} ) {
12663 $comma_arrow_count_contained +=
12664 $comma_arrow_count{$seqno};
12668 push @unmatched_opening_indexes_in_this_batch,
12670 push @unmatched_closing_indexes_in_this_batch, $i;
12674 push @unmatched_closing_indexes_in_this_batch, $i;
12678 elsif ( $tokens_to_go[$i] eq '=>' ) {
12679 if (@unmatched_opening_indexes_in_this_batch) {
12680 my $j = $unmatched_opening_indexes_in_this_batch[-1];
12681 my $seqno = $type_sequence_to_go[$j];
12682 $comma_arrow_count{$seqno}++;
12686 return $comma_arrow_count_contained;
12689 sub save_opening_indentation {
12691 # This should be called after each batch of tokens is output. It
12692 # saves indentations of lines of all unmatched opening tokens.
12693 # These will be used by sub get_opening_indentation.
12695 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
12697 # we no longer need indentations of any saved indentations which
12698 # are unmatched closing tokens in this batch, because we will
12699 # never encounter them again. So we can delete them to keep
12700 # the hash size down.
12701 foreach (@unmatched_closing_indexes_in_this_batch) {
12702 my $seqno = $type_sequence_to_go[$_];
12703 delete $saved_opening_indentation{$seqno};
12706 # we need to save indentations of any unmatched opening tokens
12707 # in this batch because we may need them in a subsequent batch.
12708 foreach (@unmatched_opening_indexes_in_this_batch) {
12709 my $seqno = $type_sequence_to_go[$_];
12710 $saved_opening_indentation{$seqno} = [
12711 lookup_opening_indentation(
12712 $_, $ri_first, $ri_last, $rindentation_list
12717 } # end unmatched_indexes
12719 sub get_opening_indentation {
12721 # get the indentation of the line which output the opening token
12722 # corresponding to a given closing token in the current output batch.
12725 # $i_closing - index in this line of a closing token ')' '}' or ']'
12727 # $ri_first - reference to list of the first index $i for each output
12728 # line in this batch
12729 # $ri_last - reference to list of the last index $i for each output line
12731 # $rindentation_list - reference to a list containing the indentation
12732 # used for each line.
12735 # -the indentation of the line which contained the opening token
12736 # which matches the token at index $i_opening
12737 # -and its offset (number of columns) from the start of the line
12739 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
12741 # first, see if the opening token is in the current batch
12742 my $i_opening = $mate_index_to_go[$i_closing];
12743 my ( $indent, $offset, $is_leading, $exists );
12745 if ( $i_opening >= 0 ) {
12747 # it is..look up the indentation
12748 ( $indent, $offset, $is_leading ) =
12749 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
12750 $rindentation_list );
12753 # if not, it should have been stored in the hash by a previous batch
12755 my $seqno = $type_sequence_to_go[$i_closing];
12757 if ( $saved_opening_indentation{$seqno} ) {
12758 ( $indent, $offset, $is_leading ) =
12759 @{ $saved_opening_indentation{$seqno} };
12762 # some kind of serious error
12763 # (example is badfile.t)
12772 # if no sequence number it must be an unbalanced container
12780 return ( $indent, $offset, $is_leading, $exists );
12783 sub lookup_opening_indentation {
12785 # get the indentation of the line in the current output batch
12786 # which output a selected opening token
12789 # $i_opening - index of an opening token in the current output batch
12790 # whose line indentation we need
12791 # $ri_first - reference to list of the first index $i for each output
12792 # line in this batch
12793 # $ri_last - reference to list of the last index $i for each output line
12795 # $rindentation_list - reference to a list containing the indentation
12796 # used for each line. (NOTE: the first slot in
12797 # this list is the last returned line number, and this is
12798 # followed by the list of indentations).
12801 # -the indentation of the line which contained token $i_opening
12802 # -and its offset (number of columns) from the start of the line
12804 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
12806 my $nline = $rindentation_list->[0]; # line number of previous lookup
12808 # reset line location if necessary
12809 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
12811 # find the correct line
12812 unless ( $i_opening > $ri_last->[-1] ) {
12813 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
12816 # error - token index is out of bounds - shouldn't happen
12819 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
12821 report_definite_bug();
12822 $nline = $#{$ri_last};
12825 $rindentation_list->[0] =
12826 $nline; # save line number to start looking next call
12827 my $ibeg = $ri_start->[$nline];
12828 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
12829 my $is_leading = ( $ibeg == $i_opening );
12830 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
12834 my %is_if_elsif_else_unless_while_until_for_foreach;
12838 # These block types may have text between the keyword and opening
12839 # curly. Note: 'else' does not, but must be included to allow trailing
12840 # if/elsif text to be appended.
12841 # patch for SWITCH/CASE: added 'case' and 'when'
12842 @_ = qw(if elsif else unless while until for foreach case when);
12843 @is_if_elsif_else_unless_while_until_for_foreach{@_} =
12847 sub set_adjusted_indentation {
12849 # This routine has the final say regarding the actual indentation of
12850 # a line. It starts with the basic indentation which has been
12851 # defined for the leading token, and then takes into account any
12852 # options that the user has set regarding special indenting and
12855 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
12856 $rindentation_list, $level_jump )
12859 # we need to know the last token of this line
12860 my ( $terminal_type, $i_terminal ) =
12861 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
12863 my $is_outdented_line = 0;
12865 my $is_semicolon_terminated = $terminal_type eq ';'
12866 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
12868 ##########################################################
12869 # Section 1: set a flag and a default indentation
12871 # Most lines are indented according to the initial token.
12872 # But it is common to outdent to the level just after the
12873 # terminal token in certain cases...
12874 # adjust_indentation flag:
12875 # 0 - do not adjust
12877 # 2 - vertically align with opening token
12879 ##########################################################
12880 my $adjust_indentation = 0;
12881 my $default_adjust_indentation = $adjust_indentation;
12884 $opening_indentation, $opening_offset,
12885 $is_leading, $opening_exists
12888 # if we are at a closing token of some type..
12889 if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
12891 # get the indentation of the line containing the corresponding
12894 $opening_indentation, $opening_offset,
12895 $is_leading, $opening_exists
12897 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12898 $rindentation_list );
12900 # First set the default behavior:
12903 # default behavior is to outdent closing lines
12904 # of the form: "); }; ]; )->xxx;"
12905 $is_semicolon_terminated
12907 # and 'cuddled parens' of the form: ")->pack("
12909 $terminal_type eq '('
12910 && $types_to_go[$ibeg] eq ')'
12911 && ( $nesting_depth_to_go[$iend] + 1 ==
12912 $nesting_depth_to_go[$ibeg] )
12915 # and when the next line is at a lower indentation level
12916 # PATCH: and only if the style allows undoing continuation
12917 # for all closing token types. We should really wait until
12918 # the indentation of the next line is known and then make
12919 # a decision, but that would require another pass.
12920 || ( $level_jump < 0 && !$some_closing_token_indentation )
12923 $adjust_indentation = 1;
12926 # outdent something like '),'
12928 $terminal_type eq ','
12930 # allow just one character before the comma
12931 && $i_terminal == $ibeg + 1
12933 # require LIST environment; otherwise, we may outdent too much -
12934 # this can happen in calls without parentheses (overload.t);
12935 && $container_environment_to_go[$i_terminal] eq 'LIST'
12938 $adjust_indentation = 1;
12941 # undo continuation indentation of a terminal closing token if
12942 # it is the last token before a level decrease. This will allow
12943 # a closing token to line up with its opening counterpart, and
12944 # avoids a indentation jump larger than 1 level.
12945 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
12946 && $i_terminal == $ibeg )
12948 my $ci = $ci_levels_to_go[$ibeg];
12949 my $lev = $levels_to_go[$ibeg];
12950 my $next_type = $types_to_go[ $ibeg + 1 ];
12951 my $i_next_nonblank =
12952 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
12953 if ( $i_next_nonblank <= $max_index_to_go
12954 && $levels_to_go[$i_next_nonblank] < $lev )
12956 $adjust_indentation = 1;
12960 # YVES patch 1 of 2:
12961 # Undo ci of line with leading closing eval brace,
12962 # but not beyond the indention of the line with
12963 # the opening brace.
12964 if ( $block_type_to_go[$ibeg] eq 'eval'
12965 && !$rOpts->{'line-up-parentheses'}
12966 && !$rOpts->{'indent-closing-brace'} )
12969 $opening_indentation, $opening_offset,
12970 $is_leading, $opening_exists
12972 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12973 $rindentation_list );
12974 my $indentation = $leading_spaces_to_go[$ibeg];
12975 if ( defined($opening_indentation)
12976 && $indentation > $opening_indentation )
12978 $adjust_indentation = 1;
12982 $default_adjust_indentation = $adjust_indentation;
12984 # Now modify default behavior according to user request:
12985 # handle option to indent non-blocks of the form ); }; ];
12986 # But don't do special indentation to something like ')->pack('
12987 if ( !$block_type_to_go[$ibeg] ) {
12988 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
12990 if ( $i_terminal <= $ibeg + 1
12991 || $is_semicolon_terminated )
12993 $adjust_indentation = 2;
12996 $adjust_indentation = 0;
12999 elsif ( $cti == 2 ) {
13000 if ($is_semicolon_terminated) {
13001 $adjust_indentation = 3;
13004 $adjust_indentation = 0;
13007 elsif ( $cti == 3 ) {
13008 $adjust_indentation = 3;
13012 # handle option to indent blocks
13015 $rOpts->{'indent-closing-brace'}
13017 $i_terminal == $ibeg # isolated terminal '}'
13018 || $is_semicolon_terminated
13022 $adjust_indentation = 3;
13027 # if at ');', '};', '>;', and '];' of a terminal qw quote
13028 elsif ($$rpatterns[0] =~ /^qb*;$/
13029 && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
13031 if ( $closing_token_indentation{$1} == 0 ) {
13032 $adjust_indentation = 1;
13035 $adjust_indentation = 3;
13039 # if line begins with a ':', align it with any
13040 # previous line leading with corresponding ?
13041 elsif ( $types_to_go[$ibeg] eq ':' ) {
13043 $opening_indentation, $opening_offset,
13044 $is_leading, $opening_exists
13046 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13047 $rindentation_list );
13048 if ($is_leading) { $adjust_indentation = 2; }
13051 ##########################################################
13052 # Section 2: set indentation according to flag set above
13054 # Select the indentation object to define leading
13055 # whitespace. If we are outdenting something like '} } );'
13056 # then we want to use one level below the last token
13057 # ($i_terminal) in order to get it to fully outdent through
13059 ##########################################################
13062 my $level_end = $levels_to_go[$iend];
13064 if ( $adjust_indentation == 0 ) {
13065 $indentation = $leading_spaces_to_go[$ibeg];
13066 $lev = $levels_to_go[$ibeg];
13068 elsif ( $adjust_indentation == 1 ) {
13069 $indentation = $reduced_spaces_to_go[$i_terminal];
13070 $lev = $levels_to_go[$i_terminal];
13073 # handle indented closing token which aligns with opening token
13074 elsif ( $adjust_indentation == 2 ) {
13076 # handle option to align closing token with opening token
13077 $lev = $levels_to_go[$ibeg];
13079 # calculate spaces needed to align with opening token
13081 get_SPACES($opening_indentation) + $opening_offset;
13083 # Indent less than the previous line.
13085 # Problem: For -lp we don't exactly know what it was if there
13086 # were recoverable spaces sent to the aligner. A good solution
13087 # would be to force a flush of the vertical alignment buffer, so
13088 # that we would know. For now, this rule is used for -lp:
13090 # When the last line did not start with a closing token we will
13091 # be optimistic that the aligner will recover everything wanted.
13093 # This rule will prevent us from breaking a hierarchy of closing
13094 # tokens, and in a worst case will leave a closing paren too far
13095 # indented, but this is better than frequently leaving it not
13097 my $last_spaces = get_SPACES($last_indentation_written);
13098 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
13100 get_RECOVERABLE_SPACES($last_indentation_written);
13103 # reset the indentation to the new space count if it works
13104 # only options are all or none: nothing in-between looks good
13105 $lev = $levels_to_go[$ibeg];
13106 if ( $space_count < $last_spaces ) {
13107 if ($rOpts_line_up_parentheses) {
13108 my $lev = $levels_to_go[$ibeg];
13110 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13113 $indentation = $space_count;
13117 # revert to default if it doesn't work
13119 $space_count = leading_spaces_to_go($ibeg);
13120 if ( $default_adjust_indentation == 0 ) {
13121 $indentation = $leading_spaces_to_go[$ibeg];
13123 elsif ( $default_adjust_indentation == 1 ) {
13124 $indentation = $reduced_spaces_to_go[$i_terminal];
13125 $lev = $levels_to_go[$i_terminal];
13130 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
13133 # handle -icb (indented closing code block braces)
13134 # Updated method for indented block braces: indent one full level if
13135 # there is no continuation indentation. This will occur for major
13136 # structures such as sub, if, else, but not for things like map
13139 # Note: only code blocks without continuation indentation are
13140 # handled here (if, else, unless, ..). In the following snippet,
13141 # the terminal brace of the sort block will have continuation
13142 # indentation as shown so it will not be handled by the coding
13143 # here. We would have to undo the continuation indentation to do
13144 # this, but it probably looks ok as is. This is a possible future
13145 # update for semicolon terminated lines.
13147 # if ($sortby eq 'date' or $sortby eq 'size') {
13149 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
13154 if ( $block_type_to_go[$ibeg]
13155 && $ci_levels_to_go[$i_terminal] == 0 )
13157 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
13158 $indentation = $spaces + $rOpts_indent_columns;
13160 # NOTE: for -lp we could create a new indentation object, but
13161 # there is probably no need to do it
13164 # handle -icp and any -icb block braces which fall through above
13165 # test such as the 'sort' block mentioned above.
13168 # There are currently two ways to handle -icp...
13169 # One way is to use the indentation of the previous line:
13170 # $indentation = $last_indentation_written;
13172 # The other way is to use the indentation that the previous line
13173 # would have had if it hadn't been adjusted:
13174 $indentation = $last_unadjusted_indentation;
13176 # Current method: use the minimum of the two. This avoids
13177 # inconsistent indentation.
13178 if ( get_SPACES($last_indentation_written) <
13179 get_SPACES($indentation) )
13181 $indentation = $last_indentation_written;
13185 # use previous indentation but use own level
13186 # to cause list to be flushed properly
13187 $lev = $levels_to_go[$ibeg];
13190 # remember indentation except for multi-line quotes, which get
13192 unless ( $ibeg == 0 && $starting_in_quote ) {
13193 $last_indentation_written = $indentation;
13194 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
13195 $last_leading_token = $tokens_to_go[$ibeg];
13198 # be sure lines with leading closing tokens are not outdented more
13199 # than the line which contained the corresponding opening token.
13201 #############################################################
13202 # updated per bug report in alex_bug.pl: we must not
13203 # mess with the indentation of closing logical braces so
13204 # we must treat something like '} else {' as if it were
13205 # an isolated brace my $is_isolated_block_brace = (
13206 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
13207 #############################################################
13208 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
13209 && ( $iend == $ibeg
13210 || $is_if_elsif_else_unless_while_until_for_foreach{
13211 $block_type_to_go[$ibeg]
13214 # only do this for a ':; which is aligned with its leading '?'
13215 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
13216 if ( defined($opening_indentation)
13217 && !$is_isolated_block_brace
13218 && !$is_unaligned_colon )
13220 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
13221 $indentation = $opening_indentation;
13225 # remember the indentation of each line of this batch
13226 push @{$rindentation_list}, $indentation;
13228 # outdent lines with certain leading tokens...
13231 # must be first word of this batch
13237 # certain leading keywords if requested
13239 $rOpts->{'outdent-keywords'}
13240 && $types_to_go[$ibeg] eq 'k'
13241 && $outdent_keyword{ $tokens_to_go[$ibeg] }
13244 # or labels if requested
13245 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
13247 # or static block comments if requested
13248 || ( $types_to_go[$ibeg] eq '#'
13249 && $rOpts->{'outdent-static-block-comments'}
13250 && $is_static_block_comment )
13255 my $space_count = leading_spaces_to_go($ibeg);
13256 if ( $space_count > 0 ) {
13257 $space_count -= $rOpts_continuation_indentation;
13258 $is_outdented_line = 1;
13259 if ( $space_count < 0 ) { $space_count = 0 }
13261 # do not promote a spaced static block comment to non-spaced;
13262 # this is not normally necessary but could be for some
13263 # unusual user inputs (such as -ci = -i)
13264 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
13268 if ($rOpts_line_up_parentheses) {
13270 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13273 $indentation = $space_count;
13278 return ( $indentation, $lev, $level_end, $terminal_type,
13279 $is_semicolon_terminated, $is_outdented_line );
13283 sub set_vertical_tightness_flags {
13285 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
13287 # Define vertical tightness controls for the nth line of a batch.
13288 # We create an array of parameters which tell the vertical aligner
13289 # if we should combine this line with the next line to achieve the
13290 # desired vertical tightness. The array of parameters contains:
13292 # [0] type: 1=opening non-block 2=closing non-block
13293 # 3=opening block brace 4=closing block brace
13295 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
13296 # if closing: spaces of padding to use
13297 # [2] sequence number of container
13298 # [3] valid flag: do not append if this flag is false. Will be
13299 # true if appropriate -vt flag is set. Otherwise, Will be
13300 # made true only for 2 line container in parens with -lp
13302 # These flags are used by sub set_leading_whitespace in
13303 # the vertical aligner
13305 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
13307 #--------------------------------------------------------------
13308 # Vertical Tightness Flags Section 1:
13309 # Handle Lines 1 .. n-1 but not the last line
13310 # For non-BLOCK tokens, we will need to examine the next line
13311 # too, so we won't consider the last line.
13312 #--------------------------------------------------------------
13313 if ( $n < $n_last_line ) {
13315 #--------------------------------------------------------------
13316 # Vertical Tightness Flags Section 1a:
13317 # Look for Type 1, last token of this line is a non-block opening token
13318 #--------------------------------------------------------------
13319 my $ibeg_next = $$ri_first[ $n + 1 ];
13320 my $token_end = $tokens_to_go[$iend];
13321 my $iend_next = $$ri_last[ $n + 1 ];
13323 $type_sequence_to_go[$iend]
13324 && !$block_type_to_go[$iend]
13325 && $is_opening_token{$token_end}
13327 $opening_vertical_tightness{$token_end} > 0
13329 # allow 2-line method call to be closed up
13330 || ( $rOpts_line_up_parentheses
13331 && $token_end eq '('
13333 && $types_to_go[ $iend - 1 ] ne 'b' )
13338 # avoid multiple jumps in nesting depth in one line if
13340 my $ovt = $opening_vertical_tightness{$token_end};
13341 my $iend_next = $$ri_last[ $n + 1 ];
13344 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
13345 $nesting_depth_to_go[$ibeg_next] )
13349 # If -vt flag has not been set, mark this as invalid
13350 # and aligner will validate it if it sees the closing paren
13352 my $valid_flag = $ovt;
13353 @{$rvertical_tightness_flags} =
13354 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
13358 #--------------------------------------------------------------
13359 # Vertical Tightness Flags Section 1b:
13360 # Look for Type 2, first token of next line is a non-block closing
13361 # token .. and be sure this line does not have a side comment
13362 #--------------------------------------------------------------
13363 my $token_next = $tokens_to_go[$ibeg_next];
13364 if ( $type_sequence_to_go[$ibeg_next]
13365 && !$block_type_to_go[$ibeg_next]
13366 && $is_closing_token{$token_next}
13367 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
13369 my $ovt = $opening_vertical_tightness{$token_next};
13370 my $cvt = $closing_vertical_tightness{$token_next};
13373 # never append a trailing line like )->pack(
13374 # because it will throw off later alignment
13376 $nesting_depth_to_go[$ibeg_next] ==
13377 $nesting_depth_to_go[ $iend_next + 1 ] + 1
13382 $container_environment_to_go[$ibeg_next] ne 'LIST'
13386 # allow closing up 2-line method calls
13387 || ( $rOpts_line_up_parentheses
13388 && $token_next eq ')' )
13395 # decide which trailing closing tokens to append..
13397 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
13399 my $str = join( '',
13400 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
13402 # append closing token if followed by comment or ';'
13403 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
13407 my $valid_flag = $cvt;
13408 @{$rvertical_tightness_flags} = (
13410 $tightness{$token_next} == 2 ? 0 : 1,
13411 $type_sequence_to_go[$ibeg_next], $valid_flag,
13417 #--------------------------------------------------------------
13418 # Vertical Tightness Flags Section 1c:
13419 # Implement the Opening Token Right flag (Type 2)..
13420 # If requested, move an isolated trailing opening token to the end of
13421 # the previous line which ended in a comma. We could do this
13422 # in sub recombine_breakpoints but that would cause problems
13423 # with -lp formatting. The problem is that indentation will
13424 # quickly move far to the right in nested expressions. By
13425 # doing it after indentation has been set, we avoid changes
13426 # to the indentation. Actual movement of the token takes place
13427 # in sub valign_output_step_B.
13428 #--------------------------------------------------------------
13430 $opening_token_right{ $tokens_to_go[$ibeg_next] }
13432 # previous line is not opening
13433 # (use -sot to combine with it)
13434 && !$is_opening_token{$token_end}
13436 # previous line ended in one of these
13437 # (add other cases if necessary; '=>' and '.' are not necessary
13438 && !$block_type_to_go[$ibeg_next]
13440 # this is a line with just an opening token
13441 && ( $iend_next == $ibeg_next
13442 || $iend_next == $ibeg_next + 2
13443 && $types_to_go[$iend_next] eq '#' )
13445 # looks bad if we align vertically with the wrong container
13446 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
13449 my $valid_flag = 1;
13450 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13451 @{$rvertical_tightness_flags} =
13452 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
13455 #--------------------------------------------------------------
13456 # Vertical Tightness Flags Section 1d:
13457 # Stacking of opening and closing tokens (Type 2)
13458 #--------------------------------------------------------------
13460 my $token_beg_next = $tokens_to_go[$ibeg_next];
13462 # patch to make something like 'qw(' behave like an opening paren
13464 if ( $types_to_go[$ibeg_next] eq 'q' ) {
13465 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
13466 $token_beg_next = $1;
13470 if ( $is_closing_token{$token_end}
13471 && $is_closing_token{$token_beg_next} )
13473 $stackable = $stack_closing_token{$token_beg_next}
13474 unless ( $block_type_to_go[$ibeg_next] )
13475 ; # shouldn't happen; just checking
13477 elsif ($is_opening_token{$token_end}
13478 && $is_opening_token{$token_beg_next} )
13480 $stackable = $stack_opening_token{$token_beg_next}
13481 unless ( $block_type_to_go[$ibeg_next] )
13482 ; # shouldn't happen; just checking
13487 my $is_semicolon_terminated;
13488 if ( $n + 1 == $n_last_line ) {
13489 my ( $terminal_type, $i_terminal ) = terminal_type(
13490 \@types_to_go, \@block_type_to_go,
13491 $ibeg_next, $iend_next
13493 $is_semicolon_terminated = $terminal_type eq ';'
13494 && $nesting_depth_to_go[$iend_next] <
13495 $nesting_depth_to_go[$ibeg_next];
13498 # this must be a line with just an opening token
13499 # or end in a semicolon
13501 $is_semicolon_terminated
13502 || ( $iend_next == $ibeg_next
13503 || $iend_next == $ibeg_next + 2
13504 && $types_to_go[$iend_next] eq '#' )
13507 my $valid_flag = 1;
13508 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13509 @{$rvertical_tightness_flags} =
13510 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
13516 #--------------------------------------------------------------
13517 # Vertical Tightness Flags Section 2:
13518 # Handle type 3, opening block braces on last line of the batch
13519 # Check for a last line with isolated opening BLOCK curly
13520 #--------------------------------------------------------------
13521 elsif ($rOpts_block_brace_vertical_tightness
13523 && $types_to_go[$iend] eq '{'
13524 && $block_type_to_go[$iend] =~
13525 /$block_brace_vertical_tightness_pattern/o )
13527 @{$rvertical_tightness_flags} =
13528 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
13531 #--------------------------------------------------------------
13532 # Vertical Tightness Flags Section 3:
13533 # Handle type 4, a closing block brace on the last line of the batch Check
13534 # for a last line with isolated closing BLOCK curly
13535 #--------------------------------------------------------------
13536 elsif ($rOpts_stack_closing_block_brace
13538 && $block_type_to_go[$iend]
13539 && $types_to_go[$iend] eq '}' )
13541 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
13542 @{$rvertical_tightness_flags} =
13543 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
13546 # pack in the sequence numbers of the ends of this line
13547 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
13548 $rvertical_tightness_flags->[5] = get_seqno($iend);
13549 return $rvertical_tightness_flags;
13554 # get opening and closing sequence numbers of a token for the vertical
13555 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
13556 # to be treated somewhat like opening and closing tokens for stacking
13557 # tokens by the vertical aligner.
13559 my $seqno = $type_sequence_to_go[$ii];
13560 if ( $types_to_go[$ii] eq 'q' ) {
13563 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
13566 if ( !$ending_in_quote ) {
13567 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
13575 my %is_vertical_alignment_type;
13576 my %is_vertical_alignment_keyword;
13577 my %is_terminal_alignment_type;
13581 # Removed =~ from list to improve chances of alignment
13583 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
13584 { ? : => && || // ~~ !~~
13586 @is_vertical_alignment_type{@_} = (1) x scalar(@_);
13588 # only align these at end of line
13590 @is_terminal_alignment_type{@_} = (1) x scalar(@_);
13592 # eq and ne were removed from this list to improve alignment chances
13593 @_ = qw(if unless and or err for foreach while until);
13594 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
13597 sub set_vertical_alignment_markers {
13599 # This routine takes the first step toward vertical alignment of the
13600 # lines of output text. It looks for certain tokens which can serve as
13601 # vertical alignment markers (such as an '=').
13603 # Method: We look at each token $i in this output batch and set
13604 # $matching_token_to_go[$i] equal to those tokens at which we would
13605 # accept vertical alignment.
13607 # nothing to do if we aren't allowed to change whitespace
13608 if ( !$rOpts_add_whitespace ) {
13609 for my $i ( 0 .. $max_index_to_go ) {
13610 $matching_token_to_go[$i] = '';
13615 my ( $ri_first, $ri_last ) = @_;
13617 # remember the index of last nonblank token before any sidecomment
13618 my $i_terminal = $max_index_to_go;
13619 if ( $types_to_go[$i_terminal] eq '#' ) {
13620 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
13621 if ( $i_terminal > 0 ) { --$i_terminal }
13625 # look at each line of this batch..
13626 my $last_vertical_alignment_before_index;
13627 my $vert_last_nonblank_type;
13628 my $vert_last_nonblank_token;
13629 my $vert_last_nonblank_block_type;
13630 my $max_line = @$ri_first - 1;
13631 my ( $i, $type, $token, $block_type, $alignment_type );
13632 my ( $ibeg, $iend, $line );
13634 foreach $line ( 0 .. $max_line ) {
13635 $ibeg = $$ri_first[$line];
13636 $iend = $$ri_last[$line];
13637 $last_vertical_alignment_before_index = -1;
13638 $vert_last_nonblank_type = '';
13639 $vert_last_nonblank_token = '';
13640 $vert_last_nonblank_block_type = '';
13642 # look at each token in this output line..
13643 foreach $i ( $ibeg .. $iend ) {
13644 $alignment_type = '';
13645 $type = $types_to_go[$i];
13646 $block_type = $block_type_to_go[$i];
13647 $token = $tokens_to_go[$i];
13649 # check for flag indicating that we should not align
13651 if ( $matching_token_to_go[$i] ) {
13652 $matching_token_to_go[$i] = '';
13656 #--------------------------------------------------------
13657 # First see if we want to align BEFORE this token
13658 #--------------------------------------------------------
13660 # The first possible token that we can align before
13661 # is index 2 because: 1) it doesn't normally make sense to
13662 # align before the first token and 2) the second
13663 # token must be a blank if we are to align before
13665 if ( $i < $ibeg + 2 ) { }
13667 # must follow a blank token
13668 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
13670 # align a side comment --
13671 elsif ( $type eq '#' ) {
13675 # it is a static side comment
13677 $rOpts->{'static-side-comments'}
13678 && $token =~ /$static_side_comment_pattern/o
13681 # or a closing side comment
13682 || ( $vert_last_nonblank_block_type
13684 /$closing_side_comment_prefix_pattern/o )
13687 $alignment_type = $type;
13688 } ## Example of a static side comment
13691 # otherwise, do not align two in a row to create a
13693 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
13695 # align before one of these keywords
13696 # (within a line, since $i>1)
13697 elsif ( $type eq 'k' ) {
13699 # /^(if|unless|and|or|eq|ne)$/
13700 if ( $is_vertical_alignment_keyword{$token} ) {
13701 $alignment_type = $token;
13705 # align before one of these types..
13706 # Note: add '.' after new vertical aligner is operational
13707 elsif ( $is_vertical_alignment_type{$type} ) {
13708 $alignment_type = $token;
13710 # Do not align a terminal token. Although it might
13711 # occasionally look ok to do this, this has been found to be
13712 # a good general rule. The main problems are:
13713 # (1) that the terminal token (such as an = or :) might get
13714 # moved far to the right where it is hard to see because
13715 # nothing follows it, and
13716 # (2) doing so may prevent other good alignments.
13717 # Current exceptions are && and ||
13718 if ( $i == $iend || $i >= $i_terminal ) {
13719 $alignment_type = ""
13720 unless ( $is_terminal_alignment_type{$type} );
13723 # Do not align leading ': (' or '. ('. This would prevent
13724 # alignment in something like the following:
13726 # ( $input_line_number < 10 ) ? " "
13727 # : ( $input_line_number < 100 ) ? " "
13731 # ( $case_matters ? $accessor : " lc($accessor) " )
13732 # . ( $yesno ? " eq " : " ne " )
13733 if ( $i == $ibeg + 2
13734 && $types_to_go[$ibeg] =~ /^[\.\:]$/
13735 && $types_to_go[ $i - 1 ] eq 'b' )
13737 $alignment_type = "";
13740 # For a paren after keyword, only align something like this:
13742 # elsif ( $b ) { &b }
13743 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
13744 $alignment_type = ""
13745 unless $vert_last_nonblank_token =~
13746 /^(if|unless|elsif)$/;
13749 # be sure the alignment tokens are unique
13750 # This didn't work well: reason not determined
13751 # if ($token ne $type) {$alignment_type .= $type}
13754 # NOTE: This is deactivated because it causes the previous
13755 # if/elsif alignment to fail
13756 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
13757 #{ $alignment_type = $type; }
13759 if ($alignment_type) {
13760 $last_vertical_alignment_before_index = $i;
13763 #--------------------------------------------------------
13764 # Next see if we want to align AFTER the previous nonblank
13765 #--------------------------------------------------------
13767 # We want to line up ',' and interior ';' tokens, with the added
13768 # space AFTER these tokens. (Note: interior ';' is included
13769 # because it may occur in short blocks).
13772 # we haven't already set it
13775 # and its not the first token of the line
13778 # and it follows a blank
13779 && $types_to_go[ $i - 1 ] eq 'b'
13781 # and previous token IS one of these:
13782 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
13784 # and it's NOT one of these
13785 && ( $type !~ /^[b\#\)\]\}]$/ )
13787 # then go ahead and align
13791 $alignment_type = $vert_last_nonblank_type;
13794 #--------------------------------------------------------
13795 # then store the value
13796 #--------------------------------------------------------
13797 $matching_token_to_go[$i] = $alignment_type;
13798 if ( $type ne 'b' ) {
13799 $vert_last_nonblank_type = $type;
13800 $vert_last_nonblank_token = $token;
13801 $vert_last_nonblank_block_type = $block_type;
13808 sub terminal_type {
13810 # returns type of last token on this line (terminal token), as follows:
13811 # returns # for a full-line comment
13812 # returns ' ' for a blank line
13813 # otherwise returns final token type
13815 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
13817 # check for full-line comment..
13818 if ( $$rtype[$ibeg] eq '#' ) {
13819 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
13823 # start at end and walk backwards..
13824 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
13826 # skip past any side comment and blanks
13827 next if ( $$rtype[$i] eq 'b' );
13828 next if ( $$rtype[$i] eq '#' );
13830 # found it..make sure it is a BLOCK termination,
13831 # but hide a terminal } after sort/grep/map because it is not
13832 # necessarily the end of the line. (terminal.t)
13833 my $terminal_type = $$rtype[$i];
13835 $terminal_type eq '}'
13836 && ( !$$rblock_type[$i]
13837 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
13840 $terminal_type = 'b';
13842 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
13846 return wantarray ? ( ' ', $ibeg ) : ' ';
13850 { # set_bond_strengths
13852 my %is_good_keyword_breakpoint;
13853 my %is_lt_gt_le_ge;
13855 my %binary_bond_strength;
13862 sub bias_table_key {
13863 my ( $type, $token ) = @_;
13864 my $bias_table_key = $type;
13865 if ( $type eq 'k' ) {
13866 $bias_table_key = $token;
13867 if ( $token eq 'err' ) { $bias_table_key = 'or' }
13869 return $bias_table_key;
13872 sub set_bond_strengths {
13876 @_ = qw(if unless while until for foreach);
13877 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
13879 @_ = qw(lt gt le ge);
13880 @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
13882 # The decision about where to break a line depends upon a "bond
13883 # strength" between tokens. The LOWER the bond strength, the MORE
13884 # likely a break. A bond strength may be any value but to simplify
13885 # things there are several pre-defined strength levels:
13887 # NO_BREAK => 10000;
13888 # VERY_STRONG => 100;
13892 # VERY_WEAK => 0.55;
13894 # The strength values are based on trial-and-error, and need to be
13895 # tweaked occasionally to get desired results. Some comments:
13897 # 1. Only relative strengths are important. small differences
13898 # in strengths can make big formatting differences.
13899 # 2. Each indentation level adds one unit of bond strength.
13900 # 3. A value of NO_BREAK makes an unbreakable bond
13901 # 4. A value of VERY_WEAK is the strength of a ','
13902 # 5. Values below NOMINAL are considered ok break points.
13903 # 6. Values above NOMINAL are considered poor break points.
13905 # The bond strengths should roughly follow precedence order where
13906 # possible. If you make changes, please check the results very
13907 # carefully on a variety of scripts. Testing with the -extrude
13908 # options is particularly helpful in exercising all of the rules.
13910 # Wherever possible, bond strengths are defined in the following
13911 # tables. There are two main stages to setting bond strengths and
13912 # two types of tables:
13914 # The first stage involves looking at each token individually and
13915 # defining left and right bond strengths, according to if we want
13916 # to break to the left or right side, and how good a break point it
13917 # is. For example tokens like =, ||, && make good break points and
13918 # will have low strengths, but one might want to break on either
13919 # side to put them at the end of one line or beginning of the next.
13921 # The second stage involves looking at certain pairs of tokens and
13922 # defining a bond strength for that particular pair. This second
13923 # stage has priority.
13925 #---------------------------------------------------------------
13926 # Bond Strength BEGIN Section 1.
13927 # Set left and right bond strengths of individual tokens.
13928 #---------------------------------------------------------------
13930 # NOTE: NO_BREAK's set in this section first are HINTS which will
13931 # probably not be honored. Essential NO_BREAKS's should be set in
13932 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
13933 # of this subroutine.
13935 # Note that we are setting defaults in this section. The user
13936 # cannot change bond strengths but can cause the left and right
13937 # bond strengths of any token type to be swapped through the use of
13938 # the -wba and -wbb flags. In this way the user can determine if a
13939 # breakpoint token should appear at the end of one line or the
13940 # beginning of the next line.
13942 # The hash keys in this section are token types, plus the text of
13943 # certain keywords like 'or', 'and'.
13945 # no break around possible filehandle
13946 $left_bond_strength{'Z'} = NO_BREAK;
13947 $right_bond_strength{'Z'} = NO_BREAK;
13949 # never put a bare word on a new line:
13950 # example print (STDERR, "bla"); will fail with break after (
13951 $left_bond_strength{'w'} = NO_BREAK;
13953 # blanks always have infinite strength to force breaks after
13955 $right_bond_strength{'b'} = NO_BREAK;
13957 # try not to break on exponentation
13958 @_ = qw" ** .. ... <=> ";
13959 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13960 @right_bond_strength{@_} = (STRONG) x scalar(@_);
13962 # The comma-arrow has very low precedence but not a good break point
13963 $left_bond_strength{'=>'} = NO_BREAK;
13964 $right_bond_strength{'=>'} = NOMINAL;
13966 # ok to break after label
13967 $left_bond_strength{'J'} = NO_BREAK;
13968 $right_bond_strength{'J'} = NOMINAL;
13969 $left_bond_strength{'j'} = STRONG;
13970 $right_bond_strength{'j'} = STRONG;
13971 $left_bond_strength{'A'} = STRONG;
13972 $right_bond_strength{'A'} = STRONG;
13974 $left_bond_strength{'->'} = STRONG;
13975 $right_bond_strength{'->'} = VERY_STRONG;
13977 $left_bond_strength{'CORE::'} = NOMINAL;
13978 $right_bond_strength{'CORE::'} = NO_BREAK;
13980 # breaking AFTER modulus operator is ok:
13982 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13983 @right_bond_strength{@_} =
13984 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
13986 # Break AFTER math operators * and /
13988 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13989 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13991 # Break AFTER weakest math operators + and -
13992 # Make them weaker than * but a bit stronger than '.'
13994 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13995 @right_bond_strength{@_} =
13996 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
13998 # breaking BEFORE these is just ok:
14000 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14001 @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
14003 # breaking before the string concatenation operator seems best
14004 # because it can be hard to see at the end of a line
14005 $right_bond_strength{'.'} = STRONG;
14006 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
14009 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14010 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
14012 # make these a little weaker than nominal so that they get
14013 # favored for end-of-line characters
14014 @_ = qw"!= == =~ !~ ~~ !~~";
14015 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14016 @right_bond_strength{@_} =
14017 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
14019 # break AFTER these
14020 @_ = qw" < > | & >= <=";
14021 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
14022 @right_bond_strength{@_} =
14023 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
14025 # breaking either before or after a quote is ok
14026 # but bias for breaking before a quote
14027 $left_bond_strength{'Q'} = NOMINAL;
14028 $right_bond_strength{'Q'} = NOMINAL + 0.02;
14029 $left_bond_strength{'q'} = NOMINAL;
14030 $right_bond_strength{'q'} = NOMINAL;
14032 # starting a line with a keyword is usually ok
14033 $left_bond_strength{'k'} = NOMINAL;
14035 # we usually want to bond a keyword strongly to what immediately
14036 # follows, rather than leaving it stranded at the end of a line
14037 $right_bond_strength{'k'} = STRONG;
14039 $left_bond_strength{'G'} = NOMINAL;
14040 $right_bond_strength{'G'} = STRONG;
14042 # assignment operators
14044 = **= += *= &= <<= &&=
14045 -= /= |= >>= ||= //=
14050 # Default is to break AFTER various assignment operators
14051 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14052 @right_bond_strength{@_} =
14053 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
14055 # Default is to break BEFORE '&&' and '||' and '//'
14056 # set strength of '||' to same as '=' so that chains like
14057 # $a = $b || $c || $d will break before the first '||'
14058 $right_bond_strength{'||'} = NOMINAL;
14059 $left_bond_strength{'||'} = $right_bond_strength{'='};
14061 # same thing for '//'
14062 $right_bond_strength{'//'} = NOMINAL;
14063 $left_bond_strength{'//'} = $right_bond_strength{'='};
14065 # set strength of && a little higher than ||
14066 $right_bond_strength{'&&'} = NOMINAL;
14067 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
14069 $left_bond_strength{';'} = VERY_STRONG;
14070 $right_bond_strength{';'} = VERY_WEAK;
14071 $left_bond_strength{'f'} = VERY_STRONG;
14073 # make right strength of for ';' a little less than '='
14074 # to make for contents break after the ';' to avoid this:
14075 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
14076 # $number_of_fields )
14077 # and make it weaker than ',' and 'and' too
14078 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
14080 # The strengths of ?/: should be somewhere between
14081 # an '=' and a quote (NOMINAL),
14082 # make strength of ':' slightly less than '?' to help
14083 # break long chains of ? : after the colons
14084 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
14085 $right_bond_strength{':'} = NO_BREAK;
14086 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
14087 $right_bond_strength{'?'} = NO_BREAK;
14089 $left_bond_strength{','} = VERY_STRONG;
14090 $right_bond_strength{','} = VERY_WEAK;
14092 # remaining digraphs and trigraphs not defined above
14093 @_ = qw( :: <> ++ --);
14094 @left_bond_strength{@_} = (WEAK) x scalar(@_);
14095 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14097 # Set bond strengths of certain keywords
14098 # make 'or', 'err', 'and' slightly weaker than a ','
14099 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
14100 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
14101 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
14102 $left_bond_strength{'xor'} = NOMINAL;
14103 $right_bond_strength{'and'} = NOMINAL;
14104 $right_bond_strength{'or'} = NOMINAL;
14105 $right_bond_strength{'err'} = NOMINAL;
14106 $right_bond_strength{'xor'} = STRONG;
14108 #---------------------------------------------------------------
14109 # Bond Strength BEGIN Section 2.
14110 # Set binary rules for bond strengths between certain token types.
14111 #---------------------------------------------------------------
14113 # We have a little problem making tables which apply to the
14114 # container tokens. Here is a list of container tokens and
14117 # type tokens // meaning
14118 # { {, [, ( // indent
14119 # } }, ], ) // outdent
14120 # [ [ // left non-structural [ (enclosing an array index)
14121 # ] ] // right non-structural square bracket
14122 # ( ( // left non-structural paren
14123 # ) ) // right non-structural paren
14124 # L { // left non-structural curly brace (enclosing a key)
14125 # R } // right non-structural curly brace
14127 # Some rules apply to token types and some to just the token
14128 # itself. We solve the problem by combining type and token into a
14129 # new hash key for the container types.
14131 # If a rule applies to a token 'type' then we need to make rules
14132 # for each of these 'type.token' combinations:
14143 # If a rule applies to a token then we need to make rules for
14144 # these 'type.token' combinations:
14153 # allow long lines before final { in an if statement, as in:
14158 # Otherwise, the line before the { tends to be too short.
14160 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
14161 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
14163 # break on something like '} (', but keep this stronger than a ','
14164 # example is in 'howe.pl'
14165 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14166 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14168 # keep matrix and hash indices together
14169 # but make them a little below STRONG to allow breaking open
14170 # something like {'some-word'}{'some-very-long-word'} at the }{
14172 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14173 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14174 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14175 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14177 # increase strength to the point where a break in the following
14178 # will be after the opening paren rather than at the arrow:
14180 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
14182 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14183 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14184 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14185 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14186 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14187 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14189 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14190 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14191 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14192 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14194 #---------------------------------------------------------------
14195 # Binary NO_BREAK rules
14196 #---------------------------------------------------------------
14198 # use strict requires that bare word and => not be separated
14199 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
14200 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
14202 # Never break between a bareword and a following paren because
14203 # perl may give an error. For example, if a break is placed
14204 # between 'to_filehandle' and its '(' the following line will
14205 # give a syntax error [Carp.pm]: my( $no) =fileno(
14206 # to_filehandle( $in)) ;
14207 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
14208 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
14209 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
14210 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
14212 # use strict requires that bare word within braces not start new
14214 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
14216 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
14218 # use strict requires that bare word and => not be separated
14219 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
14221 # use strict does not allow separating type info from trailing { }
14222 # testfile is readmail.pl
14223 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
14224 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
14226 # As a defensive measure, do not break between a '(' and a
14227 # filehandle. In some cases, this can cause an error. For
14228 # example, the following program works:
14235 # But this program fails:
14243 # This is normally only a problem with the 'extrude' option
14244 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
14245 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
14247 # never break between sub name and opening paren
14248 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
14249 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
14251 # keep '}' together with ';'
14252 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
14254 # Breaking before a ++ can cause perl to guess wrong. For
14255 # example the following line will cause a syntax error
14256 # with -extrude if we break between '$i' and '++' [fixstyle2]
14257 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
14258 $nobreak_lhs{'++'} = NO_BREAK;
14260 # Do not break before a possible file handle
14261 $nobreak_lhs{'Z'} = NO_BREAK;
14263 # use strict hates bare words on any new line. For
14264 # example, a break before the underscore here provokes the
14265 # wrath of use strict:
14266 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
14267 $nobreak_rhs{'F'} = NO_BREAK;
14268 $nobreak_rhs{'CORE::'} = NO_BREAK;
14270 #---------------------------------------------------------------
14271 # Bond Strength BEGIN Section 3.
14272 # Define tables and values for applying a small bias to the above
14274 #---------------------------------------------------------------
14275 # Adding a small 'bias' to strengths is a simple way to make a line
14276 # break at the first of a sequence of identical terms. For
14277 # example, to force long string of conditional operators to break
14278 # with each line ending in a ':', we can add a small number to the
14279 # bond strength of each ':' (colon.t)
14280 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
14281 $delta_bias = 0.0001; # a very small strength level
14285 # patch-its always ok to break at end of line
14286 $nobreak_to_go[$max_index_to_go] = 0;
14288 # we start a new set of bias values for each line
14290 @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
14291 my $code_bias = -.01; # bias for closing block braces
14296 my $last_nonblank_type = $type;
14297 my $last_nonblank_token = $token;
14298 my $list_str = $left_bond_strength{'?'};
14300 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
14301 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
14304 # main loop to compute bond strengths between each pair of tokens
14305 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
14306 $last_type = $type;
14307 if ( $type ne 'b' ) {
14308 $last_nonblank_type = $type;
14309 $last_nonblank_token = $token;
14311 $type = $types_to_go[$i];
14313 # strength on both sides of a blank is the same
14314 if ( $type eq 'b' && $last_type ne 'b' ) {
14315 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
14319 $token = $tokens_to_go[$i];
14320 $block_type = $block_type_to_go[$i];
14322 $next_type = $types_to_go[$i_next];
14323 $next_token = $tokens_to_go[$i_next];
14324 $total_nesting_depth = $nesting_depth_to_go[$i_next];
14325 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
14326 $next_nonblank_type = $types_to_go[$i_next_nonblank];
14327 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14329 # We are computing the strength of the bond between the current
14330 # token and the NEXT token.
14332 #---------------------------------------------------------------
14333 # Bond Strength Section 1:
14334 # First Approximation.
14335 # Use minimum of individual left and right tabulated bond
14337 #---------------------------------------------------------------
14338 my $bsr = $right_bond_strength{$type};
14339 my $bsl = $left_bond_strength{$next_nonblank_type};
14341 # define right bond strengths of certain keywords
14342 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
14343 $bsr = $right_bond_strength{$token};
14345 elsif ( $token eq 'ne' or $token eq 'eq' ) {
14349 # set terminal bond strength to the nominal value
14350 # this will cause good preceding breaks to be retained
14351 if ( $i_next_nonblank > $max_index_to_go ) {
14355 # define right bond strengths of certain keywords
14356 if ( $next_nonblank_type eq 'k'
14357 && defined( $left_bond_strength{$next_nonblank_token} ) )
14359 $bsl = $left_bond_strength{$next_nonblank_token};
14361 elsif ($next_nonblank_token eq 'ne'
14362 or $next_nonblank_token eq 'eq' )
14366 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
14367 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
14370 # Use the minimum of the left and right strengths. Note: it might
14371 # seem that we would want to keep a NO_BREAK if either token has
14372 # this value. This didn't work, for example because in an arrow
14373 # list, it prevents the comma from separating from the following
14374 # bare word (which is probably quoted by its arrow). So necessary
14375 # NO_BREAK's have to be handled as special cases in the final
14377 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
14378 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
14379 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
14380 my $bond_str_1 = $bond_str;
14382 #---------------------------------------------------------------
14383 # Bond Strength Section 2:
14384 # Apply hardwired rules..
14385 #---------------------------------------------------------------
14387 # Patch to put terminal or clauses on a new line: Weaken the bond
14388 # at an || followed by die or similar keyword to make the terminal
14389 # or clause fall on a new line, like this:
14391 # my $class = shift
14392 # || die "Cannot add broadcast: No class identifier found";
14394 # Otherwise the break will be at the previous '=' since the || and
14395 # = have the same starting strength and the or is biased, like
14399 # shift || die "Cannot add broadcast: No class identifier found";
14401 # In any case if the user places a break at either the = or the ||
14402 # it should remain there.
14403 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
14404 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
14405 if ( $want_break_before{$token} && $i > 0 ) {
14406 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
14409 $bond_str -= $delta_bias;
14414 # good to break after end of code blocks
14415 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
14417 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
14418 $code_bias += $delta_bias;
14421 if ( $type eq 'k' ) {
14423 # allow certain control keywords to stand out
14424 if ( $next_nonblank_type eq 'k'
14425 && $is_last_next_redo_return{$token} )
14427 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
14430 # Don't break after keyword my. This is a quick fix for a
14431 # rare problem with perl. An example is this line from file
14434 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
14435 # $this->{'question'} ) )
14437 if ( $token eq 'my' ) {
14438 $bond_str = NO_BREAK;
14443 # good to break before 'if', 'unless', etc
14444 if ( $is_if_brace_follower{$next_nonblank_token} ) {
14445 $bond_str = VERY_WEAK;
14448 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
14450 # FIXME: needs more testing
14451 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
14452 $bond_str = $list_str if ( $bond_str > $list_str );
14455 # keywords like 'unless', 'if', etc, within statements
14457 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
14458 $bond_str = VERY_WEAK / 1.05;
14462 # try not to break before a comma-arrow
14463 elsif ( $next_nonblank_type eq '=>' ) {
14464 if ( $bond_str < STRONG ) { $bond_str = STRONG }
14467 #---------------------------------------------------------------
14468 # Additional hardwired NOBREAK rules
14469 #---------------------------------------------------------------
14471 # map1.t -- correct for a quirk in perl
14473 && $next_nonblank_type eq 'i'
14474 && $last_nonblank_type eq 'k'
14475 && $is_sort_map_grep{$last_nonblank_token} )
14477 # /^(sort|map|grep)$/ )
14479 $bond_str = NO_BREAK;
14482 # extrude.t: do not break before paren at:
14484 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
14485 $bond_str = NO_BREAK;
14488 # in older version of perl, use strict can cause problems with
14489 # breaks before bare words following opening parens. For example,
14490 # this will fail under older versions if a break is made between
14491 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
14492 # command"); close MAIL;
14493 if ( $type eq '{' ) {
14495 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
14497 # but it's fine to break if the word is followed by a '=>'
14498 # or if it is obviously a sub call
14499 my $i_next_next_nonblank = $i_next_nonblank + 1;
14500 my $next_next_type = $types_to_go[$i_next_next_nonblank];
14501 if ( $next_next_type eq 'b'
14502 && $i_next_nonblank < $max_index_to_go )
14504 $i_next_next_nonblank++;
14505 $next_next_type = $types_to_go[$i_next_next_nonblank];
14508 # We'll check for an old breakpoint and keep a leading
14509 # bareword if it was that way in the input file.
14510 # Presumably it was ok that way. For example, the
14511 # following would remain unchanged:
14514 # January, February, March, April,
14515 # May, June, July, August,
14516 # September, October, November, December,
14519 # This should be sufficient:
14521 !$old_breakpoint_to_go[$i]
14522 && ( $next_next_type eq ','
14523 || $next_next_type eq '}' )
14526 $bond_str = NO_BREAK;
14531 # Do not break between a possible filehandle and a ? or / and do
14532 # not introduce a break after it if there is no blank
14534 elsif ( $type eq 'Z' ) {
14539 # if there is no blank and we do not want one. Examples:
14540 # print $x++ # do not break after $x
14541 # print HTML"HELLO" # break ok after HTML
14544 && defined( $want_left_space{$next_type} )
14545 && $want_left_space{$next_type} == WS_NO
14548 # or we might be followed by the start of a quote
14549 || $next_nonblank_type =~ /^[\/\?]$/
14552 $bond_str = NO_BREAK;
14556 # Breaking before a ? before a quote can cause trouble if
14557 # they are not separated by a blank.
14558 # Example: a syntax error occurs if you break before the ? here
14559 # my$logic=join$all?' && ':' || ',@regexps;
14560 # From: Professional_Perl_Programming_Code/multifind.pl
14561 if ( $next_nonblank_type eq '?' ) {
14562 $bond_str = NO_BREAK
14563 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
14566 # Breaking before a . followed by a number
14567 # can cause trouble if there is no intervening space
14568 # Example: a syntax error occurs if you break before the .2 here
14569 # $str .= pack($endian.2, ensurrogate($ord));
14570 # From: perl58/Unicode.pm
14571 elsif ( $next_nonblank_type eq '.' ) {
14572 $bond_str = NO_BREAK
14573 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
14576 # patch to put cuddled elses back together when on multiple
14577 # lines, as in: } \n else \n { \n
14578 if ($rOpts_cuddled_else) {
14580 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
14581 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
14583 $bond_str = NO_BREAK;
14586 my $bond_str_2 = $bond_str;
14588 #---------------------------------------------------------------
14589 # End of hardwired rules
14590 #---------------------------------------------------------------
14592 #---------------------------------------------------------------
14593 # Bond Strength Section 3:
14594 # Apply table rules. These have priority over the above
14596 #---------------------------------------------------------------
14598 my $tabulated_bond_str;
14600 my $rtype = $next_nonblank_type;
14601 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
14602 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
14603 $rtype = $next_nonblank_type . $next_nonblank_token;
14606 if ( $binary_bond_strength{$ltype}{$rtype} ) {
14607 $bond_str = $binary_bond_strength{$ltype}{$rtype};
14608 $tabulated_bond_str = $bond_str;
14611 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
14612 $bond_str = NO_BREAK;
14613 $tabulated_bond_str = $bond_str;
14615 my $bond_str_3 = $bond_str;
14617 # If the hardwired rules conflict with the tabulated bond
14618 # strength then there is an inconsistency that should be fixed
14619 FORMATTER_DEBUG_FLAG_BOND_TABLES
14620 && $tabulated_bond_str
14622 && $bond_str_1 != $bond_str_2
14623 && $bond_str_2 != $tabulated_bond_str
14626 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
14629 #-----------------------------------------------------------------
14630 # Bond Strength Section 4:
14631 # Modify strengths of certain tokens which often occur in sequence
14632 # by adding a small bias to each one in turn so that the breaks
14633 # occur from left to right.
14635 # Note that we only changing strengths by small amounts here,
14636 # and usually increasing, so we should not be altering any NO_BREAKs.
14637 # Other routines which check for NO_BREAKs will use a tolerance
14638 # of one to avoid any problem.
14639 #-----------------------------------------------------------------
14641 # The bias tables use special keys
14642 my $left_key = bias_table_key( $type, $token );
14644 bias_table_key( $next_nonblank_type, $next_nonblank_token );
14646 # add any bias set by sub scan_list at old comma break points.
14647 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
14650 elsif ( defined( $bias{$left_key} ) ) {
14651 if ( !$want_break_before{$left_key} ) {
14652 $bias{$left_key} += $delta_bias;
14653 $bond_str += $bias{$left_key};
14658 if ( defined( $bias{$right_key} ) ) {
14659 if ( $want_break_before{$right_key} ) {
14661 # for leading '.' align all but 'short' quotes; the idea
14662 # is to not place something like "\n" on a single line.
14663 if ( $right_key eq '.' ) {
14665 $last_nonblank_type eq '.'
14668 $rOpts_short_concatenation_item_length )
14669 && ( $token !~ /^[\)\]\}]$/ )
14672 $bias{$right_key} += $delta_bias;
14676 $bias{$right_key} += $delta_bias;
14678 $bond_str += $bias{$right_key};
14681 my $bond_str_4 = $bond_str;
14683 #---------------------------------------------------------------
14684 # Bond Strength Section 5:
14685 # Fifth Approximation.
14686 # Take nesting depth into account by adding the nesting depth
14687 # to the bond strength.
14688 #---------------------------------------------------------------
14691 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
14692 if ( $total_nesting_depth > 0 ) {
14693 $strength = $bond_str + $total_nesting_depth;
14696 $strength = $bond_str;
14700 $strength = NO_BREAK;
14703 # always break after side comment
14704 if ( $type eq '#' ) { $strength = 0 }
14706 $bond_strength_to_go[$i] = $strength;
14708 FORMATTER_DEBUG_FLAG_BOND && do {
14709 my $str = substr( $token, 0, 15 );
14710 $str .= ' ' x ( 16 - length($str) );
14712 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
14715 } ## end sub set_bond_strengths
14718 sub pad_array_to_go {
14720 # to simplify coding in scan_list and set_bond_strengths, it helps
14721 # to create some extra blank tokens at the end of the arrays
14722 $tokens_to_go[ $max_index_to_go + 1 ] = '';
14723 $tokens_to_go[ $max_index_to_go + 2 ] = '';
14724 $types_to_go[ $max_index_to_go + 1 ] = 'b';
14725 $types_to_go[ $max_index_to_go + 2 ] = 'b';
14726 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
14727 $nesting_depth_to_go[$max_index_to_go];
14730 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
14731 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
14733 # shouldn't happen:
14734 unless ( get_saw_brace_error() ) {
14736 "Program bug in scan_list: hit nesting error which should have been caught\n"
14738 report_definite_bug();
14742 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
14747 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
14748 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
14752 { # begin scan_list
14755 $block_type, $current_depth,
14757 $i_last_nonblank_token, $last_colon_sequence_number,
14758 $last_nonblank_token, $last_nonblank_type,
14759 $last_nonblank_block_type, $last_old_breakpoint_count,
14760 $minimum_depth, $next_nonblank_block_type,
14761 $next_nonblank_token, $next_nonblank_type,
14762 $old_breakpoint_count, $starting_breakpoint_count,
14763 $starting_depth, $token,
14764 $type, $type_sequence,
14768 @breakpoint_stack, @breakpoint_undo_stack,
14769 @comma_index, @container_type,
14770 @identifier_count_stack, @index_before_arrow,
14771 @interrupted_list, @item_count_stack,
14772 @last_comma_index, @last_dot_index,
14773 @last_nonblank_type, @old_breakpoint_count_stack,
14774 @opening_structure_index_stack, @rfor_semicolon_list,
14775 @has_old_logical_breakpoints, @rand_or_list,
14779 # routine to define essential variables when we go 'up' to
14781 sub check_for_new_minimum_depth {
14783 if ( $depth < $minimum_depth ) {
14785 $minimum_depth = $depth;
14787 # these arrays need not retain values between calls
14788 $breakpoint_stack[$depth] = $starting_breakpoint_count;
14789 $container_type[$depth] = "";
14790 $identifier_count_stack[$depth] = 0;
14791 $index_before_arrow[$depth] = -1;
14792 $interrupted_list[$depth] = 1;
14793 $item_count_stack[$depth] = 0;
14794 $last_nonblank_type[$depth] = "";
14795 $opening_structure_index_stack[$depth] = -1;
14797 $breakpoint_undo_stack[$depth] = undef;
14798 $comma_index[$depth] = undef;
14799 $last_comma_index[$depth] = undef;
14800 $last_dot_index[$depth] = undef;
14801 $old_breakpoint_count_stack[$depth] = undef;
14802 $has_old_logical_breakpoints[$depth] = 0;
14803 $rand_or_list[$depth] = [];
14804 $rfor_semicolon_list[$depth] = [];
14805 $i_equals[$depth] = -1;
14807 # these arrays must retain values between calls
14808 if ( !defined( $has_broken_sublist[$depth] ) ) {
14809 $dont_align[$depth] = 0;
14810 $has_broken_sublist[$depth] = 0;
14811 $want_comma_break[$depth] = 0;
14816 # routine to decide which commas to break at within a container;
14818 # $bp_count = number of comma breakpoints set
14819 # $do_not_break_apart = a flag indicating if container need not
14821 sub set_comma_breakpoints {
14825 my $do_not_break_apart = 0;
14828 if ( $item_count_stack[$dd] ) {
14830 # handle commas not in containers...
14831 if ( $dont_align[$dd] ) {
14832 do_uncontained_comma_breaks($dd);
14835 # handle commas within containers...
14837 my $fbc = $forced_breakpoint_count;
14839 # always open comma lists not preceded by keywords,
14840 # barewords, identifiers (that is, anything that doesn't
14841 # look like a function call)
14842 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
14844 set_comma_breakpoints_do(
14846 $opening_structure_index_stack[$dd],
14848 $item_count_stack[$dd],
14849 $identifier_count_stack[$dd],
14851 $next_nonblank_type,
14852 $container_type[$dd],
14853 $interrupted_list[$dd],
14854 \$do_not_break_apart,
14857 $bp_count = $forced_breakpoint_count - $fbc;
14858 $do_not_break_apart = 0 if $must_break_open;
14861 return ( $bp_count, $do_not_break_apart );
14864 sub do_uncontained_comma_breaks {
14866 # Handle commas not in containers...
14867 # This is a catch-all routine for commas that we
14868 # don't know what to do with because the don't fall
14869 # within containers. We will bias the bond strength
14870 # to break at commas which ended lines in the input
14871 # file. This usually works better than just trying
14872 # to put as many items on a line as possible. A
14873 # downside is that if the input file is garbage it
14874 # won't work very well. However, the user can always
14875 # prevent following the old breakpoints with the
14879 my $old_comma_break_count = 0;
14880 foreach my $ii ( @{ $comma_index[$dd] } ) {
14881 if ( $old_breakpoint_to_go[$ii] ) {
14882 $old_comma_break_count++;
14883 $bond_strength_to_go[$ii] = $bias;
14885 # reduce bias magnitude to force breaks in order
14890 # Also put a break before the first comma if
14891 # (1) there was a break there in the input, and
14892 # (2) there was exactly one old break before the first comma break
14893 # (3) OLD: there are multiple old comma breaks
14894 # (3) NEW: there are one or more old comma breaks (see return example)
14896 # For example, we will follow the user and break after
14897 # 'print' in this snippet:
14899 # "conformability (Not the same dimension)\n",
14900 # "\t", $have, " is ", text_unit($hu), "\n",
14901 # "\t", $want, " is ", text_unit($wu), "\n",
14904 # Another example, just one comma, where we will break after
14907 # $x * cos($a) - $y * sin($a),
14908 # $x * sin($a) + $y * cos($a);
14910 # Breaking a print statement:
14912 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
14913 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
14915 # But we will not force a break after the opening paren here
14916 # (causes a blinker):
14917 # $heap->{stream}->set_output_filter(
14918 # poe::filter::reference->new('myotherfreezer') ),
14921 my $i_first_comma = $comma_index[$dd]->[0];
14922 if ( $old_breakpoint_to_go[$i_first_comma] ) {
14923 my $level_comma = $levels_to_go[$i_first_comma];
14926 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
14927 if ( $old_breakpoint_to_go[$ii] ) {
14929 last if ( $obp_count > 1 );
14931 if ( $levels_to_go[$ii] == $level_comma );
14935 # Changed rule from multiple old commas to just one here:
14936 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
14938 # Do not to break before an opening token because
14939 # it can lead to "blinkers".
14940 my $ibreakm = $ibreak;
14941 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
14942 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
14944 set_forced_breakpoint($ibreak);
14950 my %is_logical_container;
14953 @_ = qw# if elsif unless while and or err not && | || ? : ! #;
14954 @is_logical_container{@_} = (1) x scalar(@_);
14957 sub set_for_semicolon_breakpoints {
14959 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
14960 set_forced_breakpoint($_);
14964 sub set_logical_breakpoints {
14967 $item_count_stack[$dd] == 0
14968 && $is_logical_container{ $container_type[$dd] }
14970 || $has_old_logical_breakpoints[$dd]
14974 # Look for breaks in this order:
14977 foreach my $i ( 0 .. 3 ) {
14978 if ( $rand_or_list[$dd][$i] ) {
14979 foreach ( @{ $rand_or_list[$dd][$i] } ) {
14980 set_forced_breakpoint($_);
14983 # break at any 'if' and 'unless' too
14984 foreach ( @{ $rand_or_list[$dd][4] } ) {
14985 set_forced_breakpoint($_);
14987 $rand_or_list[$dd] = [];
14994 sub is_unbreakable_container {
14996 # never break a container of one of these types
14997 # because bad things can happen (map1.t)
14999 $is_sort_map_grep{ $container_type[$dd] };
15004 # This routine is responsible for setting line breaks for all lists,
15005 # so that hierarchical structure can be displayed and so that list
15006 # items can be vertically aligned. The output of this routine is
15007 # stored in the array @forced_breakpoint_to_go, which is used to set
15008 # final breakpoints.
15010 $starting_depth = $nesting_depth_to_go[0];
15013 $current_depth = $starting_depth;
15015 $last_colon_sequence_number = -1;
15016 $last_nonblank_token = ';';
15017 $last_nonblank_type = ';';
15018 $last_nonblank_block_type = ' ';
15019 $last_old_breakpoint_count = 0;
15020 $minimum_depth = $current_depth + 1; # forces update in check below
15021 $old_breakpoint_count = 0;
15022 $starting_breakpoint_count = $forced_breakpoint_count;
15025 $type_sequence = '';
15027 my $total_depth_variation = 0;
15028 my $i_old_assignment_break;
15029 my $depth_last = $starting_depth;
15031 check_for_new_minimum_depth($current_depth);
15033 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
15034 my $want_previous_breakpoint = -1;
15036 my $saw_good_breakpoint;
15037 my $i_line_end = -1;
15038 my $i_line_start = -1;
15040 # loop over all tokens in this batch
15041 while ( ++$i <= $max_index_to_go ) {
15042 if ( $type ne 'b' ) {
15043 $i_last_nonblank_token = $i - 1;
15044 $last_nonblank_type = $type;
15045 $last_nonblank_token = $token;
15046 $last_nonblank_block_type = $block_type;
15047 } ## end if ( $type ne 'b' )
15048 $type = $types_to_go[$i];
15049 $block_type = $block_type_to_go[$i];
15050 $token = $tokens_to_go[$i];
15051 $type_sequence = $type_sequence_to_go[$i];
15052 my $next_type = $types_to_go[ $i + 1 ];
15053 my $next_token = $tokens_to_go[ $i + 1 ];
15054 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
15055 $next_nonblank_type = $types_to_go[$i_next_nonblank];
15056 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15057 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15059 # set break if flag was set
15060 if ( $want_previous_breakpoint >= 0 ) {
15061 set_forced_breakpoint($want_previous_breakpoint);
15062 $want_previous_breakpoint = -1;
15065 $last_old_breakpoint_count = $old_breakpoint_count;
15066 if ( $old_breakpoint_to_go[$i] ) {
15068 $i_line_start = $i_next_nonblank;
15070 $old_breakpoint_count++;
15072 # Break before certain keywords if user broke there and
15073 # this is a 'safe' break point. The idea is to retain
15074 # any preferred breaks for sequential list operations,
15075 # like a schwartzian transform.
15076 if ($rOpts_break_at_old_keyword_breakpoints) {
15078 $next_nonblank_type eq 'k'
15079 && $is_keyword_returning_list{$next_nonblank_token}
15080 && ( $type =~ /^[=\)\]\}Riw]$/
15082 && $is_keyword_returning_list{$token} )
15086 # we actually have to set this break next time through
15087 # the loop because if we are at a closing token (such
15088 # as '}') which forms a one-line block, this break might
15090 $want_previous_breakpoint = $i;
15091 } ## end if ( $next_nonblank_type...)
15092 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
15094 # Break before attributes if user broke there
15095 if ($rOpts_break_at_old_attribute_breakpoints) {
15096 if ( $next_nonblank_type eq 'A' ) {
15097 $want_previous_breakpoint = $i;
15101 # remember an = break as possible good break point
15102 if ( $is_assignment{$type} ) {
15103 $i_old_assignment_break = $i;
15105 elsif ( $is_assignment{$next_nonblank_type} ) {
15106 $i_old_assignment_break = $i_next_nonblank;
15108 } ## end if ( $old_breakpoint_to_go...)
15109 next if ( $type eq 'b' );
15110 $depth = $nesting_depth_to_go[ $i + 1 ];
15112 $total_depth_variation += abs( $depth - $depth_last );
15113 $depth_last = $depth;
15115 # safety check - be sure we always break after a comment
15116 # Shouldn't happen .. an error here probably means that the
15117 # nobreak flag did not get turned off correctly during
15119 if ( $type eq '#' ) {
15120 if ( $i != $max_index_to_go ) {
15122 "Non-fatal program bug: backup logic needed to break after a comment\n"
15124 report_definite_bug();
15125 $nobreak_to_go[$i] = 0;
15126 set_forced_breakpoint($i);
15127 } ## end if ( $i != $max_index_to_go)
15128 } ## end if ( $type eq '#' )
15130 # Force breakpoints at certain tokens in long lines.
15131 # Note that such breakpoints will be undone later if these tokens
15132 # are fully contained within parens on a line.
15135 # break before a keyword within a line
15139 # if one of these keywords:
15140 && $token =~ /^(if|unless|while|until|for)$/
15142 # but do not break at something like '1 while'
15143 && ( $last_nonblank_type ne 'n' || $i > 2 )
15145 # and let keywords follow a closing 'do' brace
15146 && $last_nonblank_block_type ne 'do'
15151 # or container is broken (by side-comment, etc)
15152 || ( $next_nonblank_token eq '('
15153 && $mate_index_to_go[$i_next_nonblank] < $i )
15157 set_forced_breakpoint( $i - 1 );
15158 } ## end if ( $type eq 'k' && $i...)
15160 # remember locations of '||' and '&&' for possible breaks if we
15161 # decide this is a long logical expression.
15162 if ( $type eq '||' ) {
15163 push @{ $rand_or_list[$depth][2] }, $i;
15164 ++$has_old_logical_breakpoints[$depth]
15165 if ( ( $i == $i_line_start || $i == $i_line_end )
15166 && $rOpts_break_at_old_logical_breakpoints );
15167 } ## end if ( $type eq '||' )
15168 elsif ( $type eq '&&' ) {
15169 push @{ $rand_or_list[$depth][3] }, $i;
15170 ++$has_old_logical_breakpoints[$depth]
15171 if ( ( $i == $i_line_start || $i == $i_line_end )
15172 && $rOpts_break_at_old_logical_breakpoints );
15173 } ## end elsif ( $type eq '&&' )
15174 elsif ( $type eq 'f' ) {
15175 push @{ $rfor_semicolon_list[$depth] }, $i;
15177 elsif ( $type eq 'k' ) {
15178 if ( $token eq 'and' ) {
15179 push @{ $rand_or_list[$depth][1] }, $i;
15180 ++$has_old_logical_breakpoints[$depth]
15181 if ( ( $i == $i_line_start || $i == $i_line_end )
15182 && $rOpts_break_at_old_logical_breakpoints );
15183 } ## end if ( $token eq 'and' )
15185 # break immediately at 'or's which are probably not in a logical
15186 # block -- but we will break in logical breaks below so that
15187 # they do not add to the forced_breakpoint_count
15188 elsif ( $token eq 'or' ) {
15189 push @{ $rand_or_list[$depth][0] }, $i;
15190 ++$has_old_logical_breakpoints[$depth]
15191 if ( ( $i == $i_line_start || $i == $i_line_end )
15192 && $rOpts_break_at_old_logical_breakpoints );
15193 if ( $is_logical_container{ $container_type[$depth] } ) {
15196 if ($is_long_line) { set_forced_breakpoint($i) }
15197 elsif ( ( $i == $i_line_start || $i == $i_line_end )
15198 && $rOpts_break_at_old_logical_breakpoints )
15200 $saw_good_breakpoint = 1;
15202 } ## end else [ if ( $is_logical_container...)]
15203 } ## end elsif ( $token eq 'or' )
15204 elsif ( $token eq 'if' || $token eq 'unless' ) {
15205 push @{ $rand_or_list[$depth][4] }, $i;
15206 if ( ( $i == $i_line_start || $i == $i_line_end )
15207 && $rOpts_break_at_old_logical_breakpoints )
15209 set_forced_breakpoint($i);
15211 } ## end elsif ( $token eq 'if' ||...)
15212 } ## end elsif ( $type eq 'k' )
15213 elsif ( $is_assignment{$type} ) {
15214 $i_equals[$depth] = $i;
15217 if ($type_sequence) {
15219 # handle any postponed closing breakpoints
15220 if ( $token =~ /^[\)\]\}\:]$/ ) {
15221 if ( $type eq ':' ) {
15222 $last_colon_sequence_number = $type_sequence;
15224 # retain break at a ':' line break
15225 if ( ( $i == $i_line_start || $i == $i_line_end )
15226 && $rOpts_break_at_old_ternary_breakpoints )
15229 set_forced_breakpoint($i);
15231 # break at previous '='
15232 if ( $i_equals[$depth] > 0 ) {
15233 set_forced_breakpoint( $i_equals[$depth] );
15234 $i_equals[$depth] = -1;
15236 } ## end if ( ( $i == $i_line_start...))
15237 } ## end if ( $type eq ':' )
15238 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
15239 my $inc = ( $type eq ':' ) ? 0 : 1;
15240 set_forced_breakpoint( $i - $inc );
15241 delete $postponed_breakpoint{$type_sequence};
15243 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
15245 # set breaks at ?/: if they will get separated (and are
15246 # not a ?/: chain), or if the '?' is at the end of the
15248 elsif ( $token eq '?' ) {
15249 my $i_colon = $mate_index_to_go[$i];
15251 $i_colon <= 0 # the ':' is not in this batch
15252 || $i == 0 # this '?' is the first token of the line
15254 $max_index_to_go # or this '?' is the last token
15258 # don't break at a '?' if preceded by ':' on
15259 # this line of previous ?/: pair on this line.
15260 # This is an attempt to preserve a chain of ?/:
15261 # expressions (elsif2.t). And don't break if
15262 # this has a side comment.
15263 set_forced_breakpoint($i)
15265 $type_sequence == (
15266 $last_colon_sequence_number +
15267 TYPE_SEQUENCE_INCREMENT
15269 || $tokens_to_go[$max_index_to_go] eq '#'
15271 set_closing_breakpoint($i);
15272 } ## end if ( $i_colon <= 0 ||...)
15273 } ## end elsif ( $token eq '?' )
15274 } ## end if ($type_sequence)
15276 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
15278 #------------------------------------------------------------
15279 # Handle Increasing Depth..
15281 # prepare for a new list when depth increases
15282 # token $i is a '(','{', or '['
15283 #------------------------------------------------------------
15284 if ( $depth > $current_depth ) {
15286 $breakpoint_stack[$depth] = $forced_breakpoint_count;
15287 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
15288 $has_broken_sublist[$depth] = 0;
15289 $identifier_count_stack[$depth] = 0;
15290 $index_before_arrow[$depth] = -1;
15291 $interrupted_list[$depth] = 0;
15292 $item_count_stack[$depth] = 0;
15293 $last_comma_index[$depth] = undef;
15294 $last_dot_index[$depth] = undef;
15295 $last_nonblank_type[$depth] = $last_nonblank_type;
15296 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
15297 $opening_structure_index_stack[$depth] = $i;
15298 $rand_or_list[$depth] = [];
15299 $rfor_semicolon_list[$depth] = [];
15300 $i_equals[$depth] = -1;
15301 $want_comma_break[$depth] = 0;
15302 $container_type[$depth] =
15303 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
15304 ? $last_nonblank_token
15306 $has_old_logical_breakpoints[$depth] = 0;
15308 # if line ends here then signal closing token to break
15309 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
15311 set_closing_breakpoint($i);
15314 # Not all lists of values should be vertically aligned..
15315 $dont_align[$depth] =
15317 # code BLOCKS are handled at a higher level
15318 ( $block_type ne "" )
15320 # certain paren lists
15321 || ( $type eq '(' ) && (
15323 # it does not usually look good to align a list of
15324 # identifiers in a parameter list, as in:
15325 # my($var1, $var2, ...)
15326 # (This test should probably be refined, for now I'm just
15327 # testing for any keyword)
15328 ( $last_nonblank_type eq 'k' )
15330 # a trailing '(' usually indicates a non-list
15331 || ( $next_nonblank_type eq '(' )
15334 # patch to outdent opening brace of long if/for/..
15335 # statements (like this one). See similar coding in
15336 # set_continuation breaks. We have also catch it here for
15337 # short line fragments which otherwise will not go through
15338 # set_continuation_breaks.
15342 # if we have the ')' but not its '(' in this batch..
15343 && ( $last_nonblank_token eq ')' )
15344 && $mate_index_to_go[$i_last_nonblank_token] < 0
15346 # and user wants brace to left
15347 && !$rOpts->{'opening-brace-always-on-right'}
15349 && ( $type eq '{' ) # should be true
15350 && ( $token eq '{' ) # should be true
15353 set_forced_breakpoint( $i - 1 );
15354 } ## end if ( $block_type && ( ...))
15355 } ## end if ( $depth > $current_depth)
15357 #------------------------------------------------------------
15358 # Handle Decreasing Depth..
15360 # finish off any old list when depth decreases
15361 # token $i is a ')','}', or ']'
15362 #------------------------------------------------------------
15363 elsif ( $depth < $current_depth ) {
15365 check_for_new_minimum_depth($depth);
15367 # force all outer logical containers to break after we see on
15369 $has_old_logical_breakpoints[$depth] ||=
15370 $has_old_logical_breakpoints[$current_depth];
15372 # Patch to break between ') {' if the paren list is broken.
15373 # There is similar logic in set_continuation_breaks for
15374 # non-broken lists.
15376 && $next_nonblank_block_type
15377 && $interrupted_list[$current_depth]
15378 && $next_nonblank_type eq '{'
15379 && !$rOpts->{'opening-brace-always-on-right'} )
15381 set_forced_breakpoint($i);
15382 } ## end if ( $token eq ')' && ...
15384 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
15386 # set breaks at commas if necessary
15387 my ( $bp_count, $do_not_break_apart ) =
15388 set_comma_breakpoints($current_depth);
15390 my $i_opening = $opening_structure_index_stack[$current_depth];
15391 my $saw_opening_structure = ( $i_opening >= 0 );
15393 # this term is long if we had to break at interior commas..
15394 my $is_long_term = $bp_count > 0;
15396 # If this is a short container with one or more comma arrows,
15397 # then we will mark it as a long term to open it if requested.
15398 # $rOpts_comma_arrow_breakpoints =
15399 # 0 - open only if comma precedes closing brace
15400 # 1 - stable: except for one line blocks
15401 # 2 - try to form 1 line blocks
15403 # 4 - always open up if vt=0
15404 # 5 - stable: even for one line blocks if vt=0
15407 ##BUBBA: TYPO && $tokens_to_go[$i_opening] =~ /^[\(\{\]L]$/
15408 && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
15409 && $index_before_arrow[ $depth + 1 ] > 0
15410 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
15413 $is_long_term = $rOpts_comma_arrow_breakpoints == 4
15414 || ( $rOpts_comma_arrow_breakpoints == 0
15415 && $last_nonblank_token eq ',' )
15416 || ( $rOpts_comma_arrow_breakpoints == 5
15417 && $old_breakpoint_to_go[$i_opening] );
15418 } ## end if ( !$is_long_term &&...)
15420 # mark term as long if the length between opening and closing
15421 # parens exceeds allowed line length
15422 if ( !$is_long_term && $saw_opening_structure ) {
15423 my $i_opening_minus = find_token_starting_list($i_opening);
15425 # Note: we have to allow for one extra space after a
15426 # closing token so that we do not strand a comma or
15427 # semicolon, hence the '>=' here (oneline.t)
15429 excess_line_length( $i_opening_minus, $i ) >= 0;
15430 } ## end if ( !$is_long_term &&...)
15432 # We've set breaks after all comma-arrows. Now we have to
15433 # undo them if this can be a one-line block
15434 # (the only breakpoints set will be due to comma-arrows)
15437 # user doesn't require breaking after all comma-arrows
15438 ( $rOpts_comma_arrow_breakpoints != 0 )
15439 && ( $rOpts_comma_arrow_breakpoints != 4 )
15441 # and if the opening structure is in this batch
15442 && $saw_opening_structure
15444 # and either on the same old line
15446 $old_breakpoint_count_stack[$current_depth] ==
15447 $last_old_breakpoint_count
15449 # or user wants to form long blocks with arrows
15450 || $rOpts_comma_arrow_breakpoints == 2
15453 # and we made some breakpoints between the opening and closing
15454 && ( $breakpoint_undo_stack[$current_depth] <
15455 $forced_breakpoint_undo_count )
15457 # and this block is short enough to fit on one line
15458 # Note: use < because need 1 more space for possible comma
15463 undo_forced_breakpoint_stack(
15464 $breakpoint_undo_stack[$current_depth] );
15465 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
15467 # now see if we have any comma breakpoints left
15468 my $has_comma_breakpoints =
15469 ( $breakpoint_stack[$current_depth] !=
15470 $forced_breakpoint_count );
15472 # update broken-sublist flag of the outer container
15473 $has_broken_sublist[$depth] =
15474 $has_broken_sublist[$depth]
15475 || $has_broken_sublist[$current_depth]
15477 || $has_comma_breakpoints;
15479 # Having come to the closing ')', '}', or ']', now we have to decide if we
15480 # should 'open up' the structure by placing breaks at the opening and
15481 # closing containers. This is a tricky decision. Here are some of the
15482 # basic considerations:
15484 # -If this is a BLOCK container, then any breakpoints will have already
15485 # been set (and according to user preferences), so we need do nothing here.
15487 # -If we have a comma-separated list for which we can align the list items,
15488 # then we need to do so because otherwise the vertical aligner cannot
15489 # currently do the alignment.
15491 # -If this container does itself contain a container which has been broken
15492 # open, then it should be broken open to properly show the structure.
15494 # -If there is nothing to align, and no other reason to break apart,
15495 # then do not do it.
15497 # We will not break open the parens of a long but 'simple' logical expression.
15500 # This is an example of a simple logical expression and its formatting:
15502 # if ( $bigwasteofspace1 && $bigwasteofspace2
15503 # || $bigwasteofspace3 && $bigwasteofspace4 )
15505 # Most people would prefer this than the 'spacey' version:
15508 # $bigwasteofspace1 && $bigwasteofspace2
15509 # || $bigwasteofspace3 && $bigwasteofspace4
15512 # To illustrate the rules for breaking logical expressions, consider:
15516 # and ( exists $ids_excl_uc{$id_uc}
15517 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
15519 # This is on the verge of being difficult to read. The current default is to
15520 # open it up like this:
15525 # and ( exists $ids_excl_uc{$id_uc}
15526 # or grep $id_uc =~ /$_/, @ids_excl_uc )
15529 # This is a compromise which tries to avoid being too dense and to spacey.
15530 # A more spaced version would be:
15536 # exists $ids_excl_uc{$id_uc}
15537 # or grep $id_uc =~ /$_/, @ids_excl_uc
15541 # Some people might prefer the spacey version -- an option could be added. The
15542 # innermost expression contains a long block '( exists $ids_... ')'.
15544 # Here is how the logic goes: We will force a break at the 'or' that the
15545 # innermost expression contains, but we will not break apart its opening and
15546 # closing containers because (1) it contains no multi-line sub-containers itself,
15547 # and (2) there is no alignment to be gained by breaking it open like this
15550 # exists $ids_excl_uc{$id_uc}
15551 # or grep $id_uc =~ /$_/, @ids_excl_uc
15554 # (although this looks perfectly ok and might be good for long expressions). The
15555 # outer 'if' container, though, contains a broken sub-container, so it will be
15556 # broken open to avoid too much density. Also, since it contains no 'or's, there
15557 # will be a forced break at its 'and'.
15559 # set some flags telling something about this container..
15560 my $is_simple_logical_expression = 0;
15561 if ( $item_count_stack[$current_depth] == 0
15562 && $saw_opening_structure
15563 && $tokens_to_go[$i_opening] eq '('
15564 && $is_logical_container{ $container_type[$current_depth] }
15568 # This seems to be a simple logical expression with
15569 # no existing breakpoints. Set a flag to prevent
15571 if ( !$has_comma_breakpoints ) {
15572 $is_simple_logical_expression = 1;
15575 # This seems to be a simple logical expression with
15576 # breakpoints (broken sublists, for example). Break
15577 # at all 'or's and '||'s.
15579 set_logical_breakpoints($current_depth);
15581 } ## end if ( $item_count_stack...)
15584 && @{ $rfor_semicolon_list[$current_depth] } )
15586 set_for_semicolon_breakpoints($current_depth);
15588 # open up a long 'for' or 'foreach' container to allow
15589 # leading term alignment unless -lp is used.
15590 $has_comma_breakpoints = 1
15591 unless $rOpts_line_up_parentheses;
15592 } ## end if ( $is_long_term && ...)
15596 # breaks for code BLOCKS are handled at a higher level
15599 # we do not need to break at the top level of an 'if'
15601 && !$is_simple_logical_expression
15603 ## modification to keep ': (' containers vertically tight;
15604 ## but probably better to let user set -vt=1 to avoid
15605 ## inconsistency with other paren types
15606 ## && ($container_type[$current_depth] ne ':')
15608 # otherwise, we require one of these reasons for breaking:
15611 # - this term has forced line breaks
15612 $has_comma_breakpoints
15614 # - the opening container is separated from this batch
15615 # for some reason (comment, blank line, code block)
15616 # - this is a non-paren container spanning multiple lines
15617 || !$saw_opening_structure
15619 # - this is a long block contained in another breakable
15622 && $container_environment_to_go[$i_opening] ne
15628 # For -lp option, we must put a breakpoint before
15629 # the token which has been identified as starting
15630 # this indentation level. This is necessary for
15631 # proper alignment.
15632 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
15634 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
15635 if ( $i_opening + 1 < $max_index_to_go
15636 && $types_to_go[ $i_opening + 1 ] eq 'b' )
15638 $item = $leading_spaces_to_go[ $i_opening + 2 ];
15640 if ( defined($item) ) {
15641 my $i_start_2 = $item->get_STARTING_INDEX();
15643 defined($i_start_2)
15645 # we are breaking after an opening brace, paren,
15646 # so don't break before it too
15647 && $i_start_2 ne $i_opening
15651 # Only break for breakpoints at the same
15652 # indentation level as the opening paren
15653 my $test1 = $nesting_depth_to_go[$i_opening];
15654 my $test2 = $nesting_depth_to_go[$i_start_2];
15655 if ( $test2 == $test1 ) {
15656 set_forced_breakpoint( $i_start_2 - 1 );
15658 } ## end if ( defined($i_start_2...))
15659 } ## end if ( defined($item) )
15660 } ## end if ( $rOpts_line_up_parentheses...)
15662 # break after opening structure.
15663 # note: break before closing structure will be automatic
15664 if ( $minimum_depth <= $current_depth ) {
15666 set_forced_breakpoint($i_opening)
15667 unless ( $do_not_break_apart
15668 || is_unbreakable_container($current_depth) );
15670 # break at ',' of lower depth level before opening token
15671 if ( $last_comma_index[$depth] ) {
15672 set_forced_breakpoint( $last_comma_index[$depth] );
15675 # break at '.' of lower depth level before opening token
15676 if ( $last_dot_index[$depth] ) {
15677 set_forced_breakpoint( $last_dot_index[$depth] );
15680 # break before opening structure if preceded by another
15681 # closing structure and a comma. This is normally
15682 # done by the previous closing brace, but not
15683 # if it was a one-line block.
15684 if ( $i_opening > 2 ) {
15686 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
15690 if ( $types_to_go[$i_prev] eq ','
15691 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
15693 set_forced_breakpoint($i_prev);
15696 # also break before something like ':(' or '?('
15699 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
15701 my $token_prev = $tokens_to_go[$i_prev];
15702 if ( $want_break_before{$token_prev} ) {
15703 set_forced_breakpoint($i_prev);
15705 } ## end elsif ( $types_to_go[$i_prev...])
15706 } ## end if ( $i_opening > 2 )
15707 } ## end if ( $minimum_depth <=...)
15709 # break after comma following closing structure
15710 if ( $next_type eq ',' ) {
15711 set_forced_breakpoint( $i + 1 );
15714 # break before an '=' following closing structure
15716 $is_assignment{$next_nonblank_type}
15717 && ( $breakpoint_stack[$current_depth] !=
15718 $forced_breakpoint_count )
15721 set_forced_breakpoint($i);
15722 } ## end if ( $is_assignment{$next_nonblank_type...})
15724 # break at any comma before the opening structure Added
15725 # for -lp, but seems to be good in general. It isn't
15726 # obvious how far back to look; the '5' below seems to
15727 # work well and will catch the comma in something like
15728 # push @list, myfunc( $param, $param, ..
15730 my $icomma = $last_comma_index[$depth];
15731 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
15732 unless ( $forced_breakpoint_to_go[$icomma] ) {
15733 set_forced_breakpoint($icomma);
15736 } # end logic to open up a container
15738 # Break open a logical container open if it was already open
15739 elsif ($is_simple_logical_expression
15740 && $has_old_logical_breakpoints[$current_depth] )
15742 set_logical_breakpoints($current_depth);
15745 # Handle long container which does not get opened up
15746 elsif ($is_long_term) {
15748 # must set fake breakpoint to alert outer containers that
15750 set_fake_breakpoint();
15751 } ## end elsif ($is_long_term)
15753 } ## end elsif ( $depth < $current_depth)
15755 #------------------------------------------------------------
15756 # Handle this token
15757 #------------------------------------------------------------
15759 $current_depth = $depth;
15761 # handle comma-arrow
15762 if ( $type eq '=>' ) {
15763 next if ( $last_nonblank_type eq '=>' );
15764 next if $rOpts_break_at_old_comma_breakpoints;
15765 next if $rOpts_comma_arrow_breakpoints == 3;
15766 $want_comma_break[$depth] = 1;
15767 $index_before_arrow[$depth] = $i_last_nonblank_token;
15769 } ## end if ( $type eq '=>' )
15771 elsif ( $type eq '.' ) {
15772 $last_dot_index[$depth] = $i;
15775 # Turn off alignment if we are sure that this is not a list
15776 # environment. To be safe, we will do this if we see certain
15777 # non-list tokens, such as ';', and also the environment is
15778 # not a list. Note that '=' could be in any of the = operators
15779 # (lextest.t). We can't just use the reported environment
15780 # because it can be incorrect in some cases.
15781 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
15782 && $container_environment_to_go[$i] ne 'LIST' )
15784 $dont_align[$depth] = 1;
15785 $want_comma_break[$depth] = 0;
15786 $index_before_arrow[$depth] = -1;
15787 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
15789 # now just handle any commas
15790 next unless ( $type eq ',' );
15792 $last_dot_index[$depth] = undef;
15793 $last_comma_index[$depth] = $i;
15795 # break here if this comma follows a '=>'
15796 # but not if there is a side comment after the comma
15797 if ( $want_comma_break[$depth] ) {
15799 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
15800 if ($rOpts_comma_arrow_breakpoints) {
15801 $want_comma_break[$depth] = 0;
15802 ##$index_before_arrow[$depth] = -1;
15807 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15809 # break before the previous token if it looks safe
15810 # Example of something that we will not try to break before:
15811 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
15812 # Also we don't want to break at a binary operator (like +):
15816 # $y - $R, -fill => 'black',
15818 my $ibreak = $index_before_arrow[$depth] - 1;
15820 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
15822 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
15823 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
15824 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
15826 # don't break pointer calls, such as the following:
15827 # File::Spec->curdir => 1,
15828 # (This is tokenized as adjacent 'w' tokens)
15829 if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
15830 set_forced_breakpoint($ibreak);
15832 } ## end if ( $types_to_go[$ibreak...])
15833 } ## end if ( $ibreak > 0 && $tokens_to_go...)
15835 $want_comma_break[$depth] = 0;
15836 $index_before_arrow[$depth] = -1;
15838 # handle list which mixes '=>'s and ','s:
15839 # treat any list items so far as an interrupted list
15840 $interrupted_list[$depth] = 1;
15842 } ## end if ( $want_comma_break...)
15844 # break after all commas above starting depth
15845 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
15846 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15850 # add this comma to the list..
15851 my $item_count = $item_count_stack[$depth];
15852 if ( $item_count == 0 ) {
15854 # but do not form a list with no opening structure
15857 # open INFILE_COPY, ">$input_file_copy"
15858 # or die ("very long message");
15860 if ( ( $opening_structure_index_stack[$depth] < 0 )
15861 && $container_environment_to_go[$i] eq 'BLOCK' )
15863 $dont_align[$depth] = 1;
15865 } ## end if ( $item_count == 0 )
15867 $comma_index[$depth][$item_count] = $i;
15868 ++$item_count_stack[$depth];
15869 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
15870 $identifier_count_stack[$depth]++;
15872 } ## end while ( ++$i <= $max_index_to_go)
15874 #-------------------------------------------
15875 # end of loop over all tokens in this batch
15876 #-------------------------------------------
15878 # set breaks for any unfinished lists ..
15879 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
15881 $interrupted_list[$dd] = 1;
15882 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
15883 set_comma_breakpoints($dd);
15884 set_logical_breakpoints($dd)
15885 if ( $has_old_logical_breakpoints[$dd] );
15886 set_for_semicolon_breakpoints($dd);
15888 # break open container...
15889 my $i_opening = $opening_structure_index_stack[$dd];
15890 set_forced_breakpoint($i_opening)
15892 is_unbreakable_container($dd)
15894 # Avoid a break which would place an isolated ' or "
15897 && $i_opening >= $max_index_to_go - 2
15898 && $token =~ /^['"]$/ )
15900 } ## end for ( my $dd = $current_depth...)
15902 # Return a flag indicating if the input file had some good breakpoints.
15903 # This flag will be used to force a break in a line shorter than the
15904 # allowed line length.
15905 if ( $has_old_logical_breakpoints[$current_depth] ) {
15906 $saw_good_breakpoint = 1;
15909 # A complex line with one break at an = has a good breakpoint.
15910 # This is not complex ($total_depth_variation=0):
15914 # This is complex ($total_depth_variation=6):
15916 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
15917 elsif ($i_old_assignment_break
15918 && $total_depth_variation > 4
15919 && $old_breakpoint_count == 1 )
15921 $saw_good_breakpoint = 1;
15922 } ## end elsif ( $i_old_assignment_break...)
15924 return $saw_good_breakpoint;
15925 } ## end sub scan_list
15928 sub find_token_starting_list {
15930 # When testing to see if a block will fit on one line, some
15931 # previous token(s) may also need to be on the line; particularly
15932 # if this is a sub call. So we will look back at least one
15933 # token. NOTE: This isn't perfect, but not critical, because
15934 # if we mis-identify a block, it will be wrapped and therefore
15935 # fixed the next time it is formatted.
15936 my $i_opening_paren = shift;
15937 my $i_opening_minus = $i_opening_paren;
15938 my $im1 = $i_opening_paren - 1;
15939 my $im2 = $i_opening_paren - 2;
15940 my $im3 = $i_opening_paren - 3;
15941 my $typem1 = $types_to_go[$im1];
15942 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
15943 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
15944 $i_opening_minus = $i_opening_paren;
15946 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
15947 $i_opening_minus = $im1 if $im1 >= 0;
15949 # walk back to improve length estimate
15950 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
15951 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
15952 $i_opening_minus = $j;
15954 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
15956 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
15957 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
15958 $i_opening_minus = $im2;
15960 return $i_opening_minus;
15963 { # begin set_comma_breakpoints_do
15965 my %is_keyword_with_special_leading_term;
15969 # These keywords have prototypes which allow a special leading item
15970 # followed by a list
15972 qw(formline grep kill map printf sprintf push chmod join pack unshift);
15973 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
15976 sub set_comma_breakpoints_do {
15978 # Given a list with some commas, set breakpoints at some of the
15979 # commas, if necessary, to make it easy to read. This list is
15982 $depth, $i_opening_paren, $i_closing_paren,
15983 $item_count, $identifier_count, $rcomma_index,
15984 $next_nonblank_type, $list_type, $interrupted,
15985 $rdo_not_break_apart, $must_break_open,
15988 # nothing to do if no commas seen
15989 return if ( $item_count < 1 );
15990 my $i_first_comma = $$rcomma_index[0];
15991 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
15992 my $i_last_comma = $i_true_last_comma;
15993 if ( $i_last_comma >= $max_index_to_go ) {
15994 $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
15995 return if ( $item_count < 1 );
15998 #---------------------------------------------------------------
15999 # find lengths of all items in the list to calculate page layout
16000 #---------------------------------------------------------------
16001 my $comma_count = $item_count;
16007 my @max_length = ( 0, 0 );
16008 my $first_term_length;
16009 my $i = $i_opening_paren;
16012 for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
16013 $is_odd = 1 - $is_odd;
16014 $i_prev_plus = $i + 1;
16015 $i = $$rcomma_index[$j];
16018 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
16020 ( $types_to_go[$i_prev_plus] eq 'b' )
16023 push @i_term_begin, $i_term_begin;
16024 push @i_term_end, $i_term_end;
16025 push @i_term_comma, $i;
16027 # note: currently adding 2 to all lengths (for comma and space)
16029 2 + token_sequence_length( $i_term_begin, $i_term_end );
16030 push @item_lengths, $length;
16033 $first_term_length = $length;
16037 if ( $length > $max_length[$is_odd] ) {
16038 $max_length[$is_odd] = $length;
16043 # now we have to make a distinction between the comma count and item
16044 # count, because the item count will be one greater than the comma
16045 # count if the last item is not terminated with a comma
16047 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
16048 ? $i_last_comma + 1
16051 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
16052 ? $i_closing_paren - 2
16053 : $i_closing_paren - 1;
16054 my $i_effective_last_comma = $i_last_comma;
16056 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
16058 if ( $last_item_length > 0 ) {
16060 # add 2 to length because other lengths include a comma and a blank
16061 $last_item_length += 2;
16062 push @item_lengths, $last_item_length;
16063 push @i_term_begin, $i_b + 1;
16064 push @i_term_end, $i_e;
16065 push @i_term_comma, undef;
16067 my $i_odd = $item_count % 2;
16069 if ( $last_item_length > $max_length[$i_odd] ) {
16070 $max_length[$i_odd] = $last_item_length;
16074 $i_effective_last_comma = $i_e + 1;
16076 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
16077 $identifier_count++;
16081 #---------------------------------------------------------------
16082 # End of length calculations
16083 #---------------------------------------------------------------
16085 #---------------------------------------------------------------
16086 # Compound List Rule 1:
16087 # Break at (almost) every comma for a list containing a broken
16088 # sublist. This has higher priority than the Interrupted List
16090 #---------------------------------------------------------------
16091 if ( $has_broken_sublist[$depth] ) {
16093 # Break at every comma except for a comma between two
16094 # simple, small terms. This prevents long vertical
16095 # columns of, say, just 0's.
16096 my $small_length = 10; # 2 + actual maximum length wanted
16098 # We'll insert a break in long runs of small terms to
16099 # allow alignment in uniform tables.
16100 my $skipped_count = 0;
16101 my $columns = table_columns_available($i_first_comma);
16102 my $fields = int( $columns / $small_length );
16103 if ( $rOpts_maximum_fields_per_table
16104 && $fields > $rOpts_maximum_fields_per_table )
16106 $fields = $rOpts_maximum_fields_per_table;
16108 my $max_skipped_count = $fields - 1;
16110 my $is_simple_last_term = 0;
16111 my $is_simple_next_term = 0;
16112 foreach my $j ( 0 .. $item_count ) {
16113 $is_simple_last_term = $is_simple_next_term;
16114 $is_simple_next_term = 0;
16115 if ( $j < $item_count
16116 && $i_term_end[$j] == $i_term_begin[$j]
16117 && $item_lengths[$j] <= $small_length )
16119 $is_simple_next_term = 1;
16122 if ( $is_simple_last_term
16123 && $is_simple_next_term
16124 && $skipped_count < $max_skipped_count )
16129 $skipped_count = 0;
16130 my $i = $i_term_comma[ $j - 1 ];
16131 last unless defined $i;
16132 set_forced_breakpoint($i);
16136 # always break at the last comma if this list is
16137 # interrupted; we wouldn't want to leave a terminal '{', for
16139 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
16143 #my ( $a, $b, $c ) = caller();
16144 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
16145 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
16146 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
16148 #---------------------------------------------------------------
16149 # Interrupted List Rule:
16150 # A list is forced to use old breakpoints if it was interrupted
16151 # by side comments or blank lines, or requested by user.
16152 #---------------------------------------------------------------
16153 if ( $rOpts_break_at_old_comma_breakpoints
16155 || $i_opening_paren < 0 )
16157 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
16161 #---------------------------------------------------------------
16162 # Looks like a list of items. We have to look at it and size it up.
16163 #---------------------------------------------------------------
16165 my $opening_token = $tokens_to_go[$i_opening_paren];
16166 my $opening_environment =
16167 $container_environment_to_go[$i_opening_paren];
16169 #-------------------------------------------------------------------
16170 # Return if this will fit on one line
16171 #-------------------------------------------------------------------
16173 my $i_opening_minus = find_token_starting_list($i_opening_paren);
16175 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
16177 #-------------------------------------------------------------------
16178 # Now we know that this block spans multiple lines; we have to set
16179 # at least one breakpoint -- real or fake -- as a signal to break
16180 # open any outer containers.
16181 #-------------------------------------------------------------------
16182 set_fake_breakpoint();
16184 # be sure we do not extend beyond the current list length
16185 if ( $i_effective_last_comma >= $max_index_to_go ) {
16186 $i_effective_last_comma = $max_index_to_go - 1;
16189 # Set a flag indicating if we need to break open to keep -lp
16190 # items aligned. This is necessary if any of the list terms
16191 # exceeds the available space after the '('.
16192 my $need_lp_break_open = $must_break_open;
16193 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
16194 my $columns_if_unbroken =
16195 maximum_line_length($i_opening_minus) -
16196 total_line_length( $i_opening_minus, $i_opening_paren );
16197 $need_lp_break_open =
16198 ( $max_length[0] > $columns_if_unbroken )
16199 || ( $max_length[1] > $columns_if_unbroken )
16200 || ( $first_term_length > $columns_if_unbroken );
16203 # Specify if the list must have an even number of fields or not.
16204 # It is generally safest to assume an even number, because the
16205 # list items might be a hash list. But if we can be sure that
16206 # it is not a hash, then we can allow an odd number for more
16208 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
16210 if ( $identifier_count >= $item_count - 1
16211 || $is_assignment{$next_nonblank_type}
16212 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
16218 # do we have a long first term which should be
16219 # left on a line by itself?
16220 my $use_separate_first_term = (
16221 $odd_or_even == 1 # only if we can use 1 field/line
16222 && $item_count > 3 # need several items
16223 && $first_term_length >
16224 2 * $max_length[0] - 2 # need long first term
16225 && $first_term_length >
16226 2 * $max_length[1] - 2 # need long first term
16229 # or do we know from the type of list that the first term should
16231 if ( !$use_separate_first_term ) {
16232 if ( $is_keyword_with_special_leading_term{$list_type} ) {
16233 $use_separate_first_term = 1;
16235 # should the container be broken open?
16236 if ( $item_count < 3 ) {
16237 if ( $i_first_comma - $i_opening_paren < 4 ) {
16238 $$rdo_not_break_apart = 1;
16241 elsif ($first_term_length < 20
16242 && $i_first_comma - $i_opening_paren < 4 )
16244 my $columns = table_columns_available($i_first_comma);
16245 if ( $first_term_length < $columns ) {
16246 $$rdo_not_break_apart = 1;
16253 if ($use_separate_first_term) {
16255 # ..set a break and update starting values
16256 $use_separate_first_term = 1;
16257 set_forced_breakpoint($i_first_comma);
16258 $i_opening_paren = $i_first_comma;
16259 $i_first_comma = $$rcomma_index[1];
16261 return if $comma_count == 1;
16262 shift @item_lengths;
16263 shift @i_term_begin;
16265 shift @i_term_comma;
16268 # if not, update the metrics to include the first term
16270 if ( $first_term_length > $max_length[0] ) {
16271 $max_length[0] = $first_term_length;
16275 # Field width parameters
16276 my $pair_width = ( $max_length[0] + $max_length[1] );
16278 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
16280 # Number of free columns across the page width for laying out tables
16281 my $columns = table_columns_available($i_first_comma);
16283 # Estimated maximum number of fields which fit this space
16284 # This will be our first guess
16285 my $number_of_fields_max =
16286 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
16288 my $number_of_fields = $number_of_fields_max;
16290 # Find the best-looking number of fields
16291 # and make this our second guess if possible
16292 my ( $number_of_fields_best, $ri_ragged_break_list,
16293 $new_identifier_count )
16294 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
16297 if ( $number_of_fields_best != 0
16298 && $number_of_fields_best < $number_of_fields_max )
16300 $number_of_fields = $number_of_fields_best;
16303 # ----------------------------------------------------------------------
16304 # If we are crowded and the -lp option is being used, try to
16305 # undo some indentation
16306 # ----------------------------------------------------------------------
16308 $rOpts_line_up_parentheses
16310 $number_of_fields == 0
16311 || ( $number_of_fields == 1
16312 && $number_of_fields != $number_of_fields_best )
16316 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
16317 if ( $available_spaces > 0 ) {
16319 my $spaces_wanted = $max_width - $columns; # for 1 field
16321 if ( $number_of_fields_best == 0 ) {
16322 $number_of_fields_best =
16323 get_maximum_fields_wanted( \@item_lengths );
16326 if ( $number_of_fields_best != 1 ) {
16327 my $spaces_wanted_2 =
16328 1 + $pair_width - $columns; # for 2 fields
16329 if ( $available_spaces > $spaces_wanted_2 ) {
16330 $spaces_wanted = $spaces_wanted_2;
16334 if ( $spaces_wanted > 0 ) {
16335 my $deleted_spaces =
16336 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
16339 if ( $deleted_spaces > 0 ) {
16340 $columns = table_columns_available($i_first_comma);
16341 $number_of_fields_max =
16342 maximum_number_of_fields( $columns, $odd_or_even,
16343 $max_width, $pair_width );
16344 $number_of_fields = $number_of_fields_max;
16346 if ( $number_of_fields_best == 1
16347 && $number_of_fields >= 1 )
16349 $number_of_fields = $number_of_fields_best;
16356 # try for one column if two won't work
16357 if ( $number_of_fields <= 0 ) {
16358 $number_of_fields = int( $columns / $max_width );
16361 # The user can place an upper bound on the number of fields,
16362 # which can be useful for doing maintenance on tables
16363 if ( $rOpts_maximum_fields_per_table
16364 && $number_of_fields > $rOpts_maximum_fields_per_table )
16366 $number_of_fields = $rOpts_maximum_fields_per_table;
16369 # How many columns (characters) and lines would this container take
16370 # if no additional whitespace were added?
16371 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
16372 $i_effective_last_comma + 1 );
16373 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
16374 my $packed_lines = 1 + int( $packed_columns / $columns );
16376 # are we an item contained in an outer list?
16377 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
16379 if ( $number_of_fields <= 0 ) {
16381 # #---------------------------------------------------------------
16382 # # We're in trouble. We can't find a single field width that works.
16383 # # There is no simple answer here; we may have a single long list
16385 # #---------------------------------------------------------------
16387 # In many cases, it may be best to not force a break if there is just one
16388 # comma, because the standard continuation break logic will do a better
16391 # In the common case that all but one of the terms can fit
16392 # on a single line, it may look better not to break open the
16393 # containing parens. Consider, for example
16397 # sort { $color_value{$::a} <=> $color_value{$::b}; }
16400 # which will look like this with the container broken:
16404 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
16407 # Here is an example of this rule for a long last term:
16409 # log_message( 0, 256, 128,
16410 # "Number of routes in adj-RIB-in to be considered: $peercount" );
16412 # And here is an example with a long first term:
16415 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
16416 # $r, $pu, $ps, $cu, $cs, $tt
16418 # if $style eq 'all';
16420 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
16421 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
16422 my $long_first_term =
16423 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
16425 # break at every comma ...
16428 # if requested by user or is best looking
16429 $number_of_fields_best == 1
16431 # or if this is a sublist of a larger list
16432 || $in_hierarchical_list
16434 # or if multiple commas and we don't have a long first or last
16436 || ( $comma_count > 1
16437 && !( $long_last_term || $long_first_term ) )
16440 foreach ( 0 .. $comma_count - 1 ) {
16441 set_forced_breakpoint( $$rcomma_index[$_] );
16444 elsif ($long_last_term) {
16446 set_forced_breakpoint($i_last_comma);
16447 $$rdo_not_break_apart = 1 unless $must_break_open;
16449 elsif ($long_first_term) {
16451 set_forced_breakpoint($i_first_comma);
16455 # let breaks be defined by default bond strength logic
16460 # --------------------------------------------------------
16461 # We have a tentative field count that seems to work.
16462 # How many lines will this require?
16463 # --------------------------------------------------------
16464 my $formatted_lines = $item_count / ($number_of_fields);
16465 if ( $formatted_lines != int $formatted_lines ) {
16466 $formatted_lines = 1 + int $formatted_lines;
16469 # So far we've been trying to fill out to the right margin. But
16470 # compact tables are easier to read, so let's see if we can use fewer
16471 # fields without increasing the number of lines.
16472 $number_of_fields =
16473 compactify_table( $item_count, $number_of_fields, $formatted_lines,
16476 # How many spaces across the page will we fill?
16477 my $columns_per_line =
16478 ( int $number_of_fields / 2 ) * $pair_width +
16479 ( $number_of_fields % 2 ) * $max_width;
16481 my $formatted_columns;
16483 if ( $number_of_fields > 1 ) {
16484 $formatted_columns =
16485 ( $pair_width * ( int( $item_count / 2 ) ) +
16486 ( $item_count % 2 ) * $max_width );
16489 $formatted_columns = $max_width * $item_count;
16491 if ( $formatted_columns < $packed_columns ) {
16492 $formatted_columns = $packed_columns;
16495 my $unused_columns = $formatted_columns - $packed_columns;
16497 # set some empirical parameters to help decide if we should try to
16498 # align; high sparsity does not look good, especially with few lines
16499 my $sparsity = ($unused_columns) / ($formatted_columns);
16500 my $max_allowed_sparsity =
16501 ( $item_count < 3 ) ? 0.1
16502 : ( $packed_lines == 1 ) ? 0.15
16503 : ( $packed_lines == 2 ) ? 0.4
16506 # Begin check for shortcut methods, which avoid treating a list
16507 # as a table for relatively small parenthesized lists. These
16508 # are usually easier to read if not formatted as tables.
16510 $packed_lines <= 2 # probably can fit in 2 lines
16511 && $item_count < 9 # doesn't have too many items
16512 && $opening_environment eq 'BLOCK' # not a sub-container
16513 && $opening_token eq '(' # is paren list
16517 # Shortcut method 1: for -lp and just one comma:
16518 # This is a no-brainer, just break at the comma.
16520 $rOpts_line_up_parentheses # -lp
16521 && $item_count == 2 # two items, one comma
16522 && !$must_break_open
16525 my $i_break = $$rcomma_index[0];
16526 set_forced_breakpoint($i_break);
16527 $$rdo_not_break_apart = 1;
16528 set_non_alignment_flags( $comma_count, $rcomma_index );
16533 # method 2 is for most small ragged lists which might look
16534 # best if not displayed as a table.
16536 ( $number_of_fields == 2 && $item_count == 3 )
16538 $new_identifier_count > 0 # isn't all quotes
16539 && $sparsity > 0.15
16540 ) # would be fairly spaced gaps if aligned
16544 my $break_count = set_ragged_breakpoints( \@i_term_comma,
16545 $ri_ragged_break_list );
16546 ++$break_count if ($use_separate_first_term);
16548 # NOTE: we should really use the true break count here,
16549 # which can be greater if there are large terms and
16550 # little space, but usually this will work well enough.
16551 unless ($must_break_open) {
16553 if ( $break_count <= 1 ) {
16554 $$rdo_not_break_apart = 1;
16556 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16558 $$rdo_not_break_apart = 1;
16561 set_non_alignment_flags( $comma_count, $rcomma_index );
16565 } # end shortcut methods
16569 FORMATTER_DEBUG_FLAG_SPARSE && do {
16571 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
16575 #---------------------------------------------------------------
16576 # Compound List Rule 2:
16577 # If this list is too long for one line, and it is an item of a
16578 # larger list, then we must format it, regardless of sparsity
16579 # (ian.t). One reason that we have to do this is to trigger
16580 # Compound List Rule 1, above, which causes breaks at all commas of
16581 # all outer lists. In this way, the structure will be properly
16583 #---------------------------------------------------------------
16585 # Decide if this list is too long for one line unless broken
16586 my $total_columns = table_columns_available($i_opening_paren);
16587 my $too_long = $packed_columns > $total_columns;
16589 # For a paren list, include the length of the token just before the
16590 # '(' because this is likely a sub call, and we would have to
16591 # include the sub name on the same line as the list. This is still
16592 # imprecise, but not too bad. (steve.t)
16593 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
16595 $too_long = excess_line_length( $i_opening_minus,
16596 $i_effective_last_comma + 1 ) > 0;
16599 # FIXME: For an item after a '=>', try to include the length of the
16600 # thing before the '=>'. This is crude and should be improved by
16601 # actually looking back token by token.
16602 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
16603 my $i_opening_minus = $i_opening_paren - 4;
16604 if ( $i_opening_minus >= 0 ) {
16605 $too_long = excess_line_length( $i_opening_minus,
16606 $i_effective_last_comma + 1 ) > 0;
16610 # Always break lists contained in '[' and '{' if too long for 1 line,
16611 # and always break lists which are too long and part of a more complex
16613 my $must_break_open_container = $must_break_open
16615 && ( $in_hierarchical_list || $opening_token ne '(' ) );
16617 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
16619 #---------------------------------------------------------------
16620 # The main decision:
16621 # Now decide if we will align the data into aligned columns. Do not
16622 # attempt to align columns if this is a tiny table or it would be
16623 # too spaced. It seems that the more packed lines we have, the
16624 # sparser the list that can be allowed and still look ok.
16625 #---------------------------------------------------------------
16627 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
16628 || ( $formatted_lines < 2 )
16629 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
16633 #---------------------------------------------------------------
16634 # too sparse: would look ugly if aligned in a table;
16635 #---------------------------------------------------------------
16637 # use old breakpoints if this is a 'big' list
16638 # FIXME: goal is to improve set_ragged_breakpoints so that
16639 # this is not necessary.
16640 if ( $packed_lines > 2 && $item_count > 10 ) {
16641 write_logfile_entry("List sparse: using old breakpoints\n");
16642 copy_old_breakpoints( $i_first_comma, $i_last_comma );
16645 # let the continuation logic handle it if 2 lines
16648 my $break_count = set_ragged_breakpoints( \@i_term_comma,
16649 $ri_ragged_break_list );
16650 ++$break_count if ($use_separate_first_term);
16652 unless ($must_break_open_container) {
16653 if ( $break_count <= 1 ) {
16654 $$rdo_not_break_apart = 1;
16656 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16658 $$rdo_not_break_apart = 1;
16661 set_non_alignment_flags( $comma_count, $rcomma_index );
16666 #---------------------------------------------------------------
16667 # go ahead and format as a table
16668 #---------------------------------------------------------------
16669 write_logfile_entry(
16670 "List: auto formatting with $number_of_fields fields/row\n");
16672 my $j_first_break =
16673 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
16676 my $j = $j_first_break ;
16677 $j < $comma_count ;
16678 $j += $number_of_fields
16681 my $i = $$rcomma_index[$j];
16682 set_forced_breakpoint($i);
16688 sub set_non_alignment_flags {
16690 # set flag which indicates that these commas should not be
16692 my ( $comma_count, $rcomma_index ) = @_;
16693 foreach ( 0 .. $comma_count - 1 ) {
16694 $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
16698 sub study_list_complexity {
16700 # Look for complex tables which should be formatted with one term per line.
16701 # Returns the following:
16703 # \@i_ragged_break_list = list of good breakpoints to avoid lines
16704 # which are hard to read
16705 # $number_of_fields_best = suggested number of fields based on
16706 # complexity; = 0 if any number may be used.
16708 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
16709 my $item_count = @{$ri_term_begin};
16710 my $complex_item_count = 0;
16711 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
16712 my $i_max = @{$ritem_lengths} - 1;
16713 ##my @item_complexity;
16715 my $i_last_last_break = -3;
16716 my $i_last_break = -2;
16717 my @i_ragged_break_list;
16719 my $definitely_complex = 30;
16720 my $definitely_simple = 12;
16721 my $quote_count = 0;
16723 for my $i ( 0 .. $i_max ) {
16724 my $ib = $ri_term_begin->[$i];
16725 my $ie = $ri_term_end->[$i];
16727 # define complexity: start with the actual term length
16728 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
16730 ##TBD: join types here and check for variations
16731 ##my $str=join "", @tokens_to_go[$ib..$ie];
16734 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
16738 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
16742 if ( $ib eq $ie ) {
16743 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
16744 $complex_item_count++;
16745 $weighted_length *= 2;
16751 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
16752 $complex_item_count++;
16753 $weighted_length *= 2;
16755 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
16756 $weighted_length += 4;
16760 # add weight for extra tokens.
16761 $weighted_length += 2 * ( $ie - $ib );
16763 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
16764 ## print "# COMPLEXITY:$weighted_length $BUB\n";
16766 ##push @item_complexity, $weighted_length;
16768 # now mark a ragged break after this item it if it is 'long and
16770 if ( $weighted_length >= $definitely_complex ) {
16772 # if we broke after the previous term
16773 # then break before it too
16774 if ( $i_last_break == $i - 1
16776 && $i_last_last_break != $i - 2 )
16779 ## FIXME: don't strand a small term
16780 pop @i_ragged_break_list;
16781 push @i_ragged_break_list, $i - 2;
16782 push @i_ragged_break_list, $i - 1;
16785 push @i_ragged_break_list, $i;
16786 $i_last_last_break = $i_last_break;
16787 $i_last_break = $i;
16790 # don't break before a small last term -- it will
16791 # not look good on a line by itself.
16792 elsif ($i == $i_max
16793 && $i_last_break == $i - 1
16794 && $weighted_length <= $definitely_simple )
16796 pop @i_ragged_break_list;
16800 my $identifier_count = $i_max + 1 - $quote_count;
16802 # Need more tuning here..
16803 if ( $max_width > 12
16804 && $complex_item_count > $item_count / 2
16805 && $number_of_fields_best != 2 )
16807 $number_of_fields_best = 1;
16810 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
16813 sub get_maximum_fields_wanted {
16815 # Not all tables look good with more than one field of items.
16816 # This routine looks at a table and decides if it should be
16817 # formatted with just one field or not.
16818 # This coding is still under development.
16819 my ($ritem_lengths) = @_;
16821 my $number_of_fields_best = 0;
16823 # For just a few items, we tentatively assume just 1 field.
16824 my $item_count = @{$ritem_lengths};
16825 if ( $item_count <= 5 ) {
16826 $number_of_fields_best = 1;
16829 # For larger tables, look at it both ways and see what looks best
16833 my @max_length = ( 0, 0 );
16834 my @last_length_2 = ( undef, undef );
16835 my @first_length_2 = ( undef, undef );
16836 my $last_length = undef;
16837 my $total_variation_1 = 0;
16838 my $total_variation_2 = 0;
16839 my @total_variation_2 = ( 0, 0 );
16840 for ( my $j = 0 ; $j < $item_count ; $j++ ) {
16842 $is_odd = 1 - $is_odd;
16843 my $length = $ritem_lengths->[$j];
16844 if ( $length > $max_length[$is_odd] ) {
16845 $max_length[$is_odd] = $length;
16848 if ( defined($last_length) ) {
16849 my $dl = abs( $length - $last_length );
16850 $total_variation_1 += $dl;
16852 $last_length = $length;
16854 my $ll = $last_length_2[$is_odd];
16855 if ( defined($ll) ) {
16856 my $dl = abs( $length - $ll );
16857 $total_variation_2[$is_odd] += $dl;
16860 $first_length_2[$is_odd] = $length;
16862 $last_length_2[$is_odd] = $length;
16864 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
16866 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
16867 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
16868 $number_of_fields_best = 1;
16871 return ($number_of_fields_best);
16874 sub table_columns_available {
16875 my $i_first_comma = shift;
16877 maximum_line_length($i_first_comma) -
16878 leading_spaces_to_go($i_first_comma);
16880 # Patch: the vertical formatter does not line up lines whose lengths
16881 # exactly equal the available line length because of allowances
16882 # that must be made for side comments. Therefore, the number of
16883 # available columns is reduced by 1 character.
16888 sub maximum_number_of_fields {
16890 # how many fields will fit in the available space?
16891 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
16892 my $max_pairs = int( $columns / $pair_width );
16893 my $number_of_fields = $max_pairs * 2;
16894 if ( $odd_or_even == 1
16895 && $max_pairs * $pair_width + $max_width <= $columns )
16897 $number_of_fields++;
16899 return $number_of_fields;
16902 sub compactify_table {
16904 # given a table with a certain number of fields and a certain number
16905 # of lines, see if reducing the number of fields will make it look
16907 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
16908 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
16912 $min_fields = $number_of_fields ;
16913 $min_fields >= $odd_or_even
16914 && $min_fields * $formatted_lines >= $item_count ;
16915 $min_fields -= $odd_or_even
16918 $number_of_fields = $min_fields;
16921 return $number_of_fields;
16924 sub set_ragged_breakpoints {
16926 # Set breakpoints in a list that cannot be formatted nicely as a
16928 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
16930 my $break_count = 0;
16931 foreach (@$ri_ragged_break_list) {
16932 my $j = $ri_term_comma->[$_];
16934 set_forced_breakpoint($j);
16938 return $break_count;
16941 sub copy_old_breakpoints {
16942 my ( $i_first_comma, $i_last_comma ) = @_;
16943 for my $i ( $i_first_comma .. $i_last_comma ) {
16944 if ( $old_breakpoint_to_go[$i] ) {
16945 set_forced_breakpoint($i);
16951 my ( $i, $j ) = @_;
16952 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
16954 FORMATTER_DEBUG_FLAG_NOBREAK && do {
16955 my ( $a, $b, $c ) = caller();
16957 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
16960 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
16963 # shouldn't happen; non-critical error
16965 FORMATTER_DEBUG_FLAG_NOBREAK && do {
16966 my ( $a, $b, $c ) = caller();
16968 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
16973 sub set_fake_breakpoint {
16975 # Just bump up the breakpoint count as a signal that there are breaks.
16976 # This is useful if we have breaks but may want to postpone deciding where
16978 $forced_breakpoint_count++;
16981 sub set_forced_breakpoint {
16984 return unless defined $i && $i >= 0;
16986 # when called with certain tokens, use bond strengths to decide
16987 # if we break before or after it
16988 my $token = $tokens_to_go[$i];
16990 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
16991 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
16994 # breaks are forced before 'if' and 'unless'
16995 elsif ( $is_if_unless{$token} ) { $i-- }
16997 if ( $i >= 0 && $i <= $max_index_to_go ) {
16998 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
17000 FORMATTER_DEBUG_FLAG_FORCE && do {
17001 my ( $a, $b, $c ) = caller();
17003 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
17006 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
17007 $forced_breakpoint_to_go[$i_nonblank] = 1;
17009 if ( $i_nonblank > $index_max_forced_break ) {
17010 $index_max_forced_break = $i_nonblank;
17012 $forced_breakpoint_count++;
17013 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
17016 # if we break at an opening container..break at the closing
17017 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
17018 set_closing_breakpoint($i_nonblank);
17024 sub clear_breakpoint_undo_stack {
17025 $forced_breakpoint_undo_count = 0;
17028 sub undo_forced_breakpoint_stack {
17030 my $i_start = shift;
17031 if ( $i_start < 0 ) {
17033 my ( $a, $b, $c ) = caller();
17035 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
17039 while ( $forced_breakpoint_undo_count > $i_start ) {
17041 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
17042 if ( $i >= 0 && $i <= $max_index_to_go ) {
17043 $forced_breakpoint_to_go[$i] = 0;
17044 $forced_breakpoint_count--;
17046 FORMATTER_DEBUG_FLAG_UNDOBP && do {
17047 my ( $a, $b, $c ) = caller();
17049 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
17053 # shouldn't happen, but not a critical error
17055 FORMATTER_DEBUG_FLAG_UNDOBP && do {
17056 my ( $a, $b, $c ) = caller();
17058 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
17064 { # begin recombine_breakpoints
17075 @is_amp_amp{@_} = (1) x scalar(@_);
17078 @is_ternary{@_} = (1) x scalar(@_);
17080 @_ = qw( + - * / );
17081 @is_math_op{@_} = (1) x scalar(@_);
17084 @is_plus_minus{@_} = (1) x scalar(@_);
17087 @is_mult_div{@_} = (1) x scalar(@_);
17090 sub recombine_breakpoints {
17092 # sub set_continuation_breaks is very liberal in setting line breaks
17093 # for long lines, always setting breaks at good breakpoints, even
17094 # when that creates small lines. Sometimes small line fragments
17095 # are produced which would look better if they were combined.
17096 # That's the task of this routine.
17098 # We are given indexes to the current lines:
17099 # $ri_beg = ref to array of BEGinning indexes of each line
17100 # $ri_end = ref to array of ENDing indexes of each line
17101 my ( $ri_beg, $ri_end ) = @_;
17103 # Make a list of all good joining tokens between the lines
17106 my $nmax = @$ri_end - 1;
17107 for my $n ( 1 .. $nmax ) {
17108 my $ibeg_1 = $$ri_beg[ $n - 1 ];
17109 my $iend_1 = $$ri_end[ $n - 1 ];
17110 my $iend_2 = $$ri_end[$n];
17111 my $ibeg_2 = $$ri_beg[$n];
17113 my ( $itok, $itokp, $itokm );
17115 foreach my $itest ( $iend_1, $ibeg_2 ) {
17116 my $type = $types_to_go[$itest];
17117 if ( $is_math_op{$type}
17118 || $is_amp_amp{$type}
17119 || $is_assignment{$type}
17125 $joint[$n] = [$itok];
17128 my $more_to_do = 1;
17130 # We keep looping over all of the lines of this batch
17131 # until there are no more possible recombinations
17132 my $nmax_last = @$ri_end;
17133 while ($more_to_do) {
17137 my $nmax = @$ri_end - 1;
17139 # Safety check for infinite loop
17140 unless ( $nmax < $nmax_last ) {
17142 # Shouldn't happen because splice below decreases nmax on each
17145 "Program bug-infinite loop in recombine breakpoints\n";
17147 $nmax_last = $nmax;
17149 my $previous_outdentable_closing_paren;
17150 my $leading_amp_count = 0;
17151 my $this_line_is_semicolon_terminated;
17153 # loop over all remaining lines in this batch
17154 for $n ( 1 .. $nmax ) {
17156 #----------------------------------------------------------
17157 # If we join the current pair of lines,
17158 # line $n-1 will become the left part of the joined line
17159 # line $n will become the right part of the joined line
17161 # Here are Indexes of the endpoint tokens of the two lines:
17163 # -----line $n-1--- | -----line $n-----
17164 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
17167 # We want to decide if we should remove the line break
17168 # between the tokens at $iend_1 and $ibeg_2
17170 # We will apply a number of ad-hoc tests to see if joining
17171 # here will look ok. The code will just issue a 'next'
17172 # command if the join doesn't look good. If we get through
17173 # the gauntlet of tests, the lines will be recombined.
17174 #----------------------------------------------------------
17176 # beginning and ending tokens of the lines we are working on
17177 my $ibeg_1 = $$ri_beg[ $n - 1 ];
17178 my $iend_1 = $$ri_end[ $n - 1 ];
17179 my $iend_2 = $$ri_end[$n];
17180 my $ibeg_2 = $$ri_beg[$n];
17181 my $ibeg_nmax = $$ri_beg[$nmax];
17183 my $type_iend_1 = $types_to_go[$iend_1];
17184 my $type_iend_2 = $types_to_go[$iend_2];
17185 my $type_ibeg_1 = $types_to_go[$ibeg_1];
17186 my $type_ibeg_2 = $types_to_go[$ibeg_2];
17188 # some beginning indexes of other lines, which may not exist
17189 my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
17190 my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
17191 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
17195 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
17196 # $nesting_depth_to_go[$ibeg_1] );
17198 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
17200 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
17203 # If line $n is the last line, we set some flags and
17204 # do any special checks for it
17205 if ( $n == $nmax ) {
17207 # a terminal '{' should stay where it is
17208 next if $type_ibeg_2 eq '{';
17210 # set flag if statement $n ends in ';'
17211 $this_line_is_semicolon_terminated = $type_iend_2 eq ';'
17213 # with possible side comment
17214 || ( $type_iend_2 eq '#'
17215 && $iend_2 - $ibeg_2 >= 2
17216 && $types_to_go[ $iend_2 - 2 ] eq ';'
17217 && $types_to_go[ $iend_2 - 1 ] eq 'b' );
17220 #----------------------------------------------------------
17221 # Recombine Section 1:
17222 # Examine the special token joining this line pair, if any.
17223 # Put as many tests in this section to avoid duplicate code and
17224 # to make formatting independent of whether breaks are to the
17225 # left or right of an operator.
17226 #----------------------------------------------------------
17228 my ($itok) = @{ $joint[$n] };
17231 # FIXME: Patch - may not be necessary
17233 $type_iend_1 eq 'b'
17238 $type_iend_2 eq 'b'
17243 my $type = $types_to_go[$itok];
17245 if ( $type eq ':' ) {
17247 # do not join at a colon unless it disobeys the break request
17248 if ( $itok eq $iend_1 ) {
17249 next unless $want_break_before{$type};
17252 $leading_amp_count++;
17253 next if $want_break_before{$type};
17257 # handle math operators + - * /
17258 elsif ( $is_math_op{$type} ) {
17260 # Combine these lines if this line is a single
17261 # number, or if it is a short term with same
17262 # operator as the previous line. For example, in
17263 # the following code we will combine all of the
17264 # short terms $A, $B, $C, $D, $E, $F, together
17265 # instead of leaving them one per line:
17267 # $A * $B * $C * $D * $E * $F *
17268 # ( 2. * $eps * $sigma * $area ) *
17269 # ( 1. / $tcold**3 - 1. / $thot**3 );
17271 # This can be important in math-intensive code.
17275 my $itokp = min( $inext_to_go[$itok], $iend_2 );
17276 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
17277 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
17278 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
17280 # check for a number on the right
17281 if ( $types_to_go[$itokp] eq 'n' ) {
17283 # ok if nothing else on right
17284 if ( $itokp == $iend_2 ) {
17289 # look one more token to right..
17290 # okay if math operator or some termination
17292 ( ( $itokpp == $iend_2 )
17293 && $is_math_op{ $types_to_go[$itokpp] } )
17294 || $types_to_go[$itokpp] =~ /^[#,;]$/;
17298 # check for a number on the left
17299 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
17301 # okay if nothing else to left
17302 if ( $itokm == $ibeg_1 ) {
17306 # otherwise look one more token to left
17309 # okay if math operator, comma, or assignment
17310 $good_combo = ( $itokmm == $ibeg_1 )
17311 && ( $is_math_op{ $types_to_go[$itokmm] }
17312 || $types_to_go[$itokmm] =~ /^[,]$/
17313 || $is_assignment{ $types_to_go[$itokmm] }
17318 # look for a single short token either side of the
17320 if ( !$good_combo ) {
17322 # Slight adjustment factor to make results
17323 # independent of break before or after operator in
17324 # long summed lists. (An operator and a space make
17326 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
17330 # numbers or id's on both sides of this joint
17331 $types_to_go[$itokp] =~ /^[in]$/
17332 && $types_to_go[$itokm] =~ /^[in]$/
17334 # one of the two lines must be short:
17337 # no more than 2 nonblank tokens right of
17342 && token_sequence_length( $itokp, $iend_2 )
17344 $rOpts_short_concatenation_item_length
17347 # no more than 2 nonblank tokens left of
17352 && token_sequence_length( $ibeg_1, $itokm )
17354 $rOpts_short_concatenation_item_length
17359 # keep pure terms; don't mix +- with */
17361 $is_plus_minus{$type}
17362 && ( $is_mult_div{ $types_to_go[$itokmm] }
17363 || $is_mult_div{ $types_to_go[$itokpp] } )
17366 $is_mult_div{$type}
17367 && ( $is_plus_minus{ $types_to_go[$itokmm] }
17368 || $is_plus_minus{ $types_to_go[$itokpp] } )
17374 # it is also good to combine if we can reduce to 2 lines
17375 if ( !$good_combo ) {
17377 # index on other line where same token would be in a
17380 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
17385 && $types_to_go[$iother] ne $type;
17388 next unless ($good_combo);
17392 elsif ( $is_amp_amp{$type} ) {
17396 elsif ( $is_assignment{$type} ) {
17398 } ## end assignment
17401 #----------------------------------------------------------
17402 # Recombine Section 2:
17403 # Examine token at $iend_1 (right end of first line of pair)
17404 #----------------------------------------------------------
17406 # an isolated '}' may join with a ';' terminated segment
17407 if ( $type_iend_1 eq '}' ) {
17409 # Check for cases where combining a semicolon terminated
17410 # statement with a previous isolated closing paren will
17411 # allow the combined line to be outdented. This is
17412 # generally a good move. For example, we can join up
17413 # the last two lines here:
17415 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17416 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17422 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17423 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17426 # which makes the parens line up.
17428 # Another example, from Joe Matarazzo, probably looks best
17429 # with the 'or' clause appended to the trailing paren:
17430 # $self->some_method(
17433 # ) or die "Some_method didn't work";
17435 # But we do not want to do this for something like the -lp
17436 # option where the paren is not outdentable because the
17437 # trailing clause will be far to the right.
17439 # The logic here is synchronized with the logic in sub
17440 # sub set_adjusted_indentation, which actually does
17443 $previous_outdentable_closing_paren =
17444 $this_line_is_semicolon_terminated
17446 # only one token on last line
17447 && $ibeg_1 == $iend_1
17449 # must be structural paren
17450 && $tokens_to_go[$iend_1] eq ')'
17452 # style must allow outdenting,
17453 && !$closing_token_indentation{')'}
17455 # only leading '&&', '||', and ':' if no others seen
17456 # (but note: our count made below could be wrong
17457 # due to intervening comments)
17458 && ( $leading_amp_count == 0
17459 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
17461 # but leading colons probably line up with a
17462 # previous colon or question (count could be wrong).
17463 && $type_ibeg_2 ne ':'
17465 # only one step in depth allowed. this line must not
17466 # begin with a ')' itself.
17467 && ( $nesting_depth_to_go[$iend_1] ==
17468 $nesting_depth_to_go[$iend_2] + 1 );
17470 # YVES patch 2 of 2:
17471 # Allow cuddled eval chains, like this:
17478 # This patch works together with a patch in
17479 # setting adjusted indentation (where the closing eval
17480 # brace is outdented if possible).
17481 # The problem is that an 'eval' block has continuation
17482 # indentation and it looks better to undo it in some
17483 # cases. If we do not use this patch we would get:
17491 # The alternative, for uncuddled style, is to create
17492 # a patch in set_adjusted_indentation which undoes
17493 # the indentation of a leading line like 'or do {'.
17494 # This doesn't work well with -icb through
17496 $block_type_to_go[$iend_1] eq 'eval'
17497 && !$rOpts->{'line-up-parentheses'}
17498 && !$rOpts->{'indent-closing-brace'}
17499 && $tokens_to_go[$iend_2] eq '{'
17501 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
17502 || ( $type_ibeg_2 eq 'k'
17503 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
17504 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
17508 $previous_outdentable_closing_paren ||= 1;
17513 $previous_outdentable_closing_paren
17515 # handle '.' and '?' specially below
17516 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
17521 # honor breaks at opening brace
17522 # Added to prevent recombining something like this:
17523 # } || eval { package main;
17524 elsif ( $type_iend_1 eq '{' ) {
17525 next if $forced_breakpoint_to_go[$iend_1];
17528 # do not recombine lines with ending &&, ||,
17529 elsif ( $is_amp_amp{$type_iend_1} ) {
17530 next unless $want_break_before{$type_iend_1};
17533 # Identify and recombine a broken ?/: chain
17534 elsif ( $type_iend_1 eq '?' ) {
17536 # Do not recombine different levels
17538 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
17540 # do not recombine unless next line ends in :
17541 next unless $type_iend_2 eq ':';
17544 # for lines ending in a comma...
17545 elsif ( $type_iend_1 eq ',' ) {
17547 # Do not recombine at comma which is following the
17549 # TODO: might be best to make a special flag
17550 next if ( $old_breakpoint_to_go[$iend_1] );
17552 # an isolated '},' may join with an identifier + ';'
17553 # this is useful for the class of a 'bless' statement (bless.t)
17554 if ( $type_ibeg_1 eq '}'
17555 && $type_ibeg_2 eq 'i' )
17558 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
17559 && ( $iend_2 == ( $ibeg_2 + 1 ) )
17560 && $this_line_is_semicolon_terminated );
17562 # override breakpoint
17563 $forced_breakpoint_to_go[$iend_1] = 0;
17569 # do not recombine after a comma unless this will leave
17571 next unless ( $n + 1 >= $nmax );
17573 # do not recombine if there is a change in indentation depth
17576 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
17578 # do not recombine a "complex expression" after a
17579 # comma. "complex" means no parens.
17581 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
17582 if ( $tokens_to_go[$ii] eq '(' ) {
17587 next if $saw_paren;
17592 elsif ( $type_iend_1 eq '(' ) {
17594 # No longer doing this
17597 elsif ( $type_iend_1 eq ')' ) {
17599 # No longer doing this
17602 # keep a terminal for-semicolon
17603 elsif ( $type_iend_1 eq 'f' ) {
17607 # if '=' at end of line ...
17608 elsif ( $is_assignment{$type_iend_1} ) {
17610 # keep break after = if it was in input stream
17611 # this helps prevent 'blinkers'
17612 next if $old_breakpoint_to_go[$iend_1]
17614 # don't strand an isolated '='
17615 && $iend_1 != $ibeg_1;
17617 my $is_short_quote =
17618 ( $type_ibeg_2 eq 'Q'
17619 && $ibeg_2 == $iend_2
17620 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
17621 $rOpts_short_concatenation_item_length );
17623 ( $type_ibeg_1 eq '?'
17624 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
17626 # always join an isolated '=', a short quote, or if this
17627 # will put ?/: at start of adjacent lines
17628 if ( $ibeg_1 != $iend_1
17629 && !$is_short_quote
17636 # unless we can reduce this to two lines
17639 # or three lines, the last with a leading semicolon
17640 || ( $nmax == $n + 2
17641 && $types_to_go[$ibeg_nmax] eq ';' )
17643 # or the next line ends with a here doc
17644 || $type_iend_2 eq 'h'
17646 # or the next line ends in an open paren or brace
17647 # and the break hasn't been forced [dima.t]
17648 || ( !$forced_breakpoint_to_go[$iend_1]
17649 && $type_iend_2 eq '{' )
17652 # do not recombine if the two lines might align well
17653 # this is a very approximate test for this
17655 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
17660 # Recombine if we can make two lines
17663 # -lp users often prefer this:
17664 # my $title = function($env, $env, $sysarea,
17665 # "bubba Borrower Entry");
17666 # so we will recombine if -lp is used we have
17668 && ( !$rOpts_line_up_parentheses
17669 || $type_iend_2 ne ',' )
17673 # otherwise, scan the rhs line up to last token for
17674 # complexity. Note that we are not counting the last
17675 # token in case it is an opening paren.
17677 my $depth = $nesting_depth_to_go[$ibeg_2];
17678 for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
17679 if ( $nesting_depth_to_go[$i] != $depth ) {
17681 last if ( $tv > 1 );
17683 $depth = $nesting_depth_to_go[$i];
17686 # ok to recombine if no level changes before last token
17689 # otherwise, do not recombine if more than two
17691 next if ( $tv > 1 );
17693 # check total complexity of the two adjacent lines
17694 # that will occur if we do this join
17696 ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
17697 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
17698 if ( $nesting_depth_to_go[$i] != $depth ) {
17700 last if ( $tv > 2 );
17702 $depth = $nesting_depth_to_go[$i];
17705 # do not recombine if total is more than 2 level changes
17706 next if ( $tv > 2 );
17711 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
17712 $forced_breakpoint_to_go[$iend_1] = 0;
17717 elsif ( $type_iend_1 eq 'k' ) {
17719 # make major control keywords stand out
17724 #/^(last|next|redo|return)$/
17725 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
17727 # but only if followed by multiple lines
17731 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
17733 unless $want_break_before{ $tokens_to_go[$iend_1] };
17737 #----------------------------------------------------------
17738 # Recombine Section 3:
17739 # Examine token at $ibeg_2 (left end of second line of pair)
17740 #----------------------------------------------------------
17742 # join lines identified above as capable of
17743 # causing an outdented line with leading closing paren
17744 # Note that we are skipping the rest of this section
17745 if ($previous_outdentable_closing_paren) {
17746 $forced_breakpoint_to_go[$iend_1] = 0;
17749 # handle lines with leading &&, ||
17750 elsif ( $is_amp_amp{$type_ibeg_2} ) {
17752 $leading_amp_count++;
17754 # ok to recombine if it follows a ? or :
17755 # and is followed by an open paren..
17757 ( $is_ternary{$type_ibeg_1}
17758 && $tokens_to_go[$iend_2] eq '(' )
17760 # or is followed by a ? or : at same depth
17762 # We are looking for something like this. We can
17763 # recombine the && line with the line above to make the
17764 # structure more clear:
17766 # exists $G->{Attr}->{V}
17767 # && exists $G->{Attr}->{V}->{$u}
17768 # ? %{ $G->{Attr}->{V}->{$u} }
17771 # We should probably leave something like this alone:
17773 # exists $G->{Attr}->{E}
17774 # && exists $G->{Attr}->{E}->{$u}
17775 # && exists $G->{Attr}->{E}->{$u}->{$v}
17776 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
17778 # so that we either have all of the &&'s (or ||'s)
17779 # on one line, as in the first example, or break at
17780 # each one as in the second example. However, it
17781 # sometimes makes things worse to check for this because
17782 # it prevents multiple recombinations. So this is not done.
17784 && $is_ternary{ $types_to_go[$ibeg_3] }
17785 && $nesting_depth_to_go[$ibeg_3] ==
17786 $nesting_depth_to_go[$ibeg_2] );
17788 next if !$ok && $want_break_before{$type_ibeg_2};
17789 $forced_breakpoint_to_go[$iend_1] = 0;
17791 # tweak the bond strength to give this joint priority
17796 # Identify and recombine a broken ?/: chain
17797 elsif ( $type_ibeg_2 eq '?' ) {
17799 # Do not recombine different levels
17800 my $lev = $levels_to_go[$ibeg_2];
17801 next if ( $lev ne $levels_to_go[$ibeg_1] );
17803 # Do not recombine a '?' if either next line or
17804 # previous line does not start with a ':'. The reasons
17805 # are that (1) no alignment of the ? will be possible
17806 # and (2) the expression is somewhat complex, so the
17807 # '?' is harder to see in the interior of the line.
17808 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
17809 my $precedes_colon =
17810 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
17811 next unless ( $follows_colon || $precedes_colon );
17813 # we will always combining a ? line following a : line
17814 if ( !$follows_colon ) {
17816 # ...otherwise recombine only if it looks like a chain.
17817 # we will just look at a few nearby lines to see if
17818 # this looks like a chain.
17819 my $local_count = 0;
17820 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
17823 && $types_to_go[$ii] eq ':'
17824 && $levels_to_go[$ii] == $lev;
17826 next unless ( $local_count > 1 );
17828 $forced_breakpoint_to_go[$iend_1] = 0;
17831 # do not recombine lines with leading '.'
17832 elsif ( $type_ibeg_2 eq '.' ) {
17833 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
17837 # ... unless there is just one and we can reduce
17838 # this to two lines if we do. For example, this
17842 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
17844 # looks better than this:
17845 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
17846 # . '$args .= $pat;'
17851 && $type_ibeg_1 ne $type_ibeg_2
17854 # ... or this would strand a short quote , like this
17855 # . "some long quote"
17858 || ( $types_to_go[$i_next_nonblank] eq 'Q'
17859 && $i_next_nonblank >= $iend_2 - 1
17860 && $token_lengths_to_go[$i_next_nonblank] <
17861 $rOpts_short_concatenation_item_length )
17865 # handle leading keyword..
17866 elsif ( $type_ibeg_2 eq 'k' ) {
17868 # handle leading "or"
17869 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
17872 $this_line_is_semicolon_terminated
17875 # following 'if' or 'unless' or 'or'
17876 $type_ibeg_1 eq 'k'
17877 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
17879 # important: only combine a very simple or
17880 # statement because the step below may have
17881 # combined a trailing 'and' with this or,
17882 # and we do not want to then combine
17883 # everything together
17884 && ( $iend_2 - $ibeg_2 <= 7 )
17888 $forced_breakpoint_to_go[$iend_1] = 0
17889 unless $old_breakpoint_to_go[$iend_1];
17892 # handle leading 'and'
17893 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
17895 # Decide if we will combine a single terminal 'and'
17896 # after an 'if' or 'unless'.
17898 # This looks best with the 'and' on the same
17899 # line as the 'if':
17902 # if $seconds and $nu < 2;
17904 # But this looks better as shown:
17907 # if !$this->{Parents}{$_}
17908 # or $this->{Parents}{$_} eq $_;
17912 $this_line_is_semicolon_terminated
17915 # following 'if' or 'unless' or 'or'
17916 $type_ibeg_1 eq 'k'
17917 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
17918 || $tokens_to_go[$ibeg_1] eq 'or' )
17923 # handle leading "if" and "unless"
17924 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
17926 # FIXME: This is still experimental..may not be too useful
17929 $this_line_is_semicolon_terminated
17931 # previous line begins with 'and' or 'or'
17932 && $type_ibeg_1 eq 'k'
17933 && $is_and_or{ $tokens_to_go[$ibeg_1] }
17938 # handle all other leading keywords
17941 # keywords look best at start of lines,
17942 # but combine things like "1 while"
17943 unless ( $is_assignment{$type_iend_1} ) {
17945 if ( ( $type_iend_1 ne 'k' )
17946 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
17951 # similar treatment of && and || as above for 'and' and 'or':
17952 # NOTE: This block of code is currently bypassed because
17953 # of a previous block but is retained for possible future use.
17954 elsif ( $is_amp_amp{$type_ibeg_2} ) {
17956 # maybe looking at something like:
17957 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
17961 $this_line_is_semicolon_terminated
17963 # previous line begins with an 'if' or 'unless' keyword
17964 && $type_ibeg_1 eq 'k'
17965 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
17970 # handle line with leading = or similar
17971 elsif ( $is_assignment{$type_ibeg_2} ) {
17972 next unless ( $n == 1 || $n == $nmax );
17973 next if $old_breakpoint_to_go[$iend_1];
17977 # unless we can reduce this to two lines
17980 # or three lines, the last with a leading semicolon
17981 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
17983 # or the next line ends with a here doc
17984 || $type_iend_2 eq 'h'
17986 # or this is a short line ending in ;
17987 || ( $n == $nmax && $this_line_is_semicolon_terminated )
17989 $forced_breakpoint_to_go[$iend_1] = 0;
17992 #----------------------------------------------------------
17993 # Recombine Section 4:
17994 # Combine the lines if we arrive here and it is possible
17995 #----------------------------------------------------------
17997 # honor hard breakpoints
17998 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
18000 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
18002 # combined line cannot be too long
18003 my $excess = excess_line_length( $ibeg_1, $iend_2 );
18004 next if ( $excess > 0 );
18006 # Require a few extra spaces before recombining lines if we are
18007 # at an old breakpoint unless this is a simple list or terminal
18008 # line. The goal is to avoid oscillating between two
18009 # quasi-stable end states. For example this snippet caused
18013 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
18017 if ( $old_breakpoint_to_go[$iend_1]
18018 && !$this_line_is_semicolon_terminated
18021 && $type_iend_2 ne ',' );
18023 # do not recombine if we would skip in indentation levels
18024 if ( $n < $nmax ) {
18025 my $if_next = $$ri_beg[ $n + 1 ];
18028 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
18029 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
18031 # but an isolated 'if (' is undesirable
18034 && $iend_1 - $ibeg_1 <= 2
18035 && $type_ibeg_1 eq 'k'
18036 && $tokens_to_go[$ibeg_1] eq 'if'
18037 && $tokens_to_go[$iend_1] ne '('
18043 next if ( $bs >= NO_BREAK - 1 );
18045 # remember the pair with the greatest bond strength
18052 if ( $bs > $bs_best ) {
18059 # recombine the pair with the greatest bond strength
18061 splice @$ri_beg, $n_best, 1;
18062 splice @$ri_end, $n_best - 1, 1;
18063 splice @joint, $n_best, 1;
18065 # keep going if we are still making progress
18069 return ( $ri_beg, $ri_end );
18071 } # end recombine_breakpoints
18073 sub break_all_chain_tokens {
18075 # scan the current breakpoints looking for breaks at certain "chain
18076 # operators" (. : && || + etc) which often occur repeatedly in a long
18077 # statement. If we see a break at any one, break at all similar tokens
18078 # within the same container.
18080 my ( $ri_left, $ri_right ) = @_;
18082 my %saw_chain_type;
18083 my %left_chain_type;
18084 my %right_chain_type;
18085 my %interior_chain_type;
18086 my $nmax = @$ri_right - 1;
18088 # scan the left and right end tokens of all lines
18090 for my $n ( 0 .. $nmax ) {
18091 my $il = $$ri_left[$n];
18092 my $ir = $$ri_right[$n];
18093 my $typel = $types_to_go[$il];
18094 my $typer = $types_to_go[$ir];
18095 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
18096 $typer = '+' if ( $typer eq '-' );
18097 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
18098 $typer = '*' if ( $typer eq '/' );
18099 my $tokenl = $tokens_to_go[$il];
18100 my $tokenr = $tokens_to_go[$ir];
18102 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
18103 next if ( $typel eq '?' );
18104 push @{ $left_chain_type{$typel} }, $il;
18105 $saw_chain_type{$typel} = 1;
18108 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
18109 next if ( $typer eq '?' );
18110 push @{ $right_chain_type{$typer} }, $ir;
18111 $saw_chain_type{$typer} = 1;
18115 return unless $count;
18117 # now look for any interior tokens of the same types
18119 for my $n ( 0 .. $nmax ) {
18120 my $il = $$ri_left[$n];
18121 my $ir = $$ri_right[$n];
18122 for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
18123 my $type = $types_to_go[$i];
18124 $type = '+' if ( $type eq '-' );
18125 $type = '*' if ( $type eq '/' );
18126 if ( $saw_chain_type{$type} ) {
18127 push @{ $interior_chain_type{$type} }, $i;
18132 return unless $count;
18134 # now make a list of all new break points
18137 # loop over all chain types
18138 foreach my $type ( keys %saw_chain_type ) {
18140 # quit if just ONE continuation line with leading . For example--
18141 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
18143 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
18145 # loop over all interior chain tokens
18146 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
18148 # loop over all left end tokens of same type
18149 if ( $left_chain_type{$type} ) {
18150 next if $nobreak_to_go[ $itest - 1 ];
18151 foreach my $i ( @{ $left_chain_type{$type} } ) {
18152 next unless in_same_container( $i, $itest );
18153 push @insert_list, $itest - 1;
18155 # Break at matching ? if this : is at a different level.
18156 # For example, the ? before $THRf_DEAD in the following
18157 # should get a break if its : gets a break.
18160 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
18161 # : ( $_ & 4 ) ? $THRf_R_DETACHED
18162 # : $THRf_R_JOINABLE;
18164 && $levels_to_go[$i] != $levels_to_go[$itest] )
18166 my $i_question = $mate_index_to_go[$itest];
18167 if ( $i_question > 0 ) {
18168 push @insert_list, $i_question - 1;
18175 # loop over all right end tokens of same type
18176 if ( $right_chain_type{$type} ) {
18177 next if $nobreak_to_go[$itest];
18178 foreach my $i ( @{ $right_chain_type{$type} } ) {
18179 next unless in_same_container( $i, $itest );
18180 push @insert_list, $itest;
18182 # break at matching ? if this : is at a different level
18184 && $levels_to_go[$i] != $levels_to_go[$itest] )
18186 my $i_question = $mate_index_to_go[$itest];
18187 if ( $i_question >= 0 ) {
18188 push @insert_list, $i_question;
18197 # insert any new break points
18198 if (@insert_list) {
18199 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18205 # Look for assignment operators that could use a breakpoint.
18206 # For example, in the following snippet
18208 # $HOME = $ENV{HOME}
18211 # || die "no home directory for user $<";
18213 # we could break at the = to get this, which is a little nicer:
18218 # || die "no home directory for user $<";
18220 # The logic here follows the logic in set_logical_padding, which
18221 # will add the padding in the second line to improve alignment.
18223 my ( $ri_left, $ri_right ) = @_;
18224 my $nmax = @$ri_right - 1;
18225 return unless ( $nmax >= 2 );
18227 # scan the left ends of first two lines
18230 for my $n ( 1 .. 2 ) {
18231 my $il = $$ri_left[$n];
18232 my $typel = $types_to_go[$il];
18233 my $tokenl = $tokens_to_go[$il];
18235 my $has_leading_op = ( $tokenl =~ /^\w/ )
18236 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
18237 : $is_chain_operator{$typel}; # and, or
18238 return unless ($has_leading_op);
18241 unless ( $tokenl eq $tokbeg
18242 && $nesting_depth_to_go[$il] eq $depth_beg );
18245 $depth_beg = $nesting_depth_to_go[$il];
18248 # now look for any interior tokens of the same types
18249 my $il = $$ri_left[0];
18250 my $ir = $$ri_right[0];
18252 # now make a list of all new break points
18254 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
18255 my $type = $types_to_go[$i];
18256 if ( $is_assignment{$type}
18257 && $nesting_depth_to_go[$i] eq $depth_beg )
18259 if ( $want_break_before{$type} ) {
18260 push @insert_list, $i - 1;
18263 push @insert_list, $i;
18268 # Break after a 'return' followed by a chain of operators
18269 # return ( $^O !~ /win32|dos/i )
18270 # && ( $^O ne 'VMS' )
18271 # && ( $^O ne 'OS2' )
18272 # && ( $^O ne 'MacOS' );
18275 # ( $^O !~ /win32|dos/i )
18276 # && ( $^O ne 'VMS' )
18277 # && ( $^O ne 'OS2' )
18278 # && ( $^O ne 'MacOS' );
18280 if ( $types_to_go[$i] eq 'k'
18281 && $tokens_to_go[$i] eq 'return'
18283 && $nesting_depth_to_go[$i] eq $depth_beg )
18285 push @insert_list, $i;
18288 return unless (@insert_list);
18290 # One final check...
18291 # scan second and third lines and be sure there are no assignments
18292 # we want to avoid breaking at an = to make something like this:
18294 # $html_icons{"$type-$state"}
18295 # or $icon = $html_icons{$type}
18296 # or $icon = $html_icons{$state} )
18297 for my $n ( 1 .. 2 ) {
18298 my $il = $$ri_left[$n];
18299 my $ir = $$ri_right[$n];
18300 for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
18301 my $type = $types_to_go[$i];
18303 if ( $is_assignment{$type}
18304 && $nesting_depth_to_go[$i] eq $depth_beg );
18308 # ok, insert any new break point
18309 if (@insert_list) {
18310 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18314 sub insert_final_breaks {
18316 my ( $ri_left, $ri_right ) = @_;
18318 my $nmax = @$ri_right - 1;
18320 # scan the left and right end tokens of all lines
18322 my $i_first_colon = -1;
18323 for my $n ( 0 .. $nmax ) {
18324 my $il = $$ri_left[$n];
18325 my $ir = $$ri_right[$n];
18326 my $typel = $types_to_go[$il];
18327 my $typer = $types_to_go[$ir];
18328 return if ( $typel eq '?' );
18329 return if ( $typer eq '?' );
18330 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
18331 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
18334 # For long ternary chains,
18335 # if the first : we see has its # ? is in the interior
18336 # of a preceding line, then see if there are any good
18337 # breakpoints before the ?.
18338 if ( $i_first_colon > 0 ) {
18339 my $i_question = $mate_index_to_go[$i_first_colon];
18340 if ( $i_question > 0 ) {
18342 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
18343 my $token = $tokens_to_go[$ii];
18344 my $type = $types_to_go[$ii];
18346 # For now, a good break is either a comma or a 'return'.
18347 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
18348 && in_same_container( $ii, $i_question ) )
18350 push @insert_list, $ii;
18355 # insert any new break points
18356 if (@insert_list) {
18357 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18363 sub in_same_container {
18365 # check to see if tokens at i1 and i2 are in the
18366 # same container, and not separated by a comma, ? or :
18367 my ( $i1, $i2 ) = @_;
18368 my $type = $types_to_go[$i1];
18369 my $depth = $nesting_depth_to_go[$i1];
18370 return unless ( $nesting_depth_to_go[$i2] == $depth );
18371 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
18373 ###########################################################
18374 # This is potentially a very slow routine and not critical.
18375 # For safety just give up for large differences.
18376 # See test file 'infinite_loop.txt'
18377 # TODO: replace this loop with a data structure
18378 ###########################################################
18379 return if ( $i2 - $i1 > 200 );
18381 for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
18382 next if ( $nesting_depth_to_go[$i] > $depth );
18383 return if ( $nesting_depth_to_go[$i] < $depth );
18385 my $tok = $tokens_to_go[$i];
18386 $tok = ',' if $tok eq '=>'; # treat => same as ,
18388 # Example: we would not want to break at any of these .'s
18389 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
18390 if ( $type ne ':' ) {
18391 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
18394 return if ( $tok =~ /^[\,]$/ );
18400 sub set_continuation_breaks {
18402 # Define an array of indexes for inserting newline characters to
18403 # keep the line lengths below the maximum desired length. There is
18404 # an implied break after the last token, so it need not be included.
18407 # This routine is part of series of routines which adjust line
18408 # lengths. It is only called if a statement is longer than the
18409 # maximum line length, or if a preliminary scanning located
18410 # desirable break points. Sub scan_list has already looked at
18411 # these tokens and set breakpoints (in array
18412 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
18413 # after commas, after opening parens, and before closing parens).
18414 # This routine will honor these breakpoints and also add additional
18415 # breakpoints as necessary to keep the line length below the maximum
18416 # requested. It bases its decision on where the 'bond strength' is
18419 # Output: returns references to the arrays:
18422 # which contain the indexes $i of the first and last tokens on each
18425 # In addition, the array:
18426 # $forced_breakpoint_to_go[$i]
18427 # may be updated to be =1 for any index $i after which there must be
18428 # a break. This signals later routines not to undo the breakpoint.
18430 my $saw_good_break = shift;
18431 my @i_first = (); # the first index to output
18432 my @i_last = (); # the last index to output
18433 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
18434 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
18436 set_bond_strengths();
18439 my $imax = $max_index_to_go;
18440 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
18441 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
18442 my $i_begin = $imin; # index for starting next iteration
18444 my $leading_spaces = leading_spaces_to_go($imin);
18445 my $line_count = 0;
18446 my $last_break_strength = NO_BREAK;
18447 my $i_last_break = -1;
18448 my $max_bias = 0.001;
18449 my $tiny_bias = 0.0001;
18450 my $leading_alignment_token = "";
18451 my $leading_alignment_type = "";
18453 # see if any ?/:'s are in order
18454 my $colons_in_order = 1;
18456 my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
18457 my $colon_count = @colon_list;
18458 foreach (@colon_list) {
18459 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
18463 # This is a sufficient but not necessary condition for colon chain
18464 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
18466 #-------------------------------------------------------
18467 # BEGINNING of main loop to set continuation breakpoints
18468 # Keep iterating until we reach the end
18469 #-------------------------------------------------------
18470 while ( $i_begin <= $imax ) {
18471 my $lowest_strength = NO_BREAK;
18472 my $starting_sum = $summed_lengths_to_go[$i_begin];
18475 my $lowest_next_token = '';
18476 my $lowest_next_type = 'b';
18477 my $i_lowest_next_nonblank = -1;
18479 #-------------------------------------------------------
18480 # BEGINNING of inner loop to find the best next breakpoint
18481 #-------------------------------------------------------
18482 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
18483 my $type = $types_to_go[$i_test];
18484 my $token = $tokens_to_go[$i_test];
18485 my $next_type = $types_to_go[ $i_test + 1 ];
18486 my $next_token = $tokens_to_go[ $i_test + 1 ];
18487 my $i_next_nonblank = $inext_to_go[$i_test];
18488 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
18489 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18490 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18491 my $strength = $bond_strength_to_go[$i_test];
18492 my $maximum_line_length = maximum_line_length($i_begin);
18494 # use old breaks as a tie-breaker. For example to
18495 # prevent blinkers with -pbp in this code:
18498 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
18501 # At the same time try to prevent a leading * in this code
18502 # with the default formatting:
18505 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
18506 ## * ( $x**( $a - 1 ) )
18507 ## * ( ( 1 - $x )**( $b - 1 ) );
18509 # reduce strength a bit to break ties at an old breakpoint ...
18511 $old_breakpoint_to_go[$i_test]
18513 # which is a 'good' breakpoint, meaning ...
18514 # we don't want to break before it
18515 && !$want_break_before{$type}
18517 # and either we want to break before the next token
18518 # or the next token is not short (i.e. not a '*', '/' etc.)
18519 && $i_next_nonblank <= $imax
18520 && ( $want_break_before{$next_nonblank_type}
18521 || $token_lengths_to_go[$i_next_nonblank] > 2
18522 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
18525 $strength -= $tiny_bias;
18528 # otherwise increase strength a bit if this token would be at the
18529 # maximum line length. This is necessary to avoid blinking
18530 # in the above example when the -iob flag is added.
18534 $summed_lengths_to_go[ $i_test + 1 ] -
18536 if ( $len >= $maximum_line_length ) {
18537 $strength += $tiny_bias;
18541 my $must_break = 0;
18543 # Force an immediate break at certain operators
18544 # with lower level than the start of the line,
18545 # unless we've already seen a better break.
18547 ##############################################
18548 # Note on an issue with a preceding ?
18549 ##############################################
18550 # We don't include a ? in the above list, but there may
18551 # be a break at a previous ? if the line is long.
18552 # Because of this we do not want to force a break if
18553 # there is a previous ? on this line. For now the best way
18554 # to do this is to not break if we have seen a lower strength
18555 # point, which is probably a ?.
18557 # Example of unwanted breaks we are avoiding at a '.' following a ?
18558 # from pod2html using perltidy -gnu:
18560 # ? "\n<A NAME=\""
18562 # . "\">\n$text</A>\n"
18563 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
18566 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
18567 || ( $next_nonblank_type eq 'k'
18568 && $next_nonblank_token =~ /^(and|or)$/ )
18570 && ( $nesting_depth_to_go[$i_begin] >
18571 $nesting_depth_to_go[$i_next_nonblank] )
18572 && ( $strength <= $lowest_strength )
18575 set_forced_breakpoint($i_next_nonblank);
18580 # Try to put a break where requested by scan_list
18581 $forced_breakpoint_to_go[$i_test]
18583 # break between ) { in a continued line so that the '{' can
18585 # See similar logic in scan_list which catches instances
18586 # where a line is just something like ') {'. We have to
18587 # be careful because the corresponding block keyword might
18588 # not be on the first line, such as 'for' here:
18592 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
18597 && ( $token eq ')' )
18598 && ( $next_nonblank_type eq '{' )
18599 && ($next_nonblank_block_type)
18600 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
18601 && !$rOpts->{'opening-brace-always-on-right'} )
18603 # There is an implied forced break at a terminal opening brace
18604 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
18608 # Forced breakpoints must sometimes be overridden, for example
18609 # because of a side comment causing a NO_BREAK. It is easier
18610 # to catch this here than when they are set.
18611 if ( $strength < NO_BREAK - 1 ) {
18612 $strength = $lowest_strength - $tiny_bias;
18617 # quit if a break here would put a good terminal token on
18618 # the next line and we already have a possible break
18621 && ( $next_nonblank_type =~ /^[\;\,]$/ )
18625 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
18627 ) > $maximum_line_length
18631 last if ( $i_lowest >= 0 );
18634 # Avoid a break which would strand a single punctuation
18635 # token. For example, we do not want to strand a leading
18636 # '.' which is followed by a long quoted string.
18637 # But note that we do want to do this with -extrude (l=1)
18638 # so please test any changes to this code on -extrude.
18641 && ( $i_test == $i_begin )
18642 && ( $i_test < $imax )
18643 && ( $token eq $type )
18647 $summed_lengths_to_go[ $i_test + 1 ] -
18649 ) < $maximum_line_length
18653 $i_test = min( $imax, $inext_to_go[$i_test] );
18657 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
18660 # break at previous best break if it would have produced
18661 # a leading alignment of certain common tokens, and it
18662 # is different from the latest candidate break
18664 if ($leading_alignment_type);
18666 # Force at least one breakpoint if old code had good
18667 # break It is only called if a breakpoint is required or
18668 # desired. This will probably need some adjustments
18669 # over time. A goal is to try to be sure that, if a new
18670 # side comment is introduced into formatted text, then
18671 # the same breakpoints will occur. scbreak.t
18674 $i_test == $imax # we are at the end
18675 && !$forced_breakpoint_count #
18676 && $saw_good_break # old line had good break
18677 && $type =~ /^[#;\{]$/ # and this line ends in
18678 # ';' or side comment
18679 && $i_last_break < 0 # and we haven't made a break
18680 && $i_lowest >= 0 # and we saw a possible break
18681 && $i_lowest < $imax - 1 # (but not just before this ;)
18682 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
18685 # Do not skip past an important break point in a short final
18686 # segment. For example, without this check we would miss the
18687 # break at the final / in the following code:
18690 # ( $tau * $mass_pellet * $q_0 *
18691 # ( 1. - exp( -$t_stop / $tau ) ) -
18692 # 4. * $pi * $factor * $k_ice *
18693 # ( $t_melt - $t_ice ) *
18696 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
18698 if ( $line_count > 2
18699 && $i_lowest < $i_test
18700 && $i_test > $imax - 2
18701 && $nesting_depth_to_go[$i_begin] >
18702 $nesting_depth_to_go[$i_lowest]
18703 && $lowest_strength < $last_break_strength - .5 * WEAK )
18705 # Make this break for math operators for now
18706 my $ir = $inext_to_go[$i_lowest];
18707 my $il = $iprev_to_go[$ir];
18709 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
18710 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
18713 # Update the minimum bond strength location
18714 $lowest_strength = $strength;
18715 $i_lowest = $i_test;
18716 $lowest_next_token = $next_nonblank_token;
18717 $lowest_next_type = $next_nonblank_type;
18718 $i_lowest_next_nonblank = $i_next_nonblank;
18719 last if $must_break;
18721 # set flags to remember if a break here will produce a
18722 # leading alignment of certain common tokens
18723 if ( $line_count > 0
18725 && ( $lowest_strength - $last_break_strength <= $max_bias )
18728 my $i_last_end = $iprev_to_go[$i_begin];
18729 my $tok_beg = $tokens_to_go[$i_begin];
18730 my $type_beg = $types_to_go[$i_begin];
18733 # check for leading alignment of certain tokens
18735 $tok_beg eq $next_nonblank_token
18736 && $is_chain_operator{$tok_beg}
18737 && ( $type_beg eq 'k'
18738 || $type_beg eq $tok_beg )
18739 && $nesting_depth_to_go[$i_begin] >=
18740 $nesting_depth_to_go[$i_next_nonblank]
18743 || ( $tokens_to_go[$i_last_end] eq $token
18744 && $is_chain_operator{$token}
18745 && ( $type eq 'k' || $type eq $token )
18746 && $nesting_depth_to_go[$i_last_end] >=
18747 $nesting_depth_to_go[$i_test] )
18750 $leading_alignment_token = $next_nonblank_token;
18751 $leading_alignment_type = $next_nonblank_type;
18756 my $too_long = ( $i_test >= $imax );
18757 if ( !$too_long ) {
18760 $summed_lengths_to_go[ $i_test + 2 ] -
18762 $too_long = $next_length > $maximum_line_length;
18764 # To prevent blinkers we will avoid leaving a token exactly at
18765 # the line length limit unless it is the last token or one of
18766 # several "good" types.
18768 # The following code was a blinker with -pbp before this
18770 ## $last_nonblank_token eq '('
18771 ## && $is_indirect_object_taker{ $paren_type
18772 ## [$paren_depth] }
18773 # The issue causing the problem is that if the
18774 # term [$paren_depth] gets broken across a line then
18775 # the whitespace routine doesn't see both opening and closing
18776 # brackets and will format like '[ $paren_depth ]'. This
18777 # leads to an oscillation in length depending if we break
18778 # before the closing bracket or not.
18780 && $i_test + 1 < $imax
18781 && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
18783 $too_long = $next_length >= $maximum_line_length;
18787 FORMATTER_DEBUG_FLAG_BREAK
18790 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
18791 my $i_testp2 = $i_test + 2;
18792 if ( $i_testp2 > $max_index_to_go + 1 ) {
18793 $i_testp2 = $max_index_to_go + 1;
18795 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
18796 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
18798 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
18801 # allow one extra terminal token after exceeding line length
18802 # if it would strand this token.
18803 if ( $rOpts_fuzzy_line_length
18805 && $i_lowest == $i_test
18806 && $token_lengths_to_go[$i_test] > 1
18807 && $next_nonblank_type =~ /^[\;\,]$/ )
18814 ( $i_test == $imax ) # we're done if no more tokens,
18816 ( $i_lowest >= 0 ) # or no more space and we have a break
18822 #-------------------------------------------------------
18823 # END of inner loop to find the best next breakpoint
18824 # Now decide exactly where to put the breakpoint
18825 #-------------------------------------------------------
18827 # it's always ok to break at imax if no other break was found
18828 if ( $i_lowest < 0 ) { $i_lowest = $imax }
18830 # semi-final index calculation
18831 my $i_next_nonblank = $inext_to_go[$i_lowest];
18832 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
18833 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18835 #-------------------------------------------------------
18836 # ?/: rule 1 : if a break here will separate a '?' on this
18837 # line from its closing ':', then break at the '?' instead.
18838 #-------------------------------------------------------
18840 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
18841 next unless ( $tokens_to_go[$i] eq '?' );
18843 # do not break if probable sequence of ?/: statements
18844 next if ($is_colon_chain);
18846 # do not break if statement is broken by side comment
18849 $tokens_to_go[$max_index_to_go] eq '#'
18850 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
18851 $max_index_to_go ) !~ /^[\;\}]$/
18854 # no break needed if matching : is also on the line
18856 if ( $mate_index_to_go[$i] >= 0
18857 && $mate_index_to_go[$i] <= $i_next_nonblank );
18860 if ( $want_break_before{'?'} ) { $i_lowest-- }
18864 #-------------------------------------------------------
18865 # END of inner loop to find the best next breakpoint:
18866 # Break the line after the token with index i=$i_lowest
18867 #-------------------------------------------------------
18869 # final index calculation
18870 $i_next_nonblank = $inext_to_go[$i_lowest];
18871 $next_nonblank_type = $types_to_go[$i_next_nonblank];
18872 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18874 FORMATTER_DEBUG_FLAG_BREAK
18876 "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
18878 #-------------------------------------------------------
18879 # ?/: rule 2 : if we break at a '?', then break at its ':'
18881 # Note: this rule is also in sub scan_list to handle a break
18882 # at the start and end of a line (in case breaks are dictated
18883 # by side comments).
18884 #-------------------------------------------------------
18885 if ( $next_nonblank_type eq '?' ) {
18886 set_closing_breakpoint($i_next_nonblank);
18888 elsif ( $types_to_go[$i_lowest] eq '?' ) {
18889 set_closing_breakpoint($i_lowest);
18892 #-------------------------------------------------------
18893 # ?/: rule 3 : if we break at a ':' then we save
18894 # its location for further work below. We may need to go
18895 # back and break at its '?'.
18896 #-------------------------------------------------------
18897 if ( $next_nonblank_type eq ':' ) {
18898 push @i_colon_breaks, $i_next_nonblank;
18900 elsif ( $types_to_go[$i_lowest] eq ':' ) {
18901 push @i_colon_breaks, $i_lowest;
18904 # here we should set breaks for all '?'/':' pairs which are
18905 # separated by this line
18909 # save this line segment, after trimming blanks at the ends
18911 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
18913 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
18915 # set a forced breakpoint at a container opening, if necessary, to
18916 # signal a break at a closing container. Excepting '(' for now.
18917 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
18918 && !$forced_breakpoint_to_go[$i_lowest] )
18920 set_closing_breakpoint($i_lowest);
18923 # get ready to go again
18924 $i_begin = $i_lowest + 1;
18925 $last_break_strength = $lowest_strength;
18926 $i_last_break = $i_lowest;
18927 $leading_alignment_token = "";
18928 $leading_alignment_type = "";
18929 $lowest_next_token = '';
18930 $lowest_next_type = 'b';
18932 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
18936 # update indentation size
18937 if ( $i_begin <= $imax ) {
18938 $leading_spaces = leading_spaces_to_go($i_begin);
18942 #-------------------------------------------------------
18943 # END of main loop to set continuation breakpoints
18944 # Now go back and make any necessary corrections
18945 #-------------------------------------------------------
18947 #-------------------------------------------------------
18948 # ?/: rule 4 -- if we broke at a ':', then break at
18949 # corresponding '?' unless this is a chain of ?: expressions
18950 #-------------------------------------------------------
18951 if (@i_colon_breaks) {
18953 # using a simple method for deciding if we are in a ?/: chain --
18954 # this is a chain if it has multiple ?/: pairs all in order;
18956 # Note that if line starts in a ':' we count that above as a break
18957 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
18959 unless ($is_chain) {
18960 my @insert_list = ();
18961 foreach (@i_colon_breaks) {
18962 my $i_question = $mate_index_to_go[$_];
18963 if ( $i_question >= 0 ) {
18964 if ( $want_break_before{'?'} ) {
18965 $i_question = $iprev_to_go[$i_question];
18968 if ( $i_question >= 0 ) {
18969 push @insert_list, $i_question;
18972 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
18976 return ( \@i_first, \@i_last, $colon_count );
18979 sub insert_additional_breaks {
18981 # this routine will add line breaks at requested locations after
18982 # sub set_continuation_breaks has made preliminary breaks.
18984 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
18987 my $line_number = 0;
18989 foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
18991 $i_f = $$ri_first[$line_number];
18992 $i_l = $$ri_last[$line_number];
18993 while ( $i_break_left >= $i_l ) {
18996 # shouldn't happen unless caller passes bad indexes
18997 if ( $line_number >= @$ri_last ) {
18999 "Non-fatal program bug: couldn't set break at $i_break_left\n"
19001 report_definite_bug();
19004 $i_f = $$ri_first[$line_number];
19005 $i_l = $$ri_last[$line_number];
19008 # Do not leave a blank at the end of a line; back up if necessary
19009 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
19011 my $i_break_right = $inext_to_go[$i_break_left];
19012 if ( $i_break_left >= $i_f
19013 && $i_break_left < $i_l
19014 && $i_break_right > $i_f
19015 && $i_break_right <= $i_l )
19017 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
19018 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
19023 sub set_closing_breakpoint {
19025 # set a breakpoint at a matching closing token
19026 # at present, this is only used to break at a ':' which matches a '?'
19027 my $i_break = shift;
19029 if ( $mate_index_to_go[$i_break] >= 0 ) {
19031 # CAUTION: infinite recursion possible here:
19032 # set_closing_breakpoint calls set_forced_breakpoint, and
19033 # set_forced_breakpoint call set_closing_breakpoint
19034 # ( test files attrib.t, BasicLyx.pm.html).
19035 # Don't reduce the '2' in the statement below
19036 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
19038 # break before } ] and ), but sub set_forced_breakpoint will decide
19039 # to break before or after a ? and :
19040 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
19041 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
19045 my $type_sequence = $type_sequence_to_go[$i_break];
19046 if ($type_sequence) {
19047 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
19048 $postponed_breakpoint{$type_sequence} = 1;
19053 sub compare_indentation_levels {
19055 # check to see if output line tabbing agrees with input line
19056 # this can be very useful for debugging a script which has an extra
19058 my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
19059 if ( $guessed_indentation_level ne $structural_indentation_level ) {
19060 $last_tabbing_disagreement = $input_line_number;
19062 if ($in_tabbing_disagreement) {
19065 $tabbing_disagreement_count++;
19067 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19068 write_logfile_entry(
19069 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
19072 $in_tabbing_disagreement = $input_line_number;
19073 $first_tabbing_disagreement = $in_tabbing_disagreement
19074 unless ($first_tabbing_disagreement);
19079 if ($in_tabbing_disagreement) {
19081 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19082 write_logfile_entry(
19083 "End indentation disagreement from input line $in_tabbing_disagreement\n"
19086 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
19087 write_logfile_entry(
19088 "No further tabbing disagreements will be noted\n");
19091 $in_tabbing_disagreement = 0;
19096 #####################################################################
19098 # the Perl::Tidy::IndentationItem class supplies items which contain
19099 # how much whitespace should be used at the start of a line
19101 #####################################################################
19103 package Perl::Tidy::IndentationItem;
19105 # Indexes for indentation items
19106 use constant SPACES => 0; # total leading white spaces
19107 use constant LEVEL => 1; # the indentation 'level'
19108 use constant CI_LEVEL => 2; # the 'continuation level'
19109 use constant AVAILABLE_SPACES => 3; # how many left spaces available
19111 use constant CLOSED => 4; # index where we saw closing '}'
19112 use constant COMMA_COUNT => 5; # how many commas at this level?
19113 use constant SEQUENCE_NUMBER => 6; # output batch number
19114 use constant INDEX => 7; # index in output batch list
19115 use constant HAVE_CHILD => 8; # any dependents?
19116 use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
19117 # we would like to move to get
19118 # alignment (negative if left)
19119 use constant ALIGN_PAREN => 10; # do we want to try to align
19120 # with an opening structure?
19121 use constant MARKED => 11; # if visited by corrector logic
19122 use constant STACK_DEPTH => 12; # indentation nesting depth
19123 use constant STARTING_INDEX => 13; # first token index of this level
19124 use constant ARROW_COUNT => 14; # how many =>'s
19128 # Create an 'indentation_item' which describes one level of leading
19129 # whitespace when the '-lp' indentation is used. We return
19130 # a reference to an anonymous array of associated variables.
19131 # See above constants for storage scheme.
19133 $class, $spaces, $level,
19134 $ci_level, $available_spaces, $index,
19135 $gnu_sequence_number, $align_paren, $stack_depth,
19139 my $arrow_count = 0;
19140 my $comma_count = 0;
19141 my $have_child = 0;
19142 my $want_right_spaces = 0;
19145 $spaces, $level, $ci_level,
19146 $available_spaces, $closed, $comma_count,
19147 $gnu_sequence_number, $index, $have_child,
19148 $want_right_spaces, $align_paren, $marked,
19149 $stack_depth, $starting_index, $arrow_count,
19153 sub permanently_decrease_AVAILABLE_SPACES {
19155 # make a permanent reduction in the available indentation spaces
19156 # at one indentation item. NOTE: if there are child nodes, their
19157 # total SPACES must be reduced by the caller.
19159 my ( $item, $spaces_needed ) = @_;
19160 my $available_spaces = $item->get_AVAILABLE_SPACES();
19161 my $deleted_spaces =
19162 ( $available_spaces > $spaces_needed )
19164 : $available_spaces;
19165 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19166 $item->decrease_SPACES($deleted_spaces);
19167 $item->set_RECOVERABLE_SPACES(0);
19169 return $deleted_spaces;
19172 sub tentatively_decrease_AVAILABLE_SPACES {
19174 # We are asked to tentatively delete $spaces_needed of indentation
19175 # for a indentation item. We may want to undo this later. NOTE: if
19176 # there are child nodes, their total SPACES must be reduced by the
19178 my ( $item, $spaces_needed ) = @_;
19179 my $available_spaces = $item->get_AVAILABLE_SPACES();
19180 my $deleted_spaces =
19181 ( $available_spaces > $spaces_needed )
19183 : $available_spaces;
19184 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19185 $item->decrease_SPACES($deleted_spaces);
19186 $item->increase_RECOVERABLE_SPACES($deleted_spaces);
19187 return $deleted_spaces;
19190 sub get_STACK_DEPTH {
19192 return $self->[STACK_DEPTH];
19197 return $self->[SPACES];
19202 return $self->[MARKED];
19206 my ( $self, $value ) = @_;
19207 if ( defined($value) ) {
19208 $self->[MARKED] = $value;
19210 return $self->[MARKED];
19213 sub get_AVAILABLE_SPACES {
19215 return $self->[AVAILABLE_SPACES];
19218 sub decrease_SPACES {
19219 my ( $self, $value ) = @_;
19220 if ( defined($value) ) {
19221 $self->[SPACES] -= $value;
19223 return $self->[SPACES];
19226 sub decrease_AVAILABLE_SPACES {
19227 my ( $self, $value ) = @_;
19228 if ( defined($value) ) {
19229 $self->[AVAILABLE_SPACES] -= $value;
19231 return $self->[AVAILABLE_SPACES];
19234 sub get_ALIGN_PAREN {
19236 return $self->[ALIGN_PAREN];
19239 sub get_RECOVERABLE_SPACES {
19241 return $self->[RECOVERABLE_SPACES];
19244 sub set_RECOVERABLE_SPACES {
19245 my ( $self, $value ) = @_;
19246 if ( defined($value) ) {
19247 $self->[RECOVERABLE_SPACES] = $value;
19249 return $self->[RECOVERABLE_SPACES];
19252 sub increase_RECOVERABLE_SPACES {
19253 my ( $self, $value ) = @_;
19254 if ( defined($value) ) {
19255 $self->[RECOVERABLE_SPACES] += $value;
19257 return $self->[RECOVERABLE_SPACES];
19262 return $self->[CI_LEVEL];
19267 return $self->[LEVEL];
19270 sub get_SEQUENCE_NUMBER {
19272 return $self->[SEQUENCE_NUMBER];
19277 return $self->[INDEX];
19280 sub get_STARTING_INDEX {
19282 return $self->[STARTING_INDEX];
19285 sub set_HAVE_CHILD {
19286 my ( $self, $value ) = @_;
19287 if ( defined($value) ) {
19288 $self->[HAVE_CHILD] = $value;
19290 return $self->[HAVE_CHILD];
19293 sub get_HAVE_CHILD {
19295 return $self->[HAVE_CHILD];
19298 sub set_ARROW_COUNT {
19299 my ( $self, $value ) = @_;
19300 if ( defined($value) ) {
19301 $self->[ARROW_COUNT] = $value;
19303 return $self->[ARROW_COUNT];
19306 sub get_ARROW_COUNT {
19308 return $self->[ARROW_COUNT];
19311 sub set_COMMA_COUNT {
19312 my ( $self, $value ) = @_;
19313 if ( defined($value) ) {
19314 $self->[COMMA_COUNT] = $value;
19316 return $self->[COMMA_COUNT];
19319 sub get_COMMA_COUNT {
19321 return $self->[COMMA_COUNT];
19325 my ( $self, $value ) = @_;
19326 if ( defined($value) ) {
19327 $self->[CLOSED] = $value;
19329 return $self->[CLOSED];
19334 return $self->[CLOSED];
19337 #####################################################################
19339 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
19340 # contain a single output line
19342 #####################################################################
19344 package Perl::Tidy::VerticalAligner::Line;
19351 use constant JMAX => 0;
19352 use constant JMAX_ORIGINAL_LINE => 1;
19353 use constant RTOKENS => 2;
19354 use constant RFIELDS => 3;
19355 use constant RPATTERNS => 4;
19356 use constant INDENTATION => 5;
19357 use constant LEADING_SPACE_COUNT => 6;
19358 use constant OUTDENT_LONG_LINES => 7;
19359 use constant LIST_TYPE => 8;
19360 use constant IS_HANGING_SIDE_COMMENT => 9;
19361 use constant RALIGNMENTS => 10;
19362 use constant MAXIMUM_LINE_LENGTH => 11;
19363 use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
19366 $_index_map{jmax} = JMAX;
19367 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
19368 $_index_map{rtokens} = RTOKENS;
19369 $_index_map{rfields} = RFIELDS;
19370 $_index_map{rpatterns} = RPATTERNS;
19371 $_index_map{indentation} = INDENTATION;
19372 $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
19373 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
19374 $_index_map{list_type} = LIST_TYPE;
19375 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
19376 $_index_map{ralignments} = RALIGNMENTS;
19377 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
19378 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
19380 my @_default_data = ();
19381 $_default_data[JMAX] = undef;
19382 $_default_data[JMAX_ORIGINAL_LINE] = undef;
19383 $_default_data[RTOKENS] = undef;
19384 $_default_data[RFIELDS] = undef;
19385 $_default_data[RPATTERNS] = undef;
19386 $_default_data[INDENTATION] = undef;
19387 $_default_data[LEADING_SPACE_COUNT] = undef;
19388 $_default_data[OUTDENT_LONG_LINES] = undef;
19389 $_default_data[LIST_TYPE] = undef;
19390 $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
19391 $_default_data[RALIGNMENTS] = [];
19392 $_default_data[MAXIMUM_LINE_LENGTH] = undef;
19393 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
19397 # methods to count object population
19399 sub get_count { $_count; }
19400 sub _increment_count { ++$_count }
19401 sub _decrement_count { --$_count }
19404 # Constructor may be called as a class method
19406 my ( $caller, %arg ) = @_;
19407 my $caller_is_obj = ref($caller);
19408 my $class = $caller_is_obj || $caller;
19410 my $self = bless [], $class;
19412 $self->[RALIGNMENTS] = [];
19415 foreach ( keys %_index_map ) {
19416 $index = $_index_map{$_};
19417 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19418 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
19419 else { $self->[$index] = $_default_data[$index] }
19422 $self->_increment_count();
19427 $_[0]->_decrement_count();
19430 sub get_jmax { $_[0]->[JMAX] }
19431 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
19432 sub get_rtokens { $_[0]->[RTOKENS] }
19433 sub get_rfields { $_[0]->[RFIELDS] }
19434 sub get_rpatterns { $_[0]->[RPATTERNS] }
19435 sub get_indentation { $_[0]->[INDENTATION] }
19436 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
19437 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
19438 sub get_list_type { $_[0]->[LIST_TYPE] }
19439 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
19440 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
19442 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
19443 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
19444 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
19445 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
19447 sub get_starting_column {
19448 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
19451 sub increment_column {
19452 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
19454 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
19456 sub current_field_width {
19460 return $self->get_column($j);
19463 return $self->get_column($j) - $self->get_column( $j - 1 );
19467 sub field_width_growth {
19470 return $self->get_column($j) - $self->get_starting_column($j);
19473 sub starting_field_width {
19477 return $self->get_starting_column($j);
19480 return $self->get_starting_column($j) -
19481 $self->get_starting_column( $j - 1 );
19485 sub increase_field_width {
19488 my ( $j, $pad ) = @_;
19489 my $jmax = $self->get_jmax();
19490 for my $k ( $j .. $jmax ) {
19491 $self->increment_column( $k, $pad );
19495 sub get_available_space_on_right {
19497 my $jmax = $self->get_jmax();
19498 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
19501 sub set_jmax { $_[0]->[JMAX] = $_[1] }
19502 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
19503 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
19504 sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
19505 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
19506 sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
19507 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
19508 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
19509 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
19510 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
19511 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
19515 #####################################################################
19517 # the Perl::Tidy::VerticalAligner::Alignment class holds information
19518 # on a single column being aligned
19520 #####################################################################
19521 package Perl::Tidy::VerticalAligner::Alignment;
19529 # Symbolic array indexes
19530 use constant COLUMN => 0; # the current column number
19531 use constant STARTING_COLUMN => 1; # column number when created
19532 use constant MATCHING_TOKEN => 2; # what token we are matching
19533 use constant STARTING_LINE => 3; # the line index of creation
19534 use constant ENDING_LINE => 4; # the most recent line to use it
19535 use constant SAVED_COLUMN => 5; # the most recent line to use it
19536 use constant SERIAL_NUMBER => 6; # unique number for this alignment
19537 # (just its index in an array)
19539 # Correspondence between variables and array indexes
19541 $_index_map{column} = COLUMN;
19542 $_index_map{starting_column} = STARTING_COLUMN;
19543 $_index_map{matching_token} = MATCHING_TOKEN;
19544 $_index_map{starting_line} = STARTING_LINE;
19545 $_index_map{ending_line} = ENDING_LINE;
19546 $_index_map{saved_column} = SAVED_COLUMN;
19547 $_index_map{serial_number} = SERIAL_NUMBER;
19549 my @_default_data = ();
19550 $_default_data[COLUMN] = undef;
19551 $_default_data[STARTING_COLUMN] = undef;
19552 $_default_data[MATCHING_TOKEN] = undef;
19553 $_default_data[STARTING_LINE] = undef;
19554 $_default_data[ENDING_LINE] = undef;
19555 $_default_data[SAVED_COLUMN] = undef;
19556 $_default_data[SERIAL_NUMBER] = undef;
19558 # class population count
19561 sub get_count { $_count; }
19562 sub _increment_count { ++$_count }
19563 sub _decrement_count { --$_count }
19568 my ( $caller, %arg ) = @_;
19569 my $caller_is_obj = ref($caller);
19570 my $class = $caller_is_obj || $caller;
19572 my $self = bless [], $class;
19574 foreach ( keys %_index_map ) {
19575 my $index = $_index_map{$_};
19576 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19577 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
19578 else { $self->[$index] = $_default_data[$index] }
19580 $self->_increment_count();
19585 $_[0]->_decrement_count();
19588 sub get_column { return $_[0]->[COLUMN] }
19589 sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
19590 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
19591 sub get_starting_line { return $_[0]->[STARTING_LINE] }
19592 sub get_ending_line { return $_[0]->[ENDING_LINE] }
19593 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
19595 sub set_column { $_[0]->[COLUMN] = $_[1] }
19596 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
19597 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
19598 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
19599 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
19600 sub increment_column { $_[0]->[COLUMN] += $_[1] }
19602 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
19603 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
19607 package Perl::Tidy::VerticalAligner;
19609 # The Perl::Tidy::VerticalAligner package collects output lines and
19610 # attempts to line up certain common tokens, such as => and #, which are
19611 # identified by the calling routine.
19613 # There are two main routines: valign_input and flush. Append acts as a
19614 # storage buffer, collecting lines into a group which can be vertically
19615 # aligned. When alignment is no longer possible or desirable, it dumps
19616 # the group to flush.
19618 # valign_input -----> flush
19626 # Caution: these debug flags produce a lot of output
19627 # They should all be 0 except when debugging small scripts
19629 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
19630 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
19631 use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
19632 use constant VALIGN_DEBUG_FLAG_TABS => 0;
19634 my $debug_warning = sub {
19635 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
19638 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
19639 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
19640 VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
19641 VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
19646 $vertical_aligner_self
19648 $maximum_alignment_index
19652 $previous_minimum_jmax_seen
19653 $previous_maximum_jmax_seen
19654 $maximum_line_index
19659 $last_level_written
19660 $last_leading_space_count
19664 $last_comment_column
19665 $last_side_comment_line_number
19666 $last_side_comment_length
19667 $last_side_comment_level
19668 $outdented_line_count
19669 $first_outdented_line_at
19670 $last_outdented_line_at
19671 $diagnostics_object
19673 $file_writer_object
19674 @side_comment_history
19675 $comment_leading_space_count
19676 $is_matching_terminal_line
19677 $consecutive_block_comments
19684 $cached_line_leading_space_count
19685 $cached_seqno_string
19687 $valign_buffer_filling
19691 $last_nonblank_seqno_string
19695 $rOpts_maximum_line_length
19696 $rOpts_variable_maximum_line_length
19697 $rOpts_continuation_indentation
19698 $rOpts_indent_columns
19700 $rOpts_entab_leading_whitespace
19703 $rOpts_fixed_position_side_comment
19704 $rOpts_minimum_space_to_comment
19712 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
19715 # variables describing the entire space group:
19716 $ralignment_list = [];
19718 $last_level_written = -1;
19719 $extra_indent_ok = 0; # can we move all lines to the right?
19720 $last_side_comment_length = 0;
19721 $maximum_jmax_seen = 0;
19722 $minimum_jmax_seen = 0;
19723 $previous_minimum_jmax_seen = 0;
19724 $previous_maximum_jmax_seen = 0;
19726 # variables describing each line of the group
19727 @group_lines = (); # list of all lines in group
19729 $outdented_line_count = 0;
19730 $first_outdented_line_at = 0;
19731 $last_outdented_line_at = 0;
19732 $last_side_comment_line_number = 0;
19733 $last_side_comment_level = -1;
19734 $is_matching_terminal_line = 0;
19736 # most recent 3 side comments; [ line number, column ]
19737 $side_comment_history[0] = [ -300, 0 ];
19738 $side_comment_history[1] = [ -200, 0 ];
19739 $side_comment_history[2] = [ -100, 0 ];
19741 # valign_output_step_B cache:
19742 $cached_line_text = "";
19743 $cached_line_type = 0;
19744 $cached_line_flag = 0;
19746 $cached_line_valid = 0;
19747 $cached_line_leading_space_count = 0;
19748 $cached_seqno_string = "";
19750 # string of sequence numbers joined together
19751 $seqno_string = "";
19752 $last_nonblank_seqno_string = "";
19754 # frequently used parameters
19755 $rOpts_indent_columns = $rOpts->{'indent-columns'};
19756 $rOpts_tabs = $rOpts->{'tabs'};
19757 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
19758 $rOpts_fixed_position_side_comment =
19759 $rOpts->{'fixed-position-side-comment'};
19760 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
19761 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
19762 $rOpts_variable_maximum_line_length =
19763 $rOpts->{'variable-maximum-line-length'};
19764 $rOpts_valign = $rOpts->{'valign'};
19766 $consecutive_block_comments = 0;
19767 forget_side_comment();
19769 initialize_for_new_group();
19771 $vertical_aligner_self = {};
19772 bless $vertical_aligner_self, $class;
19773 return $vertical_aligner_self;
19776 sub initialize_for_new_group {
19777 $maximum_line_index = -1; # lines in the current group
19778 $maximum_alignment_index = -1; # alignments in current group
19779 $zero_count = 0; # count consecutive lines without tokens
19780 $current_line = undef; # line being matched for alignment
19781 $group_maximum_gap = 0; # largest gap introduced
19783 $marginal_match = 0;
19784 $comment_leading_space_count = 0;
19785 $last_leading_space_count = 0;
19788 # interface to Perl::Tidy::Diagnostics routines
19789 sub write_diagnostics {
19790 if ($diagnostics_object) {
19791 $diagnostics_object->write_diagnostics(@_);
19795 # interface to Perl::Tidy::Logger routines
19797 if ($logger_object) {
19798 $logger_object->warning(@_);
19802 sub write_logfile_entry {
19803 if ($logger_object) {
19804 $logger_object->write_logfile_entry(@_);
19808 sub report_definite_bug {
19809 if ($logger_object) {
19810 $logger_object->report_definite_bug();
19816 # return the number of leading spaces associated with an indentation
19817 # variable $indentation is either a constant number of spaces or an
19818 # object with a get_SPACES method.
19819 my $indentation = shift;
19820 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
19823 sub get_RECOVERABLE_SPACES {
19825 # return the number of spaces (+ means shift right, - means shift left)
19826 # that we would like to shift a group of lines with the same indentation
19827 # to get them to line up with their opening parens
19828 my $indentation = shift;
19829 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
19832 sub get_STACK_DEPTH {
19834 my $indentation = shift;
19835 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
19838 sub make_alignment {
19839 my ( $col, $token ) = @_;
19841 # make one new alignment at column $col which aligns token $token
19842 ++$maximum_alignment_index;
19843 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
19845 starting_column => $col,
19846 matching_token => $token,
19847 starting_line => $maximum_line_index,
19848 ending_line => $maximum_line_index,
19849 serial_number => $maximum_alignment_index,
19851 $ralignment_list->[$maximum_alignment_index] = $alignment;
19855 sub dump_alignments {
19857 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
19858 for my $i ( 0 .. $maximum_alignment_index ) {
19859 my $column = $ralignment_list->[$i]->get_column();
19860 my $starting_column = $ralignment_list->[$i]->get_starting_column();
19861 my $matching_token = $ralignment_list->[$i]->get_matching_token();
19862 my $starting_line = $ralignment_list->[$i]->get_starting_line();
19863 my $ending_line = $ralignment_list->[$i]->get_ending_line();
19865 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
19869 sub save_alignment_columns {
19870 for my $i ( 0 .. $maximum_alignment_index ) {
19871 $ralignment_list->[$i]->save_column();
19875 sub restore_alignment_columns {
19876 for my $i ( 0 .. $maximum_alignment_index ) {
19877 $ralignment_list->[$i]->restore_column();
19881 sub forget_side_comment {
19882 $last_comment_column = 0;
19885 sub maximum_line_length_for_level {
19887 # return maximum line length for line starting with a given level
19888 my $maximum_line_length = $rOpts_maximum_line_length;
19889 if ($rOpts_variable_maximum_line_length) {
19891 if ( $level < 0 ) { $level = 0 }
19892 $maximum_line_length += $level * $rOpts_indent_columns;
19894 return $maximum_line_length;
19899 # Place one line in the current vertical group.
19901 # The input parameters are:
19902 # $level = indentation level of this line
19903 # $rfields = reference to array of fields
19904 # $rpatterns = reference to array of patterns, one per field
19905 # $rtokens = reference to array of tokens starting fields 1,2,..
19907 # Here is an example of what this package does. In this example,
19908 # we are trying to line up both the '=>' and the '#'.
19910 # '18' => 'grave', # \`
19911 # '19' => 'acute', # `'
19912 # '20' => 'caron', # \v
19913 # <-tabs-><f1-><--field 2 ---><-f3->
19916 # col1 col2 col3 col4
19918 # The calling routine has already broken the entire line into 3 fields as
19919 # indicated. (So the work of identifying promising common tokens has
19920 # already been done).
19922 # In this example, there will be 2 tokens being matched: '=>' and '#'.
19923 # They are the leading parts of fields 2 and 3, but we do need to know
19924 # what they are so that we can dump a group of lines when these tokens
19927 # The fields contain the actual characters of each field. The patterns
19928 # are like the fields, but they contain mainly token types instead
19929 # of tokens, so they have fewer characters. They are used to be
19930 # sure we are matching fields of similar type.
19932 # In this example, there will be 4 column indexes being adjusted. The
19933 # first one is always at zero. The interior columns are at the start of
19934 # the matching tokens, and the last one tracks the maximum line length.
19936 # Each time a new line comes in, it joins the current vertical
19937 # group if possible. Otherwise it causes the current group to be dumped
19938 # and a new group is started.
19940 # For each new group member, the column locations are increased, as
19941 # necessary, to make room for the new fields. When the group is finally
19942 # output, these column numbers are used to compute the amount of spaces of
19943 # padding needed for each field.
19945 # Programming note: the fields are assumed not to have any tab characters.
19946 # Tabs have been previously removed except for tabs in quoted strings and
19947 # side comments. Tabs in these fields can mess up the column counting.
19948 # The log file warns the user if there are any such tabs.
19951 $level, $level_end,
19952 $indentation, $rfields,
19953 $rtokens, $rpatterns,
19954 $is_forced_break, $outdent_long_lines,
19955 $is_terminal_ternary, $is_terminal_statement,
19956 $do_not_pad, $rvertical_tightness_flags,
19960 # number of fields is $jmax
19961 # number of tokens between fields is $jmax-1
19962 my $jmax = $#{$rfields};
19964 my $leading_space_count = get_SPACES($indentation);
19966 # set outdented flag to be sure we either align within statements or
19967 # across statement boundaries, but not both.
19968 my $is_outdented = $last_leading_space_count > $leading_space_count;
19969 $last_leading_space_count = $leading_space_count;
19971 # Patch: undo for hanging side comment
19972 my $is_hanging_side_comment =
19973 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
19974 $is_outdented = 0 if $is_hanging_side_comment;
19976 # Forget side comment alignment after seeing 2 or more block comments
19977 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
19978 if ($is_block_comment) {
19979 $consecutive_block_comments++;
19982 if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
19983 $consecutive_block_comments = 0;
19986 VALIGN_DEBUG_FLAG_APPEND0 && do {
19988 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
19991 # Validate cached line if necessary: If we can produce a container
19992 # with just 2 lines total by combining an existing cached opening
19993 # token with the closing token to follow, then we will mark both
19994 # cached flags as valid.
19995 if ($rvertical_tightness_flags) {
19996 if ( $maximum_line_index <= 0
19997 && $cached_line_type
19999 && $rvertical_tightness_flags->[2]
20000 && $rvertical_tightness_flags->[2] == $cached_seqno )
20002 $rvertical_tightness_flags->[3] ||= 1;
20003 $cached_line_valid ||= 1;
20007 # do not join an opening block brace with an unbalanced line
20008 # unless requested with a flag value of 2
20009 if ( $cached_line_type == 3
20010 && $maximum_line_index < 0
20011 && $cached_line_flag < 2
20012 && $level_jump != 0 )
20014 $cached_line_valid = 0;
20017 # patch until new aligner is finished
20018 if ($do_not_pad) { my_flush() }
20020 # shouldn't happen:
20021 if ( $level < 0 ) { $level = 0 }
20023 # do not align code across indentation level changes
20024 # or if vertical alignment is turned off for debugging
20025 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
20027 # we are allowed to shift a group of lines to the right if its
20028 # level is greater than the previous and next group
20030 ( $level < $group_level && $last_level_written < $group_level );
20034 # If we know that this line will get flushed out by itself because
20035 # of level changes, we can leave the extra_indent_ok flag set.
20036 # That way, if we get an external flush call, we will still be
20037 # able to do some -lp alignment if necessary.
20038 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
20040 $group_level = $level;
20042 # wait until after the above flush to get the leading space
20043 # count because it may have been changed if the -icp flag is in
20045 $leading_space_count = get_SPACES($indentation);
20049 # --------------------------------------------------------------------
20050 # Patch to collect outdentable block COMMENTS
20051 # --------------------------------------------------------------------
20052 my $is_blank_line = "";
20053 if ( $group_type eq 'COMMENT' ) {
20057 && $outdent_long_lines
20058 && $leading_space_count == $comment_leading_space_count
20063 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20071 # --------------------------------------------------------------------
20072 # add dummy fields for terminal ternary
20073 # --------------------------------------------------------------------
20074 my $j_terminal_match;
20075 if ( $is_terminal_ternary && $current_line ) {
20076 $j_terminal_match =
20077 fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
20078 $jmax = @{$rfields} - 1;
20081 # --------------------------------------------------------------------
20082 # add dummy fields for else statement
20083 # --------------------------------------------------------------------
20084 if ( $rfields->[0] =~ /^else\s*$/
20086 && $level_jump == 0 )
20088 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
20089 $jmax = @{$rfields} - 1;
20092 # --------------------------------------------------------------------
20093 # Step 1. Handle simple line of code with no fields to match.
20094 # --------------------------------------------------------------------
20095 if ( $jmax <= 0 ) {
20098 if ( $maximum_line_index >= 0
20099 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
20102 # flush the current group if it has some aligned columns..
20103 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
20105 # flush current group if we are just collecting side comments..
20108 # ...and we haven't seen a comment lately
20109 ( $zero_count > 3 )
20111 # ..or if this new line doesn't fit to the left of the comments
20112 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
20113 $group_lines[0]->get_column(0) )
20120 # patch to start new COMMENT group if this comment may be outdented
20121 if ( $is_block_comment
20122 && $outdent_long_lines
20123 && $maximum_line_index < 0 )
20125 $group_type = 'COMMENT';
20126 $comment_leading_space_count = $leading_space_count;
20127 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20131 # just write this line directly if no current group, no side comment,
20132 # and no space recovery is needed.
20133 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
20135 valign_output_step_B( $leading_space_count, $$rfields[0], 0,
20136 $outdent_long_lines, $rvertical_tightness_flags, $level );
20144 # programming check: (shouldn't happen)
20145 # an error here implies an incorrect call was made
20146 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
20148 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
20150 report_definite_bug();
20153 # --------------------------------------------------------------------
20154 # create an object to hold this line
20155 # --------------------------------------------------------------------
20156 my $new_line = new Perl::Tidy::VerticalAligner::Line(
20158 jmax_original_line => $jmax,
20159 rtokens => $rtokens,
20160 rfields => $rfields,
20161 rpatterns => $rpatterns,
20162 indentation => $indentation,
20163 leading_space_count => $leading_space_count,
20164 outdent_long_lines => $outdent_long_lines,
20166 is_hanging_side_comment => $is_hanging_side_comment,
20167 maximum_line_length => maximum_line_length_for_level($level),
20168 rvertical_tightness_flags => $rvertical_tightness_flags,
20171 # Initialize a global flag saying if the last line of the group should
20172 # match end of group and also terminate the group. There should be no
20173 # returns between here and where the flag is handled at the bottom.
20174 my $col_matching_terminal = 0;
20175 if ( defined($j_terminal_match) ) {
20177 # remember the column of the terminal ? or { to match with
20178 $col_matching_terminal = $current_line->get_column($j_terminal_match);
20180 # set global flag for sub decide_if_aligned
20181 $is_matching_terminal_line = 1;
20184 # --------------------------------------------------------------------
20185 # It simplifies things to create a zero length side comment
20187 # --------------------------------------------------------------------
20188 make_side_comment( $new_line, $level_end );
20190 # --------------------------------------------------------------------
20191 # Decide if this is a simple list of items.
20192 # There are 3 list types: none, comma, comma-arrow.
20193 # We use this below to be less restrictive in deciding what to align.
20194 # --------------------------------------------------------------------
20195 if ($is_forced_break) {
20196 decide_if_list($new_line);
20199 if ($current_line) {
20201 # --------------------------------------------------------------------
20202 # Allow hanging side comment to join current group, if any
20203 # This will help keep side comments aligned, because otherwise we
20204 # will have to start a new group, making alignment less likely.
20205 # --------------------------------------------------------------------
20206 join_hanging_comment( $new_line, $current_line )
20207 if $is_hanging_side_comment;
20209 # --------------------------------------------------------------------
20210 # If there is just one previous line, and it has more fields
20211 # than the new line, try to join fields together to get a match with
20212 # the new line. At the present time, only a single leading '=' is
20213 # allowed to be compressed out. This is useful in rare cases where
20214 # a table is forced to use old breakpoints because of side comments,
20215 # and the table starts out something like this:
20216 # my %MonthChars = ('0', 'Jan', # side comment
20219 # Eliminating the '=' field will allow the remaining fields to line up.
20220 # This situation does not occur if there are no side comments
20221 # because scan_list would put a break after the opening '('.
20222 # --------------------------------------------------------------------
20223 eliminate_old_fields( $new_line, $current_line );
20225 # --------------------------------------------------------------------
20226 # If the new line has more fields than the current group,
20227 # see if we can match the first fields and combine the remaining
20228 # fields of the new line.
20229 # --------------------------------------------------------------------
20230 eliminate_new_fields( $new_line, $current_line );
20232 # --------------------------------------------------------------------
20233 # Flush previous group unless all common tokens and patterns match..
20234 # --------------------------------------------------------------------
20235 check_match( $new_line, $current_line );
20237 # --------------------------------------------------------------------
20238 # See if there is space for this line in the current group (if any)
20239 # --------------------------------------------------------------------
20240 if ($current_line) {
20241 check_fit( $new_line, $current_line );
20245 # --------------------------------------------------------------------
20246 # Append this line to the current group (or start new group)
20247 # --------------------------------------------------------------------
20248 add_to_group($new_line);
20250 # Future update to allow this to vary:
20251 $current_line = $new_line if ( $maximum_line_index == 0 );
20253 # output this group if it ends in a terminal else or ternary line
20254 if ( defined($j_terminal_match) ) {
20256 # if there is only one line in the group (maybe due to failure to match
20257 # perfectly with previous lines), then align the ? or { of this
20258 # terminal line with the previous one unless that would make the line
20260 if ( $maximum_line_index == 0 ) {
20261 my $col_now = $current_line->get_column($j_terminal_match);
20262 my $pad = $col_matching_terminal - $col_now;
20263 my $padding_available =
20264 $current_line->get_available_space_on_right();
20265 if ( $pad > 0 && $pad <= $padding_available ) {
20266 $current_line->increase_field_width( $j_terminal_match, $pad );
20270 $is_matching_terminal_line = 0;
20273 # --------------------------------------------------------------------
20274 # Step 8. Some old debugging stuff
20275 # --------------------------------------------------------------------
20276 VALIGN_DEBUG_FLAG_APPEND && do {
20277 print STDOUT "APPEND fields:";
20278 dump_array(@$rfields);
20279 print STDOUT "APPEND tokens:";
20280 dump_array(@$rtokens);
20281 print STDOUT "APPEND patterns:";
20282 dump_array(@$rpatterns);
20289 sub join_hanging_comment {
20292 my $jmax = $line->get_jmax();
20293 return 0 unless $jmax == 1; # must be 2 fields
20294 my $rtokens = $line->get_rtokens();
20295 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
20296 my $rfields = $line->get_rfields();
20297 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
20298 my $old_line = shift;
20299 my $maximum_field_index = $old_line->get_jmax();
20301 unless $maximum_field_index > $jmax; # the current line has more fields
20302 my $rpatterns = $line->get_rpatterns();
20304 $line->set_is_hanging_side_comment(1);
20305 $jmax = $maximum_field_index;
20306 $line->set_jmax($jmax);
20307 $$rfields[$jmax] = $$rfields[1];
20308 $$rtokens[ $jmax - 1 ] = $$rtokens[0];
20309 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
20310 for ( my $j = 1 ; $j < $jmax ; $j++ ) {
20311 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
20312 $$rtokens[ $j - 1 ] = "";
20313 $$rpatterns[ $j - 1 ] = "";
20318 sub eliminate_old_fields {
20320 my $new_line = shift;
20321 my $jmax = $new_line->get_jmax();
20322 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
20323 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
20325 # there must be one previous line
20326 return unless ( $maximum_line_index == 0 );
20328 my $old_line = shift;
20329 my $maximum_field_index = $old_line->get_jmax();
20331 ###############################################
20332 # this line must have fewer fields
20333 return unless $maximum_field_index > $jmax;
20334 ###############################################
20336 # Identify specific cases where field elimination is allowed:
20337 # case=1: both lines have comma-separated lists, and the first
20338 # line has an equals
20339 # case=2: both lines have leading equals
20341 # case 1 is the default
20344 # See if case 2: both lines have leading '='
20345 # We'll require similar leading patterns in this case
20346 my $old_rtokens = $old_line->get_rtokens();
20347 my $rtokens = $new_line->get_rtokens();
20348 my $rpatterns = $new_line->get_rpatterns();
20349 my $old_rpatterns = $old_line->get_rpatterns();
20350 if ( $rtokens->[0] =~ /^=\d*$/
20351 && $old_rtokens->[0] eq $rtokens->[0]
20352 && $old_rpatterns->[0] eq $rpatterns->[0] )
20357 # not too many fewer fields in new line for case 1
20358 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
20360 # case 1 must have side comment
20361 my $old_rfields = $old_line->get_rfields();
20364 && length( $$old_rfields[$maximum_field_index] ) == 0 );
20366 my $rfields = $new_line->get_rfields();
20368 my $hid_equals = 0;
20370 my @new_alignments = ();
20371 my @new_fields = ();
20372 my @new_matching_patterns = ();
20373 my @new_matching_tokens = ();
20377 my $current_field = '';
20378 my $current_pattern = '';
20380 # loop over all old tokens
20382 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
20383 $current_field .= $$old_rfields[$k];
20384 $current_pattern .= $$old_rpatterns[$k];
20385 last if ( $j > $jmax - 1 );
20387 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
20389 $new_fields[$j] = $current_field;
20390 $new_matching_patterns[$j] = $current_pattern;
20391 $current_field = '';
20392 $current_pattern = '';
20393 $new_matching_tokens[$j] = $$old_rtokens[$k];
20394 $new_alignments[$j] = $old_line->get_alignment($k);
20399 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
20400 last if ( $case == 2 ); # avoid problems with stuff
20401 # like: $a=$b=$c=$d;
20405 if ( $in_match && $case == 1 )
20406 ; # disallow gaps in matching field types in case 1
20410 # Modify the current state if we are successful.
20411 # We must exactly reach the ends of both lists for success.
20412 if ( ( $j == $jmax )
20413 && ( $current_field eq '' )
20414 && ( $case != 1 || $hid_equals ) )
20416 $k = $maximum_field_index;
20417 $current_field .= $$old_rfields[$k];
20418 $current_pattern .= $$old_rpatterns[$k];
20419 $new_fields[$j] = $current_field;
20420 $new_matching_patterns[$j] = $current_pattern;
20422 $new_alignments[$j] = $old_line->get_alignment($k);
20423 $maximum_field_index = $j;
20425 $old_line->set_alignments(@new_alignments);
20426 $old_line->set_jmax($jmax);
20427 $old_line->set_rtokens( \@new_matching_tokens );
20428 $old_line->set_rfields( \@new_fields );
20429 $old_line->set_rpatterns( \@$rpatterns );
20433 # create an empty side comment if none exists
20434 sub make_side_comment {
20435 my $new_line = shift;
20436 my $level_end = shift;
20437 my $jmax = $new_line->get_jmax();
20438 my $rtokens = $new_line->get_rtokens();
20440 # if line does not have a side comment...
20441 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
20442 my $rfields = $new_line->get_rfields();
20443 my $rpatterns = $new_line->get_rpatterns();
20444 $$rtokens[$jmax] = '#';
20445 $$rfields[ ++$jmax ] = '';
20446 $$rpatterns[$jmax] = '#';
20447 $new_line->set_jmax($jmax);
20448 $new_line->set_jmax_original_line($jmax);
20451 # line has a side comment..
20454 # don't remember old side comment location for very long
20455 my $line_number = $vertical_aligner_self->get_output_line_number();
20456 my $rfields = $new_line->get_rfields();
20458 $line_number - $last_side_comment_line_number > 12
20460 # and don't remember comment location across block level changes
20461 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
20464 forget_side_comment();
20466 $last_side_comment_line_number = $line_number;
20467 $last_side_comment_level = $level_end;
20471 sub decide_if_list {
20475 # A list will be taken to be a line with a forced break in which all
20476 # of the field separators are commas or comma-arrows (except for the
20479 # List separator tokens are things like ',3' or '=>2',
20480 # where the trailing digit is the nesting depth. Allow braces
20481 # to allow nested list items.
20482 my $rtokens = $line->get_rtokens();
20483 my $test_token = $$rtokens[0];
20484 if ( $test_token =~ /^(\,|=>)/ ) {
20485 my $list_type = $test_token;
20486 my $jmax = $line->get_jmax();
20488 foreach ( 1 .. $jmax - 2 ) {
20489 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
20494 $line->set_list_type($list_type);
20498 sub eliminate_new_fields {
20500 return unless ( $maximum_line_index >= 0 );
20501 my ( $new_line, $old_line ) = @_;
20502 my $jmax = $new_line->get_jmax();
20504 my $old_rtokens = $old_line->get_rtokens();
20505 my $rtokens = $new_line->get_rtokens();
20506 my $is_assignment =
20507 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
20509 # must be monotonic variation
20510 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
20512 # must be more fields in the new line
20513 my $maximum_field_index = $old_line->get_jmax();
20514 return unless ( $maximum_field_index < $jmax );
20516 unless ($is_assignment) {
20518 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
20519 ; # only if monotonic
20521 # never combine fields of a comma list
20523 unless ( $maximum_field_index > 1 )
20524 && ( $new_line->get_list_type() !~ /^,/ );
20527 my $rfields = $new_line->get_rfields();
20528 my $rpatterns = $new_line->get_rpatterns();
20529 my $old_rpatterns = $old_line->get_rpatterns();
20531 # loop over all OLD tokens except comment and check match
20534 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
20535 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
20536 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
20543 # first tokens agree, so combine extra new tokens
20545 for $k ( $maximum_field_index .. $jmax - 1 ) {
20547 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
20548 $$rfields[$k] = "";
20549 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
20550 $$rpatterns[$k] = "";
20553 $$rtokens[ $maximum_field_index - 1 ] = '#';
20554 $$rfields[$maximum_field_index] = $$rfields[$jmax];
20555 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
20556 $jmax = $maximum_field_index;
20558 $new_line->set_jmax($jmax);
20561 sub fix_terminal_ternary {
20563 # Add empty fields as necessary to align a ternary term
20568 # : $year % 100 ? 1
20569 # : $year % 400 ? 0
20572 # returns 1 if the terminal item should be indented
20574 my ( $rfields, $rtokens, $rpatterns ) = @_;
20576 my $jmax = @{$rfields} - 1;
20577 my $old_line = $group_lines[$maximum_line_index];
20578 my $rfields_old = $old_line->get_rfields();
20580 my $rpatterns_old = $old_line->get_rpatterns();
20581 my $rtokens_old = $old_line->get_rtokens();
20582 my $maximum_field_index = $old_line->get_jmax();
20584 # look for the question mark after the :
20586 my $depth_question;
20588 for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
20589 my $tok = $rtokens_old->[$j];
20590 if ( $tok =~ /^\?(\d+)$/ ) {
20591 $depth_question = $1;
20593 # depth must be correct
20594 next unless ( $depth_question eq $group_level );
20597 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
20598 $pad = " " x length($1);
20601 return; # shouldn't happen
20606 return unless ( defined($jquestion) ); # shouldn't happen
20608 # Now splice the tokens and patterns of the previous line
20609 # into the else line to insure a match. Add empty fields
20611 my $jadd = $jquestion;
20613 # Work on copies of the actual arrays in case we have
20614 # to return due to an error
20615 my @fields = @{$rfields};
20616 my @patterns = @{$rpatterns};
20617 my @tokens = @{$rtokens};
20619 VALIGN_DEBUG_FLAG_TERNARY && do {
20621 print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
20622 print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
20623 print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
20624 print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
20625 print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
20626 print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
20629 # handle cases of leading colon on this line
20630 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
20632 my ( $colon, $therest ) = ( $1, $2 );
20634 # Handle sub-case of first field with leading colon plus additional code
20635 # This is the usual situation as at the '1' below:
20637 # : $year % 400 ? 0
20641 # Split the first field after the leading colon and insert padding.
20642 # Note that this padding will remain even if the terminal value goes
20643 # out on a separate line. This does not seem to look to bad, so no
20644 # mechanism has been included to undo it.
20645 my $field1 = shift @fields;
20646 unshift @fields, ( $colon, $pad . $therest );
20648 # change the leading pattern from : to ?
20649 return unless ( $patterns[0] =~ s/^\:/?/ );
20651 # install leading tokens and patterns of existing line
20652 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
20653 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20655 # insert appropriate number of empty fields
20656 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20659 # handle sub-case of first field just equal to leading colon.
20660 # This can happen for example in the example below where
20661 # the leading '(' would create a new alignment token
20662 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
20663 # : ( $mname = $name . '->' );
20666 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
20668 # prepend a leading ? onto the second pattern
20669 $patterns[1] = "?b" . $patterns[1];
20671 # pad the second field
20672 $fields[1] = $pad . $fields[1];
20674 # install leading tokens and patterns of existing line, replacing
20675 # leading token and inserting appropriate number of empty fields
20676 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
20677 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
20678 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20682 # Handle case of no leading colon on this line. This will
20683 # be the case when -wba=':' is used. For example,
20684 # $year % 400 ? 0 :
20688 # install leading tokens and patterns of existing line
20689 $patterns[0] = '?' . 'b' . $patterns[0];
20690 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
20691 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20693 # insert appropriate number of empty fields
20694 $jadd = $jquestion + 1;
20695 $fields[0] = $pad . $fields[0];
20696 splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
20699 VALIGN_DEBUG_FLAG_TERNARY && do {
20701 print STDOUT "MODIFIED TOKENS=<@tokens>\n";
20702 print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
20703 print STDOUT "MODIFIED FIELDS=<@fields>\n";
20706 # all ok .. update the arrays
20707 @{$rfields} = @fields;
20708 @{$rtokens} = @tokens;
20709 @{$rpatterns} = @patterns;
20711 # force a flush after this line
20715 sub fix_terminal_else {
20717 # Add empty fields as necessary to align a balanced terminal
20718 # else block to a previous if/elsif/unless block,
20721 # if ( 1 || $x ) { print "ok 13\n"; }
20722 # else { print "not ok 13\n"; }
20724 # returns 1 if the else block should be indented
20726 my ( $rfields, $rtokens, $rpatterns ) = @_;
20727 my $jmax = @{$rfields} - 1;
20728 return unless ( $jmax > 0 );
20730 # check for balanced else block following if/elsif/unless
20731 my $rfields_old = $current_line->get_rfields();
20733 # TBD: add handling for 'case'
20734 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
20736 # look for the opening brace after the else, and extract the depth
20737 my $tok_brace = $rtokens->[0];
20739 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
20741 # probably: "else # side_comment"
20744 my $rpatterns_old = $current_line->get_rpatterns();
20745 my $rtokens_old = $current_line->get_rtokens();
20746 my $maximum_field_index = $current_line->get_jmax();
20748 # be sure the previous if/elsif is followed by an opening paren
20750 my $tok_paren = '(' . $depth_brace;
20751 my $tok_test = $rtokens_old->[$jparen];
20752 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
20754 # Now find the opening block brace
20756 for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
20757 my $tok = $rtokens_old->[$j];
20758 if ( $tok eq $tok_brace ) {
20763 return unless ( defined($jbrace) ); # shouldn't happen
20765 # Now splice the tokens and patterns of the previous line
20766 # into the else line to insure a match. Add empty fields
20768 my $jadd = $jbrace - $jparen;
20769 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
20770 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
20771 splice( @{$rfields}, 1, 0, ('') x $jadd );
20773 # force a flush after this line if it does not follow a case
20775 unless ( $rfields_old->[0] =~ /^case\s*$/ );
20778 { # sub check_match
20779 my %is_good_alignment;
20783 # Vertically aligning on certain "good" tokens is usually okay
20784 # so we can be less restrictive in marginal cases.
20785 @_ = qw( { ? => = );
20787 @is_good_alignment{@_} = (1) x scalar(@_);
20792 # See if the current line matches the current vertical alignment group.
20793 # If not, flush the current group.
20794 my $new_line = shift;
20795 my $old_line = shift;
20797 # uses global variables:
20798 # $previous_minimum_jmax_seen
20799 # $maximum_jmax_seen
20800 # $maximum_line_index
20802 my $jmax = $new_line->get_jmax();
20803 my $maximum_field_index = $old_line->get_jmax();
20805 # flush if this line has too many fields
20806 if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
20808 # flush if adding this line would make a non-monotonic field count
20810 ( $maximum_field_index > $jmax ) # this has too few fields
20812 ( $previous_minimum_jmax_seen <
20813 $jmax ) # and wouldn't be monotonic
20814 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
20821 # otherwise see if this line matches the current group
20822 my $jmax_original_line = $new_line->get_jmax_original_line();
20823 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
20824 my $rtokens = $new_line->get_rtokens();
20825 my $rfields = $new_line->get_rfields();
20826 my $rpatterns = $new_line->get_rpatterns();
20827 my $list_type = $new_line->get_list_type();
20829 my $group_list_type = $old_line->get_list_type();
20830 my $old_rpatterns = $old_line->get_rpatterns();
20831 my $old_rtokens = $old_line->get_rtokens();
20833 my $jlimit = $jmax - 1;
20834 if ( $maximum_field_index > $jmax ) {
20835 $jlimit = $jmax_original_line;
20836 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
20839 # handle comma-separated lists ..
20840 if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
20841 for my $j ( 0 .. $jlimit ) {
20842 my $old_tok = $$old_rtokens[$j];
20843 next unless $old_tok;
20844 my $new_tok = $$rtokens[$j];
20845 next unless $new_tok;
20847 # lists always match ...
20848 # unless they would align any '=>'s with ','s
20850 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
20851 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
20855 # do detailed check for everything else except hanging side comments
20856 elsif ( !$is_hanging_side_comment ) {
20858 my $leading_space_count = $new_line->get_leading_space_count();
20862 my $saw_good_alignment;
20864 for my $j ( 0 .. $jlimit ) {
20866 my $old_tok = $$old_rtokens[$j];
20867 my $new_tok = $$rtokens[$j];
20869 # Note on encoding used for alignment tokens:
20870 # -------------------------------------------
20871 # Tokens are "decorated" with information which can help
20872 # prevent unwanted alignments. Consider for example the
20873 # following two lines:
20874 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
20875 # local ( $i, $f ) = &'bdiv( $xn, $xd );
20876 # There are three alignment tokens in each line, a comma,
20877 # an =, and a comma. In the first line these three tokens
20879 # ,4+local-18 =3 ,4+split-7
20880 # and in the second line they are encoded as
20881 # ,4+local-18 =3 ,4+&'bdiv-8
20882 # Tokens always at least have token name and nesting
20883 # depth. So in this example the ='s are at depth 3 and
20884 # the ,'s are at depth 4. This prevents aligning tokens
20885 # of different depths. Commas contain additional
20886 # information, as follows:
20887 # , {depth} + {container name} - {spaces to opening paren}
20888 # This allows us to reject matching the rightmost commas
20889 # in the above two lines, since they are for different
20890 # function calls. This encoding is done in
20891 # 'sub send_lines_to_vertical_aligner'.
20893 # Pick off actual token.
20894 # Everything up to the first digit is the actual token.
20895 my $alignment_token = $new_tok;
20896 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
20898 # see if the decorated tokens match
20899 my $tokens_match = $new_tok eq $old_tok
20901 # Exception for matching terminal : of ternary statement..
20902 # consider containers prefixed by ? and : a match
20903 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
20905 # No match if the alignment tokens differ...
20906 if ( !$tokens_match ) {
20908 # ...Unless this is a side comment
20912 # and there is either at least one alignment token
20913 # or this is a single item following a list. This
20914 # latter rule is required for 'December' to join
20915 # the following list:
20917 # '', 'January', 'February', 'March',
20918 # 'April', 'May', 'June', 'July',
20919 # 'August', 'September', 'October', 'November',
20922 # If it doesn't then the -lp formatting will fail.
20923 && ( $j > 0 || $old_tok =~ /^,/ )
20926 $marginal_match = 1
20927 if ( $marginal_match == 0
20928 && $maximum_line_index == 0 );
20935 # Calculate amount of padding required to fit this in.
20936 # $pad is the number of spaces by which we must increase
20937 # the current field to squeeze in this field.
20939 length( $$rfields[$j] ) - $old_line->current_field_width($j);
20940 if ( $j == 0 ) { $pad += $leading_space_count; }
20942 # remember max pads to limit marginal cases
20943 if ( $alignment_token ne '#' ) {
20944 if ( $pad > $max_pad ) { $max_pad = $pad }
20945 if ( $pad < $min_pad ) { $min_pad = $pad }
20947 if ( $is_good_alignment{$alignment_token} ) {
20948 $saw_good_alignment = 1;
20951 # If patterns don't match, we have to be careful...
20952 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
20954 # flag this as a marginal match since patterns differ
20955 $marginal_match = 1
20956 if ( $marginal_match == 0 && $maximum_line_index == 0 );
20958 # We have to be very careful about aligning commas
20959 # when the pattern's don't match, because it can be
20960 # worse to create an alignment where none is needed
20961 # than to omit one. Here's an example where the ','s
20962 # are not in named containers. The first line below
20963 # should not match the next two:
20964 # ( $a, $b ) = ( $b, $r );
20965 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
20966 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
20967 if ( $alignment_token eq ',' ) {
20969 # do not align commas unless they are in named containers
20970 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
20973 # do not align parens unless patterns match;
20974 # large ugly spaces can occur in math expressions.
20975 elsif ( $alignment_token eq '(' ) {
20977 # But we can allow a match if the parens don't
20978 # require any padding.
20979 if ( $pad != 0 ) { goto NO_MATCH }
20982 # Handle an '=' alignment with different patterns to
20984 elsif ( $alignment_token eq '=' ) {
20986 # It is best to be a little restrictive when
20987 # aligning '=' tokens. Here is an example of
20988 # two lines that we will not align:
20991 # The problem is that one is a 'my' declaration,
20992 # and the other isn't, so they're not very similar.
20993 # We will filter these out by comparing the first
20994 # letter of the pattern. This is crude, but works
20997 substr( $$old_rpatterns[$j], 0, 1 ) ne
20998 substr( $$rpatterns[$j], 0, 1 ) )
21003 # If we pass that test, we'll call it a marginal match.
21004 # Here is an example of a marginal match:
21006 # $op = compile_bblock($op);
21007 # The left tokens are both identifiers, but
21008 # one accesses a hash and the other doesn't.
21009 # We'll let this be a tentative match and undo
21010 # it later if we don't find more than 2 lines
21012 elsif ( $maximum_line_index == 0 ) {
21014 2; # =2 prevents being undone below
21019 # Don't let line with fewer fields increase column widths
21021 if ( $maximum_field_index > $jmax ) {
21023 # Exception: suspend this rule to allow last lines to join
21024 if ( $pad > 0 ) { goto NO_MATCH; }
21026 } ## end for my $j ( 0 .. $jlimit)
21028 # Turn off the "marginal match" flag in some cases...
21029 # A "marginal match" occurs when the alignment tokens agree
21030 # but there are differences in the other tokens (patterns).
21031 # If we leave the marginal match flag set, then the rule is that we
21032 # will align only if there are more than two lines in the group.
21033 # We will turn of the flag if we almost have a match
21034 # and either we have seen a good alignment token or we
21035 # just need a small pad (2 spaces) to fit. These rules are
21036 # the result of experimentation. Tokens which misaligned by just
21037 # one or two characters are annoying. On the other hand,
21038 # large gaps to less important alignment tokens are also annoying.
21039 if ( $marginal_match == 1
21040 && $jmax == $maximum_field_index
21041 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
21044 $marginal_match = 0;
21046 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
21049 # We have a match (even if marginal).
21050 # If the current line has fewer fields than the current group
21051 # but otherwise matches, copy the remaining group fields to
21052 # make it a perfect match.
21053 if ( $maximum_field_index > $jmax ) {
21054 my $comment = $$rfields[$jmax];
21055 for $jmax ( $jlimit .. $maximum_field_index ) {
21056 $$rtokens[$jmax] = $$old_rtokens[$jmax];
21057 $$rfields[ ++$jmax ] = '';
21058 $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
21060 $$rfields[$jmax] = $comment;
21061 $new_line->set_jmax($jmax);
21066 ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
21074 return unless ( $maximum_line_index >= 0 );
21075 my $new_line = shift;
21076 my $old_line = shift;
21078 my $jmax = $new_line->get_jmax();
21079 my $leading_space_count = $new_line->get_leading_space_count();
21080 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
21081 my $rtokens = $new_line->get_rtokens();
21082 my $rfields = $new_line->get_rfields();
21083 my $rpatterns = $new_line->get_rpatterns();
21085 my $group_list_type = $group_lines[0]->get_list_type();
21087 my $padding_so_far = 0;
21088 my $padding_available = $old_line->get_available_space_on_right();
21090 # save current columns in case this doesn't work
21091 save_alignment_columns();
21093 my ( $j, $pad, $eight );
21094 my $maximum_field_index = $old_line->get_jmax();
21095 for $j ( 0 .. $jmax ) {
21097 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
21100 $pad += $leading_space_count;
21103 # remember largest gap of the group, excluding gap to side comment
21105 && $group_maximum_gap < -$pad
21107 && $j < $jmax - 1 )
21109 $group_maximum_gap = -$pad;
21114 ## This patch helps sometimes, but it doesn't check to see if
21115 ## the line is too long even without the side comment. It needs
21117 ##don't let a long token with no trailing side comment push
21118 ##side comments out, or end a group. (sidecmt1.t)
21119 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
21121 # This line will need space; lets see if we want to accept it..
21124 # not if this won't fit
21125 ( $pad > $padding_available )
21127 # previously, there were upper bounds placed on padding here
21128 # (maximum_whitespace_columns), but they were not really helpful
21133 # revert to starting state then flush; things didn't work out
21134 restore_alignment_columns();
21139 # patch to avoid excessive gaps in previous lines,
21140 # due to a line of fewer fields.
21141 # return join( ".",
21142 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
21143 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
21144 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
21146 # looks ok, squeeze this field in
21147 $old_line->increase_field_width( $j, $pad );
21148 $padding_available -= $pad;
21150 # remember largest gap of the group, excluding gap to side comment
21151 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
21152 $group_maximum_gap = $pad;
21159 # The current line either starts a new alignment group or is
21160 # accepted into the current alignment group.
21161 my $new_line = shift;
21162 $group_lines[ ++$maximum_line_index ] = $new_line;
21164 # initialize field lengths if starting new group
21165 if ( $maximum_line_index == 0 ) {
21167 my $jmax = $new_line->get_jmax();
21168 my $rfields = $new_line->get_rfields();
21169 my $rtokens = $new_line->get_rtokens();
21171 my $col = $new_line->get_leading_space_count();
21173 for $j ( 0 .. $jmax ) {
21174 $col += length( $$rfields[$j] );
21176 # create initial alignments for the new group
21178 if ( $j < $jmax ) { $token = $$rtokens[$j] }
21179 my $alignment = make_alignment( $col, $token );
21180 $new_line->set_alignment( $j, $alignment );
21183 $maximum_jmax_seen = $jmax;
21184 $minimum_jmax_seen = $jmax;
21187 # use previous alignments otherwise
21189 my @new_alignments =
21190 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
21191 $new_line->set_alignments(@new_alignments);
21194 # remember group jmax extremes for next call to valign_input
21195 $previous_minimum_jmax_seen = $minimum_jmax_seen;
21196 $previous_maximum_jmax_seen = $maximum_jmax_seen;
21201 # debug routine to dump array contents
21203 print STDOUT "(@_)\n";
21206 # flush() sends the current Perl::Tidy::VerticalAligner group down the
21207 # pipeline to Perl::Tidy::FileWriter.
21209 # This is the external flush, which also empties the buffer and cache
21212 # the buffer must be emptied first, then any cached text
21213 dump_valign_buffer();
21215 if ( $maximum_line_index < 0 ) {
21216 if ($cached_line_type) {
21217 $seqno_string = $cached_seqno_string;
21218 valign_output_step_C( $cached_line_text,
21219 $cached_line_leading_space_count,
21220 $last_level_written );
21221 $cached_line_type = 0;
21222 $cached_line_text = "";
21223 $cached_seqno_string = "";
21231 sub reduce_valign_buffer_indentation {
21234 if ( $valign_buffer_filling && $diff ) {
21235 my $max_valign_buffer = @valign_buffer;
21236 for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) {
21237 my ( $line, $leading_space_count, $level ) =
21238 @{ $valign_buffer[$i] };
21239 my $ws = substr( $line, 0, $diff );
21240 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
21241 $line = substr( $line, $diff );
21243 if ( $leading_space_count >= $diff ) {
21244 $leading_space_count -= $diff;
21245 $level = level_change( $leading_space_count, $diff, $level );
21247 $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
21254 # compute decrease in level when we remove $diff spaces from the
21256 my ( $leading_space_count, $diff, $level ) = @_;
21257 if ($rOpts_indent_columns) {
21259 int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
21260 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
21261 $level -= ( $olev - $nlev );
21262 if ( $level < 0 ) { $level = 0 }
21267 sub dump_valign_buffer {
21268 if (@valign_buffer) {
21269 foreach (@valign_buffer) {
21270 valign_output_step_D( @{$_} );
21272 @valign_buffer = ();
21274 $valign_buffer_filling = "";
21277 # This is the internal flush, which leaves the cache intact
21280 return if ( $maximum_line_index < 0 );
21282 # handle a group of comment lines
21283 if ( $group_type eq 'COMMENT' ) {
21285 VALIGN_DEBUG_FLAG_APPEND0 && do {
21286 my ( $a, $b, $c ) = caller();
21288 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
21291 my $leading_space_count = $comment_leading_space_count;
21292 my $leading_string = get_leading_string($leading_space_count);
21294 # zero leading space count if any lines are too long
21295 my $max_excess = 0;
21296 for my $i ( 0 .. $maximum_line_index ) {
21297 my $str = $group_lines[$i];
21300 $leading_space_count -
21301 maximum_line_length_for_level($group_level);
21302 if ( $excess > $max_excess ) {
21303 $max_excess = $excess;
21307 if ( $max_excess > 0 ) {
21308 $leading_space_count -= $max_excess;
21309 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
21310 $last_outdented_line_at =
21311 $file_writer_object->get_output_line_number();
21312 unless ($outdented_line_count) {
21313 $first_outdented_line_at = $last_outdented_line_at;
21315 $outdented_line_count += ( $maximum_line_index + 1 );
21318 # write the group of lines
21319 my $outdent_long_lines = 0;
21320 for my $i ( 0 .. $maximum_line_index ) {
21321 valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
21322 $outdent_long_lines, "", $group_level );
21326 # handle a group of code lines
21329 VALIGN_DEBUG_FLAG_APPEND0 && do {
21330 my $group_list_type = $group_lines[0]->get_list_type();
21331 my ( $a, $b, $c ) = caller();
21332 my $maximum_field_index = $group_lines[0]->get_jmax();
21334 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
21338 # some small groups are best left unaligned
21339 my $do_not_align = decide_if_aligned();
21341 # optimize side comment location
21342 $do_not_align = adjust_side_comment($do_not_align);
21344 # recover spaces for -lp option if possible
21345 my $extra_leading_spaces = get_extra_leading_spaces();
21347 # all lines of this group have the same basic leading spacing
21348 my $group_leader_length = $group_lines[0]->get_leading_space_count();
21350 # add extra leading spaces if helpful
21351 my $min_ci_gap = improve_continuation_indentation( $do_not_align,
21352 $group_leader_length );
21354 # loop to output all lines
21355 for my $i ( 0 .. $maximum_line_index ) {
21356 my $line = $group_lines[$i];
21357 valign_output_step_A( $line, $min_ci_gap, $do_not_align,
21358 $group_leader_length, $extra_leading_spaces );
21361 initialize_for_new_group();
21364 sub decide_if_aligned {
21366 # Do not try to align two lines which are not really similar
21367 return unless $maximum_line_index == 1;
21368 return if ($is_matching_terminal_line);
21370 my $group_list_type = $group_lines[0]->get_list_type();
21372 my $do_not_align = (
21374 # always align lists
21379 # don't align if it was just a marginal match
21382 # don't align two lines with big gap
21383 || $group_maximum_gap > 12
21385 # or lines with differing number of alignment tokens
21386 # TODO: this could be improved. It occasionally rejects
21388 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
21392 # But try to convert them into a simple comment group if the first line
21393 # a has side comment
21394 my $rfields = $group_lines[0]->get_rfields();
21395 my $maximum_field_index = $group_lines[0]->get_jmax();
21397 && ( $maximum_line_index > 0 )
21398 && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
21403 return $do_not_align;
21406 sub adjust_side_comment {
21408 my $do_not_align = shift;
21410 # let's see if we can move the side comment field out a little
21411 # to improve readability (the last field is always a side comment field)
21412 my $have_side_comment = 0;
21413 my $first_side_comment_line = -1;
21414 my $maximum_field_index = $group_lines[0]->get_jmax();
21415 for my $i ( 0 .. $maximum_line_index ) {
21416 my $line = $group_lines[$i];
21418 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
21419 $have_side_comment = 1;
21420 $first_side_comment_line = $i;
21425 my $kmax = $maximum_field_index + 1;
21427 if ($have_side_comment) {
21429 my $line = $group_lines[0];
21431 # the maximum space without exceeding the line length:
21432 my $avail = $line->get_available_space_on_right();
21434 # try to use the previous comment column
21435 my $side_comment_column = $line->get_column( $kmax - 2 );
21436 my $move = $last_comment_column - $side_comment_column;
21438 ## my $sc_line0 = $side_comment_history[0]->[0];
21439 ## my $sc_col0 = $side_comment_history[0]->[1];
21440 ## my $sc_line1 = $side_comment_history[1]->[0];
21441 ## my $sc_col1 = $side_comment_history[1]->[1];
21442 ## my $sc_line2 = $side_comment_history[2]->[0];
21443 ## my $sc_col2 = $side_comment_history[2]->[1];
21445 ## # FUTURE UPDATES:
21446 ## # Be sure to ignore 'do not align' and '} # end comments'
21447 ## # Find first $move > 0 and $move <= $avail as follows:
21448 ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
21449 ## # 2. try sc_col2 if (line-sc_line2) < 12
21450 ## # 3. try min possible space, plus up to 8,
21451 ## # 4. try min possible space
21453 if ( $kmax > 0 && !$do_not_align ) {
21455 # but if this doesn't work, give up and use the minimum space
21456 if ( $move > $avail ) {
21457 $move = $rOpts_minimum_space_to_comment - 1;
21460 # but we want some minimum space to the comment
21461 my $min_move = $rOpts_minimum_space_to_comment - 1;
21463 && $last_side_comment_length > 0
21464 && ( $first_side_comment_line == 0 )
21465 && $group_level == $last_level_written )
21470 if ( $move < $min_move ) {
21474 # previously, an upper bound was placed on $move here,
21475 # (maximum_space_to_comment), but it was not helpful
21477 # don't exceed the available space
21478 if ( $move > $avail ) { $move = $avail }
21480 # we can only increase space, never decrease
21482 $line->increase_field_width( $maximum_field_index - 1, $move );
21485 # remember this column for the next group
21486 $last_comment_column = $line->get_column( $kmax - 2 );
21490 # try to at least line up the existing side comment location
21491 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
21492 $line->increase_field_width( $maximum_field_index - 1, $move );
21496 # reset side comment column if we can't align
21498 forget_side_comment();
21502 return $do_not_align;
21505 sub improve_continuation_indentation {
21506 my ( $do_not_align, $group_leader_length ) = @_;
21508 # See if we can increase the continuation indentation
21509 # to move all continuation lines closer to the next field
21510 # (unless it is a comment).
21512 # '$min_ci_gap'is the extra indentation that we may need to introduce.
21513 # We will only introduce this to fields which already have some ci.
21514 # Without this variable, we would occasionally get something like this
21517 # use overload '+' => \&plus,
21519 # '*' => \&multiply,
21522 # 'atan2' => \&atan2,
21524 # Whereas with this variable, we can shift variables over to get this:
21526 # use overload '+' => \&plus,
21528 # '*' => \&multiply,
21531 # 'atan2' => \&atan2,
21533 ## Deactivated####################
21534 # The trouble with this patch is that it may, for example,
21535 # move in some 'or's or ':'s, and leave some out, so that the
21536 # left edge alignment suffers.
21538 ###########################################
21540 my $maximum_field_index = $group_lines[0]->get_jmax();
21542 my $min_ci_gap = maximum_line_length_for_level($group_level);
21543 if ( $maximum_field_index > 1 && !$do_not_align ) {
21545 for my $i ( 0 .. $maximum_line_index ) {
21546 my $line = $group_lines[$i];
21547 my $leading_space_count = $line->get_leading_space_count();
21548 my $rfields = $line->get_rfields();
21551 $line->get_column(0) -
21552 $leading_space_count -
21553 length( $$rfields[0] );
21555 if ( $leading_space_count > $group_leader_length ) {
21556 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
21560 if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) {
21567 return $min_ci_gap;
21570 sub valign_output_step_A {
21572 ###############################################################
21573 # This is Step A in writing vertically aligned lines.
21574 # The line is prepared according to the alignments which have
21575 # been found and shipped to the next step.
21576 ###############################################################
21578 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
21579 $extra_leading_spaces )
21581 my $rfields = $line->get_rfields();
21582 my $leading_space_count = $line->get_leading_space_count();
21583 my $outdent_long_lines = $line->get_outdent_long_lines();
21584 my $maximum_field_index = $line->get_jmax();
21585 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
21587 # add any extra spaces
21588 if ( $leading_space_count > $group_leader_length ) {
21589 $leading_space_count += $min_ci_gap;
21592 my $str = $$rfields[0];
21594 # loop to concatenate all fields of this line and needed padding
21595 my $total_pad_count = 0;
21597 for $j ( 1 .. $maximum_field_index ) {
21599 # skip zero-length side comments
21601 if ( ( $j == $maximum_field_index )
21602 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
21605 # compute spaces of padding before this field
21606 my $col = $line->get_column( $j - 1 );
21607 $pad = $col - ( length($str) + $leading_space_count );
21609 if ($do_not_align) {
21611 ( $j < $maximum_field_index )
21613 : $rOpts_minimum_space_to_comment - 1;
21616 # if the -fpsc flag is set, move the side comment to the selected
21617 # column if and only if it is possible, ignoring constraints on
21618 # line length and minimum space to comment
21619 if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
21621 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
21622 if ( $newpad >= 0 ) { $pad = $newpad; }
21625 # accumulate the padding
21626 if ( $pad > 0 ) { $total_pad_count += $pad; }
21629 if ( !defined $$rfields[$j] ) {
21630 write_diagnostics("UNDEFined field at j=$j\n");
21633 # only add padding when we have a finite field;
21634 # this avoids extra terminal spaces if we have empty fields
21635 if ( length( $$rfields[$j] ) > 0 ) {
21636 $str .= ' ' x $total_pad_count;
21637 $total_pad_count = 0;
21638 $str .= $$rfields[$j];
21641 $total_pad_count = 0;
21644 # update side comment history buffer
21645 if ( $j == $maximum_field_index ) {
21646 my $lineno = $file_writer_object->get_output_line_number();
21647 shift @side_comment_history;
21648 push @side_comment_history, [ $lineno, $col ];
21652 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
21654 # ship this line off
21655 valign_output_step_B( $leading_space_count + $extra_leading_spaces,
21656 $str, $side_comment_length, $outdent_long_lines,
21657 $rvertical_tightness_flags, $group_level );
21660 sub get_extra_leading_spaces {
21662 #----------------------------------------------------------
21663 # Define any extra indentation space (for the -lp option).
21665 # If a list has side comments, sub scan_list must dump the
21666 # list before it sees everything. When this happens, it sets
21667 # the indentation to the standard scheme, but notes how
21668 # many spaces it would have liked to use. We may be able
21669 # to recover that space here in the event that all of the
21670 # lines of a list are back together again.
21671 #----------------------------------------------------------
21673 my $extra_leading_spaces = 0;
21674 if ($extra_indent_ok) {
21675 my $object = $group_lines[0]->get_indentation();
21676 if ( ref($object) ) {
21677 my $extra_indentation_spaces_wanted =
21678 get_RECOVERABLE_SPACES($object);
21680 # all indentation objects must be the same
21682 for $i ( 1 .. $maximum_line_index ) {
21683 if ( $object != $group_lines[$i]->get_indentation() ) {
21684 $extra_indentation_spaces_wanted = 0;
21689 if ($extra_indentation_spaces_wanted) {
21691 # the maximum space without exceeding the line length:
21692 my $avail = $group_lines[0]->get_available_space_on_right();
21693 $extra_leading_spaces =
21694 ( $avail > $extra_indentation_spaces_wanted )
21695 ? $extra_indentation_spaces_wanted
21698 # update the indentation object because with -icp the terminal
21699 # ');' will use the same adjustment.
21700 $object->permanently_decrease_AVAILABLE_SPACES(
21701 -$extra_leading_spaces );
21705 return $extra_leading_spaces;
21708 sub combine_fields {
21710 # combine all fields except for the comment field ( sidecmt.t )
21711 # Uses global variables:
21713 # $maximum_line_index
21715 my $maximum_field_index = $group_lines[0]->get_jmax();
21716 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
21717 my $line = $group_lines[$j];
21718 my $rfields = $line->get_rfields();
21719 foreach ( 1 .. $maximum_field_index - 1 ) {
21720 $$rfields[0] .= $$rfields[$_];
21722 $$rfields[1] = $$rfields[$maximum_field_index];
21724 $line->set_jmax(1);
21725 $line->set_column( 0, 0 );
21726 $line->set_column( 1, 0 );
21729 $maximum_field_index = 1;
21731 for $j ( 0 .. $maximum_line_index ) {
21732 my $line = $group_lines[$j];
21733 my $rfields = $line->get_rfields();
21734 for $k ( 0 .. $maximum_field_index ) {
21735 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
21737 $pad += $group_lines[$j]->get_leading_space_count();
21740 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
21746 sub get_output_line_number {
21748 # the output line number reported to a caller is the number of items
21749 # written plus the number of items in the buffer
21751 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
21754 sub valign_output_step_B {
21756 ###############################################################
21757 # This is Step B in writing vertically aligned lines.
21758 # Vertical tightness is applied according to preset flags.
21759 # In particular this routine handles stacking of opening
21760 # and closing tokens.
21761 ###############################################################
21763 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
21764 $rvertical_tightness_flags, $level )
21767 # handle outdenting of long lines:
21768 if ($outdent_long_lines) {
21771 $side_comment_length +
21772 $leading_space_count -
21773 maximum_line_length_for_level($level);
21774 if ( $excess > 0 ) {
21775 $leading_space_count = 0;
21776 $last_outdented_line_at =
21777 $file_writer_object->get_output_line_number();
21779 unless ($outdented_line_count) {
21780 $first_outdented_line_at = $last_outdented_line_at;
21782 $outdented_line_count++;
21786 # Make preliminary leading whitespace. It could get changed
21787 # later by entabbing, so we have to keep track of any changes
21788 # to the leading_space_count from here on.
21789 my $leading_string =
21790 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
21792 # Unpack any recombination data; it was packed by
21793 # sub send_lines_to_vertical_aligner. Contents:
21795 # [0] type: 1=opening non-block 2=closing non-block
21796 # 3=opening block brace 4=closing block brace
21797 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
21798 # if closing: spaces of padding to use
21799 # [2] sequence number of container
21800 # [3] valid flag: do not append if this flag is false
21802 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
21804 if ($rvertical_tightness_flags) {
21806 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
21808 ) = @{$rvertical_tightness_flags};
21811 $seqno_string = $seqno_end;
21813 # handle any cached line ..
21814 # either append this line to it or write it out
21815 if ( length($cached_line_text) ) {
21817 # Dump an invalid cached line
21818 if ( !$cached_line_valid ) {
21819 valign_output_step_C( $cached_line_text,
21820 $cached_line_leading_space_count,
21821 $last_level_written );
21824 # Handle cached line ending in OPENING tokens
21825 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
21827 my $gap = $leading_space_count - length($cached_line_text);
21829 # handle option of just one tight opening per line:
21830 if ( $cached_line_flag == 1 ) {
21831 if ( defined($open_or_close) && $open_or_close == 1 ) {
21836 if ( $gap >= 0 && defined($seqno_beg) ) {
21837 $leading_string = $cached_line_text . ' ' x $gap;
21838 $leading_space_count = $cached_line_leading_space_count;
21839 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
21840 $level = $last_level_written;
21843 valign_output_step_C( $cached_line_text,
21844 $cached_line_leading_space_count,
21845 $last_level_written );
21849 # Handle cached line ending in CLOSING tokens
21851 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
21854 # The new line must start with container
21857 # The container combination must be okay..
21860 # okay to combine like types
21861 ( $open_or_close == $cached_line_type )
21863 # closing block brace may append to non-block
21864 || ( $cached_line_type == 2 && $open_or_close == 4 )
21866 # something like ');'
21867 || ( !$open_or_close && $cached_line_type == 2 )
21871 # The combined line must fit
21873 length($test_line) <=
21874 maximum_line_length_for_level($last_level_written) )
21878 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
21880 # Patch to outdent closing tokens ending # in ');'
21881 # If we are joining a line like ');' to a previous stacked
21882 # set of closing tokens, then decide if we may outdent the
21883 # combined stack to the indentation of the ');'. Since we
21884 # should not normally outdent any of the other tokens more than
21885 # the indentation of the lines that contained them, we will
21886 # only do this if all of the corresponding opening
21887 # tokens were on the same line. This can happen with
21888 # -sot and -sct. For example, it is ok here:
21889 # __PACKAGE__->load_components( qw(
21894 # But, for example, we do not outdent in this example because
21895 # that would put the closing sub brace out farther than the
21896 # opening sub brace:
21898 # perltidy -sot -sct
21900 # '<Control-f>' => sub {
21902 # my $e = $c->XEvent;
21903 # itemsUnderArea $c;
21906 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
21908 # The way to tell this is if the stacked sequence numbers
21909 # of this output line are the reverse of the stacked
21910 # sequence numbers of the previous non-blank line of
21911 # sequence numbers. So we can join if the previous
21912 # nonblank string of tokens is the mirror image. For
21913 # example if stack )}] is 13:8:6 then we are looking for a
21914 # leading stack like [{( which is 6:8:13 We only need to
21915 # check the two ends, because the intermediate tokens must
21916 # fall in order. Note on speed: having to split on colons
21917 # and eliminate multiple colons might appear to be slow,
21918 # but it's not an issue because we almost never come
21919 # through here. In a typical file we don't.
21920 $seqno_string =~ s/^:+//;
21921 $last_nonblank_seqno_string =~ s/^:+//;
21922 $seqno_string =~ s/:+/:/g;
21923 $last_nonblank_seqno_string =~ s/:+/:/g;
21925 # how many spaces can we outdent?
21927 $cached_line_leading_space_count - $leading_space_count;
21929 && length($seqno_string)
21930 && length($last_nonblank_seqno_string) ==
21931 length($seqno_string) )
21934 ( split ':', $last_nonblank_seqno_string );
21935 my @seqno_now = ( split ':', $seqno_string );
21936 if ( $seqno_now[-1] == $seqno_last[0]
21937 && $seqno_now[0] == $seqno_last[-1] )
21941 # for absolute safety, be sure we only remove
21943 my $ws = substr( $test_line, 0, $diff );
21944 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
21946 $test_line = substr( $test_line, $diff );
21947 $cached_line_leading_space_count -= $diff;
21948 $last_level_written =
21950 $cached_line_leading_space_count,
21951 $diff, $last_level_written );
21952 reduce_valign_buffer_indentation($diff);
21955 # shouldn't happen, but not critical:
21957 ## ERROR transferring indentation here
21964 $leading_string = "";
21965 $leading_space_count = $cached_line_leading_space_count;
21966 $level = $last_level_written;
21969 valign_output_step_C( $cached_line_text,
21970 $cached_line_leading_space_count,
21971 $last_level_written );
21975 $cached_line_type = 0;
21976 $cached_line_text = "";
21978 # make the line to be written
21979 my $line = $leading_string . $str;
21981 # write or cache this line
21982 if ( !$open_or_close || $side_comment_length > 0 ) {
21983 valign_output_step_C( $line, $leading_space_count, $level );
21986 $cached_line_text = $line;
21987 $cached_line_type = $open_or_close;
21988 $cached_line_flag = $tightness_flag;
21989 $cached_seqno = $seqno;
21990 $cached_line_valid = $valid;
21991 $cached_line_leading_space_count = $leading_space_count;
21992 $cached_seqno_string = $seqno_string;
21995 $last_level_written = $level;
21996 $last_side_comment_length = $side_comment_length;
21997 $extra_indent_ok = 0;
22000 sub valign_output_step_C {
22002 ###############################################################
22003 # This is Step C in writing vertically aligned lines.
22004 # Lines are either stored in a buffer or passed along to the next step.
22005 # The reason for storing lines is that we may later want to reduce their
22006 # indentation when -sot and -sct are both used.
22007 ###############################################################
22010 # Dump any saved lines if we see a line with an unbalanced opening or
22012 dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
22014 # Either store or write this line
22015 if ($valign_buffer_filling) {
22016 push @valign_buffer, [@args];
22019 valign_output_step_D(@args);
22022 # For lines starting or ending with opening or closing tokens..
22023 if ($seqno_string) {
22024 $last_nonblank_seqno_string = $seqno_string;
22026 # Start storing lines when we see a line with multiple stacked opening
22028 if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) {
22029 $valign_buffer_filling = $seqno_string;
22034 sub valign_output_step_D {
22036 ###############################################################
22037 # This is Step D in writing vertically aligned lines.
22038 # Write one vertically aligned line of code to the output object.
22039 ###############################################################
22041 my ( $line, $leading_space_count, $level ) = @_;
22043 # The line is currently correct if there is no tabbing (recommended!)
22044 # We may have to lop off some leading spaces and replace with tabs.
22045 if ( $leading_space_count > 0 ) {
22047 # Nothing to do if no tabs
22048 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22049 || $rOpts_indent_columns <= 0 )
22055 # Handle entab option
22056 elsif ($rOpts_entab_leading_whitespace) {
22058 $leading_space_count % $rOpts_entab_leading_whitespace;
22060 int( $leading_space_count / $rOpts_entab_leading_whitespace );
22061 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
22062 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22063 substr( $line, 0, $leading_space_count ) = $leading_string;
22067 # shouldn't happen - program error counting whitespace
22069 VALIGN_DEBUG_FLAG_TABS
22071 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22076 # Handle option of one tab per level
22078 my $leading_string = ( "\t" x $level );
22080 $leading_space_count - $level * $rOpts_indent_columns;
22082 # shouldn't happen:
22083 if ( $space_count < 0 ) {
22085 # But it could be an outdented comment
22086 if ( $line !~ /^\s*#/ ) {
22087 VALIGN_DEBUG_FLAG_TABS
22089 "Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
22092 $leading_string = ( ' ' x $leading_space_count );
22095 $leading_string .= ( ' ' x $space_count );
22097 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22098 substr( $line, 0, $leading_space_count ) = $leading_string;
22102 # shouldn't happen - program error counting whitespace
22103 # we'll skip entabbing
22104 VALIGN_DEBUG_FLAG_TABS
22106 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22111 $file_writer_object->write_code_line( $line . "\n" );
22114 { # begin get_leading_string
22116 my @leading_string_cache;
22118 sub get_leading_string {
22120 # define the leading whitespace string for this line..
22121 my $leading_whitespace_count = shift;
22123 # Handle case of zero whitespace, which includes multi-line quotes
22124 # (which may have a finite level; this prevents tab problems)
22125 if ( $leading_whitespace_count <= 0 ) {
22129 # look for previous result
22130 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
22131 return $leading_string_cache[$leading_whitespace_count];
22134 # must compute a string for this number of spaces
22135 my $leading_string;
22137 # Handle simple case of no tabs
22138 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22139 || $rOpts_indent_columns <= 0 )
22141 $leading_string = ( ' ' x $leading_whitespace_count );
22144 # Handle entab option
22145 elsif ($rOpts_entab_leading_whitespace) {
22147 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
22148 my $tab_count = int(
22149 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
22150 $leading_string = "\t" x $tab_count . ' ' x $space_count;
22153 # Handle option of one tab per level
22155 $leading_string = ( "\t" x $group_level );
22157 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
22159 # shouldn't happen:
22160 if ( $space_count < 0 ) {
22161 VALIGN_DEBUG_FLAG_TABS
22163 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
22166 # -- skip entabbing
22167 $leading_string = ( ' ' x $leading_whitespace_count );
22170 $leading_string .= ( ' ' x $space_count );
22173 $leading_string_cache[$leading_whitespace_count] = $leading_string;
22174 return $leading_string;
22176 } # end get_leading_string
22178 sub report_anything_unusual {
22180 if ( $outdented_line_count > 0 ) {
22181 write_logfile_entry(
22182 "$outdented_line_count long lines were outdented:\n");
22183 write_logfile_entry(
22184 " First at output line $first_outdented_line_at\n");
22186 if ( $outdented_line_count > 1 ) {
22187 write_logfile_entry(
22188 " Last at output line $last_outdented_line_at\n");
22190 write_logfile_entry(
22191 " use -noll to prevent outdenting, -l=n to increase line length\n"
22193 write_logfile_entry("\n");
22197 #####################################################################
22199 # the Perl::Tidy::FileWriter class writes the output file
22201 #####################################################################
22203 package Perl::Tidy::FileWriter;
22205 # Maximum number of little messages; probably need not be changed.
22206 use constant MAX_NAG_MESSAGES => 6;
22208 sub write_logfile_entry {
22210 my $logger_object = $self->{_logger_object};
22211 if ($logger_object) {
22212 $logger_object->write_logfile_entry(@_);
22218 my ( $line_sink_object, $rOpts, $logger_object ) = @_;
22221 _line_sink_object => $line_sink_object,
22222 _logger_object => $logger_object,
22224 _output_line_number => 1,
22225 _consecutive_blank_lines => 0,
22226 _consecutive_nonblank_lines => 0,
22227 _first_line_length_error => 0,
22228 _max_line_length_error => 0,
22229 _last_line_length_error => 0,
22230 _first_line_length_error_at => 0,
22231 _max_line_length_error_at => 0,
22232 _last_line_length_error_at => 0,
22233 _line_length_error_count => 0,
22234 _max_output_line_length => 0,
22235 _max_output_line_length_at => 0,
22241 $self->{_line_sink_object}->tee_on();
22246 $self->{_line_sink_object}->tee_off();
22249 sub get_output_line_number {
22251 return $self->{_output_line_number};
22254 sub decrement_output_line_number {
22256 $self->{_output_line_number}--;
22259 sub get_consecutive_nonblank_lines {
22261 return $self->{_consecutive_nonblank_lines};
22264 sub reset_consecutive_blank_lines {
22266 $self->{_consecutive_blank_lines} = 0;
22269 sub want_blank_line {
22271 unless ( $self->{_consecutive_blank_lines} ) {
22272 $self->write_blank_code_line();
22276 sub require_blank_code_lines {
22278 # write out the requested number of blanks regardless of the value of -mbl
22279 # unless -mbl=0. This allows extra blank lines to be written for subs and
22280 # packages even with the default -mbl=1
22283 my $need = $count - $self->{_consecutive_blank_lines};
22284 my $rOpts = $self->{_rOpts};
22285 my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
22286 for ( my $i = 0 ; $i < $need ; $i++ ) {
22287 $self->write_blank_code_line($forced);
22291 sub write_blank_code_line {
22293 my $forced = shift;
22294 my $rOpts = $self->{_rOpts};
22297 && $self->{_consecutive_blank_lines} >=
22298 $rOpts->{'maximum-consecutive-blank-lines'} );
22299 $self->{_consecutive_blank_lines}++;
22300 $self->{_consecutive_nonblank_lines} = 0;
22301 $self->write_line("\n");
22304 sub write_code_line {
22308 if ( $a =~ /^\s*$/ ) {
22309 my $rOpts = $self->{_rOpts};
22311 if ( $self->{_consecutive_blank_lines} >=
22312 $rOpts->{'maximum-consecutive-blank-lines'} );
22313 $self->{_consecutive_blank_lines}++;
22314 $self->{_consecutive_nonblank_lines} = 0;
22317 $self->{_consecutive_blank_lines} = 0;
22318 $self->{_consecutive_nonblank_lines}++;
22320 $self->write_line($a);
22327 # TODO: go through and see if the test is necessary here
22328 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
22330 $self->{_line_sink_object}->write_line($a);
22332 # This calculation of excess line length ignores any internal tabs
22333 my $rOpts = $self->{_rOpts};
22334 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
22335 if ( $a =~ /^\t+/g ) {
22336 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
22339 # Note that we just incremented output line number to future value
22340 # so we must subtract 1 for current line number
22341 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
22342 $self->{_max_output_line_length} = length($a) - 1;
22343 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
22346 if ( $exceed > 0 ) {
22347 my $output_line_number = $self->{_output_line_number};
22348 $self->{_last_line_length_error} = $exceed;
22349 $self->{_last_line_length_error_at} = $output_line_number - 1;
22350 if ( $self->{_line_length_error_count} == 0 ) {
22351 $self->{_first_line_length_error} = $exceed;
22352 $self->{_first_line_length_error_at} = $output_line_number - 1;
22356 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
22358 $self->{_max_line_length_error} = $exceed;
22359 $self->{_max_line_length_error_at} = $output_line_number - 1;
22362 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
22363 $self->write_logfile_entry(
22364 "Line length exceeded by $exceed characters\n");
22366 $self->{_line_length_error_count}++;
22371 sub report_line_length_errors {
22373 my $rOpts = $self->{_rOpts};
22374 my $line_length_error_count = $self->{_line_length_error_count};
22375 if ( $line_length_error_count == 0 ) {
22376 $self->write_logfile_entry(
22377 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
22378 my $max_output_line_length = $self->{_max_output_line_length};
22379 my $max_output_line_length_at = $self->{_max_output_line_length_at};
22380 $self->write_logfile_entry(
22381 " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
22387 my $word = ( $line_length_error_count > 1 ) ? "s" : "";
22388 $self->write_logfile_entry(
22389 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
22392 $word = ( $line_length_error_count > 1 ) ? "First" : "";
22393 my $first_line_length_error = $self->{_first_line_length_error};
22394 my $first_line_length_error_at = $self->{_first_line_length_error_at};
22395 $self->write_logfile_entry(
22396 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
22399 if ( $line_length_error_count > 1 ) {
22400 my $max_line_length_error = $self->{_max_line_length_error};
22401 my $max_line_length_error_at = $self->{_max_line_length_error_at};
22402 my $last_line_length_error = $self->{_last_line_length_error};
22403 my $last_line_length_error_at = $self->{_last_line_length_error_at};
22404 $self->write_logfile_entry(
22405 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
22407 $self->write_logfile_entry(
22408 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
22414 #####################################################################
22416 # The Perl::Tidy::Debugger class shows line tokenization
22418 #####################################################################
22420 package Perl::Tidy::Debugger;
22424 my ( $class, $filename ) = @_;
22427 _debug_file => $filename,
22428 _debug_file_opened => 0,
22433 sub really_open_debug_file {
22436 my $debug_file = $self->{_debug_file};
22438 unless ( $fh = IO::File->new("> $debug_file") ) {
22439 Perl::Tidy::Warn("can't open $debug_file: $!\n");
22441 $self->{_debug_file_opened} = 1;
22442 $self->{_fh} = $fh;
22444 "Use -dump-token-types (-dtt) to get a list of token type codes\n";
22447 sub close_debug_file {
22450 my $fh = $self->{_fh};
22451 if ( $self->{_debug_file_opened} ) {
22453 eval { $self->{_fh}->close() };
22457 sub write_debug_entry {
22459 # This is a debug dump routine which may be modified as necessary
22460 # to dump tokens on a line-by-line basis. The output will be written
22461 # to the .DEBUG file when the -D flag is entered.
22463 my $line_of_tokens = shift;
22465 my $input_line = $line_of_tokens->{_line_text};
22466 my $rtoken_type = $line_of_tokens->{_rtoken_type};
22467 my $rtokens = $line_of_tokens->{_rtokens};
22468 my $rlevels = $line_of_tokens->{_rlevels};
22469 my $rslevels = $line_of_tokens->{_rslevels};
22470 my $rblock_type = $line_of_tokens->{_rblock_type};
22471 my $input_line_number = $line_of_tokens->{_line_number};
22472 my $line_type = $line_of_tokens->{_line_type};
22476 my $token_str = "$input_line_number: ";
22477 my $reconstructed_original = "$input_line_number: ";
22478 my $block_str = "$input_line_number: ";
22480 #$token_str .= "$line_type: ";
22481 #$reconstructed_original .= "$line_type: ";
22484 my @next_char = ( '"', '"' );
22486 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
22487 my $fh = $self->{_fh};
22489 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
22492 if ( $$rtoken_type[$j] eq 'k' ) {
22493 $pattern .= $$rtokens[$j];
22496 $pattern .= $$rtoken_type[$j];
22498 $reconstructed_original .= $$rtokens[$j];
22499 $block_str .= "($$rblock_type[$j])";
22500 $num = length( $$rtokens[$j] );
22501 my $type_str = $$rtoken_type[$j];
22503 # be sure there are no blank tokens (shouldn't happen)
22504 # This can only happen if a programming error has been made
22505 # because all valid tokens are non-blank
22506 if ( $type_str eq ' ' ) {
22507 print $fh "BLANK TOKEN on the next line\n";
22508 $type_str = $next_char[$i_next];
22509 $i_next = 1 - $i_next;
22512 if ( length($type_str) == 1 ) {
22513 $type_str = $type_str x $num;
22515 $token_str .= $type_str;
22518 # Write what you want here ...
22519 # print $fh "$input_line\n";
22520 # print $fh "$pattern\n";
22521 print $fh "$reconstructed_original\n";
22522 print $fh "$token_str\n";
22524 #print $fh "$block_str\n";
22527 #####################################################################
22529 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
22530 # method for returning the next line to be parsed, as well as a
22531 # 'peek_ahead()' method
22533 # The input parameter is an object with a 'get_line()' method
22534 # which returns the next line to be parsed
22536 #####################################################################
22538 package Perl::Tidy::LineBuffer;
22543 my $line_source_object = shift;
22546 _line_source_object => $line_source_object,
22547 _rlookahead_buffer => [],
22553 my $buffer_index = shift;
22555 my $line_source_object = $self->{_line_source_object};
22556 my $rlookahead_buffer = $self->{_rlookahead_buffer};
22557 if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
22558 $line = $$rlookahead_buffer[$buffer_index];
22561 $line = $line_source_object->get_line();
22562 push( @$rlookahead_buffer, $line );
22570 my $line_source_object = $self->{_line_source_object};
22571 my $rlookahead_buffer = $self->{_rlookahead_buffer};
22573 if ( scalar(@$rlookahead_buffer) ) {
22574 $line = shift @$rlookahead_buffer;
22577 $line = $line_source_object->get_line();
22582 ########################################################################
22584 # the Perl::Tidy::Tokenizer package is essentially a filter which
22585 # reads lines of perl source code from a source object and provides
22586 # corresponding tokenized lines through its get_line() method. Lines
22587 # flow from the source_object to the caller like this:
22589 # source_object --> LineBuffer_object --> Tokenizer --> calling routine
22590 # get_line() get_line() get_line() line_of_tokens
22592 # The source object can be any object with a get_line() method which
22593 # supplies one line (a character string) perl call.
22594 # The LineBuffer object is created by the Tokenizer.
22595 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
22596 # containing one tokenized line for each call to its get_line() method.
22598 # WARNING: This is not a real class yet. Only one tokenizer my be used.
22600 ########################################################################
22602 package Perl::Tidy::Tokenizer;
22606 # Caution: these debug flags produce a lot of output
22607 # They should all be 0 except when debugging small scripts
22609 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
22610 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
22611 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
22612 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
22613 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
22615 my $debug_warning = sub {
22616 print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
22619 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
22620 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
22621 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
22622 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
22623 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
22629 # PACKAGE VARIABLES for processing an entire FILE.
22633 $last_nonblank_token
22634 $last_nonblank_type
22635 $last_nonblank_block_type
22643 %user_function_prototype
22645 %is_block_list_function
22646 %saw_function_definition
22650 $square_bracket_depth
22655 @nesting_sequence_number
22656 @current_sequence_number
22658 @paren_semicolon_count
22659 @paren_structural_type
22661 @brace_structural_type
22664 @square_bracket_type
22665 @square_bracket_structural_type
22667 @nested_ternary_flag
22668 @nested_statement_type
22669 @starting_line_of_current_depth
22672 # GLOBAL CONSTANTS for routines in this package
22674 %is_indirect_object_taker
22676 %expecting_operator_token
22677 %expecting_operator_types
22678 %expecting_term_types
22679 %expecting_term_token
22681 %is_file_test_operator
22683 %is_valid_token_type
22685 %is_code_block_token
22687 @opening_brace_names
22688 @closing_brace_names
22689 %is_keyword_taking_list
22690 %is_q_qq_qw_qx_qr_s_y_tr_m
22693 # possible values of operator_expected()
22694 use constant TERM => -1;
22695 use constant UNKNOWN => 0;
22696 use constant OPERATOR => 1;
22698 # possible values of context
22699 use constant SCALAR_CONTEXT => -1;
22700 use constant UNKNOWN_CONTEXT => 0;
22701 use constant LIST_CONTEXT => 1;
22703 # Maximum number of little messages; probably need not be changed.
22704 use constant MAX_NAG_MESSAGES => 6;
22708 # methods to count instances
22710 sub get_count { $_count; }
22711 sub _increment_count { ++$_count }
22712 sub _decrement_count { --$_count }
22716 $_[0]->_decrement_count();
22723 # Note: 'tabs' and 'indent_columns' are temporary and should be
22726 source_object => undef,
22727 debugger_object => undef,
22728 diagnostics_object => undef,
22729 logger_object => undef,
22730 starting_level => undef,
22731 indent_columns => 4,
22733 look_for_hash_bang => 0,
22735 look_for_autoloader => 1,
22736 look_for_selfloader => 1,
22737 starting_line_number => 1,
22739 my %args = ( %defaults, @_ );
22741 # we are given an object with a get_line() method to supply source lines
22742 my $source_object = $args{source_object};
22744 # we create another object with a get_line() and peek_ahead() method
22745 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
22747 # Tokenizer state data is as follows:
22748 # _rhere_target_list reference to list of here-doc targets
22749 # _here_doc_target the target string for a here document
22750 # _here_quote_character the type of here-doc quoting (" ' ` or none)
22751 # to determine if interpolation is done
22752 # _quote_target character we seek if chasing a quote
22753 # _line_start_quote line where we started looking for a long quote
22754 # _in_here_doc flag indicating if we are in a here-doc
22755 # _in_pod flag set if we are in pod documentation
22756 # _in_error flag set if we saw severe error (binary in script)
22757 # _in_data flag set if we are in __DATA__ section
22758 # _in_end flag set if we are in __END__ section
22759 # _in_format flag set if we are in a format description
22760 # _in_attribute_list flag telling if we are looking for attributes
22761 # _in_quote flag telling if we are chasing a quote
22762 # _starting_level indentation level of first line
22763 # _line_buffer_object object with get_line() method to supply source code
22764 # _diagnostics_object place to write debugging information
22765 # _unexpected_error_count error count used to limit output
22766 # _lower_case_labels_at line numbers where lower case labels seen
22767 $tokenizer_self = {
22768 _rhere_target_list => [],
22770 _here_doc_target => "",
22771 _here_quote_character => "",
22777 _in_attribute_list => 0,
22779 _quote_target => "",
22780 _line_start_quote => -1,
22781 _starting_level => $args{starting_level},
22782 _know_starting_level => defined( $args{starting_level} ),
22783 _tabsize => $args{tabsize},
22784 _indent_columns => $args{indent_columns},
22785 _look_for_hash_bang => $args{look_for_hash_bang},
22786 _trim_qw => $args{trim_qw},
22787 _continuation_indentation => $args{continuation_indentation},
22788 _outdent_labels => $args{outdent_labels},
22789 _last_line_number => $args{starting_line_number} - 1,
22790 _saw_perl_dash_P => 0,
22791 _saw_perl_dash_w => 0,
22792 _saw_use_strict => 0,
22793 _saw_v_string => 0,
22794 _look_for_autoloader => $args{look_for_autoloader},
22795 _look_for_selfloader => $args{look_for_selfloader},
22796 _saw_autoloader => 0,
22797 _saw_selfloader => 0,
22798 _saw_hash_bang => 0,
22801 _saw_negative_indentation => 0,
22802 _started_tokenizing => 0,
22803 _line_buffer_object => $line_buffer_object,
22804 _debugger_object => $args{debugger_object},
22805 _diagnostics_object => $args{diagnostics_object},
22806 _logger_object => $args{logger_object},
22807 _unexpected_error_count => 0,
22808 _started_looking_for_here_target_at => 0,
22809 _nearly_matched_here_target_at => undef,
22811 _rlower_case_labels_at => undef,
22814 prepare_for_a_new_file();
22815 find_starting_indentation_level();
22817 bless $tokenizer_self, $class;
22819 # This is not a full class yet, so die if an attempt is made to
22820 # create more than one object.
22822 if ( _increment_count() > 1 ) {
22824 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
22827 return $tokenizer_self;
22831 # interface to Perl::Tidy::Logger routines
22833 my $logger_object = $tokenizer_self->{_logger_object};
22834 if ($logger_object) {
22835 $logger_object->warning(@_);
22840 my $logger_object = $tokenizer_self->{_logger_object};
22841 if ($logger_object) {
22842 $logger_object->complain(@_);
22846 sub write_logfile_entry {
22847 my $logger_object = $tokenizer_self->{_logger_object};
22848 if ($logger_object) {
22849 $logger_object->write_logfile_entry(@_);
22853 sub interrupt_logfile {
22854 my $logger_object = $tokenizer_self->{_logger_object};
22855 if ($logger_object) {
22856 $logger_object->interrupt_logfile();
22860 sub resume_logfile {
22861 my $logger_object = $tokenizer_self->{_logger_object};
22862 if ($logger_object) {
22863 $logger_object->resume_logfile();
22867 sub increment_brace_error {
22868 my $logger_object = $tokenizer_self->{_logger_object};
22869 if ($logger_object) {
22870 $logger_object->increment_brace_error();
22874 sub report_definite_bug {
22875 my $logger_object = $tokenizer_self->{_logger_object};
22876 if ($logger_object) {
22877 $logger_object->report_definite_bug();
22881 sub brace_warning {
22882 my $logger_object = $tokenizer_self->{_logger_object};
22883 if ($logger_object) {
22884 $logger_object->brace_warning(@_);
22888 sub get_saw_brace_error {
22889 my $logger_object = $tokenizer_self->{_logger_object};
22890 if ($logger_object) {
22891 $logger_object->get_saw_brace_error();
22898 # interface to Perl::Tidy::Diagnostics routines
22899 sub write_diagnostics {
22900 if ( $tokenizer_self->{_diagnostics_object} ) {
22901 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
22905 sub report_tokenization_errors {
22909 my $level = get_indentation_level();
22910 if ( $level != $tokenizer_self->{_starting_level} ) {
22911 warning("final indentation level: $level\n");
22914 check_final_nesting_depths();
22916 if ( $tokenizer_self->{_look_for_hash_bang}
22917 && !$tokenizer_self->{_saw_hash_bang} )
22920 "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
22923 if ( $tokenizer_self->{_in_format} ) {
22924 warning("hit EOF while in format description\n");
22927 if ( $tokenizer_self->{_in_pod} ) {
22929 # Just write log entry if this is after __END__ or __DATA__
22930 # because this happens to often, and it is not likely to be
22932 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
22933 write_logfile_entry(
22934 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
22940 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
22946 if ( $tokenizer_self->{_in_here_doc} ) {
22947 my $here_doc_target = $tokenizer_self->{_here_doc_target};
22948 my $started_looking_for_here_target_at =
22949 $tokenizer_self->{_started_looking_for_here_target_at};
22950 if ($here_doc_target) {
22952 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
22957 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
22960 my $nearly_matched_here_target_at =
22961 $tokenizer_self->{_nearly_matched_here_target_at};
22962 if ($nearly_matched_here_target_at) {
22964 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
22969 if ( $tokenizer_self->{_in_quote} ) {
22970 my $line_start_quote = $tokenizer_self->{_line_start_quote};
22971 my $quote_target = $tokenizer_self->{_quote_target};
22973 ( $tokenizer_self->{_in_attribute_list} )
22977 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
22981 unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
22982 if ( $] < 5.006 ) {
22983 write_logfile_entry("Suggest including '-w parameter'\n");
22986 write_logfile_entry("Suggest including 'use warnings;'\n");
22990 if ( $tokenizer_self->{_saw_perl_dash_P} ) {
22991 write_logfile_entry("Use of -P parameter for defines is discouraged\n");
22994 unless ( $tokenizer_self->{_saw_use_strict} ) {
22995 write_logfile_entry("Suggest including 'use strict;'\n");
22998 # it is suggested that labels have at least one upper case character
22999 # for legibility and to avoid code breakage as new keywords are introduced
23000 if ( $tokenizer_self->{_rlower_case_labels_at} ) {
23001 my @lower_case_labels_at =
23002 @{ $tokenizer_self->{_rlower_case_labels_at} };
23003 write_logfile_entry(
23004 "Suggest using upper case characters in label(s)\n");
23006 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
23010 sub report_v_string {
23012 # warn if this version can't handle v-strings
23014 unless ( $tokenizer_self->{_saw_v_string} ) {
23015 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
23017 if ( $] < 5.006 ) {
23019 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
23024 sub get_input_line_number {
23025 return $tokenizer_self->{_last_line_number};
23028 # returns the next tokenized line
23033 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
23034 # $square_bracket_depth, $paren_depth
23036 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
23037 $tokenizer_self->{_line_text} = $input_line;
23039 return undef unless ($input_line);
23041 my $input_line_number = ++$tokenizer_self->{_last_line_number};
23043 # Find and remove what characters terminate this line, including any
23045 my $input_line_separator = "";
23046 if ( chomp($input_line) ) { $input_line_separator = $/ }
23048 # TODO: what other characters should be included here?
23049 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
23050 $input_line_separator = $2 . $input_line_separator;
23053 # for backwards compatibility we keep the line text terminated with
23054 # a newline character
23055 $input_line .= "\n";
23056 $tokenizer_self->{_line_text} = $input_line; # update
23058 # create a data structure describing this line which will be
23059 # returned to the caller.
23061 # _line_type codes are:
23062 # SYSTEM - system-specific code before hash-bang line
23063 # CODE - line of perl code (including comments)
23064 # POD_START - line starting pod, such as '=head'
23065 # POD - pod documentation text
23066 # POD_END - last line of pod section, '=cut'
23067 # HERE - text of here-document
23068 # HERE_END - last line of here-doc (target word)
23069 # FORMAT - format section
23070 # FORMAT_END - last line of format section, '.'
23071 # DATA_START - __DATA__ line
23072 # DATA - unidentified text following __DATA__
23073 # END_START - __END__ line
23074 # END - unidentified text following __END__
23075 # ERROR - we are in big trouble, probably not a perl script
23078 # _curly_brace_depth - depth of curly braces at start of line
23079 # _square_bracket_depth - depth of square brackets at start of line
23080 # _paren_depth - depth of parens at start of line
23081 # _starting_in_quote - this line continues a multi-line quote
23082 # (so don't trim leading blanks!)
23083 # _ending_in_quote - this line ends in a multi-line quote
23084 # (so don't trim trailing blanks!)
23085 my $line_of_tokens = {
23086 _line_type => 'EOF',
23087 _line_text => $input_line,
23088 _line_number => $input_line_number,
23089 _rtoken_type => undef,
23092 _rslevels => undef,
23093 _rblock_type => undef,
23094 _rcontainer_type => undef,
23095 _rcontainer_environment => undef,
23096 _rtype_sequence => undef,
23097 _rnesting_tokens => undef,
23098 _rci_levels => undef,
23099 _rnesting_blocks => undef,
23100 _guessed_indentation_level => 0,
23101 _starting_in_quote => 0, # to be set by subroutine
23102 _ending_in_quote => 0,
23103 _curly_brace_depth => $brace_depth,
23104 _square_bracket_depth => $square_bracket_depth,
23105 _paren_depth => $paren_depth,
23106 _quote_character => '',
23109 # must print line unchanged if we are in a here document
23110 if ( $tokenizer_self->{_in_here_doc} ) {
23112 $line_of_tokens->{_line_type} = 'HERE';
23113 my $here_doc_target = $tokenizer_self->{_here_doc_target};
23114 my $here_quote_character = $tokenizer_self->{_here_quote_character};
23115 my $candidate_target = $input_line;
23116 chomp $candidate_target;
23117 if ( $candidate_target eq $here_doc_target ) {
23118 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23119 $line_of_tokens->{_line_type} = 'HERE_END';
23120 write_logfile_entry("Exiting HERE document $here_doc_target\n");
23122 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23123 if (@$rhere_target_list) { # there can be multiple here targets
23124 ( $here_doc_target, $here_quote_character ) =
23125 @{ shift @$rhere_target_list };
23126 $tokenizer_self->{_here_doc_target} = $here_doc_target;
23127 $tokenizer_self->{_here_quote_character} =
23128 $here_quote_character;
23129 write_logfile_entry(
23130 "Entering HERE document $here_doc_target\n");
23131 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23132 $tokenizer_self->{_started_looking_for_here_target_at} =
23133 $input_line_number;
23136 $tokenizer_self->{_in_here_doc} = 0;
23137 $tokenizer_self->{_here_doc_target} = "";
23138 $tokenizer_self->{_here_quote_character} = "";
23142 # check for error of extra whitespace
23143 # note for PERL6: leading whitespace is allowed
23145 $candidate_target =~ s/\s*$//;
23146 $candidate_target =~ s/^\s*//;
23147 if ( $candidate_target eq $here_doc_target ) {
23148 $tokenizer_self->{_nearly_matched_here_target_at} =
23149 $input_line_number;
23152 return $line_of_tokens;
23155 # must print line unchanged if we are in a format section
23156 elsif ( $tokenizer_self->{_in_format} ) {
23158 if ( $input_line =~ /^\.[\s#]*$/ ) {
23159 write_logfile_entry("Exiting format section\n");
23160 $tokenizer_self->{_in_format} = 0;
23161 $line_of_tokens->{_line_type} = 'FORMAT_END';
23164 $line_of_tokens->{_line_type} = 'FORMAT';
23166 return $line_of_tokens;
23169 # must print line unchanged if we are in pod documentation
23170 elsif ( $tokenizer_self->{_in_pod} ) {
23172 $line_of_tokens->{_line_type} = 'POD';
23173 if ( $input_line =~ /^=cut/ ) {
23174 $line_of_tokens->{_line_type} = 'POD_END';
23175 write_logfile_entry("Exiting POD section\n");
23176 $tokenizer_self->{_in_pod} = 0;
23178 if ( $input_line =~ /^\#\!.*perl\b/ ) {
23180 "Hash-bang in pod can cause older versions of perl to fail! \n"
23184 return $line_of_tokens;
23187 # must print line unchanged if we have seen a severe error (i.e., we
23188 # are seeing illegal tokens and cannot continue. Syntax errors do
23189 # not pass this route). Calling routine can decide what to do, but
23190 # the default can be to just pass all lines as if they were after __END__
23191 elsif ( $tokenizer_self->{_in_error} ) {
23192 $line_of_tokens->{_line_type} = 'ERROR';
23193 return $line_of_tokens;
23196 # print line unchanged if we are __DATA__ section
23197 elsif ( $tokenizer_self->{_in_data} ) {
23199 # ...but look for POD
23200 # Note that the _in_data and _in_end flags remain set
23201 # so that we return to that state after seeing the
23202 # end of a pod section
23203 if ( $input_line =~ /^=(?!cut)/ ) {
23204 $line_of_tokens->{_line_type} = 'POD_START';
23205 write_logfile_entry("Entering POD section\n");
23206 $tokenizer_self->{_in_pod} = 1;
23207 return $line_of_tokens;
23210 $line_of_tokens->{_line_type} = 'DATA';
23211 return $line_of_tokens;
23215 # print line unchanged if we are in __END__ section
23216 elsif ( $tokenizer_self->{_in_end} ) {
23218 # ...but look for POD
23219 # Note that the _in_data and _in_end flags remain set
23220 # so that we return to that state after seeing the
23221 # end of a pod section
23222 if ( $input_line =~ /^=(?!cut)/ ) {
23223 $line_of_tokens->{_line_type} = 'POD_START';
23224 write_logfile_entry("Entering POD section\n");
23225 $tokenizer_self->{_in_pod} = 1;
23226 return $line_of_tokens;
23229 $line_of_tokens->{_line_type} = 'END';
23230 return $line_of_tokens;
23234 # check for a hash-bang line if we haven't seen one
23235 if ( !$tokenizer_self->{_saw_hash_bang} ) {
23236 if ( $input_line =~ /^\#\!.*perl\b/ ) {
23237 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
23239 # check for -w and -P flags
23240 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
23241 $tokenizer_self->{_saw_perl_dash_P} = 1;
23244 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
23245 $tokenizer_self->{_saw_perl_dash_w} = 1;
23248 if ( ( $input_line_number > 1 )
23249 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
23252 # this is helpful for VMS systems; we may have accidentally
23253 # tokenized some DCL commands
23254 if ( $tokenizer_self->{_started_tokenizing} ) {
23256 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
23260 complain("Useless hash-bang after line 1\n");
23264 # Report the leading hash-bang as a system line
23265 # This will prevent -dac from deleting it
23267 $line_of_tokens->{_line_type} = 'SYSTEM';
23268 return $line_of_tokens;
23273 # wait for a hash-bang before parsing if the user invoked us with -x
23274 if ( $tokenizer_self->{_look_for_hash_bang}
23275 && !$tokenizer_self->{_saw_hash_bang} )
23277 $line_of_tokens->{_line_type} = 'SYSTEM';
23278 return $line_of_tokens;
23281 # a first line of the form ': #' will be marked as SYSTEM
23282 # since lines of this form may be used by tcsh
23283 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
23284 $line_of_tokens->{_line_type} = 'SYSTEM';
23285 return $line_of_tokens;
23288 # now we know that it is ok to tokenize the line...
23289 # the line tokenizer will modify any of these private variables:
23290 # _rhere_target_list
23297 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
23298 tokenize_this_line($line_of_tokens);
23300 # Now finish defining the return structure and return it
23301 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
23303 # handle severe error (binary data in script)
23304 if ( $tokenizer_self->{_in_error} ) {
23305 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
23306 warning("Giving up after error\n");
23307 $line_of_tokens->{_line_type} = 'ERROR';
23308 reset_indentation_level(0); # avoid error messages
23309 return $line_of_tokens;
23312 # handle start of pod documentation
23313 if ( $tokenizer_self->{_in_pod} ) {
23315 # This gets tricky..above a __DATA__ or __END__ section, perl
23316 # accepts '=cut' as the start of pod section. But afterwards,
23317 # only pod utilities see it and they may ignore an =cut without
23318 # leading =head. In any case, this isn't good.
23319 if ( $input_line =~ /^=cut\b/ ) {
23320 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
23321 complain("=cut while not in pod ignored\n");
23322 $tokenizer_self->{_in_pod} = 0;
23323 $line_of_tokens->{_line_type} = 'POD_END';
23326 $line_of_tokens->{_line_type} = 'POD_START';
23328 "=cut starts a pod section .. this can fool pod utilities.\n"
23330 write_logfile_entry("Entering POD section\n");
23335 $line_of_tokens->{_line_type} = 'POD_START';
23336 write_logfile_entry("Entering POD section\n");
23339 return $line_of_tokens;
23342 # update indentation levels for log messages
23343 if ( $input_line !~ /^\s*$/ ) {
23344 my $rlevels = $line_of_tokens->{_rlevels};
23345 $line_of_tokens->{_guessed_indentation_level} =
23346 guess_old_indentation_level($input_line);
23349 # see if this line contains here doc targets
23350 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23351 if (@$rhere_target_list) {
23353 my ( $here_doc_target, $here_quote_character ) =
23354 @{ shift @$rhere_target_list };
23355 $tokenizer_self->{_in_here_doc} = 1;
23356 $tokenizer_self->{_here_doc_target} = $here_doc_target;
23357 $tokenizer_self->{_here_quote_character} = $here_quote_character;
23358 write_logfile_entry("Entering HERE document $here_doc_target\n");
23359 $tokenizer_self->{_started_looking_for_here_target_at} =
23360 $input_line_number;
23363 # NOTE: __END__ and __DATA__ statements are written unformatted
23364 # because they can theoretically contain additional characters
23365 # which are not tokenized (and cannot be read with <DATA> either!).
23366 if ( $tokenizer_self->{_in_data} ) {
23367 $line_of_tokens->{_line_type} = 'DATA_START';
23368 write_logfile_entry("Starting __DATA__ section\n");
23369 $tokenizer_self->{_saw_data} = 1;
23371 # keep parsing after __DATA__ if use SelfLoader was seen
23372 if ( $tokenizer_self->{_saw_selfloader} ) {
23373 $tokenizer_self->{_in_data} = 0;
23374 write_logfile_entry(
23375 "SelfLoader seen, continuing; -nlsl deactivates\n");
23378 return $line_of_tokens;
23381 elsif ( $tokenizer_self->{_in_end} ) {
23382 $line_of_tokens->{_line_type} = 'END_START';
23383 write_logfile_entry("Starting __END__ section\n");
23384 $tokenizer_self->{_saw_end} = 1;
23386 # keep parsing after __END__ if use AutoLoader was seen
23387 if ( $tokenizer_self->{_saw_autoloader} ) {
23388 $tokenizer_self->{_in_end} = 0;
23389 write_logfile_entry(
23390 "AutoLoader seen, continuing; -nlal deactivates\n");
23392 return $line_of_tokens;
23395 # now, finally, we know that this line is type 'CODE'
23396 $line_of_tokens->{_line_type} = 'CODE';
23398 # remember if we have seen any real code
23399 if ( !$tokenizer_self->{_started_tokenizing}
23400 && $input_line !~ /^\s*$/
23401 && $input_line !~ /^\s*#/ )
23403 $tokenizer_self->{_started_tokenizing} = 1;
23406 if ( $tokenizer_self->{_debugger_object} ) {
23407 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
23410 # Note: if keyword 'format' occurs in this line code, it is still CODE
23411 # (keyword 'format' need not start a line)
23412 if ( $tokenizer_self->{_in_format} ) {
23413 write_logfile_entry("Entering format section\n");
23416 if ( $tokenizer_self->{_in_quote}
23417 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
23420 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
23422 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
23424 $tokenizer_self->{_line_start_quote} = $input_line_number;
23425 write_logfile_entry(
23426 "Start multi-line quote or pattern ending in $quote_target\n");
23429 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
23430 and !$tokenizer_self->{_in_quote} )
23432 $tokenizer_self->{_line_start_quote} = -1;
23433 write_logfile_entry("End of multi-line quote or pattern\n");
23436 # we are returning a line of CODE
23437 return $line_of_tokens;
23440 sub find_starting_indentation_level {
23442 # We need to find the indentation level of the first line of the
23443 # script being formatted. Often it will be zero for an entire file,
23444 # but if we are formatting a local block of code (within an editor for
23445 # example) it may not be zero. The user may specify this with the
23446 # -sil=n parameter but normally doesn't so we have to guess.
23448 # USES GLOBAL VARIABLES: $tokenizer_self
23449 my $starting_level = 0;
23451 # use value if given as parameter
23452 if ( $tokenizer_self->{_know_starting_level} ) {
23453 $starting_level = $tokenizer_self->{_starting_level};
23456 # if we know there is a hash_bang line, the level must be zero
23457 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
23458 $tokenizer_self->{_know_starting_level} = 1;
23461 # otherwise figure it out from the input file
23466 # keep looking at lines until we find a hash bang or piece of code
23469 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23472 # if first line is #! then assume starting level is zero
23473 if ( $i == 1 && $line =~ /^\#\!/ ) {
23474 $starting_level = 0;
23477 next if ( $line =~ /^\s*#/ ); # skip past comments
23478 next if ( $line =~ /^\s*$/ ); # skip past blank lines
23479 $starting_level = guess_old_indentation_level($line);
23482 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
23483 write_logfile_entry("$msg");
23485 $tokenizer_self->{_starting_level} = $starting_level;
23486 reset_indentation_level($starting_level);
23489 sub guess_old_indentation_level {
23492 # Guess the indentation level of an input line.
23494 # For the first line of code this result will define the starting
23495 # indentation level. It will mainly be non-zero when perltidy is applied
23496 # within an editor to a local block of code.
23498 # This is an impossible task in general because we can't know what tabs
23499 # meant for the old script and how many spaces were used for one
23500 # indentation level in the given input script. For example it may have
23501 # been previously formatted with -i=7 -et=3. But we can at least try to
23502 # make sure that perltidy guesses correctly if it is applied repeatedly to
23503 # a block of code within an editor, so that the block stays at the same
23504 # level when perltidy is applied repeatedly.
23506 # USES GLOBAL VARIABLES: $tokenizer_self
23509 # find leading tabs, spaces, and any statement label
23511 if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
23513 # If there are leading tabs, we use the tab scheme for this run, if
23514 # any, so that the code will remain stable when editing.
23515 if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
23517 if ($2) { $spaces += length($2) }
23519 # correct for outdented labels
23520 if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
23521 $spaces += $tokenizer_self->{_continuation_indentation};
23525 # compute indentation using the value of -i for this run.
23526 # If -i=0 is used for this run (which is possible) it doesn't matter
23527 # what we do here but we'll guess that the old run used 4 spaces per level.
23528 my $indent_columns = $tokenizer_self->{_indent_columns};
23529 $indent_columns = 4 if ( !$indent_columns );
23530 $level = int( $spaces / $indent_columns );
23534 # This is a currently unused debug routine
23535 sub dump_functions {
23539 foreach $pkg ( keys %is_user_function ) {
23540 print $fh "\nnon-constant subs in package $pkg\n";
23542 foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
23544 if ( $is_block_list_function{$pkg}{$sub} ) {
23545 $msg = 'block_list';
23548 if ( $is_block_function{$pkg}{$sub} ) {
23551 print $fh "$sub $msg\n";
23555 foreach $pkg ( keys %is_constant ) {
23556 print $fh "\nconstants and constant subs in package $pkg\n";
23558 foreach $sub ( keys %{ $is_constant{$pkg} } ) {
23559 print $fh "$sub\n";
23566 # count number of 1's in a string of 1's and 0's
23567 # example: ones_count("010101010101") gives 6
23568 return ( my $cis = $_[0] ) =~ tr/1/0/;
23571 sub prepare_for_a_new_file {
23573 # previous tokens needed to determine what to expect next
23574 $last_nonblank_token = ';'; # the only possible starting state which
23575 $last_nonblank_type = ';'; # will make a leading brace a code block
23576 $last_nonblank_block_type = '';
23578 # scalars for remembering statement types across multiple lines
23579 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
23580 $in_attribute_list = 0;
23582 # scalars for remembering where we are in the file
23583 $current_package = "main";
23584 $context = UNKNOWN_CONTEXT;
23586 # hashes used to remember function information
23587 %is_constant = (); # user-defined constants
23588 %is_user_function = (); # user-defined functions
23589 %user_function_prototype = (); # their prototypes
23590 %is_block_function = ();
23591 %is_block_list_function = ();
23592 %saw_function_definition = ();
23594 # variables used to track depths of various containers
23595 # and report nesting errors
23598 $square_bracket_depth = 0;
23599 @current_depth[ 0 .. $#closing_brace_names ] =
23600 (0) x scalar @closing_brace_names;
23603 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
23604 ( 0 .. $#closing_brace_names );
23605 @current_sequence_number = ();
23606 $paren_type[$paren_depth] = '';
23607 $paren_semicolon_count[$paren_depth] = 0;
23608 $paren_structural_type[$brace_depth] = '';
23609 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
23610 $brace_structural_type[$brace_depth] = '';
23611 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
23612 $brace_package[$paren_depth] = $current_package;
23613 $square_bracket_type[$square_bracket_depth] = '';
23614 $square_bracket_structural_type[$square_bracket_depth] = '';
23616 initialize_tokenizer_state();
23619 { # begin tokenize_this_line
23621 use constant BRACE => 0;
23622 use constant SQUARE_BRACKET => 1;
23623 use constant PAREN => 2;
23624 use constant QUESTION_COLON => 3;
23626 # TV1: scalars for processing one LINE.
23627 # Re-initialized on each entry to sub tokenize_this_line.
23629 $block_type, $container_type, $expecting,
23630 $i, $i_tok, $input_line,
23631 $input_line_number, $last_nonblank_i, $max_token_index,
23632 $next_tok, $next_type, $peeked_ahead,
23633 $prototype, $rhere_target_list, $rtoken_map,
23634 $rtoken_type, $rtokens, $tok,
23635 $type, $type_sequence, $indent_flag,
23638 # TV2: refs to ARRAYS for processing one LINE
23639 # Re-initialized on each call.
23640 my $routput_token_list = []; # stack of output token indexes
23641 my $routput_token_type = []; # token types
23642 my $routput_block_type = []; # types of code block
23643 my $routput_container_type = []; # paren types, such as if, elsif, ..
23644 my $routput_type_sequence = []; # nesting sequential number
23645 my $routput_indent_flag = []; #
23647 # TV3: SCALARS for quote variables. These are initialized with a
23648 # subroutine call and continually updated as lines are processed.
23649 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
23650 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
23652 # TV4: SCALARS for multi-line identifiers and
23653 # statements. These are initialized with a subroutine call
23654 # and continually updated as lines are processed.
23655 my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
23657 # TV5: SCALARS for tracking indentation level.
23658 # Initialized once and continually updated as lines are
23661 $nesting_token_string, $nesting_type_string,
23662 $nesting_block_string, $nesting_block_flag,
23663 $nesting_list_string, $nesting_list_flag,
23664 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23665 $in_statement_continuation, $level_in_tokenizer,
23666 $slevel_in_tokenizer, $rslevel_stack,
23669 # TV6: SCALARS for remembering several previous
23670 # tokens. Initialized once and continually updated as
23671 # lines are processed.
23673 $last_nonblank_container_type, $last_nonblank_type_sequence,
23674 $last_last_nonblank_token, $last_last_nonblank_type,
23675 $last_last_nonblank_block_type, $last_last_nonblank_container_type,
23676 $last_last_nonblank_type_sequence, $last_nonblank_prototype,
23679 # ----------------------------------------------------------------
23680 # beginning of tokenizer variable access and manipulation routines
23681 # ----------------------------------------------------------------
23683 sub initialize_tokenizer_state {
23685 # TV1: initialized on each call
23686 # TV2: initialized on each call
23690 $quote_character = "";
23693 $quoted_string_1 = "";
23694 $quoted_string_2 = "";
23695 $allowed_quote_modifiers = "";
23698 $id_scan_state = '';
23701 $indented_if_level = 0;
23704 $nesting_token_string = "";
23705 $nesting_type_string = "";
23706 $nesting_block_string = '1'; # initially in a block
23707 $nesting_block_flag = 1;
23708 $nesting_list_string = '0'; # initially not in a list
23709 $nesting_list_flag = 0; # initially not in a list
23710 $ci_string_in_tokenizer = "";
23711 $continuation_string_in_tokenizer = "0";
23712 $in_statement_continuation = 0;
23713 $level_in_tokenizer = 0;
23714 $slevel_in_tokenizer = 0;
23715 $rslevel_stack = [];
23718 $last_nonblank_container_type = '';
23719 $last_nonblank_type_sequence = '';
23720 $last_last_nonblank_token = ';';
23721 $last_last_nonblank_type = ';';
23722 $last_last_nonblank_block_type = '';
23723 $last_last_nonblank_container_type = '';
23724 $last_last_nonblank_type_sequence = '';
23725 $last_nonblank_prototype = "";
23728 sub save_tokenizer_state {
23731 $block_type, $container_type, $expecting,
23732 $i, $i_tok, $input_line,
23733 $input_line_number, $last_nonblank_i, $max_token_index,
23734 $next_tok, $next_type, $peeked_ahead,
23735 $prototype, $rhere_target_list, $rtoken_map,
23736 $rtoken_type, $rtokens, $tok,
23737 $type, $type_sequence, $indent_flag,
23741 $routput_token_list, $routput_token_type,
23742 $routput_block_type, $routput_container_type,
23743 $routput_type_sequence, $routput_indent_flag,
23747 $in_quote, $quote_type,
23748 $quote_character, $quote_pos,
23749 $quote_depth, $quoted_string_1,
23750 $quoted_string_2, $allowed_quote_modifiers,
23754 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
23757 $nesting_token_string, $nesting_type_string,
23758 $nesting_block_string, $nesting_block_flag,
23759 $nesting_list_string, $nesting_list_flag,
23760 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23761 $in_statement_continuation, $level_in_tokenizer,
23762 $slevel_in_tokenizer, $rslevel_stack,
23766 $last_nonblank_container_type,
23767 $last_nonblank_type_sequence,
23768 $last_last_nonblank_token,
23769 $last_last_nonblank_type,
23770 $last_last_nonblank_block_type,
23771 $last_last_nonblank_container_type,
23772 $last_last_nonblank_type_sequence,
23773 $last_nonblank_prototype,
23775 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
23778 sub restore_tokenizer_state {
23780 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
23782 $block_type, $container_type, $expecting,
23783 $i, $i_tok, $input_line,
23784 $input_line_number, $last_nonblank_i, $max_token_index,
23785 $next_tok, $next_type, $peeked_ahead,
23786 $prototype, $rhere_target_list, $rtoken_map,
23787 $rtoken_type, $rtokens, $tok,
23788 $type, $type_sequence, $indent_flag,
23792 $routput_token_list, $routput_token_type,
23793 $routput_block_type, $routput_container_type,
23794 $routput_type_sequence, $routput_type_sequence,
23798 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
23799 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
23802 ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
23806 $nesting_token_string, $nesting_type_string,
23807 $nesting_block_string, $nesting_block_flag,
23808 $nesting_list_string, $nesting_list_flag,
23809 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23810 $in_statement_continuation, $level_in_tokenizer,
23811 $slevel_in_tokenizer, $rslevel_stack,
23815 $last_nonblank_container_type,
23816 $last_nonblank_type_sequence,
23817 $last_last_nonblank_token,
23818 $last_last_nonblank_type,
23819 $last_last_nonblank_block_type,
23820 $last_last_nonblank_container_type,
23821 $last_last_nonblank_type_sequence,
23822 $last_nonblank_prototype,
23826 sub get_indentation_level {
23828 # patch to avoid reporting error if indented if is not terminated
23829 if ($indented_if_level) { return $level_in_tokenizer - 1 }
23830 return $level_in_tokenizer;
23833 sub reset_indentation_level {
23834 $level_in_tokenizer = $_[0];
23835 $slevel_in_tokenizer = $_[0];
23836 push @{$rslevel_stack}, $slevel_in_tokenizer;
23840 $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
23843 # ------------------------------------------------------------
23844 # end of tokenizer variable access and manipulation routines
23845 # ------------------------------------------------------------
23847 # ------------------------------------------------------------
23848 # beginning of various scanner interface routines
23849 # ------------------------------------------------------------
23850 sub scan_replacement_text {
23852 # check for here-docs in replacement text invoked by
23853 # a substitution operator with executable modifier 'e'.
23856 # $replacement_text
23858 # $rht = reference to any here-doc targets
23859 my ($replacement_text) = @_;
23862 return undef unless ( $replacement_text =~ /<</ );
23864 write_logfile_entry("scanning replacement text for here-doc targets\n");
23866 # save the logger object for error messages
23867 my $logger_object = $tokenizer_self->{_logger_object};
23869 # localize all package variables
23871 $tokenizer_self, $last_nonblank_token,
23872 $last_nonblank_type, $last_nonblank_block_type,
23873 $statement_type, $in_attribute_list,
23874 $current_package, $context,
23875 %is_constant, %is_user_function,
23876 %user_function_prototype, %is_block_function,
23877 %is_block_list_function, %saw_function_definition,
23878 $brace_depth, $paren_depth,
23879 $square_bracket_depth, @current_depth,
23880 @total_depth, $total_depth,
23881 @nesting_sequence_number, @current_sequence_number,
23882 @paren_type, @paren_semicolon_count,
23883 @paren_structural_type, @brace_type,
23884 @brace_structural_type, @brace_context,
23885 @brace_package, @square_bracket_type,
23886 @square_bracket_structural_type, @depth_array,
23887 @starting_line_of_current_depth, @nested_ternary_flag,
23888 @nested_statement_type,
23891 # save all lexical variables
23892 my $rstate = save_tokenizer_state();
23893 _decrement_count(); # avoid error check for multiple tokenizers
23895 # make a new tokenizer
23897 my $rpending_logfile_message;
23898 my $source_object =
23899 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
23900 $rpending_logfile_message );
23901 my $tokenizer = Perl::Tidy::Tokenizer->new(
23902 source_object => $source_object,
23903 logger_object => $logger_object,
23904 starting_line_number => $input_line_number,
23907 # scan the replacement text
23908 1 while ( $tokenizer->get_line() );
23910 # remove any here doc targets
23912 if ( $tokenizer_self->{_in_here_doc} ) {
23916 $tokenizer_self->{_here_doc_target},
23917 $tokenizer_self->{_here_quote_character}
23919 if ( $tokenizer_self->{_rhere_target_list} ) {
23920 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
23921 $tokenizer_self->{_rhere_target_list} = undef;
23923 $tokenizer_self->{_in_here_doc} = undef;
23926 # now its safe to report errors
23927 $tokenizer->report_tokenization_errors();
23929 # restore all tokenizer lexical variables
23930 restore_tokenizer_state($rstate);
23932 # return the here doc targets
23936 sub scan_bare_identifier {
23937 ( $i, $tok, $type, $prototype ) =
23938 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
23939 $rtoken_map, $max_token_index );
23942 sub scan_identifier {
23943 ( $i, $tok, $type, $id_scan_state, $identifier ) =
23944 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
23945 $max_token_index, $expecting );
23949 ( $i, $tok, $type, $id_scan_state ) =
23950 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
23951 $id_scan_state, $max_token_index );
23956 ( $i, $type, $number ) =
23957 scan_number_do( $input_line, $i, $rtoken_map, $type,
23958 $max_token_index );
23962 # a sub to warn if token found where term expected
23963 sub error_if_expecting_TERM {
23964 if ( $expecting == TERM ) {
23965 if ( $really_want_term{$last_nonblank_type} ) {
23966 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
23967 $rtoken_type, $input_line );
23973 # a sub to warn if token found where operator expected
23974 sub error_if_expecting_OPERATOR {
23975 if ( $expecting == OPERATOR ) {
23976 my $thing = defined $_[0] ? $_[0] : $tok;
23977 unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
23978 $rtoken_map, $rtoken_type, $input_line );
23979 if ( $i_tok == 0 ) {
23980 interrupt_logfile();
23981 warning("Missing ';' above?\n");
23988 # ------------------------------------------------------------
23989 # end scanner interfaces
23990 # ------------------------------------------------------------
23992 my %is_for_foreach;
23993 @_ = qw(for foreach);
23994 @is_for_foreach{@_} = (1) x scalar(@_);
23998 @is_my_our{@_} = (1) x scalar(@_);
24000 # These keywords may introduce blocks after parenthesized expressions,
24002 # keyword ( .... ) { BLOCK }
24003 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
24004 my %is_blocktype_with_paren;
24005 @_ = qw(if elsif unless while until for foreach switch case given when);
24006 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
24008 # ------------------------------------------------------------
24009 # begin hash of code for handling most token types
24010 # ------------------------------------------------------------
24011 my $tokenization_code = {
24013 # no special code for these types yet, but syntax checks
24048 error_if_expecting_TERM()
24049 if ( $expecting == TERM );
24052 error_if_expecting_TERM()
24053 if ( $expecting == TERM );
24057 # start looking for a scalar
24058 error_if_expecting_OPERATOR("Scalar")
24059 if ( $expecting == OPERATOR );
24062 if ( $identifier eq '$^W' ) {
24063 $tokenizer_self->{_saw_perl_dash_w} = 1;
24066 # Check for identifier in indirect object slot
24067 # (vorboard.pl, sort.t). Something like:
24068 # /^(print|printf|sort|exec|system)$/
24070 $is_indirect_object_taker{$last_nonblank_token}
24072 || ( ( $last_nonblank_token eq '(' )
24073 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
24074 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
24083 $paren_semicolon_count[$paren_depth] = 0;
24085 $container_type = $want_paren;
24089 $container_type = $last_nonblank_token;
24091 # We can check for a syntax error here of unexpected '(',
24092 # but this is going to get messy...
24094 $expecting == OPERATOR
24096 # be sure this is not a method call of the form
24097 # &method(...), $method->(..), &{method}(...),
24098 # $ref[2](list) is ok & short for $ref[2]->(list)
24099 # NOTE: at present, braces in something like &{ xxx }
24100 # are not marked as a block, we might have a method call
24101 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
24106 # ref: camel 3 p 703.
24107 if ( $last_last_nonblank_token eq 'do' ) {
24109 "do SUBROUTINE is deprecated; consider & or -> notation\n"
24114 # if this is an empty list, (), then it is not an
24115 # error; for example, we might have a constant pi and
24116 # invoke it with pi() or just pi;
24117 my ( $next_nonblank_token, $i_next ) =
24118 find_next_nonblank_token( $i, $rtokens,
24119 $max_token_index );
24120 if ( $next_nonblank_token ne ')' ) {
24122 error_if_expecting_OPERATOR('(');
24124 if ( $last_nonblank_type eq 'C' ) {
24126 "$last_nonblank_token has a void prototype\n";
24128 elsif ( $last_nonblank_type eq 'i' ) {
24130 && $last_nonblank_token =~ /^\$/ )
24133 "Do you mean '$last_nonblank_token->(' ?\n";
24137 interrupt_logfile();
24141 } ## end if ( $next_nonblank_token...
24142 } ## end else [ if ( $last_last_nonblank_token...
24143 } ## end if ( $expecting == OPERATOR...
24145 $paren_type[$paren_depth] = $container_type;
24146 ( $type_sequence, $indent_flag ) =
24147 increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24149 # propagate types down through nested parens
24150 # for example: the second paren in 'if ((' would be structural
24151 # since the first is.
24153 if ( $last_nonblank_token eq '(' ) {
24154 $type = $last_nonblank_type;
24157 # We exclude parens as structural after a ',' because it
24158 # causes subtle problems with continuation indentation for
24159 # something like this, where the first 'or' will not get
24164 # ( not defined $check )
24166 # or $check eq "new"
24167 # or $check eq "old",
24170 # Likewise, we exclude parens where a statement can start
24171 # because of problems with continuation indentation, like
24174 # ($firstline =~ /^#\!.*perl/)
24175 # and (print $File::Find::name, "\n")
24178 # (ref($usage_fref) =~ /CODE/)
24180 # : (&blast_usage, &blast_params, &blast_general_params);
24186 if ( $last_nonblank_type eq ')' ) {
24188 "Syntax error? found token '$last_nonblank_type' then '('\n"
24191 $paren_structural_type[$paren_depth] = $type;
24195 ( $type_sequence, $indent_flag ) =
24196 decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24198 if ( $paren_structural_type[$paren_depth] eq '{' ) {
24202 $container_type = $paren_type[$paren_depth];
24204 # /^(for|foreach)$/
24205 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
24206 my $num_sc = $paren_semicolon_count[$paren_depth];
24207 if ( $num_sc > 0 && $num_sc != 2 ) {
24208 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
24212 if ( $paren_depth > 0 ) { $paren_depth-- }
24215 if ( $last_nonblank_type eq ',' ) {
24216 complain("Repeated ','s \n");
24219 # patch for operator_expected: note if we are in the list (use.t)
24220 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
24221 ## FIXME: need to move this elsewhere, perhaps check after a '('
24222 ## elsif ($last_nonblank_token eq '(') {
24223 ## warning("Leading ','s illegal in some versions of perl\n");
24227 $context = UNKNOWN_CONTEXT;
24228 $statement_type = '';
24230 # /^(for|foreach)$/
24231 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
24232 { # mark ; in for loop
24234 # Be careful: we do not want a semicolon such as the
24235 # following to be included:
24237 # for (sort {strcoll($a,$b);} keys %investments) {
24239 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
24240 && $square_bracket_depth ==
24241 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
24245 $paren_semicolon_count[$paren_depth]++;
24251 error_if_expecting_OPERATOR("String")
24252 if ( $expecting == OPERATOR );
24255 $allowed_quote_modifiers = "";
24258 error_if_expecting_OPERATOR("String")
24259 if ( $expecting == OPERATOR );
24262 $allowed_quote_modifiers = "";
24265 error_if_expecting_OPERATOR("String")
24266 if ( $expecting == OPERATOR );
24269 $allowed_quote_modifiers = "";
24274 if ( $expecting == UNKNOWN ) { # indeterminate, must guess..
24276 ( $is_pattern, $msg ) =
24277 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
24278 $max_token_index );
24281 write_diagnostics("DIVIDE:$msg\n");
24282 write_logfile_entry($msg);
24285 else { $is_pattern = ( $expecting == TERM ) }
24290 $allowed_quote_modifiers = '[msixpodualgc]';
24292 else { # not a pattern; check for a /= token
24294 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
24300 #DEBUG - collecting info on what tokens follow a divide
24301 # for development of guessing algorithm
24302 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
24303 # #write_diagnostics( "DIVIDE? $input_line\n" );
24309 # if we just saw a ')', we will label this block with
24310 # its type. We need to do this to allow sub
24311 # code_block_type to determine if this brace starts a
24312 # code block or anonymous hash. (The type of a paren
24313 # pair is the preceding token, such as 'if', 'else',
24315 $container_type = "";
24317 # ATTRS: for a '{' following an attribute list, reset
24318 # things to look like we just saw the sub name
24319 if ( $statement_type =~ /^sub/ ) {
24320 $last_nonblank_token = $statement_type;
24321 $last_nonblank_type = 'i';
24322 $statement_type = "";
24325 # patch for SWITCH/CASE: hide these keywords from an immediately
24326 # following opening brace
24327 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
24328 && $statement_type eq $last_nonblank_token )
24330 $last_nonblank_token = ";";
24333 elsif ( $last_nonblank_token eq ')' ) {
24334 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
24336 # defensive move in case of a nesting error (pbug.t)
24337 # in which this ')' had no previous '('
24338 # this nesting error will have been caught
24339 if ( !defined($last_nonblank_token) ) {
24340 $last_nonblank_token = 'if';
24343 # check for syntax error here;
24344 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
24345 my $list = join( ' ', sort keys %is_blocktype_with_paren );
24347 "syntax error at ') {', didn't see one of: $list\n");
24351 # patch for paren-less for/foreach glitch, part 2.
24352 # see note below under 'qw'
24353 elsif ($last_nonblank_token eq 'qw'
24354 && $is_for_foreach{$want_paren} )
24356 $last_nonblank_token = $want_paren;
24357 if ( $last_last_nonblank_token eq $want_paren ) {
24359 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
24366 # now identify which of the three possible types of
24367 # curly braces we have: hash index container, anonymous
24368 # hash reference, or code block.
24370 # non-structural (hash index) curly brace pair
24371 # get marked 'L' and 'R'
24372 if ( is_non_structural_brace() ) {
24375 # patch for SWITCH/CASE:
24376 # allow paren-less identifier after 'when'
24377 # if the brace is preceded by a space
24378 if ( $statement_type eq 'when'
24379 && $last_nonblank_type eq 'i'
24380 && $last_last_nonblank_type eq 'k'
24381 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
24384 $block_type = $statement_type;
24388 # code and anonymous hash have the same type, '{', but are
24389 # distinguished by 'block_type',
24390 # which will be blank for an anonymous hash
24393 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
24394 $max_token_index );
24396 # remember a preceding smartmatch operator
24398 ##if ( $last_nonblank_type eq '~~' ) {
24399 ## $block_type = $last_nonblank_type;
24402 # patch to promote bareword type to function taking block
24404 && $last_nonblank_type eq 'w'
24405 && $last_nonblank_i >= 0 )
24407 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
24408 $routput_token_type->[$last_nonblank_i] = 'G';
24412 # patch for SWITCH/CASE: if we find a stray opening block brace
24413 # where we might accept a 'case' or 'when' block, then take it
24414 if ( $statement_type eq 'case'
24415 || $statement_type eq 'when' )
24417 if ( !$block_type || $block_type eq '}' ) {
24418 $block_type = $statement_type;
24422 $brace_type[ ++$brace_depth ] = $block_type;
24423 $brace_package[$brace_depth] = $current_package;
24424 $brace_structural_type[$brace_depth] = $type;
24425 $brace_context[$brace_depth] = $context;
24426 ( $type_sequence, $indent_flag ) =
24427 increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24430 $block_type = $brace_type[$brace_depth];
24431 if ($block_type) { $statement_type = '' }
24432 if ( defined( $brace_package[$brace_depth] ) ) {
24433 $current_package = $brace_package[$brace_depth];
24436 # can happen on brace error (caught elsewhere)
24439 ( $type_sequence, $indent_flag ) =
24440 decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24442 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
24446 # propagate type information for 'do' and 'eval' blocks, and also
24447 # for smartmatch operator. This is necessary to enable us to know
24448 # if an operator or term is expected next.
24450 ##if ( $is_block_operator{$block_type} || $block_type eq '~~' ) {
24451 if ( $is_block_operator{$block_type} ) {
24452 $tok = $block_type;
24455 $context = $brace_context[$brace_depth];
24456 if ( $brace_depth > 0 ) { $brace_depth--; }
24458 '&' => sub { # maybe sub call? start looking
24460 # We have to check for sub call unless we are sure we
24461 # are expecting an operator. This example from s2p
24462 # got mistaken as a q operator in an early version:
24463 # print BODY &q(<<'EOT');
24464 if ( $expecting != OPERATOR ) {
24466 # But only look for a sub call if we are expecting a term or
24467 # if there is no existing space after the &.
24468 # For example we probably don't want & as sub call here:
24469 # Fcntl::S_IRUSR & $mode;
24470 if ( $expecting == TERM || $next_type ne 'b' ) {
24477 '<' => sub { # angle operator or less than?
24479 if ( $expecting != OPERATOR ) {
24481 find_angle_operator_termination( $input_line, $i, $rtoken_map,
24482 $expecting, $max_token_index );
24484 if ( $type eq '<' && $expecting == TERM ) {
24485 error_if_expecting_TERM();
24486 interrupt_logfile();
24487 warning("Unterminated <> operator?\n");
24494 '?' => sub { # ?: conditional or starting pattern?
24498 if ( $expecting == UNKNOWN ) {
24501 ( $is_pattern, $msg ) =
24502 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
24503 $max_token_index );
24505 if ($msg) { write_logfile_entry($msg) }
24507 else { $is_pattern = ( $expecting == TERM ) }
24512 $allowed_quote_modifiers = '[msixpodualgc]';
24515 ( $type_sequence, $indent_flag ) =
24516 increase_nesting_depth( QUESTION_COLON,
24517 $$rtoken_map[$i_tok] );
24520 '*' => sub { # typeglob, or multiply?
24522 if ( $expecting == TERM ) {
24527 if ( $$rtokens[ $i + 1 ] eq '=' ) {
24532 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
24536 if ( $$rtokens[ $i + 1 ] eq '=' ) {
24544 '.' => sub { # what kind of . ?
24546 if ( $expecting != OPERATOR ) {
24548 if ( $type eq '.' ) {
24549 error_if_expecting_TERM()
24550 if ( $expecting == TERM );
24558 # if this is the first nonblank character, call it a label
24559 # since perl seems to just swallow it
24560 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
24564 # ATTRS: check for a ':' which introduces an attribute list
24565 # (this might eventually get its own token type)
24566 elsif ( $statement_type =~ /^sub/ ) {
24568 $in_attribute_list = 1;
24571 # check for scalar attribute, such as
24572 # my $foo : shared = 1;
24573 elsif ($is_my_our{$statement_type}
24574 && $current_depth[QUESTION_COLON] == 0 )
24577 $in_attribute_list = 1;
24580 # otherwise, it should be part of a ?/: operator
24582 ( $type_sequence, $indent_flag ) =
24583 decrease_nesting_depth( QUESTION_COLON,
24584 $$rtoken_map[$i_tok] );
24585 if ( $last_nonblank_token eq '?' ) {
24586 warning("Syntax error near ? :\n");
24590 '+' => sub { # what kind of plus?
24592 if ( $expecting == TERM ) {
24593 my $number = scan_number();
24595 # unary plus is safest assumption if not a number
24596 if ( !defined($number) ) { $type = 'p'; }
24598 elsif ( $expecting == OPERATOR ) {
24601 if ( $next_type eq 'w' ) { $type = 'p' }
24606 error_if_expecting_OPERATOR("Array")
24607 if ( $expecting == OPERATOR );
24610 '%' => sub { # hash or modulo?
24612 # first guess is hash if no following blank
24613 if ( $expecting == UNKNOWN ) {
24614 if ( $next_type ne 'b' ) { $expecting = TERM }
24616 if ( $expecting == TERM ) {
24621 $square_bracket_type[ ++$square_bracket_depth ] =
24622 $last_nonblank_token;
24623 ( $type_sequence, $indent_flag ) =
24624 increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24626 # It may seem odd, but structural square brackets have
24627 # type '{' and '}'. This simplifies the indentation logic.
24628 if ( !is_non_structural_brace() ) {
24631 $square_bracket_structural_type[$square_bracket_depth] = $type;
24634 ( $type_sequence, $indent_flag ) =
24635 decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24637 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
24642 # propagate type information for smartmatch operator. This is
24643 # necessary to enable us to know if an operator or term is expected
24645 if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
24646 $tok = $square_bracket_type[$square_bracket_depth];
24649 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
24651 '-' => sub { # what kind of minus?
24653 if ( ( $expecting != OPERATOR )
24654 && $is_file_test_operator{$next_tok} )
24656 my ( $next_nonblank_token, $i_next ) =
24657 find_next_nonblank_token( $i + 1, $rtokens,
24658 $max_token_index );
24660 # check for a quoted word like "-w=>xx";
24661 # it is sufficient to just check for a following '='
24662 if ( $next_nonblank_token eq '=' ) {
24671 elsif ( $expecting == TERM ) {
24672 my $number = scan_number();
24674 # maybe part of bareword token? unary is safest
24675 if ( !defined($number) ) { $type = 'm'; }
24678 elsif ( $expecting == OPERATOR ) {
24682 if ( $next_type eq 'w' ) {
24690 # check for special variables like ${^WARNING_BITS}
24691 if ( $expecting == TERM ) {
24693 # FIXME: this should work but will not catch errors
24694 # because we also have to be sure that previous token is
24695 # a type character ($,@,%).
24696 if ( $last_nonblank_token eq '{'
24697 && ( $next_tok =~ /^[A-Za-z_]/ ) )
24700 if ( $next_tok eq 'W' ) {
24701 $tokenizer_self->{_saw_perl_dash_w} = 1;
24703 $tok = $tok . $next_tok;
24709 unless ( error_if_expecting_TERM() ) {
24711 # Something like this is valid but strange:
24713 complain("The '^' seems unusual here\n");
24719 '::' => sub { # probably a sub call
24720 scan_bare_identifier();
24722 '<<' => sub { # maybe a here-doc?
24724 unless ( $i < $max_token_index )
24725 ; # here-doc not possible if end of line
24727 if ( $expecting != OPERATOR ) {
24728 my ( $found_target, $here_doc_target, $here_quote_character,
24731 $found_target, $here_doc_target, $here_quote_character, $i,
24734 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
24735 $max_token_index );
24737 if ($found_target) {
24738 push @{$rhere_target_list},
24739 [ $here_doc_target, $here_quote_character ];
24741 if ( length($here_doc_target) > 80 ) {
24742 my $truncated = substr( $here_doc_target, 0, 80 );
24743 complain("Long here-target: '$truncated' ...\n");
24745 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
24747 "Unconventional here-target: '$here_doc_target'\n"
24751 elsif ( $expecting == TERM ) {
24752 unless ($saw_error) {
24754 # shouldn't happen..
24755 warning("Program bug; didn't find here doc target\n");
24756 report_definite_bug();
24765 # if -> points to a bare word, we must scan for an identifier,
24766 # otherwise something like ->y would look like the y operator
24770 # type = 'pp' for pre-increment, '++' for post-increment
24772 if ( $expecting == TERM ) { $type = 'pp' }
24773 elsif ( $expecting == UNKNOWN ) {
24774 my ( $next_nonblank_token, $i_next ) =
24775 find_next_nonblank_token( $i, $rtokens, $max_token_index );
24776 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
24781 if ( $last_nonblank_type eq $tok ) {
24782 complain("Repeated '=>'s \n");
24785 # patch for operator_expected: note if we are in the list (use.t)
24786 # TODO: make version numbers a new token type
24787 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
24790 # type = 'mm' for pre-decrement, '--' for post-decrement
24793 if ( $expecting == TERM ) { $type = 'mm' }
24794 elsif ( $expecting == UNKNOWN ) {
24795 my ( $next_nonblank_token, $i_next ) =
24796 find_next_nonblank_token( $i, $rtokens, $max_token_index );
24797 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
24802 error_if_expecting_TERM()
24803 if ( $expecting == TERM );
24807 error_if_expecting_TERM()
24808 if ( $expecting == TERM );
24812 error_if_expecting_TERM()
24813 if ( $expecting == TERM );
24817 # ------------------------------------------------------------
24818 # end hash of code for handling individual token types
24819 # ------------------------------------------------------------
24821 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
24823 # These block types terminate statements and do not need a trailing
24825 # patched for SWITCH/CASE/
24826 my %is_zero_continuation_block_type;
24827 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
24828 if elsif else unless while until for foreach switch case given when);
24829 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
24831 my %is_not_zero_continuation_block_type;
24832 @_ = qw(sort grep map do eval);
24833 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
24835 my %is_logical_container;
24836 @_ = qw(if elsif unless while and or err not && ! || for foreach);
24837 @is_logical_container{@_} = (1) x scalar(@_);
24839 my %is_binary_type;
24841 @is_binary_type{@_} = (1) x scalar(@_);
24843 my %is_binary_keyword;
24844 @_ = qw(and or err eq ne cmp);
24845 @is_binary_keyword{@_} = (1) x scalar(@_);
24847 # 'L' is token for opening { at hash key
24848 my %is_opening_type;
24849 @_ = qw" L { ( [ ";
24850 @is_opening_type{@_} = (1) x scalar(@_);
24852 # 'R' is token for closing } at hash key
24853 my %is_closing_type;
24854 @_ = qw" R } ) ] ";
24855 @is_closing_type{@_} = (1) x scalar(@_);
24857 my %is_redo_last_next_goto;
24858 @_ = qw(redo last next goto);
24859 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
24861 my %is_use_require;
24862 @_ = qw(use require);
24863 @is_use_require{@_} = (1) x scalar(@_);
24865 my %is_sub_package;
24866 @_ = qw(sub package);
24867 @is_sub_package{@_} = (1) x scalar(@_);
24869 # This hash holds the hash key in $tokenizer_self for these keywords:
24870 my %is_format_END_DATA = (
24871 'format' => '_in_format',
24872 '__END__' => '_in_end',
24873 '__DATA__' => '_in_data',
24876 # ref: camel 3 p 147,
24877 # but perl may accept undocumented flags
24878 # perl 5.10 adds 'p' (preserve)
24879 # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these:
24880 # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc
24881 # s/PATTERN/REPLACEMENT/msixpodualgcer
24882 # y/SEARCHLIST/REPLACEMENTLIST/cdsr
24883 # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
24884 # qr/STRING/msixpodual
24885 my %quote_modifiers = (
24886 's' => '[msixpodualgcer]',
24889 'm' => '[msixpodualgc]',
24890 'qr' => '[msixpodual]',
24897 # table showing how many quoted things to look for after quote operator..
24898 # s, y, tr have 2 (pattern and replacement)
24899 # others have 1 (pattern only)
24900 my %quote_items = (
24912 sub tokenize_this_line {
24914 # This routine breaks a line of perl code into tokens which are of use in
24915 # indentation and reformatting. One of my goals has been to define tokens
24916 # such that a newline may be inserted between any pair of tokens without
24917 # changing or invalidating the program. This version comes close to this,
24918 # although there are necessarily a few exceptions which must be caught by
24919 # the formatter. Many of these involve the treatment of bare words.
24921 # The tokens and their types are returned in arrays. See previous
24922 # routine for their names.
24924 # See also the array "valid_token_types" in the BEGIN section for an
24927 # To simplify things, token types are either a single character, or they
24928 # are identical to the tokens themselves.
24930 # As a debugging aid, the -D flag creates a file containing a side-by-side
24931 # comparison of the input string and its tokenization for each line of a file.
24932 # This is an invaluable debugging aid.
24934 # In addition to tokens, and some associated quantities, the tokenizer
24935 # also returns flags indication any special line types. These include
24936 # quotes, here_docs, formats.
24938 # -----------------------------------------------------------------------
24940 # How to add NEW_TOKENS:
24942 # New token types will undoubtedly be needed in the future both to keep up
24943 # with changes in perl and to help adapt the tokenizer to other applications.
24945 # Here are some notes on the minimal steps. I wrote these notes while
24946 # adding the 'v' token type for v-strings, which are things like version
24947 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
24948 # can use your editor to search for the string "NEW_TOKENS" to find the
24949 # appropriate sections to change):
24951 # *. Try to talk somebody else into doing it! If not, ..
24953 # *. Make a backup of your current version in case things don't work out!
24955 # *. Think of a new, unused character for the token type, and add to
24956 # the array @valid_token_types in the BEGIN section of this package.
24957 # For example, I used 'v' for v-strings.
24959 # *. Implement coding to recognize the $type of the token in this routine.
24960 # This is the hardest part, and is best done by imitating or modifying
24961 # some of the existing coding. For example, to recognize v-strings, I
24962 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
24963 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
24965 # *. Update sub operator_expected. This update is critically important but
24966 # the coding is trivial. Look at the comments in that routine for help.
24967 # For v-strings, which should behave like numbers, I just added 'v' to the
24968 # regex used to handle numbers and strings (types 'n' and 'Q').
24970 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
24971 # Perl::Tidy::Formatter for breaking lines around this token type. You can
24972 # skip this step and take the default at first, then adjust later to get
24973 # desired results. For adding type 'v', I looked at sub bond_strength and
24974 # saw that number type 'n' was using default strengths, so I didn't do
24975 # anything. I may tune it up someday if I don't like the way line
24976 # breaks with v-strings look.
24978 # *. Implement a 'whitespace' rule in sub set_white_space_flag in
24979 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
24980 # and saw that type 'n' used spaces on both sides, so I just added 'v'
24981 # to the array @spaces_both_sides.
24983 # *. Update HtmlWriter package so that users can colorize the token as
24984 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
24985 # that package. For v-strings, I initially chose to use a default color
24986 # equal to the default for numbers, but it might be nice to change that
24989 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
24991 # *. Run lots and lots of debug tests. Start with special files designed
24992 # to test the new token type. Run with the -D flag to create a .DEBUG
24993 # file which shows the tokenization. When these work ok, test as many old
24994 # scripts as possible. Start with all of the '.t' files in the 'test'
24995 # directory of the distribution file. Compare .tdy output with previous
24996 # version and updated version to see the differences. Then include as
24997 # many more files as possible. My own technique has been to collect a huge
24998 # number of perl scripts (thousands!) into one directory and run perltidy
24999 # *, then run diff between the output of the previous version and the
25002 # *. For another example, search for the smartmatch operator '~~'
25003 # with your editor to see where updates were made for it.
25005 # -----------------------------------------------------------------------
25007 my $line_of_tokens = shift;
25008 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
25010 # patch while coding change is underway
25011 # make callers private data to allow access
25012 # $tokenizer_self = $caller_tokenizer_self;
25014 # extract line number for use in error messages
25015 $input_line_number = $line_of_tokens->{_line_number};
25017 # reinitialize for multi-line quote
25018 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
25020 # check for pod documentation
25021 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
25023 # must not be in multi-line quote
25024 # and must not be in an equation
25025 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
25027 $tokenizer_self->{_in_pod} = 1;
25032 $input_line = $untrimmed_input_line;
25036 # trim start of this line unless we are continuing a quoted line
25037 # do not trim end because we might end in a quote (test: deken4.pl)
25038 # Perl::Tidy::Formatter will delete needless trailing blanks
25039 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
25040 $input_line =~ s/^\s*//; # trim left end
25043 # update the copy of the line for use in error messages
25044 # This must be exactly what we give the pre_tokenizer
25045 $tokenizer_self->{_line_text} = $input_line;
25047 # re-initialize for the main loop
25048 $routput_token_list = []; # stack of output token indexes
25049 $routput_token_type = []; # token types
25050 $routput_block_type = []; # types of code block
25051 $routput_container_type = []; # paren types, such as if, elsif, ..
25052 $routput_type_sequence = []; # nesting sequential number
25054 $rhere_target_list = [];
25056 $tok = $last_nonblank_token;
25057 $type = $last_nonblank_type;
25058 $prototype = $last_nonblank_prototype;
25059 $last_nonblank_i = -1;
25060 $block_type = $last_nonblank_block_type;
25061 $container_type = $last_nonblank_container_type;
25062 $type_sequence = $last_nonblank_type_sequence;
25066 # tokenization is done in two stages..
25067 # stage 1 is a very simple pre-tokenization
25068 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
25070 # a little optimization for a full-line comment
25071 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
25072 $max_tokens_wanted = 1 # no use tokenizing a comment
25075 # start by breaking the line into pre-tokens
25076 ( $rtokens, $rtoken_map, $rtoken_type ) =
25077 pre_tokenize( $input_line, $max_tokens_wanted );
25079 $max_token_index = scalar(@$rtokens) - 1;
25080 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
25081 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
25082 push( @$rtoken_type, 'b', 'b', 'b' );
25084 # initialize for main loop
25085 for $i ( 0 .. $max_token_index + 3 ) {
25086 $routput_token_type->[$i] = "";
25087 $routput_block_type->[$i] = "";
25088 $routput_container_type->[$i] = "";
25089 $routput_type_sequence->[$i] = "";
25090 $routput_indent_flag->[$i] = 0;
25095 # ------------------------------------------------------------
25096 # begin main tokenization loop
25097 # ------------------------------------------------------------
25099 # we are looking at each pre-token of one line and combining them
25101 while ( ++$i <= $max_token_index ) {
25103 if ($in_quote) { # continue looking for end of a quote
25104 $type = $quote_type;
25106 unless ( @{$routput_token_list} )
25107 { # initialize if continuation line
25108 push( @{$routput_token_list}, $i );
25109 $routput_token_type->[$i] = $type;
25112 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
25114 # scan for the end of the quote or pattern
25116 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25117 $quoted_string_1, $quoted_string_2
25120 $i, $in_quote, $quote_character,
25121 $quote_pos, $quote_depth, $quoted_string_1,
25122 $quoted_string_2, $rtokens, $rtoken_map,
25126 # all done if we didn't find it
25127 last if ($in_quote);
25129 # save pattern and replacement text for rescanning
25130 my $qs1 = $quoted_string_1;
25131 my $qs2 = $quoted_string_2;
25133 # re-initialize for next search
25134 $quote_character = '';
25137 $quoted_string_1 = "";
25138 $quoted_string_2 = "";
25139 last if ( ++$i > $max_token_index );
25141 # look for any modifiers
25142 if ($allowed_quote_modifiers) {
25144 # check for exact quote modifiers
25145 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
25146 my $str = $$rtokens[$i];
25147 my $saw_modifier_e;
25148 while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
25149 my $pos = pos($str);
25150 my $char = substr( $str, $pos - 1, 1 );
25151 $saw_modifier_e ||= ( $char eq 'e' );
25154 # For an 'e' quote modifier we must scan the replacement
25155 # text for here-doc targets.
25156 if ($saw_modifier_e) {
25158 my $rht = scan_replacement_text($qs1);
25160 # Change type from 'Q' to 'h' for quotes with
25161 # here-doc targets so that the formatter (see sub
25162 # print_line_of_tokens) will not make any line
25163 # breaks after this point.
25165 push @{$rhere_target_list}, @{$rht};
25167 if ( $i_tok < 0 ) {
25168 my $ilast = $routput_token_list->[-1];
25169 $routput_token_type->[$ilast] = $type;
25174 if ( defined( pos($str) ) ) {
25177 if ( pos($str) == length($str) ) {
25178 last if ( ++$i > $max_token_index );
25181 # Looks like a joined quote modifier
25182 # and keyword, maybe something like
25183 # s/xxx/yyy/gefor @k=...
25184 # Example is "galgen.pl". Would have to split
25185 # the word and insert a new token in the
25186 # pre-token list. This is so rare that I haven't
25187 # done it. Will just issue a warning citation.
25189 # This error might also be triggered if my quote
25190 # modifier characters are incomplete
25194 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
25195 Please put a space between quote modifiers and trailing keywords.
25198 # print "token $$rtokens[$i]\n";
25199 # my $num = length($str) - pos($str);
25200 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
25201 # print "continuing with new token $$rtokens[$i]\n";
25203 # skipping past this token does least damage
25204 last if ( ++$i > $max_token_index );
25209 # example file: rokicki4.pl
25210 # This error might also be triggered if my quote
25211 # modifier characters are incomplete
25212 write_logfile_entry(
25213 "Note: found word $str at quote modifier location\n"
25219 $allowed_quote_modifiers = "";
25223 unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
25225 # try to catch some common errors
25226 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
25228 if ( $last_nonblank_token eq 'eq' ) {
25229 complain("Should 'eq' be '==' here ?\n");
25231 elsif ( $last_nonblank_token eq 'ne' ) {
25232 complain("Should 'ne' be '!=' here ?\n");
25236 $last_last_nonblank_token = $last_nonblank_token;
25237 $last_last_nonblank_type = $last_nonblank_type;
25238 $last_last_nonblank_block_type = $last_nonblank_block_type;
25239 $last_last_nonblank_container_type =
25240 $last_nonblank_container_type;
25241 $last_last_nonblank_type_sequence =
25242 $last_nonblank_type_sequence;
25243 $last_nonblank_token = $tok;
25244 $last_nonblank_type = $type;
25245 $last_nonblank_prototype = $prototype;
25246 $last_nonblank_block_type = $block_type;
25247 $last_nonblank_container_type = $container_type;
25248 $last_nonblank_type_sequence = $type_sequence;
25249 $last_nonblank_i = $i_tok;
25252 # store previous token type
25253 if ( $i_tok >= 0 ) {
25254 $routput_token_type->[$i_tok] = $type;
25255 $routput_block_type->[$i_tok] = $block_type;
25256 $routput_container_type->[$i_tok] = $container_type;
25257 $routput_type_sequence->[$i_tok] = $type_sequence;
25258 $routput_indent_flag->[$i_tok] = $indent_flag;
25260 my $pre_tok = $$rtokens[$i]; # get the next pre-token
25261 my $pre_type = $$rtoken_type[$i]; # and type
25263 $type = $pre_type; # to be modified as necessary
25264 $block_type = ""; # blank for all tokens except code block braces
25265 $container_type = ""; # blank for all tokens except some parens
25266 $type_sequence = ""; # blank for all tokens except ?/:
25268 $prototype = ""; # blank for all tokens except user defined subs
25271 # this pre-token will start an output token
25272 push( @{$routput_token_list}, $i_tok );
25274 # continue gathering identifier if necessary
25275 # but do not start on blanks and comments
25276 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
25278 if ( $id_scan_state =~ /^(sub|package)/ ) {
25285 last if ($id_scan_state);
25286 next if ( ( $i > 0 ) || $type );
25288 # didn't find any token; start over
25293 # handle whitespace tokens..
25294 next if ( $type eq 'b' );
25295 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
25296 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
25298 # Build larger tokens where possible, since we are not in a quote.
25300 # First try to assemble digraphs. The following tokens are
25301 # excluded and handled specially:
25302 # '/=' is excluded because the / might start a pattern.
25303 # 'x=' is excluded since it might be $x=, with $ on previous line
25304 # '**' and *= might be typeglobs of punctuation variables
25305 # I have allowed tokens starting with <, such as <=,
25306 # because I don't think these could be valid angle operators.
25307 # test file: storrs4.pl
25308 my $test_tok = $tok . $$rtokens[ $i + 1 ];
25309 my $combine_ok = $is_digraph{$test_tok};
25311 # check for special cases which cannot be combined
25314 # '//' must be defined_or operator if an operator is expected.
25315 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
25316 # could be migrated here for clarity
25317 if ( $test_tok eq '//' ) {
25318 my $next_type = $$rtokens[ $i + 1 ];
25320 operator_expected( $prev_type, $tok, $next_type );
25321 $combine_ok = 0 unless ( $expecting == OPERATOR );
25327 && ( $test_tok ne '/=' ) # might be pattern
25328 && ( $test_tok ne 'x=' ) # might be $x
25329 && ( $test_tok ne '**' ) # typeglob?
25330 && ( $test_tok ne '*=' ) # typeglob?
25336 # Now try to assemble trigraphs. Note that all possible
25337 # perl trigraphs can be constructed by appending a character
25339 $test_tok = $tok . $$rtokens[ $i + 1 ];
25341 if ( $is_trigraph{$test_tok} ) {
25348 $next_tok = $$rtokens[ $i + 1 ];
25349 $next_type = $$rtoken_type[ $i + 1 ];
25351 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
25354 $last_nonblank_token, $tok,
25355 $next_tok, $brace_depth,
25356 $brace_type[$brace_depth], $paren_depth,
25357 $paren_type[$paren_depth]
25359 print STDOUT "TOKENIZE:(@debug_list)\n";
25362 # turn off attribute list on first non-blank, non-bareword
25363 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
25365 ###############################################################
25366 # We have the next token, $tok.
25367 # Now we have to examine this token and decide what it is
25368 # and define its $type
25370 # section 1: bare words
25371 ###############################################################
25373 if ( $pre_type eq 'w' ) {
25374 $expecting = operator_expected( $prev_type, $tok, $next_type );
25375 my ( $next_nonblank_token, $i_next ) =
25376 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25378 # ATTRS: handle sub and variable attributes
25379 if ($in_attribute_list) {
25381 # treat bare word followed by open paren like qw(
25382 if ( $next_nonblank_token eq '(' ) {
25383 $in_quote = $quote_items{'q'};
25384 $allowed_quote_modifiers = $quote_modifiers{'q'};
25390 # handle bareword not followed by open paren
25397 # quote a word followed by => operator
25398 if ( $next_nonblank_token eq '=' ) {
25400 if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
25401 if ( $is_constant{$current_package}{$tok} ) {
25404 elsif ( $is_user_function{$current_package}{$tok} ) {
25407 $user_function_prototype{$current_package}{$tok};
25409 elsif ( $tok =~ /^v\d+$/ ) {
25411 report_v_string($tok);
25413 else { $type = 'w' }
25419 # quote a bare word within braces..like xxx->{s}; note that we
25420 # must be sure this is not a structural brace, to avoid
25421 # mistaking {s} in the following for a quoted bare word:
25422 # for(@[){s}bla}BLA}
25423 # Also treat q in something like var{-q} as a bare word, not qoute operator
25425 $next_nonblank_token eq '}'
25427 $last_nonblank_type eq 'L'
25428 || ( $last_nonblank_type eq 'm'
25429 && $last_last_nonblank_type eq 'L' )
25437 # a bare word immediately followed by :: is not a keyword;
25438 # use $tok_kw when testing for keywords to avoid a mistake
25440 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
25445 # handle operator x (now we know it isn't $x=)
25446 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
25447 if ( $tok eq 'x' ) {
25449 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
25459 # FIXME: Patch: mark something like x4 as an integer for now
25460 # It gets fixed downstream. This is easier than
25461 # splitting the pretoken.
25466 elsif ( $tok_kw eq 'CORE::' ) {
25467 $type = $tok = $tok_kw;
25470 elsif ( ( $tok eq 'strict' )
25471 and ( $last_nonblank_token eq 'use' ) )
25473 $tokenizer_self->{_saw_use_strict} = 1;
25474 scan_bare_identifier();
25477 elsif ( ( $tok eq 'warnings' )
25478 and ( $last_nonblank_token eq 'use' ) )
25480 $tokenizer_self->{_saw_perl_dash_w} = 1;
25482 # scan as identifier, so that we pick up something like:
25483 # use warnings::register
25484 scan_bare_identifier();
25488 $tok eq 'AutoLoader'
25489 && $tokenizer_self->{_look_for_autoloader}
25491 $last_nonblank_token eq 'use'
25493 # these regexes are from AutoSplit.pm, which we want
25495 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
25496 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
25500 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
25501 $tokenizer_self->{_saw_autoloader} = 1;
25502 $tokenizer_self->{_look_for_autoloader} = 0;
25503 scan_bare_identifier();
25507 $tok eq 'SelfLoader'
25508 && $tokenizer_self->{_look_for_selfloader}
25509 && ( $last_nonblank_token eq 'use'
25510 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
25511 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
25514 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
25515 $tokenizer_self->{_saw_selfloader} = 1;
25516 $tokenizer_self->{_look_for_selfloader} = 0;
25517 scan_bare_identifier();
25520 elsif ( ( $tok eq 'constant' )
25521 and ( $last_nonblank_token eq 'use' ) )
25523 scan_bare_identifier();
25524 my ( $next_nonblank_token, $i_next ) =
25525 find_next_nonblank_token( $i, $rtokens,
25526 $max_token_index );
25528 if ($next_nonblank_token) {
25530 if ( $is_keyword{$next_nonblank_token} ) {
25532 # Assume qw is used as a quote and okay, as in:
25533 # use constant qw{ DEBUG 0 };
25534 # Not worth trying to parse for just a warning
25536 # NOTE: This warning is deactivated because recent
25537 # versions of perl do not complain here, but
25538 # the coding is retained for reference.
25539 if ( 0 && $next_nonblank_token ne 'qw' ) {
25541 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
25546 # FIXME: could check for error in which next token is
25547 # not a word (number, punctuation, ..)
25549 $is_constant{$current_package}{$next_nonblank_token}
25555 # various quote operators
25556 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
25557 if ( $expecting == OPERATOR ) {
25559 # patch for paren-less for/foreach glitch, part 1
25560 # perl will accept this construct as valid:
25562 # foreach my $key qw\Uno Due Tres Quadro\ {
25563 # print "Set $key\n";
25565 unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
25567 error_if_expecting_OPERATOR();
25570 $in_quote = $quote_items{$tok};
25571 $allowed_quote_modifiers = $quote_modifiers{$tok};
25573 # All quote types are 'Q' except possibly qw quotes.
25574 # qw quotes are special in that they may generally be trimmed
25575 # of leading and trailing whitespace. So they are given a
25576 # separate type, 'q', unless requested otherwise.
25578 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
25581 $quote_type = $type;
25584 # check for a statement label
25586 ( $next_nonblank_token eq ':' )
25587 && ( $$rtokens[ $i_next + 1 ] ne ':' )
25588 && ( $i_next <= $max_token_index ) # colon on same line
25592 if ( $tok !~ /[A-Z]/ ) {
25593 push @{ $tokenizer_self->{_rlower_case_labels_at} },
25594 $input_line_number;
25602 # 'sub' || 'package'
25603 elsif ( $is_sub_package{$tok_kw} ) {
25604 error_if_expecting_OPERATOR()
25605 if ( $expecting == OPERATOR );
25609 # Note on token types for format, __DATA__, __END__:
25610 # It simplifies things to give these type ';', so that when we
25611 # start rescanning we will be expecting a token of type TERM.
25612 # We will switch to type 'k' before outputting the tokens.
25613 elsif ( $is_format_END_DATA{$tok_kw} ) {
25614 $type = ';'; # make tokenizer look for TERM next
25615 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
25619 elsif ( $is_keyword{$tok_kw} ) {
25622 # Since for and foreach may not be followed immediately
25623 # by an opening paren, we have to remember which keyword
25624 # is associated with the next '('
25625 if ( $is_for_foreach{$tok} ) {
25626 if ( new_statement_ok() ) {
25627 $want_paren = $tok;
25631 # recognize 'use' statements, which are special
25632 elsif ( $is_use_require{$tok} ) {
25633 $statement_type = $tok;
25634 error_if_expecting_OPERATOR()
25635 if ( $expecting == OPERATOR );
25638 # remember my and our to check for trailing ": shared"
25639 elsif ( $is_my_our{$tok} ) {
25640 $statement_type = $tok;
25643 # Check for misplaced 'elsif' and 'else', but allow isolated
25644 # else or elsif blocks to be formatted. This is indicated
25645 # by a last noblank token of ';'
25646 elsif ( $tok eq 'elsif' ) {
25647 if ( $last_nonblank_token ne ';'
25648 && $last_nonblank_block_type !~
25649 /^(if|elsif|unless)$/ )
25652 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
25656 elsif ( $tok eq 'else' ) {
25658 # patched for SWITCH/CASE
25659 if ( $last_nonblank_token ne ';'
25660 && $last_nonblank_block_type !~
25661 /^(if|elsif|unless|case|when)$/ )
25664 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
25668 elsif ( $tok eq 'continue' ) {
25669 if ( $last_nonblank_token ne ';'
25670 && $last_nonblank_block_type !~
25671 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
25674 # note: ';' '{' and '}' in list above
25675 # because continues can follow bare blocks;
25676 # ':' is labeled block
25678 ############################################
25679 # NOTE: This check has been deactivated because
25680 # continue has an alternative usage for given/when
25681 # blocks in perl 5.10
25682 ## warning("'$tok' should follow a block\n");
25683 ############################################
25687 # patch for SWITCH/CASE if 'case' and 'when are
25688 # treated as keywords.
25689 elsif ( $tok eq 'when' || $tok eq 'case' ) {
25690 $statement_type = $tok; # next '{' is block
25694 # indent trailing if/unless/while/until
25695 # outdenting will be handled by later indentation loop
25696 ## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
25708 ## if ( $tok =~ /^(if|unless|while|until)$/
25709 ## && $next_nonblank_token ne '(' )
25711 ## $indent_flag = 1;
25715 # check for inline label following
25716 # /^(redo|last|next|goto)$/
25717 elsif (( $last_nonblank_type eq 'k' )
25718 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
25724 # something else --
25727 scan_bare_identifier();
25728 if ( $type eq 'w' ) {
25730 if ( $expecting == OPERATOR ) {
25732 # don't complain about possible indirect object
25736 # sub new($) { ... }
25737 # $b = new A::; # calls A::new
25738 # $c = new A; # same thing but suspicious
25739 # This will call A::new but we have a 'new' in
25740 # main:: which looks like a constant.
25742 if ( $last_nonblank_type eq 'C' ) {
25743 if ( $tok !~ /::$/ ) {
25745 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
25746 Maybe indirectet object notation?
25751 error_if_expecting_OPERATOR("bareword");
25755 # mark bare words immediately followed by a paren as
25757 $next_tok = $$rtokens[ $i + 1 ];
25758 if ( $next_tok eq '(' ) {
25762 # underscore after file test operator is file handle
25763 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
25767 # patch for SWITCH/CASE if 'case' and 'when are
25768 # not treated as keywords:
25772 && $brace_type[$brace_depth] eq 'switch'
25774 || ( $tok eq 'when'
25775 && $brace_type[$brace_depth] eq 'given' )
25778 $statement_type = $tok; # next '{' is block
25779 $type = 'k'; # for keyword syntax coloring
25782 # patch for SWITCH/CASE if switch and given not keywords
25783 # Switch is not a perl 5 keyword, but we will gamble
25784 # and mark switch followed by paren as a keyword. This
25785 # is only necessary to get html syntax coloring nice,
25786 # and does not commit this as being a switch/case.
25787 if ( $next_nonblank_token eq '('
25788 && ( $tok eq 'switch' || $tok eq 'given' ) )
25790 $type = 'k'; # for keyword syntax coloring
25796 ###############################################################
25797 # section 2: strings of digits
25798 ###############################################################
25799 elsif ( $pre_type eq 'd' ) {
25800 $expecting = operator_expected( $prev_type, $tok, $next_type );
25801 error_if_expecting_OPERATOR("Number")
25802 if ( $expecting == OPERATOR );
25803 my $number = scan_number();
25804 if ( !defined($number) ) {
25806 # shouldn't happen - we should always get a number
25807 warning("non-number beginning with digit--program bug\n");
25808 report_definite_bug();
25812 ###############################################################
25813 # section 3: all other tokens
25814 ###############################################################
25817 last if ( $tok eq '#' );
25818 my $code = $tokenization_code->{$tok};
25821 operator_expected( $prev_type, $tok, $next_type );
25828 # -----------------------------
25829 # end of main tokenization loop
25830 # -----------------------------
25832 if ( $i_tok >= 0 ) {
25833 $routput_token_type->[$i_tok] = $type;
25834 $routput_block_type->[$i_tok] = $block_type;
25835 $routput_container_type->[$i_tok] = $container_type;
25836 $routput_type_sequence->[$i_tok] = $type_sequence;
25837 $routput_indent_flag->[$i_tok] = $indent_flag;
25840 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
25841 $last_last_nonblank_token = $last_nonblank_token;
25842 $last_last_nonblank_type = $last_nonblank_type;
25843 $last_last_nonblank_block_type = $last_nonblank_block_type;
25844 $last_last_nonblank_container_type = $last_nonblank_container_type;
25845 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
25846 $last_nonblank_token = $tok;
25847 $last_nonblank_type = $type;
25848 $last_nonblank_block_type = $block_type;
25849 $last_nonblank_container_type = $container_type;
25850 $last_nonblank_type_sequence = $type_sequence;
25851 $last_nonblank_prototype = $prototype;
25854 # reset indentation level if necessary at a sub or package
25855 # in an attempt to recover from a nesting error
25856 if ( $level_in_tokenizer < 0 ) {
25857 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
25858 reset_indentation_level(0);
25859 brace_warning("resetting level to 0 at $1 $2\n");
25863 # all done tokenizing this line ...
25864 # now prepare the final list of tokens and types
25866 my @token_type = (); # stack of output token types
25867 my @block_type = (); # stack of output code block types
25868 my @container_type = (); # stack of output code container types
25869 my @type_sequence = (); # stack of output type sequence numbers
25870 my @tokens = (); # output tokens
25871 my @levels = (); # structural brace levels of output tokens
25872 my @slevels = (); # secondary nesting levels of output tokens
25873 my @nesting_tokens = (); # string of tokens leading to this depth
25874 my @nesting_types = (); # string of token types leading to this depth
25875 my @nesting_blocks = (); # string of block types leading to this depth
25876 my @nesting_lists = (); # string of list types leading to this depth
25877 my @ci_string = (); # string needed to compute continuation indentation
25878 my @container_environment = (); # BLOCK or LIST
25879 my $container_environment = '';
25880 my $im = -1; # previous $i value
25882 my $ci_string_sum = ones_count($ci_string_in_tokenizer);
25884 # Computing Token Indentation
25886 # The final section of the tokenizer forms tokens and also computes
25887 # parameters needed to find indentation. It is much easier to do it
25888 # in the tokenizer than elsewhere. Here is a brief description of how
25889 # indentation is computed. Perl::Tidy computes indentation as the sum
25892 # (1) structural indentation, such as if/else/elsif blocks
25893 # (2) continuation indentation, such as long parameter call lists.
25895 # These are occasionally called primary and secondary indentation.
25897 # Structural indentation is introduced by tokens of type '{', although
25898 # the actual tokens might be '{', '(', or '['. Structural indentation
25899 # is of two types: BLOCK and non-BLOCK. Default structural indentation
25900 # is 4 characters if the standard indentation scheme is used.
25902 # Continuation indentation is introduced whenever a line at BLOCK level
25903 # is broken before its termination. Default continuation indentation
25904 # is 2 characters in the standard indentation scheme.
25906 # Both types of indentation may be nested arbitrarily deep and
25907 # interlaced. The distinction between the two is somewhat arbitrary.
25909 # For each token, we will define two variables which would apply if
25910 # the current statement were broken just before that token, so that
25911 # that token started a new line:
25913 # $level = the structural indentation level,
25914 # $ci_level = the continuation indentation level
25916 # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
25917 # assuming defaults. However, in some special cases it is customary
25918 # to modify $ci_level from this strict value.
25920 # The total structural indentation is easy to compute by adding and
25921 # subtracting 1 from a saved value as types '{' and '}' are seen. The
25922 # running value of this variable is $level_in_tokenizer.
25924 # The total continuation is much more difficult to compute, and requires
25925 # several variables. These variables are:
25927 # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
25928 # each indentation level, if there are intervening open secondary
25929 # structures just prior to that level.
25930 # $continuation_string_in_tokenizer = a string of 1's and 0's indicating
25931 # if the last token at that level is "continued", meaning that it
25932 # is not the first token of an expression.
25933 # $nesting_block_string = a string of 1's and 0's indicating, for each
25934 # indentation level, if the level is of type BLOCK or not.
25935 # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
25936 # $nesting_list_string = a string of 1's and 0's indicating, for each
25937 # indentation level, if it is appropriate for list formatting.
25938 # If so, continuation indentation is used to indent long list items.
25939 # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
25940 # @{$rslevel_stack} = a stack of total nesting depths at each
25941 # structural indentation level, where "total nesting depth" means
25942 # the nesting depth that would occur if every nesting token -- '{', '[',
25943 # and '(' -- , regardless of context, is used to compute a nesting
25946 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
25947 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
25949 my ( $ci_string_i, $level_i, $nesting_block_string_i,
25950 $nesting_list_string_i, $nesting_token_string_i,
25951 $nesting_type_string_i, );
25953 foreach $i ( @{$routput_token_list} )
25954 { # scan the list of pre-tokens indexes
25956 # self-checking for valid token types
25957 my $type = $routput_token_type->[$i];
25958 my $forced_indentation_flag = $routput_indent_flag->[$i];
25960 # See if we should undo the $forced_indentation_flag.
25961 # Forced indentation after 'if', 'unless', 'while' and 'until'
25962 # expressions without trailing parens is optional and doesn't
25963 # always look good. It is usually okay for a trailing logical
25964 # expression, but if the expression is a function call, code block,
25965 # or some kind of list it puts in an unwanted extra indentation
25966 # level which is hard to remove.
25968 # Example where extra indentation looks ok:
25970 # if $det_a < 0 and $det_b > 0
25971 # or $det_a > 0 and $det_b < 0;
25973 # Example where extra indentation is not needed because
25974 # the eval brace also provides indentation:
25975 # print "not " if defined eval {
25976 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
25979 # The following rule works fairly well:
25980 # Undo the flag if the end of this line, or start of the next
25981 # line, is an opening container token or a comma.
25982 # This almost always works, but if not after another pass it will
25984 if ( $forced_indentation_flag && $type eq 'k' ) {
25986 my $ilast = $routput_token_list->[$ixlast];
25987 my $toklast = $routput_token_type->[$ilast];
25988 if ( $toklast eq '#' ) {
25990 $ilast = $routput_token_list->[$ixlast];
25991 $toklast = $routput_token_type->[$ilast];
25993 if ( $toklast eq 'b' ) {
25995 $ilast = $routput_token_list->[$ixlast];
25996 $toklast = $routput_token_type->[$ilast];
25998 if ( $toklast =~ /^[\{,]$/ ) {
25999 $forced_indentation_flag = 0;
26002 ( $toklast, my $i_next ) =
26003 find_next_nonblank_token( $max_token_index, $rtokens,
26004 $max_token_index );
26005 if ( $toklast =~ /^[\{,]$/ ) {
26006 $forced_indentation_flag = 0;
26011 # if we are already in an indented if, see if we should outdent
26012 if ($indented_if_level) {
26014 # don't try to nest trailing if's - shouldn't happen
26015 if ( $type eq 'k' ) {
26016 $forced_indentation_flag = 0;
26019 # check for the normal case - outdenting at next ';'
26020 elsif ( $type eq ';' ) {
26021 if ( $level_in_tokenizer == $indented_if_level ) {
26022 $forced_indentation_flag = -1;
26023 $indented_if_level = 0;
26027 # handle case of missing semicolon
26028 elsif ( $type eq '}' ) {
26029 if ( $level_in_tokenizer == $indented_if_level ) {
26030 $indented_if_level = 0;
26032 # TBD: This could be a subroutine call
26033 $level_in_tokenizer--;
26034 if ( @{$rslevel_stack} > 1 ) {
26035 pop( @{$rslevel_stack} );
26037 if ( length($nesting_block_string) > 1 )
26038 { # true for valid script
26039 chop $nesting_block_string;
26040 chop $nesting_list_string;
26047 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
26048 $level_i = $level_in_tokenizer;
26050 # This can happen by running perltidy on non-scripts
26051 # although it could also be bug introduced by programming change.
26052 # Perl silently accepts a 032 (^Z) and takes it as the end
26053 if ( !$is_valid_token_type{$type} ) {
26054 my $val = ord($type);
26056 "unexpected character decimal $val ($type) in script\n");
26057 $tokenizer_self->{_in_error} = 1;
26060 # ----------------------------------------------------------------
26061 # TOKEN TYPE PATCHES
26062 # output __END__, __DATA__, and format as type 'k' instead of ';'
26063 # to make html colors correct, etc.
26064 my $fix_type = $type;
26065 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
26067 # output anonymous 'sub' as keyword
26068 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
26070 # -----------------------------------------------------------------
26072 $nesting_token_string_i = $nesting_token_string;
26073 $nesting_type_string_i = $nesting_type_string;
26074 $nesting_block_string_i = $nesting_block_string;
26075 $nesting_list_string_i = $nesting_list_string;
26077 # set primary indentation levels based on structural braces
26078 # Note: these are set so that the leading braces have a HIGHER
26079 # level than their CONTENTS, which is convenient for indentation
26080 # Also, define continuation indentation for each token.
26081 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
26084 # use environment before updating
26085 $container_environment =
26086 $nesting_block_flag ? 'BLOCK'
26087 : $nesting_list_flag ? 'LIST'
26090 # if the difference between total nesting levels is not 1,
26091 # there are intervening non-structural nesting types between
26092 # this '{' and the previous unclosed '{'
26093 my $intervening_secondary_structure = 0;
26094 if ( @{$rslevel_stack} ) {
26095 $intervening_secondary_structure =
26096 $slevel_in_tokenizer - $rslevel_stack->[-1];
26099 # Continuation Indentation
26101 # Having tried setting continuation indentation both in the formatter and
26102 # in the tokenizer, I can say that setting it in the tokenizer is much,
26103 # much easier. The formatter already has too much to do, and can't
26104 # make decisions on line breaks without knowing what 'ci' will be at
26105 # arbitrary locations.
26107 # But a problem with setting the continuation indentation (ci) here
26108 # in the tokenizer is that we do not know where line breaks will actually
26109 # be. As a result, we don't know if we should propagate continuation
26110 # indentation to higher levels of structure.
26112 # For nesting of only structural indentation, we never need to do this.
26113 # For example, in a long if statement, like this
26115 # if ( !$output_block_type[$i]
26116 # && ($in_statement_continuation) )
26121 # the second line has ci but we do normally give the lines within the BLOCK
26122 # any ci. This would be true if we had blocks nested arbitrarily deeply.
26124 # But consider something like this, where we have created a break after
26125 # an opening paren on line 1, and the paren is not (currently) a
26126 # structural indentation token:
26128 # my $file = $menubar->Menubutton(
26129 # qw/-text File -underline 0 -menuitems/ => [
26131 # Cascade => '~View',
26135 # The second line has ci, so it would seem reasonable to propagate it
26136 # down, giving the third line 1 ci + 1 indentation. This suggests the
26137 # following rule, which is currently used to propagating ci down: if there
26138 # are any non-structural opening parens (or brackets, or braces), before
26139 # an opening structural brace, then ci is propagated down, and otherwise
26140 # not. The variable $intervening_secondary_structure contains this
26141 # information for the current token, and the string
26142 # "$ci_string_in_tokenizer" is a stack of previous values of this
26145 # save the current states
26146 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
26147 $level_in_tokenizer++;
26149 if ($forced_indentation_flag) {
26151 # break BEFORE '?' when there is forced indentation
26152 if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
26153 if ( $type eq 'k' ) {
26154 $indented_if_level = $level_in_tokenizer;
26157 # do not change container environment here if we are not
26158 # at a real list. Adding this check prevents "blinkers"
26159 # often near 'unless" clauses, such as in the following
26164 ## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
26167 $nesting_block_string .= "$nesting_block_flag";
26171 if ( $routput_block_type->[$i] ) {
26172 $nesting_block_flag = 1;
26173 $nesting_block_string .= '1';
26176 $nesting_block_flag = 0;
26177 $nesting_block_string .= '0';
26181 # we will use continuation indentation within containers
26182 # which are not blocks and not logical expressions
26184 if ( !$routput_block_type->[$i] ) {
26186 # propagate flag down at nested open parens
26187 if ( $routput_container_type->[$i] eq '(' ) {
26188 $bit = 1 if $nesting_list_flag;
26191 # use list continuation if not a logical grouping
26192 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
26196 $is_logical_container{ $routput_container_type->[$i]
26200 $nesting_list_string .= $bit;
26201 $nesting_list_flag = $bit;
26203 $ci_string_in_tokenizer .=
26204 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
26205 $ci_string_sum = ones_count($ci_string_in_tokenizer);
26206 $continuation_string_in_tokenizer .=
26207 ( $in_statement_continuation > 0 ) ? '1' : '0';
26209 # Sometimes we want to give an opening brace continuation indentation,
26210 # and sometimes not. For code blocks, we don't do it, so that the leading
26211 # '{' gets outdented, like this:
26213 # if ( !$output_block_type[$i]
26214 # && ($in_statement_continuation) )
26217 # For other types, we will give them continuation indentation. For example,
26218 # here is how a list looks with the opening paren indented:
26221 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
26222 # [ "homer", "marge", "bart" ], );
26224 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
26226 my $total_ci = $ci_string_sum;
26228 !$routput_block_type->[$i] # patch: skip for BLOCK
26229 && ($in_statement_continuation)
26230 && !( $forced_indentation_flag && $type eq ':' )
26233 $total_ci += $in_statement_continuation
26234 unless ( $ci_string_in_tokenizer =~ /1$/ );
26237 $ci_string_i = $total_ci;
26238 $in_statement_continuation = 0;
26241 elsif ($type eq '}'
26243 || $forced_indentation_flag < 0 )
26246 # only a nesting error in the script would prevent popping here
26247 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
26249 $level_i = --$level_in_tokenizer;
26251 # restore previous level values
26252 if ( length($nesting_block_string) > 1 )
26253 { # true for valid script
26254 chop $nesting_block_string;
26255 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
26256 chop $nesting_list_string;
26257 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
26259 chop $ci_string_in_tokenizer;
26260 $ci_string_sum = ones_count($ci_string_in_tokenizer);
26262 $in_statement_continuation =
26263 chop $continuation_string_in_tokenizer;
26265 # zero continuation flag at terminal BLOCK '}' which
26266 # ends a statement.
26267 if ( $routput_block_type->[$i] ) {
26269 # ...These include non-anonymous subs
26270 # note: could be sub ::abc { or sub 'abc
26271 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
26273 # note: older versions of perl require the /gc modifier
26274 # here or else the \G does not work.
26275 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
26277 $in_statement_continuation = 0;
26281 # ...and include all block types except user subs with
26282 # block prototypes and these: (sort|grep|map|do|eval)
26283 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
26285 $is_zero_continuation_block_type{
26286 $routput_block_type->[$i]
26289 $in_statement_continuation = 0;
26292 # ..but these are not terminal types:
26293 # /^(sort|grep|map|do|eval)$/ )
26295 $is_not_zero_continuation_block_type{
26296 $routput_block_type->[$i]
26301 # ..and a block introduced by a label
26302 # /^\w+\s*:$/gc ) {
26303 elsif ( $routput_block_type->[$i] =~ /:$/ ) {
26304 $in_statement_continuation = 0;
26307 # user function with block prototype
26309 $in_statement_continuation = 0;
26313 # If we are in a list, then
26314 # we must set continuation indentation at the closing
26315 # paren of something like this (paren after $check):
26318 # ( not defined $check )
26320 # or $check eq "new"
26321 # or $check eq "old",
26323 elsif ( $tok eq ')' ) {
26324 $in_statement_continuation = 1
26325 if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
26328 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
26331 # use environment after updating
26332 $container_environment =
26333 $nesting_block_flag ? 'BLOCK'
26334 : $nesting_list_flag ? 'LIST'
26336 $ci_string_i = $ci_string_sum + $in_statement_continuation;
26337 $nesting_block_string_i = $nesting_block_string;
26338 $nesting_list_string_i = $nesting_list_string;
26341 # not a structural indentation type..
26344 $container_environment =
26345 $nesting_block_flag ? 'BLOCK'
26346 : $nesting_list_flag ? 'LIST'
26349 # zero the continuation indentation at certain tokens so
26350 # that they will be at the same level as its container. For
26351 # commas, this simplifies the -lp indentation logic, which
26352 # counts commas. For ?: it makes them stand out.
26353 if ($nesting_list_flag) {
26354 if ( $type =~ /^[,\?\:]$/ ) {
26355 $in_statement_continuation = 0;
26359 # be sure binary operators get continuation indentation
26361 $container_environment
26362 && ( $type eq 'k' && $is_binary_keyword{$tok}
26363 || $is_binary_type{$type} )
26366 $in_statement_continuation = 1;
26369 # continuation indentation is sum of any open ci from previous
26370 # levels plus the current level
26371 $ci_string_i = $ci_string_sum + $in_statement_continuation;
26373 # update continuation flag ...
26374 # if this isn't a blank or comment..
26375 if ( $type ne 'b' && $type ne '#' ) {
26377 # and we are in a BLOCK
26378 if ($nesting_block_flag) {
26380 # the next token after a ';' and label starts a new stmt
26381 if ( $type eq ';' || $type eq 'J' ) {
26382 $in_statement_continuation = 0;
26385 # otherwise, we are continuing the current statement
26387 $in_statement_continuation = 1;
26391 # if we are not in a BLOCK..
26394 # do not use continuation indentation if not list
26395 # environment (could be within if/elsif clause)
26396 if ( !$nesting_list_flag ) {
26397 $in_statement_continuation = 0;
26400 # otherwise, the next token after a ',' starts a new term
26401 elsif ( $type eq ',' ) {
26402 $in_statement_continuation = 0;
26405 # otherwise, we are continuing the current term
26407 $in_statement_continuation = 1;
26413 if ( $level_in_tokenizer < 0 ) {
26414 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
26415 $tokenizer_self->{_saw_negative_indentation} = 1;
26416 warning("Starting negative indentation\n");
26420 # set secondary nesting levels based on all containment token types
26421 # Note: these are set so that the nesting depth is the depth
26422 # of the PREVIOUS TOKEN, which is convenient for setting
26423 # the strength of token bonds
26424 my $slevel_i = $slevel_in_tokenizer;
26427 if ( $is_opening_type{$type} ) {
26428 $slevel_in_tokenizer++;
26429 $nesting_token_string .= $tok;
26430 $nesting_type_string .= $type;
26434 elsif ( $is_closing_type{$type} ) {
26435 $slevel_in_tokenizer--;
26436 my $char = chop $nesting_token_string;
26438 if ( $char ne $matching_start_token{$tok} ) {
26439 $nesting_token_string .= $char . $tok;
26440 $nesting_type_string .= $type;
26443 chop $nesting_type_string;
26447 push( @block_type, $routput_block_type->[$i] );
26448 push( @ci_string, $ci_string_i );
26449 push( @container_environment, $container_environment );
26450 push( @container_type, $routput_container_type->[$i] );
26451 push( @levels, $level_i );
26452 push( @nesting_tokens, $nesting_token_string_i );
26453 push( @nesting_types, $nesting_type_string_i );
26454 push( @slevels, $slevel_i );
26455 push( @token_type, $fix_type );
26456 push( @type_sequence, $routput_type_sequence->[$i] );
26457 push( @nesting_blocks, $nesting_block_string );
26458 push( @nesting_lists, $nesting_list_string );
26460 # now form the previous token
26463 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
26467 substr( $input_line, $$rtoken_map[$im], $num ) );
26473 $num = length($input_line) - $$rtoken_map[$im]; # make the last token
26475 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
26478 $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
26479 $tokenizer_self->{_in_quote} = $in_quote;
26480 $tokenizer_self->{_quote_target} =
26481 $in_quote ? matching_end_token($quote_character) : "";
26482 $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
26484 $line_of_tokens->{_rtoken_type} = \@token_type;
26485 $line_of_tokens->{_rtokens} = \@tokens;
26486 $line_of_tokens->{_rblock_type} = \@block_type;
26487 $line_of_tokens->{_rcontainer_type} = \@container_type;
26488 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
26489 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
26490 $line_of_tokens->{_rlevels} = \@levels;
26491 $line_of_tokens->{_rslevels} = \@slevels;
26492 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
26493 $line_of_tokens->{_rci_levels} = \@ci_string;
26494 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
26498 } # end tokenize_this_line
26500 #########i#############################################################
26501 # Tokenizer routines which assist in identifying token types
26502 #######################################################################
26504 sub operator_expected {
26506 # Many perl symbols have two or more meanings. For example, '<<'
26507 # can be a shift operator or a here-doc operator. The
26508 # interpretation of these symbols depends on the current state of
26509 # the tokenizer, which may either be expecting a term or an
26510 # operator. For this example, a << would be a shift if an operator
26511 # is expected, and a here-doc if a term is expected. This routine
26512 # is called to make this decision for any current token. It returns
26513 # one of three possible values:
26515 # OPERATOR - operator expected (or at least, not a term)
26516 # UNKNOWN - can't tell
26517 # TERM - a term is expected (or at least, not an operator)
26519 # The decision is based on what has been seen so far. This
26520 # information is stored in the "$last_nonblank_type" and
26521 # "$last_nonblank_token" variables. For example, if the
26522 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
26523 # if $last_nonblank_type is 'n' (numeric), we are expecting an
26526 # If a UNKNOWN is returned, the calling routine must guess. A major
26527 # goal of this tokenizer is to minimize the possibility of returning
26528 # UNKNOWN, because a wrong guess can spoil the formatting of a
26531 # adding NEW_TOKENS: it is critically important that this routine be
26532 # updated to allow it to determine if an operator or term is to be
26533 # expected after the new token. Doing this simply involves adding
26534 # the new token character to one of the regexes in this routine or
26535 # to one of the hash lists
26536 # that it uses, which are initialized in the BEGIN section.
26537 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
26540 my ( $prev_type, $tok, $next_type ) = @_;
26542 my $op_expected = UNKNOWN;
26544 ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
26546 # Note: function prototype is available for token type 'U' for future
26547 # program development. It contains the leading and trailing parens,
26548 # and no blanks. It might be used to eliminate token type 'C', for
26549 # example (prototype = '()'). Thus:
26550 # if ($last_nonblank_type eq 'U') {
26551 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
26554 # A possible filehandle (or object) requires some care...
26555 if ( $last_nonblank_type eq 'Z' ) {
26558 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
26559 $op_expected = UNKNOWN;
26562 # For possible file handle like "$a", Perl uses weird parsing rules.
26564 # print $a/2,"/hi"; - division
26565 # print $a / 2,"/hi"; - division
26566 # print $a/ 2,"/hi"; - division
26567 # print $a /2,"/hi"; - pattern (and error)!
26568 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
26569 $op_expected = TERM;
26572 # Note when an operation is being done where a
26573 # filehandle might be expected, since a change in whitespace
26574 # could change the interpretation of the statement.
26576 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
26577 complain("operator in print statement not recommended\n");
26578 $op_expected = OPERATOR;
26583 # Check for smartmatch operator before preceding brace or square bracket.
26584 # For example, at the ? after the ] in the following expressions we are
26585 # expecting an operator:
26587 # qr/3/ ~~ ['1234'] ? 1 : 0;
26588 # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
26589 elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
26590 $op_expected = OPERATOR;
26593 # handle something after 'do' and 'eval'
26594 elsif ( $is_block_operator{$last_nonblank_token} ) {
26596 # something like $a = eval "expression";
26598 if ( $last_nonblank_type eq 'k' ) {
26599 $op_expected = TERM; # expression or list mode following keyword
26602 # something like $a = do { BLOCK } / 2;
26603 # or this ? after a smartmatch anonynmous hash or array reference:
26604 # qr/3/ ~~ ['1234'] ? 1 : 0;
26607 $op_expected = OPERATOR; # block mode following }
26611 # handle bare word..
26612 elsif ( $last_nonblank_type eq 'w' ) {
26614 # unfortunately, we can't tell what type of token to expect next
26615 # after most bare words
26616 $op_expected = UNKNOWN;
26619 # operator, but not term possible after these types
26620 # Note: moved ')' from type to token because parens in list context
26621 # get marked as '{' '}' now. This is a minor glitch in the following:
26622 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
26624 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
26625 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
26627 $op_expected = OPERATOR;
26629 # in a 'use' statement, numbers and v-strings are not true
26630 # numbers, so to avoid incorrect error messages, we will
26631 # mark them as unknown for now (use.t)
26632 # TODO: it would be much nicer to create a new token V for VERSION
26633 # number in a use statement. Then this could be a check on type V
26634 # and related patches which change $statement_type for '=>'
26635 # and ',' could be removed. Further, it would clean things up to
26636 # scan the 'use' statement with a separate subroutine.
26637 if ( ( $statement_type eq 'use' )
26638 && ( $last_nonblank_type =~ /^[nv]$/ ) )
26640 $op_expected = UNKNOWN;
26643 # expecting VERSION or {} after package NAMESPACE
26644 elsif ($statement_type =~ /^package\b/
26645 && $last_nonblank_token =~ /^package\b/ )
26647 $op_expected = TERM;
26651 # no operator after many keywords, such as "die", "warn", etc
26652 elsif ( $expecting_term_token{$last_nonblank_token} ) {
26654 # patch for dor.t (defined or).
26655 # perl functions which may be unary operators
26656 # TODO: This list is incomplete, and these should be put
26659 && $next_type eq '/'
26660 && $last_nonblank_type eq 'k'
26661 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
26663 $op_expected = OPERATOR;
26666 $op_expected = TERM;
26670 # no operator after things like + - ** (i.e., other operators)
26671 elsif ( $expecting_term_types{$last_nonblank_type} ) {
26672 $op_expected = TERM;
26675 # a few operators, like "time", have an empty prototype () and so
26676 # take no parameters but produce a value to operate on
26677 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
26678 $op_expected = OPERATOR;
26681 # post-increment and decrement produce values to be operated on
26682 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
26683 $op_expected = OPERATOR;
26686 # no value to operate on after sub block
26687 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
26689 # a right brace here indicates the end of a simple block.
26690 # all non-structural right braces have type 'R'
26691 # all braces associated with block operator keywords have been given those
26692 # keywords as "last_nonblank_token" and caught above.
26693 # (This statement is order dependent, and must come after checking
26694 # $last_nonblank_token).
26695 elsif ( $last_nonblank_type eq '}' ) {
26697 # patch for dor.t (defined or).
26699 && $next_type eq '/'
26700 && $last_nonblank_token eq ']' )
26702 $op_expected = OPERATOR;
26705 $op_expected = TERM;
26709 # something else..what did I forget?
26712 # collecting diagnostics on unknown operator types..see what was missed
26713 $op_expected = UNKNOWN;
26715 "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
26719 TOKENIZER_DEBUG_FLAG_EXPECT && do {
26721 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
26723 return $op_expected;
26726 sub new_statement_ok {
26728 # return true if the current token can start a new statement
26729 # USES GLOBAL VARIABLES: $last_nonblank_type
26731 return label_ok() # a label would be ok here
26733 || $last_nonblank_type eq 'J'; # or we follow a label
26739 # Decide if a bare word followed by a colon here is a label
26740 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
26741 # $brace_depth, @brace_type
26743 # if it follows an opening or closing code block curly brace..
26744 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
26745 && $last_nonblank_type eq $last_nonblank_token )
26748 # it is a label if and only if the curly encloses a code block
26749 return $brace_type[$brace_depth];
26752 # otherwise, it is a label if and only if it follows a ';' (real or fake)
26755 return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
26759 sub code_block_type {
26761 # Decide if this is a block of code, and its type.
26762 # Must be called only when $type = $token = '{'
26763 # The problem is to distinguish between the start of a block of code
26764 # and the start of an anonymous hash reference
26765 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
26766 # to indicate the type of code block. (For example, 'last_nonblank_token'
26767 # might be 'if' for an if block, 'else' for an else block, etc).
26768 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
26769 # $last_nonblank_block_type, $brace_depth, @brace_type
26771 # handle case of multiple '{'s
26773 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
26775 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
26776 if ( $last_nonblank_token eq '{'
26777 && $last_nonblank_type eq $last_nonblank_token )
26780 # opening brace where a statement may appear is probably
26781 # a code block but might be and anonymous hash reference
26782 if ( $brace_type[$brace_depth] ) {
26783 return decide_if_code_block( $i, $rtokens, $rtoken_type,
26784 $max_token_index );
26787 # cannot start a code block within an anonymous hash
26793 elsif ( $last_nonblank_token eq ';' ) {
26795 # an opening brace where a statement may appear is probably
26796 # a code block but might be and anonymous hash reference
26797 return decide_if_code_block( $i, $rtokens, $rtoken_type,
26798 $max_token_index );
26801 # handle case of '}{'
26802 elsif ($last_nonblank_token eq '}'
26803 && $last_nonblank_type eq $last_nonblank_token )
26806 # a } { situation ...
26807 # could be hash reference after code block..(blktype1.t)
26808 if ($last_nonblank_block_type) {
26809 return decide_if_code_block( $i, $rtokens, $rtoken_type,
26810 $max_token_index );
26813 # must be a block if it follows a closing hash reference
26815 return $last_nonblank_token;
26819 # NOTE: braces after type characters start code blocks, but for
26820 # simplicity these are not identified as such. See also
26821 # sub is_non_structural_brace.
26822 # elsif ( $last_nonblank_type eq 't' ) {
26823 # return $last_nonblank_token;
26826 # brace after label:
26827 elsif ( $last_nonblank_type eq 'J' ) {
26828 return $last_nonblank_token;
26831 # otherwise, look at previous token. This must be a code block if
26832 # it follows any of these:
26833 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
26834 elsif ( $is_code_block_token{$last_nonblank_token} ) {
26836 # Bug Patch: Note that the opening brace after the 'if' in the following
26837 # snippet is an anonymous hash ref and not a code block!
26838 # print 'hi' if { x => 1, }->{x};
26839 # We can identify this situation because the last nonblank type
26840 # will be a keyword (instead of a closing peren)
26841 if ( $last_nonblank_token =~ /^(if|unless)$/
26842 && $last_nonblank_type eq 'k' )
26847 return $last_nonblank_token;
26851 # or a sub or package BLOCK
26852 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
26853 && $last_nonblank_token =~ /^(sub|package)\b/ )
26855 return $last_nonblank_token;
26858 elsif ( $statement_type =~ /^(sub|package)\b/ ) {
26859 return $statement_type;
26862 # user-defined subs with block parameters (like grep/map/eval)
26863 elsif ( $last_nonblank_type eq 'G' ) {
26864 return $last_nonblank_token;
26868 elsif ( $last_nonblank_type eq 'w' ) {
26869 return decide_if_code_block( $i, $rtokens, $rtoken_type,
26870 $max_token_index );
26873 # anything else must be anonymous hash reference
26879 sub decide_if_code_block {
26881 # USES GLOBAL VARIABLES: $last_nonblank_token
26882 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
26883 my ( $next_nonblank_token, $i_next ) =
26884 find_next_nonblank_token( $i, $rtokens, $max_token_index );
26886 # we are at a '{' where a statement may appear.
26887 # We must decide if this brace starts an anonymous hash or a code
26889 # return "" if anonymous hash, and $last_nonblank_token otherwise
26891 # initialize to be code BLOCK
26892 my $code_block_type = $last_nonblank_token;
26894 # Check for the common case of an empty anonymous hash reference:
26895 # Maybe something like sub { { } }
26896 if ( $next_nonblank_token eq '}' ) {
26897 $code_block_type = "";
26902 # To guess if this '{' is an anonymous hash reference, look ahead
26903 # and test as follows:
26905 # it is a hash reference if next come:
26906 # - a string or digit followed by a comma or =>
26907 # - bareword followed by =>
26908 # otherwise it is a code block
26910 # Examples of anonymous hash ref:
26914 # Examples of code blocks:
26915 # {1; print "hello\n", 1;}
26918 # We are only going to look ahead one more (nonblank/comment) line.
26919 # Strange formatting could cause a bad guess, but that's unlikely.
26920 my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
26921 my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
26922 my ( $rpre_tokens, $rpre_types ) =
26923 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
26924 # generous, and prevents
26926 # time in mangled files
26927 if ( defined($rpre_types) && @$rpre_types ) {
26928 push @pre_types, @$rpre_types;
26929 push @pre_tokens, @$rpre_tokens;
26932 # put a sentinel token to simplify stopping the search
26933 push @pre_types, '}';
26936 $jbeg = 1 if $pre_types[0] eq 'b';
26938 # first look for one of these
26940 # - bareword with leading -
26944 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
26946 # find the closing quote; don't worry about escapes
26947 my $quote_mark = $pre_types[$j];
26948 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
26949 if ( $pre_types[$k] eq $quote_mark ) {
26951 my $next = $pre_types[$j];
26956 elsif ( $pre_types[$j] eq 'd' ) {
26959 elsif ( $pre_types[$j] eq 'w' ) {
26960 unless ( $is_keyword{ $pre_tokens[$j] } ) {
26964 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
26967 if ( $j > $jbeg ) {
26969 $j++ if $pre_types[$j] eq 'b';
26971 # it's a hash ref if a comma or => follow next
26972 if ( $pre_types[$j] eq ','
26973 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
26975 $code_block_type = "";
26980 return $code_block_type;
26985 # report unexpected token type and show where it is
26986 # USES GLOBAL VARIABLES: $tokenizer_self
26987 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
26988 $rpretoken_type, $input_line )
26991 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
26992 my $msg = "found $found where $expecting expected";
26993 my $pos = $$rpretoken_map[$i_tok];
26994 interrupt_logfile();
26995 my $input_line_number = $tokenizer_self->{_last_line_number};
26996 my ( $offset, $numbered_line, $underline ) =
26997 make_numbered_line( $input_line_number, $input_line, $pos );
26998 $underline = write_on_underline( $underline, $pos - $offset, '^' );
27001 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
27002 my $pos_prev = $$rpretoken_map[$last_nonblank_i];
27004 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
27005 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
27008 $num = $pos - $pos_prev;
27010 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
27013 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
27014 $trailer = " (previous token underlined)";
27016 warning( $numbered_line . "\n" );
27017 warning( $underline . "\n" );
27018 warning( $msg . $trailer . "\n" );
27023 sub is_non_structural_brace {
27025 # Decide if a brace or bracket is structural or non-structural
27026 # by looking at the previous token and type
27027 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
27029 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
27030 # Tentatively deactivated because it caused the wrong operator expectation
27032 # $user = @vars[1] / 100;
27033 # Must update sub operator_expected before re-implementing.
27034 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
27038 # NOTE: braces after type characters start code blocks, but for
27039 # simplicity these are not identified as such. See also
27040 # sub code_block_type
27041 # if ($last_nonblank_type eq 't') {return 0}
27043 # otherwise, it is non-structural if it is decorated
27044 # by type information.
27045 # For example, the '{' here is non-structural: ${xxx}
27047 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
27049 # or if we follow a hash or array closing curly brace or bracket
27050 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
27051 # because the first '}' would have been given type 'R'
27052 || $last_nonblank_type =~ /^([R\]])$/
27056 #########i#############################################################
27057 # Tokenizer routines for tracking container nesting depths
27058 #######################################################################
27060 # The following routines keep track of nesting depths of the nesting
27061 # types, ( [ { and ?. This is necessary for determining the indentation
27062 # level, and also for debugging programs. Not only do they keep track of
27063 # nesting depths of the individual brace types, but they check that each
27064 # of the other brace types is balanced within matching pairs. For
27065 # example, if the program sees this sequence:
27069 # then it can determine that there is an extra left paren somewhere
27070 # between the { and the }. And so on with every other possible
27071 # combination of outer and inner brace types. For another
27076 # which has an extra ] within the parens.
27078 # The brace types have indexes 0 .. 3 which are indexes into
27081 # The pair ? : are treated as just another nesting type, with ? acting
27082 # as the opening brace and : acting as the closing brace.
27086 # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
27088 # saves the nesting depth of brace type $b (where $b is either of the other
27089 # nesting types) when brace type $a enters a new depth. When this depth
27090 # decreases, a check is made that the current depth of brace types $b is
27091 # unchanged, or otherwise there must have been an error. This can
27092 # be very useful for localizing errors, particularly when perl runs to
27093 # the end of a large file (such as this one) and announces that there
27094 # is a problem somewhere.
27096 # A numerical sequence number is maintained for every nesting type,
27097 # so that each matching pair can be uniquely identified in a simple
27100 sub increase_nesting_depth {
27101 my ( $aa, $pos ) = @_;
27103 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27104 # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
27107 $current_depth[$aa]++;
27109 $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
27110 my $input_line_number = $tokenizer_self->{_last_line_number};
27111 my $input_line = $tokenizer_self->{_line_text};
27113 # Sequence numbers increment by number of items. This keeps
27114 # a unique set of numbers but still allows the relative location
27115 # of any type to be determined.
27116 $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
27117 my $seqno = $nesting_sequence_number[$aa];
27118 $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
27120 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
27121 [ $input_line_number, $input_line, $pos ];
27123 for $bb ( 0 .. $#closing_brace_names ) {
27124 next if ( $bb == $aa );
27125 $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
27128 # set a flag for indenting a nested ternary statement
27130 if ( $aa == QUESTION_COLON ) {
27131 $nested_ternary_flag[ $current_depth[$aa] ] = 0;
27132 if ( $current_depth[$aa] > 1 ) {
27133 if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
27134 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
27135 if ( $pdepth == $total_depth - 1 ) {
27137 $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
27142 $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
27143 $statement_type = "";
27144 return ( $seqno, $indent );
27147 sub decrease_nesting_depth {
27149 my ( $aa, $pos ) = @_;
27151 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27152 # @current_sequence_number, @depth_array, @starting_line_of_current_depth
27156 my $input_line_number = $tokenizer_self->{_last_line_number};
27157 my $input_line = $tokenizer_self->{_line_text};
27161 if ( $current_depth[$aa] > 0 ) {
27163 # set a flag for un-indenting after seeing a nested ternary statement
27164 $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
27165 if ( $aa == QUESTION_COLON ) {
27166 $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
27168 $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
27170 # check that any brace types $bb contained within are balanced
27171 for $bb ( 0 .. $#closing_brace_names ) {
27172 next if ( $bb == $aa );
27174 unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
27175 $current_depth[$bb] )
27178 $current_depth[$bb] -
27179 $depth_array[$aa][$bb][ $current_depth[$aa] ];
27181 # don't whine too many times
27182 my $saw_brace_error = get_saw_brace_error();
27184 $saw_brace_error <= MAX_NAG_MESSAGES
27186 # if too many closing types have occurred, we probably
27187 # already caught this error
27188 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
27191 interrupt_logfile();
27193 $starting_line_of_current_depth[$aa]
27194 [ $current_depth[$aa] ];
27196 my $rel = [ $input_line_number, $input_line, $pos ];
27200 if ( $diff == 1 || $diff == -1 ) {
27208 ? $opening_brace_names[$bb]
27209 : $closing_brace_names[$bb];
27210 write_error_indicator_pair( @$rsl, '^' );
27212 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
27217 $starting_line_of_current_depth[$bb]
27218 [ $current_depth[$bb] ];
27221 " The most recent un-matched $bname is on line $ml\n";
27222 write_error_indicator_pair( @$rml, '^' );
27224 write_error_indicator_pair( @$rel, '^' );
27228 increment_brace_error();
27231 $current_depth[$aa]--;
27235 my $saw_brace_error = get_saw_brace_error();
27236 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
27238 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
27240 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
27242 increment_brace_error();
27244 return ( $seqno, $outdent );
27247 sub check_final_nesting_depths {
27250 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
27252 for $aa ( 0 .. $#closing_brace_names ) {
27254 if ( $current_depth[$aa] ) {
27256 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
27259 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
27260 The most recent un-matched $opening_brace_names[$aa] is on line $sl
27262 indicate_error( $msg, @$rsl, '^' );
27263 increment_brace_error();
27268 #########i#############################################################
27269 # Tokenizer routines for looking ahead in input stream
27270 #######################################################################
27272 sub peek_ahead_for_n_nonblank_pre_tokens {
27274 # returns next n pretokens if they exist
27275 # returns undef's if hits eof without seeing any pretokens
27276 # USES GLOBAL VARIABLES: $tokenizer_self
27277 my $max_pretokens = shift;
27280 my ( $rpre_tokens, $rmap, $rpre_types );
27282 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27284 $line =~ s/^\s*//; # trim leading blanks
27285 next if ( length($line) <= 0 ); # skip blank
27286 next if ( $line =~ /^#/ ); # skip comment
27287 ( $rpre_tokens, $rmap, $rpre_types ) =
27288 pre_tokenize( $line, $max_pretokens );
27291 return ( $rpre_tokens, $rpre_types );
27294 # look ahead for next non-blank, non-comment line of code
27295 sub peek_ahead_for_nonblank_token {
27297 # USES GLOBAL VARIABLES: $tokenizer_self
27298 my ( $rtokens, $max_token_index ) = @_;
27302 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27304 $line =~ s/^\s*//; # trim leading blanks
27305 next if ( length($line) <= 0 ); # skip blank
27306 next if ( $line =~ /^#/ ); # skip comment
27307 my ( $rtok, $rmap, $rtype ) =
27308 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
27309 my $j = $max_token_index + 1;
27312 foreach $tok (@$rtok) {
27313 last if ( $tok =~ "\n" );
27314 $$rtokens[ ++$j ] = $tok;
27321 #########i#############################################################
27322 # Tokenizer guessing routines for ambiguous situations
27323 #######################################################################
27325 sub guess_if_pattern_or_conditional {
27327 # this routine is called when we have encountered a ? following an
27328 # unknown bareword, and we must decide if it starts a pattern or not
27329 # input parameters:
27330 # $i - token index of the ? starting possible pattern
27331 # output parameters:
27332 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
27333 # msg = a warning or diagnostic message
27334 # USES GLOBAL VARIABLES: $last_nonblank_token
27335 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27336 my $is_pattern = 0;
27337 my $msg = "guessing that ? after $last_nonblank_token starts a ";
27339 if ( $i >= $max_token_index ) {
27340 $msg .= "conditional (no end to pattern found on the line)\n";
27345 my $next_token = $$rtokens[$i]; # first token after ?
27347 # look for a possible ending ? on this line..
27349 my $quote_depth = 0;
27350 my $quote_character = '';
27354 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27357 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27358 $quote_pos, $quote_depth, $max_token_index );
27362 # we didn't find an ending ? on this line,
27363 # so we bias towards conditional
27365 $msg .= "conditional (no ending ? on this line)\n";
27367 # we found an ending ?, so we bias towards a pattern
27371 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27373 $msg .= "pattern (found ending ? and pattern expected)\n";
27376 $msg .= "pattern (uncertain, but found ending ?)\n";
27380 return ( $is_pattern, $msg );
27383 sub guess_if_pattern_or_division {
27385 # this routine is called when we have encountered a / following an
27386 # unknown bareword, and we must decide if it starts a pattern or is a
27388 # input parameters:
27389 # $i - token index of the / starting possible pattern
27390 # output parameters:
27391 # $is_pattern = 0 if probably division, =1 if probably a pattern
27392 # msg = a warning or diagnostic message
27393 # USES GLOBAL VARIABLES: $last_nonblank_token
27394 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27395 my $is_pattern = 0;
27396 my $msg = "guessing that / after $last_nonblank_token starts a ";
27398 if ( $i >= $max_token_index ) {
27399 $msg .= "division (no end to pattern found on the line)\n";
27403 my $divide_expected =
27404 numerator_expected( $i, $rtokens, $max_token_index );
27406 my $next_token = $$rtokens[$i]; # first token after slash
27408 # look for a possible ending / on this line..
27410 my $quote_depth = 0;
27411 my $quote_character = '';
27415 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27418 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27419 $quote_pos, $quote_depth, $max_token_index );
27423 # we didn't find an ending / on this line,
27424 # so we bias towards division
27425 if ( $divide_expected >= 0 ) {
27427 $msg .= "division (no ending / on this line)\n";
27430 $msg = "multi-line pattern (division not possible)\n";
27436 # we found an ending /, so we bias towards a pattern
27439 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27441 if ( $divide_expected >= 0 ) {
27443 if ( $i - $ibeg > 60 ) {
27444 $msg .= "division (matching / too distant)\n";
27448 $msg .= "pattern (but division possible too)\n";
27454 $msg .= "pattern (division not possible)\n";
27459 if ( $divide_expected >= 0 ) {
27461 $msg .= "division (pattern not possible)\n";
27466 "pattern (uncertain, but division would not work here)\n";
27471 return ( $is_pattern, $msg );
27474 # try to resolve here-doc vs. shift by looking ahead for
27475 # non-code or the end token (currently only looks for end token)
27476 # returns 1 if it is probably a here doc, 0 if not
27477 sub guess_if_here_doc {
27479 # This is how many lines we will search for a target as part of the
27480 # guessing strategy. It is a constant because there is probably
27481 # little reason to change it.
27482 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
27484 use constant HERE_DOC_WINDOW => 40;
27486 my $next_token = shift;
27487 my $here_doc_expected = 0;
27490 my $msg = "checking <<";
27492 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
27496 if ( $line =~ /^$next_token$/ ) {
27497 $msg .= " -- found target $next_token ahead $k lines\n";
27498 $here_doc_expected = 1; # got it
27501 last if ( $k >= HERE_DOC_WINDOW );
27504 unless ($here_doc_expected) {
27506 if ( !defined($line) ) {
27507 $here_doc_expected = -1; # hit eof without seeing target
27508 $msg .= " -- must be shift; target $next_token not in file\n";
27511 else { # still unsure..taking a wild guess
27513 if ( !$is_constant{$current_package}{$next_token} ) {
27514 $here_doc_expected = 1;
27516 " -- guessing it's a here-doc ($next_token not a constant)\n";
27520 " -- guessing it's a shift ($next_token is a constant)\n";
27524 write_logfile_entry($msg);
27525 return $here_doc_expected;
27528 #########i#############################################################
27529 # Tokenizer Routines for scanning identifiers and related items
27530 #######################################################################
27532 sub scan_bare_identifier_do {
27534 # this routine is called to scan a token starting with an alphanumeric
27535 # variable or package separator, :: or '.
27536 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
27537 # $last_nonblank_type,@paren_type, $paren_depth
27539 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
27543 my $package = undef;
27547 # we have to back up one pretoken at a :: since each : is one pretoken
27548 if ( $tok eq '::' ) { $i_beg-- }
27549 if ( $tok eq '->' ) { $i_beg-- }
27550 my $pos_beg = $$rtoken_map[$i_beg];
27551 pos($input_line) = $pos_beg;
27558 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
27560 my $pos = pos($input_line);
27561 my $numc = $pos - $pos_beg;
27562 $tok = substr( $input_line, $pos_beg, $numc );
27564 # type 'w' includes anything without leading type info
27565 # ($,%,@,*) including something like abc::def::ghi
27569 if ( defined($2) ) { $sub_name = $2; }
27570 if ( defined($1) ) {
27573 # patch: don't allow isolated package name which just ends
27574 # in the old style package separator (single quote). Example:
27576 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
27580 $package =~ s/\'/::/g;
27581 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
27582 $package =~ s/::$//;
27585 $package = $current_package;
27587 if ( $is_keyword{$tok} ) {
27592 # if it is a bareword..
27593 if ( $type eq 'w' ) {
27595 # check for v-string with leading 'v' type character
27596 # (This seems to have precedence over filehandle, type 'Y')
27597 if ( $tok =~ /^v\d[_\d]*$/ ) {
27599 # we only have the first part - something like 'v101' -
27601 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
27602 $pos = pos($input_line);
27603 $numc = $pos - $pos_beg;
27604 $tok = substr( $input_line, $pos_beg, $numc );
27608 # warn if this version can't handle v-strings
27609 report_v_string($tok);
27612 elsif ( $is_constant{$package}{$sub_name} ) {
27616 # bareword after sort has implied empty prototype; for example:
27617 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
27618 # This has priority over whatever the user has specified.
27619 elsif ($last_nonblank_token eq 'sort'
27620 && $last_nonblank_type eq 'k' )
27625 # Note: strangely, perl does not seem to really let you create
27626 # functions which act like eval and do, in the sense that eval
27627 # and do may have operators following the final }, but any operators
27628 # that you create with prototype (&) apparently do not allow
27629 # trailing operators, only terms. This seems strange.
27630 # If this ever changes, here is the update
27631 # to make perltidy behave accordingly:
27633 # elsif ( $is_block_function{$package}{$tok} ) {
27634 # $tok='eval'; # patch to do braces like eval - doesn't work
27637 # FIXME: This could become a separate type to allow for different
27639 elsif ( $is_block_function{$package}{$sub_name} ) {
27643 elsif ( $is_block_list_function{$package}{$sub_name} ) {
27646 elsif ( $is_user_function{$package}{$sub_name} ) {
27648 $prototype = $user_function_prototype{$package}{$sub_name};
27651 # check for indirect object
27654 # added 2001-03-27: must not be followed immediately by '('
27656 ( $input_line !~ m/\G\(/gc )
27661 # preceded by keyword like 'print', 'printf' and friends
27662 $is_indirect_object_taker{$last_nonblank_token}
27664 # or preceded by something like 'print(' or 'printf('
27666 ( $last_nonblank_token eq '(' )
27667 && $is_indirect_object_taker{ $paren_type[$paren_depth]
27675 # may not be indirect object unless followed by a space
27676 if ( $input_line =~ m/\G\s+/gc ) {
27680 # Perl's indirect object notation is a very bad
27681 # thing and can cause subtle bugs, especially for
27682 # beginning programmers. And I haven't even been
27683 # able to figure out a sane warning scheme which
27684 # doesn't get in the way of good scripts.
27686 # Complain if a filehandle has any lower case
27687 # letters. This is suggested good practice.
27688 # Use 'sub_name' because something like
27689 # main::MYHANDLE is ok for filehandle
27690 if ( $sub_name =~ /[a-z]/ ) {
27692 # could be bug caused by older perltidy if
27694 if ( $input_line =~ m/\G\s*\(/gc ) {
27696 "Caution: unknown word '$tok' in indirect object slot\n"
27702 # bareword not followed by a space -- may not be filehandle
27703 # (may be function call defined in a 'use' statement)
27710 # Now we must convert back from character position
27711 # to pre_token index.
27712 # I don't think an error flag can occur here ..but who knows
27715 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27717 warning("scan_bare_identifier: Possibly invalid tokenization\n");
27721 # no match but line not blank - could be syntax error
27722 # perl will take '::' alone without complaint
27726 # change this warning to log message if it becomes annoying
27727 warning("didn't find identifier after leading ::\n");
27729 return ( $i, $tok, $type, $prototype );
27734 # This is the new scanner and will eventually replace scan_identifier.
27735 # Only type 'sub' and 'package' are implemented.
27736 # Token types $ * % @ & -> are not yet implemented.
27738 # Scan identifier following a type token.
27739 # The type of call depends on $id_scan_state: $id_scan_state = ''
27740 # for starting call, in which case $tok must be the token defining
27743 # If the type token is the last nonblank token on the line, a value
27744 # of $id_scan_state = $tok is returned, indicating that further
27745 # calls must be made to get the identifier. If the type token is
27746 # not the last nonblank token on the line, the identifier is
27747 # scanned and handled and a value of '' is returned.
27748 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
27749 # $statement_type, $tokenizer_self
27751 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
27755 my ( $i_beg, $pos_beg );
27757 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
27758 #my ($a,$b,$c) = caller;
27759 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
27761 # on re-entry, start scanning at first token on the line
27762 if ($id_scan_state) {
27767 # on initial entry, start scanning just after type token
27770 $id_scan_state = $tok;
27774 # find $i_beg = index of next nonblank token,
27775 # and handle empty lines
27776 my $blank_line = 0;
27777 my $next_nonblank_token = $$rtokens[$i_beg];
27778 if ( $i_beg > $max_token_index ) {
27783 # only a '#' immediately after a '$' is not a comment
27784 if ( $next_nonblank_token eq '#' ) {
27785 unless ( $tok eq '$' ) {
27790 if ( $next_nonblank_token =~ /^\s/ ) {
27791 ( $next_nonblank_token, $i_beg ) =
27792 find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
27793 $max_token_index );
27794 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
27800 # handle non-blank line; identifier, if any, must follow
27801 unless ($blank_line) {
27803 if ( $id_scan_state eq 'sub' ) {
27804 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
27805 $input_line, $i, $i_beg,
27806 $tok, $type, $rtokens,
27807 $rtoken_map, $id_scan_state, $max_token_index
27811 elsif ( $id_scan_state eq 'package' ) {
27812 ( $i, $tok, $type ) =
27813 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
27814 $rtoken_map, $max_token_index );
27815 $id_scan_state = '';
27819 warning("invalid token in scan_id: $tok\n");
27820 $id_scan_state = '';
27824 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
27826 # shouldn't happen:
27828 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
27830 report_definite_bug();
27833 TOKENIZER_DEBUG_FLAG_NSCAN && do {
27835 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
27837 return ( $i, $tok, $type, $id_scan_state );
27840 sub check_prototype {
27841 my ( $proto, $package, $subname ) = @_;
27842 return unless ( defined($package) && defined($subname) );
27843 if ( defined($proto) ) {
27844 $proto =~ s/^\s*\(\s*//;
27845 $proto =~ s/\s*\)$//;
27847 $is_user_function{$package}{$subname} = 1;
27848 $user_function_prototype{$package}{$subname} = "($proto)";
27850 # prototypes containing '&' must be treated specially..
27851 if ( $proto =~ /\&/ ) {
27853 # right curly braces of prototypes ending in
27854 # '&' may be followed by an operator
27855 if ( $proto =~ /\&$/ ) {
27856 $is_block_function{$package}{$subname} = 1;
27859 # right curly braces of prototypes NOT ending in
27860 # '&' may NOT be followed by an operator
27861 elsif ( $proto !~ /\&$/ ) {
27862 $is_block_list_function{$package}{$subname} = 1;
27867 $is_constant{$package}{$subname} = 1;
27871 $is_user_function{$package}{$subname} = 1;
27875 sub do_scan_package {
27877 # do_scan_package parses a package name
27878 # it is called with $i_beg equal to the index of the first nonblank
27879 # token following a 'package' token.
27880 # USES GLOBAL VARIABLES: $current_package,
27882 # package NAMESPACE
27883 # package NAMESPACE VERSION
27884 # package NAMESPACE BLOCK
27885 # package NAMESPACE VERSION BLOCK
27887 # If VERSION is provided, package sets the $VERSION variable in the given
27888 # namespace to a version object with the VERSION provided. VERSION must be
27889 # a "strict" style version number as defined by the version module: a
27890 # positive decimal number (integer or decimal-fraction) without
27891 # exponentiation or else a dotted-decimal v-string with a leading 'v'
27892 # character and at least three components.
27893 # reference http://perldoc.perl.org/functions/package.html
27895 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
27898 my $package = undef;
27899 my $pos_beg = $$rtoken_map[$i_beg];
27900 pos($input_line) = $pos_beg;
27902 # handle non-blank line; package name, if any, must follow
27903 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
27905 $package = ( defined($1) && $1 ) ? $1 : 'main';
27906 $package =~ s/\'/::/g;
27907 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
27908 $package =~ s/::$//;
27909 my $pos = pos($input_line);
27910 my $numc = $pos - $pos_beg;
27911 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
27914 # Now we must convert back from character position
27915 # to pre_token index.
27916 # I don't think an error flag can occur here ..but ?
27919 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27920 if ($error) { warning("Possibly invalid package\n") }
27921 $current_package = $package;
27923 # we should now have package NAMESPACE
27924 # now expecting VERSION, BLOCK, or ; to follow ...
27925 # package NAMESPACE VERSION
27926 # package NAMESPACE BLOCK
27927 # package NAMESPACE VERSION BLOCK
27928 my ( $next_nonblank_token, $i_next ) =
27929 find_next_nonblank_token( $i, $rtokens, $max_token_index );
27931 # check that something recognizable follows, but do not parse.
27932 # A VERSION number will be parsed later as a number or v-string in the
27933 # normal way. What is important is to set the statement type if
27934 # everything looks okay so that the operator_expected() routine
27935 # knows that the number is in a package statement.
27936 # Examples of valid primitive tokens that might follow are:
27937 # 1235 . ; { } v3 v
27938 if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
27939 $statement_type = $tok;
27943 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
27948 # no match but line not blank --
27949 # could be a label with name package, like package: , for example.
27954 return ( $i, $tok, $type );
27957 sub scan_identifier_do {
27959 # This routine assembles tokens into identifiers. It maintains a
27960 # scan state, id_scan_state. It updates id_scan_state based upon
27961 # current id_scan_state and token, and returns an updated
27962 # id_scan_state and the next index after the identifier.
27963 # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
27964 # $last_nonblank_type
27966 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
27971 my $tok_begin = $$rtokens[$i_begin];
27972 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
27973 my $id_scan_state_begin = $id_scan_state;
27974 my $identifier_begin = $identifier;
27975 my $tok = $tok_begin;
27978 # these flags will be used to help figure out the type:
27979 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
27982 # allow old package separator (') except in 'use' statement
27983 my $allow_tick = ( $last_nonblank_token ne 'use' );
27985 # get started by defining a type and a state if necessary
27986 unless ($id_scan_state) {
27987 $context = UNKNOWN_CONTEXT;
27989 # fixup for digraph
27990 if ( $tok eq '>' ) {
27994 $identifier = $tok;
27996 if ( $tok eq '$' || $tok eq '*' ) {
27997 $id_scan_state = '$';
27998 $context = SCALAR_CONTEXT;
28000 elsif ( $tok eq '%' || $tok eq '@' ) {
28001 $id_scan_state = '$';
28002 $context = LIST_CONTEXT;
28004 elsif ( $tok eq '&' ) {
28005 $id_scan_state = '&';
28007 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
28008 $saw_alpha = 0; # 'sub' is considered type info here
28009 $id_scan_state = '$';
28010 $identifier .= ' '; # need a space to separate sub from sub name
28012 elsif ( $tok eq '::' ) {
28013 $id_scan_state = 'A';
28015 elsif ( $tok =~ /^[A-Za-z_]/ ) {
28016 $id_scan_state = ':';
28018 elsif ( $tok eq '->' ) {
28019 $id_scan_state = '$';
28024 my ( $a, $b, $c ) = caller;
28025 warning("Program Bug: scan_identifier given bad token = $tok \n");
28026 warning(" called from sub $a line: $c\n");
28027 report_definite_bug();
28029 $saw_type = !$saw_alpha;
28033 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
28036 # now loop to gather the identifier
28039 while ( $i < $max_token_index ) {
28040 $i_save = $i unless ( $tok =~ /^\s*$/ );
28041 $tok = $$rtokens[ ++$i ];
28043 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
28048 if ( $id_scan_state eq '$' ) { # starting variable name
28050 if ( $tok eq '$' ) {
28052 $identifier .= $tok;
28054 # we've got a punctuation variable if end of line (punct.t)
28055 if ( $i == $max_token_index ) {
28057 $id_scan_state = '';
28061 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
28063 $id_scan_state = ':'; # now need ::
28064 $identifier .= $tok;
28066 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
28068 $id_scan_state = ':'; # now need ::
28069 $identifier .= $tok;
28071 # Perl will accept leading digits in identifiers,
28072 # although they may not always produce useful results.
28073 # Something like $main::0 is ok. But this also works:
28075 # sub howdy::123::bubba{ print "bubba $54321!\n" }
28076 # howdy::123::bubba();
28079 elsif ( $tok =~ /^[0-9]/ ) { # numeric
28081 $id_scan_state = ':'; # now need ::
28082 $identifier .= $tok;
28084 elsif ( $tok eq '::' ) {
28085 $id_scan_state = 'A';
28086 $identifier .= $tok;
28088 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
28089 $identifier .= $tok; # keep same state, a $ could follow
28091 elsif ( $tok eq '{' ) {
28093 # check for something like ${#} or ${©}
28094 ##if ( $identifier eq '$'
28098 || $identifier eq '@'
28099 || $identifier eq '$#'
28101 && $i + 2 <= $max_token_index
28102 && $$rtokens[ $i + 2 ] eq '}'
28103 && $$rtokens[ $i + 1 ] !~ /[\s\w]/
28106 my $next2 = $$rtokens[ $i + 2 ];
28107 my $next1 = $$rtokens[ $i + 1 ];
28108 $identifier .= $tok . $next1 . $next2;
28110 $id_scan_state = '';
28114 # skip something like ${xxx} or ->{
28115 $id_scan_state = '';
28117 # if this is the first token of a line, any tokens for this
28118 # identifier have already been accumulated
28119 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
28124 # space ok after leading $ % * & @
28125 elsif ( $tok =~ /^\s*$/ ) {
28127 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
28129 if ( length($identifier) > 1 ) {
28130 $id_scan_state = '';
28132 $type = 'i'; # probably punctuation variable
28137 # spaces after $'s are common, and space after @
28138 # is harmless, so only complain about space
28139 # after other type characters. Space after $ and
28140 # @ will be removed in formatting. Report space
28141 # after % and * because they might indicate a
28142 # parsing error. In other words '% ' might be a
28143 # modulo operator. Delete this warning if it
28145 if ( $identifier !~ /^[\@\$]$/ ) {
28147 "Space in identifier, following $identifier\n";
28153 # space after '->' is ok
28155 elsif ( $tok eq '^' ) {
28157 # check for some special variables like $^W
28158 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28159 $identifier .= $tok;
28160 $id_scan_state = 'A';
28162 # Perl accepts '$^]' or '@^]', but
28163 # there must not be a space before the ']'.
28164 my $next1 = $$rtokens[ $i + 1 ];
28165 if ( $next1 eq ']' ) {
28167 $identifier .= $next1;
28168 $id_scan_state = "";
28173 $id_scan_state = '';
28176 else { # something else
28178 # check for various punctuation variables
28179 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28180 $identifier .= $tok;
28183 elsif ( $identifier eq '$#' ) {
28185 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
28187 # perl seems to allow just these: $#: $#- $#+
28188 elsif ( $tok =~ /^[\:\-\+]$/ ) {
28190 $identifier .= $tok;
28194 write_logfile_entry( 'Use of $# is deprecated' . "\n" );
28197 elsif ( $identifier eq '$$' ) {
28199 # perl does not allow references to punctuation
28200 # variables without braces. For example, this
28204 # You would have to use
28208 if ( $tok eq '{' ) { $type = 't' }
28209 else { $type = 'i' }
28211 elsif ( $identifier eq '->' ) {
28216 if ( length($identifier) == 1 ) { $identifier = ''; }
28218 $id_scan_state = '';
28222 elsif ( $id_scan_state eq '&' ) { # starting sub call?
28224 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
28225 $id_scan_state = ':'; # now need ::
28227 $identifier .= $tok;
28229 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
28230 $id_scan_state = ':'; # now need ::
28232 $identifier .= $tok;
28234 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28235 $id_scan_state = ':'; # now need ::
28237 $identifier .= $tok;
28239 elsif ( $tok =~ /^\s*$/ ) { # allow space
28241 elsif ( $tok eq '::' ) { # leading ::
28242 $id_scan_state = 'A'; # accept alpha next
28243 $identifier .= $tok;
28245 elsif ( $tok eq '{' ) {
28246 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
28248 $id_scan_state = '';
28253 # punctuation variable?
28254 # testfile: cunningham4.pl
28256 # We have to be careful here. If we are in an unknown state,
28257 # we will reject the punctuation variable. In the following
28258 # example the '&' is a binary operator but we are in an unknown
28259 # state because there is no sigil on 'Prima', so we don't
28260 # know what it is. But it is a bad guess that
28261 # '&~' is a function variable.
28262 # $self->{text}->{colorMap}->[
28263 # Prima::PodView::COLOR_CODE_FOREGROUND
28264 # & ~tb::COLOR_INDEX ] =
28265 # $sec->{ColorCode}
28266 if ( $identifier eq '&' && $expecting ) {
28267 $identifier .= $tok;
28274 $id_scan_state = '';
28278 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
28280 if ( $tok =~ /^[A-Za-z_]/ ) { # found it
28281 $identifier .= $tok;
28282 $id_scan_state = ':'; # now need ::
28285 elsif ( $tok eq "'" && $allow_tick ) {
28286 $identifier .= $tok;
28287 $id_scan_state = ':'; # now need ::
28290 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28291 $identifier .= $tok;
28292 $id_scan_state = ':'; # now need ::
28295 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28296 $id_scan_state = '(';
28297 $identifier .= $tok;
28299 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28300 $id_scan_state = ')';
28301 $identifier .= $tok;
28304 $id_scan_state = '';
28309 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
28311 if ( $tok eq '::' ) { # got it
28312 $identifier .= $tok;
28313 $id_scan_state = 'A'; # now require alpha
28315 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
28316 $identifier .= $tok;
28317 $id_scan_state = ':'; # now need ::
28320 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28321 $identifier .= $tok;
28322 $id_scan_state = ':'; # now need ::
28325 elsif ( $tok eq "'" && $allow_tick ) { # tick
28327 if ( $is_keyword{$identifier} ) {
28328 $id_scan_state = ''; # that's all
28332 $identifier .= $tok;
28335 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28336 $id_scan_state = '(';
28337 $identifier .= $tok;
28339 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28340 $id_scan_state = ')';
28341 $identifier .= $tok;
28344 $id_scan_state = ''; # that's all
28349 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
28351 if ( $tok eq '(' ) { # got it
28352 $identifier .= $tok;
28353 $id_scan_state = ')'; # now find the end of it
28355 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
28356 $identifier .= $tok;
28359 $id_scan_state = ''; # that's all - no prototype
28364 elsif ( $id_scan_state eq ')' ) { # looking for ) to end
28366 if ( $tok eq ')' ) { # got it
28367 $identifier .= $tok;
28368 $id_scan_state = ''; # all done
28371 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
28372 $identifier .= $tok;
28374 else { # probable error in script, but keep going
28375 warning("Unexpected '$tok' while seeking end of prototype\n");
28376 $identifier .= $tok;
28379 else { # can get here due to error in initialization
28380 $id_scan_state = '';
28386 if ( $id_scan_state eq ')' ) {
28387 warning("Hit end of line while seeking ) to end prototype\n");
28390 # once we enter the actual identifier, it may not extend beyond
28391 # the end of the current line
28392 if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
28393 $id_scan_state = '';
28395 if ( $i < 0 ) { $i = 0 }
28402 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
28405 else { $type = 'i' }
28407 elsif ( $identifier eq '->' ) {
28411 ( length($identifier) > 1 )
28413 # In something like '@$=' we have an identifier '@$'
28414 # In something like '$${' we have type '$$' (and only
28415 # part of an identifier)
28416 && !( $identifier =~ /\$$/ && $tok eq '{' )
28417 && ( $identifier !~ /^(sub |package )$/ )
28422 else { $type = 't' }
28424 elsif ($saw_alpha) {
28426 # type 'w' includes anything without leading type info
28427 # ($,%,@,*) including something like abc::def::ghi
28432 } # this can happen on a restart
28436 $tok = $identifier;
28437 if ($message) { write_logfile_entry($message) }
28444 TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
28445 my ( $a, $b, $c ) = caller;
28447 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
28449 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
28451 return ( $i, $tok, $type, $id_scan_state, $identifier );
28456 # saved package and subnames in case prototype is on separate line
28457 my ( $package_saved, $subname_saved );
28461 # do_scan_sub parses a sub name and prototype
28462 # it is called with $i_beg equal to the index of the first nonblank
28463 # token following a 'sub' token.
28465 # TODO: add future error checks to be sure we have a valid
28466 # sub name. For example, 'sub &doit' is wrong. Also, be sure
28467 # a name is given if and only if a non-anonymous sub is
28469 # USES GLOBAL VARS: $current_package, $last_nonblank_token,
28470 # $in_attribute_list, %saw_function_definition,
28474 $input_line, $i, $i_beg,
28475 $tok, $type, $rtokens,
28476 $rtoken_map, $id_scan_state, $max_token_index
28478 $id_scan_state = ""; # normally we get everything in one call
28479 my $subname = undef;
28480 my $package = undef;
28485 my $pos_beg = $$rtoken_map[$i_beg];
28486 pos($input_line) = $pos_beg;
28488 # sub NAME PROTO ATTRS
28490 $input_line =~ m/\G\s*
28491 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
28492 (\w+) # NAME - required
28493 (\s*\([^){]*\))? # PROTO - something in parens
28494 (\s*:)? # ATTRS - leading : of attribute list
28503 $package = ( defined($1) && $1 ) ? $1 : $current_package;
28504 $package =~ s/\'/::/g;
28505 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
28506 $package =~ s/::$//;
28507 my $pos = pos($input_line);
28508 my $numc = $pos - $pos_beg;
28509 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
28513 # Look for prototype/attributes not preceded on this line by subname;
28514 # This might be an anonymous sub with attributes,
28515 # or a prototype on a separate line from its sub name
28517 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
28518 (\s*:)? # ATTRS leading ':'
28527 # Handle prototype on separate line from subname
28528 if ($subname_saved) {
28529 $package = $package_saved;
28530 $subname = $subname_saved;
28531 $tok = $last_nonblank_token;
28538 # ATTRS: if there are attributes, back up and let the ':' be
28539 # found later by the scanner.
28540 my $pos = pos($input_line);
28542 $pos -= length($attrs);
28545 my $next_nonblank_token = $tok;
28547 # catch case of line with leading ATTR ':' after anonymous sub
28548 if ( $pos == $pos_beg && $tok eq ':' ) {
28550 $in_attribute_list = 1;
28553 # We must convert back from character position
28554 # to pre_token index.
28557 # I don't think an error flag can occur here ..but ?
28559 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
28560 $max_token_index );
28561 if ($error) { warning("Possibly invalid sub\n") }
28563 # check for multiple definitions of a sub
28564 ( $next_nonblank_token, my $i_next ) =
28565 find_next_nonblank_token_on_this_line( $i, $rtokens,
28566 $max_token_index );
28569 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
28570 { # skip blank or side comment
28571 my ( $rpre_tokens, $rpre_types ) =
28572 peek_ahead_for_n_nonblank_pre_tokens(1);
28573 if ( defined($rpre_tokens) && @$rpre_tokens ) {
28574 $next_nonblank_token = $rpre_tokens->[0];
28577 $next_nonblank_token = '}';
28580 $package_saved = "";
28581 $subname_saved = "";
28582 if ( $next_nonblank_token eq '{' ) {
28585 # Check for multiple definitions of a sub, but
28586 # it is ok to have multiple sub BEGIN, etc,
28587 # so we do not complain if name is all caps
28588 if ( $saw_function_definition{$package}{$subname}
28589 && $subname !~ /^[A-Z]+$/ )
28591 my $lno = $saw_function_definition{$package}{$subname};
28593 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
28596 $saw_function_definition{$package}{$subname} =
28597 $tokenizer_self->{_last_line_number};
28600 elsif ( $next_nonblank_token eq ';' ) {
28602 elsif ( $next_nonblank_token eq '}' ) {
28605 # ATTRS - if an attribute list follows, remember the name
28606 # of the sub so the next opening brace can be labeled.
28607 # Setting 'statement_type' causes any ':'s to introduce
28609 elsif ( $next_nonblank_token eq ':' ) {
28610 $statement_type = $tok;
28613 # see if PROTO follows on another line:
28614 elsif ( $next_nonblank_token eq '(' ) {
28615 if ( $attrs || $proto ) {
28617 "unexpected '(' after definition or declaration of sub '$subname'\n"
28621 $id_scan_state = 'sub'; # we must come back to get proto
28622 $statement_type = $tok;
28623 $package_saved = $package;
28624 $subname_saved = $subname;
28627 elsif ($next_nonblank_token) { # EOF technically ok
28629 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
28632 check_prototype( $proto, $package, $subname );
28635 # no match but line not blank
28638 return ( $i, $tok, $type, $id_scan_state );
28642 #########i###############################################################
28643 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
28644 #########################################################################
28646 sub find_next_nonblank_token {
28647 my ( $i, $rtokens, $max_token_index ) = @_;
28649 if ( $i >= $max_token_index ) {
28650 if ( !peeked_ahead() ) {
28653 peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
28656 my $next_nonblank_token = $$rtokens[ ++$i ];
28658 if ( $next_nonblank_token =~ /^\s*$/ ) {
28659 $next_nonblank_token = $$rtokens[ ++$i ];
28661 return ( $next_nonblank_token, $i );
28664 sub numerator_expected {
28666 # this is a filter for a possible numerator, in support of guessing
28667 # for the / pattern delimiter token.
28672 # Note: I am using the convention that variables ending in
28673 # _expected have these 3 possible values.
28674 my ( $i, $rtokens, $max_token_index ) = @_;
28675 my $next_token = $$rtokens[ $i + 1 ];
28676 if ( $next_token eq '=' ) { $i++; } # handle /=
28677 my ( $next_nonblank_token, $i_next ) =
28678 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28680 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
28685 if ( $next_nonblank_token =~ /^\s*$/ ) {
28694 sub pattern_expected {
28696 # This is the start of a filter for a possible pattern.
28697 # It looks at the token after a possible pattern and tries to
28698 # determine if that token could end a pattern.
28703 my ( $i, $rtokens, $max_token_index ) = @_;
28704 my $next_token = $$rtokens[ $i + 1 ];
28705 if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
28706 my ( $next_nonblank_token, $i_next ) =
28707 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28709 # list of tokens which may follow a pattern
28710 # (can probably be expanded)
28711 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
28717 if ( $next_nonblank_token =~ /^\s*$/ ) {
28726 sub find_next_nonblank_token_on_this_line {
28727 my ( $i, $rtokens, $max_token_index ) = @_;
28728 my $next_nonblank_token;
28730 if ( $i < $max_token_index ) {
28731 $next_nonblank_token = $$rtokens[ ++$i ];
28733 if ( $next_nonblank_token =~ /^\s*$/ ) {
28735 if ( $i < $max_token_index ) {
28736 $next_nonblank_token = $$rtokens[ ++$i ];
28741 $next_nonblank_token = "";
28743 return ( $next_nonblank_token, $i );
28746 sub find_angle_operator_termination {
28748 # We are looking at a '<' and want to know if it is an angle operator.
28749 # We are to return:
28750 # $i = pretoken index of ending '>' if found, current $i otherwise
28751 # $type = 'Q' if found, '>' otherwise
28752 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
28755 pos($input_line) = 1 + $$rtoken_map[$i];
28759 # we just have to find the next '>' if a term is expected
28760 if ( $expecting == TERM ) { $filter = '[\>]' }
28762 # we have to guess if we don't know what is expected
28763 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
28765 # shouldn't happen - we shouldn't be here if operator is expected
28766 else { warning("Program Bug in find_angle_operator_termination\n") }
28768 # To illustrate what we might be looking at, in case we are
28769 # guessing, here are some examples of valid angle operators
28776 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
28777 # <${PREFIX}*img*.$IMAGE_TYPE>
28778 # <img*.$IMAGE_TYPE>
28779 # <Timg*.$IMAGE_TYPE>
28780 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
28782 # Here are some examples of lines which do not have angle operators:
28783 # return undef unless $self->[2]++ < $#{$self->[1]};
28786 # the following line from dlister.pl caused trouble:
28787 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
28789 # If the '<' starts an angle operator, it must end on this line and
28790 # it must not have certain characters like ';' and '=' in it. I use
28791 # this to limit the testing. This filter should be improved if
28794 if ( $input_line =~ /($filter)/g ) {
28798 # We MAY have found an angle operator termination if we get
28799 # here, but we need to do more to be sure we haven't been
28801 my $pos = pos($input_line);
28803 my $pos_beg = $$rtoken_map[$i];
28804 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
28806 # Reject if the closing '>' follows a '-' as in:
28807 # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
28808 if ( $expecting eq UNKNOWN ) {
28809 my $check = substr( $input_line, $pos - 2, 1 );
28810 if ( $check eq '-' ) {
28811 return ( $i, $type );
28815 ######################################debug#####
28816 #write_diagnostics( "ANGLE? :$str\n");
28817 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
28818 ######################################debug#####
28822 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28824 # It may be possible that a quote ends midway in a pretoken.
28825 # If this happens, it may be necessary to split the pretoken.
28828 "Possible tokinization error..please check this line\n");
28829 report_possible_bug();
28832 # Now let's see where we stand....
28833 # OK if math op not possible
28834 if ( $expecting == TERM ) {
28837 # OK if there are no more than 2 pre-tokens inside
28838 # (not possible to write 2 token math between < and >)
28839 # This catches most common cases
28840 elsif ( $i <= $i_beg + 3 ) {
28841 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
28847 # Let's try a Brace Test: any braces inside must balance
28849 while ( $str =~ /\{/g ) { $br++ }
28850 while ( $str =~ /\}/g ) { $br-- }
28852 while ( $str =~ /\[/g ) { $sb++ }
28853 while ( $str =~ /\]/g ) { $sb-- }
28855 while ( $str =~ /\(/g ) { $pr++ }
28856 while ( $str =~ /\)/g ) { $pr-- }
28858 # if braces do not balance - not angle operator
28859 if ( $br || $sb || $pr ) {
28863 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
28866 # we should keep doing more checks here...to be continued
28867 # Tentatively accepting this as a valid angle operator.
28868 # There are lots more things that can be checked.
28871 "ANGLE-Guessing yes: $str expecting=$expecting\n");
28872 write_logfile_entry("Guessing angle operator here: $str\n");
28877 # didn't find ending >
28879 if ( $expecting == TERM ) {
28880 warning("No ending > for angle operator\n");
28884 return ( $i, $type );
28887 sub scan_number_do {
28889 # scan a number in any of the formats that Perl accepts
28890 # Underbars (_) are allowed in decimal numbers.
28891 # input parameters -
28892 # $input_line - the string to scan
28893 # $i - pre_token index to start scanning
28894 # $rtoken_map - reference to the pre_token map giving starting
28895 # character position in $input_line of token $i
28896 # output parameters -
28897 # $i - last pre_token index of the number just scanned
28898 # number - the number (characters); or undef if not a number
28900 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
28901 my $pos_beg = $$rtoken_map[$i];
28904 my $number = undef;
28905 my $type = $input_type;
28907 my $first_char = substr( $input_line, $pos_beg, 1 );
28909 # Look for bad starting characters; Shouldn't happen..
28910 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
28911 warning("Program bug - scan_number given character $first_char\n");
28912 report_definite_bug();
28913 return ( $i, $type, $number );
28916 # handle v-string without leading 'v' character ('Two Dot' rule)
28918 # TODO: v-strings may contain underscores
28919 pos($input_line) = $pos_beg;
28920 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
28921 $pos = pos($input_line);
28922 my $numc = $pos - $pos_beg;
28923 $number = substr( $input_line, $pos_beg, $numc );
28925 report_v_string($number);
28928 # handle octal, hex, binary
28929 if ( !defined($number) ) {
28930 pos($input_line) = $pos_beg;
28931 if ( $input_line =~
28932 /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
28934 $pos = pos($input_line);
28935 my $numc = $pos - $pos_beg;
28936 $number = substr( $input_line, $pos_beg, $numc );
28942 if ( !defined($number) ) {
28943 pos($input_line) = $pos_beg;
28945 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
28946 $pos = pos($input_line);
28948 # watch out for things like 0..40 which would give 0. by this;
28949 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
28950 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
28954 my $numc = $pos - $pos_beg;
28955 $number = substr( $input_line, $pos_beg, $numc );
28960 # filter out non-numbers like e + - . e2 .e3 +e6
28961 # the rule: at least one digit, and any 'e' must be preceded by a digit
28963 $number !~ /\d/ # no digits
28964 || ( $number =~ /^(.*)[eE]/
28965 && $1 !~ /\d/ ) # or no digits before the 'e'
28969 $type = $input_type;
28970 return ( $i, $type, $number );
28973 # Found a number; now we must convert back from character position
28974 # to pre_token index. An error here implies user syntax error.
28975 # An example would be an invalid octal number like '009'.
28978 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28979 if ($error) { warning("Possibly invalid number\n") }
28981 return ( $i, $type, $number );
28984 sub inverse_pretoken_map {
28986 # Starting with the current pre_token index $i, scan forward until
28987 # finding the index of the next pre_token whose position is $pos.
28988 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
28991 while ( ++$i <= $max_token_index ) {
28993 if ( $pos <= $$rtoken_map[$i] ) {
28995 # Let the calling routine handle errors in which we do not
28996 # land on a pre-token boundary. It can happen by running
28997 # perltidy on some non-perl scripts, for example.
28998 if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
29003 return ( $i, $error );
29006 sub find_here_doc {
29008 # find the target of a here document, if any
29009 # input parameters:
29010 # $i - token index of the second < of <<
29011 # ($i must be less than the last token index if this is called)
29012 # output parameters:
29013 # $found_target = 0 didn't find target; =1 found target
29014 # HERE_TARGET - the target string (may be empty string)
29015 # $i - unchanged if not here doc,
29016 # or index of the last token of the here target
29017 # $saw_error - flag noting unbalanced quote on here target
29018 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
29020 my $found_target = 0;
29021 my $here_doc_target = '';
29022 my $here_quote_character = '';
29024 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
29025 $next_token = $$rtokens[ $i + 1 ];
29027 # perl allows a backslash before the target string (heredoc.t)
29029 if ( $next_token eq '\\' ) {
29031 $next_token = $$rtokens[ $i + 2 ];
29034 ( $next_nonblank_token, $i_next_nonblank ) =
29035 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
29037 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
29040 my $quote_depth = 0;
29045 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
29048 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
29049 $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
29051 if ($in_quote) { # didn't find end of quote, so no target found
29053 if ( $expecting == TERM ) {
29055 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
29060 else { # found ending quote
29065 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
29066 $tokj = $$rtokens[$j];
29068 # we have to remove any backslash before the quote character
29069 # so that the here-doc-target exactly matches this string
29073 && $$rtokens[ $j + 1 ] eq $here_quote_character );
29074 $here_doc_target .= $tokj;
29079 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
29081 write_logfile_entry(
29082 "found blank here-target after <<; suggest using \"\"\n");
29085 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
29087 my $here_doc_expected;
29088 if ( $expecting == UNKNOWN ) {
29089 $here_doc_expected = guess_if_here_doc($next_token);
29092 $here_doc_expected = 1;
29095 if ($here_doc_expected) {
29097 $here_doc_target = $next_token;
29104 if ( $expecting == TERM ) {
29106 write_logfile_entry("Note: bare here-doc operator <<\n");
29113 # patch to neglect any prepended backslash
29114 if ( $found_target && $backslash ) { $i++ }
29116 return ( $found_target, $here_doc_target, $here_quote_character, $i,
29122 # follow (or continue following) quoted string(s)
29123 # $in_quote return code:
29124 # 0 - ok, found end
29125 # 1 - still must find end of quote whose target is $quote_character
29126 # 2 - still looking for end of first of two quotes
29128 # Returns updated strings:
29129 # $quoted_string_1 = quoted string seen while in_quote=1
29130 # $quoted_string_2 = quoted string seen while in_quote=2
29132 $i, $in_quote, $quote_character,
29133 $quote_pos, $quote_depth, $quoted_string_1,
29134 $quoted_string_2, $rtokens, $rtoken_map,
29138 my $in_quote_starting = $in_quote;
29141 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
29144 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29147 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
29148 $quote_pos, $quote_depth, $max_token_index );
29149 $quoted_string_2 .= $quoted_string;
29150 if ( $in_quote == 1 ) {
29151 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
29152 $quote_character = '';
29155 $quoted_string_2 .= "\n";
29159 if ( $in_quote == 1 ) { # one (more) quote to follow
29162 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29165 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
29166 $quote_pos, $quote_depth, $max_token_index );
29167 $quoted_string_1 .= $quoted_string;
29168 if ( $in_quote == 1 ) {
29169 $quoted_string_1 .= "\n";
29172 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29173 $quoted_string_1, $quoted_string_2 );
29176 sub follow_quoted_string {
29178 # scan for a specific token, skipping escaped characters
29179 # if the quote character is blank, use the first non-blank character
29180 # input parameters:
29181 # $rtokens = reference to the array of tokens
29182 # $i = the token index of the first character to search
29183 # $in_quote = number of quoted strings being followed
29184 # $beginning_tok = the starting quote character
29185 # $quote_pos = index to check next for alphanumeric delimiter
29186 # output parameters:
29187 # $i = the token index of the ending quote character
29188 # $in_quote = decremented if found end, unchanged if not
29189 # $beginning_tok = the starting quote character
29190 # $quote_pos = index to check next for alphanumeric delimiter
29191 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
29192 # $quoted_string = the text of the quote (without quotation tokens)
29193 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
29196 my ( $tok, $end_tok );
29197 my $i = $i_beg - 1;
29198 my $quoted_string = "";
29200 TOKENIZER_DEBUG_FLAG_QUOTE && do {
29202 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
29205 # get the corresponding end token
29206 if ( $beginning_tok !~ /^\s*$/ ) {
29207 $end_tok = matching_end_token($beginning_tok);
29210 # a blank token means we must find and use the first non-blank one
29212 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
29214 while ( $i < $max_token_index ) {
29215 $tok = $$rtokens[ ++$i ];
29217 if ( $tok !~ /^\s*$/ ) {
29219 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
29220 $i = $max_token_index;
29224 if ( length($tok) > 1 ) {
29225 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
29226 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
29229 $beginning_tok = $tok;
29232 $end_tok = matching_end_token($beginning_tok);
29238 $allow_quote_comments = 1;
29243 # There are two different loops which search for the ending quote
29244 # character. In the rare case of an alphanumeric quote delimiter, we
29245 # have to look through alphanumeric tokens character-by-character, since
29246 # the pre-tokenization process combines multiple alphanumeric
29247 # characters, whereas for a non-alphanumeric delimiter, only tokens of
29248 # length 1 can match.
29250 ###################################################################
29251 # Case 1 (rare): loop for case of alphanumeric quote delimiter..
29252 # "quote_pos" is the position the current word to begin searching
29253 ###################################################################
29254 if ( $beginning_tok =~ /\w/ ) {
29256 # Note this because it is not recommended practice except
29257 # for obfuscated perl contests
29258 if ( $in_quote == 1 ) {
29259 write_logfile_entry(
29260 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
29263 while ( $i < $max_token_index ) {
29265 if ( $quote_pos == 0 || ( $i < 0 ) ) {
29266 $tok = $$rtokens[ ++$i ];
29268 if ( $tok eq '\\' ) {
29270 # retain backslash unless it hides the end token
29271 $quoted_string .= $tok
29272 unless $$rtokens[ $i + 1 ] eq $end_tok;
29274 last if ( $i >= $max_token_index );
29275 $tok = $$rtokens[ ++$i ];
29278 my $old_pos = $quote_pos;
29280 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
29284 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
29286 if ( $quote_pos > 0 ) {
29289 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
29293 if ( $quote_depth == 0 ) {
29299 $quoted_string .= substr( $tok, $old_pos );
29304 ########################################################################
29305 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
29306 ########################################################################
29309 while ( $i < $max_token_index ) {
29310 $tok = $$rtokens[ ++$i ];
29312 if ( $tok eq $end_tok ) {
29315 if ( $quote_depth == 0 ) {
29320 elsif ( $tok eq $beginning_tok ) {
29323 elsif ( $tok eq '\\' ) {
29325 # retain backslash unless it hides the beginning or end token
29326 $tok = $$rtokens[ ++$i ];
29327 $quoted_string .= '\\'
29328 unless ( $tok eq $end_tok || $tok eq $beginning_tok );
29330 $quoted_string .= $tok;
29333 if ( $i > $max_token_index ) { $i = $max_token_index }
29334 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
29338 sub indicate_error {
29339 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
29340 interrupt_logfile();
29342 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
29346 sub write_error_indicator_pair {
29347 my ( $line_number, $input_line, $pos, $carrat ) = @_;
29348 my ( $offset, $numbered_line, $underline ) =
29349 make_numbered_line( $line_number, $input_line, $pos );
29350 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
29351 warning( $numbered_line . "\n" );
29352 $underline =~ s/\s*$//;
29353 warning( $underline . "\n" );
29356 sub make_numbered_line {
29358 # Given an input line, its line number, and a character position of
29359 # interest, create a string not longer than 80 characters of the form
29360 # $lineno: sub_string
29361 # such that the sub_string of $str contains the position of interest
29363 # Here is an example of what we want, in this case we add trailing
29364 # '...' because the line is long.
29366 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29368 # Here is another example, this time in which we used leading '...'
29369 # because of excessive length:
29371 # 2: ... er of the World Wide Web Consortium's
29373 # input parameters are:
29374 # $lineno = line number
29375 # $str = the text of the line
29376 # $pos = position of interest (the error) : 0 = first character
29379 # - $offset = an offset which corrects the position in case we only
29380 # display part of a line, such that $pos-$offset is the effective
29381 # position from the start of the displayed line.
29382 # - $numbered_line = the numbered line as above,
29383 # - $underline = a blank 'underline' which is all spaces with the same
29384 # number of characters as the numbered line.
29386 my ( $lineno, $str, $pos ) = @_;
29387 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
29388 my $excess = length($str) - $offset - 68;
29389 my $numc = ( $excess > 0 ) ? 68 : undef;
29391 if ( defined($numc) ) {
29392 if ( $offset == 0 ) {
29393 $str = substr( $str, $offset, $numc - 4 ) . " ...";
29396 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
29401 if ( $offset == 0 ) {
29404 $str = "... " . substr( $str, $offset + 4 );
29408 my $numbered_line = sprintf( "%d: ", $lineno );
29409 $offset -= length($numbered_line);
29410 $numbered_line .= $str;
29411 my $underline = " " x length($numbered_line);
29412 return ( $offset, $numbered_line, $underline );
29415 sub write_on_underline {
29417 # The "underline" is a string that shows where an error is; it starts
29418 # out as a string of blanks with the same length as the numbered line of
29419 # code above it, and we have to add marking to show where an error is.
29420 # In the example below, we want to write the string '--^' just below
29421 # the line of bad code:
29423 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29425 # We are given the current underline string, plus a position and a
29426 # string to write on it.
29428 # In the above example, there will be 2 calls to do this:
29429 # First call: $pos=19, pos_chr=^
29430 # Second call: $pos=16, pos_chr=---
29432 # This is a trivial thing to do with substr, but there is some
29435 my ( $underline, $pos, $pos_chr ) = @_;
29437 # check for error..shouldn't happen
29438 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
29441 my $excess = length($pos_chr) + $pos - length($underline);
29442 if ( $excess > 0 ) {
29443 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
29445 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
29446 return ($underline);
29451 # Break a string, $str, into a sequence of preliminary tokens. We
29452 # are interested in these types of tokens:
29453 # words (type='w'), example: 'max_tokens_wanted'
29454 # digits (type = 'd'), example: '0755'
29455 # whitespace (type = 'b'), example: ' '
29456 # any other single character (i.e. punct; type = the character itself).
29457 # We cannot do better than this yet because we might be in a quoted
29458 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
29460 my ( $str, $max_tokens_wanted ) = @_;
29462 # we return references to these 3 arrays:
29463 my @tokens = (); # array of the tokens themselves
29464 my @token_map = (0); # string position of start of each token
29465 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
29470 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
29473 # note that this must come before words!
29474 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
29477 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
29479 # single-character punctuation
29480 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
29484 return ( \@tokens, \@token_map, \@type );
29488 push @token_map, pos($str);
29490 } while ( --$max_tokens_wanted != 0 );
29492 return ( \@tokens, \@token_map, \@type );
29497 # this is an old debug routine
29498 my ( $rtokens, $rtoken_map ) = @_;
29499 my $num = scalar(@$rtokens);
29502 for ( $i = 0 ; $i < $num ; $i++ ) {
29503 my $len = length( $$rtokens[$i] );
29504 print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
29508 sub matching_end_token {
29510 # find closing character for a pattern
29511 my $beginning_token = shift;
29513 if ( $beginning_token eq '{' ) {
29516 elsif ( $beginning_token eq '[' ) {
29519 elsif ( $beginning_token eq '<' ) {
29522 elsif ( $beginning_token eq '(' ) {
29530 sub dump_token_types {
29534 # This should be the latest list of token types in use
29535 # adding NEW_TOKENS: add a comment here
29536 print $fh <<'END_OF_LIST';
29538 Here is a list of the token types currently used for lines of type 'CODE'.
29539 For the following tokens, the "type" of a token is just the token itself.
29541 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
29542 ( ) <= >= == =~ !~ != ++ -- /= x=
29543 ... **= <<= >>= &&= ||= //= <=>
29544 , + - / * | % ! x ~ = \ ? : . < > ^ &
29546 The following additional token types are defined:
29549 b blank (white space)
29550 { indent: opening structural curly brace or square bracket or paren
29551 (code block, anonymous hash reference, or anonymous array reference)
29552 } outdent: right structural curly brace or square bracket or paren
29553 [ left non-structural square bracket (enclosing an array index)
29554 ] right non-structural square bracket
29555 ( left non-structural paren (all but a list right of an =)
29556 ) right non-structural paren
29557 L left non-structural curly brace (enclosing a key)
29558 R right non-structural curly brace
29559 ; terminal semicolon
29560 f indicates a semicolon in a "for" statement
29561 h here_doc operator <<
29563 Q indicates a quote or pattern
29564 q indicates a qw quote block
29566 C user-defined constant or constant function (with void prototype = ())
29567 U user-defined function taking parameters
29568 G user-defined function taking block parameter (like grep/map/eval)
29569 M (unused, but reserved for subroutine definition name)
29570 P (unused, but -html uses it to label pod text)
29571 t type indicater such as %,$,@,*,&,sub
29572 w bare word (perhaps a subroutine call)
29573 i identifier of some type (with leading %, $, @, *, &, sub, -> )
29576 F a file test operator (like -e)
29578 Z identifier in indirect object slot: may be file handle, object
29579 J LABEL: code block label
29580 j LABEL after next, last, redo, goto
29583 pp pre-increment operator ++
29584 mm pre-decrement operator --
29585 A : used as attribute separator
29587 Here are the '_line_type' codes used internally:
29588 SYSTEM - system-specific code before hash-bang line
29589 CODE - line of perl code (including comments)
29590 POD_START - line starting pod, such as '=head'
29591 POD - pod documentation text
29592 POD_END - last line of pod section, '=cut'
29593 HERE - text of here-document
29594 HERE_END - last line of here-doc (target word)
29595 FORMAT - format section
29596 FORMAT_END - last line of format section, '.'
29597 DATA_START - __DATA__ line
29598 DATA - unidentified text following __DATA__
29599 END_START - __END__ line
29600 END - unidentified text following __END__
29601 ERROR - we are in big trouble, probably not a perl script
29607 # These names are used in error messages
29608 @opening_brace_names = qw# '{' '[' '(' '?' #;
29609 @closing_brace_names = qw# '}' ']' ')' ':' #;
29612 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
29613 <= >= == =~ !~ != ++ -- /= x= ~~
29615 @is_digraph{@digraphs} = (1) x scalar(@digraphs);
29617 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
29618 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
29620 # make a hash of all valid token types for self-checking the tokenizer
29621 # (adding NEW_TOKENS : select a new character and add to this list)
29622 my @valid_token_types = qw#
29623 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
29624 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
29626 push( @valid_token_types, @digraphs );
29627 push( @valid_token_types, @trigraphs );
29628 push( @valid_token_types, ( '#', ',', 'CORE::' ) );
29629 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
29631 # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
29632 my @file_test_operators =
29633 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
29634 @is_file_test_operator{@file_test_operators} =
29635 (1) x scalar(@file_test_operators);
29637 # these functions have prototypes of the form (&), so when they are
29638 # followed by a block, that block MAY BE followed by an operator.
29639 # Smartmatch operator ~~ may be followed by anonymous hash or array ref
29640 @_ = qw( do eval );
29641 @is_block_operator{@_} = (1) x scalar(@_);
29643 # these functions allow an identifier in the indirect object slot
29644 @_ = qw( print printf sort exec system say);
29645 @is_indirect_object_taker{@_} = (1) x scalar(@_);
29647 # These tokens may precede a code block
29648 # patched for SWITCH/CASE
29650 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
29651 unless do while until eval for foreach map grep sort
29652 switch case given when);
29653 @is_code_block_token{@_} = (1) x scalar(@_);
29655 # I'll build the list of keywords incrementally
29658 # keywords and tokens after which a value or pattern is expected,
29659 # but not an operator. In other words, these should consume terms
29660 # to their right, or at least they are not expected to be followed
29661 # immediately by operators.
29662 my @value_requestor = qw(
29883 # patched above for SWITCH/CASE given/when err say
29884 # 'err' is a fairly safe addition.
29885 # TODO: 'default' still needed if appropriate
29886 # 'use feature' seen, but perltidy works ok without it.
29887 # Concerned that 'default' could break code.
29888 push( @Keywords, @value_requestor );
29890 # These are treated the same but are not keywords:
29895 push( @value_requestor, @extra_vr );
29897 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
29899 # this list contains keywords which do not look for arguments,
29900 # so that they might be followed by an operator, or at least
29902 my @operator_requestor = qw(
29926 push( @Keywords, @operator_requestor );
29928 # These are treated the same but are not considered keywords:
29935 push( @operator_requestor, @extra_or );
29937 @expecting_operator_token{@operator_requestor} =
29938 (1) x scalar(@operator_requestor);
29940 # these token TYPES expect trailing operator but not a term
29941 # note: ++ and -- are post-increment and decrement, 'C' = constant
29942 my @operator_requestor_types = qw( ++ -- C <> q );
29943 @expecting_operator_types{@operator_requestor_types} =
29944 (1) x scalar(@operator_requestor_types);
29946 # these token TYPES consume values (terms)
29947 # note: pp and mm are pre-increment and decrement
29948 # f=semicolon in for, F=file test operator
29949 my @value_requestor_type = qw#
29950 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
29951 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
29952 <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
29953 f F pp mm Y p m U J G j >> << ^ t
29955 push( @value_requestor_type, ',' )
29956 ; # (perl doesn't like a ',' in a qw block)
29957 @expecting_term_types{@value_requestor_type} =
29958 (1) x scalar(@value_requestor_type);
29960 # Note: the following valid token types are not assigned here to
29961 # hashes requesting to be followed by values or terms, but are
29962 # instead currently hard-coded into sub operator_expected:
29963 # ) -> :: Q R Z ] b h i k n v w } #
29965 # For simple syntax checking, it is nice to have a list of operators which
29966 # will really be unhappy if not followed by a term. This includes most
29968 %really_want_term = %expecting_term_types;
29970 # with these exceptions...
29971 delete $really_want_term{'U'}; # user sub, depends on prototype
29972 delete $really_want_term{'F'}; # file test works on $_ if no following term
29973 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
29976 @_ = qw(q qq qw qx qr s y tr m);
29977 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
29979 # These keywords are handled specially in the tokenizer code:
29980 my @special_keywords = qw(
29996 push( @Keywords, @special_keywords );
29998 # Keywords after which list formatting may be used
29999 # WARNING: do not include |map|grep|eval or perl may die on
30000 # syntax errors (map1.t).
30001 my @keyword_taking_list = qw(
30075 @is_keyword_taking_list{@keyword_taking_list} =
30076 (1) x scalar(@keyword_taking_list);
30078 # These are not used in any way yet
30079 # my @unused_keywords = qw(
30085 # The list of keywords was originally extracted from function 'keyword' in
30086 # perl file toke.c version 5.005.03, using this utility, plus a
30087 # little editing: (file getkwd.pl):
30088 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
30089 # Add 'get' prefix where necessary, then split into the above lists.
30090 # This list should be updated as necessary.
30091 # The list should not contain these special variables:
30092 # ARGV DATA ENV SIG STDERR STDIN STDOUT
30095 @is_keyword{@Keywords} = (1) x scalar(@Keywords);