2 ###########################################################-
4 # perltidy - a perl script indenter and formatter
6 # Copyright (c) 2000-2018 by Steve Hancock
7 # Distributed under the GPL license agreement; see file COPYING
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License along
20 # with this program; if not, write to the Free Software Foundation, Inc.,
21 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 # For brief instructions, try 'perltidy -h'.
24 # For more complete documentation, try 'man perltidy'
25 # or visit http://perltidy.sourceforge.net
27 # This script is an example of the default style. It was formatted with:
31 # Code Contributions: See ChangeLog.html for a complete history.
32 # Michael Cartmell supplied code for adaptation to VMS and helped with
34 # Hugh S. Myers supplied sub streamhandle and the supporting code to
35 # create a Perl::Tidy module which can operate on strings, arrays, etc.
36 # Yves Orton supplied coding to help detect Windows versions.
37 # Axel Rose supplied a patch for MacPerl.
38 # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
39 # Dan Tyrell contributed a patch for binary I/O.
40 # Ueli Hugenschmidt contributed a patch for -fpsc
41 # Sam Kington supplied a patch to identify the initial indentation of
43 # jonathan swartz supplied patches for:
44 # * .../ pattern, which looks upwards from directory
45 # * --notidy, to be used in directories where we want to avoid
46 # accidentally tidying
47 # * prefilter and postfilter
50 # Many others have supplied key ideas, suggestions, and bug reports;
51 # see the CHANGES file.
53 ############################################################
57 # perlver reports minimum version needed is 5.8.0
58 # 5.004 needed for IO::File
59 # 5.008 needed for wide characters
73 $rOpts_character_encoding
76 @ISA = qw( Exporter );
77 @EXPORT = qw( &perltidy );
84 use File::Temp qw(tempfile);
87 ( $VERSION = q($Id: Tidy.pm,v 1.74 2018/02/20 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
92 # given filename and mode (r or w), create an object which:
93 # has a 'getline' method if mode='r', and
94 # has a 'print' method if mode='w'.
95 # The objects also need a 'close' method.
97 # How the object is made:
99 # if $filename is: Make object using:
100 # ---------------- -----------------
101 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
103 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
104 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
106 # (check for 'print' method for 'w' mode)
107 # (check for 'getline' method for 'r' mode)
108 my ( $filename, $mode ) = @_;
110 my $ref = ref($filename);
116 if ( $ref eq 'ARRAY' ) {
117 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
119 elsif ( $ref eq 'SCALAR' ) {
120 $New = sub { Perl::Tidy::IOScalar->new(@_) };
124 # Accept an object with a getline method for reading. Note:
125 # IO::File is built-in and does not respond to the defined
126 # operator. If this causes trouble, the check can be
127 # skipped and we can just let it crash if there is no
129 if ( $mode =~ /[rR]/ ) {
131 # RT#97159; part 1 of 2: updated to use 'can'
132 ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
133 if ( $ref->can('getline') ) {
134 $New = sub { $filename };
137 $New = sub { undef };
139 ------------------------------------------------------------------------
140 No 'getline' method is defined for object of class $ref
141 Please check your call to Perl::Tidy::perltidy. Trace follows.
142 ------------------------------------------------------------------------
147 # Accept an object with a print method for writing.
148 # See note above about IO::File
149 if ( $mode =~ /[wW]/ ) {
151 # RT#97159; part 2 of 2: updated to use 'can'
152 ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
153 if ( $ref->can('print') ) {
154 $New = sub { $filename };
157 $New = sub { undef };
159 ------------------------------------------------------------------------
160 No 'print' method is defined for object of class $ref
161 Please check your call to Perl::Tidy::perltidy. Trace follows.
162 ------------------------------------------------------------------------
171 if ( $filename eq '-' ) {
172 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
175 $New = sub { IO::File->new(@_) };
178 $fh = $New->( $filename, $mode )
179 or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
181 return $fh, ( $ref or $filename );
184 sub find_input_line_ending {
186 # Peek at a file and return first line ending character.
187 # Quietly return undef in case of any trouble.
188 my ($input_file) = @_;
191 # silently ignore input from object or stdin
192 if ( ref($input_file) || $input_file eq '-' ) {
197 open( $fh, '<', $input_file ) || return $ending;
201 read( $fh, $buf, 1024 );
203 if ( $buf && $buf =~ /([\012\015]+)/ ) {
207 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
210 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
213 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
227 # concatenate a path and file basename
228 # returns undef in case of error
232 #BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
234 eval { require File::Spec };
235 $missing_file_spec = $@;
238 # use File::Spec if we can
239 unless ($missing_file_spec) {
240 return File::Spec->catfile(@parts);
243 # Perl 5.004 systems may not have File::Spec so we'll make
244 # a simple try. We assume File::Basename is available.
245 # return undef if not successful.
246 my $name = pop @parts;
247 my $path = join '/', @parts;
248 my $test_file = $path . $name;
249 my ( $test_name, $test_path ) = fileparse($test_file);
250 return $test_file if ( $test_name eq $name );
251 return if ( $^O eq 'VMS' );
253 # this should work at least for Windows and Unix:
254 $test_file = $path . '/' . $name;
255 ( $test_name, $test_path ) = fileparse($test_file);
256 return $test_file if ( $test_name eq $name );
260 # Here is a map of the flow of data from the input source to the output
263 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
264 # input groups output
265 # lines tokens lines of lines lines
268 # The names correspond to the package names responsible for the unit processes.
270 # The overall process is controlled by the "main" package.
272 # LineSource is the stream of input lines
274 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
275 # if necessary. A token is any section of the input line which should be
276 # manipulated as a single entity during formatting. For example, a single
277 # ',' character is a token, and so is an entire side comment. It handles
278 # the complexities of Perl syntax, such as distinguishing between '<<' as
279 # a shift operator and as a here-document, or distinguishing between '/'
280 # as a divide symbol and as a pattern delimiter.
282 # Formatter inserts and deletes whitespace between tokens, and breaks
283 # sequences of tokens at appropriate points as output lines. It bases its
284 # decisions on the default rules as modified by any command-line options.
286 # VerticalAligner collects groups of lines together and tries to line up
287 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
289 # FileWriter simply writes lines to the output stream.
291 # The Logger package, not shown, records significant events and warning
292 # messages. It writes a .LOG file, which may be saved with a
293 # '-log' or a '-g' flag.
301 destination => undef,
308 dump_options => undef,
309 dump_options_type => undef,
310 dump_getopt_flags => undef,
311 dump_options_category => undef,
312 dump_options_range => undef,
313 dump_abbreviations => undef,
318 # don't overwrite callers ARGV
320 local *STDERR = *STDERR;
322 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
324 my @good_keys = sort keys %defaults;
325 @bad_keys = sort @bad_keys;
327 ------------------------------------------------------------------------
328 Unknown perltidy parameter : (@bad_keys)
329 perltidy only understands : (@good_keys)
330 ------------------------------------------------------------------------
335 my $get_hash_ref = sub {
337 my $hash_ref = $input_hash{$key};
338 if ( defined($hash_ref) ) {
339 unless ( ref($hash_ref) eq 'HASH' ) {
340 my $what = ref($hash_ref);
342 $what ? "but is ref to $what" : "but is not a reference";
344 ------------------------------------------------------------------------
345 error in call to perltidy:
346 -$key must be reference to HASH $but_is
347 ------------------------------------------------------------------------
354 %input_hash = ( %defaults, %input_hash );
355 my $argv = $input_hash{'argv'};
356 my $destination_stream = $input_hash{'destination'};
357 my $errorfile_stream = $input_hash{'errorfile'};
358 my $logfile_stream = $input_hash{'logfile'};
359 my $perltidyrc_stream = $input_hash{'perltidyrc'};
360 my $source_stream = $input_hash{'source'};
361 my $stderr_stream = $input_hash{'stderr'};
362 my $user_formatter = $input_hash{'formatter'};
363 my $prefilter = $input_hash{'prefilter'};
364 my $postfilter = $input_hash{'postfilter'};
366 if ($stderr_stream) {
367 ( $fh_stderr, my $stderr_file ) =
368 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
371 ------------------------------------------------------------------------
372 Unable to redirect STDERR to $stderr_stream
373 Please check value of -stderr in call to perltidy
374 ------------------------------------------------------------------------
379 $fh_stderr = *STDERR;
382 sub Warn { my $msg = shift; $fh_stderr->print($msg); return }
386 if ($flag) { goto ERROR_EXIT }
387 else { goto NORMAL_EXIT }
390 sub Die { my $msg = shift; Warn($msg); Exit(1); }
392 # extract various dump parameters
393 my $dump_options_type = $input_hash{'dump_options_type'};
394 my $dump_options = $get_hash_ref->('dump_options');
395 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
396 my $dump_options_category = $get_hash_ref->('dump_options_category');
397 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
398 my $dump_options_range = $get_hash_ref->('dump_options_range');
400 # validate dump_options_type
401 if ( defined($dump_options) ) {
402 unless ( defined($dump_options_type) ) {
403 $dump_options_type = 'perltidyrc';
405 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
407 ------------------------------------------------------------------------
408 Please check value of -dump_options_type in call to perltidy;
409 saw: '$dump_options_type'
410 expecting: 'perltidyrc' or 'full'
411 ------------------------------------------------------------------------
417 $dump_options_type = "";
420 if ($user_formatter) {
422 # if the user defines a formatter, there is no output stream,
423 # but we need a null stream to keep coding simple
424 $destination_stream = Perl::Tidy::DevNull->new();
427 # see if ARGV is overridden
428 if ( defined($argv) ) {
430 my $rargv = ref $argv;
431 if ( $rargv eq 'SCALAR' ) { $argv = ${$argv}; $rargv = undef }
435 if ( $rargv eq 'ARRAY' ) {
440 ------------------------------------------------------------------------
441 Please check value of -argv in call to perltidy;
442 it must be a string or ref to ARRAY but is: $rargv
443 ------------------------------------------------------------------------
450 my ( $rargv, $msg ) = parse_args($argv);
453 Error parsing this string passed to to perltidy with 'argv':
461 my $rpending_complaint;
462 ${$rpending_complaint} = "";
463 my $rpending_logfile_message;
464 ${$rpending_logfile_message} = "";
466 my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
468 # VMS file names are restricted to a 40.40 format, so we append _tdy
469 # instead of .tdy, etc. (but see also sub check_vms_filename)
472 if ( $^O eq 'VMS' ) {
478 $dot_pattern = '\.'; # must escape for use in regex
481 #---------------------------------------------------------------
482 # get command line options
483 #---------------------------------------------------------------
484 my ( $rOpts, $config_file, $rraw_options, $roption_string,
485 $rexpansion, $roption_category, $roption_range )
486 = process_command_line(
487 $perltidyrc_stream, $is_Windows, $Windows_type,
488 $rpending_complaint, $dump_options_type,
491 my $saw_extrude = ( grep m/^-extrude$/, @{$rraw_options} ) ? 1 : 0;
493 ( grep m/^-(pbp|perl-best-practices)$/, @{$rraw_options} ) ? 1 : 0;
495 #---------------------------------------------------------------
496 # Handle requests to dump information
497 #---------------------------------------------------------------
499 # return or exit immediately after all dumps
502 # Getopt parameters and their flags
503 if ( defined($dump_getopt_flags) ) {
505 foreach my $op ( @{$roption_string} ) {
514 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
518 $dump_getopt_flags->{$opt} = $flag;
522 if ( defined($dump_options_category) ) {
524 %{$dump_options_category} = %{$roption_category};
527 if ( defined($dump_options_range) ) {
529 %{$dump_options_range} = %{$roption_range};
532 if ( defined($dump_abbreviations) ) {
534 %{$dump_abbreviations} = %{$rexpansion};
537 if ( defined($dump_options) ) {
539 %{$dump_options} = %{$rOpts};
542 Exit 0 if ($quit_now);
544 # make printable string of options for this run as possible diagnostic
545 my $readable_options = readable_options( $rOpts, $roption_string );
547 # dump from command line
548 if ( $rOpts->{'dump-options'} ) {
549 print STDOUT $readable_options;
553 #---------------------------------------------------------------
554 # check parameters and their interactions
555 #---------------------------------------------------------------
557 check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
559 if ($user_formatter) {
560 $rOpts->{'format'} = 'user';
563 # there must be one entry here for every possible format
564 my %default_file_extension = (
570 $rOpts_character_encoding = $rOpts->{'character-encoding'};
572 # be sure we have a valid output format
573 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
574 my $formats = join ' ',
575 sort map { "'" . $_ . "'" } keys %default_file_extension;
576 my $fmt = $rOpts->{'format'};
577 Die "-format='$fmt' but must be one of: $formats\n";
580 my $output_extension = make_extension( $rOpts->{'output-file-extension'},
581 $default_file_extension{ $rOpts->{'format'} }, $dot );
583 # If the backup extension contains a / character then the backup should
584 # be deleted when the -b option is used. On older versions of
585 # perltidy this will generate an error message due to an illegal
588 # A backup file will still be generated but will be deleted
589 # at the end. If -bext='/' then this extension will be
590 # the default 'bak'. Otherwise it will be whatever characters
591 # remains after all '/' characters are removed. For example:
592 # -bext extension slashes
596 # '/dev/null' devnull 2 (Currently not allowed)
597 my $bext = $rOpts->{'backup-file-extension'};
598 my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
600 # At present only one forward slash is allowed. In the future multiple
601 # slashes may be allowed to allow for other options
602 if ( $delete_backup > 1 ) {
603 Die "-bext=$bext contains more than one '/'\n";
606 my $backup_extension =
607 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
609 my $html_toc_extension =
610 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
612 my $html_src_extension =
613 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
615 # check for -b option;
616 # silently ignore unless beautify mode
617 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
618 && $rOpts->{'format'} eq 'tidy';
620 # Turn off -b with warnings in case of conflicts with other options.
621 # NOTE: Do this silently, without warnings, if there is a source or
622 # destination stream, or standard output is used. This is because the -b
623 # flag may have been in a .perltidyrc file and warnings break
624 # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014.
625 if ($in_place_modify) {
626 if ( $rOpts->{'standard-output'}
627 || $destination_stream
628 || ref $source_stream
629 || $rOpts->{'outfile'}
630 || defined( $rOpts->{'output-path'} ) )
632 $in_place_modify = 0;
636 Perl::Tidy::Formatter::check_options($rOpts);
637 if ( $rOpts->{'format'} eq 'html' ) {
638 Perl::Tidy::HtmlWriter->check_options($rOpts);
641 # make the pattern of file extensions that we shouldn't touch
642 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
643 if ($output_extension) {
644 my $ext = quotemeta($output_extension);
645 $forbidden_file_extensions .= "|$ext";
647 if ( $in_place_modify && $backup_extension ) {
648 my $ext = quotemeta($backup_extension);
649 $forbidden_file_extensions .= "|$ext";
651 $forbidden_file_extensions .= ')$';
653 # Create a diagnostics object if requested;
654 # This is only useful for code development
655 my $diagnostics_object = undef;
656 if ( $rOpts->{'DIAGNOSTICS'} ) {
657 $diagnostics_object = Perl::Tidy::Diagnostics->new();
660 # no filenames should be given if input is from an array
661 if ($source_stream) {
664 "You may not specify any filenames when a source array is given\n";
667 # we'll stuff the source array into ARGV
668 unshift( @ARGV, $source_stream );
670 # No special treatment for source stream which is a filename.
671 # This will enable checks for binary files and other bad stuff.
672 $source_stream = undef unless ref($source_stream);
675 # use stdin by default if no source array and no args
677 unshift( @ARGV, '-' ) unless @ARGV;
680 #---------------------------------------------------------------
682 # main loop to process all files in argument list
683 #---------------------------------------------------------------
684 my $number_of_files = @ARGV;
685 my $formatter = undef;
686 my $tokenizer = undef;
688 # If requested, process in order of increasing file size
689 # This can significantly reduce perl's virtual memory usage during testing.
690 if ( $number_of_files > 1 && $rOpts->{'file-size-order'} ) {
693 sort { $a->[1] <=> $b->[1] }
694 map { [ $_, -e $_ ? -s $_ : 0 ] } @ARGV;
697 while ( my $input_file = shift @ARGV ) {
699 my $input_file_permissions;
701 #---------------------------------------------------------------
702 # prepare this input stream
703 #---------------------------------------------------------------
704 if ($source_stream) {
705 $fileroot = "perltidy";
707 # If the source is from an array or string, then .LOG output
708 # is only possible if a logfile stream is specified. This prevents
709 # unexpected perltidy.LOG files.
710 if ( !defined($logfile_stream) ) {
711 $logfile_stream = Perl::Tidy::DevNull->new();
714 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
715 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
716 $in_place_modify = 0;
719 $fileroot = $input_file;
720 unless ( -e $input_file ) {
722 # file doesn't exist - check for a file glob
723 if ( $input_file =~ /([\?\*\[\{])/ ) {
725 # Windows shell may not remove quotes, so do it
726 my $input_file = $input_file;
727 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
728 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
729 my $pattern = fileglob_to_re($input_file);
731 if ( !$@ && opendir( DIR, './' ) ) {
733 grep { /$pattern/ && !-d $_ } readdir(DIR);
736 unshift @ARGV, @files;
741 Warn "skipping file: '$input_file': no matches found\n";
745 unless ( -f $input_file ) {
746 Warn "skipping file: $input_file: not a regular file\n";
750 # As a safety precaution, skip zero length files.
751 # If for example a source file got clobbered somehow,
752 # the old .tdy or .bak files might still exist so we
753 # shouldn't overwrite them with zero length files.
754 unless ( -s $input_file ) {
755 Warn "skipping file: $input_file: Zero size\n";
759 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
761 "skipping file: $input_file: Non-text (override with -f)\n";
765 # we should have a valid filename now
766 $fileroot = $input_file;
767 $input_file_permissions = ( stat $input_file )[2] & oct(7777);
769 if ( $^O eq 'VMS' ) {
770 ( $fileroot, $dot ) = check_vms_filename($fileroot);
773 # add option to change path here
774 if ( defined( $rOpts->{'output-path'} ) ) {
776 my ( $base, $old_path ) = fileparse($fileroot);
777 my $new_path = $rOpts->{'output-path'};
778 unless ( -d $new_path ) {
779 unless ( mkdir $new_path, 0777 ) {
780 Die "unable to create directory $new_path: $!\n";
783 my $path = $new_path;
784 $fileroot = catfile( $path, $base );
787 ------------------------------------------------------------------------
788 Problem combining $new_path and $base to make a filename; check -opath
789 ------------------------------------------------------------------------
795 # Skip files with same extension as the output files because
796 # this can lead to a messy situation with files like
797 # script.tdy.tdy.tdy ... or worse problems ... when you
798 # rerun perltidy over and over with wildcard input.
801 && ( $input_file =~ /$forbidden_file_extensions/o
802 || $input_file eq 'DIAGNOSTICS' )
805 Warn "skipping file: $input_file: wrong extension\n";
809 # the 'source_object' supplies a method to read the input file
811 Perl::Tidy::LineSource->new( $input_file, $rOpts,
812 $rpending_logfile_message );
813 next unless ($source_object);
815 # Prefilters and postfilters: The prefilter is a code reference
816 # that will be applied to the source before tidying, and the
817 # postfilter is a code reference to the result before outputting.
820 || ( $rOpts_character_encoding
821 && $rOpts_character_encoding eq 'utf8' )
825 while ( my $line = $source_object->get_line() ) {
829 $buf = $prefilter->($buf) if $prefilter;
831 if ( $rOpts_character_encoding
832 && $rOpts_character_encoding eq 'utf8'
833 && !utf8::is_utf8($buf) )
836 $buf = Encode::decode( 'UTF-8', $buf,
837 Encode::FB_CROAK | Encode::LEAVE_SRC );
841 "skipping file: $input_file: Unable to decode source as UTF-8\n";
846 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
847 $rpending_logfile_message );
850 # register this file name with the Diagnostics package
851 $diagnostics_object->set_input_file($input_file)
852 if $diagnostics_object;
854 #---------------------------------------------------------------
855 # prepare the output stream
856 #---------------------------------------------------------------
857 my $output_file = undef;
858 my $actual_output_extension;
860 if ( $rOpts->{'outfile'} ) {
862 if ( $number_of_files <= 1 ) {
864 if ( $rOpts->{'standard-output'} ) {
865 my $msg = "You may not use -o and -st together";
866 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
869 elsif ($destination_stream) {
871 "You may not specify a destination array and -o together\n";
873 elsif ( defined( $rOpts->{'output-path'} ) ) {
874 Die "You may not specify -o and -opath together\n";
876 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
877 Die "You may not specify -o and -oext together\n";
879 $output_file = $rOpts->{outfile};
881 # make sure user gives a file name after -o
882 if ( $output_file =~ /^-/ ) {
883 Die "You must specify a valid filename after -o\n";
886 # do not overwrite input file with -o
887 if ( defined($input_file_permissions)
888 && ( $output_file eq $input_file ) )
890 Die "Use 'perltidy -b $input_file' to modify in-place\n";
894 Die "You may not use -o with more than one input file\n";
897 elsif ( $rOpts->{'standard-output'} ) {
898 if ($destination_stream) {
900 "You may not specify a destination array and -st together\n";
901 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
906 if ( $number_of_files <= 1 ) {
909 Die "You may not use -st with more than one input file\n";
912 elsif ($destination_stream) {
913 $output_file = $destination_stream;
915 elsif ($source_stream) { # source but no destination goes to stdout
918 elsif ( $input_file eq '-' ) {
922 if ($in_place_modify) {
923 $output_file = IO::File->new_tmpfile()
924 or Die "cannot open temp file for -b option: $!\n";
927 $actual_output_extension = $output_extension;
928 $output_file = $fileroot . $output_extension;
932 # the 'sink_object' knows how to write the output file
933 my $tee_file = $fileroot . $dot . "TEE";
935 my $line_separator = $rOpts->{'output-line-ending'};
936 if ( $rOpts->{'preserve-line-endings'} ) {
937 $line_separator = find_input_line_ending($input_file);
940 # Eventually all I/O may be done with binmode, but for now it is
941 # only done when a user requests a particular line separator
942 # through the -ple or -ole flags
943 my $binmode = defined($line_separator)
944 || defined($rOpts_character_encoding);
945 $line_separator = "\n" unless defined($line_separator);
947 my ( $sink_object, $postfilter_buffer );
950 Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
951 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
955 Perl::Tidy::LineSink->new( $output_file, $tee_file,
956 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
959 #---------------------------------------------------------------
960 # initialize the error logger for this file
961 #---------------------------------------------------------------
962 my $warning_file = $fileroot . $dot . "ERR";
963 if ($errorfile_stream) { $warning_file = $errorfile_stream }
964 my $log_file = $fileroot . $dot . "LOG";
965 if ($logfile_stream) { $log_file = $logfile_stream }
968 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
969 $fh_stderr, $saw_extrude );
970 write_logfile_header(
971 $rOpts, $logger_object, $config_file,
972 $rraw_options, $Windows_type, $readable_options,
974 if ( ${$rpending_logfile_message} ) {
975 $logger_object->write_logfile_entry( ${$rpending_logfile_message} );
977 if ( ${$rpending_complaint} ) {
978 $logger_object->complain( ${$rpending_complaint} );
981 #---------------------------------------------------------------
982 # initialize the debug object, if any
983 #---------------------------------------------------------------
984 my $debugger_object = undef;
985 if ( $rOpts->{DEBUG} ) {
987 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
990 #---------------------------------------------------------------
991 # loop over iterations for one source stream
992 #---------------------------------------------------------------
994 # We will do a convergence test if 3 or more iterations are allowed.
995 # It would be pointless for fewer because we have to make at least
996 # two passes before we can see if we are converged, and the test
997 # would just slow things down.
998 my $max_iterations = $rOpts->{'iterations'};
999 my $convergence_log_message;
1001 my $do_convergence_test = $max_iterations > 2;
1002 if ($do_convergence_test) {
1003 eval "use Digest::MD5 qw(md5_hex)";
1004 $do_convergence_test = !$@;
1006 ### Trying to avoid problems with ancient versions of perl
1007 ##eval { my $string = "perltidy"; utf8::encode($string) };
1008 ##$do_convergence_test = $do_convergence_test && !$@;
1011 # save objects to allow redirecting output during iterations
1012 my $sink_object_final = $sink_object;
1013 my $debugger_object_final = $debugger_object;
1014 my $logger_object_final = $logger_object;
1016 foreach my $iter ( 1 .. $max_iterations ) {
1018 # send output stream to temp buffers until last iteration
1020 if ( $iter < $max_iterations ) {
1022 Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
1023 $line_separator, $rOpts, $rpending_logfile_message,
1027 $sink_object = $sink_object_final;
1030 # Save logger, debugger output only on pass 1 because:
1031 # (1) line number references must be to the starting
1032 # source, not an intermediate result, and
1033 # (2) we need to know if there are errors so we can stop the
1034 # iterations early if necessary.
1036 $debugger_object = undef;
1037 $logger_object = undef;
1040 #------------------------------------------------------------
1041 # create a formatter for this file : html writer or
1043 #------------------------------------------------------------
1045 # we have to delete any old formatter because, for safety,
1046 # the formatter will check to see that there is only one.
1049 if ($user_formatter) {
1050 $formatter = $user_formatter;
1052 elsif ( $rOpts->{'format'} eq 'html' ) {
1054 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1055 $actual_output_extension, $html_toc_extension,
1056 $html_src_extension );
1058 elsif ( $rOpts->{'format'} eq 'tidy' ) {
1059 $formatter = Perl::Tidy::Formatter->new(
1060 logger_object => $logger_object,
1061 diagnostics_object => $diagnostics_object,
1062 sink_object => $sink_object,
1066 Die "I don't know how to do -format=$rOpts->{'format'}\n";
1069 unless ($formatter) {
1070 Die "Unable to continue with $rOpts->{'format'} formatting\n";
1073 #---------------------------------------------------------------
1074 # create the tokenizer for this file
1075 #---------------------------------------------------------------
1076 $tokenizer = undef; # must destroy old tokenizer
1077 $tokenizer = Perl::Tidy::Tokenizer->new(
1078 source_object => $source_object,
1079 logger_object => $logger_object,
1080 debugger_object => $debugger_object,
1081 diagnostics_object => $diagnostics_object,
1082 tabsize => $tabsize,
1084 starting_level => $rOpts->{'starting-indentation-level'},
1085 indent_columns => $rOpts->{'indent-columns'},
1086 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
1087 look_for_autoloader => $rOpts->{'look-for-autoloader'},
1088 look_for_selfloader => $rOpts->{'look-for-selfloader'},
1089 trim_qw => $rOpts->{'trim-qw'},
1090 extended_syntax => $rOpts->{'extended-syntax'},
1092 continuation_indentation =>
1093 $rOpts->{'continuation-indentation'},
1094 outdent_labels => $rOpts->{'outdent-labels'},
1097 #---------------------------------------------------------------
1099 #---------------------------------------------------------------
1100 process_this_file( $tokenizer, $formatter );
1102 #---------------------------------------------------------------
1103 # close the input source and report errors
1104 #---------------------------------------------------------------
1105 $source_object->close_input_file();
1107 # line source for next iteration (if any) comes from the current
1108 # temporary output buffer
1109 if ( $iter < $max_iterations ) {
1111 $sink_object->close_output_file();
1113 Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1114 $rpending_logfile_message );
1116 # stop iterations if errors or converged
1117 #my $stop_now = $logger_object->{_warning_count};
1118 my $stop_now = $tokenizer->report_tokenization_errors();
1120 $convergence_log_message = <<EOM;
1121 Stopping iterations because of severe errors.
1124 elsif ($do_convergence_test) {
1126 # Patch for [rt.cpan.org #88020]
1127 # Use utf8::encode since md5_hex() only operates on bytes.
1128 # my $digest = md5_hex( utf8::encode($sink_buffer) );
1130 # Note added 20180114: this patch did not work correctly.
1131 # I'm not sure why. But switching to the method
1132 # recommended in the Perl 5 documentation for Encode
1133 # worked. According to this we can either use
1134 # $octets = encode_utf8($string) or equivalently
1135 # $octets = encode("utf8",$string)
1136 # and then calculate the checksum. So:
1137 my $octets = Encode::encode( "utf8", $sink_buffer );
1138 my $digest = md5_hex($octets);
1139 if ( !$saw_md5{$digest} ) {
1140 $saw_md5{$digest} = $iter;
1144 # Deja vu, stop iterating
1146 my $iterm = $iter - 1;
1147 if ( $saw_md5{$digest} != $iterm ) {
1149 # Blinking (oscillating) between two stable
1150 # end states. This has happened in the past
1151 # but at present there are no known instances.
1152 $convergence_log_message = <<EOM;
1153 Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1155 $diagnostics_object->write_diagnostics(
1156 $convergence_log_message)
1157 if $diagnostics_object;
1160 $convergence_log_message = <<EOM;
1161 Converged. Output for iteration $iter same as for iter $iterm.
1163 $diagnostics_object->write_diagnostics(
1164 $convergence_log_message)
1165 if $diagnostics_object && $iterm > 2;
1168 } ## end if ($do_convergence_test)
1172 # we are stopping the iterations early;
1173 # copy the output stream to its final destination
1174 $sink_object = $sink_object_final;
1175 while ( my $line = $source_object->get_line() ) {
1176 $sink_object->write_line($line);
1178 $source_object->close_input_file();
1181 } ## end if ( $iter < $max_iterations)
1182 } # end loop over iterations for one source file
1184 # restore objects which have been temporarily undefined
1185 # for second and higher iterations
1186 $debugger_object = $debugger_object_final;
1187 $logger_object = $logger_object_final;
1189 $logger_object->write_logfile_entry($convergence_log_message)
1190 if $convergence_log_message;
1192 #---------------------------------------------------------------
1193 # Perform any postfilter operation
1194 #---------------------------------------------------------------
1196 $sink_object->close_output_file();
1198 Perl::Tidy::LineSink->new( $output_file, $tee_file,
1199 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
1200 my $buf = $postfilter->($postfilter_buffer);
1202 Perl::Tidy::LineSource->new( \$buf, $rOpts,
1203 $rpending_logfile_message );
1204 while ( my $line = $source_object->get_line() ) {
1205 $sink_object->write_line($line);
1207 $source_object->close_input_file();
1210 # Save names of the input and output files for syntax check
1211 my $ifname = $input_file;
1212 my $ofname = $output_file;
1214 #---------------------------------------------------------------
1215 # handle the -b option (backup and modify in-place)
1216 #---------------------------------------------------------------
1217 if ($in_place_modify) {
1218 unless ( -f $input_file ) {
1220 # oh, oh, no real file to backup ..
1221 # shouldn't happen because of numerous preliminary checks
1223 "problem with -b backing up input file '$input_file': not a file\n";
1225 my $backup_name = $input_file . $backup_extension;
1226 if ( -f $backup_name ) {
1227 unlink($backup_name)
1229 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1232 # backup the input file
1233 # we use copy for symlinks, move for regular files
1234 if ( -l $input_file ) {
1235 File::Copy::copy( $input_file, $backup_name )
1236 or Die "File::Copy failed trying to backup source: $!";
1239 rename( $input_file, $backup_name )
1241 "problem renaming $input_file to $backup_name for -b option: $!\n";
1243 $ifname = $backup_name;
1245 # copy the output to the original input file
1246 # NOTE: it would be nice to just close $output_file and use
1247 # File::Copy::copy here, but in this case $output_file is the
1248 # handle of an open nameless temporary file so we would lose
1249 # everything if we closed it.
1250 seek( $output_file, 0, 0 )
1251 or Die "unable to rewind a temporary file for -b option: $!\n";
1252 my $fout = IO::File->new("> $input_file")
1254 "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1256 if ( $rOpts->{'character-encoding'}
1257 && $rOpts->{'character-encoding'} eq 'utf8' )
1259 binmode $fout, ":encoding(UTF-8)";
1261 else { binmode $fout }
1264 while ( $line = $output_file->getline() ) {
1265 $fout->print($line);
1268 $output_file = $input_file;
1269 $ofname = $input_file;
1272 #---------------------------------------------------------------
1273 # clean up and report errors
1274 #---------------------------------------------------------------
1275 $sink_object->close_output_file() if $sink_object;
1276 $debugger_object->close_debug_file() if $debugger_object;
1278 # set output file permissions
1279 if ( $output_file && -f $output_file && !-l $output_file ) {
1280 if ($input_file_permissions) {
1282 # give output script same permissions as input script, but
1283 # make it user-writable or else we can't run perltidy again.
1284 # Thus we retain whatever executable flags were set.
1285 if ( $rOpts->{'format'} eq 'tidy' ) {
1286 chmod( $input_file_permissions | oct(600), $output_file );
1289 # else use default permissions for html and any other format
1293 #---------------------------------------------------------------
1294 # Do syntax check if requested and possible
1295 #---------------------------------------------------------------
1296 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
1298 && $rOpts->{'check-syntax'}
1303 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1306 #---------------------------------------------------------------
1307 # remove the original file for in-place modify as follows:
1308 # $delete_backup=0 never
1309 # $delete_backup=1 only if no errors
1310 # $delete_backup>1 always : NOT ALLOWED, too risky, see above
1311 #---------------------------------------------------------------
1312 if ( $in_place_modify
1315 && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1318 # As an added safety precaution, do not delete the source file
1319 # if its size has dropped from positive to zero, since this
1320 # could indicate a disaster of some kind, including a hardware
1321 # failure. Actually, this could happen if you had a file of
1322 # all comments (or pod) and deleted everything with -dac (-dap)
1324 if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1326 "output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1332 "unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1336 $logger_object->finish( $infile_syntax_ok, $formatter )
1338 } # end of main loop to process all files
1345 } # end of main program perltidy
1347 sub get_stream_as_named_file {
1349 # Return the name of a file containing a stream of data, creating
1350 # a temporary file if necessary.
1352 # $stream - the name of a file or stream
1354 # $fname = name of file if possible, or undef
1355 # $if_tmpfile = true if temp file, undef if not temp file
1357 # This routine is needed for passing actual files to Perl for
1363 if ( ref($stream) ) {
1364 my ( $fh_stream, $fh_name ) =
1365 Perl::Tidy::streamhandle( $stream, 'r' );
1367 my ( $fout, $tmpnam ) = File::Temp::tempfile();
1372 while ( my $line = $fh_stream->getline() ) {
1373 $fout->print($line);
1377 $fh_stream->close();
1380 elsif ( $stream ne '-' && -f $stream ) {
1384 return ( $fname, $is_tmpfile );
1387 sub fileglob_to_re {
1389 # modified (corrected) from version in find2perl
1391 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
1392 $x =~ s#\*#.*#g; # '*' -> '.*'
1393 $x =~ s#\?#.#g; # '?' -> '.'
1394 return "^$x\\z"; # match whole word
1397 sub make_extension {
1399 # Make a file extension, including any leading '.' if necessary
1400 # The '.' may actually be an '_' under VMS
1401 my ( $extension, $default, $dot ) = @_;
1403 # Use the default if none specified
1404 $extension = $default unless ($extension);
1406 # Only extensions with these leading characters get a '.'
1407 # This rule gives the user some freedom
1408 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1409 $extension = $dot . $extension;
1414 sub write_logfile_header {
1416 $rOpts, $logger_object, $config_file,
1417 $rraw_options, $Windows_type, $readable_options
1419 $logger_object->write_logfile_entry(
1420 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1422 if ($Windows_type) {
1423 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1425 my $options_string = join( ' ', @{$rraw_options} );
1428 $logger_object->write_logfile_entry(
1429 "Found Configuration File >>> $config_file \n");
1431 $logger_object->write_logfile_entry(
1432 "Configuration and command line parameters for this run:\n");
1433 $logger_object->write_logfile_entry("$options_string\n");
1435 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1436 $rOpts->{'logfile'} = 1; # force logfile to be saved
1437 $logger_object->write_logfile_entry(
1438 "Final parameter set for this run\n");
1439 $logger_object->write_logfile_entry(
1440 "------------------------------------\n");
1442 $logger_object->write_logfile_entry($readable_options);
1444 $logger_object->write_logfile_entry(
1445 "------------------------------------\n");
1447 $logger_object->write_logfile_entry(
1448 "To find error messages search for 'WARNING' with your editor\n");
1452 sub generate_options {
1454 ######################################################################
1455 # Generate and return references to:
1456 # @option_string - the list of options to be passed to Getopt::Long
1457 # @defaults - the list of default options
1458 # %expansion - a hash showing how all abbreviations are expanded
1459 # %category - a hash giving the general category of each option
1460 # %option_range - a hash giving the valid ranges of certain options
1462 # Note: a few options are not documented in the man page and usage
1463 # message. This is because these are experimental or debug options and
1464 # may or may not be retained in future versions.
1466 # Here are the undocumented flags as far as I know. Any of them
1467 # may disappear at any time. They are mainly for fine-tuning
1470 # fll --> fuzzy-line-length # a trivial parameter which gets
1471 # turned off for the extrude option
1472 # which is mainly for debugging
1473 # scl --> short-concatenation-item-length # helps break at '.'
1474 # recombine # for debugging line breaks
1475 # valign # for debugging vertical alignment
1476 # I --> DIAGNOSTICS # for debugging [**DEACTIVATED**]
1477 ######################################################################
1479 # here is a summary of the Getopt codes:
1480 # <none> does not take an argument
1481 # =s takes a mandatory string
1482 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1483 # =i takes a mandatory integer
1484 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1485 # ! does not take an argument and may be negated
1486 # i.e., -foo and -nofoo are allowed
1487 # a double dash signals the end of the options list
1489 #---------------------------------------------------------------
1490 # Define the option string passed to GetOptions.
1491 #---------------------------------------------------------------
1493 my @option_string = ();
1495 my %option_category = ();
1496 my %option_range = ();
1497 my $rexpansion = \%expansion;
1499 # names of categories in manual
1500 # leading integers will allow sorting
1501 my @category_name = (
1503 '1. Basic formatting options',
1504 '2. Code indentation control',
1505 '3. Whitespace control',
1506 '4. Comment controls',
1507 '5. Linebreak controls',
1508 '6. Controlling list formatting',
1509 '7. Retaining or ignoring existing line breaks',
1510 '8. Blank line control',
1511 '9. Other controls',
1513 '11. pod2html options',
1514 '12. Controlling HTML properties',
1518 # These options are parsed directly by perltidy:
1521 # However, they are included in the option set so that they will
1522 # be seen in the options dump.
1524 # These long option names have no abbreviations or are treated specially
1525 @option_string = qw(
1535 my $category = 13; # Debugging
1536 foreach (@option_string) {
1537 my $opt = $_; # must avoid changing the actual flag
1539 $option_category{$opt} = $category_name[$category];
1542 $category = 11; # HTML
1543 $option_category{html} = $category_name[$category];
1545 # routine to install and check options
1546 my $add_option = sub {
1547 my ( $long_name, $short_name, $flag ) = @_;
1548 push @option_string, $long_name . $flag;
1549 $option_category{$long_name} = $category_name[$category];
1551 if ( $expansion{$short_name} ) {
1552 my $existing_name = $expansion{$short_name}[0];
1554 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1556 $expansion{$short_name} = [$long_name];
1557 if ( $flag eq '!' ) {
1558 my $nshort_name = 'n' . $short_name;
1559 my $nolong_name = 'no' . $long_name;
1560 if ( $expansion{$nshort_name} ) {
1561 my $existing_name = $expansion{$nshort_name}[0];
1563 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1565 $expansion{$nshort_name} = [$nolong_name];
1570 # Install long option names which have a simple abbreviation.
1571 # Options with code '!' get standard negation ('no' for long names,
1572 # 'n' for abbreviations). Categories follow the manual.
1574 ###########################
1575 $category = 0; # I/O_Control
1576 ###########################
1577 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1578 $add_option->( 'backup-file-extension', 'bext', '=s' );
1579 $add_option->( 'force-read-binary', 'f', '!' );
1580 $add_option->( 'format', 'fmt', '=s' );
1581 $add_option->( 'iterations', 'it', '=i' );
1582 $add_option->( 'logfile', 'log', '!' );
1583 $add_option->( 'logfile-gap', 'g', ':i' );
1584 $add_option->( 'outfile', 'o', '=s' );
1585 $add_option->( 'output-file-extension', 'oext', '=s' );
1586 $add_option->( 'output-path', 'opath', '=s' );
1587 $add_option->( 'profile', 'pro', '=s' );
1588 $add_option->( 'quiet', 'q', '!' );
1589 $add_option->( 'standard-error-output', 'se', '!' );
1590 $add_option->( 'standard-output', 'st', '!' );
1591 $add_option->( 'warning-output', 'w', '!' );
1592 $add_option->( 'character-encoding', 'enc', '=s' );
1594 # options which are both toggle switches and values moved here
1595 # to hide from tidyview (which does not show category 0 flags):
1596 # -ole moved here from category 1
1597 # -sil moved here from category 2
1598 $add_option->( 'output-line-ending', 'ole', '=s' );
1599 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1601 ########################################
1602 $category = 1; # Basic formatting options
1603 ########################################
1604 $add_option->( 'check-syntax', 'syn', '!' );
1605 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1606 $add_option->( 'indent-columns', 'i', '=i' );
1607 $add_option->( 'maximum-line-length', 'l', '=i' );
1608 $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
1609 $add_option->( 'whitespace-cycle', 'wc', '=i' );
1610 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1611 $add_option->( 'preserve-line-endings', 'ple', '!' );
1612 $add_option->( 'tabs', 't', '!' );
1613 $add_option->( 'default-tabsize', 'dt', '=i' );
1614 $add_option->( 'extended-syntax', 'xs', '!' );
1616 ########################################
1617 $category = 2; # Code indentation control
1618 ########################################
1619 $add_option->( 'continuation-indentation', 'ci', '=i' );
1620 $add_option->( 'line-up-parentheses', 'lp', '!' );
1621 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1622 $add_option->( 'outdent-keywords', 'okw', '!' );
1623 $add_option->( 'outdent-labels', 'ola', '!' );
1624 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1625 $add_option->( 'indent-closing-brace', 'icb', '!' );
1626 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1627 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1628 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1629 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1630 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1631 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1633 ########################################
1634 $category = 3; # Whitespace control
1635 ########################################
1636 $add_option->( 'add-semicolons', 'asc', '!' );
1637 $add_option->( 'add-whitespace', 'aws', '!' );
1638 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1639 $add_option->( 'brace-tightness', 'bt', '=i' );
1640 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1641 $add_option->( 'delete-semicolons', 'dsm', '!' );
1642 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1643 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1644 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1645 $add_option->( 'paren-tightness', 'pt', '=i' );
1646 $add_option->( 'space-after-keyword', 'sak', '=s' );
1647 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1648 $add_option->( 'space-function-paren', 'sfp', '!' );
1649 $add_option->( 'space-keyword-paren', 'skp', '!' );
1650 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1651 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1652 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1653 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1654 $add_option->( 'tight-secret-operators', 'tso', '!' );
1655 $add_option->( 'trim-qw', 'tqw', '!' );
1656 $add_option->( 'trim-pod', 'trp', '!' );
1657 $add_option->( 'want-left-space', 'wls', '=s' );
1658 $add_option->( 'want-right-space', 'wrs', '=s' );
1660 ########################################
1661 $category = 4; # Comment controls
1662 ########################################
1663 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1664 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1665 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1666 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1667 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1668 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1669 $add_option->( 'closing-side-comments', 'csc', '!' );
1670 $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
1671 $add_option->( 'format-skipping', 'fs', '!' );
1672 $add_option->( 'format-skipping-begin', 'fsb', '=s' );
1673 $add_option->( 'format-skipping-end', 'fse', '=s' );
1674 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1675 $add_option->( 'indent-block-comments', 'ibc', '!' );
1676 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1677 $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
1678 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1679 $add_option->( 'outdent-long-comments', 'olc', '!' );
1680 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1681 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1682 $add_option->( 'static-block-comments', 'sbc', '!' );
1683 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1684 $add_option->( 'static-side-comments', 'ssc', '!' );
1685 $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' );
1687 ########################################
1688 $category = 5; # Linebreak controls
1689 ########################################
1690 $add_option->( 'add-newlines', 'anl', '!' );
1691 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1692 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1693 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1694 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1695 $add_option->( 'cuddled-else', 'ce', '!' );
1696 $add_option->( 'cuddled-blocks', 'cb', '!' );
1697 $add_option->( 'cuddled-block-list', 'cbl', '=s' );
1698 $add_option->( 'cuddled-break-option', 'cbo', '=i' );
1699 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1700 $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
1701 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1702 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1703 $add_option->( 'opening-paren-right', 'opr', '!' );
1704 $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1705 $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
1706 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1707 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1708 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1709 $add_option->( 'weld-nested-containers', 'wn', '!' );
1710 $add_option->( 'space-backslash-quote', 'sbq', '=i' );
1711 $add_option->( 'stack-closing-block-brace', 'scbb', '!' );
1712 $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1713 $add_option->( 'stack-closing-paren', 'scp', '!' );
1714 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1715 $add_option->( 'stack-opening-block-brace', 'sobb', '!' );
1716 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1717 $add_option->( 'stack-opening-paren', 'sop', '!' );
1718 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1719 $add_option->( 'vertical-tightness', 'vt', '=i' );
1720 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1721 $add_option->( 'want-break-after', 'wba', '=s' );
1722 $add_option->( 'want-break-before', 'wbb', '=s' );
1723 $add_option->( 'break-after-all-operators', 'baao', '!' );
1724 $add_option->( 'break-before-all-operators', 'bbao', '!' );
1725 $add_option->( 'keep-interior-semicolons', 'kis', '!' );
1727 ########################################
1728 $category = 6; # Controlling list formatting
1729 ########################################
1730 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1731 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1732 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1734 ########################################
1735 $category = 7; # Retaining or ignoring existing line breaks
1736 ########################################
1737 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1738 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1739 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1740 $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
1741 $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1743 ########################################
1744 $category = 8; # Blank line control
1745 ########################################
1746 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1747 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1748 $add_option->( 'blank-lines-before-subs', 'blbs', '=i' );
1749 $add_option->( 'blank-lines-before-packages', 'blbp', '=i' );
1750 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1751 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1752 $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
1754 $add_option->( 'blank-lines-after-opening-block', 'blao', '=i' );
1755 $add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' );
1756 $add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' );
1757 $add_option->( 'blank-lines-before-closing-block-list', 'blbcl', '=s' );
1759 ########################################
1760 $category = 9; # Other controls
1761 ########################################
1762 $add_option->( 'delete-block-comments', 'dbc', '!' );
1763 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1764 $add_option->( 'delete-pod', 'dp', '!' );
1765 $add_option->( 'delete-side-comments', 'dsc', '!' );
1766 $add_option->( 'tee-block-comments', 'tbc', '!' );
1767 $add_option->( 'tee-pod', 'tp', '!' );
1768 $add_option->( 'tee-side-comments', 'tsc', '!' );
1769 $add_option->( 'look-for-autoloader', 'lal', '!' );
1770 $add_option->( 'look-for-hash-bang', 'x', '!' );
1771 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1772 $add_option->( 'pass-version-line', 'pvl', '!' );
1774 ########################################
1775 $category = 13; # Debugging
1776 ########################################
1777 ## $add_option->( 'DIAGNOSTICS', 'I', '!' );
1778 $add_option->( 'DEBUG', 'D', '!' );
1779 $add_option->( 'dump-cuddled-block-list', 'dcbl', '!' );
1780 $add_option->( 'dump-defaults', 'ddf', '!' );
1781 $add_option->( 'dump-long-names', 'dln', '!' );
1782 $add_option->( 'dump-options', 'dop', '!' );
1783 $add_option->( 'dump-profile', 'dpro', '!' );
1784 $add_option->( 'dump-short-names', 'dsn', '!' );
1785 $add_option->( 'dump-token-types', 'dtt', '!' );
1786 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1787 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1788 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1789 $add_option->( 'help', 'h', '' );
1790 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1791 $add_option->( 'show-options', 'opt', '!' );
1792 $add_option->( 'version', 'v', '' );
1793 $add_option->( 'memoize', 'mem', '!' );
1794 $add_option->( 'file-size-order', 'fso', '!' );
1796 #---------------------------------------------------------------------
1798 # The Perl::Tidy::HtmlWriter will add its own options to the string
1799 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1801 ########################################
1802 # Set categories 10, 11, 12
1803 ########################################
1804 # Based on their known order
1805 $category = 12; # HTML properties
1806 foreach my $opt (@option_string) {
1807 my $long_name = $opt;
1808 $long_name =~ s/(!|=.*|:.*)$//;
1809 unless ( defined( $option_category{$long_name} ) ) {
1810 if ( $long_name =~ /^html-linked/ ) {
1811 $category = 10; # HTML options
1813 elsif ( $long_name =~ /^pod2html/ ) {
1814 $category = 11; # Pod2html
1816 $option_category{$long_name} = $category_name[$category];
1820 #---------------------------------------------------------------
1821 # Assign valid ranges to certain options
1822 #---------------------------------------------------------------
1823 # In the future, these may be used to make preliminary checks
1824 # hash keys are long names
1825 # If key or value is undefined:
1826 # strings may have any value
1827 # integer ranges are >=0
1828 # If value is defined:
1829 # value is [qw(any valid words)] for strings
1830 # value is [min, max] for integers
1831 # if min is undefined, there is no lower limit
1832 # if max is undefined, there is no upper limit
1833 # Parameters not listed here have defaults
1835 'format' => [ 'tidy', 'html', 'user' ],
1836 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
1837 'character-encoding' => [ 'none', 'utf8' ],
1839 'space-backslash-quote' => [ 0, 2 ],
1841 'block-brace-tightness' => [ 0, 2 ],
1842 'brace-tightness' => [ 0, 2 ],
1843 'paren-tightness' => [ 0, 2 ],
1844 'square-bracket-tightness' => [ 0, 2 ],
1846 'block-brace-vertical-tightness' => [ 0, 2 ],
1847 'brace-vertical-tightness' => [ 0, 2 ],
1848 'brace-vertical-tightness-closing' => [ 0, 2 ],
1849 'paren-vertical-tightness' => [ 0, 2 ],
1850 'paren-vertical-tightness-closing' => [ 0, 2 ],
1851 'square-bracket-vertical-tightness' => [ 0, 2 ],
1852 'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1853 'vertical-tightness' => [ 0, 2 ],
1854 'vertical-tightness-closing' => [ 0, 2 ],
1856 'closing-brace-indentation' => [ 0, 3 ],
1857 'closing-paren-indentation' => [ 0, 3 ],
1858 'closing-square-bracket-indentation' => [ 0, 3 ],
1859 'closing-token-indentation' => [ 0, 3 ],
1861 'closing-side-comment-else-flag' => [ 0, 2 ],
1862 'comma-arrow-breakpoints' => [ 0, 5 ],
1865 # Note: we could actually allow negative ci if someone really wants it:
1866 # $option_range{'continuation-indentation'} = [ undef, undef ];
1868 #---------------------------------------------------------------
1869 # Assign default values to the above options here, except
1870 # for 'outfile' and 'help'.
1871 # These settings should approximate the perlstyle(1) suggestions.
1872 #---------------------------------------------------------------
1877 blanks-before-blocks
1878 blanks-before-comments
1879 blank-lines-before-subs=1
1880 blank-lines-before-packages=1
1881 block-brace-tightness=0
1882 block-brace-vertical-tightness=0
1884 brace-vertical-tightness-closing=0
1885 brace-vertical-tightness=0
1886 break-at-old-logical-breakpoints
1887 break-at-old-ternary-breakpoints
1888 break-at-old-attribute-breakpoints
1889 break-at-old-keyword-breakpoints
1890 comma-arrow-breakpoints=5
1892 closing-side-comment-interval=6
1893 closing-side-comment-maximum-text=20
1894 closing-side-comment-else-flag=0
1895 closing-side-comments-balanced
1896 closing-paren-indentation=0
1897 closing-brace-indentation=0
1898 closing-square-bracket-indentation=0
1899 continuation-indentation=2
1900 cuddled-break-option=1
1905 hanging-side-comments
1906 indent-block-comments
1909 keep-old-blank-lines=1
1910 long-block-line-count=8
1913 maximum-consecutive-blank-lines=1
1914 maximum-fields-per-table=0
1915 maximum-line-length=80
1917 minimum-space-to-comment=4
1918 nobrace-left-and-indent
1921 nodelete-old-whitespace
1926 nostatic-side-comments
1929 character-encoding=none
1932 outdent-long-comments
1934 paren-vertical-tightness-closing=0
1935 paren-vertical-tightness=0
1937 noweld-nested-containers
1940 short-concatenation-item-length=8
1942 space-backslash-quote=1
1943 square-bracket-tightness=1
1944 square-bracket-vertical-tightness-closing=0
1945 square-bracket-vertical-tightness=0
1946 static-block-comments
1949 backup-file-extension=bak
1954 html-table-of-contents
1958 push @defaults, "perl-syntax-check-flags=-c -T";
1960 #---------------------------------------------------------------
1961 # Define abbreviations which will be expanded into the above primitives.
1962 # These may be defined recursively.
1963 #---------------------------------------------------------------
1966 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1967 'fnl' => [qw(freeze-newlines)],
1968 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1969 'fws' => [qw(freeze-whitespace)],
1970 'freeze-blank-lines' =>
1971 [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1972 'fbl' => [qw(freeze-blank-lines)],
1973 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1974 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1975 'nooutdent-long-lines' =>
1976 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1977 'noll' => [qw(nooutdent-long-lines)],
1978 'io' => [qw(indent-only)],
1979 'delete-all-comments' =>
1980 [qw(delete-block-comments delete-side-comments delete-pod)],
1981 'nodelete-all-comments' =>
1982 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1983 'dac' => [qw(delete-all-comments)],
1984 'ndac' => [qw(nodelete-all-comments)],
1985 'gnu' => [qw(gnu-style)],
1986 'pbp' => [qw(perl-best-practices)],
1987 'tee-all-comments' =>
1988 [qw(tee-block-comments tee-side-comments tee-pod)],
1989 'notee-all-comments' =>
1990 [qw(notee-block-comments notee-side-comments notee-pod)],
1991 'tac' => [qw(tee-all-comments)],
1992 'ntac' => [qw(notee-all-comments)],
1993 'html' => [qw(format=html)],
1994 'nhtml' => [qw(format=tidy)],
1995 'tidy' => [qw(format=tidy)],
1997 'utf8' => [qw(character-encoding=utf8)],
1998 'UTF8' => [qw(character-encoding=utf8)],
2000 'swallow-optional-blank-lines' => [qw(kbl=0)],
2001 'noswallow-optional-blank-lines' => [qw(kbl=1)],
2002 'sob' => [qw(kbl=0)],
2003 'nsob' => [qw(kbl=1)],
2005 'break-after-comma-arrows' => [qw(cab=0)],
2006 'nobreak-after-comma-arrows' => [qw(cab=1)],
2007 'baa' => [qw(cab=0)],
2008 'nbaa' => [qw(cab=1)],
2010 'blanks-before-subs' => [qw(blbs=1 blbp=1)],
2011 'bbs' => [qw(blbs=1 blbp=1)],
2012 'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
2013 'nbbs' => [qw(blbs=0 blbp=0)],
2015 'break-at-old-trinary-breakpoints' => [qw(bot)],
2017 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
2018 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
2019 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
2020 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
2021 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
2023 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
2024 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
2025 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
2026 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
2027 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
2029 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
2030 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
2031 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
2033 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
2034 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
2035 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
2037 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
2038 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
2039 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
2041 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
2042 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
2043 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
2045 'otr' => [qw(opr ohbr osbr)],
2046 'opening-token-right' => [qw(opr ohbr osbr)],
2047 'notr' => [qw(nopr nohbr nosbr)],
2048 'noopening-token-right' => [qw(nopr nohbr nosbr)],
2050 'sot' => [qw(sop sohb sosb)],
2051 'nsot' => [qw(nsop nsohb nsosb)],
2052 'stack-opening-tokens' => [qw(sop sohb sosb)],
2053 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
2055 'sct' => [qw(scp schb scsb)],
2056 'stack-closing-tokens' => => [qw(scp schb scsb)],
2057 'nsct' => [qw(nscp nschb nscsb)],
2058 'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
2060 'sac' => [qw(sot sct)],
2061 'nsac' => [qw(nsot nsct)],
2062 'stack-all-containers' => [qw(sot sct)],
2063 'nostack-all-containers' => [qw(nsot nsct)],
2065 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2066 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2067 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2068 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2069 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2070 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2072 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)],
2073 'sobb' => [qw(bbvt=2 bbvtl=*)],
2074 'nostack-opening-block-brace' => [qw(bbvt=0)],
2075 'nsobb' => [qw(bbvt=0)],
2077 'converge' => [qw(it=4)],
2078 'noconverge' => [qw(it=1)],
2079 'conv' => [qw(it=4)],
2080 'nconv' => [qw(it=1)],
2082 # 'mangle' originally deleted pod and comments, but to keep it
2083 # reversible, it no longer does. But if you really want to
2084 # delete them, just use:
2087 # An interesting use for 'mangle' is to do this:
2088 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
2089 # which will form as many one-line blocks as possible
2094 keep-old-blank-lines=0
2096 delete-old-whitespace
2099 maximum-consecutive-blank-lines=0
2100 maximum-line-length=100000
2104 noblanks-before-blocks
2105 blank-lines-before-subs=0
2106 blank-lines-before-packages=0
2111 # 'extrude' originally deleted pod and comments, but to keep it
2112 # reversible, it no longer does. But if you really want to
2113 # delete them, just use
2116 # An interesting use for 'extrude' is to do this:
2117 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
2118 # which will break up all one-line blocks.
2120 # Removed 'check-syntax' option, which is unsafe because it may execute
2121 # code in BEGIN blocks. Example 'Moose/debugger-duck_type.t'.
2127 delete-old-whitespace
2130 maximum-consecutive-blank-lines=0
2131 maximum-line-length=1
2134 noblanks-before-blocks
2135 blank-lines-before-subs=0
2136 blank-lines-before-packages=0
2143 # this style tries to follow the GNU Coding Standards (which do
2144 # not really apply to perl but which are followed by some perl
2148 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
2152 # Style suggested in Damian Conway's Perl Best Practices
2153 'perl-best-practices' => [
2154 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
2155 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
2158 # Additional styles can be added here
2161 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
2163 # Uncomment next line to dump all expansions for debugging:
2164 # dump_short_names(\%expansion);
2166 \@option_string, \@defaults, \%expansion,
2167 \%option_category, \%option_range
2170 } # end of generate_options
2172 # Memoize process_command_line. Given same @ARGV passed in, return same
2173 # values and same @ARGV back.
2174 # This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
2175 # up masontidy (https://metacpan.org/module/masontidy)
2177 my %process_command_line_cache;
2179 sub process_command_line {
2182 $perltidyrc_stream, $is_Windows, $Windows_type,
2183 $rpending_complaint, $dump_options_type
2186 my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
2188 my $cache_key = join( chr(28), @ARGV );
2189 if ( my $result = $process_command_line_cache{$cache_key} ) {
2190 my ( $argv, @retvals ) = @{$result};
2195 my @retvals = _process_command_line(@_);
2196 $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
2197 if $retvals[0]->{'memoize'};
2202 return _process_command_line(@_);
2206 # (note the underscore here)
2207 sub _process_command_line {
2210 $perltidyrc_stream, $is_Windows, $Windows_type,
2211 $rpending_complaint, $dump_options_type
2216 # Save any current Getopt::Long configuration
2217 # and set to Getopt::Long defaults. Use eval to avoid
2218 # breaking old versions of Perl without these routines.
2219 # Previous configuration is reset at the exit of this routine.
2221 eval { $glc = Getopt::Long::Configure() };
2223 eval { Getopt::Long::ConfigDefaults() };
2225 else { $glc = undef }
2228 $roption_string, $rdefaults, $rexpansion,
2229 $roption_category, $roption_range
2230 ) = generate_options();
2232 #---------------------------------------------------------------
2233 # set the defaults by passing the above list through GetOptions
2234 #---------------------------------------------------------------
2239 # do not load the defaults if we are just dumping perltidyrc
2240 unless ( $dump_options_type eq 'perltidyrc' ) {
2241 for my $i ( @{$rdefaults} ) { push @ARGV, "--" . $i }
2243 if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
2245 "Programming Bug reported by 'GetOptions': error in setting default options";
2250 my @raw_options = ();
2251 my $config_file = "";
2252 my $saw_ignore_profile = 0;
2253 my $saw_dump_profile = 0;
2255 #---------------------------------------------------------------
2256 # Take a first look at the command-line parameters. Do as many
2257 # immediate dumps as possible, which can avoid confusion if the
2258 # perltidyrc file has an error.
2259 #---------------------------------------------------------------
2260 foreach my $i (@ARGV) {
2263 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
2264 $saw_ignore_profile = 1;
2267 # note: this must come before -pro and -profile, below:
2268 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
2269 $saw_dump_profile = 1;
2271 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
2274 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
2278 # resolve <dir>/.../<file>, meaning look upwards from directory
2279 if ( defined($config_file) ) {
2280 if ( my ( $start_dir, $search_file ) =
2281 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2283 $start_dir = '.' if !$start_dir;
2284 $start_dir = Cwd::realpath($start_dir);
2285 if ( my $found_file =
2286 find_file_upwards( $start_dir, $search_file ) )
2288 $config_file = $found_file;
2292 unless ( -e $config_file ) {
2293 Warn "cannot find file given with -pro=$config_file: $!\n";
2297 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
2298 Die "usage: -pro=filename or --profile=filename, no spaces\n";
2300 elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
2304 elsif ( $i =~ /^-(version|v)$/ ) {
2308 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
2309 dump_defaults( @{$rdefaults} );
2312 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
2313 dump_long_names( @{$roption_string} );
2316 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
2317 dump_short_names($rexpansion);
2320 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
2321 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
2326 if ( $saw_dump_profile && $saw_ignore_profile ) {
2327 Warn "No profile to dump because of -npro\n";
2331 #---------------------------------------------------------------
2332 # read any .perltidyrc configuration file
2333 #---------------------------------------------------------------
2334 unless ($saw_ignore_profile) {
2336 # resolve possible conflict between $perltidyrc_stream passed
2337 # as call parameter to perltidy and -pro=filename on command
2339 if ($perltidyrc_stream) {
2342 Conflict: a perltidyrc configuration file was specified both as this
2343 perltidy call parameter: $perltidyrc_stream
2344 and with this -profile=$config_file.
2345 Using -profile=$config_file.
2349 $config_file = $perltidyrc_stream;
2353 # look for a config file if we don't have one yet
2354 my $rconfig_file_chatter;
2355 ${$rconfig_file_chatter} = "";
2357 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2358 $rpending_complaint )
2359 unless $config_file;
2361 # open any config file
2364 ( $fh_config, $config_file ) =
2365 Perl::Tidy::streamhandle( $config_file, 'r' );
2366 unless ($fh_config) {
2367 ${$rconfig_file_chatter} .=
2368 "# $config_file exists but cannot be opened\n";
2372 if ($saw_dump_profile) {
2373 dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
2379 my ( $rconfig_list, $death_message ) =
2380 read_config_file( $fh_config, $config_file, $rexpansion );
2381 Die $death_message if ($death_message);
2383 # process any .perltidyrc parameters right now so we can
2385 if ( @{$rconfig_list} ) {
2386 local @ARGV = @{$rconfig_list};
2388 expand_command_abbreviations( $rexpansion, \@raw_options,
2391 if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
2393 "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
2396 # Anything left in this local @ARGV is an error and must be
2397 # invalid bare words from the configuration file. We cannot
2398 # check this earlier because bare words may have been valid
2399 # values for parameters. We had to wait for GetOptions to have
2403 my $str = "\'" . pop(@ARGV) . "\'";
2404 while ( my $param = pop(@ARGV) ) {
2405 if ( length($str) < 70 ) {
2406 $str .= ", '$param'";
2414 There are $count unrecognized values in the configuration file '$config_file':
2416 Use leading dashes for parameters. Use -npro to ignore this file.
2420 # Undo any options which cause premature exit. They are not
2421 # appropriate for a config file, and it could be hard to
2422 # diagnose the cause of the premature exit.
2425 dump-cuddled-block-list
2432 dump-want-left-space
2433 dump-want-right-space
2441 if ( defined( $Opts{$_} ) ) {
2443 Warn "ignoring --$_ in config file: $config_file\n";
2450 #---------------------------------------------------------------
2451 # now process the command line parameters
2452 #---------------------------------------------------------------
2453 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2455 local $SIG{'__WARN__'} = sub { Warn $_[0] };
2456 if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
2457 Die "Error on command line; for help try 'perltidy -h'\n";
2460 # reset Getopt::Long configuration back to its previous value
2461 eval { Getopt::Long::Configure($glc) } if defined $glc;
2463 return ( \%Opts, $config_file, \@raw_options, $roption_string,
2464 $rexpansion, $roption_category, $roption_range );
2465 } # end of _process_command_line
2469 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2471 #---------------------------------------------------------------
2472 # check and handle any interactions among the basic options..
2473 #---------------------------------------------------------------
2475 # Since -vt, -vtc, and -cti are abbreviations, but under
2476 # msdos, an unquoted input parameter like vtc=1 will be
2477 # seen as 2 parameters, vtc and 1, so the abbreviations
2478 # won't be seen. Therefore, we will catch them here if
2481 if ( defined $rOpts->{'vertical-tightness'} ) {
2482 my $vt = $rOpts->{'vertical-tightness'};
2483 $rOpts->{'paren-vertical-tightness'} = $vt;
2484 $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2485 $rOpts->{'brace-vertical-tightness'} = $vt;
2488 if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2489 my $vtc = $rOpts->{'vertical-tightness-closing'};
2490 $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
2491 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2492 $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
2495 if ( defined $rOpts->{'closing-token-indentation'} ) {
2496 my $cti = $rOpts->{'closing-token-indentation'};
2497 $rOpts->{'closing-square-bracket-indentation'} = $cti;
2498 $rOpts->{'closing-brace-indentation'} = $cti;
2499 $rOpts->{'closing-paren-indentation'} = $cti;
2502 # In quiet mode, there is no log file and hence no way to report
2503 # results of syntax check, so don't do it.
2504 if ( $rOpts->{'quiet'} ) {
2505 $rOpts->{'check-syntax'} = 0;
2508 # can't check syntax if no output
2509 if ( $rOpts->{'format'} ne 'tidy' ) {
2510 $rOpts->{'check-syntax'} = 0;
2513 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2514 # wide variety of nasty problems on these systems, because they cannot
2515 # reliably run backticks. Don't even think about changing this!
2516 if ( $rOpts->{'check-syntax'}
2518 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2520 $rOpts->{'check-syntax'} = 0;
2523 # Added Dec 2017: Deactivating check-syntax for all systems for safety
2524 # because unexpected results can occur when code in BEGIN blocks is
2525 # executed. This flag was included to help check for perltidy mistakes,
2526 # and may still be useful for debugging. To activate for testing comment
2527 # out the next three lines.
2529 $rOpts->{'check-syntax'} = 0;
2532 # It's really a bad idea to check syntax as root unless you wrote
2533 # the script yourself. FIXME: not sure if this works with VMS
2534 unless ($is_Windows) {
2536 if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2537 $rOpts->{'check-syntax'} = 0;
2538 ${$rpending_complaint} .=
2539 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2543 # check iteration count and quietly fix if necessary:
2544 # - iterations option only applies to code beautification mode
2545 # - the convergence check should stop most runs on iteration 2, and
2546 # virtually all on iteration 3. But we'll allow up to 6.
2547 if ( $rOpts->{'format'} ne 'tidy' ) {
2548 $rOpts->{'iterations'} = 1;
2550 elsif ( defined( $rOpts->{'iterations'} ) ) {
2551 if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2552 elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 }
2555 $rOpts->{'iterations'} = 1;
2558 my $check_blank_count = sub {
2559 my ( $key, $abbrev ) = @_;
2560 if ( $rOpts->{$key} ) {
2561 if ( $rOpts->{$key} < 0 ) {
2563 Warn "negative value of $abbrev, setting 0\n";
2565 if ( $rOpts->{$key} > 100 ) {
2566 Warn "unreasonably large value of $abbrev, reducing\n";
2567 $rOpts->{$key} = 100;
2572 # check for reasonable number of blank lines and fix to avoid problems
2573 $check_blank_count->( 'blank-lines-before-subs', '-blbs' );
2574 $check_blank_count->( 'blank-lines-before-packages', '-blbp' );
2575 $check_blank_count->( 'blank-lines-after-block-opening', '-blao' );
2576 $check_blank_count->( 'blank-lines-before-block-closing', '-blbc' );
2578 # setting a non-negative logfile gap causes logfile to be saved
2579 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2580 $rOpts->{'logfile'} = 1;
2583 # set short-cut flag when only indentation is to be done.
2584 # Note that the user may or may not have already set the
2586 if ( !$rOpts->{'add-whitespace'}
2587 && !$rOpts->{'delete-old-whitespace'}
2588 && !$rOpts->{'add-newlines'}
2589 && !$rOpts->{'delete-old-newlines'} )
2591 $rOpts->{'indent-only'} = 1;
2594 # -isbc implies -ibc
2595 if ( $rOpts->{'indent-spaced-block-comments'} ) {
2596 $rOpts->{'indent-block-comments'} = 1;
2599 # -bli flag implies -bl
2600 if ( $rOpts->{'brace-left-and-indent'} ) {
2601 $rOpts->{'opening-brace-on-new-line'} = 1;
2604 if ( $rOpts->{'opening-brace-always-on-right'}
2605 && $rOpts->{'opening-brace-on-new-line'} )
2608 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2609 'opening-brace-on-new-line' (-bl). Ignoring -bl.
2611 $rOpts->{'opening-brace-on-new-line'} = 0;
2614 # it simplifies things if -bl is 0 rather than undefined
2615 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2616 $rOpts->{'opening-brace-on-new-line'} = 0;
2619 # -sbl defaults to -bl if not defined
2620 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2621 $rOpts->{'opening-sub-brace-on-new-line'} =
2622 $rOpts->{'opening-brace-on-new-line'};
2625 if ( $rOpts->{'entab-leading-whitespace'} ) {
2626 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2627 Warn "-et=n must use a positive integer; ignoring -et\n";
2628 $rOpts->{'entab-leading-whitespace'} = undef;
2631 # entab leading whitespace has priority over the older 'tabs' option
2632 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2635 # set a default tabsize to be used in guessing the starting indentation
2636 # level if and only if this run does not use tabs and the old code does
2638 if ( $rOpts->{'default-tabsize'} ) {
2639 if ( $rOpts->{'default-tabsize'} < 0 ) {
2640 Warn "negative value of -dt, setting 0\n";
2641 $rOpts->{'default-tabsize'} = 0;
2643 if ( $rOpts->{'default-tabsize'} > 20 ) {
2644 Warn "unreasonably large value of -dt, reducing\n";
2645 $rOpts->{'default-tabsize'} = 20;
2649 $rOpts->{'default-tabsize'} = 8;
2652 # Define $tabsize, the number of spaces per tab for use in
2653 # guessing the indentation of source lines with leading tabs.
2654 # Assume same as for this run if tabs are used , otherwise assume
2655 # a default value, typically 8
2657 $rOpts->{'entab-leading-whitespace'}
2658 ? $rOpts->{'entab-leading-whitespace'}
2659 : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
2660 : $rOpts->{'default-tabsize'};
2664 sub find_file_upwards {
2665 my ( $search_dir, $search_file ) = @_;
2667 $search_dir =~ s{/+$}{};
2668 $search_file =~ s{^/+}{};
2671 my $try_path = "$search_dir/$search_file";
2672 if ( -f $try_path ) {
2675 elsif ( $search_dir eq '/' ) {
2679 $search_dir = dirname($search_dir);
2684 sub expand_command_abbreviations {
2686 # go through @ARGV and expand any abbreviations
2688 my ( $rexpansion, $rraw_options, $config_file ) = @_;
2690 # set a pass limit to prevent an infinite loop;
2691 # 10 should be plenty, but it may be increased to allow deeply
2692 # nested expansions.
2693 my $max_passes = 10;
2696 # keep looping until all expansions have been converted into actual
2698 foreach my $pass_count ( 0 .. $max_passes ) {
2700 my $abbrev_count = 0;
2702 # loop over each item in @ARGV..
2703 foreach my $word (@ARGV) {
2705 # convert any leading 'no-' to just 'no'
2706 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2708 # if it is a dash flag (instead of a file name)..
2709 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2714 # save the raw input for debug output in case of circular refs
2715 if ( $pass_count == 0 ) {
2716 push( @{$rraw_options}, $word );
2719 # recombine abbreviation and flag, if necessary,
2720 # to allow abbreviations with arguments such as '-vt=1'
2721 if ( $rexpansion->{ $abr . $flags } ) {
2722 $abr = $abr . $flags;
2726 # if we see this dash item in the expansion hash..
2727 if ( $rexpansion->{$abr} ) {
2730 # stuff all of the words that it expands to into the
2731 # new arg list for the next pass
2732 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2733 next unless $abbrev; # for safety; shouldn't happen
2734 push( @new_argv, '--' . $abbrev . $flags );
2738 # not in expansion hash, must be actual long name
2740 push( @new_argv, $word );
2744 # not a dash item, so just save it for the next pass
2746 push( @new_argv, $word );
2748 } # end of this pass
2750 # update parameter list @ARGV to the new one
2752 last unless ( $abbrev_count > 0 );
2754 # make sure we are not in an infinite loop
2755 if ( $pass_count == $max_passes ) {
2758 I'm tired. We seem to be in an infinite loop trying to expand aliases.
2759 Here are the raw options;
2762 my $num = @new_argv;
2765 After $max_passes passes here is ARGV
2771 After $max_passes passes ARGV has $num entries
2777 Please check your configuration file $config_file for circular-references.
2778 To deactivate it, use -npro.
2783 Program bug - circular-references in the %expansion hash, probably due to
2784 a recent program change.
2787 } # end of check for circular references
2788 } # end of loop over all passes
2792 # Debug routine -- this will dump the expansion hash
2793 sub dump_short_names {
2794 my $rexpansion = shift;
2796 List of short names. This list shows how all abbreviations are
2797 translated into other abbreviations and, eventually, into long names.
2798 New abbreviations may be defined in a .perltidyrc file.
2799 For a list of all long names, use perltidy --dump-long-names (-dln).
2800 --------------------------------------------------------------------------
2802 foreach my $abbrev ( sort keys %$rexpansion ) {
2803 my @list = @{ $rexpansion->{$abbrev} };
2804 print STDOUT "$abbrev --> @list\n";
2809 sub check_vms_filename {
2811 # given a valid filename (the perltidy input file)
2812 # create a modified filename and separator character
2815 # Contributed by Michael Cartmell
2817 my $filename = shift;
2818 my ( $base, $path ) = fileparse($filename);
2820 # remove explicit ; version
2821 $base =~ s/;-?\d*$//
2823 # remove explicit . version ie two dots in filename NB ^ escapes a dot
2824 or $base =~ s/( # begin capture $1
2825 (?:^|[^^])\. # match a dot not preceded by a caret
2826 (?: # followed by nothing
2828 .*[^^] # anything ending in a non caret
2831 \.-?\d*$ # match . version number
2834 # normalise filename, if there are no unescaped dots then append one
2835 $base .= '.' unless $base =~ /(?:^|[^^])\./;
2837 # if we don't already have an extension then we just append the extension
2838 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2839 return ( $path . $base, $separator );
2844 # TODO: are these more standard names?
2845 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2847 # Returns a string that determines what MS OS we are on.
2848 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2849 # Returns blank string if not an MS system.
2850 # Original code contributed by: Yves Orton
2851 # We need to know this to decide where to look for config files
2853 my $rpending_complaint = shift;
2855 return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2857 # Systems built from Perl source may not have Win32.pm
2858 # But probably have Win32::GetOSVersion() anyway so the
2859 # following line is not 'required':
2860 # return $os unless eval('require Win32');
2862 # Use the standard API call to determine the version
2863 my ( $undef, $major, $minor, $build, $id );
2864 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2867 # NAME ID MAJOR MINOR
2868 # Windows NT 4 2 4 0
2869 # Windows 2000 2 5 0
2871 # Windows Server 2003 2 5 2
2873 return "win32s" unless $id; # If id==0 then its a win32s box.
2874 $os = { # Magic numbers from MSDN
2875 # documentation of GetOSVersion
2882 0 => "2000", # or NT 4, see below
2889 # If $os is undefined, the above code is out of date. Suggested updates
2891 unless ( defined $os ) {
2893 ${$rpending_complaint} .= <<EOS;
2894 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2895 We won't be able to look for a system-wide config file.
2899 # Unfortunately the logic used for the various versions isn't so clever..
2900 # so we have to handle an outside case.
2901 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2906 ( $^O !~ /win32|dos/i )
2909 && ( $^O ne 'MacOS' );
2912 sub look_for_Windows {
2914 # determine Windows sub-type and location of
2915 # system-wide configuration files
2916 my $rpending_complaint = shift;
2917 my $is_Windows = ( $^O =~ /win32|dos/i );
2919 $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
2920 return ( $is_Windows, $Windows_type );
2923 sub find_config_file {
2925 # look for a .perltidyrc configuration file
2926 # For Windows also look for a file named perltidy.ini
2927 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2928 $rpending_complaint ) = @_;
2930 ${$rconfig_file_chatter} .= "# Config file search...system reported as:";
2932 ${$rconfig_file_chatter} .= "Windows $Windows_type\n";
2935 ${$rconfig_file_chatter} .= " $^O\n";
2938 # sub to check file existence and record all tests
2939 my $exists_config_file = sub {
2940 my $config_file = shift;
2941 return 0 unless $config_file;
2942 ${$rconfig_file_chatter} .= "# Testing: $config_file\n";
2943 return -f $config_file;
2946 # Sub to search upward for config file
2947 my $resolve_config_file = sub {
2949 # resolve <dir>/.../<file>, meaning look upwards from directory
2950 my $config_file = shift;
2952 if ( my ( $start_dir, $search_file ) =
2953 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2955 ${$rconfig_file_chatter} .=
2956 "# Searching Upward: $config_file\n";
2957 $start_dir = '.' if !$start_dir;
2958 $start_dir = Cwd::realpath($start_dir);
2959 if ( my $found_file =
2960 find_file_upwards( $start_dir, $search_file ) )
2962 $config_file = $found_file;
2963 ${$rconfig_file_chatter} .= "# Found: $config_file\n";
2967 return $config_file;
2972 # look in current directory first
2973 $config_file = ".perltidyrc";
2974 return $config_file if $exists_config_file->($config_file);
2976 $config_file = "perltidy.ini";
2977 return $config_file if $exists_config_file->($config_file);
2980 # Default environment vars.
2981 my @envs = qw(PERLTIDY HOME);
2983 # Check the NT/2k/XP locations, first a local machine def, then a
2985 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2987 # Now go through the environment ...
2988 foreach my $var (@envs) {
2989 ${$rconfig_file_chatter} .= "# Examining: \$ENV{$var}";
2990 if ( defined( $ENV{$var} ) ) {
2991 ${$rconfig_file_chatter} .= " = $ENV{$var}\n";
2993 # test ENV{ PERLTIDY } as file:
2994 if ( $var eq 'PERLTIDY' ) {
2995 $config_file = "$ENV{$var}";
2996 $config_file = $resolve_config_file->($config_file);
2997 return $config_file if $exists_config_file->($config_file);
3000 # test ENV as directory:
3001 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
3002 $config_file = $resolve_config_file->($config_file);
3003 return $config_file if $exists_config_file->($config_file);
3006 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
3007 $config_file = $resolve_config_file->($config_file);
3008 return $config_file if $exists_config_file->($config_file);
3012 ${$rconfig_file_chatter} .= "\n";
3016 # then look for a system-wide definition
3017 # where to look varies with OS
3020 if ($Windows_type) {
3021 my ( $os, $system, $allusers ) =
3022 Win_Config_Locs( $rpending_complaint, $Windows_type );
3024 # Check All Users directory, if there is one.
3025 # i.e. C:\Documents and Settings\User\perltidy.ini
3028 $config_file = catfile( $allusers, ".perltidyrc" );
3029 return $config_file if $exists_config_file->($config_file);
3031 $config_file = catfile( $allusers, "perltidy.ini" );
3032 return $config_file if $exists_config_file->($config_file);
3035 # Check system directory.
3036 # retain old code in case someone has been able to create
3037 # a file with a leading period.
3038 $config_file = catfile( $system, ".perltidyrc" );
3039 return $config_file if $exists_config_file->($config_file);
3041 $config_file = catfile( $system, "perltidy.ini" );
3042 return $config_file if $exists_config_file->($config_file);
3046 # Place to add customization code for other systems
3047 elsif ( $^O eq 'OS2' ) {
3049 elsif ( $^O eq 'MacOS' ) {
3051 elsif ( $^O eq 'VMS' ) {
3054 # Assume some kind of Unix
3057 $config_file = "/usr/local/etc/perltidyrc";
3058 return $config_file if $exists_config_file->($config_file);
3060 $config_file = "/etc/perltidyrc";
3061 return $config_file if $exists_config_file->($config_file);
3064 # Couldn't find a config file
3068 sub Win_Config_Locs {
3070 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
3071 # or undef if its not a win32 OS. In list context returns OS, System
3072 # Directory, and All Users Directory. All Users will be empty on a
3073 # 9x/Me box. Contributed by: Yves Orton.
3075 # my ( $rpending_complaint, $os ) = @_;
3076 # if ( !$os ) { $os = Win_OS_Type(); }
3078 my $rpending_complaint = shift;
3079 my $os = (@_) ? shift : Win_OS_Type();
3085 if ( $os =~ /9[58]|Me/ ) {
3086 $system = "C:/Windows";
3088 elsif ( $os =~ /NT|XP|200?/ ) {
3089 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
3092 ? "C:/WinNT/profiles/All Users/"
3093 : "C:/Documents and Settings/All Users/";
3097 # This currently would only happen on a win32s computer. I don't have
3098 # one to test, so I am unsure how to proceed. Suggestions welcome!
3099 ${$rpending_complaint} .=
3100 "I dont know a sensible place to look for config files on an $os system.\n";
3103 return wantarray ? ( $os, $system, $allusers ) : $os;
3106 sub dump_config_file {
3107 my ( $fh, $config_file, $rconfig_file_chatter ) = @_;
3108 print STDOUT "$$rconfig_file_chatter";
3110 print STDOUT "# Dump of file: '$config_file'\n";
3111 while ( my $line = $fh->getline() ) { print STDOUT $line }
3112 eval { $fh->close() };
3115 print STDOUT "# ...no config file found\n";
3120 sub read_config_file {
3122 my ( $fh, $config_file, $rexpansion ) = @_;
3123 my @config_list = ();
3125 # file is bad if non-empty $death_message is returned
3126 my $death_message = "";
3130 my $opening_brace_line;
3131 while ( my $line = $fh->getline() ) {
3134 ( $line, $death_message ) =
3135 strip_comment( $line, $config_file, $line_no );
3136 last if ($death_message);
3138 $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
3143 # Look for complete or partial abbreviation definition of the form
3144 # name { body } or name { or name { body
3145 # See rules in perltidy's perldoc page
3146 # Section: Other Controls - Creating a new abbreviation
3147 if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
3148 my $oldname = $name;
3149 ( $name, $body ) = ( $2, $3 );
3151 # Cannot start new abbreviation unless old abbreviation is complete
3152 last if ($opening_brace_line);
3154 $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
3156 # handle a new alias definition
3157 if ( ${$rexpansion}{$name} ) {
3159 my @names = sort keys %$rexpansion;
3161 "Here is a list of all installed aliases\n(@names)\n"
3162 . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
3165 ${$rexpansion}{$name} = [];
3168 # leading opening braces not allowed
3169 elsif ( $line =~ /^{/ ) {
3170 $opening_brace_line = undef;
3172 "Unexpected '{' at line $line_no in config file '$config_file'\n";
3176 # Look for abbreviation closing: body } or }
3177 elsif ( $line =~ /^(.*)?\}$/ ) {
3179 if ($opening_brace_line) {
3180 $opening_brace_line = undef;
3184 "Unexpected '}' at line $line_no in config file '$config_file'\n";
3189 # Now store any parameters
3192 my ( $rbody_parts, $msg ) = parse_args($body);
3194 $death_message = <<EOM;
3195 Error reading file '$config_file' at line number $line_no.
3197 Please fix this line or use -npro to avoid reading this file
3204 # remove leading dashes if this is an alias
3205 foreach ( @{$rbody_parts} ) { s/^\-+//; }
3206 push @{ ${$rexpansion}{$name} }, @{$rbody_parts};
3209 push( @config_list, @{$rbody_parts} );
3214 if ($opening_brace_line) {
3216 "Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
3218 eval { $fh->close() };
3219 return ( \@config_list, $death_message );
3224 # Strip any comment from a command line
3225 my ( $instr, $config_file, $line_no ) = @_;
3228 # check for full-line comment
3229 if ( $instr =~ /^\s*#/ ) {
3230 return ( "", $msg );
3233 # nothing to do if no comments
3234 if ( $instr !~ /#/ ) {
3235 return ( $instr, $msg );
3238 # handle case of no quotes
3239 elsif ( $instr !~ /['"]/ ) {
3241 # We now require a space before the # of a side comment
3242 # this allows something like:
3244 # Otherwise, it would have to be quoted:
3246 $instr =~ s/\s+\#.*$//;
3247 return ( $instr, $msg );
3250 # handle comments and quotes
3252 my $quote_char = "";
3255 # looking for ending quote character
3257 if ( $instr =~ /\G($quote_char)/gc ) {
3261 elsif ( $instr =~ /\G(.)/gc ) {
3265 # error..we reached the end without seeing the ending quote char
3268 Error reading file $config_file at line number $line_no.
3269 Did not see ending quote character <$quote_char> in this text:
3271 Please fix this line or use -npro to avoid reading this file
3277 # accumulating characters and looking for start of a quoted string
3279 if ( $instr =~ /\G([\"\'])/gc ) {
3284 # Note: not yet enforcing the space-before-hash rule for side
3285 # comments if the parameter is quoted.
3286 elsif ( $instr =~ /\G#/gc ) {
3289 elsif ( $instr =~ /\G(.)/gc ) {
3297 return ( $outstr, $msg );
3302 # Parse a command string containing multiple string with possible
3303 # quotes, into individual commands. It might look like this, for example:
3305 # -wba=" + - " -some-thing -wbb='. && ||'
3307 # There is no need, at present, to handle escaped quote characters.
3308 # (They are not perltidy tokens, so needn't be in strings).
3311 my @body_parts = ();
3312 my $quote_char = "";
3317 # looking for ending quote character
3319 if ( $body =~ /\G($quote_char)/gc ) {
3322 elsif ( $body =~ /\G(.)/gc ) {
3326 # error..we reached the end without seeing the ending quote char
3328 if ( length($part) ) { push @body_parts, $part; }
3330 Did not see ending quote character <$quote_char> in this text:
3337 # accumulating characters and looking for start of a quoted string
3339 if ( $body =~ /\G([\"\'])/gc ) {
3342 elsif ( $body =~ /\G(\s+)/gc ) {
3343 if ( length($part) ) { push @body_parts, $part; }
3346 elsif ( $body =~ /\G(.)/gc ) {
3350 if ( length($part) ) { push @body_parts, $part; }
3355 return ( \@body_parts, $msg );
3358 sub dump_long_names {
3362 # Command line long names (passed to GetOptions)
3363 #---------------------------------------------------------------
3364 # here is a summary of the Getopt codes:
3365 # <none> does not take an argument
3366 # =s takes a mandatory string
3367 # :s takes an optional string
3368 # =i takes a mandatory integer
3369 # :i takes an optional integer
3370 # ! does not take an argument and may be negated
3371 # i.e., -foo and -nofoo are allowed
3372 # a double dash signals the end of the options list
3374 #---------------------------------------------------------------
3377 foreach my $name ( sort @names ) { print STDOUT "$name\n" }
3383 print STDOUT "Default command line options:\n";
3384 foreach my $line ( sort @defaults ) { print STDOUT "$line\n" }
3388 sub readable_options {
3390 # return options for this run as a string which could be
3391 # put in a perltidyrc file
3392 my ( $rOpts, $roption_string ) = @_;
3394 my $rGetopt_flags = \%Getopt_flags;
3395 my $readable_options = "# Final parameter set for this run.\n";
3396 $readable_options .=
3397 "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
3398 foreach my $opt ( @{$roption_string} ) {
3400 if ( $opt =~ /(.*)(!|=.*)$/ ) {
3404 if ( defined( $rOpts->{$opt} ) ) {
3405 $rGetopt_flags->{$opt} = $flag;
3408 foreach my $key ( sort keys %{$rOpts} ) {
3409 my $flag = $rGetopt_flags->{$key};
3410 my $value = $rOpts->{$key};
3414 if ( $flag =~ /^=/ ) {
3415 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
3416 $suffix = "=" . $value;
3418 elsif ( $flag =~ /^!/ ) {
3419 $prefix .= "no" unless ($value);
3424 $readable_options .=
3425 "# ERROR in dump_options: unrecognized flag $flag for $key\n";
3428 $readable_options .= $prefix . $key . $suffix . "\n";
3430 return $readable_options;
3434 print STDOUT <<"EOM";
3435 This is perltidy, v$VERSION
3437 Copyright 2000-2018, Steve Hancock
3439 Perltidy is free software and may be copied under the terms of the GNU
3440 General Public License, which is included in the distribution files.
3442 Complete documentation for perltidy can be found using 'man perltidy'
3443 or on the internet at http://perltidy.sourceforge.net.
3451 This is perltidy version $VERSION, a perl script indenter. Usage:
3453 perltidy [ options ] file1 file2 file3 ...
3454 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
3455 perltidy [ options ] file1 -o outfile
3456 perltidy [ options ] file1 -st >outfile
3457 perltidy [ options ] <infile >outfile
3459 Options have short and long forms. Short forms are shown; see
3460 man pages for long forms. Note: '=s' indicates a required string,
3461 and '=n' indicates a required integer.
3465 -o=file name of the output file (only if single input file)
3466 -oext=s change output extension from 'tdy' to s
3467 -opath=path change path to be 'path' for output files
3468 -b backup original to .bak and modify file in-place
3469 -bext=s change default backup extension from 'bak' to s
3470 -q deactivate error messages (for running under editor)
3471 -w include non-critical warning messages in the .ERR error output
3472 -syn run perl -c to check syntax (default under unix systems)
3473 -log save .LOG file, which has useful diagnostics
3474 -f force perltidy to read a binary file
3475 -g like -log but writes more detailed .LOG file, for debugging scripts
3476 -opt write the set of options actually used to a .LOG file
3477 -npro ignore .perltidyrc configuration command file
3478 -pro=file read configuration commands from file instead of .perltidyrc
3479 -st send output to standard output, STDOUT
3480 -se send all error output to standard error output, STDERR
3481 -v display version number to standard output and quit
3484 -i=n use n columns per indentation level (default n=4)
3485 -t tabs: use one tab character per indentation level, not recommeded
3486 -nt no tabs: use n spaces per indentation level (default)
3487 -et=n entab leading whitespace n spaces per tab; not recommended
3488 -io "indent only": just do indentation, no other formatting.
3489 -sil=n set starting indentation level to n; use if auto detection fails
3490 -ole=s specify output line ending (s=dos or win, mac, unix)
3491 -ple keep output line endings same as input (input must be filename)
3494 -fws freeze whitespace; this disables all whitespace changes
3495 and disables the following switches:
3496 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
3497 -bbt same as -bt but for code block braces; same as -bt if not given
3498 -bbvt block braces vertically tight; use with -bl or -bli
3499 -bbvtl=s make -bbvt to apply to selected list of block types
3500 -pt=n paren tightness (n=0, 1 or 2)
3501 -sbt=n square bracket tightness (n=0, 1, or 2)
3502 -bvt=n brace vertical tightness,
3503 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3504 -pvt=n paren vertical tightness (see -bvt for n)
3505 -sbvt=n square bracket vertical tightness (see -bvt for n)
3506 -bvtc=n closing brace vertical tightness:
3507 n=(0=open, 1=sometimes close, 2=always close)
3508 -pvtc=n closing paren vertical tightness, see -bvtc for n.
3509 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3510 -ci=n sets continuation indentation=n, default is n=2 spaces
3511 -lp line up parentheses, brackets, and non-BLOCK braces
3512 -sfs add space before semicolon in for( ; ; )
3513 -aws allow perltidy to add whitespace (default)
3514 -dws delete all old non-essential whitespace
3515 -icb indent closing brace of a code block
3516 -cti=n closing indentation of paren, square bracket, or non-block brace:
3517 n=0 none, =1 align with opening, =2 one full indentation level
3518 -icp equivalent to -cti=2
3519 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
3520 -wrs=s want space right of tokens in string;
3521 -sts put space before terminal semicolon of a statement
3522 -sak=s put space between keywords given in s and '(';
3523 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3526 -fnl freeze newlines; this disables all line break changes
3527 and disables the following switches:
3528 -anl add newlines; ok to introduce new line breaks
3529 -bbs add blank line before subs and packages
3530 -bbc add blank line before block comments
3531 -bbb add blank line between major blocks
3532 -kbl=n keep old blank lines? 0=no, 1=some, 2=all
3533 -mbl=n maximum consecutive blank lines to output (default=1)
3534 -ce cuddled else; use this style: '} else {'
3535 -cb cuddled blocks (other than 'if-elsif-else')
3536 -cbl=s list of blocks to cuddled, default 'try-catch-finally'
3537 -dnl delete old newlines (default)
3538 -l=n maximum line length; default n=80
3539 -bl opening brace on new line
3540 -sbl opening sub brace on new line. value of -bl is used if not given.
3541 -bli opening brace on new line and indented
3542 -bar opening brace always on right, even for long clauses
3543 -vt=n vertical tightness (requires -lp); n controls break after opening
3544 token: 0=never 1=no break if next line balanced 2=no break
3545 -vtc=n vertical tightness of closing container; n controls if closing
3546 token starts new line: 0=always 1=not unless list 1=never
3547 -wba=s want break after tokens in string; i.e. wba=': .'
3548 -wbb=s want break before tokens in string
3549 -wn weld nested: combines opening and closing tokens when both are adjacent
3551 Following Old Breakpoints
3552 -kis keep interior semicolons. Allows multiple statements per line.
3553 -boc break at old comma breaks: turns off all automatic list formatting
3554 -bol break at old logical breakpoints: or, and, ||, && (default)
3555 -bok break at old list keyword breakpoints such as map, sort (default)
3556 -bot break at old conditional (ternary ?:) operator breakpoints (default)
3557 -boa break at old attribute breakpoints
3558 -cab=n break at commas after a comma-arrow (=>):
3559 n=0 break at all commas after =>
3560 n=1 stable: break unless this breaks an existing one-line container
3561 n=2 break only if a one-line container cannot be formed
3562 n=3 do not treat commas after => specially at all
3565 -ibc indent block comments (default)
3566 -isbc indent spaced block comments; may indent unless no leading space
3567 -msc=n minimum desired spaces to side comment, default 4
3568 -fpsc=n fix position for side comments; default 0;
3569 -csc add or update closing side comments after closing BLOCK brace
3570 -dcsc delete closing side comments created by a -csc command
3571 -cscp=s change closing side comment prefix to be other than '## end'
3572 -cscl=s change closing side comment to apply to selected list of blocks
3573 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3574 -csct=n maximum number of columns of appended text, default n=20
3575 -cscw causes warning if old side comment is overwritten with -csc
3577 -sbc use 'static block comments' identified by leading '##' (default)
3578 -sbcp=s change static block comment identifier to be other than '##'
3579 -osbc outdent static block comments
3581 -ssc use 'static side comments' identified by leading '##' (default)
3582 -sscp=s change static side comment identifier to be other than '##'
3584 Delete selected text
3585 -dac delete all comments AND pod
3586 -dbc delete block comments
3587 -dsc delete side comments
3590 Send selected text to a '.TEE' file
3591 -tac tee all comments AND pod
3592 -tbc tee block comments
3593 -tsc tee side comments
3597 -olq outdent long quoted strings (default)
3598 -olc outdent a long block comment line
3599 -ola outdent statement labels
3600 -okw outdent control keywords (redo, next, last, goto, return)
3601 -okwl=s specify alternative keywords for -okw command
3604 -mft=n maximum fields per table; default n=40
3605 -x do not format lines before hash-bang line (i.e., for VMS)
3606 -asc allows perltidy to add a ';' when missing (default)
3607 -dsm allows perltidy to delete an unnecessary ';' (default)
3609 Combinations of other parameters
3610 -gnu attempt to follow GNU Coding Standards as applied to perl
3611 -mangle remove as many newlines as possible (but keep comments and pods)
3612 -extrude insert as many newlines as possible
3614 Dump and die, debugging
3615 -dop dump options used in this run to standard output and quit
3616 -ddf dump default options to standard output and quit
3617 -dsn dump all option short names to standard output and quit
3618 -dln dump option long names to standard output and quit
3619 -dpro dump whatever configuration file is in effect to standard output
3620 -dtt dump all token types to standard output and quit
3623 -html write an html file (see 'man perl2web' for many options)
3624 Note: when -html is used, no indentation or formatting are done.
3625 Hint: try perltidy -html -css=mystyle.css filename.pl
3626 and edit mystyle.css to change the appearance of filename.html.
3627 -nnn gives line numbers
3628 -pre only writes out <pre>..</pre> code section
3629 -toc places a table of contents to subs at the top (default)
3630 -pod passes pod text through pod2html (default)
3631 -frm write html as a frame (3 files)
3632 -text=s extra extension for table of contents if -frm, default='toc'
3633 -sext=s extra extension for file content if -frm, default='src'
3635 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3636 negates the long forms. For example, -nasc means don't add missing
3639 If you are unable to see this entire text, try "perltidy -h | more"
3640 For more detailed information, and additional options, try "man perltidy",
3641 or go to the perltidy home page at http://perltidy.sourceforge.net
3647 sub process_this_file {
3649 my ( $tokenizer, $formatter ) = @_;
3651 while ( my $line = $tokenizer->get_line() ) {
3652 $formatter->write_line($line);
3654 my $severe_error = $tokenizer->report_tokenization_errors();
3655 eval { $formatter->finish_formatting($severe_error) };
3662 # Use 'perl -c' to make sure that we did not create bad syntax
3663 # This is a very good independent check for programming errors
3665 # Given names of the input and output files, ($istream, $ostream),
3666 # we do the following:
3667 # - check syntax of the input file
3668 # - if bad, all done (could be an incomplete code snippet)
3669 # - if infile syntax ok, then check syntax of the output file;
3670 # - if outfile syntax bad, issue warning; this implies a code bug!
3671 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3673 my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
3674 my $infile_syntax_ok = 0;
3675 my $line_of_dashes = '-' x 42 . "\n";
3677 my $flags = $rOpts->{'perl-syntax-check-flags'};
3679 # be sure we invoke perl with -c
3680 # note: perl will accept repeated flags like '-c -c'. It is safest
3681 # to append another -c than try to find an interior bundled c, as
3682 # in -Tc, because such a 'c' might be in a quoted string, for example.
3683 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3685 # be sure we invoke perl with -x if requested
3686 # same comments about repeated parameters applies
3687 if ( $rOpts->{'look-for-hash-bang'} ) {
3688 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3691 # this shouldn't happen unless a temporary file couldn't be made
3692 if ( $istream eq '-' ) {
3693 $logger_object->write_logfile_entry(
3694 "Cannot run perl -c on STDIN and STDOUT\n");
3695 return $infile_syntax_ok;
3698 $logger_object->write_logfile_entry(
3699 "checking input file syntax with perl $flags\n");
3701 # Not all operating systems/shells support redirection of the standard
3703 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3705 my ( $istream_filename, $perl_output ) =
3706 do_syntax_check( $istream, $flags, $error_redirection );
3707 $logger_object->write_logfile_entry(
3708 "Input stream passed to Perl as file $istream_filename\n");
3709 $logger_object->write_logfile_entry($line_of_dashes);
3710 $logger_object->write_logfile_entry("$perl_output\n");
3712 if ( $perl_output =~ /syntax\s*OK/ ) {
3713 $infile_syntax_ok = 1;
3714 $logger_object->write_logfile_entry($line_of_dashes);
3715 $logger_object->write_logfile_entry(
3716 "checking output file syntax with perl $flags ...\n");
3717 my ( $ostream_filename, $perl_output ) =
3718 do_syntax_check( $ostream, $flags, $error_redirection );
3719 $logger_object->write_logfile_entry(
3720 "Output stream passed to Perl as file $ostream_filename\n");
3721 $logger_object->write_logfile_entry($line_of_dashes);
3722 $logger_object->write_logfile_entry("$perl_output\n");
3724 unless ( $perl_output =~ /syntax\s*OK/ ) {
3725 $logger_object->write_logfile_entry($line_of_dashes);
3726 $logger_object->warning(
3727 "The output file has a syntax error when tested with perl $flags $ostream !\n"
3729 $logger_object->warning(
3730 "This implies an error in perltidy; the file $ostream is bad\n"
3732 $logger_object->report_definite_bug();
3734 # the perl version number will be helpful for diagnosing the problem
3735 $logger_object->write_logfile_entry(
3736 qx/perl -v $error_redirection/ . "\n" );
3741 # Only warn of perl -c syntax errors. Other messages,
3742 # such as missing modules, are too common. They can be
3743 # seen by running with perltidy -w
3744 $logger_object->complain("A syntax check using perl $flags\n");
3745 $logger_object->complain(
3746 "for the output in file $istream_filename gives:\n");
3747 $logger_object->complain($line_of_dashes);
3748 $logger_object->complain("$perl_output\n");
3749 $logger_object->complain($line_of_dashes);
3750 $infile_syntax_ok = -1;
3751 $logger_object->write_logfile_entry($line_of_dashes);
3752 $logger_object->write_logfile_entry(
3753 "The output file will not be checked because of input file problems\n"
3756 return $infile_syntax_ok;
3759 sub do_syntax_check {
3760 my ( $stream, $flags, $error_redirection ) = @_;
3762 ############################################################
3763 # This code is not reachable because syntax check is deactivated,
3764 # but it is retained for reference.
3765 ############################################################
3767 # We need a named input file for executing perl
3768 my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
3770 # TODO: Need to add name of file to log somewhere
3771 # otherwise Perl output is hard to read
3772 if ( !$stream_filename ) { return $stream_filename, "" }
3774 # We have to quote the filename in case it has unusual characters
3775 # or spaces. Example: this filename #CM11.pm# gives trouble.
3776 my $quoted_stream_filename = '"' . $stream_filename . '"';
3778 # Under VMS something like -T will become -t (and an error) so we
3779 # will put quotes around the flags. Double quotes seem to work on
3780 # Unix/Windows/VMS, but this may not work on all systems. (Single
3781 # quotes do not work under Windows). It could become necessary to
3782 # put double quotes around each flag, such as: -"c" -"T"
3783 # We may eventually need some system-dependent coding here.
3784 $flags = '"' . $flags . '"';
3786 # now wish for luck...
3787 my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
3790 unlink $stream_filename
3791 or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n");
3793 return $stream_filename, $msg;
3796 #####################################################################
3798 # This is a stripped down version of IO::Scalar
3799 # Given a reference to a scalar, it supplies either:
3800 # a getline method which reads lines (mode='r'), or
3801 # a print method which reads lines (mode='w')
3803 #####################################################################
3804 package Perl::Tidy::IOScalar;
3808 my ( $package, $rscalar, $mode ) = @_;
3809 my $ref = ref $rscalar;
3810 if ( $ref ne 'SCALAR' ) {
3812 ------------------------------------------------------------------------
3813 expecting ref to SCALAR but got ref to ($ref); trace follows:
3814 ------------------------------------------------------------------------
3818 if ( $mode eq 'w' ) {
3820 return bless [ $rscalar, $mode ], $package;
3822 elsif ( $mode eq 'r' ) {
3824 # Convert a scalar to an array.
3825 # This avoids looking for "\n" on each call to getline
3827 # NOTES: The -1 count is needed to avoid loss of trailing blank lines
3828 # (which might be important in a DATA section).
3830 if ( $rscalar && ${$rscalar} ) {
3831 @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
3833 # remove possible extra blank line introduced with split
3834 if ( @array && $array[-1] eq "\n" ) { pop @array }
3837 return bless [ \@array, $mode, $i_next ], $package;
3841 ------------------------------------------------------------------------
3842 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3843 ------------------------------------------------------------------------
3850 my $mode = $self->[1];
3851 if ( $mode ne 'r' ) {
3853 ------------------------------------------------------------------------
3854 getline call requires mode = 'r' but mode = ($mode); trace follows:
3855 ------------------------------------------------------------------------
3858 my $i = $self->[2]++;
3859 return $self->[0]->[$i];
3863 my ( $self, $msg ) = @_;
3864 my $mode = $self->[1];
3865 if ( $mode ne 'w' ) {
3867 ------------------------------------------------------------------------
3868 print call requires mode = 'w' but mode = ($mode); trace follows:
3869 ------------------------------------------------------------------------
3872 ${ $self->[0] } .= $msg;
3874 sub close { return }
3876 #####################################################################
3878 # This is a stripped down version of IO::ScalarArray
3879 # Given a reference to an array, it supplies either:
3880 # a getline method which reads lines (mode='r'), or
3881 # a print method which reads lines (mode='w')
3883 # NOTE: this routine assumes that there aren't any embedded
3884 # newlines within any of the array elements. There are no checks
3887 #####################################################################
3888 package Perl::Tidy::IOScalarArray;
3892 my ( $package, $rarray, $mode ) = @_;
3893 my $ref = ref $rarray;
3894 if ( $ref ne 'ARRAY' ) {
3896 ------------------------------------------------------------------------
3897 expecting ref to ARRAY but got ref to ($ref); trace follows:
3898 ------------------------------------------------------------------------
3902 if ( $mode eq 'w' ) {
3904 return bless [ $rarray, $mode ], $package;
3906 elsif ( $mode eq 'r' ) {
3908 return bless [ $rarray, $mode, $i_next ], $package;
3912 ------------------------------------------------------------------------
3913 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3914 ------------------------------------------------------------------------
3921 my $mode = $self->[1];
3922 if ( $mode ne 'r' ) {
3924 ------------------------------------------------------------------------
3925 getline requires mode = 'r' but mode = ($mode); trace follows:
3926 ------------------------------------------------------------------------
3929 my $i = $self->[2]++;
3930 return $self->[0]->[$i];
3934 my ( $self, $msg ) = @_;
3935 my $mode = $self->[1];
3936 if ( $mode ne 'w' ) {
3938 ------------------------------------------------------------------------
3939 print requires mode = 'w' but mode = ($mode); trace follows:
3940 ------------------------------------------------------------------------
3943 push @{ $self->[0] }, $msg;
3945 sub close { return }
3947 #####################################################################
3949 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3950 # which returns the next line to be parsed
3952 #####################################################################
3954 package Perl::Tidy::LineSource;
3958 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3960 my $input_line_ending;
3961 if ( $rOpts->{'preserve-line-endings'} ) {
3962 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3965 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3968 # in order to check output syntax when standard output is used,
3969 # or when it is an object, we have to make a copy of the file
3970 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3973 # Turning off syntax check when input output is used.
3974 # The reason is that temporary files cause problems on
3976 $rOpts->{'check-syntax'} = 0;
3978 ${$rpending_logfile_message} .= <<EOM;
3979 Note: --syntax check will be skipped because standard input is used
3986 _filename => $input_file,
3987 _input_line_ending => $input_line_ending,
3988 _rinput_buffer => [],
3993 sub close_input_file {
3996 # Only close physical files, not STDIN and other objects
3997 my $filename = $self->{_filename};
3998 if ( $filename ne '-' && !ref $filename ) {
3999 eval { $self->{_fh}->close() };
4007 my $fh = $self->{_fh};
4008 my $rinput_buffer = $self->{_rinput_buffer};
4010 if ( scalar( @{$rinput_buffer} ) ) {
4011 $line = shift @{$rinput_buffer};
4014 $line = $fh->getline();
4016 # patch to read raw mac files under unix, dos
4017 # see if the first line has embedded \r's
4018 if ( $line && !$self->{_started} ) {
4019 if ( $line =~ /[\015][^\015\012]/ ) {
4021 # found one -- break the line up and store in a buffer
4022 @{$rinput_buffer} = map { $_ . "\n" } split /\015/, $line;
4023 my $count = @{$rinput_buffer};
4024 $line = shift @{$rinput_buffer};
4026 $self->{_started}++;
4032 #####################################################################
4034 # the Perl::Tidy::LineSink class supplies a write_line method for
4035 # actual file writing
4037 #####################################################################
4039 package Perl::Tidy::LineSink;
4043 my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
4044 $rpending_logfile_message, $binmode )
4049 my $output_file_open = 0;
4051 if ( $rOpts->{'format'} eq 'tidy' ) {
4052 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
4053 unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
4054 $output_file_open = 1;
4056 if ( $rOpts->{'character-encoding'}
4057 && $rOpts->{'character-encoding'} eq 'utf8' )
4059 if ( ref($fh) eq 'IO::File' ) {
4060 $fh->binmode(":encoding(UTF-8)");
4062 elsif ( $output_file eq '-' ) {
4063 binmode STDOUT, ":encoding(UTF-8)";
4067 # Patch for RT 122030
4068 elsif ( ref($fh) eq 'IO::File' ) { $fh->binmode(); }
4070 elsif ( $output_file eq '-' ) { binmode STDOUT }
4074 # in order to check output syntax when standard output is used,
4075 # or when it is an object, we have to make a copy of the file
4076 if ( $output_file eq '-' || ref $output_file ) {
4077 if ( $rOpts->{'check-syntax'} ) {
4079 # Turning off syntax check when standard output is used.
4080 # The reason is that temporary files cause problems on
4082 $rOpts->{'check-syntax'} = 0;
4083 ${$rpending_logfile_message} .= <<EOM;
4084 Note: --syntax check will be skipped because standard output is used
4093 _output_file => $output_file,
4094 _output_file_open => $output_file_open,
4096 _tee_file => $tee_file,
4097 _tee_file_opened => 0,
4098 _line_separator => $line_separator,
4099 _binmode => $binmode,
4105 my ( $self, $line ) = @_;
4106 my $fh = $self->{_fh};
4108 my $output_file_open = $self->{_output_file_open};
4110 $line .= $self->{_line_separator};
4112 $fh->print($line) if ( $self->{_output_file_open} );
4114 if ( $self->{_tee_flag} ) {
4115 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
4116 my $fh_tee = $self->{_fh_tee};
4117 print $fh_tee $line;
4124 $self->{_tee_flag} = 1;
4130 $self->{_tee_flag} = 0;
4134 sub really_open_tee_file {
4136 my $tee_file = $self->{_tee_file};
4138 $fh_tee = IO::File->new(">$tee_file")
4139 or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
4140 binmode $fh_tee if $self->{_binmode};
4141 $self->{_tee_file_opened} = 1;
4142 $self->{_fh_tee} = $fh_tee;
4146 sub close_output_file {
4149 # Only close physical files, not STDOUT and other objects
4150 my $output_file = $self->{_output_file};
4151 if ( $output_file ne '-' && !ref $output_file ) {
4152 eval { $self->{_fh}->close() } if $self->{_output_file_open};
4154 $self->close_tee_file();
4158 sub close_tee_file {
4161 # Only close physical files, not STDOUT and other objects
4162 if ( $self->{_tee_file_opened} ) {
4163 my $tee_file = $self->{_tee_file};
4164 if ( $tee_file ne '-' && !ref $tee_file ) {
4165 eval { $self->{_fh_tee}->close() };
4166 $self->{_tee_file_opened} = 0;
4172 #####################################################################
4174 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
4175 # useful for program development.
4177 # Only one such file is created regardless of the number of input
4178 # files processed. This allows the results of processing many files
4179 # to be summarized in a single file.
4181 # Output messages go to a file named DIAGNOSTICS, where
4182 # they are labeled by file and line. This allows many files to be
4183 # scanned at once for some particular condition of interest. It was
4184 # particularly useful for developing guessing strategies.
4186 # NOTE: This feature is deactivated in final releases but can be
4187 # reactivated for debugging by un-commenting the 'I' options flag
4189 #####################################################################
4191 package Perl::Tidy::Diagnostics;
4197 _write_diagnostics_count => 0,
4198 _last_diagnostic_file => "",
4204 sub set_input_file {
4205 my ( $self, $input_file ) = @_;
4206 $self->{_input_file} = $input_file;
4210 sub write_diagnostics {
4211 my ( $self, $msg ) = @_;
4213 unless ( $self->{_write_diagnostics_count} ) {
4214 open( $self->{_fh}, ">", "DIAGNOSTICS" )
4215 or Perl::Tidy::Die("couldn't open DIAGNOSTICS: $!\n");
4218 my $fh = $self->{_fh};
4219 my $last_diagnostic_file = $self->{_last_diagnostic_file};
4220 my $input_file = $self->{_input_file};
4221 if ( $last_diagnostic_file ne $input_file ) {
4222 $fh->print("\nFILE:$input_file\n");
4224 $self->{_last_diagnostic_file} = $input_file;
4225 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
4226 $fh->print("$input_line_number:\t$msg");
4227 $self->{_write_diagnostics_count}++;
4231 #####################################################################
4233 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
4235 #####################################################################
4237 package Perl::Tidy::Logger;
4241 my ( $class, $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude ) =
4244 my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
4246 # remove any old error output file if we might write a new one
4247 unless ( $fh_warnings || ref($warning_file) ) {
4248 if ( -e $warning_file ) {
4249 unlink($warning_file)
4251 "couldn't unlink warning file $warning_file: $!\n");
4256 defined( $rOpts->{'logfile-gap'} )
4257 ? $rOpts->{'logfile-gap'}
4259 if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
4262 _log_file => $log_file,
4263 _logfile_gap => $logfile_gap,
4265 _fh_warnings => $fh_warnings,
4266 _last_input_line_written => 0,
4267 _at_end_of_file => 0,
4269 _block_log_output => 0,
4270 _line_of_tokens => undef,
4271 _output_line_number => undef,
4272 _wrote_line_information_string => 0,
4273 _wrote_column_headings => 0,
4274 _warning_file => $warning_file,
4275 _warning_count => 0,
4276 _complaint_count => 0,
4277 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
4278 _saw_brace_error => 0,
4279 _saw_extrude => $saw_extrude,
4280 _output_array => [],
4284 sub get_warning_count {
4286 return $self->{_warning_count};
4289 sub get_use_prefix {
4291 return $self->{_use_prefix};
4294 sub block_log_output {
4296 $self->{_block_log_output} = 1;
4300 sub unblock_log_output {
4302 $self->{_block_log_output} = 0;
4306 sub interrupt_logfile {
4308 $self->{_use_prefix} = 0;
4309 $self->warning("\n");
4310 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
4314 sub resume_logfile {
4316 $self->write_logfile_entry( '#' x 60 . "\n" );
4317 $self->{_use_prefix} = 1;
4321 sub we_are_at_the_last_line {
4323 unless ( $self->{_wrote_line_information_string} ) {
4324 $self->write_logfile_entry("Last line\n\n");
4326 $self->{_at_end_of_file} = 1;
4330 # record some stuff in case we go down in flames
4332 my ( $self, $line_of_tokens, $output_line_number ) = @_;
4333 my $input_line = $line_of_tokens->{_line_text};
4334 my $input_line_number = $line_of_tokens->{_line_number};
4336 # save line information in case we have to write a logfile message
4337 $self->{_line_of_tokens} = $line_of_tokens;
4338 $self->{_output_line_number} = $output_line_number;
4339 $self->{_wrote_line_information_string} = 0;
4341 my $last_input_line_written = $self->{_last_input_line_written};
4342 my $rOpts = $self->{_rOpts};
4345 ( $input_line_number - $last_input_line_written ) >=
4346 $self->{_logfile_gap}
4348 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
4351 my $structural_indentation_level = $line_of_tokens->{_level_0};
4352 $structural_indentation_level = 0
4353 if ( $structural_indentation_level < 0 );
4354 $self->{_last_input_line_written} = $input_line_number;
4355 ( my $out_str = $input_line ) =~ s/^\s*//;
4358 $out_str = ( '.' x $structural_indentation_level ) . $out_str;
4360 if ( length($out_str) > 35 ) {
4361 $out_str = substr( $out_str, 0, 35 ) . " ....";
4363 $self->logfile_output( "", "$out_str\n" );
4368 sub write_logfile_entry {
4370 my ( $self, @msg ) = @_;
4372 # add leading >>> to avoid confusing error messages and code
4373 $self->logfile_output( ">>>", "@msg" );
4377 sub write_column_headings {
4380 $self->{_wrote_column_headings} = 1;
4381 my $routput_array = $self->{_output_array};
4382 push @{$routput_array}, <<EOM;
4383 The nesting depths in the table below are at the start of the lines.
4384 The indicated output line numbers are not always exact.
4385 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
4387 in:out indent c b nesting code + messages; (messages begin with >>>)
4388 lines levels i k (code begins with one '.' per indent level)
4389 ------ ----- - - -------- -------------------------------------------
4394 sub make_line_information_string {
4396 # make columns of information when a logfile message needs to go out
4398 my $line_of_tokens = $self->{_line_of_tokens};
4399 my $input_line_number = $line_of_tokens->{_line_number};
4400 my $line_information_string = "";
4401 if ($input_line_number) {
4403 my $output_line_number = $self->{_output_line_number};
4404 my $brace_depth = $line_of_tokens->{_curly_brace_depth};
4405 my $paren_depth = $line_of_tokens->{_paren_depth};
4406 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
4407 my $guessed_indentation_level =
4408 $line_of_tokens->{_guessed_indentation_level};
4409 ##my $rtoken_array = $line_of_tokens->{_rtoken_array};
4411 my $structural_indentation_level = $line_of_tokens->{_level_0};
4413 $self->write_column_headings() unless $self->{_wrote_column_headings};
4415 # keep logfile columns aligned for scripts up to 999 lines;
4416 # for longer scripts it doesn't really matter
4417 my $extra_space = "";
4419 ( $input_line_number < 10 ) ? " "
4420 : ( $input_line_number < 100 ) ? " "
4423 ( $output_line_number < 10 ) ? " "
4424 : ( $output_line_number < 100 ) ? " "
4427 # there are 2 possible nesting strings:
4428 # the original which looks like this: (0 [1 {2
4429 # the new one, which looks like this: {{[
4430 # the new one is easier to read, and shows the order, but
4431 # could be arbitrarily long, so we use it unless it is too long
4432 my $nesting_string =
4433 "($paren_depth [$square_bracket_depth {$brace_depth";
4434 my $nesting_string_new = $line_of_tokens->{_nesting_tokens_0};
4435 my $ci_level = $line_of_tokens->{_ci_level_0};
4436 if ( $ci_level > 9 ) { $ci_level = '*' }
4437 my $bk = ( $line_of_tokens->{_nesting_blocks_0} =~ /1$/ ) ? '1' : '0';
4439 if ( length($nesting_string_new) <= 8 ) {
4441 $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
4443 $line_information_string =
4444 "L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
4446 return $line_information_string;
4449 sub logfile_output {
4450 my ( $self, $prompt, $msg ) = @_;
4451 return if ( $self->{_block_log_output} );
4453 my $routput_array = $self->{_output_array};
4454 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
4455 push @{$routput_array}, "$msg";
4458 my $line_information_string = $self->make_line_information_string();
4459 $self->{_wrote_line_information_string} = 1;
4461 if ($line_information_string) {
4462 push @{$routput_array}, "$line_information_string $prompt$msg";
4465 push @{$routput_array}, "$msg";
4471 sub get_saw_brace_error {
4473 return $self->{_saw_brace_error};
4476 sub increment_brace_error {
4478 $self->{_saw_brace_error}++;
4483 my ( $self, $msg ) = @_;
4485 #use constant BRACE_WARNING_LIMIT => 10;
4486 my $BRACE_WARNING_LIMIT = 10;
4487 my $saw_brace_error = $self->{_saw_brace_error};
4489 if ( $saw_brace_error < $BRACE_WARNING_LIMIT ) {
4490 $self->warning($msg);
4493 $self->{_saw_brace_error} = $saw_brace_error;
4495 if ( $saw_brace_error == $BRACE_WARNING_LIMIT ) {
4496 $self->warning("No further warnings of this type will be given\n");
4503 # handle non-critical warning messages based on input flag
4504 my ( $self, $msg ) = @_;
4505 my $rOpts = $self->{_rOpts};
4507 # these appear in .ERR output only if -w flag is used
4508 if ( $rOpts->{'warning-output'} ) {
4509 $self->warning($msg);
4512 # otherwise, they go to the .LOG file
4514 $self->{_complaint_count}++;
4515 $self->write_logfile_entry($msg);
4522 # report errors to .ERR file (or stdout)
4523 my ( $self, $msg ) = @_;
4525 #use constant WARNING_LIMIT => 50;
4526 my $WARNING_LIMIT = 50;
4528 my $rOpts = $self->{_rOpts};
4529 unless ( $rOpts->{'quiet'} ) {
4531 my $warning_count = $self->{_warning_count};
4532 my $fh_warnings = $self->{_fh_warnings};
4533 if ( !$fh_warnings ) {
4534 my $warning_file = $self->{_warning_file};
4535 ( $fh_warnings, my $filename ) =
4536 Perl::Tidy::streamhandle( $warning_file, 'w' );
4537 $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
4538 Perl::Tidy::Warn "## Please see file $filename\n"
4539 unless ref($warning_file);
4540 $self->{_fh_warnings} = $fh_warnings;
4541 $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
4544 if ( $warning_count < $WARNING_LIMIT ) {
4545 if ( $self->get_use_prefix() > 0 ) {
4546 my $input_line_number =
4547 Perl::Tidy::Tokenizer::get_input_line_number();
4548 if ( !defined($input_line_number) ) { $input_line_number = -1 }
4549 $fh_warnings->print("$input_line_number:\t$msg");
4550 $self->write_logfile_entry("WARNING: $msg");
4553 $fh_warnings->print($msg);
4554 $self->write_logfile_entry($msg);
4558 $self->{_warning_count} = $warning_count;
4560 if ( $warning_count == $WARNING_LIMIT ) {
4561 $fh_warnings->print("No further warnings will be given\n");
4567 # programming bug codes:
4569 # 0 = maybe, not sure.
4571 sub report_possible_bug {
4573 my $saw_code_bug = $self->{_saw_code_bug};
4574 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4578 sub report_definite_bug {
4580 $self->{_saw_code_bug} = 1;
4584 sub ask_user_for_bug_report {
4586 my ( $self, $infile_syntax_ok, $formatter ) = @_;
4587 my $saw_code_bug = $self->{_saw_code_bug};
4588 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4589 $self->warning(<<EOM);
4591 You may have encountered a code bug in perltidy. If you think so, and
4592 the problem is not listed in the BUGS file at
4593 http://perltidy.sourceforge.net, please report it so that it can be
4594 corrected. Include the smallest possible script which has the problem,
4595 along with the .LOG file. See the manual pages for contact information.
4600 elsif ( $saw_code_bug == 1 ) {
4601 if ( $self->{_saw_extrude} ) {
4602 $self->warning(<<EOM);
4604 You may have encountered a bug in perltidy. However, since you are using the
4605 -extrude option, the problem may be with perl or one of its modules, which have
4606 occasional problems with this type of file. If you believe that the
4607 problem is with perltidy, and the problem is not listed in the BUGS file at
4608 http://perltidy.sourceforge.net, please report it so that it can be corrected.
4609 Include the smallest possible script which has the problem, along with the .LOG
4610 file. See the manual pages for contact information.
4615 $self->warning(<<EOM);
4617 Oops, you seem to have encountered a bug in perltidy. Please check the
4618 BUGS file at http://perltidy.sourceforge.net. If the problem is not
4619 listed there, please report it so that it can be corrected. Include the
4620 smallest possible script which produces this message, along with the
4621 .LOG file if appropriate. See the manual pages for contact information.
4622 Your efforts are appreciated.
4625 my $added_semicolon_count = 0;
4627 $added_semicolon_count =
4628 $formatter->get_added_semicolon_count();
4630 if ( $added_semicolon_count > 0 ) {
4631 $self->warning(<<EOM);
4633 The log file shows that perltidy added $added_semicolon_count semicolons.
4634 Please rerun with -nasc to see if that is the cause of the syntax error. Even
4635 if that is the problem, please report it so that it can be fixed.
4646 # called after all formatting to summarize errors
4647 my ( $self, $infile_syntax_ok, $formatter ) = @_;
4649 my $rOpts = $self->{_rOpts};
4650 my $warning_count = $self->{_warning_count};
4651 my $saw_code_bug = $self->{_saw_code_bug};
4654 ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4655 || $saw_code_bug == 1
4656 || $rOpts->{'logfile'};
4657 my $log_file = $self->{_log_file};
4658 if ($warning_count) {
4659 if ($save_logfile) {
4660 $self->block_log_output(); # avoid echoing this to the logfile
4662 "The logfile $log_file may contain useful information\n");
4663 $self->unblock_log_output();
4666 if ( $self->{_complaint_count} > 0 ) {
4668 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4672 if ( $self->{_saw_brace_error}
4673 && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
4675 $self->warning("To save a full .LOG file rerun with -g\n");
4678 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4680 if ($save_logfile) {
4681 my $log_file = $self->{_log_file};
4682 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4684 my $routput_array = $self->{_output_array};
4685 foreach ( @{$routput_array} ) { $fh->print($_) }
4686 if ( $log_file ne '-' && !ref $log_file ) {
4687 eval { $fh->close() };
4694 #####################################################################
4696 # The Perl::Tidy::DevNull class supplies a dummy print method
4698 #####################################################################
4700 package Perl::Tidy::DevNull;
4701 sub new { my $self = shift; return bless {}, $self }
4702 sub print { return }
4703 sub close { return }
4705 #####################################################################
4707 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4709 #####################################################################
4711 package Perl::Tidy::HtmlWriter;
4721 %short_to_long_names
4725 $missing_html_entities
4728 # replace unsafe characters with HTML entity representation if HTML::Entities
4730 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4734 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4735 $html_src_extension )
4738 my $html_file_opened = 0;
4740 ( $html_fh, my $html_filename ) =
4741 Perl::Tidy::streamhandle( $html_file, 'w' );
4743 Perl::Tidy::Warn("can't open $html_file: $!\n");
4746 $html_file_opened = 1;
4748 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4749 $input_file = "NONAME";
4752 # write the table of contents to a string
4754 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4757 my @pre_string_stack;
4758 if ( $rOpts->{'html-pre-only'} ) {
4760 # pre section goes directly to the output stream
4761 $html_pre_fh = $html_fh;
4762 $html_pre_fh->print( <<"PRE_END");
4768 # pre section go out to a temporary string
4770 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4771 push @pre_string_stack, \$pre_string;
4774 # pod text gets diverted if the 'pod2html' is used
4777 if ( $rOpts->{'pod2html'} ) {
4778 if ( $rOpts->{'html-pre-only'} ) {
4779 undef $rOpts->{'pod2html'};
4782 eval "use Pod::Html";
4785 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4786 undef $rOpts->{'pod2html'};
4789 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4796 if ( $rOpts->{'frames'} ) {
4797 unless ($extension) {
4799 "cannot use frames without a specified output extension; ignoring -frm\n";
4800 undef $rOpts->{'frames'};
4803 $toc_filename = $input_file . $html_toc_extension . $extension;
4804 $src_filename = $input_file . $html_src_extension . $extension;
4808 # ----------------------------------------------------------
4809 # Output is now directed as follows:
4810 # html_toc_fh <-- table of contents items
4811 # html_pre_fh <-- the <pre> section of formatted code, except:
4812 # html_pod_fh <-- pod goes here with the pod2html option
4813 # ----------------------------------------------------------
4815 my $title = $rOpts->{'title'};
4817 ( $title, my $path ) = fileparse($input_file);
4819 my $toc_item_count = 0;
4820 my $in_toc_package = "";
4823 _input_file => $input_file, # name of input file
4824 _title => $title, # title, unescaped
4825 _html_file => $html_file, # name of .html output file
4826 _toc_filename => $toc_filename, # for frames option
4827 _src_filename => $src_filename, # for frames option
4828 _html_file_opened => $html_file_opened, # a flag
4829 _html_fh => $html_fh, # the output stream
4830 _html_pre_fh => $html_pre_fh, # pre section goes here
4831 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
4832 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4833 _rpod_string => \$pod_string, # string holding pod
4834 _pod_cut_count => 0, # how many =cut's?
4835 _html_toc_fh => $html_toc_fh, # fh for table of contents
4836 _rtoc_string => \$toc_string, # string holding toc
4837 _rtoc_item_count => \$toc_item_count, # how many toc items
4838 _rin_toc_package => \$in_toc_package, # package name
4839 _rtoc_name_count => {}, # hash to track unique names
4840 _rpackage_stack => [], # stack to check for package
4842 _rlast_level => \$last_level, # brace indentation level
4848 # Add an item to the html table of contents.
4849 # This is called even if no table of contents is written,
4850 # because we still want to put the anchors in the <pre> text.
4851 # We are given an anchor name and its type; types are:
4852 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
4853 # There must be an 'EOF' call at the end to wrap things up.
4854 my ( $self, $name, $type ) = @_;
4855 my $html_toc_fh = $self->{_html_toc_fh};
4856 my $html_pre_fh = $self->{_html_pre_fh};
4857 my $rtoc_name_count = $self->{_rtoc_name_count};
4858 my $rtoc_item_count = $self->{_rtoc_item_count};
4859 my $rlast_level = $self->{_rlast_level};
4860 my $rin_toc_package = $self->{_rin_toc_package};
4861 my $rpackage_stack = $self->{_rpackage_stack};
4863 # packages contain sublists of subs, so to avoid errors all package
4864 # items are written and finished with the following routines
4865 my $end_package_list = sub {
4866 if ( ${$rin_toc_package} ) {
4867 $html_toc_fh->print("</ul>\n</li>\n");
4868 ${$rin_toc_package} = "";
4872 my $start_package_list = sub {
4873 my ( $unique_name, $package ) = @_;
4874 if ( ${$rin_toc_package} ) { $end_package_list->() }
4875 $html_toc_fh->print(<<EOM);
4876 <li><a href=\"#$unique_name\">package $package</a>
4879 ${$rin_toc_package} = $package;
4882 # start the table of contents on the first item
4883 unless ( ${$rtoc_item_count} ) {
4885 # but just quit if we hit EOF without any other entries
4886 # in this case, there will be no toc
4887 return if ( $type eq 'EOF' );
4888 $html_toc_fh->print( <<"TOC_END");
4889 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4893 ${$rtoc_item_count}++;
4895 # make a unique anchor name for this location:
4896 # - packages get a 'package-' prefix
4897 # - subs use their names
4898 my $unique_name = $name;
4899 if ( $type eq 'package' ) { $unique_name = "package-$name" }
4901 # append '-1', '-2', etc if necessary to make unique; this will
4902 # be unique because subs and packages cannot have a '-'
4903 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4904 $unique_name .= "-$count";
4907 # - all names get terminal '-' if pod2html is used, to avoid
4908 # conflicts with anchor names created by pod2html
4909 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4911 # start/stop lists of subs
4912 if ( $type eq 'sub' ) {
4913 my $package = $rpackage_stack->[ ${$rlast_level} ];
4914 unless ($package) { $package = 'main' }
4916 # if we're already in a package/sub list, be sure its the right
4917 # package or else close it
4918 if ( ${$rin_toc_package} && ${$rin_toc_package} ne $package ) {
4919 $end_package_list->();
4922 # start a package/sub list if necessary
4923 unless ( ${$rin_toc_package} ) {
4924 $start_package_list->( $unique_name, $package );
4928 # now write an entry in the toc for this item
4929 if ( $type eq 'package' ) {
4930 $start_package_list->( $unique_name, $name );
4932 elsif ( $type eq 'sub' ) {
4933 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4936 $end_package_list->();
4937 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4940 # write the anchor in the <pre> section
4941 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4943 # end the table of contents, if any, on the end of file
4944 if ( $type eq 'EOF' ) {
4945 $html_toc_fh->print( <<"TOC_END");
4947 <!-- END CODE INDEX -->
4955 # This is the official list of tokens which may be identified by the
4956 # user. Long names are used as getopt keys. Short names are
4957 # convenient short abbreviations for specifying input. Short names
4958 # somewhat resemble token type characters, but are often different
4959 # because they may only be alphanumeric, to allow command line
4960 # input. Also, note that because of case insensitivity of html,
4961 # this table must be in a single case only (I've chosen to use all
4963 # When adding NEW_TOKENS: update this hash table
4964 # short names => long names
4965 %short_to_long_names = (
4975 'pu' => 'punctuation',
4976 'i' => 'identifier',
4978 'h' => 'here-doc-target',
4979 'hh' => 'here-doc-text',
4981 'sc' => 'semicolon',
4982 'm' => 'subroutine',
4986 # Now we have to map actual token types into one of the above short
4987 # names; any token types not mapped will get 'punctuation'
4990 # The values of this hash table correspond to the keys of the
4991 # previous hash table.
4992 # The keys of this hash table are token types and can be seen
4993 # by running with --dump-token-types (-dtt).
4995 # When adding NEW_TOKENS: update this hash table
4996 # $type => $short_name
4997 %token_short_names = (
5022 # These token types will all be called identifiers for now
5023 # FIXME: could separate user defined modules as separate type
5024 my @identifier = qw" i t U C Y Z G :: CORE::";
5025 @token_short_names{@identifier} = ('i') x scalar(@identifier);
5027 # These token types will be called 'structure'
5028 my @structure = qw" { } ";
5029 @token_short_names{@structure} = ('s') x scalar(@structure);
5031 # OLD NOTES: save for reference
5032 # Any of these could be added later if it would be useful.
5033 # For now, they will by default become punctuation
5034 # my @list = qw" L R [ ] ";
5035 # @token_long_names{@list} = ('non-structure') x scalar(@list);
5038 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
5040 # @token_long_names{@list} = ('math') x scalar(@list);
5042 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
5043 # @token_long_names{@list} = ('bit') x scalar(@list);
5045 # my @list = qw" == != < > <= <=> ";
5046 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
5048 # my @list = qw" && || ! &&= ||= //= ";
5049 # @token_long_names{@list} = ('logical') x scalar(@list);
5051 # my @list = qw" . .= =~ !~ x x= ";
5052 # @token_long_names{@list} = ('string-operators') x scalar(@list);
5055 # my @list = qw" .. -> <> ... \ ? ";
5056 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
5060 sub make_getopt_long_names {
5061 my ( $class, $rgetopt_names ) = @_;
5062 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
5063 push @{$rgetopt_names}, "html-color-$name=s";
5064 push @{$rgetopt_names}, "html-italic-$name!";
5065 push @{$rgetopt_names}, "html-bold-$name!";
5067 push @{$rgetopt_names}, "html-color-background=s";
5068 push @{$rgetopt_names}, "html-linked-style-sheet=s";
5069 push @{$rgetopt_names}, "nohtml-style-sheets";
5070 push @{$rgetopt_names}, "html-pre-only";
5071 push @{$rgetopt_names}, "html-line-numbers";
5072 push @{$rgetopt_names}, "html-entities!";
5073 push @{$rgetopt_names}, "stylesheet";
5074 push @{$rgetopt_names}, "html-table-of-contents!";
5075 push @{$rgetopt_names}, "pod2html!";
5076 push @{$rgetopt_names}, "frames!";
5077 push @{$rgetopt_names}, "html-toc-extension=s";
5078 push @{$rgetopt_names}, "html-src-extension=s";
5080 # Pod::Html parameters:
5081 push @{$rgetopt_names}, "backlink=s";
5082 push @{$rgetopt_names}, "cachedir=s";
5083 push @{$rgetopt_names}, "htmlroot=s";
5084 push @{$rgetopt_names}, "libpods=s";
5085 push @{$rgetopt_names}, "podpath=s";
5086 push @{$rgetopt_names}, "podroot=s";
5087 push @{$rgetopt_names}, "title=s";
5089 # Pod::Html parameters with leading 'pod' which will be removed
5090 # before the call to Pod::Html
5091 push @{$rgetopt_names}, "podquiet!";
5092 push @{$rgetopt_names}, "podverbose!";
5093 push @{$rgetopt_names}, "podrecurse!";
5094 push @{$rgetopt_names}, "podflush";
5095 push @{$rgetopt_names}, "podheader!";
5096 push @{$rgetopt_names}, "podindex!";
5100 sub make_abbreviated_names {
5102 # We're appending things like this to the expansion list:
5103 # 'hcc' => [qw(html-color-comment)],
5104 # 'hck' => [qw(html-color-keyword)],
5106 my ( $class, $rexpansion ) = @_;
5108 # abbreviations for color/bold/italic properties
5109 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
5110 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
5111 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
5112 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
5113 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
5114 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
5117 # abbreviations for all other html options
5118 ${$rexpansion}{"hcbg"} = ["html-color-background"];
5119 ${$rexpansion}{"pre"} = ["html-pre-only"];
5120 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
5121 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
5122 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
5123 ${$rexpansion}{"hent"} = ["html-entities"];
5124 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
5125 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
5126 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
5127 ${$rexpansion}{"ss"} = ["stylesheet"];
5128 ${$rexpansion}{"pod"} = ["pod2html"];
5129 ${$rexpansion}{"npod"} = ["nopod2html"];
5130 ${$rexpansion}{"frm"} = ["frames"];
5131 ${$rexpansion}{"nfrm"} = ["noframes"];
5132 ${$rexpansion}{"text"} = ["html-toc-extension"];
5133 ${$rexpansion}{"sext"} = ["html-src-extension"];
5139 # This will be called once after options have been parsed
5140 my ( $class, $rOpts ) = @_;
5142 # X11 color names for default settings that seemed to look ok
5143 # (these color names are only used for programming clarity; the hex
5144 # numbers are actually written)
5145 use constant ForestGreen => "#228B22";
5146 use constant SaddleBrown => "#8B4513";
5147 use constant magenta4 => "#8B008B";
5148 use constant IndianRed3 => "#CD5555";
5149 use constant DeepSkyBlue4 => "#00688B";
5150 use constant MediumOrchid3 => "#B452CD";
5151 use constant black => "#000000";
5152 use constant white => "#FFFFFF";
5153 use constant red => "#FF0000";
5155 # set default color, bold, italic properties
5156 # anything not listed here will be given the default (punctuation) color --
5157 # these types currently not listed and get default: ws pu s sc cm co p
5158 # When adding NEW_TOKENS: add an entry here if you don't want defaults
5160 # set_default_properties( $short_name, default_color, bold?, italic? );
5161 set_default_properties( 'c', ForestGreen, 0, 0 );
5162 set_default_properties( 'pd', ForestGreen, 0, 1 );
5163 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
5164 set_default_properties( 'q', IndianRed3, 0, 0 );
5165 set_default_properties( 'hh', IndianRed3, 0, 1 );
5166 set_default_properties( 'h', IndianRed3, 1, 0 );
5167 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
5168 set_default_properties( 'w', black, 0, 0 );
5169 set_default_properties( 'n', MediumOrchid3, 0, 0 );
5170 set_default_properties( 'v', MediumOrchid3, 0, 0 );
5171 set_default_properties( 'j', IndianRed3, 1, 0 );
5172 set_default_properties( 'm', red, 1, 0 );
5174 set_default_color( 'html-color-background', white );
5175 set_default_color( 'html-color-punctuation', black );
5177 # setup property lookup tables for tokens based on their short names
5178 # every token type has a short name, and will use these tables
5179 # to do the html markup
5180 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
5181 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
5182 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
5183 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
5186 # write style sheet to STDOUT and die if requested
5187 if ( defined( $rOpts->{'stylesheet'} ) ) {
5188 write_style_sheet_file('-');
5192 # make sure user gives a file name after -css
5193 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
5194 $css_linkname = $rOpts->{'html-linked-style-sheet'};
5195 if ( $css_linkname =~ /^-/ ) {
5196 Perl::Tidy::Die "You must specify a valid filename after -css\n";
5200 # check for conflict
5201 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
5202 $rOpts->{'nohtml-style-sheets'} = 0;
5203 warning("You can't specify both -css and -nss; -nss ignored\n");
5206 # write a style sheet file if necessary
5207 if ($css_linkname) {
5209 # if the selected filename exists, don't write, because user may
5210 # have done some work by hand to create it; use backup name instead
5211 # Also, this will avoid a potential disaster in which the user
5212 # forgets to specify the style sheet, like this:
5213 # perltidy -html -css myfile1.pl myfile2.pl
5214 # This would cause myfile1.pl to parsed as the style sheet by GetOpts
5215 my $css_filename = $css_linkname;
5216 unless ( -e $css_filename ) {
5217 write_style_sheet_file($css_filename);
5220 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
5224 sub write_style_sheet_file {
5226 my $css_filename = shift;
5228 unless ( $fh = IO::File->new("> $css_filename") ) {
5229 Perl::Tidy::Die "can't open $css_filename: $!\n";
5231 write_style_sheet_data($fh);
5232 eval { $fh->close };
5236 sub write_style_sheet_data {
5238 # write the style sheet data to an open file handle
5241 my $bg_color = $rOpts->{'html-color-background'};
5242 my $text_color = $rOpts->{'html-color-punctuation'};
5244 # pre-bgcolor is new, and may not be defined
5245 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
5246 $pre_bg_color = $bg_color unless $pre_bg_color;
5248 $fh->print(<<"EOM");
5249 /* default style sheet generated by perltidy */
5250 body {background: $bg_color; color: $text_color}
5251 pre { color: $text_color;
5252 background: $pre_bg_color;
5253 font-family: courier;
5258 foreach my $short_name ( sort keys %short_to_long_names ) {
5259 my $long_name = $short_to_long_names{$short_name};
5261 my $abbrev = '.' . $short_name;
5262 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
5263 my $color = $html_color{$short_name};
5264 if ( !defined($color) ) { $color = $text_color }
5265 $fh->print("$abbrev \{ color: $color;");
5267 if ( $html_bold{$short_name} ) {
5268 $fh->print(" font-weight:bold;");
5271 if ( $html_italic{$short_name} ) {
5272 $fh->print(" font-style:italic;");
5274 $fh->print("} /* $long_name */\n");
5279 sub set_default_color {
5281 # make sure that options hash $rOpts->{$key} contains a valid color
5282 my ( $key, $color ) = @_;
5283 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
5284 $rOpts->{$key} = check_RGB($color);
5290 # if color is a 6 digit hex RGB value, prepend a #, otherwise
5291 # assume that it is a valid ascii color name
5293 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
5297 sub set_default_properties {
5298 my ( $short_name, $color, $bold, $italic ) = @_;
5300 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
5302 $key = "html-bold-$short_to_long_names{$short_name}";
5303 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
5304 $key = "html-italic-$short_to_long_names{$short_name}";
5305 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
5311 # Use Pod::Html to process the pod and make the page
5312 # then merge the perltidy code sections into it.
5313 # return 1 if success, 0 otherwise
5314 my ( $self, $pod_string, $css_string, $toc_string, $rpre_string_stack ) =
5316 my $input_file = $self->{_input_file};
5317 my $title = $self->{_title};
5318 my $success_flag = 0;
5320 # don't try to use pod2html if no pod
5321 unless ($pod_string) {
5322 return $success_flag;
5325 # Pod::Html requires a real temporary filename
5326 my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
5329 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5330 return $success_flag;
5333 #------------------------------------------------------------------
5334 # Warning: a temporary file is open; we have to clean up if
5335 # things go bad. From here on all returns should be by going to
5336 # RETURN so that the temporary file gets unlinked.
5337 #------------------------------------------------------------------
5339 # write the pod text to the temporary file
5340 $fh_tmp->print($pod_string);
5343 # Hand off the pod to pod2html.
5344 # Note that we can use the same temporary filename for input and output
5345 # because of the way pod2html works.
5349 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
5351 # Flags with string args:
5352 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
5353 # "podpath=s", "podroot=s"
5354 # Note: -css=s is handled by perltidy itself
5355 foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot))
5357 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
5360 # Toggle switches; these have extra leading 'pod'
5361 # "header!", "index!", "recurse!", "quiet!", "verbose!"
5362 foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
5363 my $kwd = $kw; # allows us to strip 'pod'
5364 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
5365 elsif ( defined( $rOpts->{$kw} ) ) {
5367 push @args, "--no$kwd";
5372 my $kw = 'podflush';
5373 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
5375 # Must clean up if pod2html dies (it can);
5376 # Be careful not to overwrite callers __DIE__ routine
5377 local $SIG{__DIE__} = sub {
5378 unlink $tmpfile if -e $tmpfile;
5379 Perl::Tidy::Die $_[0];
5384 $fh_tmp = IO::File->new( $tmpfile, 'r' );
5387 # this error shouldn't happen ... we just used this filename
5389 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5393 my $html_fh = $self->{_html_fh};
5399 # This routine will write the html selectively and store the toc
5400 my $html_print = sub {
5402 $html_fh->print($_) unless ($no_print);
5403 if ($in_toc) { push @toc, $_ }
5407 # loop over lines of html output from pod2html and merge in
5408 # the necessary perltidy html sections
5409 my ( $saw_body, $saw_index, $saw_body_end );
5410 while ( my $line = $fh_tmp->getline() ) {
5412 if ( $line =~ /^\s*<html>\s*$/i ) {
5413 my $date = localtime;
5414 $html_print->("<!-- Generated by perltidy on $date -->\n");
5415 $html_print->($line);
5418 # Copy the perltidy css, if any, after <body> tag
5419 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
5421 $html_print->($css_string) if $css_string;
5422 $html_print->($line);
5424 # add a top anchor and heading
5425 $html_print->("<a name=\"-top-\"></a>\n");
5426 $title = escape_html($title);
5427 $html_print->("<h1>$title</h1>\n");
5430 # check for start of index, old pod2html
5431 # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
5432 # <!-- INDEX BEGIN -->
5436 # <!-- INDEX END -->
5438 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
5441 # when frames are used, an extra table of contents in the
5442 # contents panel is confusing, so don't print it
5443 $no_print = $rOpts->{'frames'}
5444 || !$rOpts->{'html-table-of-contents'};
5445 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5446 $html_print->($line);
5449 # check for start of index, new pod2html
5450 # After Pod::Html VERSION 1.15_02 it is delimited as:
5454 elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
5458 # when frames are used, an extra table of contents in the
5459 # contents panel is confusing, so don't print it
5460 $no_print = $rOpts->{'frames'}
5461 || !$rOpts->{'html-table-of-contents'};
5462 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5463 $html_print->($line);
5466 # Check for end of index, old pod2html
5467 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
5469 $html_print->($line);
5471 # Copy the perltidy toc, if any, after the Pod::Html toc
5473 $html_print->("<hr />\n") if $rOpts->{'frames'};
5474 $html_print->("<h2>Code Index:</h2>\n");
5475 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5476 $html_print->(@toc);
5482 # must track <ul> depth level for new pod2html
5483 elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
5485 $html_print->($line);
5488 # Check for end of index, for new pod2html
5489 elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
5491 $html_print->($line);
5493 # Copy the perltidy toc, if any, after the Pod::Html toc
5494 if ( $ul_level <= 0 ) {
5497 $html_print->("<hr />\n") if $rOpts->{'frames'};
5498 $html_print->("<h2>Code Index:</h2>\n");
5499 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5500 $html_print->(@toc);
5508 # Copy one perltidy section after each marker
5509 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
5511 $html_print->($1) if $1;
5513 # Intermingle code and pod sections if we saw multiple =cut's.
5514 if ( $self->{_pod_cut_count} > 1 ) {
5515 my $rpre_string = shift( @{$rpre_string_stack} );
5516 if ( ${$rpre_string} ) {
5517 $html_print->('<pre>');
5518 $html_print->( ${$rpre_string} );
5519 $html_print->('</pre>');
5523 # shouldn't happen: we stored a string before writing
5526 "Problem merging html stream with pod2html; order may be wrong\n";
5528 $html_print->($line);
5531 # If didn't see multiple =cut lines, we'll put the pod out first
5532 # and then the code, because it's less confusing.
5535 # since we are not intermixing code and pod, we don't need
5536 # or want any <hr> lines which separated pod and code
5537 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
5541 # Copy any remaining code section before the </body> tag
5542 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
5544 if ( @{$rpre_string_stack} ) {
5545 unless ( $self->{_pod_cut_count} > 1 ) {
5546 $html_print->('<hr />');
5548 while ( my $rpre_string = shift( @{$rpre_string_stack} ) ) {
5549 $html_print->('<pre>');
5550 $html_print->( ${$rpre_string} );
5551 $html_print->('</pre>');
5554 $html_print->($line);
5557 $html_print->($line);
5562 unless ($saw_body) {
5563 Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
5566 unless ($saw_body_end) {
5567 Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
5570 unless ($saw_index) {
5571 Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
5576 eval { $html_fh->close() };
5578 # note that we have to unlink tmpfile before making frames
5579 # because the tmpfile may be one of the names used for frames
5580 if ( -e $tmpfile ) {
5581 unless ( unlink($tmpfile) ) {
5582 Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n");
5587 if ( $success_flag && $rOpts->{'frames'} ) {
5588 $self->make_frame( \@toc );
5590 return $success_flag;
5595 # Make a frame with table of contents in the left panel
5596 # and the text in the right panel.
5598 # $html_filename contains the no-frames html output
5599 # $rtoc is a reference to an array with the table of contents
5600 my ( $self, $rtoc ) = @_;
5601 my $input_file = $self->{_input_file};
5602 my $html_filename = $self->{_html_file};
5603 my $toc_filename = $self->{_toc_filename};
5604 my $src_filename = $self->{_src_filename};
5605 my $title = $self->{_title};
5606 $title = escape_html($title);
5608 # FUTURE input parameter:
5609 my $top_basename = "";
5611 # We need to produce 3 html files:
5612 # 1. - the table of contents
5613 # 2. - the contents (source code) itself
5614 # 3. - the frame which contains them
5616 # get basenames for relative links
5617 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5618 my ( $src_basename, $src_path ) = fileparse($src_filename);
5620 # 1. Make the table of contents panel, with appropriate changes
5621 # to the anchor names
5622 my $src_frame_name = 'SRC';
5624 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5627 # 2. The current .html filename is renamed to be the contents panel
5628 rename( $html_filename, $src_filename )
5629 or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
5631 # 3. Then use the original html filename for the frame
5633 $title, $html_filename, $top_basename,
5634 $toc_basename, $src_basename, $src_frame_name
5639 sub write_toc_html {
5641 # write a separate html table of contents file for frames
5642 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5643 my $fh = IO::File->new( $toc_filename, 'w' )
5644 or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
5648 <title>$title</title>
5651 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5655 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5656 $fh->print( join "", @{$rtoc} );
5666 sub write_frame_html {
5668 # write an html file to be the table of contents frame
5670 $title, $frame_filename, $top_basename,
5671 $toc_basename, $src_basename, $src_frame_name
5674 my $fh = IO::File->new( $frame_filename, 'w' )
5675 or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
5678 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5679 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5680 <?xml version="1.0" encoding="iso-8859-1" ?>
5681 <html xmlns="http://www.w3.org/1999/xhtml">
5683 <title>$title</title>
5687 # two left panels, one right, if master index file
5688 if ($top_basename) {
5690 <frameset cols="20%,80%">
5691 <frameset rows="30%,70%">
5692 <frame src = "$top_basename" />
5693 <frame src = "$toc_basename" />
5698 # one left panels, one right, if no master index file
5701 <frameset cols="20%,*">
5702 <frame src = "$toc_basename" />
5706 <frame src = "$src_basename" name = "$src_frame_name" />
5709 <p>If you see this message, you are using a non-frame-capable web client.</p>
5710 <p>This document contains:</p>
5712 <li><a href="$toc_basename">A table of contents</a></li>
5713 <li><a href="$src_basename">The source code</a></li>
5723 sub change_anchor_names {
5725 # add a filename and target to anchors
5726 # also return the first anchor
5727 my ( $rlines, $filename, $target ) = @_;
5729 foreach my $line ( @{$rlines} ) {
5731 # We're looking for lines like this:
5732 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5733 # ---- - -------- -----------------
5735 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5739 my $href = "$filename#$name";
5740 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5741 unless ($first_anchor) { $first_anchor = $href }
5744 return $first_anchor;
5747 sub close_html_file {
5749 return unless $self->{_html_file_opened};
5751 my $html_fh = $self->{_html_fh};
5752 my $rtoc_string = $self->{_rtoc_string};
5754 # There are 3 basic paths to html output...
5756 # ---------------------------------
5757 # Path 1: finish up if in -pre mode
5758 # ---------------------------------
5759 if ( $rOpts->{'html-pre-only'} ) {
5760 $html_fh->print( <<"PRE_END");
5763 eval { $html_fh->close() };
5768 $self->add_toc_item( 'EOF', 'EOF' );
5770 my $rpre_string_stack = $self->{_rpre_string_stack};
5772 # Patch to darken the <pre> background color in case of pod2html and
5773 # interleaved code/documentation. Otherwise, the distinction
5774 # between code and documentation is blurred.
5775 if ( $rOpts->{pod2html}
5776 && $self->{_pod_cut_count} >= 1
5777 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5779 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5782 # put the css or its link into a string, if used
5784 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5786 # use css linked to another file
5787 if ( $rOpts->{'html-linked-style-sheet'} ) {
5789 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />));
5792 # use css embedded in this file
5793 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5794 $fh_css->print( <<'ENDCSS');
5795 <style type="text/css">
5798 write_style_sheet_data($fh_css);
5799 $fh_css->print( <<"ENDCSS");
5805 # -----------------------------------------------------------
5806 # path 2: use pod2html if requested
5807 # If we fail for some reason, continue on to path 3
5808 # -----------------------------------------------------------
5809 if ( $rOpts->{'pod2html'} ) {
5810 my $rpod_string = $self->{_rpod_string};
5812 ${$rpod_string}, $css_string,
5813 ${$rtoc_string}, $rpre_string_stack
5817 # --------------------------------------------------
5818 # path 3: write code in html, with pod only in italics
5819 # --------------------------------------------------
5820 my $input_file = $self->{_input_file};
5821 my $title = escape_html($input_file);
5822 my $date = localtime;
5823 $html_fh->print( <<"HTML_START");
5824 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5825 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5826 <!-- Generated by perltidy on $date -->
5827 <html xmlns="http://www.w3.org/1999/xhtml">
5829 <title>$title</title>
5832 # output the css, if used
5834 $html_fh->print($css_string);
5835 $html_fh->print( <<"ENDCSS");
5842 $html_fh->print( <<"HTML_START");
5844 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5848 $html_fh->print("<a name=\"-top-\"></a>\n");
5849 $html_fh->print( <<"EOM");
5853 # copy the table of contents
5854 if ( ${$rtoc_string}
5855 && !$rOpts->{'frames'}
5856 && $rOpts->{'html-table-of-contents'} )
5858 $html_fh->print( ${$rtoc_string} );
5861 # copy the pre section(s)
5862 my $fname_comment = $input_file;
5863 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
5864 $html_fh->print( <<"END_PRE");
5866 <!-- contents of filename: $fname_comment -->
5870 foreach my $rpre_string ( @{$rpre_string_stack} ) {
5871 $html_fh->print( ${$rpre_string} );
5874 # and finish the html page
5875 $html_fh->print( <<"HTML_END");
5880 eval { $html_fh->close() }; # could be object without close method
5882 if ( $rOpts->{'frames'} ) {
5883 my @toc = map { $_ .= "\n" } split /\n/, ${$rtoc_string};
5884 $self->make_frame( \@toc );
5890 my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_;
5891 my ( @colored_tokens, $string, $type, $token, $level );
5892 my $rlast_level = $self->{_rlast_level};
5893 my $rpackage_stack = $self->{_rpackage_stack};
5895 for ( my $j = 0 ; $j < @{$rtoken_type} ; $j++ ) {
5896 $type = $rtoken_type->[$j];
5897 $token = $rtokens->[$j];
5898 $level = $rlevels->[$j];
5899 $level = 0 if ( $level < 0 );
5901 #-------------------------------------------------------
5902 # Update the package stack. The package stack is needed to keep
5903 # the toc correct because some packages may be declared within
5904 # blocks and go out of scope when we leave the block.
5905 #-------------------------------------------------------
5906 if ( $level > ${$rlast_level} ) {
5907 unless ( $rpackage_stack->[ $level - 1 ] ) {
5908 $rpackage_stack->[ $level - 1 ] = 'main';
5910 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5912 elsif ( $level < ${$rlast_level} ) {
5913 my $package = $rpackage_stack->[$level];
5914 unless ($package) { $package = 'main' }
5916 # if we change packages due to a nesting change, we
5917 # have to make an entry in the toc
5918 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5919 $self->add_toc_item( $package, 'package' );
5922 ${$rlast_level} = $level;
5924 #-------------------------------------------------------
5925 # Intercept a sub name here; split it
5926 # into keyword 'sub' and sub name; and add an
5928 #-------------------------------------------------------
5929 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5930 $token = $self->markup_html_element( $1, 'k' );
5931 push @colored_tokens, $token;
5935 # but don't include sub declarations in the toc;
5936 # these wlll have leading token types 'i;'
5937 my $signature = join "", @{$rtoken_type};
5938 unless ( $signature =~ /^i;/ ) {
5939 my $subname = $token;
5940 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5941 $self->add_toc_item( $subname, 'sub' );
5945 #-------------------------------------------------------
5946 # Intercept a package name here; split it
5947 # into keyword 'package' and name; add to the toc,
5948 # and update the package stack
5949 #-------------------------------------------------------
5950 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5951 $token = $self->markup_html_element( $1, 'k' );
5952 push @colored_tokens, $token;
5955 $self->add_toc_item( "$token", 'package' );
5956 $rpackage_stack->[$level] = $token;
5959 $token = $self->markup_html_element( $token, $type );
5960 push @colored_tokens, $token;
5962 return ( \@colored_tokens );
5965 sub markup_html_element {
5966 my ( $self, $token, $type ) = @_;
5968 return $token if ( $type eq 'b' ); # skip a blank token
5969 return $token if ( $token =~ /^\s*$/ ); # skip a blank line
5970 $token = escape_html($token);
5972 # get the short abbreviation for this token type
5973 my $short_name = $token_short_names{$type};
5974 if ( !defined($short_name) ) {
5975 $short_name = "pu"; # punctuation is default
5978 # handle style sheets..
5979 if ( !$rOpts->{'nohtml-style-sheets'} ) {
5980 if ( $short_name ne 'pu' ) {
5981 $token = qq(<span class="$short_name">) . $token . "</span>";
5985 # handle no style sheets..
5987 my $color = $html_color{$short_name};
5989 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5990 $token = qq(<font color="$color">) . $token . "</font>";
5992 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5993 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
6001 if ($missing_html_entities) {
6002 $token =~ s/\&/&/g;
6003 $token =~ s/\</</g;
6004 $token =~ s/\>/>/g;
6005 $token =~ s/\"/"/g;
6008 HTML::Entities::encode_entities($token);
6013 sub finish_formatting {
6015 # called after last line
6017 $self->close_html_file();
6023 my ( $self, $line_of_tokens ) = @_;
6024 return unless $self->{_html_file_opened};
6025 my $html_pre_fh = $self->{_html_pre_fh};
6026 my $line_type = $line_of_tokens->{_line_type};
6027 my $input_line = $line_of_tokens->{_line_text};
6028 my $line_number = $line_of_tokens->{_line_number};
6031 # markup line of code..
6033 if ( $line_type eq 'CODE' ) {
6034 my $rtoken_type = $line_of_tokens->{_rtoken_type};
6035 my $rtokens = $line_of_tokens->{_rtokens};
6036 my $rlevels = $line_of_tokens->{_rlevels};
6038 if ( $input_line =~ /(^\s*)/ ) {
6044 my ($rcolored_tokens) =
6045 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
6046 $html_line .= join '', @{$rcolored_tokens};
6049 # markup line of non-code..
6052 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
6053 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
6054 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
6055 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
6056 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
6057 elsif ( $line_type eq 'END_START' ) {
6058 $line_character = 'k';
6059 $self->add_toc_item( '__END__', '__END__' );
6061 elsif ( $line_type eq 'DATA_START' ) {
6062 $line_character = 'k';
6063 $self->add_toc_item( '__DATA__', '__DATA__' );
6065 elsif ( $line_type =~ /^POD/ ) {
6066 $line_character = 'P';
6067 if ( $rOpts->{'pod2html'} ) {
6068 my $html_pod_fh = $self->{_html_pod_fh};
6069 if ( $line_type eq 'POD_START' ) {
6071 my $rpre_string_stack = $self->{_rpre_string_stack};
6072 my $rpre_string = $rpre_string_stack->[-1];
6074 # if we have written any non-blank lines to the
6075 # current pre section, start writing to a new output
6077 if ( ${$rpre_string} =~ /\S/ ) {
6080 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
6081 $self->{_html_pre_fh} = $html_pre_fh;
6082 push @{$rpre_string_stack}, \$pre_string;
6084 # leave a marker in the pod stream so we know
6085 # where to put the pre section we just
6087 my $for_html = '=for html'; # don't confuse pod utils
6088 $html_pod_fh->print(<<EOM);
6091 <!-- pERLTIDY sECTION -->
6096 # otherwise, just clear the current string and start
6099 ${$rpre_string} = "";
6100 $html_pod_fh->print("\n");
6103 $html_pod_fh->print( $input_line . "\n" );
6104 if ( $line_type eq 'POD_END' ) {
6105 $self->{_pod_cut_count}++;
6106 $html_pod_fh->print("\n");
6111 else { $line_character = 'Q' }
6112 $html_line = $self->markup_html_element( $input_line, $line_character );
6115 # add the line number if requested
6116 if ( $rOpts->{'html-line-numbers'} ) {
6118 ( $line_number < 10 ) ? " "
6119 : ( $line_number < 100 ) ? " "
6120 : ( $line_number < 1000 ) ? " "
6122 $html_line = $extra_space . $line_number . " " . $html_line;
6126 $html_pre_fh->print("$html_line\n");
6130 #####################################################################
6132 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
6133 # line breaks to the token stream
6135 # WARNING: This is not a real class for speed reasons. Only one
6136 # Formatter may be used.
6138 #####################################################################
6140 package Perl::Tidy::Formatter;
6144 # Caution: these debug flags produce a lot of output
6145 # They should all be 0 except when debugging small scripts
6146 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
6147 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
6148 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
6149 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
6150 use constant FORMATTER_DEBUG_FLAG_CI => 0;
6151 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
6152 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
6153 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
6154 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
6155 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
6156 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
6157 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
6158 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
6159 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
6161 my $debug_warning = sub {
6162 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
6165 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
6166 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
6167 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
6168 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
6169 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
6170 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
6171 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
6172 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
6173 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
6174 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
6175 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
6176 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
6177 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
6178 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
6185 $max_gnu_stack_index
6186 $gnu_position_predictor
6187 $line_start_index_to_go
6188 $last_indentation_written
6189 $last_unadjusted_indentation
6191 $last_output_short_opening_token
6194 $saw_VERSION_in_this_file
6199 $gnu_sequence_number
6200 $last_output_indentation
6206 @type_sequence_to_go
6207 @container_environment_to_go
6208 @bond_strength_to_go
6209 @forced_breakpoint_to_go
6210 @token_lengths_to_go
6211 @summed_lengths_to_go
6213 @leading_spaces_to_go
6214 @reduced_spaces_to_go
6215 @matching_token_to_go
6218 @nesting_depth_to_go
6220 @old_breakpoint_to_go
6228 %saved_opening_indentation
6231 $comma_count_in_batch
6232 $last_nonblank_index_to_go
6233 $last_nonblank_type_to_go
6234 $last_nonblank_token_to_go
6235 $last_last_nonblank_index_to_go
6236 $last_last_nonblank_type_to_go
6237 $last_last_nonblank_token_to_go
6238 @nonblank_lines_at_depth
6241 @whitespace_level_stack
6242 $whitespace_last_level
6244 $format_skipping_pattern_begin
6245 $format_skipping_pattern_end
6247 $forced_breakpoint_count
6248 $forced_breakpoint_undo_count
6249 @forced_breakpoint_undo_stack
6250 %postponed_breakpoint
6254 $first_embedded_tab_at
6255 $last_embedded_tab_at
6256 $deleted_semicolon_count
6257 $first_deleted_semicolon_at
6258 $last_deleted_semicolon_at
6259 $added_semicolon_count
6260 $first_added_semicolon_at
6261 $last_added_semicolon_at
6262 $first_tabbing_disagreement
6263 $last_tabbing_disagreement
6264 $in_tabbing_disagreement
6265 $tabbing_disagreement_count
6268 $last_line_leading_type
6269 $last_line_leading_level
6270 $last_last_line_leading_level
6273 %block_opening_line_number
6274 $csc_new_statement_ok
6277 $accumulating_text_for_block
6279 $rleading_block_if_elsif_text
6280 $leading_block_text_level
6281 $leading_block_text_length_exceeded
6282 $leading_block_text_line_length
6283 $leading_block_text_line_number
6284 $closing_side_comment_prefix_pattern
6285 $closing_side_comment_list_pattern
6287 $blank_lines_after_opening_block_pattern
6288 $blank_lines_before_closing_block_pattern
6290 $last_nonblank_token
6292 $last_last_nonblank_token
6293 $last_last_nonblank_type
6294 $last_nonblank_block_type
6297 %is_if_brace_follower
6298 %space_after_keyword
6301 %is_last_next_redo_return
6302 %is_other_brace_follower
6303 %is_else_brace_follower
6304 %is_anon_sub_brace_follower
6305 %is_anon_sub_1_brace_follower
6307 %is_sort_map_grep_eval
6308 %is_sort_map_grep_eval_do
6309 %is_block_without_semicolon
6314 %is_if_unless_and_or_last_next_redo_return
6315 %ok_to_add_semicolon_for_block_type
6321 $is_static_block_comment
6322 $index_start_one_line_block
6323 $semicolons_before_block_self_destruct
6324 $index_max_forced_break
6327 $vertical_aligner_object
6334 $static_block_comment_pattern
6335 $static_side_comment_pattern
6336 %opening_vertical_tightness
6337 %closing_vertical_tightness
6338 %closing_token_indentation
6339 $some_closing_token_indentation
6341 %opening_token_right
6342 %stack_opening_token
6343 %stack_closing_token
6345 $block_brace_vertical_tightness_pattern
6348 $rOpts_add_whitespace
6349 $rOpts_block_brace_tightness
6350 $rOpts_block_brace_vertical_tightness
6351 $rOpts_brace_left_and_indent
6352 $rOpts_comma_arrow_breakpoints
6353 $rOpts_break_at_old_keyword_breakpoints
6354 $rOpts_break_at_old_comma_breakpoints
6355 $rOpts_break_at_old_logical_breakpoints
6356 $rOpts_break_at_old_ternary_breakpoints
6357 $rOpts_break_at_old_attribute_breakpoints
6358 $rOpts_closing_side_comment_else_flag
6359 $rOpts_closing_side_comment_maximum_text
6360 $rOpts_continuation_indentation
6362 $rOpts_delete_old_whitespace
6363 $rOpts_fuzzy_line_length
6364 $rOpts_indent_columns
6365 $rOpts_line_up_parentheses
6366 $rOpts_maximum_fields_per_table
6367 $rOpts_maximum_line_length
6368 $rOpts_variable_maximum_line_length
6369 $rOpts_short_concatenation_item_length
6370 $rOpts_keep_old_blank_lines
6371 $rOpts_ignore_old_breakpoints
6372 $rOpts_format_skipping
6373 $rOpts_space_function_paren
6374 $rOpts_space_keyword_paren
6375 $rOpts_keep_interior_semicolons
6376 $rOpts_ignore_side_comment_lengths
6377 $rOpts_stack_closing_block_brace
6378 $rOpts_space_backslash_quote
6379 $rOpts_whitespace_cycle
6383 %is_keyword_returning_list
6387 %right_bond_strength
6401 %weld_len_left_closing
6402 %weld_len_right_closing
6403 %weld_len_left_opening
6404 %weld_len_right_opening
6406 $rcuddled_block_types
6417 # Array index names for token vars
6420 _BLOCK_TYPE_ => $i++,
6422 _CONTAINER_ENVIRONMENT_ => $i++,
6423 _CONTAINER_TYPE_ => $i++,
6424 _CUMULATIVE_LENGTH_ => $i++,
6425 _LINE_INDEX_ => $i++,
6426 _KNEXT_SEQ_ITEM_ => $i++,
6428 _LEVEL_TRUE_ => $i++,
6432 _TYPE_SEQUENCE_ => $i++,
6434 $NVARS = 1 + _TYPE_SEQUENCE_;
6436 # default list of block types for which -bli would apply
6437 $bli_list_string = 'if else elsif unless while for foreach do : sub';
6442 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
6443 <= >= == =~ !~ != ++ -- /= x=
6445 @is_digraph{@q} = (1) x scalar(@q);
6447 @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
6448 @is_trigraph{@q} = (1) x scalar(@q);
6451 = **= += *= &= <<= &&=
6452 -= /= |= >>= ||= //=
6456 @is_assignment{@q} = (1) x scalar(@q);
6466 @is_keyword_returning_list{@q} = (1) x scalar(@q);
6468 @q = qw(is if unless and or err last next redo return);
6469 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
6471 @q = qw(last next redo return);
6472 @is_last_next_redo_return{@q} = (1) x scalar(@q);
6474 @q = qw(sort map grep);
6475 @is_sort_map_grep{@q} = (1) x scalar(@q);
6477 @q = qw(sort map grep eval);
6478 @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
6480 @q = qw(sort map grep eval do);
6481 @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
6484 @is_if_unless{@q} = (1) x scalar(@q);
6486 @q = qw(and or err);
6487 @is_and_or{@q} = (1) x scalar(@q);
6489 # Identify certain operators which often occur in chains.
6490 # Note: the minus (-) causes a side effect of padding of the first line in
6491 # something like this (by sub set_logical_padding):
6492 # Checkbutton => 'Transmission checked',
6493 # -variable => \$TRANS
6494 # This usually improves appearance so it seems ok.
6495 @q = qw(&& || and or : ? . + - * /);
6496 @is_chain_operator{@q} = (1) x scalar(@q);
6498 # We can remove semicolons after blocks preceded by these keywords
6500 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6501 unless while until for foreach given when default);
6502 @is_block_without_semicolon{@q} = (1) x scalar(@q);
6504 # We will allow semicolons to be added within these block types
6505 # as well as sub and package blocks.
6507 # 1. Note that these keywords are omitted:
6508 # switch case given when default sort map grep
6509 # 2. It is also ok to add for sub and package blocks and a labeled block
6510 # 3. But not okay for other perltidy types including:
6512 # 4. Test files: blktype.t, blktype1.t, semicolon.t
6514 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6515 unless do while until eval for foreach );
6516 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
6518 # 'L' is token for opening { at hash key
6520 @is_opening_type{@q} = (1) x scalar(@q);
6522 # 'R' is token for closing } at hash key
6524 @is_closing_type{@q} = (1) x scalar(@q);
6527 @is_opening_token{@q} = (1) x scalar(@q);
6530 @is_closing_token{@q} = (1) x scalar(@q);
6532 # Patterns for standardizing matches to block types for regular subs and
6533 # anonymous subs. Examples
6534 # 'sub process' is a named sub
6535 # 'sub ::m' is a named sub
6536 # 'sub' is an anonymous sub
6537 # 'sub:' is a label, not a sub
6538 # 'substr' is a keyword
6539 $SUB_PATTERN = '^sub\s+(::|\w)';
6540 $ASUB_PATTERN = '^sub$';
6544 use constant WS_YES => 1;
6545 use constant WS_OPTIONAL => 0;
6546 use constant WS_NO => -1;
6548 # Token bond strengths.
6549 use constant NO_BREAK => 10000;
6550 use constant VERY_STRONG => 100;
6551 use constant STRONG => 2.1;
6552 use constant NOMINAL => 1.1;
6553 use constant WEAK => 0.8;
6554 use constant VERY_WEAK => 0.55;
6556 # values for testing indexes in output array
6557 use constant UNDEFINED_INDEX => -1;
6559 # Maximum number of little messages; probably need not be changed.
6560 use constant MAX_NAG_MESSAGES => 6;
6562 # increment between sequence numbers for each type
6563 # For example, ?: pairs might have numbers 7,11,15,...
6564 use constant TYPE_SEQUENCE_INCREMENT => 4;
6568 # methods to count instances
6570 sub get_count { return $_count; }
6571 sub _increment_count { return ++$_count }
6572 sub _decrement_count { return --$_count }
6577 # trim leading and trailing whitespace from a string
6586 my $max = shift @vals;
6587 foreach my $val (@vals) {
6588 $max = ( $max < $val ) ? $val : $max;
6595 my $min = shift @vals;
6596 foreach my $val (@vals) {
6597 $min = ( $min > $val ) ? $val : $min;
6604 # given a string containing words separated by whitespace,
6605 # return the list of words
6610 return split( /\s+/, $str );
6614 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
6616 # Check the keys of a hash:
6617 # $rtest = ref to hash to test
6618 # $rexpected = ref to has with valid keys
6620 # $msg = a message to write in case of error
6621 # $exact_match defines the type of check:
6622 # = false: test hash must not have unknown key
6623 # = true: test hash must have exactly same keys as known hash
6625 grep { !exists $rvalid->{$_} } keys %{$rtest};
6627 grep { !exists $rtest->{$_} } keys %{$rvalid};
6628 my $error = @unknown_keys;
6629 if ($exact_match) { $error ||= @missing_keys }
6632 my @expected_keys = sort keys %{$rvalid};
6633 @unknown_keys = sort @unknown_keys;
6634 Perl::Tidy::Die <<EOM;
6635 ------------------------------------------------------------------------
6636 Program error detected checking hash keys
6638 Expected keys: (@expected_keys)
6639 Unknown key(s): (@unknown_keys)
6640 Missing key(s): (@missing_keys)
6641 ------------------------------------------------------------------------
6646 # interface to Perl::Tidy::Logger routines
6649 if ($logger_object) { $logger_object->warning($msg); }
6655 if ($logger_object) {
6656 $logger_object->complain($msg);
6661 sub write_logfile_entry {
6663 if ($logger_object) {
6664 $logger_object->write_logfile_entry(@msg);
6671 if ($logger_object) { $logger_object->black_box(@msg); }
6675 sub report_definite_bug {
6676 if ($logger_object) {
6677 $logger_object->report_definite_bug();
6682 sub get_saw_brace_error {
6683 if ($logger_object) {
6684 $logger_object->get_saw_brace_error();
6689 sub we_are_at_the_last_line {
6690 if ($logger_object) {
6691 $logger_object->we_are_at_the_last_line();
6696 # interface to Perl::Tidy::Diagnostics routine
6697 sub write_diagnostics {
6699 if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
6703 sub get_added_semicolon_count {
6705 return $added_semicolon_count;
6710 $self->_decrement_count();
6714 sub get_output_line_number {
6715 return $vertical_aligner_object->get_output_line_number();
6722 # we are given an object with a write_line() method to take lines
6724 sink_object => undef,
6725 diagnostics_object => undef,
6726 logger_object => undef,
6728 my %args = ( %defaults, @_ );
6730 $logger_object = $args{logger_object};
6731 $diagnostics_object = $args{diagnostics_object};
6733 # we create another object with a get_line() and peek_ahead() method
6734 my $sink_object = $args{sink_object};
6735 $file_writer_object =
6736 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6738 # initialize the leading whitespace stack to negative levels
6739 # so that we can never run off the end of the stack
6740 $peak_batch_size = 0; # flag to determine if we have output code
6741 $gnu_position_predictor = 0; # where the current token is predicted to be
6742 $max_gnu_stack_index = 0;
6743 $max_gnu_item_index = -1;
6744 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6745 @gnu_item_list = ();
6746 $last_output_indentation = 0;
6747 $last_indentation_written = 0;
6748 $last_unadjusted_indentation = 0;
6749 $last_leading_token = "";
6750 $last_output_short_opening_token = 0;
6752 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6753 $saw_END_or_DATA_ = 0;
6755 @block_type_to_go = ();
6756 @type_sequence_to_go = ();
6757 @container_environment_to_go = ();
6758 @bond_strength_to_go = ();
6759 @forced_breakpoint_to_go = ();
6760 @summed_lengths_to_go = (); # line length to start of ith token
6761 @token_lengths_to_go = ();
6763 @matching_token_to_go = ();
6764 @mate_index_to_go = ();
6765 @ci_levels_to_go = ();
6766 @nesting_depth_to_go = (0);
6767 @nobreak_to_go = ();
6768 @old_breakpoint_to_go = ();
6770 @rtoken_vars_to_go = ();
6773 @leading_spaces_to_go = ();
6774 @reduced_spaces_to_go = ();
6778 @whitespace_level_stack = ();
6779 $whitespace_last_level = -1;
6782 @has_broken_sublist = ();
6783 @want_comma_break = ();
6786 $first_tabbing_disagreement = 0;
6787 $last_tabbing_disagreement = 0;
6788 $tabbing_disagreement_count = 0;
6789 $in_tabbing_disagreement = 0;
6790 $input_line_tabbing = undef;
6792 $last_last_line_leading_level = 0;
6793 $last_line_leading_level = 0;
6794 $last_line_leading_type = '#';
6796 $last_nonblank_token = ';';
6797 $last_nonblank_type = ';';
6798 $last_last_nonblank_token = ';';
6799 $last_last_nonblank_type = ';';
6800 $last_nonblank_block_type = "";
6801 $last_output_level = 0;
6802 $looking_for_else = 0;
6803 $embedded_tab_count = 0;
6804 $first_embedded_tab_at = 0;
6805 $last_embedded_tab_at = 0;
6806 $deleted_semicolon_count = 0;
6807 $first_deleted_semicolon_at = 0;
6808 $last_deleted_semicolon_at = 0;
6809 $added_semicolon_count = 0;
6810 $first_added_semicolon_at = 0;
6811 $last_added_semicolon_at = 0;
6812 $is_static_block_comment = 0;
6813 %postponed_breakpoint = ();
6815 # variables for adding side comments
6816 %block_leading_text = ();
6817 %block_opening_line_number = ();
6818 $csc_new_statement_ok = 1;
6819 %csc_block_label = ();
6821 %saved_opening_indentation = ();
6823 reset_block_text_accumulator();
6825 prepare_for_new_input_lines();
6827 $vertical_aligner_object =
6828 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6829 $logger_object, $diagnostics_object );
6831 if ( $rOpts->{'entab-leading-whitespace'} ) {
6832 write_logfile_entry(
6833 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6836 elsif ( $rOpts->{'tabs'} ) {
6837 write_logfile_entry("Indentation will be with a tab character\n");
6840 write_logfile_entry(
6841 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6844 # This hash holds the main data structures for formatting
6845 # All hash keys must be defined here.
6847 rlines => [], # = ref to array of lines of the file
6848 rLL => [], # = ref to array with all tokens
6849 # in the file. LL originally meant
6850 # 'Linked List'. Linked lists were a
6851 # bad idea but LL is easy to type.
6852 Klimit => undef, # = maximum K index for rLL. This is
6853 # needed to catch any autovivification
6855 rnested_pairs => [], # for welding decisions
6856 K_opening_container => {}, # for quickly traversing structure
6857 K_closing_container => {}, # for quickly traversing structure
6858 K_opening_ternary => {}, # for quickly traversing structure
6859 K_closing_ternary => {}, # for quickly traversing structure
6860 rK_phantom_semicolons =>
6861 undef, # for undoing phantom semicolons if iterating
6862 rpaired_to_inner_container => {},
6863 rbreak_container => {}, # prevent one-line blocks
6864 rvalid_self_keys => [], # for checking
6866 my @valid_keys = keys %{$formatter_self};
6867 $formatter_self->{rvalid_self_keys} = \@valid_keys;
6869 bless $formatter_self, $class;
6871 # Safety check..this is not a class yet
6872 if ( _increment_count() > 1 ) {
6874 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6876 return $formatter_self;
6882 # This routine is called for errors that really should not occur
6883 # except if there has been a bug introduced by a recent program change
6884 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
6885 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
6886 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
6888 Perl::Tidy::Die(<<EOM);
6889 ==============================================================================
6890 Fault detected at line $line0 of sub '$subroutine1'
6891 in file '$filename1'
6892 which was called from line $line1 of sub '$subroutine2'
6894 This is probably an error introduced by a recent programming change.
6895 ==============================================================================
6899 sub check_self_hash {
6901 my @valid_self_keys = @{ $self->{rvalid_self_keys} };
6902 my %valid_self_hash;
6903 @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
6904 check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
6908 sub check_token_array {
6911 # Check for errors in the array of tokens
6912 # Uses package variable $NVARS
6913 $self->check_self_hash();
6914 my $rLL = $self->{rLL};
6915 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
6916 my $nvars = @{ $rLL->[$KK] };
6917 if ( $nvars != $NVARS ) {
6918 my $type = $rLL->[$KK]->[_TYPE_];
6919 $type = '*' unless defined($type);
6921 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
6924 foreach my $var ( _TOKEN_, _TYPE_ ) {
6925 if ( !defined( $rLL->[$KK]->[$var] ) ) {
6926 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
6927 Fault("Undefined variable $var for K=$KK, line=$iline\n");
6934 sub set_rLL_max_index {
6937 # Set the limit of the rLL array, assuming that it is correct.
6938 # This should only be called by routines after they make changes
6940 my $rLL = $self->{rLL};
6941 if ( !defined($rLL) ) {
6943 # Shouldn't happen because rLL was initialized to be an array ref
6944 Fault("Undefined Memory rLL");
6946 my $Klimit_old = $self->{Klimit};
6949 if ( $num > 0 ) { $Klimit = $num - 1 }
6950 $self->{Klimit} = $Klimit;
6954 sub get_rLL_max_index {
6957 # the memory location $rLL and number of tokens should be obtained
6958 # from this routine so that any autovivication can be immediately caught.
6959 my $rLL = $self->{rLL};
6960 my $Klimit = $self->{Klimit};
6961 if ( !defined($rLL) ) {
6963 # Shouldn't happen because rLL was initialized to be an array ref
6964 Fault("Undefined Memory rLL");
6967 if ( $num == 0 && defined($Klimit)
6968 || $num > 0 && !defined($Klimit)
6969 || $num > 0 && $Klimit != $num - 1 )
6972 # Possible autovivification problem...
6973 if ( !defined($Klimit) ) { $Klimit = '*' }
6974 Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
6979 sub prepare_for_new_input_lines {
6981 # Remember the largest batch size processed. This is needed
6982 # by the pad routine to avoid padding the first nonblank token
6983 if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
6984 $peak_batch_size = $max_index_to_go;
6987 $gnu_sequence_number++; # increment output batch counter
6988 %last_gnu_equals = ();
6989 %gnu_comma_count = ();
6990 %gnu_arrow_count = ();
6991 $line_start_index_to_go = 0;
6992 $max_gnu_item_index = UNDEFINED_INDEX;
6993 $index_max_forced_break = UNDEFINED_INDEX;
6994 $max_index_to_go = UNDEFINED_INDEX;
6995 $last_nonblank_index_to_go = UNDEFINED_INDEX;
6996 $last_nonblank_type_to_go = '';
6997 $last_nonblank_token_to_go = '';
6998 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6999 $last_last_nonblank_type_to_go = '';
7000 $last_last_nonblank_token_to_go = '';
7001 $forced_breakpoint_count = 0;
7002 $forced_breakpoint_undo_count = 0;
7003 $rbrace_follower = undef;
7004 $summed_lengths_to_go[0] = 0;
7005 $comma_count_in_batch = 0;
7006 $starting_in_quote = 0;
7008 destroy_one_line_block();
7014 # Loop over old lines to set new line break points
7017 my $rlines = $self->{rlines};
7019 # Flag to prevent blank lines when POD occurs in a format skipping sect.
7020 my $in_format_skipping_section;
7023 foreach my $line_of_tokens ( @{$rlines} ) {
7025 my $last_line_type = $line_type;
7026 $line_type = $line_of_tokens->{_line_type};
7027 my $input_line = $line_of_tokens->{_line_text};
7029 # _line_type codes are:
7030 # SYSTEM - system-specific code before hash-bang line
7031 # CODE - line of perl code (including comments)
7032 # POD_START - line starting pod, such as '=head'
7033 # POD - pod documentation text
7034 # POD_END - last line of pod section, '=cut'
7035 # HERE - text of here-document
7036 # HERE_END - last line of here-doc (target word)
7037 # FORMAT - format section
7038 # FORMAT_END - last line of format section, '.'
7039 # DATA_START - __DATA__ line
7040 # DATA - unidentified text following __DATA__
7041 # END_START - __END__ line
7042 # END - unidentified text following __END__
7043 # ERROR - we are in big trouble, probably not a perl script
7045 # put a blank line after an =cut which comes before __END__ and __DATA__
7046 # (required by podchecker)
7047 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
7048 $file_writer_object->reset_consecutive_blank_lines();
7049 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
7050 $self->want_blank_line();
7054 # handle line of code..
7055 if ( $line_type eq 'CODE' ) {
7057 my $CODE_type = $line_of_tokens->{_code_type};
7058 $in_format_skipping_section = $CODE_type eq 'FS';
7060 # Handle blank lines
7061 if ( $CODE_type eq 'BL' ) {
7063 # If keep-old-blank-lines is zero, we delete all
7064 # old blank lines and let the blank line rules generate any
7066 if ($rOpts_keep_old_blank_lines) {
7068 $file_writer_object->write_blank_code_line(
7069 $rOpts_keep_old_blank_lines == 2 );
7070 $last_line_leading_type = 'b';
7076 # let logger see all non-blank lines of code
7077 my $output_line_number = get_output_line_number();
7078 ##$vertical_aligner_object->get_output_line_number();
7079 black_box( $line_of_tokens, $output_line_number );
7082 # Handle Format Skipping (FS) and Verbatim (VB) Lines
7083 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
7084 $self->write_unindented_line("$input_line");
7085 $file_writer_object->reset_consecutive_blank_lines();
7089 # Handle all other lines of code
7090 $self->print_line_of_tokens($line_of_tokens);
7093 # handle line of non-code..
7099 if ( $line_type =~ /^POD/ ) {
7101 # Pod docs should have a preceding blank line. But stay
7102 # out of __END__ and __DATA__ sections, because
7103 # the user may be using this section for any purpose whatsoever
7104 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
7105 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
7106 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
7108 && !$in_format_skipping_section
7109 && $line_type eq 'POD_START'
7110 && !$saw_END_or_DATA_ )
7112 $self->want_blank_line();
7116 # leave the blank counters in a predictable state
7117 # after __END__ or __DATA__
7118 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
7119 $file_writer_object->reset_consecutive_blank_lines();
7120 $saw_END_or_DATA_ = 1;
7123 # write unindented non-code line
7124 if ( !$skip_line ) {
7125 if ($tee_line) { $file_writer_object->tee_on() }
7126 $self->write_unindented_line($input_line);
7127 if ($tee_line) { $file_writer_object->tee_off() }
7134 { ## Beginning of routine to check line hashes
7136 my %valid_line_hash;
7140 # These keys are defined for each line in the formatter
7141 # Each line must have exactly these quantities
7142 my @valid_line_keys = qw(
7145 _guessed_indentation_level
7152 _square_bracket_depth
7154 _ended_in_blank_token
7163 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
7166 sub check_line_hashes {
7168 $self->check_self_hash();
7169 my $rlines = $self->{rlines};
7170 foreach my $rline ( @{$rlines} ) {
7171 my $iline = $rline->{_line_number};
7172 my $line_type = $rline->{_line_type};
7173 check_keys( $rline, \%valid_line_hash,
7174 "Checkpoint: line number =$iline, line_type=$line_type", 1 );
7179 } ## End check line hashes
7183 # We are caching tokenized lines as they arrive and converting them to the
7184 # format needed for the final formatting.
7185 my ( $self, $line_of_tokens_old ) = @_;
7186 my $rLL = $self->{rLL};
7187 my $Klimit = $self->{Klimit};
7188 my $rlines_new = $self->{rlines};
7191 my $line_of_tokens = {};
7196 _guessed_indentation_level
7202 _square_bracket_depth
7207 $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
7210 # Data needed by Logger
7211 $line_of_tokens->{_level_0} = 0;
7212 $line_of_tokens->{_ci_level_0} = 0;
7213 $line_of_tokens->{_nesting_blocks_0} = "";
7214 $line_of_tokens->{_nesting_tokens_0} = "";
7216 # Needed to avoid trimming quotes
7217 $line_of_tokens->{_ended_in_blank_token} = undef;
7219 my $line_type = $line_of_tokens_old->{_line_type};
7220 my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
7221 if ( $line_type eq 'CODE' ) {
7223 my $rtokens = $line_of_tokens_old->{_rtokens};
7224 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
7225 my $rblock_type = $line_of_tokens_old->{_rblock_type};
7226 my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
7227 my $rcontainer_environment =
7228 $line_of_tokens_old->{_rcontainer_environment};
7229 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
7230 my $rlevels = $line_of_tokens_old->{_rlevels};
7231 my $rslevels = $line_of_tokens_old->{_rslevels};
7232 my $rci_levels = $line_of_tokens_old->{_rci_levels};
7233 my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
7234 my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
7236 my $jmax = @{$rtokens} - 1;
7238 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
7239 foreach my $j ( 0 .. $jmax ) {
7243 _BLOCK_TYPE_, _CONTAINER_TYPE_,
7244 _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
7245 _LEVEL_, _LEVEL_TRUE_,
7246 _SLEVEL_, _CI_LEVEL_,
7250 $rtokens->[$j], $rtoken_type->[$j],
7251 $rblock_type->[$j], $rcontainer_type->[$j],
7252 $rcontainer_environment->[$j], $rtype_sequence->[$j],
7253 $rlevels->[$j], $rlevels->[$j],
7254 $rslevels->[$j], $rci_levels->[$j],
7257 ##push @token_array, \@tokary;
7258 push @{$rLL}, \@tokary;
7262 $Klimit = @{$rLL} - 1;
7264 # Need to remember if we can trim the input line
7265 $line_of_tokens->{_ended_in_blank_token} =
7266 $rtoken_type->[$jmax] eq 'b';
7268 $line_of_tokens->{_level_0} = $rlevels->[0];
7269 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
7270 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
7271 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
7275 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
7276 $line_of_tokens->{_code_type} = "";
7277 $self->{Klimit} = $Klimit;
7279 push @{$rlines_new}, $line_of_tokens;
7285 # initialize these global hashes, which control the use of
7286 # whitespace around tokens:
7291 # %space_after_keyword
7293 # Many token types are identical to the tokens themselves.
7294 # See the tokenizer for a complete list. Here are some special types:
7296 # f = semicolon in for statement
7299 # Note that :: is excluded since it should be contained in an identifier
7300 # Note that '->' is excluded because it never gets space
7301 # parentheses and brackets are excluded since they are handled specially
7302 # curly braces are included but may be overridden by logic, such as
7305 # NEW_TOKENS: create a whitespace rule here. This can be as
7306 # simple as adding your new letter to @spaces_both_sides, for
7312 @is_opening_type{@q} = (1) x scalar(@q);
7315 @is_closing_type{@q} = (1) x scalar(@q);
7317 my @spaces_both_sides = qw"
7318 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7319 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
7320 &&= ||= //= <=> A k f w F n C Y U G v
7323 my @spaces_left_side = qw"
7324 t ! ~ m p { \ h pp mm Z j
7326 push( @spaces_left_side, '#' ); # avoids warning message
7328 my @spaces_right_side = qw"
7329 ; } ) ] R J ++ -- **=
7331 push( @spaces_right_side, ',' ); # avoids warning message
7333 # Note that we are in a BEGIN block here. Later in processing
7334 # the values of %want_left_space and %want_right_space
7335 # may be overridden by any user settings specified by the
7336 # -wls and -wrs parameters. However the binary_whitespace_rules
7337 # are hardwired and have priority.
7338 @want_left_space{@spaces_both_sides} =
7339 (1) x scalar(@spaces_both_sides);
7340 @want_right_space{@spaces_both_sides} =
7341 (1) x scalar(@spaces_both_sides);
7342 @want_left_space{@spaces_left_side} =
7343 (1) x scalar(@spaces_left_side);
7344 @want_right_space{@spaces_left_side} =
7345 (-1) x scalar(@spaces_left_side);
7346 @want_left_space{@spaces_right_side} =
7347 (-1) x scalar(@spaces_right_side);
7348 @want_right_space{@spaces_right_side} =
7349 (1) x scalar(@spaces_right_side);
7350 $want_left_space{'->'} = WS_NO;
7351 $want_right_space{'->'} = WS_NO;
7352 $want_left_space{'**'} = WS_NO;
7353 $want_right_space{'**'} = WS_NO;
7354 $want_right_space{'CORE::'} = WS_NO;
7356 # These binary_ws_rules are hardwired and have priority over the above
7357 # settings. It would be nice to allow adjustment by the user,
7358 # but it would be complicated to specify.
7360 # hash type information must stay tightly bound
7362 $binary_ws_rules{'i'}{'L'} = WS_NO;
7363 $binary_ws_rules{'i'}{'{'} = WS_YES;
7364 $binary_ws_rules{'k'}{'{'} = WS_YES;
7365 $binary_ws_rules{'U'}{'{'} = WS_YES;
7366 $binary_ws_rules{'i'}{'['} = WS_NO;
7367 $binary_ws_rules{'R'}{'L'} = WS_NO;
7368 $binary_ws_rules{'R'}{'{'} = WS_NO;
7369 $binary_ws_rules{'t'}{'L'} = WS_NO;
7370 $binary_ws_rules{'t'}{'{'} = WS_NO;
7371 $binary_ws_rules{'}'}{'L'} = WS_NO;
7372 $binary_ws_rules{'}'}{'{'} = WS_NO;
7373 $binary_ws_rules{'$'}{'L'} = WS_NO;
7374 $binary_ws_rules{'$'}{'{'} = WS_NO;
7375 $binary_ws_rules{'@'}{'L'} = WS_NO;
7376 $binary_ws_rules{'@'}{'{'} = WS_NO;
7377 $binary_ws_rules{'='}{'L'} = WS_YES;
7378 $binary_ws_rules{'J'}{'J'} = WS_YES;
7380 # the following includes ') {'
7381 # as in : if ( xxx ) { yyy }
7382 $binary_ws_rules{']'}{'L'} = WS_NO;
7383 $binary_ws_rules{']'}{'{'} = WS_NO;
7384 $binary_ws_rules{')'}{'{'} = WS_YES;
7385 $binary_ws_rules{')'}{'['} = WS_NO;
7386 $binary_ws_rules{']'}{'['} = WS_NO;
7387 $binary_ws_rules{']'}{'{'} = WS_NO;
7388 $binary_ws_rules{'}'}{'['} = WS_NO;
7389 $binary_ws_rules{'R'}{'['} = WS_NO;
7391 $binary_ws_rules{']'}{'++'} = WS_NO;
7392 $binary_ws_rules{']'}{'--'} = WS_NO;
7393 $binary_ws_rules{')'}{'++'} = WS_NO;
7394 $binary_ws_rules{')'}{'--'} = WS_NO;
7396 $binary_ws_rules{'R'}{'++'} = WS_NO;
7397 $binary_ws_rules{'R'}{'--'} = WS_NO;
7399 $binary_ws_rules{'i'}{'Q'} = WS_YES;
7400 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
7402 # FIXME: we could to split 'i' into variables and functions
7403 # and have no space for functions but space for variables. For now,
7404 # I have a special patch in the special rules below
7405 $binary_ws_rules{'i'}{'('} = WS_NO;
7407 $binary_ws_rules{'w'}{'('} = WS_NO;
7408 $binary_ws_rules{'w'}{'{'} = WS_YES;
7409 } ## end BEGIN block
7411 sub set_whitespace_flags {
7413 # This routine examines each pair of nonblank tokens and
7414 # sets a flag indicating if white space is needed.
7416 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
7417 # BEFORE token $j is needed, with the following values:
7419 # WS_NO = -1 do not want a space before token $j
7420 # WS_OPTIONAL= 0 optional space or $j is a whitespace
7421 # WS_YES = 1 want a space before token $j
7425 my $rLL = $self->{rLL};
7427 my $rwhitespace_flags = [];
7429 my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
7430 $token, $type, $block_type, $input_line_no );
7431 my $j_tight_closing_paren = -1;
7439 $last_block_type = '';
7440 $last_input_line_no = 0;
7442 my $jmax = @{$rLL} - 1;
7446 # This is some logic moved to a sub to avoid deep nesting of if stmts
7447 my $ws_in_container = sub {
7451 if ( $j + 1 > $jmax ) { return (WS_NO) }
7453 # Patch to count '-foo' as single token so that
7454 # each of $a{-foo} and $a{foo} and $a{'foo'} do
7455 # not get spaces with default formatting.
7459 && $last_token eq '{'
7460 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
7462 # $j_next is where a closing token should be if
7463 # the container has a single token
7464 if ( $j_here + 1 > $jmax ) { return (WS_NO) }
7466 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
7470 if ( $j_next > $jmax ) { return WS_NO }
7471 my $tok_next = $rLL->[$j_next]->[_TOKEN_];
7472 my $type_next = $rLL->[$j_next]->[_TYPE_];
7474 # for tightness = 1, if there is just one token
7475 # within the matching pair, we will keep it tight
7477 $tok_next eq $matching_token{$last_token}
7479 # but watch out for this: [ [ ] (misc.t)
7480 && $last_token ne $token
7482 # double diamond is usually spaced
7488 # remember where to put the space for the closing paren
7489 $j_tight_closing_paren = $j_next;
7495 # main loop over all tokens to define the whitespace flags
7496 for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
7498 my $rtokh = $rLL->[$j];
7501 $rwhitespace_flags->[$j] = WS_OPTIONAL;
7503 if ( $rtokh->[_TYPE_] eq 'b' ) {
7507 # set a default value, to be changed as needed
7509 $last_token = $token;
7511 $last_block_type = $block_type;
7512 $last_input_line_no = $input_line_no;
7513 $token = $rtokh->[_TOKEN_];
7514 $type = $rtokh->[_TYPE_];
7515 $block_type = $rtokh->[_BLOCK_TYPE_];
7516 $input_line_no = $rtokh->[_LINE_INDEX_];
7518 #---------------------------------------------------------------
7519 # Whitespace Rules Section 1:
7520 # Handle space on the inside of opening braces.
7521 #---------------------------------------------------------------
7524 if ( $is_opening_type{$last_type} ) {
7526 $j_tight_closing_paren = -1;
7528 # let us keep empty matched braces together: () {} []
7530 if ( $token eq $matching_token{$last_token} ) {
7540 # we're considering the right of an opening brace
7541 # tightness = 0 means always pad inside with space
7542 # tightness = 1 means pad inside if "complex"
7543 # tightness = 2 means never pad inside with space
7546 if ( $last_type eq '{'
7547 && $last_token eq '{'
7548 && $last_block_type )
7550 $tightness = $rOpts_block_brace_tightness;
7552 else { $tightness = $tightness{$last_token} }
7554 #=============================================================
7555 # Patch for test problem fabrice_bug.pl
7556 # We must always avoid spaces around a bare word beginning
7558 # my $before = ${^PREMATCH};
7559 # Because all of the following cause an error in perl:
7560 # my $before = ${ ^PREMATCH };
7561 # my $before = ${ ^PREMATCH};
7562 # my $before = ${^PREMATCH };
7563 # So if brace tightness flag is -bt=0 we must temporarily reset
7564 # to bt=1. Note that here we must set tightness=1 and not 2 so
7565 # that the closing space
7566 # is also avoided (via the $j_tight_closing_paren flag in coding)
7567 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
7569 #=============================================================
7571 if ( $tightness <= 0 ) {
7574 elsif ( $tightness > 1 ) {
7578 $ws = $ws_in_container->($j);
7581 } # end setting space flag inside opening tokens
7584 if FORMATTER_DEBUG_FLAG_WHITE;
7586 #---------------------------------------------------------------
7587 # Whitespace Rules Section 2:
7588 # Handle space on inside of closing brace pairs.
7589 #---------------------------------------------------------------
7592 if ( $is_closing_type{$type} ) {
7594 if ( $j == $j_tight_closing_paren ) {
7596 $j_tight_closing_paren = -1;
7601 if ( !defined($ws) ) {
7604 if ( $type eq '}' && $token eq '}' && $block_type ) {
7605 $tightness = $rOpts_block_brace_tightness;
7607 else { $tightness = $tightness{$token} }
7609 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7612 } # end setting space flag inside closing tokens
7616 if FORMATTER_DEBUG_FLAG_WHITE;
7618 #---------------------------------------------------------------
7619 # Whitespace Rules Section 3:
7620 # Use the binary rule table.
7621 #---------------------------------------------------------------
7622 if ( !defined($ws) ) {
7623 $ws = $binary_ws_rules{$last_type}{$type};
7627 if FORMATTER_DEBUG_FLAG_WHITE;
7629 #---------------------------------------------------------------
7630 # Whitespace Rules Section 4:
7631 # Handle some special cases.
7632 #---------------------------------------------------------------
7633 if ( $token eq '(' ) {
7635 # This will have to be tweaked as tokenization changes.
7636 # We usually want a space at '} (', for example:
7637 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7640 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
7641 # At present, the above & block is marked as type L/R so this case
7642 # won't go through here.
7643 if ( $last_type eq '}' ) { $ws = WS_YES }
7645 # NOTE: some older versions of Perl had occasional problems if
7646 # spaces are introduced between keywords or functions and opening
7647 # parens. So the default is not to do this except is certain
7648 # cases. The current Perl seems to tolerate spaces.
7650 # Space between keyword and '('
7651 elsif ( $last_type eq 'k' ) {
7653 unless ( $rOpts_space_keyword_paren
7654 || $space_after_keyword{$last_token} );
7657 # Space between function and '('
7658 # -----------------------------------------------------
7659 # 'w' and 'i' checks for something like:
7660 # myfun( &myfun( ->myfun(
7661 # -----------------------------------------------------
7662 elsif (( $last_type =~ /^[wUG]$/ )
7663 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7665 $ws = WS_NO unless ($rOpts_space_function_paren);
7668 # space between something like $i and ( in
7669 # for $i ( 0 .. 20 ) {
7670 # FIXME: eventually, type 'i' needs to be split into multiple
7671 # token types so this can be a hardwired rule.
7672 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7676 # allow constant function followed by '()' to retain no space
7677 elsif ($last_type eq 'C'
7678 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
7684 # patch for SWITCH/CASE: make space at ']{' optional
7685 # since the '{' might begin a case or when block
7686 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
7690 # keep space between 'sub' and '{' for anonymous sub definition
7691 if ( $type eq '{' ) {
7692 if ( $last_token eq 'sub' ) {
7696 # this is needed to avoid no space in '){'
7697 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7699 # avoid any space before the brace or bracket in something like
7700 # @opts{'a','b',...}
7701 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7706 elsif ( $type eq 'i' ) {
7708 # never a space before ->
7709 if ( $token =~ /^\-\>/ ) {
7714 # retain any space between '-' and bare word
7715 elsif ( $type eq 'w' || $type eq 'C' ) {
7716 $ws = WS_OPTIONAL if $last_type eq '-';
7718 # never a space before ->
7719 if ( $token =~ /^\-\>/ ) {
7724 # retain any space between '-' and bare word
7725 # example: avoid space between 'USER' and '-' here:
7726 # $myhash{USER-NAME}='steve';
7727 elsif ( $type eq 'm' || $type eq '-' ) {
7728 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
7731 # always space before side comment
7732 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
7734 # always preserver whatever space was used after a possible
7735 # filehandle (except _) or here doc operator
7738 && ( ( $last_type eq 'Z' && $last_token ne '_' )
7739 || $last_type eq 'h' )
7745 # space_backslash_quote; RT #123774
7746 # allow a space between a backslash and single or double quote
7747 # to avoid fooling html formatters
7748 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
7749 if ($rOpts_space_backslash_quote) {
7750 if ( $rOpts_space_backslash_quote == 1 ) {
7753 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
7754 else { } # shouldnt happen
7763 if FORMATTER_DEBUG_FLAG_WHITE;
7765 #---------------------------------------------------------------
7766 # Whitespace Rules Section 5:
7767 # Apply default rules not covered above.
7768 #---------------------------------------------------------------
7770 # If we fall through to here, look at the pre-defined hash tables for
7771 # the two tokens, and:
7772 # if (they are equal) use the common value
7773 # if (either is zero or undef) use the other
7774 # if (either is -1) use it
7788 if ( !defined($ws) ) {
7789 my $wl = $want_left_space{$type};
7790 my $wr = $want_right_space{$last_type};
7791 if ( !defined($wl) ) { $wl = 0 }
7792 if ( !defined($wr) ) { $wr = 0 }
7793 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
7796 if ( !defined($ws) ) {
7799 "WS flag is undefined for tokens $last_token $token\n");
7802 # Treat newline as a whitespace. Otherwise, we might combine
7803 # 'Send' and '-recipients' here according to the above rules:
7804 # my $msg = new Fax::Send
7805 # -recipients => $to,
7807 if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
7812 && ( $last_type !~ /^[Zh]$/ ) )
7815 # If this happens, we have a non-fatal but undesirable
7816 # hole in the above rules which should be patched.
7818 "WS flag is zero for tokens $last_token $token\n");
7821 $rwhitespace_flags->[$j] = $ws;
7823 FORMATTER_DEBUG_FLAG_WHITE && do {
7824 my $str = substr( $last_token, 0, 15 );
7825 $str .= ' ' x ( 16 - length($str) );
7826 if ( !defined($ws_1) ) { $ws_1 = "*" }
7827 if ( !defined($ws_2) ) { $ws_2 = "*" }
7828 if ( !defined($ws_3) ) { $ws_3 = "*" }
7829 if ( !defined($ws_4) ) { $ws_4 = "*" }
7831 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
7835 if ( $rOpts->{'tight-secret-operators'} ) {
7836 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
7838 return $rwhitespace_flags;
7839 } ## end sub set_whitespace_flags
7841 sub respace_tokens {
7844 return if $rOpts->{'indent-only'};
7846 # This routine makes all necessary changes to the tokenization after the
7847 # file has been read. This consists mostly of inserting and deleting spaces
7848 # according to the selected parameters. In a few cases non-space characters
7849 # are added, deleted or modified.
7851 # The old tokens are copied one-by-one, with changes, from the old
7852 # linear storage array to a new array.
7854 my $rLL = $self->{rLL};
7855 my $Klimit_old = $self->{Klimit};
7856 my $rlines = $self->{rlines};
7857 my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
7859 my $rLL_new = []; # This is the new array
7862 my $Kmax = @{$rLL} - 1;
7864 # Set the whitespace flags, which indicate the token spacing preference.
7865 my $rwhitespace_flags = $self->set_whitespace_flags();
7867 # we will be setting token lengths as we go
7868 my $cumulative_length = 0;
7870 # We also define these hash indexes giving container token array indexes
7871 # as a function of the container sequence numbers. For example,
7872 my $K_opening_container = {}; # opening [ { or (
7873 my $K_closing_container = {}; # closing ] } or )
7874 my $K_opening_ternary = {}; # opening ? of ternary
7875 my $K_closing_ternary = {}; # closing : of ternary
7877 # List of new K indexes of phantom semicolons
7878 # This will be needed if we want to undo them for iterations
7879 my $rK_phantom_semicolons = [];
7881 # Temporary hashes for adding semicolons
7882 ##my $rKfirst_new = {};
7884 # a sub to link preceding nodes forward to a new node type
7885 my $link_back = sub {
7886 my ( $Ktop, $key ) = @_;
7888 my $Kprev = $Ktop - 1;
7890 && !defined( $rLL_new->[$Kprev]->[$key] ) )
7892 $rLL_new->[$Kprev]->[$key] = $Ktop;
7897 # A sub to store each token in the new array
7898 # All new tokens must be stored by this sub so that it can update
7899 # all data structures on the fly.
7900 my $last_nonblank_type = ';';
7901 my $store_token = sub {
7904 # This will be the index of this item in the new array
7905 my $KK_new = @{$rLL_new};
7907 # check for a sequenced item (i.e., container or ?/:)
7908 my $type_sequence = $item->[_TYPE_SEQUENCE_];
7909 if ($type_sequence) {
7911 $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
7913 my $token = $item->[_TOKEN_];
7914 if ( $is_opening_token{$token} ) {
7916 $K_opening_container->{$type_sequence} = $KK_new;
7918 elsif ( $is_closing_token{$token} ) {
7920 $K_closing_container->{$type_sequence} = $KK_new;
7923 # These are not yet used but could be useful
7925 if ( $token eq '?' ) {
7926 $K_opening_ternary->{$type_sequence} = $KK;
7928 elsif ( $token eq ':' ) {
7929 $K_closing_ternary->{$type_sequence} = $KK;
7933 print STDERR "Ugh: shouldn't happen\n";
7938 # Save the length sum to just BEFORE this token
7939 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
7941 # now set the length of this token
7942 my $token_length = length( $item->[_TOKEN_] );
7944 # and update the cumulative length
7945 $cumulative_length += $token_length;
7947 my $type = $item->[_TYPE_];
7948 if ( $type ne 'b' ) { $last_nonblank_type = $type }
7950 # and finally, add this item to the new array
7951 push @{$rLL_new}, $item;
7954 my $add_phantom_semicolon = sub {
7958 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
7959 return unless ( defined($Kp) );
7961 # we are only adding semicolons for certain block types
7962 my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
7964 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
7965 || $block_type =~ /^(sub|package)/
7966 || $block_type =~ /^\w+\:$/ );
7968 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7970 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
7971 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
7973 # Do not add a semicolon if...
7977 # it would follow a comment (and be isolated)
7978 $previous_nonblank_type eq '#'
7980 # it follows a code block ( because they are not always wanted
7981 # there and may add clutter)
7982 || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
7984 # it would follow a label
7985 || $previous_nonblank_type eq 'J'
7987 # it would be inside a 'format' statement (and cause syntax error)
7988 || ( $previous_nonblank_type eq 'k'
7989 && $previous_nonblank_token =~ /format/ )
7991 # if it would prevent welding two containers
7992 || $rpaired_to_inner_container->{$type_sequence}
7996 # We will insert an empty semicolon here as a placeholder.
7997 # Later, if it becomes the last token on a line, we will bring it to life.
7998 # The advantage of doing this is that (1) we just have to check line endings,
7999 # and (2) the phantom semicolon has zero width and therefore won't cause
8000 # needless breaks of one-line blocks.
8002 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
8003 && $want_left_space{';'} == WS_NO )
8006 # convert the blank into a semicolon..
8007 # be careful: we are working on the new stack top
8008 # on a token which has been stored.
8009 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
8011 # Convert the existing blank to a semicolon
8012 $rLL_new->[$Ktop]->[_TOKEN_] = ''; # zero length
8013 $rLL_new->[$Ktop]->[_TYPE_] = ';';
8014 $rLL_new->[$Ktop]->[_SLEVEL_] =
8015 $rLL->[$KK]->[_SLEVEL_];
8017 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
8019 # Then store a new blank
8020 $store_token->($rcopy);
8024 # insert a new token
8025 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
8026 $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
8027 $store_token->($rcopy);
8028 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
8034 # Check that a quote looks okay
8035 # This works but needs to by sync'd with the log file output
8036 my ( $KK, $Kfirst ) = @_;
8037 my $token = $rLL->[$KK]->[_TOKEN_];
8038 note_embedded_tab() if ( $token =~ "\t" );
8040 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
8041 return unless ( defined($Kp) );
8042 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
8043 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
8045 my $previous_nonblank_type_2 = 'b';
8046 my $previous_nonblank_token_2 = "";
8047 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
8048 if ( defined($Kpp) ) {
8049 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
8050 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
8053 my $Kn = $self->K_next_nonblank($KK);
8054 my $next_nonblank_token = "";
8055 if ( defined($Kn) ) {
8056 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
8059 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
8060 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
8062 # make note of something like '$var = s/xxx/yyy/;'
8063 # in case it should have been '$var =~ s/xxx/yyy/;'
8065 $token =~ /^(s|tr|y|m|\/)/
8066 && $previous_nonblank_token =~ /^(=|==|!=)$/
8068 # preceded by simple scalar
8069 && $previous_nonblank_type_2 eq 'i'
8070 && $previous_nonblank_token_2 =~ /^\$/
8072 # followed by some kind of termination
8073 # (but give complaint if we can not see far enough ahead)
8074 && $next_nonblank_token =~ /^[; \)\}]$/
8076 # scalar is not declared
8077 && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
8080 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8082 "Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
8087 # Main loop over all lines of the file
8091 foreach my $line_of_tokens ( @{$rlines} ) {
8093 $input_line_number = $line_of_tokens->{_line_number};
8094 my $last_line_type = $line_type;
8095 $line_type = $line_of_tokens->{_line_type};
8096 next unless ( $line_type eq 'CODE' );
8097 my $last_CODE_type = $CODE_type;
8098 $CODE_type = $line_of_tokens->{_code_type};
8099 my $rK_range = $line_of_tokens->{_rK_range};
8100 my ( $Kfirst, $Klast ) = @{$rK_range};
8101 next unless defined($Kfirst);
8103 # Check for correct sequence of token indexes...
8104 # An error here means that sub write_line() did not correctly
8105 # package the tokenized lines as it received them.
8106 if ( defined($last_K_out) ) {
8107 if ( $Kfirst != $last_K_out + 1 ) {
8109 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
8114 if ( $Kfirst != 0 ) {
8115 Fault("Program Bug: first K is $Kfirst but should be 0");
8118 $last_K_out = $Klast;
8120 # Handle special lines of code
8121 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
8123 # CODE_types are as follows.
8125 # 'VB' = Verbatim - line goes out verbatim
8126 # 'FS' = Format Skipping - line goes out verbatim, no blanks
8127 # 'IO' = Indent Only - only indentation may be changed
8128 # 'NIN' = No Internal Newlines - line does not get broken
8129 # 'HSC'=Hanging Side Comment - fix this hanging side comment
8130 # 'BC'=Block Comment - an ordinary full line comment
8131 # 'SBC'=Static Block Comment - a block comment which does not get
8133 # 'SBCX'=Static Block Comment Without Leading Space
8134 # 'DEL'=Delete this line
8135 # 'VER'=VERSION statement
8136 # '' or (undefined) - no restructions
8138 # For a hanging side comment we insert an empty quote before
8139 # the comment so that it becomes a normal side comment and
8140 # will be aligned by the vertical aligner
8141 if ( $CODE_type eq 'HSC' ) {
8143 # Safety Check: This must be a line with one token (a comment)
8144 my $rtoken_vars = $rLL->[$Kfirst];
8145 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
8147 # Note that even if the flag 'noadd-whitespace' is set, we will
8148 # make an exception here and allow a blank to be inserted to push the comment
8149 # to the right. We can think of this as an adjustment of indentation
8150 # rather than whitespace between tokens. This will also prevent the hanging
8151 # side comment from getting converted to a block comment if whitespace
8152 # gets deleted, as for example with the -extrude and -mangle options.
8153 my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
8154 $store_token->($rcopy);
8155 $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
8156 $store_token->($rcopy);
8157 $store_token->($rtoken_vars);
8162 # This line was mis-marked by sub scan_comment
8164 "Program bug. A hanging side comment has been mismarked"
8169 # Copy tokens unchanged
8170 foreach my $KK ( $Kfirst .. $Klast ) {
8171 $store_token->( $rLL->[$KK] );
8176 # Handle normal line..
8178 # Insert any essential whitespace between lines
8179 # if last line was normal CODE
8180 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
8181 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
8182 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
8183 if ( $last_line_type eq 'CODE'
8184 && $type_next ne 'b'
8187 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
8188 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
8190 my ( $token_pp, $type_pp );
8191 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
8192 if ( defined($Kpp) ) {
8193 $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
8194 $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
8202 is_essential_whitespace(
8203 $token_pp, $type_pp, $token_p,
8204 $type_p, $token_next, $type_next,
8209 # Copy this first token as blank, but use previous line number
8210 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
8211 $rcopy->[_LINE_INDEX_] =
8212 $rLL_new->[-1]->[_LINE_INDEX_];
8213 $store_token->($rcopy);
8217 # loop to copy all tokens on this line, with any changes
8219 for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
8220 $rtoken_vars = $rLL->[$KK];
8221 my $token = $rtoken_vars->[_TOKEN_];
8222 my $type = $rtoken_vars->[_TYPE_];
8223 my $last_type_sequence = $type_sequence;
8224 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8226 # Handle a blank space ...
8227 if ( $type eq 'b' ) {
8229 # Delete it if not wanted by whitespace rules
8230 # or we are deleting all whitespace
8231 # Note that whitespace flag is a flag indicating whether a
8232 # white space BEFORE the token is needed
8233 next if ( $KK >= $Kmax ); # skip terminal blank
8234 my $Knext = $KK + 1;
8235 my $ws = $rwhitespace_flags->[$Knext];
8237 || $rOpts_delete_old_whitespace )
8240 # FIXME: maybe switch to using _new
8241 my $Kp = $self->K_previous_nonblank($KK);
8242 next unless defined($Kp);
8243 my $token_p = $rLL->[$Kp]->[_TOKEN_];
8244 my $type_p = $rLL->[$Kp]->[_TYPE_];
8246 my ( $token_pp, $type_pp );
8248 #my $Kpp = $K_previous_nonblank->($Kp);
8249 my $Kpp = $self->K_previous_nonblank($Kp);
8250 if ( defined($Kpp) ) {
8251 $token_pp = $rLL->[$Kpp]->[_TOKEN_];
8252 $type_pp = $rLL->[$Kpp]->[_TYPE_];
8258 my $token_next = $rLL->[$Knext]->[_TOKEN_];
8259 my $type_next = $rLL->[$Knext]->[_TYPE_];
8261 my $do_not_delete = is_essential_whitespace(
8262 $token_pp, $type_pp, $token_p,
8263 $type_p, $token_next, $type_next,
8266 next unless ($do_not_delete);
8269 # make it just one character if allowed
8270 if ($rOpts_add_whitespace) {
8271 $rtoken_vars->[_TOKEN_] = ' ';
8273 $store_token->($rtoken_vars);
8277 # Handle a nonblank token...
8279 # Modify certain tokens here for whitespace
8280 # The following is not yet done, but could be:
8282 if ( $type =~ /^[wit]$/ ) {
8285 # change '$ var' to '$var' etc
8286 # '-> new' to '->new'
8287 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8289 $rtoken_vars->[_TOKEN_] = $token;
8292 # Split identifiers with leading arrows, inserting blanks if
8293 # necessary. It is easier and safer here than in the
8294 # tokenizer. For example '->new' becomes two tokens, '->' and
8295 # 'new' with a possible blank between.
8297 # Note: there is a related patch in sub set_whitespace_flags
8298 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
8299 my $token_save = $1;
8300 my $type_save = $type;
8302 # store a blank to left of arrow if necessary
8303 my $Kprev = $self->K_previous_nonblank($KK);
8304 if ( defined($Kprev)
8305 && $rLL->[$Kprev]->[_TYPE_] ne 'b'
8306 && $rOpts_add_whitespace
8307 && $want_left_space{'->'} == WS_YES )
8310 copy_token_as_type( $rtoken_vars, 'b', ' ' );
8311 $store_token->($rcopy);
8314 # then store the arrow
8315 my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
8316 $store_token->($rcopy);
8318 # then reset the current token to be the remainder,
8319 # and reset the whitespace flag according to the arrow
8320 $token = $rtoken_vars->[_TOKEN_] = $token_save;
8321 $type = $rtoken_vars->[_TYPE_] = $type_save;
8322 $store_token->($rtoken_vars);
8326 if ( $token =~ /$SUB_PATTERN/ ) {
8327 $token =~ s/\s+/ /g;
8328 $rtoken_vars->[_TOKEN_] = $token;
8331 # trim identifiers of trailing blanks which can occur
8332 # under some unusual circumstances, such as if the
8333 # identifier 'witch' has trailing blanks on input here:
8337 # () # prototype may be on new line ...
8339 if ( $type eq 'i' ) {
8340 $token =~ s/\s+$//g;
8341 $rtoken_vars->[_TOKEN_] = $token;
8345 # change 'LABEL :' to 'LABEL:'
8346 elsif ( $type eq 'J' ) {
8348 $rtoken_vars->[_TOKEN_] = $token;
8351 # patch to add space to something like "x10"
8352 # This avoids having to split this token in the pre-tokenizer
8353 elsif ( $type eq 'n' ) {
8354 if ( $token =~ /^x\d+/ ) {
8356 $rtoken_vars->[_TOKEN_] = $token;
8360 # check a quote for problems
8361 elsif ( $type eq 'Q' ) {
8363 # This is ready to go but is commented out because there is
8364 # still identical logic in sub break_lines.
8365 # $check_Q->($KK, $Kfirst);
8368 # trim blanks from right of qw quotes
8369 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8370 elsif ( $type eq 'q' ) {
8372 $rtoken_vars->[_TOKEN_] = $token;
8373 note_embedded_tab() if ( $token =~ "\t" );
8376 elsif ($type_sequence) {
8378 # if ( $is_opening_token{$token} ) {
8381 if ( $is_closing_token{$token} ) {
8383 # Insert a tentative missing semicolon if the next token is
8384 # a closing block brace
8389 # not preceded by a ';'
8390 && $last_nonblank_type ne ';'
8392 # and this is not a VERSION stmt (is all one line, we are not
8393 # inserting semicolons on one-line blocks)
8394 && $CODE_type ne 'VER'
8396 # and we are allowed to add semicolons
8397 && $rOpts->{'add-semicolons'}
8400 $add_phantom_semicolon->($KK);
8405 # Insert any needed whitespace
8407 && $rLL_new->[-1]->[_TYPE_] ne 'b'
8408 && $rOpts_add_whitespace )
8410 my $ws = $rwhitespace_flags->[$KK];
8413 my $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
8414 $rcopy->[_LINE_INDEX_] =
8415 $rLL_new->[-1]->[_LINE_INDEX_];
8416 $store_token->($rcopy);
8419 $store_token->($rtoken_vars);
8423 # Reset memory to be the new array
8424 $self->{rLL} = $rLL_new;
8425 $self->set_rLL_max_index();
8426 $self->{K_opening_container} = $K_opening_container;
8427 $self->{K_closing_container} = $K_closing_container;
8428 $self->{K_opening_ternary} = $K_opening_ternary;
8429 $self->{K_closing_ternary} = $K_closing_ternary;
8430 $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
8432 # make sure the new array looks okay
8433 $self->check_token_array();
8435 # reset the token limits of each line
8436 $self->resync_lines_and_tokens();
8443 my $Last_line_had_side_comment;
8444 my $In_format_skipping_section;
8445 my $Saw_VERSION_in_this_file;
8449 my $rlines = $self->{rlines};
8451 $Last_line_had_side_comment = undef;
8452 $In_format_skipping_section = undef;
8453 $Saw_VERSION_in_this_file = undef;
8455 # Loop over all lines
8456 foreach my $line_of_tokens ( @{$rlines} ) {
8457 my $line_type = $line_of_tokens->{_line_type};
8458 next unless ( $line_type eq 'CODE' );
8459 my $CODE_type = $self->get_CODE_type($line_of_tokens);
8460 $line_of_tokens->{_code_type} = $CODE_type;
8466 my ( $self, $line_of_tokens ) = @_;
8468 # We are looking at a line of code and setting a flag to
8469 # describe any special processing that it requires
8471 # Possible CODE_types are as follows.
8473 # 'VB' = Verbatim - line goes out verbatim
8474 # 'IO' = Indent Only - line goes out unchanged except for indentation
8475 # 'NIN' = No Internal Newlines - line does not get broken
8476 # 'HSC'=Hanging Side Comment - fix this hanging side comment
8477 # 'BC'=Block Comment - an ordinary full line comment
8478 # 'SBC'=Static Block Comment - a block comment which does not get
8480 # 'SBCX'=Static Block Comment Without Leading Space
8481 # 'DEL'=Delete this line
8482 # 'VER'=VERSION statement
8483 # '' or (undefined) - no restructions
8485 my $rLL = $self->{rLL};
8486 my $Klimit = $self->{Klimit};
8488 my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
8489 my $no_internal_newlines = 1 - $rOpts_add_newlines;
8490 if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
8492 # extract what we need for this line..
8494 # Global value for error messages:
8495 $input_line_number = $line_of_tokens->{_line_number};
8497 my $rK_range = $line_of_tokens->{_rK_range};
8498 my ( $Kfirst, $Klast ) = @{$rK_range};
8500 if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
8501 my $input_line = $line_of_tokens->{_line_text};
8502 my $in_continued_quote = my $starting_in_quote =
8503 $line_of_tokens->{_starting_in_quote};
8504 my $in_quote = $line_of_tokens->{_ending_in_quote};
8505 my $ending_in_quote = $in_quote;
8506 my $guessed_indentation_level =
8507 $line_of_tokens->{_guessed_indentation_level};
8509 my $is_static_block_comment = 0;
8511 # Handle a continued quote..
8512 if ($in_continued_quote) {
8514 # A line which is entirely a quote or pattern must go out
8515 # verbatim. Note: the \n is contained in $input_line.
8517 if ( ( $input_line =~ "\t" ) ) {
8518 note_embedded_tab();
8520 $Last_line_had_side_comment = 0;
8525 my $is_block_comment =
8526 ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
8528 # Write line verbatim if we are in a formatting skip section
8529 if ($In_format_skipping_section) {
8530 $Last_line_had_side_comment = 0;
8532 # Note: extra space appended to comment simplifies pattern matching
8533 if ( $is_block_comment
8534 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
8535 /$format_skipping_pattern_end/o )
8537 $In_format_skipping_section = 0;
8538 write_logfile_entry("Exiting formatting skip section\n");
8543 # See if we are entering a formatting skip section
8544 if ( $rOpts_format_skipping
8545 && $is_block_comment
8546 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
8547 /$format_skipping_pattern_begin/o )
8549 $In_format_skipping_section = 1;
8550 write_logfile_entry("Entering formatting skip section\n");
8551 $Last_line_had_side_comment = 0;
8555 # ignore trailing blank tokens (they will get deleted later)
8556 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
8560 # Handle a blank line..
8562 $Last_line_had_side_comment = 0;
8566 # see if this is a static block comment (starts with ## by default)
8567 my $is_static_block_comment_without_leading_space = 0;
8568 if ( $is_block_comment
8569 && $rOpts->{'static-block-comments'}
8570 && $input_line =~ /$static_block_comment_pattern/o )
8572 $is_static_block_comment = 1;
8573 $is_static_block_comment_without_leading_space =
8574 substr( $input_line, 0, 1 ) eq '#';
8577 # Check for comments which are line directives
8578 # Treat exactly as static block comments without leading space
8579 # reference: perlsyn, near end, section Plain Old Comments (Not!)
8580 # example: '# line 42 "new_filename.plx"'
8583 && $input_line =~ /^\# \s*
8585 (?:\s("?)([^"]+)\2)? \s*
8589 $is_static_block_comment = 1;
8590 $is_static_block_comment_without_leading_space = 1;
8593 # look for hanging side comment
8596 && $Last_line_had_side_comment # last line had side comment
8597 && $input_line =~ /^\s/ # there is some leading space
8598 && !$is_static_block_comment # do not make static comment hanging
8599 && $rOpts->{'hanging-side-comments'} # user is allowing
8600 # hanging side comments
8604 $Last_line_had_side_comment = 1;
8608 # remember if this line has a side comment
8609 $Last_line_had_side_comment =
8610 ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
8612 # Handle a block (full-line) comment..
8613 if ($is_block_comment) {
8615 if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
8617 # TRIM COMMENTS -- This could be turned off as a option
8618 $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
8620 if ($is_static_block_comment_without_leading_space) {
8623 elsif ($is_static_block_comment) {
8632 # NOTE: This does not work yet. Version in print-line-of-tokens
8633 # is Still used until fixed
8635 # compare input/output indentation except for continuation lines
8636 # (because they have an unknown amount of initial blank space)
8637 # and lines which are quotes (because they may have been outdented)
8638 # Note: this test is placed here because we know the continuation flag
8639 # at this point, which allows us to avoid non-meaningful checks.
8640 my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_];
8641 compare_indentation_levels( $guessed_indentation_level,
8642 $structural_indentation_level )
8643 unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0
8644 || $guessed_indentation_level == 0
8645 && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' );
8648 # Patch needed for MakeMaker. Do not break a statement
8649 # in which $VERSION may be calculated. See MakeMaker.pm;
8650 # this is based on the coding in it.
8651 # The first line of a file that matches this will be eval'd:
8652 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8654 # *VERSION = \'1.01';
8655 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
8656 # We will pass such a line straight through without breaking
8657 # it unless -npvl is used.
8659 # Patch for problem reported in RT #81866, where files
8660 # had been flattened into a single line and couldn't be
8661 # tidied without -npvl. There are two parts to this patch:
8662 # First, it is not done for a really long line (80 tokens for now).
8663 # Second, we will only allow up to one semicolon
8664 # before the VERSION. We need to allow at least one semicolon
8665 # for statements like this:
8666 # require Exporter; our $VERSION = $Exporter::VERSION;
8667 # where both statements must be on a single line for MakeMaker
8669 my $is_VERSION_statement = 0;
8670 if ( !$Saw_VERSION_in_this_file
8673 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
8675 $Saw_VERSION_in_this_file = 1;
8676 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8683 sub find_nested_pairs {
8686 my $rLL = $self->{rLL};
8687 return unless ( defined($rLL) && @{$rLL} );
8689 # We define an array of pairs of nested containers
8692 # We also set the following hash values to identify container pairs for
8693 # which the opening and closing tokens are adjacent in the token stream:
8694 # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
8695 # $seqno_in are the seqence numbers of the outer and inner containers of
8696 # the pair We need these later to decide if we can insert a missing
8698 my $rpaired_to_inner_container = {};
8700 # This local hash remembers if an outer container has a close following
8702 # The key is the outer sequence number
8703 # The value is the token_hash of the inner container
8705 my %has_close_following_opening;
8707 # Names of calling routines can either be marked as 'i' or 'w',
8708 # and they may invoke a sub call with an '->'. We will consider
8709 # any consecutive string of such types as a single unit when making
8710 # weld decisions. We also allow a leading !
8711 my $is_name_type = {
8721 return $type && $is_name_type->{$type};
8725 my $last_last_container;
8726 my $last_nonblank_token_vars;
8729 my $nonblank_token_count = 0;
8731 # loop over all tokens
8732 foreach my $rtoken_vars ( @{$rLL} ) {
8734 my $type = $rtoken_vars->[_TYPE_];
8736 next if ( $type eq 'b' );
8738 # long identifier-like items are counted as a single item
8739 $nonblank_token_count++
8740 unless ( $is_name->($type)
8741 && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
8743 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8744 if ($type_sequence) {
8746 my $token = $rtoken_vars->[_TOKEN_];
8748 if ( $is_opening_token{$token} ) {
8750 # following previous opening token ...
8751 if ( $last_container
8752 && $is_opening_token{ $last_container->[_TOKEN_] } )
8755 # adjacent to this one
8756 my $tok_diff = $nonblank_token_count - $last_count;
8758 my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
8761 || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
8764 # remember this pair...
8765 my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
8766 my $inner_seqno = $type_sequence;
8767 $has_close_following_opening{$outer_seqno} =
8773 elsif ( $is_closing_token{$token} ) {
8775 # if the corresponding opening token had an adjacent opening
8776 if ( $has_close_following_opening{$type_sequence}
8777 && $is_closing_token{ $last_container->[_TOKEN_] }
8778 && $has_close_following_opening{$type_sequence}
8779 ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
8782 # The closing weld tokens must be adjacent
8783 # NOTE: so intermediate commas and semicolons
8784 # can currently block a weld. This is something
8785 # that could be fixed in the future by including
8786 # a flag to delete un-necessary commas and semicolons.
8787 my $tok_diff = $nonblank_token_count - $last_count;
8789 if ( $tok_diff == 1 ) {
8791 # This is a closely nested pair ..
8792 my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
8793 my $outer_seqno = $type_sequence;
8794 $rpaired_to_inner_container->{$outer_seqno} =
8797 push @nested_pairs, [ $inner_seqno, $outer_seqno ];
8802 $last_last_container = $last_container;
8803 $last_container = $rtoken_vars;
8804 $last_count = $nonblank_token_count;
8806 $last_nonblank_token_vars = $rtoken_vars;
8808 $self->{rnested_pairs} = \@nested_pairs;
8809 $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
8815 # a debug routine, not normally used
8816 my ( $self, $msg ) = @_;
8817 my $rLL = $self->{rLL};
8818 my $nvars = @{$rLL};
8819 print STDERR "$msg\n";
8820 print STDERR "ntokens=$nvars\n";
8821 print STDERR "K\t_TOKEN_\t_TYPE_\n";
8823 foreach my $item ( @{$rLL} ) {
8824 print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
8829 sub K_next_nonblank {
8830 my ( $self, $KK, $rLL ) = @_;
8832 # return the index K of the next nonblank token
8833 return unless ( defined($KK) && $KK >= 0 );
8834 $rLL = $self->{rLL} unless ( defined($rLL) );
8837 while ( $Knnb < $Num ) {
8838 if ( !defined( $rLL->[$Knnb] ) ) {
8839 Fault("Undefined entry for k=$Knnb");
8841 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
8847 sub K_previous_nonblank {
8849 # return index of previous nonblank token before item K
8850 # Call with $KK=undef to start search at the top of the array
8851 my ( $self, $KK, $rLL ) = @_;
8852 $rLL = $self->{rLL} unless ( defined($rLL) );
8854 if ( !defined($KK) ) { $KK = $Num }
8855 elsif ( $KK > $Num ) {
8857 # The caller should make the first call with KK_new=undef to
8860 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
8864 while ( $Kpnb >= 0 ) {
8865 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
8871 sub weld_containers {
8873 # do any welding operations
8876 # initialize weld length hashes needed later for checking line lengths
8877 # TODO: These should eventually be stored in $self rather than be package vars
8878 %weld_len_left_closing = ();
8879 %weld_len_right_closing = ();
8880 %weld_len_left_opening = ();
8881 %weld_len_right_opening = ();
8883 return if ( $rOpts->{'indent-only'} );
8884 return unless ($rOpts_add_newlines);
8886 $self->weld_nested_containers()
8887 if $rOpts->{'weld-nested-containers'};
8889 # Note that these two calls are order-dependent.
8890 # sub weld_nested_containers() must be called before sub
8891 # weld_cuddled_blocks(). This is because it is more complex and could
8892 # overwrite the %weld_len_... hash values written by weld_cuddled_blocks().
8893 # sub weld_cuddled_blocks(), on the other hand, is much simpler and will
8894 # not overwrite the values written by weld_nested_containers. But
8895 # note that weld_nested_containers() changes the _LEVEL_ values, so
8896 # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
8898 # Here is a good test case to Be sure that both cuddling and welding
8899 # are working and not interfering with each other:
8901 # perltidy -wn -cb -cbl='if-elsif-else'
8903 # if ($BOLD_MATH) { (
8904 # $labels, $comment,
8905 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
8907 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
8911 $self->weld_cuddled_blocks()
8912 if $rOpts->{'cuddled-blocks'};
8917 sub weld_cuddled_blocks {
8920 # This routine implements the -cb flag by finding the appropriate
8921 # closing and opening block braces and welding them together.
8923 my $rLL = $self->{rLL};
8924 return unless ( defined($rLL) && @{$rLL} );
8925 my $rbreak_container = $self->{rbreak_container};
8927 my $K_opening_container = $self->{K_opening_container};
8928 my $K_closing_container = $self->{K_closing_container};
8930 my $length_to_opening_seqno = sub {
8932 my $KK = $K_opening_container->{$seqno};
8933 my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
8936 my $length_to_closing_seqno = sub {
8938 my $KK = $K_closing_container->{$seqno};
8939 my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
8943 my $is_broken_block = sub {
8945 # a block is broken if the input line numbers of the braces differ
8946 # we can only cuddle between broken blocks
8948 my $K_opening = $K_opening_container->{$seqno};
8949 return unless ( defined($K_opening) );
8950 my $K_closing = $K_closing_container->{$seqno};
8951 return unless ( defined($K_closing) );
8952 return $rbreak_container->{$seqno}
8953 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
8954 $rLL->[$K_opening]->[_LINE_INDEX_];
8957 # A stack to remember open chains at all levels:
8958 # $in_chain[$level] = [$chain_type, $type_sequence];
8960 my $CBO = $rOpts->{'cuddled-break-option'};
8962 # loop over structure items to find cuddled pairs
8965 while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
8966 my $rtoken_vars = $rLL->[$KK];
8967 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8968 if ( !$type_sequence ) {
8969 Fault("sequence = $type_sequence not defined");
8972 # We use the original levels because they get changed by sub
8973 # 'weld_nested_containers'. So if this were to be called before that
8974 # routine, the levels would be wrong and things would go bad.
8975 my $last_level = $level;
8976 $level = $rtoken_vars->[_LEVEL_TRUE_];
8978 if ( $level < $last_level ) { $in_chain[$last_level] = undef }
8979 elsif ( $level > $last_level ) { $in_chain[$level] = undef }
8981 # We are only looking at code blocks
8982 my $token = $rtoken_vars->[_TOKEN_];
8983 my $type = $rtoken_vars->[_TYPE_];
8984 next unless ( $type eq $token );
8986 if ( $token eq '{' ) {
8988 my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
8989 if ( !$block_type ) {
8991 # patch for unrecognized block types which may not be labeled
8992 my $Kp = $self->K_previous_nonblank($KK);
8993 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
8994 $Kp = $self->K_previous_nonblank($Kp);
8997 $block_type = $rLL->[$Kp]->[_TOKEN_];
8999 if ( $in_chain[$level] ) {
9001 # we are in a chain and are at an opening block brace.
9002 # See if we are welding this opening brace with the previous
9003 # block brace. Get their identification numbers:
9004 my $closing_seqno = $in_chain[$level]->[1];
9005 my $opening_seqno = $type_sequence;
9007 # The preceding block must be on multiple lines so that its
9008 # closing brace will start a new line.
9009 if ( !$is_broken_block->($closing_seqno) ) {
9010 next unless ( $CBO == 2 );
9011 $rbreak_container->{$closing_seqno} = 1;
9014 # we will let the trailing block be either broken or intact
9015 ## && $is_broken_block->($opening_seqno);
9017 # We can weld the closing brace to its following word ..
9018 my $Ko = $K_closing_container->{$closing_seqno};
9019 my $Kon = $self->K_next_nonblank($Ko);
9021 # ..unless it is a comment
9022 if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
9024 $rLL->[ $Kon + 1 ]->[_CUMULATIVE_LENGTH_] -
9025 $rLL->[$Ko]->[_CUMULATIVE_LENGTH_];
9026 $weld_len_right_closing{$closing_seqno} = $dlen;
9028 # Set flag that we want to break the next container
9029 # so that the cuddled line is balanced.
9030 $rbreak_container->{$opening_seqno} = 1
9037 # We are not in a chain. Start a new chain if we see the
9038 # starting block type.
9039 if ( $rcuddled_block_types->{$block_type} ) {
9040 $in_chain[$level] = [ $block_type, $type_sequence ];
9044 $in_chain[$level] = [ $block_type, $type_sequence ];
9048 elsif ( $token eq '}' ) {
9049 if ( $in_chain[$level] ) {
9051 # We are in a chain at a closing brace. See if this chain
9053 my $Knn = $self->K_next_nonblank($KK);
9055 # skip past comments
9056 while ( $Knn && $rLL->[$Knn]->[_TYPE_] eq '#' ) {
9057 $Knn = $self->K_next_nonblank($Knn);
9061 my $chain_type = $in_chain[$level]->[0];
9062 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
9064 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
9068 # Note that we do not weld yet because we must wait until
9069 # we we are sure that an opening brace for this follows.
9070 $in_chain[$level]->[1] = $type_sequence;
9072 else { $in_chain[$level] = undef }
9080 sub weld_nested_containers {
9083 # This routine implements the -wn flag by "welding together"
9084 # the nested closing and opening tokens which were previously
9085 # identified by sub 'find_nested_pairs'. "welding" simply
9086 # involves setting certain hash values which will be checked
9087 # later during formatting.
9089 my $rLL = $self->{rLL};
9090 my $Klimit = $self->get_rLL_max_index();
9091 my $rnested_pairs = $self->{rnested_pairs};
9092 my $rlines = $self->{rlines};
9093 my $K_opening_container = $self->{K_opening_container};
9094 my $K_closing_container = $self->{K_closing_container};
9096 # Return unless there are nested pairs to weld
9097 return unless defined($rnested_pairs) && @{$rnested_pairs};
9099 # This array will hold the sequence numbers of the tokens to be welded.
9102 # Variables needed for estimating line lengths
9103 my $starting_indent;
9104 my $starting_lentot;
9106 # A tolerance to the length for length estimates. In some rare cases
9107 # this can avoid problems where a final weld slightly exceeds the
9108 # line length and gets broken in a bad spot.
9111 my $excess_length_to = sub {
9112 my ($rtoken_hash) = @_;
9114 # Estimate the length from the line start to a given token
9115 my $length = $rtoken_hash->[_CUMULATIVE_LENGTH_] - $starting_lentot;
9118 $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
9119 return ($excess_length);
9121 my $length_to_opening_seqno = sub {
9123 my $KK = $K_opening_container->{$seqno};
9124 my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
9127 my $length_to_closing_seqno = sub {
9129 my $KK = $K_closing_container->{$seqno};
9130 my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
9135 # _oo=outer opening, i.e. first of { {
9136 # _io=inner opening, i.e. second of { {
9137 # _oc=outer closing, i.e. second of } {
9138 # _ic=inner closing, i.e. first of } }
9142 # We are working from outermost to innermost pairs so that
9143 # level changes will be complete when we arrive at the inner pairs.
9145 while ( my $item = pop( @{$rnested_pairs} ) ) {
9146 my ( $inner_seqno, $outer_seqno ) = @{$item};
9148 my $Kouter_opening = $K_opening_container->{$outer_seqno};
9149 my $Kinner_opening = $K_opening_container->{$inner_seqno};
9150 my $Kouter_closing = $K_closing_container->{$outer_seqno};
9151 my $Kinner_closing = $K_closing_container->{$inner_seqno};
9153 my $outer_opening = $rLL->[$Kouter_opening];
9154 my $inner_opening = $rLL->[$Kinner_opening];
9155 my $outer_closing = $rLL->[$Kouter_closing];
9156 my $inner_closing = $rLL->[$Kinner_closing];
9158 my $iline_oo = $outer_opening->[_LINE_INDEX_];
9159 my $iline_io = $inner_opening->[_LINE_INDEX_];
9161 # Set flag saying if this pair starts a new weld
9162 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
9164 # Set flag saying if this pair is adjacent to the previous nesting pair
9165 # (even if previous pair was rejected as a weld)
9166 my $touch_previous_pair =
9167 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
9168 $previous_pair = $item;
9170 # Set a flag if we should not weld. It sometimes looks best not to weld
9171 # when the opening and closing tokens are very close. However, there
9172 # is a danger that we will create a "blinker", which oscillates between
9173 # two semi-stable states, if we do not weld. So the rules for
9174 # not welding have to be carefully defined and tested.
9176 if ( !$touch_previous_pair ) {
9178 # If this pair is not adjacent to the previous pair (skipped or
9179 # not), then measure lengths from the start of line of oo
9181 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
9182 my ( $Kfirst, $Klast ) = @{$rK_range};
9183 $starting_lentot = $rLL->[$Kfirst]->[_CUMULATIVE_LENGTH_];
9184 $starting_indent = 0;
9185 if ( !$rOpts_variable_maximum_line_length ) {
9186 my $level = $rLL->[$Kfirst]->[_LEVEL_];
9187 $starting_indent = $rOpts_indent_columns * $level;
9190 # DO-NOT-WELD RULE 1:
9191 # Do not weld something that looks like the start of a two-line
9192 # function call, like this:
9193 # $trans->add_transformation(
9194 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
9195 # We will look for a semicolon after the closing paren.
9197 # We want to weld something complex, like this though
9198 # my $compass = uc( opposite_direction( line_to_canvas_direction(
9199 # @{ $coords[0] }, @{ $coords[1] } ) ) );
9200 # Otherwise we will get a 'blinker'
9202 my $iline_oc = $outer_closing->[_LINE_INDEX_];
9203 if ( $iline_oc <= $iline_oo + 1 ) {
9205 # Look for following semicolon...
9206 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
9207 my $next_nonblank_type =
9208 defined($Knext_nonblank)
9209 ? $rLL->[$Knext_nonblank]->[_TYPE_]
9211 if ( $next_nonblank_type eq ';' ) {
9213 # Then do not weld if no other containers between inner
9214 # opening and closing.
9215 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
9216 if ( $Knext_seq_item == $Kinner_closing ) {
9223 my $iline_ic = $inner_closing->[_LINE_INDEX_];
9225 # DO-NOT-WELD RULE 2:
9226 # Do not weld an opening paren to an inner one line brace block
9227 # We will just use old line numbers for this test and require
9228 # iterations if necessary for convergence
9230 # For example, otherwise we could cause the opening paren
9231 # in the following example to separate from the caller name
9234 # $_[0]->code_handler
9235 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
9237 # Here is another example where we do not want to weld:
9238 # $wrapped->add_around_modifier(
9239 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
9241 # If the one line sub block gets broken due to length or by the
9242 # user, then we can weld. The result will then be:
9243 # $wrapped->add_around_modifier( sub {
9244 # push @tracelog => 'around 1';
9248 if ( $iline_ic == $iline_io ) {
9250 my $token_oo = $outer_opening->[_TOKEN_];
9251 my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
9252 my $token_io = $inner_opening->[_TOKEN_];
9253 $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
9256 # DO-NOT-WELD RULE 3:
9257 # Do not weld if this makes our line too long
9258 $do_not_weld ||= $excess_length_to->($inner_opening) > 0;
9262 # After neglecting a pair, we start measuring from start of point io
9263 $starting_lentot = $inner_opening->[_CUMULATIVE_LENGTH_];
9264 $starting_indent = 0;
9265 if ( !$rOpts_variable_maximum_line_length ) {
9266 my $level = $inner_opening->[_LEVEL_];
9267 $starting_indent = $rOpts_indent_columns * $level;
9270 # Normally, a broken pair should not decrease indentation of
9271 # intermediate tokens:
9272 ## if ( $last_pair_broken ) { next }
9273 # However, for long strings of welded tokens, such as '{{{{{{...'
9274 # we will allow broken pairs to also remove indentation.
9275 # This will keep very long strings of opening and closing
9276 # braces from marching off to the right. We will do this if the
9277 # number of tokens in a weld before the broken weld is 4 or more.
9278 # This rule will mainly be needed for test scripts, since typical
9279 # welds have fewer than about 4 welded tokens.
9280 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
9283 # otherwise start new weld ...
9284 elsif ($starting_new_weld) {
9288 # ... or extend current weld
9290 unshift @{ $welds[-1] }, $inner_seqno;
9293 ########################################################################
9294 # After welding, reduce the indentation level if all intermediate tokens
9295 ########################################################################
9297 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
9298 if ( $dlevel != 0 ) {
9299 my $Kstart = $Kinner_opening;
9300 my $Kstop = $Kinner_closing;
9301 for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
9302 $rLL->[$KK]->[_LEVEL_] += $dlevel;
9307 #####################################################
9308 # Define weld lengths needed later to set line breaks
9309 #####################################################
9310 foreach my $item (@welds) {
9312 # sweep from inner to outer
9317 foreach my $outer_seqno ( @{$item} ) {
9321 $length_to_opening_seqno->($inner_seqno) -
9322 $length_to_opening_seqno->($outer_seqno);
9325 $length_to_closing_seqno->($outer_seqno) -
9326 $length_to_closing_seqno->($inner_seqno);
9328 $len_open += $dlen_opening;
9329 $len_close += $dlen_closing;
9333 $weld_len_left_closing{$outer_seqno} = $len_close;
9334 $weld_len_right_opening{$outer_seqno} = $len_open;
9336 $inner_seqno = $outer_seqno;
9339 # sweep from outer to inner
9340 foreach my $seqno ( reverse @{$item} ) {
9341 $weld_len_right_closing{$seqno} =
9342 $len_close - $weld_len_left_closing{$seqno};
9343 $weld_len_left_opening{$seqno} =
9344 $len_open - $weld_len_right_opening{$seqno};
9348 #####################################
9350 #####################################
9354 foreach my $weld (@welds) {
9355 print "\nWeld number $count has seq: (@{$weld})\n";
9356 foreach my $seq ( @{$weld} ) {
9359 left_opening=$weld_len_left_opening{$seq};
9360 right_opening=$weld_len_right_opening{$seq};
9361 left_closing=$weld_len_left_closing{$seq};
9362 right_closing=$weld_len_right_closing{$seq};
9374 my ( $seqno, $type_or_tok ) = @_;
9376 # Given the sequence number of a token, and the token or its type,
9377 # return the length of any weld to its left
9381 if ( $is_closing_type{$type_or_tok} ) {
9382 $weld_len = $weld_len_left_closing{$seqno};
9384 elsif ( $is_opening_type{$type_or_tok} ) {
9385 $weld_len = $weld_len_left_opening{$seqno};
9388 if ( !defined($weld_len) ) { $weld_len = 0 }
9392 sub weld_len_right {
9394 my ( $seqno, $type_or_tok ) = @_;
9396 # Given the sequence number of a token, and the token or its type,
9397 # return the length of any weld to its right
9401 if ( $is_closing_type{$type_or_tok} ) {
9402 $weld_len = $weld_len_right_closing{$seqno};
9404 elsif ( $is_opening_type{$type_or_tok} ) {
9405 $weld_len = $weld_len_right_opening{$seqno};
9408 if ( !defined($weld_len) ) { $weld_len = 0 }
9412 sub weld_len_left_to_go {
9415 # Given the index of a token in the 'to_go' array
9416 # return the length of any weld to its left
9417 return if ( $i < 0 );
9419 weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
9423 sub weld_len_right_to_go {
9426 # Given the index of a token in the 'to_go' array
9427 # return the length of any weld to its right
9428 return if ( $i < 0 );
9429 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
9431 weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
9435 sub link_sequence_items {
9437 # This has been merged into 'respace_tokens' but retained for reference
9439 my $rlines = $self->{rlines};
9440 my $rLL = $self->{rLL};
9442 # We walk the token list and make links to the next sequence item.
9443 # We also define these hashes to container tokens using sequence number as
9445 my $K_opening_container = {}; # opening [ { or (
9446 my $K_closing_container = {}; # closing ] } or )
9447 my $K_opening_ternary = {}; # opening ? of ternary
9448 my $K_closing_ternary = {}; # closing : of ternary
9450 # sub to link preceding nodes forward to a new node type
9451 my $link_back = sub {
9452 my ( $Ktop, $key ) = @_;
9454 my $Kprev = $Ktop - 1;
9456 && !defined( $rLL->[$Kprev]->[$key] ) )
9458 $rLL->[$Kprev]->[$key] = $Ktop;
9463 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
9465 $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
9467 my $type = $rLL->[$KK]->[_TYPE_];
9469 next if ( $type eq 'b' );
9471 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9472 if ($type_sequence) {
9474 $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
9476 my $token = $rLL->[$KK]->[_TOKEN_];
9477 if ( $is_opening_token{$token} ) {
9479 $K_opening_container->{$type_sequence} = $KK;
9481 elsif ( $is_closing_token{$token} ) {
9483 $K_closing_container->{$type_sequence} = $KK;
9486 # These are not yet used but could be useful
9488 if ( $token eq '?' ) {
9489 $K_opening_ternary->{$type_sequence} = $KK;
9491 elsif ( $token eq ':' ) {
9492 $K_closing_ternary->{$type_sequence} = $KK;
9496 Unknown sequenced token type '$type'. Expecting one of '{[(?:)]}'
9503 $self->{K_opening_container} = $K_opening_container;
9504 $self->{K_closing_container} = $K_closing_container;
9505 $self->{K_opening_ternary} = $K_opening_ternary;
9506 $self->{K_closing_ternary} = $K_closing_ternary;
9510 sub sum_token_lengths {
9513 # This has been merged into 'respace_tokens' but retained for reference
9514 my $rLL = $self->{rLL};
9515 my $cumulative_length = 0;
9516 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
9518 # Save the length sum to just BEFORE this token
9519 $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
9521 # now set the length of this token
9522 my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
9524 $cumulative_length += $token_length;
9529 sub resync_lines_and_tokens {
9532 my $rLL = $self->{rLL};
9533 my $Klimit = $self->{Klimit};
9534 my $rlines = $self->{rlines};
9536 # Re-construct the arrays of tokens associated with the original input lines
9537 # since they have probably changed due to inserting and deleting blanks
9538 # and a few other tokens.
9542 # This is the next token and its line index:
9545 if ( defined($rLL) && @{$rLL} ) {
9546 $Kmax = @{$rLL} - 1;
9547 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
9550 my $get_inext = sub {
9551 if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
9553 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
9558 # Remember the most recently output token index
9562 foreach my $line_of_tokens ( @{$rlines} ) {
9564 my $line_type = $line_of_tokens->{_line_type};
9565 if ( $line_type eq 'CODE' ) {
9569 $inext = $get_inext->();
9570 while ( defined($inext) && $inext <= $iline ) {
9571 push @{K_array}, $Knext;
9573 $inext = $get_inext->();
9576 # Delete any terminal blank token
9578 if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
9583 # Define the range of K indexes for the line:
9584 # $Kfirst = index of first token on line
9585 # $Klast_out = index of last token on line
9586 my ( $Kfirst, $Klast );
9588 $Kfirst = $K_array[0];
9589 $Klast = $K_array[-1];
9590 $Klast_out = $Klast;
9593 # It is only safe to trim the actual line text if the input
9594 # line had a terminal blank token. Otherwise, we may be
9596 if ( $line_of_tokens->{_ended_in_blank_token} ) {
9597 $line_of_tokens->{_line_text} =~ s/\s+$//;
9599 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
9603 # There shouldn't be any nodes beyond the last one unless we start
9604 # allowing 'link_after' calls
9605 if ( defined($inext) ) {
9607 Fault("unexpected tokens at end of file when reconstructing lines");
9615 my $rlines = $self->{rlines};
9616 foreach my $line ( @{$rlines} ) {
9617 my $input_line = $line->{_line_text};
9618 $self->write_unindented_line($input_line);
9623 sub finish_formatting {
9625 my ( $self, $severe_error ) = @_;
9627 # The file has been tokenized and is ready to be formatted.
9628 # All of the relevant data is stored in $self, ready to go.
9630 # output file verbatim if severe error or no formatting requested
9631 if ( $severe_error || $rOpts->{notidy} ) {
9632 $self->dump_verbatim();
9637 # Make a pass through the lines, looking at lines of CODE and identifying
9638 # special processing needs, such format skipping sections marked by
9640 $self->scan_comments();
9642 # Find nested pairs of container tokens for any welding. This information
9643 # is also needed for adding semicolons, so it is split apart from the
9645 $self->find_nested_pairs();
9647 # Make sure everything looks good
9648 $self->check_line_hashes();
9650 # Future: Place to Begin future Iteration Loop
9651 # foreach my $it_count(1..$maxit) {
9653 # Future: We must reset some things after the first iteration.
9655 # - resetting levels if there was any welding
9656 # - resetting any phantom semicolons
9657 # - dealing with any line numbering issues so we can relate final lines
9658 # line numbers with input line numbers.
9660 # If ($it_count>1) {
9661 # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
9665 # Make a pass through all tokens, adding or deleting any whitespace as
9666 # required. Also make any other changes, such as adding semicolons.
9667 # All token changes must be made here so that the token data structure
9668 # remains fixed for the rest of this iteration.
9669 $self->respace_tokens();
9671 # Implement any welding needed for the -wn or -cb options
9672 $self->weld_containers();
9674 # Finishes formatting and write the result to the line sink.
9675 # Eventually this call should just change the 'rlines' data according to the
9676 # new line breaks and then return so that we can do an internal iteration
9677 # before continuing with the next stages of formatting.
9678 $self->break_lines();
9680 ############################################################
9681 # A possible future decomposition of 'break_lines()' follows.
9683 # - allow perltidy to do an internal iteration which eliminates
9684 # many unnecessary steps, such as re-parsing and vertical alignment.
9685 # This will allow iterations to be automatic.
9686 # - consolidate all length calculations to allow utf8 alignment
9687 ############################################################
9689 # Future: Check for convergence of beginning tokens on CODE lines
9691 # Future: End of Iteration Loop
9693 # Future: add_padding($rargs);
9695 # Future: add_closing_side_comments($rargs);
9697 # Future: vertical_alignment($rargs);
9699 # Future: output results
9701 # A final routine to tie up any loose ends
9706 sub create_one_line_block {
9707 ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
9712 sub destroy_one_line_block {
9713 $index_start_one_line_block = UNDEFINED_INDEX;
9714 $semicolons_before_block_self_destruct = 0;
9718 sub leading_spaces_to_go {
9720 # return the number of indentation spaces for a token in the output stream;
9721 # these were previously stored by 'set_leading_whitespace'.
9724 if ( $ii < 0 ) { $ii = 0 }
9725 return get_spaces( $leading_spaces_to_go[$ii] );
9731 # return the number of leading spaces associated with an indentation
9732 # variable $indentation is either a constant number of spaces or an object
9733 # with a get_spaces method.
9734 my $indentation = shift;
9735 return ref($indentation) ? $indentation->get_spaces() : $indentation;
9738 sub get_recoverable_spaces {
9740 # return the number of spaces (+ means shift right, - means shift left)
9741 # that we would like to shift a group of lines with the same indentation
9742 # to get them to line up with their opening parens
9743 my $indentation = shift;
9744 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
9747 sub get_available_spaces_to_go {
9750 my $item = $leading_spaces_to_go[$ii];
9752 # return the number of available leading spaces associated with an
9753 # indentation variable. $indentation is either a constant number of
9754 # spaces or an object with a get_available_spaces method.
9755 return ref($item) ? $item->get_available_spaces() : 0;
9758 sub new_lp_indentation_item {
9760 # this is an interface to the IndentationItem class
9761 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
9763 # A negative level implies not to store the item in the item_list
9765 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
9767 my $item = Perl::Tidy::IndentationItem->new(
9769 $ci_level, $available_spaces,
9770 $index, $gnu_sequence_number,
9771 $align_paren, $max_gnu_stack_index,
9772 $line_start_index_to_go,
9775 if ( $level >= 0 ) {
9776 $gnu_item_list[$max_gnu_item_index] = $item;
9782 sub set_leading_whitespace {
9784 # This routine defines leading whitespace
9785 # given: the level and continuation_level of a token,
9786 # define: space count of leading string which would apply if it
9787 # were the first token of a new line.
9789 my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
9791 # Adjust levels if necessary to recycle whitespace:
9792 # given $level_abs, the absolute level
9793 # define $level, a possibly reduced level for whitespace
9794 my $level = $level_abs;
9795 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
9796 if ( $level_abs < $whitespace_last_level ) {
9797 pop(@whitespace_level_stack);
9799 if ( !@whitespace_level_stack ) {
9800 push @whitespace_level_stack, $level_abs;
9802 elsif ( $level_abs > $whitespace_last_level ) {
9803 $level = $whitespace_level_stack[-1] +
9804 ( $level_abs - $whitespace_last_level );
9807 # 1 Try to break at a block brace
9809 $level > $rOpts_whitespace_cycle
9810 && $last_nonblank_type eq '{'
9811 && $last_nonblank_token eq '{'
9814 # 2 Then either a brace or bracket
9815 || ( $level > $rOpts_whitespace_cycle + 1
9816 && $last_nonblank_token =~ /^[\{\[]$/ )
9818 # 3 Then a paren too
9819 || $level > $rOpts_whitespace_cycle + 2
9824 push @whitespace_level_stack, $level;
9826 $level = $whitespace_level_stack[-1];
9828 $whitespace_last_level = $level_abs;
9830 # modify for -bli, which adds one continuation indentation for
9832 if ( $rOpts_brace_left_and_indent
9833 && $max_index_to_go == 0
9834 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
9839 # patch to avoid trouble when input file has negative indentation.
9840 # other logic should catch this error.
9841 if ( $level < 0 ) { $level = 0 }
9843 #-------------------------------------------
9844 # handle the standard indentation scheme
9845 #-------------------------------------------
9846 unless ($rOpts_line_up_parentheses) {
9848 $ci_level * $rOpts_continuation_indentation +
9849 $level * $rOpts_indent_columns;
9851 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
9853 if ($in_continued_quote) {
9857 $leading_spaces_to_go[$max_index_to_go] = $space_count;
9858 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
9862 #-------------------------------------------------------------
9863 # handle case of -lp indentation..
9864 #-------------------------------------------------------------
9866 # The continued_quote flag means that this is the first token of a
9867 # line, and it is the continuation of some kind of multi-line quote
9868 # or pattern. It requires special treatment because it must have no
9869 # added leading whitespace. So we create a special indentation item
9870 # which is not in the stack.
9871 if ($in_continued_quote) {
9872 my $space_count = 0;
9873 my $available_space = 0;
9874 $level = -1; # flag to prevent storing in item_list
9875 $leading_spaces_to_go[$max_index_to_go] =
9876 $reduced_spaces_to_go[$max_index_to_go] =
9877 new_lp_indentation_item( $space_count, $level, $ci_level,
9878 $available_space, 0 );
9882 # get the top state from the stack
9883 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
9884 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
9885 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
9887 my $type = $types_to_go[$max_index_to_go];
9888 my $token = $tokens_to_go[$max_index_to_go];
9889 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
9891 if ( $type eq '{' || $type eq '(' ) {
9893 $gnu_comma_count{ $total_depth + 1 } = 0;
9894 $gnu_arrow_count{ $total_depth + 1 } = 0;
9896 # If we come to an opening token after an '=' token of some type,
9897 # see if it would be helpful to 'break' after the '=' to save space
9898 my $last_equals = $last_gnu_equals{$total_depth};
9899 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
9901 # find the position if we break at the '='
9902 my $i_test = $last_equals;
9903 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
9906 ##my $too_close = ($i_test==$max_index_to_go-1);
9908 my $test_position = total_line_length( $i_test, $max_index_to_go );
9909 my $mll = maximum_line_length($i_test);
9913 # the equals is not just before an open paren (testing)
9916 # if we are beyond the midpoint
9917 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
9919 # or we are beyond the 1/4 point and there was an old
9920 # break at the equals
9922 $gnu_position_predictor >
9923 $mll - $rOpts_maximum_line_length * 3 / 4
9925 $old_breakpoint_to_go[$last_equals]
9926 || ( $last_equals > 0
9927 && $old_breakpoint_to_go[ $last_equals - 1 ] )
9928 || ( $last_equals > 1
9929 && $types_to_go[ $last_equals - 1 ] eq 'b'
9930 && $old_breakpoint_to_go[ $last_equals - 2 ] )
9936 # then make the switch -- note that we do not set a real
9937 # breakpoint here because we may not really need one; sub
9938 # scan_list will do that if necessary
9939 $line_start_index_to_go = $i_test + 1;
9940 $gnu_position_predictor = $test_position;
9946 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
9948 # Check for decreasing depth ..
9949 # Note that one token may have both decreasing and then increasing
9950 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
9951 # in this example we would first go back to (1,0) then up to (2,0)
9953 if ( $level < $current_level || $ci_level < $current_ci_level ) {
9955 # loop to find the first entry at or completely below this level
9956 my ( $lev, $ci_lev );
9958 if ($max_gnu_stack_index) {
9960 # save index of token which closes this level
9961 $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
9963 # Undo any extra indentation if we saw no commas
9964 my $available_spaces =
9965 $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
9967 my $comma_count = 0;
9968 my $arrow_count = 0;
9969 if ( $type eq '}' || $type eq ')' ) {
9970 $comma_count = $gnu_comma_count{$total_depth};
9971 $arrow_count = $gnu_arrow_count{$total_depth};
9972 $comma_count = 0 unless $comma_count;
9973 $arrow_count = 0 unless $arrow_count;
9975 $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
9976 $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
9978 if ( $available_spaces > 0 ) {
9980 if ( $comma_count <= 0 || $arrow_count > 0 ) {
9982 my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
9984 $gnu_stack[$max_gnu_stack_index]
9985 ->get_sequence_number();
9987 # Be sure this item was created in this batch. This
9988 # should be true because we delete any available
9989 # space from open items at the end of each batch.
9990 if ( $gnu_sequence_number != $seqno
9991 || $i > $max_gnu_item_index )
9994 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
9996 report_definite_bug();
10000 if ( $arrow_count == 0 ) {
10002 ->permanently_decrease_available_spaces(
10003 $available_spaces);
10007 ->tentatively_decrease_available_spaces(
10008 $available_spaces);
10010 foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
10012 ->decrease_SPACES($available_spaces);
10018 # go down one level
10019 --$max_gnu_stack_index;
10020 $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
10021 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
10023 # stop when we reach a level at or below the current level
10024 if ( $lev <= $level && $ci_lev <= $ci_level ) {
10026 $gnu_stack[$max_gnu_stack_index]->get_spaces();
10027 $current_level = $lev;
10028 $current_ci_level = $ci_lev;
10033 # reached bottom of stack .. should never happen because
10034 # only negative levels can get here, and $level was forced
10035 # to be positive above.
10038 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
10040 report_definite_bug();
10046 # handle increasing depth
10047 if ( $level > $current_level || $ci_level > $current_ci_level ) {
10049 # Compute the standard incremental whitespace. This will be
10050 # the minimum incremental whitespace that will be used. This
10051 # choice results in a smooth transition between the gnu-style
10052 # and the standard style.
10053 my $standard_increment =
10054 ( $level - $current_level ) * $rOpts_indent_columns +
10055 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
10057 # Now we have to define how much extra incremental space
10058 # ("$available_space") we want. This extra space will be
10059 # reduced as necessary when long lines are encountered or when
10060 # it becomes clear that we do not have a good list.
10061 my $available_space = 0;
10062 my $align_paren = 0;
10065 # initialization on empty stack..
10066 if ( $max_gnu_stack_index == 0 ) {
10067 $space_count = $level * $rOpts_indent_columns;
10070 # if this is a BLOCK, add the standard increment
10071 elsif ($last_nonblank_block_type) {
10072 $space_count += $standard_increment;
10075 # if last nonblank token was not structural indentation,
10076 # just use standard increment
10077 elsif ( $last_nonblank_type ne '{' ) {
10078 $space_count += $standard_increment;
10081 # otherwise use the space to the first non-blank level change token
10084 $space_count = $gnu_position_predictor;
10086 my $min_gnu_indentation =
10087 $gnu_stack[$max_gnu_stack_index]->get_spaces();
10089 $available_space = $space_count - $min_gnu_indentation;
10090 if ( $available_space >= $standard_increment ) {
10091 $min_gnu_indentation += $standard_increment;
10093 elsif ( $available_space > 1 ) {
10094 $min_gnu_indentation += $available_space + 1;
10096 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
10097 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
10098 $min_gnu_indentation += 2;
10101 $min_gnu_indentation += 1;
10105 $min_gnu_indentation += $standard_increment;
10107 $available_space = $space_count - $min_gnu_indentation;
10109 if ( $available_space < 0 ) {
10110 $space_count = $min_gnu_indentation;
10111 $available_space = 0;
10116 # update state, but not on a blank token
10117 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
10119 $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
10121 ++$max_gnu_stack_index;
10122 $gnu_stack[$max_gnu_stack_index] =
10123 new_lp_indentation_item( $space_count, $level, $ci_level,
10124 $available_space, $align_paren );
10126 # If the opening paren is beyond the half-line length, then
10127 # we will use the minimum (standard) indentation. This will
10128 # help avoid problems associated with running out of space
10129 # near the end of a line. As a result, in deeply nested
10130 # lists, there will be some indentations which are limited
10131 # to this minimum standard indentation. But the most deeply
10132 # nested container will still probably be able to shift its
10133 # parameters to the right for proper alignment, so in most
10134 # cases this will not be noticeable.
10135 if ( $available_space > 0 && $space_count > $halfway ) {
10136 $gnu_stack[$max_gnu_stack_index]
10137 ->tentatively_decrease_available_spaces($available_space);
10142 # Count commas and look for non-list characters. Once we see a
10143 # non-list character, we give up and don't look for any more commas.
10144 if ( $type eq '=>' ) {
10145 $gnu_arrow_count{$total_depth}++;
10147 # tentatively treating '=>' like '=' for estimating breaks
10148 # TODO: this could use some experimentation
10149 $last_gnu_equals{$total_depth} = $max_index_to_go;
10152 elsif ( $type eq ',' ) {
10153 $gnu_comma_count{$total_depth}++;
10156 elsif ( $is_assignment{$type} ) {
10157 $last_gnu_equals{$total_depth} = $max_index_to_go;
10160 # this token might start a new line
10161 # if this is a non-blank..
10162 if ( $type ne 'b' ) {
10167 # this is the first nonblank token of the line
10168 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
10170 # or previous character was one of these:
10171 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
10173 # or previous character was opening and this does not close it
10174 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
10175 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
10177 # or this token is one of these:
10178 || $type =~ /^([\.]|\|\||\&\&)$/
10180 # or this is a closing structure
10181 || ( $last_nonblank_type_to_go eq '}'
10182 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
10184 # or previous token was keyword 'return'
10185 || ( $last_nonblank_type_to_go eq 'k'
10186 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
10188 # or starting a new line at certain keywords is fine
10190 && $is_if_unless_and_or_last_next_redo_return{$token} )
10192 # or this is after an assignment after a closing structure
10194 $is_assignment{$last_nonblank_type_to_go}
10196 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
10198 # and it is significantly to the right
10199 || $gnu_position_predictor > $halfway
10204 check_for_long_gnu_style_lines();
10205 $line_start_index_to_go = $max_index_to_go;
10207 # back up 1 token if we want to break before that type
10208 # otherwise, we may strand tokens like '?' or ':' on a line
10209 if ( $line_start_index_to_go > 0 ) {
10210 if ( $last_nonblank_type_to_go eq 'k' ) {
10212 if ( $want_break_before{$last_nonblank_token_to_go} ) {
10213 $line_start_index_to_go--;
10216 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
10217 $line_start_index_to_go--;
10223 # remember the predicted position of this token on the output line
10224 if ( $max_index_to_go > $line_start_index_to_go ) {
10225 $gnu_position_predictor =
10226 total_line_length( $line_start_index_to_go, $max_index_to_go );
10229 $gnu_position_predictor =
10230 $space_count + $token_lengths_to_go[$max_index_to_go];
10233 # store the indentation object for this token
10234 # this allows us to manipulate the leading whitespace
10235 # (in case we have to reduce indentation to fit a line) without
10236 # having to change any token values
10237 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
10238 $reduced_spaces_to_go[$max_index_to_go] =
10239 ( $max_gnu_stack_index > 0 && $ci_level )
10240 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
10241 : $gnu_stack[$max_gnu_stack_index];
10245 sub check_for_long_gnu_style_lines {
10247 # look at the current estimated maximum line length, and
10248 # remove some whitespace if it exceeds the desired maximum
10250 # this is only for the '-lp' style
10251 return unless ($rOpts_line_up_parentheses);
10253 # nothing can be done if no stack items defined for this line
10254 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
10256 # see if we have exceeded the maximum desired line length
10257 # keep 2 extra free because they are needed in some cases
10258 # (result of trial-and-error testing)
10259 my $spaces_needed =
10260 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
10262 return if ( $spaces_needed <= 0 );
10264 # We are over the limit, so try to remove a requested number of
10265 # spaces from leading whitespace. We are only allowed to remove
10266 # from whitespace items created on this batch, since others have
10267 # already been used and cannot be undone.
10268 my @candidates = ();
10271 # loop over all whitespace items created for the current batch
10272 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
10273 my $item = $gnu_item_list[$i];
10275 # item must still be open to be a candidate (otherwise it
10276 # cannot influence the current token)
10277 next if ( $item->get_closed() >= 0 );
10279 my $available_spaces = $item->get_available_spaces();
10281 if ( $available_spaces > 0 ) {
10282 push( @candidates, [ $i, $available_spaces ] );
10286 return unless (@candidates);
10288 # sort by available whitespace so that we can remove whitespace
10289 # from the maximum available first
10290 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
10292 # keep removing whitespace until we are done or have no more
10293 foreach my $candidate (@candidates) {
10294 my ( $i, $available_spaces ) = @{$candidate};
10295 my $deleted_spaces =
10296 ( $available_spaces > $spaces_needed )
10298 : $available_spaces;
10300 # remove the incremental space from this item
10301 $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
10305 # update the leading whitespace of this item and all items
10306 # that came after it
10307 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
10309 my $old_spaces = $gnu_item_list[$i]->get_spaces();
10310 if ( $old_spaces >= $deleted_spaces ) {
10311 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
10314 # shouldn't happen except for code bug:
10316 my $level = $gnu_item_list[$i_debug]->get_level();
10317 my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
10318 my $old_level = $gnu_item_list[$i]->get_level();
10319 my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
10321 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
10323 report_definite_bug();
10326 $gnu_position_predictor -= $deleted_spaces;
10327 $spaces_needed -= $deleted_spaces;
10328 last unless ( $spaces_needed > 0 );
10333 sub finish_lp_batch {
10335 # This routine is called once after each output stream batch is
10336 # finished to undo indentation for all incomplete -lp
10337 # indentation levels. It is too risky to leave a level open,
10338 # because then we can't backtrack in case of a long line to follow.
10339 # This means that comments and blank lines will disrupt this
10340 # indentation style. But the vertical aligner may be able to
10341 # get the space back if there are side comments.
10343 # this is only for the 'lp' style
10344 return unless ($rOpts_line_up_parentheses);
10346 # nothing can be done if no stack items defined for this line
10347 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
10349 # loop over all whitespace items created for the current batch
10350 foreach my $i ( 0 .. $max_gnu_item_index ) {
10351 my $item = $gnu_item_list[$i];
10353 # only look for open items
10354 next if ( $item->get_closed() >= 0 );
10356 # Tentatively remove all of the available space
10357 # (The vertical aligner will try to get it back later)
10358 my $available_spaces = $item->get_available_spaces();
10359 if ( $available_spaces > 0 ) {
10361 # delete incremental space for this item
10363 ->tentatively_decrease_available_spaces($available_spaces);
10365 # Reduce the total indentation space of any nodes that follow
10366 # Note that any such nodes must necessarily be dependents
10368 foreach ( $i + 1 .. $max_gnu_item_index ) {
10369 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
10376 sub reduce_lp_indentation {
10378 # reduce the leading whitespace at token $i if possible by $spaces_needed
10379 # (a large value of $spaces_needed will remove all excess space)
10380 # NOTE: to be called from scan_list only for a sequence of tokens
10381 # contained between opening and closing parens/braces/brackets
10383 my ( $i, $spaces_wanted ) = @_;
10384 my $deleted_spaces = 0;
10386 my $item = $leading_spaces_to_go[$i];
10387 my $available_spaces = $item->get_available_spaces();
10390 $available_spaces > 0
10391 && ( ( $spaces_wanted <= $available_spaces )
10392 || !$item->get_have_child() )
10396 # we'll remove these spaces, but mark them as recoverable
10398 $item->tentatively_decrease_available_spaces($spaces_wanted);
10401 return $deleted_spaces;
10404 sub token_sequence_length {
10406 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
10407 # returns 0 if $ibeg > $iend (shouldn't happen)
10408 my ( $ibeg, $iend ) = @_;
10409 return 0 if ( $iend < 0 || $ibeg > $iend );
10410 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
10411 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
10414 sub total_line_length {
10416 # return length of a line of tokens ($ibeg .. $iend)
10417 my ( $ibeg, $iend ) = @_;
10418 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
10421 sub maximum_line_length_for_level {
10423 # return maximum line length for line starting with a given level
10424 my $maximum_line_length = $rOpts_maximum_line_length;
10426 # Modify if -vmll option is selected
10427 if ($rOpts_variable_maximum_line_length) {
10429 if ( $level < 0 ) { $level = 0 }
10430 $maximum_line_length += $level * $rOpts_indent_columns;
10432 return $maximum_line_length;
10435 sub maximum_line_length {
10437 # return maximum line length for line starting with the token at given index
10439 return maximum_line_length_for_level( $levels_to_go[$ii] );
10442 sub excess_line_length {
10444 # return number of characters by which a line of tokens ($ibeg..$iend)
10445 # exceeds the allowable line length.
10446 my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
10448 # Include left and right weld lengths unless requested not to
10449 my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend);
10450 my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
10452 return total_line_length( $ibeg, $iend ) + $wl + $wr -
10453 maximum_line_length($ibeg);
10458 # flush buffer and write any informative messages
10462 $file_writer_object->decrement_output_line_number()
10463 ; # fix up line number since it was incremented
10464 we_are_at_the_last_line();
10465 if ( $added_semicolon_count > 0 ) {
10466 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
10468 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
10469 write_logfile_entry("$added_semicolon_count $what added:\n");
10470 write_logfile_entry(
10471 " $first at input line $first_added_semicolon_at\n");
10473 if ( $added_semicolon_count > 1 ) {
10474 write_logfile_entry(
10475 " Last at input line $last_added_semicolon_at\n");
10477 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
10478 write_logfile_entry("\n");
10481 if ( $deleted_semicolon_count > 0 ) {
10482 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
10484 ( $deleted_semicolon_count > 1 )
10485 ? "semicolons were"
10487 write_logfile_entry(
10488 "$deleted_semicolon_count unnecessary $what deleted:\n");
10489 write_logfile_entry(
10490 " $first at input line $first_deleted_semicolon_at\n");
10492 if ( $deleted_semicolon_count > 1 ) {
10493 write_logfile_entry(
10494 " Last at input line $last_deleted_semicolon_at\n");
10496 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
10497 write_logfile_entry("\n");
10500 if ( $embedded_tab_count > 0 ) {
10501 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
10503 ( $embedded_tab_count > 1 )
10504 ? "quotes or patterns"
10505 : "quote or pattern";
10506 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
10507 write_logfile_entry(
10508 "This means the display of this script could vary with device or software\n"
10510 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
10512 if ( $embedded_tab_count > 1 ) {
10513 write_logfile_entry(
10514 " Last at input line $last_embedded_tab_at\n");
10516 write_logfile_entry("\n");
10519 if ($first_tabbing_disagreement) {
10520 write_logfile_entry(
10521 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
10525 if ($in_tabbing_disagreement) {
10526 write_logfile_entry(
10527 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
10532 if ($last_tabbing_disagreement) {
10534 write_logfile_entry(
10535 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
10539 write_logfile_entry("No indentation disagreement seen\n");
10542 if ($first_tabbing_disagreement) {
10543 write_logfile_entry(
10544 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
10547 write_logfile_entry("\n");
10549 $vertical_aligner_object->report_anything_unusual();
10551 $file_writer_object->report_line_length_errors();
10556 sub check_options {
10558 # This routine is called to check the Opts hash after it is defined
10561 make_static_block_comment_pattern();
10562 make_static_side_comment_pattern();
10563 make_closing_side_comment_prefix();
10564 make_closing_side_comment_list_pattern();
10565 $format_skipping_pattern_begin =
10566 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
10567 $format_skipping_pattern_end =
10568 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
10570 # If closing side comments ARE selected, then we can safely
10571 # delete old closing side comments unless closing side comment
10572 # warnings are requested. This is a good idea because it will
10573 # eliminate any old csc's which fall below the line count threshold.
10574 # We cannot do this if warnings are turned on, though, because we
10575 # might delete some text which has been added. So that must
10576 # be handled when comments are created.
10577 if ( $rOpts->{'closing-side-comments'} ) {
10578 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
10579 $rOpts->{'delete-closing-side-comments'} = 1;
10583 # If closing side comments ARE NOT selected, but warnings ARE
10584 # selected and we ARE DELETING csc's, then we will pretend to be
10585 # adding with a huge interval. This will force the comments to be
10586 # generated for comparison with the old comments, but not added.
10587 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
10588 if ( $rOpts->{'delete-closing-side-comments'} ) {
10589 $rOpts->{'delete-closing-side-comments'} = 0;
10590 $rOpts->{'closing-side-comments'} = 1;
10591 $rOpts->{'closing-side-comment-interval'} = 100000000;
10595 make_bli_pattern();
10596 make_block_brace_vertical_tightness_pattern();
10597 make_blank_line_pattern();
10599 prepare_cuddled_block_types();
10600 if ( $rOpts->{'dump-cuddled-block-list'} ) {
10601 dump_cuddled_block_list(*STDOUT);
10602 Perl::Tidy::Exit 0;
10605 if ( $rOpts->{'line-up-parentheses'} ) {
10607 if ( $rOpts->{'indent-only'}
10608 || !$rOpts->{'add-newlines'}
10609 || !$rOpts->{'delete-old-newlines'} )
10611 Perl::Tidy::Warn <<EOM;
10612 -----------------------------------------------------------------------
10613 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
10615 The -lp indentation logic requires that perltidy be able to coordinate
10616 arbitrarily large numbers of line breakpoints. This isn't possible
10617 with these flags. Sometimes an acceptable workaround is to use -wocb=3
10618 -----------------------------------------------------------------------
10620 $rOpts->{'line-up-parentheses'} = 0;
10624 # At present, tabs are not compatible with the line-up-parentheses style
10625 # (it would be possible to entab the total leading whitespace
10626 # just prior to writing the line, if desired).
10627 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
10628 Perl::Tidy::Warn <<EOM;
10629 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
10631 $rOpts->{'tabs'} = 0;
10634 # Likewise, tabs are not compatible with outdenting..
10635 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
10636 Perl::Tidy::Warn <<EOM;
10637 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
10639 $rOpts->{'tabs'} = 0;
10642 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
10643 Perl::Tidy::Warn <<EOM;
10644 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
10646 $rOpts->{'tabs'} = 0;
10649 if ( !$rOpts->{'space-for-semicolon'} ) {
10650 $want_left_space{'f'} = -1;
10653 if ( $rOpts->{'space-terminal-semicolon'} ) {
10654 $want_left_space{';'} = 1;
10657 # implement outdenting preferences for keywords
10658 %outdent_keyword = ();
10659 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
10660 @_ = qw(next last redo goto return); # defaults
10663 # FUTURE: if not a keyword, assume that it is an identifier
10665 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
10666 $outdent_keyword{$_} = 1;
10669 Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
10673 # implement user whitespace preferences
10674 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
10675 @want_left_space{@_} = (1) x scalar(@_);
10678 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
10679 @want_right_space{@_} = (1) x scalar(@_);
10682 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
10683 @want_left_space{@_} = (-1) x scalar(@_);
10686 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
10687 @want_right_space{@_} = (-1) x scalar(@_);
10689 if ( $rOpts->{'dump-want-left-space'} ) {
10690 dump_want_left_space(*STDOUT);
10691 Perl::Tidy::Exit 0;
10694 if ( $rOpts->{'dump-want-right-space'} ) {
10695 dump_want_right_space(*STDOUT);
10696 Perl::Tidy::Exit 0;
10699 # default keywords for which space is introduced before an opening paren
10700 # (at present, including them messes up vertical alignment)
10701 @_ = qw(my local our and or err eq ne if else elsif until
10702 unless while for foreach return switch case given when catch);
10703 @space_after_keyword{@_} = (1) x scalar(@_);
10705 # first remove any or all of these if desired
10706 if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
10708 # -nsak='*' selects all the above keywords
10709 if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
10710 @space_after_keyword{@_} = (0) x scalar(@_);
10713 # then allow user to add to these defaults
10714 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
10715 @space_after_keyword{@_} = (1) x scalar(@_);
10718 # implement user break preferences
10719 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
10720 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
10721 . : ? && || and or err xor
10724 my $break_after = sub {
10725 foreach my $tok (@_) {
10726 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
10727 my $lbs = $left_bond_strength{$tok};
10728 my $rbs = $right_bond_strength{$tok};
10729 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
10730 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
10736 my $break_before = sub {
10737 foreach my $tok (@_) {
10738 my $lbs = $left_bond_strength{$tok};
10739 my $rbs = $right_bond_strength{$tok};
10740 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
10741 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
10747 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
10748 $break_before->(@all_operators)
10749 if ( $rOpts->{'break-before-all-operators'} );
10751 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
10752 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
10754 # make note if breaks are before certain key types
10755 %want_break_before = ();
10756 foreach my $tok ( @all_operators, ',' ) {
10757 $want_break_before{$tok} =
10758 $left_bond_strength{$tok} < $right_bond_strength{$tok};
10761 # Coordinate ?/: breaks, which must be similar
10762 if ( !$want_break_before{':'} ) {
10763 $want_break_before{'?'} = $want_break_before{':'};
10764 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
10765 $left_bond_strength{'?'} = NO_BREAK;
10768 # Define here tokens which may follow the closing brace of a do statement
10769 # on the same line, as in:
10770 # } while ( $something);
10771 @_ = qw(until while unless if ; : );
10773 @is_do_follower{@_} = (1) x scalar(@_);
10775 # These tokens may follow the closing brace of an if or elsif block.
10776 # In other words, for cuddled else we want code to look like:
10777 # } elsif ( $something) {
10779 if ( $rOpts->{'cuddled-else'} ) {
10780 @_ = qw(else elsif);
10781 @is_if_brace_follower{@_} = (1) x scalar(@_);
10784 %is_if_brace_follower = ();
10787 # nothing can follow the closing curly of an else { } block:
10788 %is_else_brace_follower = ();
10790 # what can follow a multi-line anonymous sub definition closing curly:
10791 @_ = qw# ; : => or and && || ~~ !~~ ) #;
10793 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
10795 # what can follow a one-line anonymous sub closing curly:
10796 # one-line anonymous subs also have ']' here...
10797 # see tk3.t and PP.pm
10798 @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
10800 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
10802 # What can follow a closing curly of a block
10803 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
10804 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
10805 @_ = qw# ; : => or and && || ) #;
10808 # allow cuddled continue if cuddled else is specified
10809 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
10811 @is_other_brace_follower{@_} = (1) x scalar(@_);
10813 $right_bond_strength{'{'} = WEAK;
10814 $left_bond_strength{'{'} = VERY_STRONG;
10816 # make -l=0 equal to -l=infinite
10817 if ( !$rOpts->{'maximum-line-length'} ) {
10818 $rOpts->{'maximum-line-length'} = 1000000;
10821 # make -lbl=0 equal to -lbl=infinite
10822 if ( !$rOpts->{'long-block-line-count'} ) {
10823 $rOpts->{'long-block-line-count'} = 1000000;
10826 my $enc = $rOpts->{'character-encoding'};
10827 if ( $enc && $enc !~ /^(none|utf8)$/i ) {
10828 Perl::Tidy::Die <<EOM;
10829 Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
10833 my $ole = $rOpts->{'output-line-ending'};
10842 # Patch for RT #99514, a memoization issue.
10843 # Normally, the user enters one of 'dos', 'win', etc, and we change the
10844 # value in the options parameter to be the corresponding line ending
10845 # character. But, if we are using memoization, on later passes through
10846 # here the option parameter will already have the desired ending
10847 # character rather than the keyword 'dos', 'win', etc. So
10848 # we must check to see if conversion has already been done and, if so,
10849 # bypass the conversion step.
10850 my %endings_inverted = (
10851 "\015\012" => 'dos',
10852 "\015\012" => 'win',
10857 if ( defined( $endings_inverted{$ole} ) ) {
10859 # we already have valid line ending, nothing more to do
10863 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
10864 my $str = join " ", keys %endings;
10865 Perl::Tidy::Die <<EOM;
10866 Unrecognized line ending '$ole'; expecting one of: $str
10869 if ( $rOpts->{'preserve-line-endings'} ) {
10870 Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
10871 $rOpts->{'preserve-line-endings'} = undef;
10876 # hashes used to simplify setting whitespace
10878 '{' => $rOpts->{'brace-tightness'},
10879 '}' => $rOpts->{'brace-tightness'},
10880 '(' => $rOpts->{'paren-tightness'},
10881 ')' => $rOpts->{'paren-tightness'},
10882 '[' => $rOpts->{'square-bracket-tightness'},
10883 ']' => $rOpts->{'square-bracket-tightness'},
10885 %matching_token = (
10892 # frequently used parameters
10893 $rOpts_add_newlines = $rOpts->{'add-newlines'};
10894 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
10895 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
10896 $rOpts_block_brace_vertical_tightness =
10897 $rOpts->{'block-brace-vertical-tightness'};
10898 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
10899 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
10900 $rOpts_break_at_old_ternary_breakpoints =
10901 $rOpts->{'break-at-old-ternary-breakpoints'};
10902 $rOpts_break_at_old_attribute_breakpoints =
10903 $rOpts->{'break-at-old-attribute-breakpoints'};
10904 $rOpts_break_at_old_comma_breakpoints =
10905 $rOpts->{'break-at-old-comma-breakpoints'};
10906 $rOpts_break_at_old_keyword_breakpoints =
10907 $rOpts->{'break-at-old-keyword-breakpoints'};
10908 $rOpts_break_at_old_logical_breakpoints =
10909 $rOpts->{'break-at-old-logical-breakpoints'};
10910 $rOpts_closing_side_comment_else_flag =
10911 $rOpts->{'closing-side-comment-else-flag'};
10912 $rOpts_closing_side_comment_maximum_text =
10913 $rOpts->{'closing-side-comment-maximum-text'};
10914 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
10915 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
10916 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
10917 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
10918 $rOpts_indent_columns = $rOpts->{'indent-columns'};
10919 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
10920 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
10921 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
10922 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
10924 $rOpts_variable_maximum_line_length =
10925 $rOpts->{'variable-maximum-line-length'};
10926 $rOpts_short_concatenation_item_length =
10927 $rOpts->{'short-concatenation-item-length'};
10929 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
10930 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
10931 $rOpts_format_skipping = $rOpts->{'format-skipping'};
10932 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
10933 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
10934 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
10935 $rOpts_ignore_side_comment_lengths =
10936 $rOpts->{'ignore-side-comment-lengths'};
10938 # Note that both opening and closing tokens can access the opening
10939 # and closing flags of their container types.
10940 %opening_vertical_tightness = (
10941 '(' => $rOpts->{'paren-vertical-tightness'},
10942 '{' => $rOpts->{'brace-vertical-tightness'},
10943 '[' => $rOpts->{'square-bracket-vertical-tightness'},
10944 ')' => $rOpts->{'paren-vertical-tightness'},
10945 '}' => $rOpts->{'brace-vertical-tightness'},
10946 ']' => $rOpts->{'square-bracket-vertical-tightness'},
10949 %closing_vertical_tightness = (
10950 '(' => $rOpts->{'paren-vertical-tightness-closing'},
10951 '{' => $rOpts->{'brace-vertical-tightness-closing'},
10952 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
10953 ')' => $rOpts->{'paren-vertical-tightness-closing'},
10954 '}' => $rOpts->{'brace-vertical-tightness-closing'},
10955 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
10958 # assume flag for '>' same as ')' for closing qw quotes
10959 %closing_token_indentation = (
10960 ')' => $rOpts->{'closing-paren-indentation'},
10961 '}' => $rOpts->{'closing-brace-indentation'},
10962 ']' => $rOpts->{'closing-square-bracket-indentation'},
10963 '>' => $rOpts->{'closing-paren-indentation'},
10966 # flag indicating if any closing tokens are indented
10967 $some_closing_token_indentation =
10968 $rOpts->{'closing-paren-indentation'}
10969 || $rOpts->{'closing-brace-indentation'}
10970 || $rOpts->{'closing-square-bracket-indentation'}
10971 || $rOpts->{'indent-closing-brace'};
10973 %opening_token_right = (
10974 '(' => $rOpts->{'opening-paren-right'},
10975 '{' => $rOpts->{'opening-hash-brace-right'},
10976 '[' => $rOpts->{'opening-square-bracket-right'},
10979 %stack_opening_token = (
10980 '(' => $rOpts->{'stack-opening-paren'},
10981 '{' => $rOpts->{'stack-opening-hash-brace'},
10982 '[' => $rOpts->{'stack-opening-square-bracket'},
10985 %stack_closing_token = (
10986 ')' => $rOpts->{'stack-closing-paren'},
10987 '}' => $rOpts->{'stack-closing-hash-brace'},
10988 ']' => $rOpts->{'stack-closing-square-bracket'},
10990 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
10991 $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
10997 # See if a pattern will compile. We have to use a string eval here,
10998 # but it should be safe because the pattern has been constructed
11000 my ($pattern) = @_;
11001 eval "'##'=~/$pattern/";
11005 sub prepare_cuddled_block_types {
11007 my $cuddled_string = $rOpts->{'cuddled-block-list'};
11008 $cuddled_string = "try-catch-finally" unless defined($cuddled_string);
11010 # we have a cuddled string of the form
11011 # 'try-catch-finally'
11013 # we want to prepare a hash of the form
11015 # $rcuddled_block_types = {
11022 # use -dcbl to dump this hash
11024 # Multiple such strings are input as a space or comma separated list
11026 # If we get two lists with the same leading type, such as
11027 # -cbl = "-try-catch-finally -try-catch-otherwise"
11028 # then they will get merged as follows:
11029 # $rcuddled_block_types = {
11033 # 'otherwise' => 1,
11036 # This will allow either type of chain to be followed.
11038 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
11039 my @cuddled_strings = split /\s+/, $cuddled_string;
11041 $rcuddled_block_types = {};
11043 # process each dash-separated string...
11044 my $string_count = 0;
11045 foreach my $string (@cuddled_strings) {
11046 next unless $string;
11047 my @words = split /-+/, $string; # allow multiple dashes
11049 # we could look for and report possible errors here...
11050 next unless ( @words && @words > 0 );
11051 my $start = shift @words;
11053 # allow either '-continue' or *-continue' for arbitrary starting type
11054 $start = '*' unless $start;
11056 # always make an entry for the leading word. If none follow, this
11057 # will still prevent a wildcard from matching this word.
11058 if ( !defined( $rcuddled_block_types->{$start} ) ) {
11059 $rcuddled_block_types->{$start} = {};
11062 # The count gives the original word order in case we ever want it.
11064 my $word_count = 0;
11065 foreach my $word (@words) {
11068 $rcuddled_block_types->{$start}->{$word} =
11069 1; #"$string_count.$word_count";
11076 sub dump_cuddled_block_list {
11079 # Here is the format of the cuddled block type hash
11080 # which controls this routine
11081 # my $rcuddled_block_types = {
11091 #The numerical values are string.word,
11092 #where string = string number and word = word number in that string
11094 my $cuddled_string = $rOpts->{'cuddled-block-list'};
11095 $cuddled_string = '' unless $cuddled_string;
11097 ------------------------------------------------------------------------
11098 Hash of cuddled block types created from
11099 -cbl='$cuddled_string'
11100 ------------------------------------------------------------------------
11104 $fh->print( Dumper($rcuddled_block_types) );
11107 ------------------------------------------------------------------------
11112 sub make_static_block_comment_pattern {
11114 # create the pattern used to identify static block comments
11115 $static_block_comment_pattern = '^\s*##';
11117 # allow the user to change it
11118 if ( $rOpts->{'static-block-comment-prefix'} ) {
11119 my $prefix = $rOpts->{'static-block-comment-prefix'};
11120 $prefix =~ s/^\s*//;
11121 my $pattern = $prefix;
11123 # user may give leading caret to force matching left comments only
11124 if ( $prefix !~ /^\^#/ ) {
11125 if ( $prefix !~ /^#/ ) {
11127 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
11129 $pattern = '^\s*' . $prefix;
11131 if ( bad_pattern($pattern) ) {
11133 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
11135 $static_block_comment_pattern = $pattern;
11140 sub make_format_skipping_pattern {
11141 my ( $opt_name, $default ) = @_;
11142 my $param = $rOpts->{$opt_name};
11143 unless ($param) { $param = $default }
11144 $param =~ s/^\s*//;
11145 if ( $param !~ /^#/ ) {
11147 "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
11149 my $pattern = '^' . $param . '\s';
11150 if ( bad_pattern($pattern) ) {
11152 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
11157 sub make_closing_side_comment_list_pattern {
11159 # turn any input list into a regex for recognizing selected block types
11160 $closing_side_comment_list_pattern = '^\w+';
11161 if ( defined( $rOpts->{'closing-side-comment-list'} )
11162 && $rOpts->{'closing-side-comment-list'} )
11164 $closing_side_comment_list_pattern =
11165 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
11170 sub make_bli_pattern {
11172 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
11173 && $rOpts->{'brace-left-and-indent-list'} )
11175 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
11178 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
11182 sub make_block_brace_vertical_tightness_pattern {
11184 # turn any input list into a regex for recognizing selected block types
11185 $block_brace_vertical_tightness_pattern =
11186 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
11187 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
11188 && $rOpts->{'block-brace-vertical-tightness-list'} )
11190 $block_brace_vertical_tightness_pattern =
11191 make_block_pattern( '-bbvtl',
11192 $rOpts->{'block-brace-vertical-tightness-list'} );
11197 sub make_blank_line_pattern {
11199 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
11200 my $key = 'blank-lines-before-closing-block-list';
11201 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
11202 $blank_lines_before_closing_block_pattern =
11203 make_block_pattern( '-blbcl', $rOpts->{$key} );
11206 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
11207 $key = 'blank-lines-after-opening-block-list';
11208 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
11209 $blank_lines_after_opening_block_pattern =
11210 make_block_pattern( '-blaol', $rOpts->{$key} );
11215 sub make_block_pattern {
11217 # given a string of block-type keywords, return a regex to match them
11218 # The only tricky part is that labels are indicated with a single ':'
11219 # and the 'sub' token text may have additional text after it (name of
11224 # input string: "if else elsif unless while for foreach do : sub";
11225 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
11229 # To distinguish between anonymous subs and named subs, use 'sub' to
11230 # indicate a named sub, and 'asub' to indicate an anonymous sub
11232 my ( $abbrev, $string ) = @_;
11233 my @list = split_words($string);
11236 for my $i (@list) {
11237 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
11240 if ( $i eq 'sub' ) {
11242 elsif ( $i eq 'asub' ) {
11244 elsif ( $i eq ';' ) {
11247 elsif ( $i eq '{' ) {
11250 elsif ( $i eq ':' ) {
11251 push @words, '\w+:';
11253 elsif ( $i =~ /^\w/ ) {
11258 "unrecognized block type $i after $abbrev, ignoring\n";
11261 my $pattern = '(' . join( '|', @words ) . ')$';
11262 my $sub_patterns = "";
11263 if ( $seen{'sub'} ) {
11264 $sub_patterns .= '|' . $SUB_PATTERN;
11266 if ( $seen{'asub'} ) {
11267 $sub_patterns .= '|' . $ASUB_PATTERN;
11269 if ($sub_patterns) {
11270 $pattern = '(' . $pattern . $sub_patterns . ')';
11272 $pattern = '^' . $pattern;
11276 sub make_static_side_comment_pattern {
11278 # create the pattern used to identify static side comments
11279 $static_side_comment_pattern = '^##';
11281 # allow the user to change it
11282 if ( $rOpts->{'static-side-comment-prefix'} ) {
11283 my $prefix = $rOpts->{'static-side-comment-prefix'};
11284 $prefix =~ s/^\s*//;
11285 my $pattern = '^' . $prefix;
11286 if ( bad_pattern($pattern) ) {
11288 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
11290 $static_side_comment_pattern = $pattern;
11295 sub make_closing_side_comment_prefix {
11297 # Be sure we have a valid closing side comment prefix
11298 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
11299 my $csc_prefix_pattern;
11300 if ( !defined($csc_prefix) ) {
11301 $csc_prefix = '## end';
11302 $csc_prefix_pattern = '^##\s+end';
11305 my $test_csc_prefix = $csc_prefix;
11306 if ( $test_csc_prefix !~ /^#/ ) {
11307 $test_csc_prefix = '#' . $test_csc_prefix;
11310 # make a regex to recognize the prefix
11311 my $test_csc_prefix_pattern = $test_csc_prefix;
11313 # escape any special characters
11314 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
11316 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
11318 # allow exact number of intermediate spaces to vary
11319 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
11321 # make sure we have a good pattern
11322 # if we fail this we probably have an error in escaping
11325 if ( bad_pattern($test_csc_prefix_pattern) ) {
11327 # shouldn't happen..must have screwed up escaping, above
11328 report_definite_bug();
11330 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
11332 # just warn and keep going with defaults
11333 Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
11335 "Using default -cscp instead; please check output\n";
11338 $csc_prefix = $test_csc_prefix;
11339 $csc_prefix_pattern = $test_csc_prefix_pattern;
11342 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
11343 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
11347 sub dump_want_left_space {
11351 These values are the main control of whitespace to the left of a token type;
11352 They may be altered with the -wls parameter.
11353 For a list of token types, use perltidy --dump-token-types (-dtt)
11354 1 means the token wants a space to its left
11355 -1 means the token does not want a space to its left
11356 ------------------------------------------------------------------------
11358 foreach my $key ( sort keys %want_left_space ) {
11359 print $fh "$key\t$want_left_space{$key}\n";
11364 sub dump_want_right_space {
11368 These values are the main control of whitespace to the right of a token type;
11369 They may be altered with the -wrs parameter.
11370 For a list of token types, use perltidy --dump-token-types (-dtt)
11371 1 means the token wants a space to its right
11372 -1 means the token does not want a space to its right
11373 ------------------------------------------------------------------------
11375 foreach my $key ( sort keys %want_right_space ) {
11376 print $fh "$key\t$want_right_space{$key}\n";
11381 { # begin is_essential_whitespace
11383 my %is_sort_grep_map;
11384 my %is_for_foreach;
11389 @q = qw(sort grep map);
11390 @is_sort_grep_map{@q} = (1) x scalar(@q);
11392 @q = qw(for foreach);
11393 @is_for_foreach{@q} = (1) x scalar(@q);
11397 sub is_essential_whitespace {
11399 # Essential whitespace means whitespace which cannot be safely deleted
11400 # without risking the introduction of a syntax error.
11401 # We are given three tokens and their types:
11402 # ($tokenl, $typel) is the token to the left of the space in question
11403 # ($tokenr, $typer) is the token to the right of the space in question
11404 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
11406 # This is a slow routine but is not needed too often except when -mangle
11409 # Note: This routine should almost never need to be changed. It is
11410 # for avoiding syntax problems rather than for formatting.
11411 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
11415 # never combine two bare words or numbers
11416 # examples: and ::ok(1)
11417 # return ::spw(...)
11418 # for bla::bla:: abc
11419 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
11420 # $input eq"quit" to make $inputeq"quit"
11421 # my $size=-s::SINK if $file; <==OK but we won't do it
11422 # don't join something like: for bla::bla:: abc
11423 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
11424 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
11425 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
11427 # do not combine a number with a concatenation dot
11428 # example: pom.caputo:
11429 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
11430 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
11431 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
11433 # do not join a minus with a bare word, because you might form
11434 # a file test operator. Example from Complex.pm:
11435 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
11436 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
11438 # and something like this could become ambiguous without space
11440 # use constant III=>1;
11444 || ( ( $tokenl eq '-' )
11445 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
11447 # '= -' should not become =- or you will get a warning
11448 # about reversed -=
11449 # || ($tokenr eq '-')
11451 # keep a space between a quote and a bareword to prevent the
11452 # bareword from becoming a quote modifier.
11453 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
11455 # keep a space between a token ending in '$' and any word;
11456 # this caused trouble: "die @$ if $@"
11457 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
11458 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
11460 # perl is very fussy about spaces before <<
11461 || ( $tokenr =~ /^\<\</ )
11463 # avoid combining tokens to create new meanings. Example:
11464 # $a+ +$b must not become $a++$b
11465 || ( $is_digraph{ $tokenl . $tokenr } )
11466 || ( $is_trigraph{ $tokenl . $tokenr } )
11468 # another example: do not combine these two &'s:
11469 # allow_options & &OPT_EXECCGI
11470 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
11472 # don't combine $$ or $# with any alphanumeric
11473 # (testfile mangle.t with --mangle)
11474 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
11476 # retain any space after possible filehandle
11477 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
11478 || ( $typel eq 'Z' )
11480 # Perl is sensitive to whitespace after the + here:
11481 # $b = xvals $a + 0.1 * yvals $a;
11482 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
11484 # keep paren separate in 'use Foo::Bar ()'
11485 || ( $tokenr eq '('
11488 && $tokenll eq 'use' )
11490 # keep any space between filehandle and paren:
11491 # file mangle.t with --mangle:
11492 || ( $typel eq 'Y' && $tokenr eq '(' )
11494 # retain any space after here doc operator ( hereerr.t)
11495 || ( $typel eq 'h' )
11497 # be careful with a space around ++ and --, to avoid ambiguity as to
11498 # which token it applies
11499 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
11500 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
11502 # need space after foreach my; for example, this will fail in
11503 # older versions of Perl:
11504 # foreach my$ft(@filetypes)...
11508 # /^(for|foreach)$/
11509 && $is_for_foreach{$tokenll}
11510 && $tokenr =~ /^\$/
11513 # must have space between grep and left paren; "grep(" will fail
11514 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
11516 # don't stick numbers next to left parens, as in:
11517 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
11518 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
11520 # We must be sure that a space between a ? and a quoted string
11521 # remains if the space before the ? remains. [Loca.pm, lockarea]
11523 # $b=join $comma ? ',' : ':', @_; # ok
11524 # $b=join $comma?',' : ':', @_; # ok!
11525 # $b=join $comma ?',' : ':', @_; # error!
11526 # Not really required:
11527 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
11529 # do not remove space between an '&' and a bare word because
11530 # it may turn into a function evaluation, like here
11531 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
11532 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
11533 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
11535 # space stacked labels (TODO: check if really necessary)
11536 || ( $typel eq 'J' && $typer eq 'J' )
11538 ; # the value of this long logic sequence is the result we want
11539 ##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
11545 my %secret_operators;
11546 my %is_leading_secret_token;
11550 # token lists for perl secret operators as compiled by Philippe Bruhat
11551 # at: https://metacpan.org/module/perlsecret
11552 %secret_operators = (
11553 'Goatse' => [qw#= ( ) =#], #=( )=
11554 'Venus1' => [qw#0 +#], # 0+
11555 'Venus2' => [qw#+ 0#], # +0
11556 'Enterprise' => [qw#) x ! !#], # ()x!!
11557 'Kite1' => [qw#~ ~ <>#], # ~~<>
11558 'Kite2' => [qw#~~ <>#], # ~~<>
11559 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
11560 'Bang bang ' => [qw#! !#], # !!
11563 # The following operators and constants are not included because they
11564 # are normally kept tight by perltidy:
11568 # Make a lookup table indexed by the first token of each operator:
11569 # first token => [list, list, ...]
11570 foreach my $value ( values(%secret_operators) ) {
11571 my $tok = $value->[0];
11572 push @{ $is_leading_secret_token{$tok} }, $value;
11576 sub new_secret_operator_whitespace {
11578 my ( $rlong_array, $rwhitespace_flags ) = @_;
11580 # Loop over all tokens in this line
11581 my ( $token, $type );
11582 my $jmax = @{$rlong_array} - 1;
11583 foreach my $j ( 0 .. $jmax ) {
11585 $token = $rlong_array->[$j]->[_TOKEN_];
11586 $type = $rlong_array->[$j]->[_TYPE_];
11588 # Skip unless this token might start a secret operator
11589 next if ( $type eq 'b' );
11590 next unless ( $is_leading_secret_token{$token} );
11592 # Loop over all secret operators with this leading token
11593 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
11595 foreach my $tok ( @{$rpattern} ) {
11599 if ( $jend <= $jmax
11600 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
11602 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
11611 # set flags to prevent spaces within this operator
11612 foreach my $jj ( $j + 1 .. $jend ) {
11613 $rwhitespace_flags->[$jj] = WS_NO;
11618 } ## End Loop over all operators
11619 } ## End loop over all tokens
11624 { # begin print_line_of_tokens
11626 my $rinput_token_array; # Current working array
11627 my $rinput_K_array; # Future working array
11630 my $guessed_indentation_level;
11632 # This should be a return variable from extract_token
11633 # These local token variables are stored by store_token_to_go:
11638 my $container_environment;
11639 my $container_type;
11640 my $in_continued_quote;
11642 my $no_internal_newlines;
11648 # routine to pull the jth token from the line of tokens
11649 sub extract_token {
11650 my ( $self, $j ) = @_;
11652 my $rLL = $self->{rLL};
11653 $Ktoken_vars = $rinput_K_array->[$j];
11654 if ( !defined($Ktoken_vars) ) {
11656 # Shouldn't happen: an error here would be due to a recent program change
11657 Fault("undefined index K for j=$j");
11659 $rtoken_vars = $rLL->[$Ktoken_vars];
11661 if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
11663 # Shouldn't happen: an error here would be due to a recent program change
11665 j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
11669 #########################################################
11670 # these are now redundant and can eventually be eliminated
11672 $token = $rtoken_vars->[_TOKEN_];
11673 $type = $rtoken_vars->[_TYPE_];
11674 $block_type = $rtoken_vars->[_BLOCK_TYPE_];
11675 $container_type = $rtoken_vars->[_CONTAINER_TYPE_];
11676 $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
11677 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
11678 $level = $rtoken_vars->[_LEVEL_];
11679 $slevel = $rtoken_vars->[_SLEVEL_];
11680 $ci_level = $rtoken_vars->[_CI_LEVEL_];
11681 #########################################################
11689 sub save_current_token {
11692 $block_type, $ci_level,
11693 $container_environment, $container_type,
11694 $in_continued_quote, $level,
11695 $no_internal_newlines, $slevel,
11697 $type_sequence, $rtoken_vars,
11703 sub restore_current_token {
11705 $block_type, $ci_level,
11706 $container_environment, $container_type,
11707 $in_continued_quote, $level,
11708 $no_internal_newlines, $slevel,
11710 $type_sequence, $rtoken_vars,
11719 # Returns the length of a token, given:
11720 # $token=text of the token
11722 # $not_first_token = should be TRUE if this is not the first token of
11723 # the line. It might the index of this token in an array. It is
11724 # used to test for a side comment vs a block comment.
11725 # Note: Eventually this should be the only routine determining the
11726 # length of a token in this package.
11727 my ( $token, $type, $not_first_token ) = @_;
11728 my $token_length = length($token);
11730 # We mark lengths of side comments as just 1 if we are
11731 # ignoring their lengths when setting line breaks.
11733 if ( $rOpts_ignore_side_comment_lengths
11734 && $not_first_token
11736 return $token_length;
11739 sub rtoken_length {
11741 # return length of ith token in @{$rtokens}
11743 return token_length( $rinput_token_array->[$i]->[_TOKEN_],
11744 $rinput_token_array->[$i]->[_TYPE_], $i );
11747 # Routine to place the current token into the output stream.
11748 # Called once per output token.
11749 sub store_token_to_go {
11751 my ( $self, $side_comment_follows ) = @_;
11753 my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
11755 ++$max_index_to_go;
11756 $K_to_go[$max_index_to_go] = $Ktoken_vars;
11757 $rtoken_vars_to_go[$max_index_to_go] = $rtoken_vars;
11758 $tokens_to_go[$max_index_to_go] = $token;
11759 $types_to_go[$max_index_to_go] = $type;
11760 $nobreak_to_go[$max_index_to_go] = $flag;
11761 $old_breakpoint_to_go[$max_index_to_go] = 0;
11762 $forced_breakpoint_to_go[$max_index_to_go] = 0;
11763 $block_type_to_go[$max_index_to_go] = $block_type;
11764 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
11765 $container_environment_to_go[$max_index_to_go] = $container_environment;
11766 $ci_levels_to_go[$max_index_to_go] = $ci_level;
11767 $mate_index_to_go[$max_index_to_go] = -1;
11768 $matching_token_to_go[$max_index_to_go] = '';
11769 $bond_strength_to_go[$max_index_to_go] = 0;
11771 # Note: negative levels are currently retained as a diagnostic so that
11772 # the 'final indentation level' is correctly reported for bad scripts.
11773 # But this means that every use of $level as an index must be checked.
11774 # If this becomes too much of a problem, we might give up and just clip
11776 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
11777 $levels_to_go[$max_index_to_go] = $level;
11778 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
11780 # link the non-blank tokens
11781 my $iprev = $max_index_to_go - 1;
11782 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
11783 $iprev_to_go[$max_index_to_go] = $iprev;
11784 $inext_to_go[$iprev] = $max_index_to_go
11785 if ( $iprev >= 0 && $type ne 'b' );
11786 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
11788 $token_lengths_to_go[$max_index_to_go] =
11789 token_length( $token, $type, $max_index_to_go );
11791 # We keep a running sum of token lengths from the start of this batch:
11792 # summed_lengths_to_go[$i] = total length to just before token $i
11793 # summed_lengths_to_go[$i+1] = total length to just after token $i
11794 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
11795 $summed_lengths_to_go[$max_index_to_go] +
11796 $token_lengths_to_go[$max_index_to_go];
11798 # Define the indentation that this token would have if it started
11799 # a new line. We have to do this now because we need to know this
11800 # when considering one-line blocks.
11801 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
11803 # remember previous nonblank tokens seen
11804 if ( $type ne 'b' ) {
11805 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
11806 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
11807 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
11808 $last_nonblank_index_to_go = $max_index_to_go;
11809 $last_nonblank_type_to_go = $type;
11810 $last_nonblank_token_to_go = $token;
11811 if ( $type eq ',' ) {
11812 $comma_count_in_batch++;
11816 FORMATTER_DEBUG_FLAG_STORE && do {
11817 my ( $a, $b, $c ) = caller();
11819 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
11824 sub insert_new_token_to_go {
11826 # insert a new token into the output stream. use same level as
11827 # previous token; assumes a character at max_index_to_go.
11830 save_current_token();
11831 ( $token, $type, $slevel, $no_internal_newlines ) = @args;
11833 if ( $max_index_to_go == UNDEFINED_INDEX ) {
11834 warning("code bug: bad call to insert_new_token_to_go\n");
11836 $level = $levels_to_go[$max_index_to_go];
11838 # FIXME: it seems to be necessary to use the next, rather than
11839 # previous, value of this variable when creating a new blank (align.t)
11840 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
11841 $ci_level = $ci_levels_to_go[$max_index_to_go];
11842 $container_environment = $container_environment_to_go[$max_index_to_go];
11843 $in_continued_quote = 0;
11845 $type_sequence = "";
11846 $self->store_token_to_go();
11847 restore_current_token();
11852 my ($rold_token_hash) = @_;
11853 my %new_token_hash =
11854 map { $_, $rold_token_hash->{$_} } keys %{$rold_token_hash};
11855 return \%new_token_hash;
11860 my @new = map { $_ } @{$rold};
11864 sub copy_token_as_type {
11865 my ( $rold_token, $type, $token ) = @_;
11866 if ( $type eq 'b' ) {
11867 $token = " " unless defined($token);
11869 elsif ( $type eq 'q' ) {
11870 $token = '' unless defined($token);
11872 elsif ( $type eq '->' ) {
11873 $token = '->' unless defined($token);
11875 elsif ( $type eq ';' ) {
11876 $token = ';' unless defined($token);
11880 "Programming error: copy_token_as has type $type but should be 'b' or 'q'"
11883 my $rnew_token = copy_array($rold_token);
11884 $rnew_token->[_TYPE_] = $type;
11885 $rnew_token->[_TOKEN_] = $token;
11886 $rnew_token->[_BLOCK_TYPE_] = '';
11887 $rnew_token->[_CONTAINER_TYPE_] = '';
11888 $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
11889 $rnew_token->[_TYPE_SEQUENCE_] = '';
11890 return $rnew_token;
11893 sub boolean_equals {
11894 my ( $val1, $val2 ) = @_;
11895 return ( $val1 && $val2 || !$val1 && !$val2 );
11898 sub print_line_of_tokens {
11900 my ( $self, $line_of_tokens ) = @_;
11902 # This routine is called once per input line to process all of
11903 # the tokens on that line. This is the first stage of
11906 # Full-line comments and blank lines may be processed immediately.
11908 # For normal lines of code, the tokens are stored one-by-one,
11909 # via calls to 'sub store_token_to_go', until a known line break
11910 # point is reached. Then, the batch of collected tokens is
11911 # passed along to 'sub output_line_to_go' for further
11912 # processing. This routine decides if there should be
11913 # whitespace between each pair of non-white tokens, so later
11914 # routines only need to decide on any additional line breaks.
11915 # Any whitespace is initially a single space character. Later,
11916 # the vertical aligner may expand that to be multiple space
11917 # characters if necessary for alignment.
11919 $input_line_number = $line_of_tokens->{_line_number};
11920 my $input_line = $line_of_tokens->{_line_text};
11921 my $CODE_type = $line_of_tokens->{_code_type};
11923 my $rK_range = $line_of_tokens->{_rK_range};
11924 my ( $K_first, $K_last ) = @{$rK_range};
11926 my $rLL = $self->{rLL};
11927 my $rbreak_container = $self->{rbreak_container};
11929 if ( !defined($K_first) ) {
11931 # Unexpected blank line..
11932 # Calling routine was supposed to handle this
11934 "Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring"
11939 $no_internal_newlines = 1 - $rOpts_add_newlines;
11941 ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
11942 my $is_static_block_comment_without_leading_space =
11943 $CODE_type eq 'SBCX';
11944 $is_static_block_comment =
11945 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
11946 my $is_hanging_side_comment = $CODE_type eq 'HSC';
11947 my $is_VERSION_statement = $CODE_type eq 'VER';
11948 if ($is_VERSION_statement) {
11949 $saw_VERSION_in_this_file = 1;
11950 $no_internal_newlines = 1;
11953 # Add interline blank if any
11954 my $last_old_nonblank_type = "b";
11955 my $first_new_nonblank_type = "b";
11956 my $first_new_nonblank_token = " ";
11957 if ( $max_index_to_go >= 0 ) {
11958 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
11959 $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_];
11960 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
11962 && $types_to_go[$max_index_to_go] ne 'b'
11964 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
11970 # Copy the tokens into local arrays
11971 $rinput_token_array = [];
11972 $rinput_K_array = [];
11973 $rinput_K_array = [ ( $K_first .. $K_last ) ];
11974 $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
11975 my $jmax = @{$rinput_K_array} - 1;
11977 $in_continued_quote = $starting_in_quote =
11978 $line_of_tokens->{_starting_in_quote};
11979 $in_quote = $line_of_tokens->{_ending_in_quote};
11980 $ending_in_quote = $in_quote;
11981 $guessed_indentation_level =
11982 $line_of_tokens->{_guessed_indentation_level};
11985 my $next_nonblank_token;
11986 my $next_nonblank_token_type;
11989 $container_type = "";
11990 $container_environment = "";
11991 $type_sequence = "";
11993 ######################################
11994 # Handle a block (full-line) comment..
11995 ######################################
11998 if ( $rOpts->{'delete-block-comments'} ) { return }
12000 if ( $rOpts->{'tee-block-comments'} ) {
12001 $file_writer_object->tee_on();
12004 destroy_one_line_block();
12005 $self->output_line_to_go();
12007 # output a blank line before block comments
12009 # unless we follow a blank or comment line
12010 $last_line_leading_type !~ /^[#b]$/
12013 && $rOpts->{'blanks-before-comments'}
12015 # if this is NOT an empty comment line
12016 && $rinput_token_array->[0]->[_TOKEN_] ne '#'
12018 # not after a short line ending in an opening token
12019 # because we already have space above this comment.
12020 # Note that the first comment in this if block, after
12021 # the 'if (', does not get a blank line because of this.
12022 && !$last_output_short_opening_token
12024 # never before static block comments
12025 && !$is_static_block_comment
12028 $self->flush(); # switching to new output stream
12029 $file_writer_object->write_blank_code_line();
12030 $last_line_leading_type = 'b';
12033 # TRIM COMMENTS -- This could be turned off as a option
12034 $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end
12037 $rOpts->{'indent-block-comments'}
12038 && ( !$rOpts->{'indent-spaced-block-comments'}
12039 || $input_line =~ /^\s+/ )
12040 && !$is_static_block_comment_without_leading_space
12043 $self->extract_token(0);
12044 $self->store_token_to_go();
12045 $self->output_line_to_go();
12048 $self->flush(); # switching to new output stream
12049 $file_writer_object->write_code_line(
12050 $rinput_token_array->[0]->[_TOKEN_] . "\n" );
12051 $last_line_leading_type = '#';
12053 if ( $rOpts->{'tee-block-comments'} ) {
12054 $file_writer_object->tee_off();
12059 # TODO: Move to sub scan_comments
12060 # compare input/output indentation except for continuation lines
12061 # (because they have an unknown amount of initial blank space)
12062 # and lines which are quotes (because they may have been outdented)
12063 # Note: this test is placed here because we know the continuation flag
12064 # at this point, which allows us to avoid non-meaningful checks.
12065 my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
12066 compare_indentation_levels( $guessed_indentation_level,
12067 $structural_indentation_level )
12068 unless ( $is_hanging_side_comment
12069 || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
12070 || $guessed_indentation_level == 0
12071 && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
12073 ##########################
12074 # Handle indentation-only
12075 ##########################
12077 # NOTE: In previous versions we sent all qw lines out immediately here.
12078 # No longer doing this: also write a line which is entirely a 'qw' list
12079 # to allow stacking of opening and closing tokens. Note that interior
12080 # qw lines will still go out at the end of this routine.
12081 ##if ( $rOpts->{'indent-only'} ) {
12082 if ( $CODE_type eq 'IO' ) {
12084 my $line = $input_line;
12086 # delete side comments if requested with -io, but
12087 # we will not allow deleting of closing side comments with -io
12088 # because the coding would be more complex
12089 if ( $rOpts->{'delete-side-comments'}
12090 && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
12094 foreach my $jj ( 0 .. $jmax - 1 ) {
12095 $line .= $rinput_token_array->[$jj]->[_TOKEN_];
12098 $line = trim($line);
12100 $self->extract_token(0);
12104 $container_type = "";
12105 $container_environment = "";
12106 $type_sequence = "";
12107 $self->store_token_to_go();
12108 $self->output_line_to_go();
12112 ############################
12113 # Handle all other lines ...
12114 ############################
12116 #######################################################
12117 # FIXME: this should become unnecessary
12118 # making $j+2 valid simplifies coding
12120 copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
12121 push @{$rinput_token_array}, $rnew_blank;
12122 push @{$rinput_token_array}, $rnew_blank;
12123 #######################################################
12125 # If we just saw the end of an elsif block, write nag message
12126 # if we do not see another elseif or an else.
12127 if ($looking_for_else) {
12129 unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
12130 write_logfile_entry("(No else block)\n");
12132 $looking_for_else = 0;
12135 # This is a good place to kill incomplete one-line blocks
12138 ( $semicolons_before_block_self_destruct == 0 )
12139 && ( $max_index_to_go >= 0 )
12140 && ( $last_old_nonblank_type eq ';' )
12141 && ( $first_new_nonblank_token ne '}' )
12144 # Patch for RT #98902. Honor request to break at old commas.
12145 || ( $rOpts_break_at_old_comma_breakpoints
12146 && $max_index_to_go >= 0
12147 && $last_old_nonblank_type eq ',' )
12150 $forced_breakpoint_to_go[$max_index_to_go] = 1
12151 if ($rOpts_break_at_old_comma_breakpoints);
12152 destroy_one_line_block();
12153 $self->output_line_to_go();
12156 # loop to process the tokens one-by-one
12160 # We do not want a leading blank if the previous batch just got output
12162 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
12166 foreach my $j ( $jmin .. $jmax ) {
12168 # pull out the local values for this token
12169 $self->extract_token($j);
12171 if ( $type eq '#' ) {
12173 # trim trailing whitespace
12174 # (there is no option at present to prevent this)
12175 $token =~ s/\s*$//;
12178 $rOpts->{'delete-side-comments'}
12180 # delete closing side comments if necessary
12181 || ( $rOpts->{'delete-closing-side-comments'}
12182 && $token =~ /$closing_side_comment_prefix_pattern/o
12183 && $last_nonblank_block_type =~
12184 /$closing_side_comment_list_pattern/o )
12187 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
12188 unstore_token_to_go();
12194 # If we are continuing after seeing a right curly brace, flush
12195 # buffer unless we see what we are looking for, as in
12197 if ( $rbrace_follower && $type ne 'b' ) {
12199 unless ( $rbrace_follower->{$token} ) {
12200 $self->output_line_to_go();
12202 $rbrace_follower = undef;
12206 ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
12209 $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
12210 $next_nonblank_token_type =
12211 $rinput_token_array->[$j_next]->[_TYPE_];
12213 ######################
12214 # MAYBE MOVE ELSEWHERE?
12215 ######################
12216 if ( $type eq 'Q' ) {
12217 note_embedded_tab() if ( $token =~ "\t" );
12219 # make note of something like '$var = s/xxx/yyy/;'
12220 # in case it should have been '$var =~ s/xxx/yyy/;'
12222 $token =~ /^(s|tr|y|m|\/)/
12223 && $last_nonblank_token =~ /^(=|==|!=)$/
12225 # preceded by simple scalar
12226 && $last_last_nonblank_type eq 'i'
12227 && $last_last_nonblank_token =~ /^\$/
12229 # followed by some kind of termination
12230 # (but give complaint if we can's see far enough ahead)
12231 && $next_nonblank_token =~ /^[; \)\}]$/
12233 # scalar is not declared
12235 $types_to_go[0] eq 'k'
12236 && $tokens_to_go[0] =~ /^(my|our|local)$/
12240 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
12242 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
12247 # Do not allow breaks which would promote a side comment to a
12248 # block comment. In order to allow a break before an opening
12249 # or closing BLOCK, followed by a side comment, those sections
12250 # of code will handle this flag separately.
12251 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
12252 my $is_opening_BLOCK =
12256 && $block_type ne 't' );
12257 my $is_closing_BLOCK =
12261 && $block_type ne 't' );
12263 if ( $side_comment_follows
12264 && !$is_opening_BLOCK
12265 && !$is_closing_BLOCK )
12267 $no_internal_newlines = 1;
12270 # We're only going to handle breaking for code BLOCKS at this
12271 # (top) level. Other indentation breaks will be handled by
12272 # sub scan_list, which is better suited to dealing with them.
12273 if ($is_opening_BLOCK) {
12275 # Tentatively output this token. This is required before
12276 # calling starting_one_line_block. We may have to unstore
12277 # it, though, if we have to break before it.
12278 $self->store_token_to_go($side_comment_follows);
12280 # Look ahead to see if we might form a one-line block..
12283 # But obey any flag set for cuddled blocks
12284 if ( $rbreak_container->{$type_sequence} ) {
12285 destroy_one_line_block();
12289 starting_one_line_block( $j, $jmax, $level, $slevel,
12290 $ci_level, $rinput_token_array );
12292 clear_breakpoint_undo_stack();
12294 # to simplify the logic below, set a flag to indicate if
12295 # this opening brace is far from the keyword which introduces it
12296 my $keyword_on_same_line = 1;
12297 if ( ( $max_index_to_go >= 0 )
12298 && ( $last_nonblank_type eq ')' ) )
12300 if ( $block_type =~ /^(if|else|elsif)$/
12301 && ( $tokens_to_go[0] eq '}' )
12302 && $rOpts_cuddled_else )
12304 $keyword_on_same_line = 1;
12306 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
12308 $keyword_on_same_line = 0;
12312 # decide if user requested break before '{'
12315 # use -bl flag if not a sub block of any type
12316 $block_type !~ /^sub\b/
12317 ? $rOpts->{'opening-brace-on-new-line'}
12319 # use -sbl flag for a named sub block
12320 : $block_type !~ /$ASUB_PATTERN/
12321 ? $rOpts->{'opening-sub-brace-on-new-line'}
12323 # use -asbl flag for an anonymous sub block
12324 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
12326 # Do not break if this token is welded to the left
12327 if ( weld_len_left( $type_sequence, $token ) ) {
12331 # Break before an opening '{' ...
12337 # and we were unable to start looking for a block,
12338 && $index_start_one_line_block == UNDEFINED_INDEX
12340 # or if it will not be on same line as its keyword, so that
12341 # it will be outdented (eval.t, overload.t), and the user
12342 # has not insisted on keeping it on the right
12343 || ( !$keyword_on_same_line
12344 && !$rOpts->{'opening-brace-always-on-right'} )
12349 # but only if allowed
12350 unless ($no_internal_newlines) {
12352 # since we already stored this token, we must unstore it
12353 $self->unstore_token_to_go();
12355 # then output the line
12356 $self->output_line_to_go();
12358 # and now store this token at the start of a new line
12359 $self->store_token_to_go($side_comment_follows);
12363 # Now update for side comment
12364 if ($side_comment_follows) { $no_internal_newlines = 1 }
12366 # now output this line
12367 unless ($no_internal_newlines) {
12368 $self->output_line_to_go();
12372 elsif ($is_closing_BLOCK) {
12374 # If there is a pending one-line block ..
12375 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12377 # we have to terminate it if..
12380 # it is too long (final length may be different from
12381 # initial estimate). note: must allow 1 space for this
12383 excess_line_length( $index_start_one_line_block,
12384 $max_index_to_go ) >= 0
12386 # or if it has too many semicolons
12387 || ( $semicolons_before_block_self_destruct == 0
12388 && $last_nonblank_type ne ';' )
12391 destroy_one_line_block();
12395 # put a break before this closing curly brace if appropriate
12396 unless ( $no_internal_newlines
12397 || $index_start_one_line_block != UNDEFINED_INDEX )
12400 # write out everything before this closing curly brace
12401 $self->output_line_to_go();
12404 # Now update for side comment
12405 if ($side_comment_follows) { $no_internal_newlines = 1 }
12407 # store the closing curly brace
12408 $self->store_token_to_go();
12410 # ok, we just stored a closing curly brace. Often, but
12411 # not always, we want to end the line immediately.
12412 # So now we have to check for special cases.
12414 # if this '}' successfully ends a one-line block..
12415 my $is_one_line_block = 0;
12416 my $keep_going = 0;
12417 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12419 # Remember the type of token just before the
12420 # opening brace. It would be more general to use
12421 # a stack, but this will work for one-line blocks.
12422 $is_one_line_block =
12423 $types_to_go[$index_start_one_line_block];
12425 # we have to actually make it by removing tentative
12426 # breaks that were set within it
12427 undo_forced_breakpoint_stack(0);
12428 set_nobreaks( $index_start_one_line_block,
12429 $max_index_to_go - 1 );
12431 # then re-initialize for the next one-line block
12432 destroy_one_line_block();
12434 # then decide if we want to break after the '}' ..
12435 # We will keep going to allow certain brace followers as in:
12436 # do { $ifclosed = 1; last } unless $losing;
12438 # But make a line break if the curly ends a
12439 # significant block:
12442 $is_block_without_semicolon{$block_type}
12444 # Follow users break point for
12445 # one line block types U & G, such as a 'try' block
12446 || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
12449 # if needless semicolon follows we handle it later
12450 && $next_nonblank_token ne ';'
12453 $self->output_line_to_go()
12454 unless ($no_internal_newlines);
12458 # set string indicating what we need to look for brace follower
12460 if ( $block_type eq 'do' ) {
12461 $rbrace_follower = \%is_do_follower;
12463 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
12464 $rbrace_follower = \%is_if_brace_follower;
12466 elsif ( $block_type eq 'else' ) {
12467 $rbrace_follower = \%is_else_brace_follower;
12470 # added eval for borris.t
12471 elsif ($is_sort_map_grep_eval{$block_type}
12472 || $is_one_line_block eq 'G' )
12474 $rbrace_follower = undef;
12479 elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
12481 if ($is_one_line_block) {
12482 $rbrace_follower = \%is_anon_sub_1_brace_follower;
12485 $rbrace_follower = \%is_anon_sub_brace_follower;
12489 # None of the above: specify what can follow a closing
12490 # brace of a block which is not an
12491 # if/elsif/else/do/sort/map/grep/eval
12493 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
12495 $rbrace_follower = \%is_other_brace_follower;
12498 # See if an elsif block is followed by another elsif or else;
12500 if ( $block_type eq 'elsif' ) {
12502 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
12503 $looking_for_else = 1; # ok, check on next line
12507 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
12508 write_logfile_entry("No else block :(\n");
12513 # keep going after certain block types (map,sort,grep,eval)
12514 # added eval for borris.t
12520 # if no more tokens, postpone decision until re-entring
12521 elsif ( ( $next_nonblank_token_type eq 'b' )
12522 && $rOpts_add_newlines )
12524 unless ($rbrace_follower) {
12525 $self->output_line_to_go()
12526 unless ($no_internal_newlines);
12530 elsif ($rbrace_follower) {
12532 unless ( $rbrace_follower->{$next_nonblank_token} ) {
12533 $self->output_line_to_go()
12534 unless ($no_internal_newlines);
12536 $rbrace_follower = undef;
12540 $self->output_line_to_go() unless ($no_internal_newlines);
12543 } # end treatment of closing block token
12546 elsif ( $type eq ';' ) {
12548 # kill one-line blocks with too many semicolons
12549 $semicolons_before_block_self_destruct--;
12551 ( $semicolons_before_block_self_destruct < 0 )
12552 || ( $semicolons_before_block_self_destruct == 0
12553 && $next_nonblank_token_type !~ /^[b\}]$/ )
12556 destroy_one_line_block();
12559 # Remove unnecessary semicolons, but not after bare
12560 # blocks, where it could be unsafe if the brace is
12564 $last_nonblank_token eq '}'
12566 $is_block_without_semicolon{
12567 $last_nonblank_block_type}
12568 || $last_nonblank_block_type =~ /$SUB_PATTERN/
12569 || $last_nonblank_block_type =~ /^\w+:$/ )
12571 || $last_nonblank_type eq ';'
12576 $rOpts->{'delete-semicolons'}
12578 # don't delete ; before a # because it would promote it
12579 # to a block comment
12580 && ( $next_nonblank_token_type ne '#' )
12583 note_deleted_semicolon();
12584 $self->output_line_to_go()
12585 unless ( $no_internal_newlines
12586 || $index_start_one_line_block != UNDEFINED_INDEX );
12590 write_logfile_entry("Extra ';'\n");
12593 $self->store_token_to_go();
12595 $self->output_line_to_go()
12596 unless ( $no_internal_newlines
12597 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
12598 || ( $next_nonblank_token eq '}' ) );
12602 # handle here_doc target string
12603 elsif ( $type eq 'h' ) {
12605 # no newlines after seeing here-target
12606 $no_internal_newlines = 1;
12607 destroy_one_line_block();
12608 $self->store_token_to_go();
12611 # handle all other token types
12614 $self->store_token_to_go();
12617 # remember two previous nonblank OUTPUT tokens
12618 if ( $type ne '#' && $type ne 'b' ) {
12619 $last_last_nonblank_token = $last_nonblank_token;
12620 $last_last_nonblank_type = $last_nonblank_type;
12621 $last_nonblank_token = $token;
12622 $last_nonblank_type = $type;
12623 $last_nonblank_block_type = $block_type;
12626 # unset the continued-quote flag since it only applies to the
12627 # first token, and we want to resume normal formatting if
12628 # there are additional tokens on the line
12629 $in_continued_quote = 0;
12631 } # end of loop over all tokens in this 'line_of_tokens'
12633 # we have to flush ..
12636 # if there is a side comment
12637 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
12639 # if this line ends in a quote
12640 # NOTE: This is critically important for insuring that quoted lines
12641 # do not get processed by things like -sot and -sct
12644 # if this is a VERSION statement
12645 || $is_VERSION_statement
12647 # to keep a label at the end of a line
12650 # if we are instructed to keep all old line breaks
12651 || !$rOpts->{'delete-old-newlines'}
12654 destroy_one_line_block();
12655 $self->output_line_to_go();
12658 # mark old line breakpoints in current output stream
12659 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
12660 my $jobp = $max_index_to_go;
12661 if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
12665 $old_breakpoint_to_go[$jobp] = 1;
12668 } ## end sub print_line_of_tokens
12669 } ## end block print_line_of_tokens
12671 # sub output_line_to_go sends one logical line of tokens on down the
12672 # pipeline to the VerticalAligner package, breaking the line into continuation
12673 # lines as necessary. The line of tokens is ready to go in the "to_go"
12675 sub output_line_to_go {
12679 # debug stuff; this routine can be called from many points
12680 FORMATTER_DEBUG_FLAG_OUTPUT && do {
12681 my ( $a, $b, $c ) = caller;
12683 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
12685 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
12686 write_diagnostics("$output_str\n");
12689 # Do not end line in a weld
12690 # TODO: Move this fix into the routine?
12691 #my $jnb = $max_index_to_go;
12692 #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- }
12693 return if ( weld_len_right_to_go($max_index_to_go) );
12695 # just set a tentative breakpoint if we might be in a one-line block
12696 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12697 set_forced_breakpoint($max_index_to_go);
12701 my $cscw_block_comment;
12702 $cscw_block_comment = $self->add_closing_side_comment()
12703 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
12705 my $comma_arrow_count_contained = match_opening_and_closing_tokens();
12707 # tell the -lp option we are outputting a batch so it can close
12708 # any unfinished items in its stack
12711 # If this line ends in a code block brace, set breaks at any
12712 # previous closing code block braces to breakup a chain of code
12713 # blocks on one line. This is very rare but can happen for
12714 # user-defined subs. For example we might be looking at this:
12715 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
12716 my $saw_good_break = 0; # flag to force breaks even if short line
12719 # looking for opening or closing block brace
12720 $block_type_to_go[$max_index_to_go]
12722 # but not one of these which are never duplicated on a line:
12723 # until|while|for|if|elsif|else
12724 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
12727 my $lev = $nesting_depth_to_go[$max_index_to_go];
12729 # Walk backwards from the end and
12730 # set break at any closing block braces at the same level.
12731 # But quit if we are not in a chain of blocks.
12732 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
12733 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
12734 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
12736 if ( $block_type_to_go[$i] ) {
12737 if ( $tokens_to_go[$i] eq '}' ) {
12738 set_forced_breakpoint($i);
12739 $saw_good_break = 1;
12743 # quit if we see anything besides words, function, blanks
12745 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
12750 my $imax = $max_index_to_go;
12752 # trim any blank tokens
12753 if ( $max_index_to_go >= 0 ) {
12754 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
12755 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
12758 # anything left to write?
12759 if ( $imin <= $imax ) {
12761 # add a blank line before certain key types but not after a comment
12762 if ( $last_line_leading_type !~ /^[#]/ ) {
12763 my $want_blank = 0;
12764 my $leading_token = $tokens_to_go[$imin];
12765 my $leading_type = $types_to_go[$imin];
12767 # blank lines before subs except declarations and one-liners
12768 # MCONVERSION LOCATION - for sub tokenization change
12769 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
12770 $want_blank = $rOpts->{'blank-lines-before-subs'}
12772 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
12773 $imax ) !~ /^[\;\}]$/
12777 # break before all package declarations
12778 # MCONVERSION LOCATION - for tokenizaton change
12779 elsif ($leading_token =~ /^(package\s)/
12780 && $leading_type eq 'i' )
12782 $want_blank = $rOpts->{'blank-lines-before-packages'};
12785 # break before certain key blocks except one-liners
12786 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
12787 $want_blank = $rOpts->{'blank-lines-before-subs'}
12789 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
12794 # Break before certain block types if we haven't had a
12795 # break at this level for a while. This is the
12796 # difficult decision..
12797 elsif ($leading_type eq 'k'
12798 && $last_line_leading_type ne 'b'
12799 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
12801 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
12802 if ( !defined($lc) ) { $lc = 0 }
12805 $rOpts->{'blanks-before-blocks'}
12806 && $lc >= $rOpts->{'long-block-line-count'}
12807 && $file_writer_object->get_consecutive_nonblank_lines() >=
12808 $rOpts->{'long-block-line-count'}
12810 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
12815 # Check for blank lines wanted before a closing brace
12816 if ( $leading_token eq '}' ) {
12817 if ( $rOpts->{'blank-lines-before-closing-block'}
12818 && $block_type_to_go[$imin]
12819 && $block_type_to_go[$imin] =~
12820 /$blank_lines_before_closing_block_pattern/ )
12822 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
12823 if ( $nblanks > $want_blank ) {
12824 $want_blank = $nblanks;
12831 # future: send blank line down normal path to VerticalAligner
12832 Perl::Tidy::VerticalAligner::flush();
12833 $file_writer_object->require_blank_code_lines($want_blank);
12837 # update blank line variables and count number of consecutive
12838 # non-blank, non-comment lines at this level
12839 $last_last_line_leading_level = $last_line_leading_level;
12840 $last_line_leading_level = $levels_to_go[$imin];
12841 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
12842 $last_line_leading_type = $types_to_go[$imin];
12843 if ( $last_line_leading_level == $last_last_line_leading_level
12844 && $last_line_leading_type ne 'b'
12845 && $last_line_leading_type ne '#'
12846 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
12848 $nonblank_lines_at_depth[$last_line_leading_level]++;
12851 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
12854 FORMATTER_DEBUG_FLAG_FLUSH && do {
12855 my ( $package, $file, $line ) = caller;
12857 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
12860 # add a couple of extra terminal blank tokens
12863 # set all forced breakpoints for good list formatting
12864 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
12866 my $old_line_count_in_batch =
12867 $rtoken_vars_to_go[$max_index_to_go]->[_LINE_INDEX_] -
12868 $rtoken_vars_to_go[0]->[_LINE_INDEX_] + 1;
12872 || $old_line_count_in_batch > 1
12874 # must always call scan_list() with unbalanced batches because it
12875 # is maintaining some stacks
12876 || is_unbalanced_batch()
12878 # call scan_list if we might want to break at commas
12880 $comma_count_in_batch
12881 && ( $rOpts_maximum_fields_per_table > 0
12882 || $rOpts_comma_arrow_breakpoints == 0 )
12885 # call scan_list if user may want to break open some one-line
12887 || ( $comma_arrow_count_contained
12888 && $rOpts_comma_arrow_breakpoints != 3 )
12891 ## This caused problems in one version of perl for unknown reasons:
12892 ## $saw_good_break ||= scan_list();
12893 my $sgb = scan_list();
12894 $saw_good_break ||= $sgb;
12897 # let $ri_first and $ri_last be references to lists of
12898 # first and last tokens of line fragments to output..
12899 my ( $ri_first, $ri_last );
12901 # write a single line if..
12904 # we aren't allowed to add any newlines
12905 !$rOpts_add_newlines
12907 # or, we don't already have an interior breakpoint
12908 # and we didn't see a good breakpoint
12910 !$forced_breakpoint_count
12911 && !$saw_good_break
12913 # and this line is 'short'
12918 @{$ri_first} = ($imin);
12919 @{$ri_last} = ($imax);
12922 # otherwise use multiple lines
12925 ( $ri_first, $ri_last, my $colon_count ) =
12926 set_continuation_breaks($saw_good_break);
12928 break_all_chain_tokens( $ri_first, $ri_last );
12930 break_equals( $ri_first, $ri_last );
12932 # now we do a correction step to clean this up a bit
12933 # (The only time we would not do this is for debugging)
12934 if ( $rOpts->{'recombine'} ) {
12935 ( $ri_first, $ri_last ) =
12936 recombine_breakpoints( $ri_first, $ri_last );
12939 insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
12942 # do corrector step if -lp option is used
12943 my $do_not_pad = 0;
12944 if ($rOpts_line_up_parentheses) {
12945 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
12947 $self->unmask_phantom_semicolons( $ri_first, $ri_last );
12948 $self->send_lines_to_vertical_aligner( $ri_first, $ri_last,
12951 # Insert any requested blank lines after an opening brace. We have to
12952 # skip back before any side comment to find the terminal token
12954 for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
12955 next if $types_to_go[$iterm] eq '#';
12956 next if $types_to_go[$iterm] eq 'b';
12960 # write requested number of blank lines after an opening block brace
12961 if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
12962 if ( $rOpts->{'blank-lines-after-opening-block'}
12963 && $block_type_to_go[$iterm]
12964 && $block_type_to_go[$iterm] =~
12965 /$blank_lines_after_opening_block_pattern/ )
12967 my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
12968 Perl::Tidy::VerticalAligner::flush();
12969 $file_writer_object->require_blank_code_lines($nblanks);
12974 prepare_for_new_input_lines();
12976 # output any new -cscw block comment
12977 if ($cscw_block_comment) {
12979 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
12984 sub note_added_semicolon {
12985 my ($line_number) = @_;
12986 $last_added_semicolon_at = $line_number;
12987 if ( $added_semicolon_count == 0 ) {
12988 $first_added_semicolon_at = $last_added_semicolon_at;
12990 $added_semicolon_count++;
12991 write_logfile_entry("Added ';' here\n");
12995 sub note_deleted_semicolon {
12996 $last_deleted_semicolon_at = $input_line_number;
12997 if ( $deleted_semicolon_count == 0 ) {
12998 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
13000 $deleted_semicolon_count++;
13001 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
13005 sub note_embedded_tab {
13006 $embedded_tab_count++;
13007 $last_embedded_tab_at = $input_line_number;
13008 if ( !$first_embedded_tab_at ) {
13009 $first_embedded_tab_at = $last_embedded_tab_at;
13012 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
13013 write_logfile_entry("Embedded tabs in quote or pattern\n");
13018 sub starting_one_line_block {
13020 # after seeing an opening curly brace, look for the closing brace
13021 # and see if the entire block will fit on a line. This routine is
13022 # not always right because it uses the old whitespace, so a check
13023 # is made later (at the closing brace) to make sure we really
13024 # have a one-line block. We have to do this preliminary check,
13025 # though, because otherwise we would always break at a semicolon
13026 # within a one-line block if the block contains multiple statements.
13028 my ( $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
13030 my $jmax_check = @{$rtoken_array};
13031 if ( $jmax_check < $jmax ) {
13032 print STDERR "jmax=$jmax > $jmax_check\n";
13035 # kill any current block - we can only go 1 deep
13036 destroy_one_line_block();
13039 # 1=distance from start of block to opening brace exceeds line length
13044 # shouldn't happen: there must have been a prior call to
13045 # store_token_to_go to put the opening brace in the output stream
13046 if ( $max_index_to_go < 0 ) {
13047 warning("program bug: store_token_to_go called incorrectly\n");
13048 report_definite_bug();
13052 # cannot use one-line blocks with cuddled else/elsif lines
13053 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
13058 my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
13060 # find the starting keyword for this block (such as 'if', 'else', ...)
13062 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
13063 $i_start = $max_index_to_go;
13066 # the previous nonblank token should start these block types
13067 elsif (( $last_last_nonblank_token_to_go eq $block_type )
13068 || ( $block_type =~ /^sub\b/ )
13069 || $block_type =~ /\(\)/ )
13071 $i_start = $last_last_nonblank_index_to_go;
13073 # For signatures and extended syntax ...
13074 # If this brace follows a parenthesized list, we should look back to
13075 # find the keyword before the opening paren because otherwise we might
13076 # form a one line block which stays intack, and cause the parenthesized
13077 # expression to break open. That looks bad. However, actually
13078 # searching for the opening paren is slow and tedius.
13079 # The actual keyword is often at the start of a line, but might not be.
13080 # For example, we might have an anonymous sub with signature list
13081 # following a =>. It is safe to mark the start anywhere before the
13082 # opening paren, so we just go back to the prevoious break (or start of
13083 # the line) if that is before the opening paren. The minor downside is
13084 # that we may very occasionally break open a block unnecessarily.
13085 if ( $tokens_to_go[$i_start] eq ')' ) {
13086 $i_start = $index_max_forced_break + 1;
13087 if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
13088 my $lev = $levels_to_go[$i_start];
13089 if ( $lev > $level ) { return 0 }
13093 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
13095 # For something like "if (xxx) {", the keyword "if" will be
13096 # just after the most recent break. This will be 0 unless
13097 # we have just killed a one-line block and are starting another.
13099 # Note: cannot use inext_index_to_go[] here because that array
13100 # is still being constructed.
13101 $i_start = $index_max_forced_break + 1;
13102 if ( $types_to_go[$i_start] eq 'b' ) {
13106 # Patch to avoid breaking short blocks defined with extended_syntax:
13107 # Strip off any trailing () which was added in the parser to mark
13108 # the opening keyword. For example, in the following
13109 # create( TypeFoo $e) {$bubba}
13110 # the blocktype would be marked as create()
13111 my $stripped_block_type = $block_type;
13112 $stripped_block_type =~ s/\(\)$//;
13114 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
13119 # patch for SWITCH/CASE to retain one-line case/when blocks
13120 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
13122 # Note: cannot use inext_index_to_go[] here because that array
13123 # is still being constructed.
13124 $i_start = $index_max_forced_break + 1;
13125 if ( $types_to_go[$i_start] eq 'b' ) {
13128 unless ( $tokens_to_go[$i_start] eq $block_type ) {
13137 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
13139 # see if length is too long to even start
13140 if ( $pos > maximum_line_length($i_start) ) {
13144 foreach my $i ( $j + 1 .. $jmax ) {
13146 # old whitespace could be arbitrarily large, so don't use it
13147 if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
13148 else { $pos += rtoken_length($i) }
13150 # Return false result if we exceed the maximum line length,
13151 if ( $pos > maximum_line_length($i_start) ) {
13155 # or encounter another opening brace before finding the closing brace.
13156 elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
13157 && $rtoken_array->[$i]->[_TYPE_] eq '{'
13158 && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
13163 # if we find our closing brace..
13164 elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
13165 && $rtoken_array->[$i]->[_TYPE_] eq '}'
13166 && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
13169 # be sure any trailing comment also fits on the line
13171 ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1;
13173 # Patch for one-line sort/map/grep/eval blocks with side comments:
13174 # We will ignore the side comment length for sort/map/grep/eval
13175 # because this can lead to statements which change every time
13176 # perltidy is run. Here is an example from Denis Moskowitz which
13177 # oscillates between these two states without this patch:
13180 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13184 ## $_->foo ne 'bar'
13185 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13189 # When the first line is input it gets broken apart by the main
13190 # line break logic in sub print_line_of_tokens.
13191 # When the second line is input it gets recombined by
13192 # print_line_of_tokens and passed to the output routines. The
13193 # output routines (set_continuation_breaks) do not break it apart
13194 # because the bond strengths are set to the highest possible value
13195 # for grep/map/eval/sort blocks, so the first version gets output.
13196 # It would be possible to fix this by changing bond strengths,
13197 # but they are high to prevent errors in older versions of perl.
13199 if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
13200 && !$is_sort_map_grep{$block_type} )
13203 $pos += rtoken_length($i_nonblank);
13205 if ( $i_nonblank > $i + 1 ) {
13207 # source whitespace could be anything, assume
13208 # at least one space before the hash on output
13209 if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
13212 else { $pos += rtoken_length( $i + 1 ) }
13215 if ( $pos >= maximum_line_length($i_start) ) {
13220 # ok, it's a one-line block
13221 create_one_line_block( $i_start, 20 );
13225 # just keep going for other characters
13230 # Allow certain types of new one-line blocks to form by joining
13231 # input lines. These can be safely done, but for other block types,
13232 # we keep old one-line blocks but do not form new ones. It is not
13233 # always a good idea to make as many one-line blocks as possible,
13234 # so other types are not done. The user can always use -mangle.
13235 if ( $is_sort_map_grep_eval{$block_type} ) {
13236 create_one_line_block( $i_start, 1 );
13241 sub unstore_token_to_go {
13243 # remove most recent token from output stream
13245 if ( $max_index_to_go > 0 ) {
13246 $max_index_to_go--;
13249 $max_index_to_go = UNDEFINED_INDEX;
13254 sub want_blank_line {
13257 $file_writer_object->want_blank_line();
13261 sub write_unindented_line {
13262 my ( $self, $line ) = @_;
13264 $file_writer_object->write_line($line);
13270 # Undo continuation indentation in certain sequences
13271 # For example, we can undo continuation indentation in sort/map/grep chains
13272 # my $dat1 = pack( "n*",
13273 # map { $_, $lookup->{$_} }
13274 # sort { $a <=> $b }
13275 # grep { $lookup->{$_} ne $default } keys %$lookup );
13276 # To align the map/sort/grep keywords like this:
13277 # my $dat1 = pack( "n*",
13278 # map { $_, $lookup->{$_} }
13279 # sort { $a <=> $b }
13280 # grep { $lookup->{$_} ne $default } keys %$lookup );
13281 my ( $ri_first, $ri_last ) = @_;
13282 my ( $line_1, $line_2, $lev_last );
13283 my $this_line_is_semicolon_terminated;
13284 my $max_line = @{$ri_first} - 1;
13286 # looking at each line of this batch..
13287 # We are looking at leading tokens and looking for a sequence
13288 # all at the same level and higher level than enclosing lines.
13289 foreach my $line ( 0 .. $max_line ) {
13291 my $ibeg = $ri_first->[$line];
13292 my $lev = $levels_to_go[$ibeg];
13295 # if we have started a chain..
13298 # see if it continues..
13299 if ( $lev == $lev_last ) {
13300 if ( $types_to_go[$ibeg] eq 'k'
13301 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
13304 # chain continues...
13305 # check for chain ending at end of a statement
13306 if ( $line == $max_line ) {
13308 # see of this line ends a statement
13309 my $iend = $ri_last->[$line];
13310 $this_line_is_semicolon_terminated =
13311 $types_to_go[$iend] eq ';'
13313 # with possible side comment
13314 || ( $types_to_go[$iend] eq '#'
13315 && $iend - $ibeg >= 2
13316 && $types_to_go[ $iend - 2 ] eq ';'
13317 && $types_to_go[ $iend - 1 ] eq 'b' );
13319 $line_2 = $line if ($this_line_is_semicolon_terminated);
13327 elsif ( $lev < $lev_last ) {
13329 # chain ends with previous line
13330 $line_2 = $line - 1;
13332 elsif ( $lev > $lev_last ) {
13338 # undo the continuation indentation if a chain ends
13339 if ( defined($line_2) && defined($line_1) ) {
13340 my $continuation_line_count = $line_2 - $line_1 + 1;
13341 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
13342 (0) x ($continuation_line_count)
13343 if ( $continuation_line_count >= 0 );
13344 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
13345 = @reduced_spaces_to_go[ @{$ri_first}
13346 [ $line_1 .. $line_2 ] ];
13351 # not in a chain yet..
13354 # look for start of a new sort/map/grep chain
13355 if ( $lev > $lev_last ) {
13356 if ( $types_to_go[$ibeg] eq 'k'
13357 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
13371 # If there is a single, long parameter within parens, like this:
13373 # $self->command( "/msg "
13374 # . $infoline->chan
13375 # . " You said $1, but did you know that it's square was "
13376 # . $1 * $1 . " ?" );
13378 # we can remove the continuation indentation of the 2nd and higher lines
13379 # to achieve this effect, which is more pleasing:
13381 # $self->command("/msg "
13382 # . $infoline->chan
13383 # . " You said $1, but did you know that it's square was "
13384 # . $1 * $1 . " ?");
13386 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
13387 my $max_line = @{$ri_first} - 1;
13389 # must be multiple lines
13390 return unless $max_line > $line_open;
13392 my $lev_start = $levels_to_go[$i_start];
13393 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
13395 # see if all additional lines in this container have continuation
13398 my $line_1 = 1 + $line_open;
13399 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
13400 my $ibeg = $ri_first->[$n];
13401 my $iend = $ri_last->[$n];
13402 if ( $ibeg eq $closing_index ) { $n--; last }
13403 return if ( $lev_start != $levels_to_go[$ibeg] );
13404 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
13405 last if ( $closing_index <= $iend );
13408 # we can reduce the indentation of all continuation lines
13409 my $continuation_line_count = $n - $line_open;
13410 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
13411 (0) x ($continuation_line_count);
13412 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
13413 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
13419 # insert $pad_spaces before token number $ipad
13420 my ( $ipad, $pad_spaces ) = @_;
13421 if ( $pad_spaces > 0 ) {
13422 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
13424 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
13425 $tokens_to_go[$ipad] = "";
13433 $token_lengths_to_go[$ipad] += $pad_spaces;
13434 foreach my $i ( $ipad .. $max_index_to_go ) {
13435 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
13445 my @q = qw( + - * / );
13446 @is_math_op{@q} = (1) x scalar(@q);
13449 sub set_logical_padding {
13451 # Look at a batch of lines and see if extra padding can improve the
13452 # alignment when there are certain leading operators. Here is an
13453 # example, in which some extra space is introduced before
13454 # '( $year' to make it line up with the subsequent lines:
13456 # if ( ( $Year < 1601 )
13457 # || ( $Year > 2899 )
13458 # || ( $EndYear < 1601 )
13459 # || ( $EndYear > 2899 ) )
13461 # &Error_OutOfRange;
13464 my ( $ri_first, $ri_last ) = @_;
13465 my $max_line = @{$ri_first} - 1;
13467 # FIXME: move these declarations below
13468 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
13469 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
13471 # looking at each line of this batch..
13472 foreach my $line ( 0 .. $max_line - 1 ) {
13474 # see if the next line begins with a logical operator
13475 $ibeg = $ri_first->[$line];
13476 $iend = $ri_last->[$line];
13477 $ibeg_next = $ri_first->[ $line + 1 ];
13478 $tok_next = $tokens_to_go[$ibeg_next];
13479 $type_next = $types_to_go[$ibeg_next];
13481 $has_leading_op_next = ( $tok_next =~ /^\w/ )
13482 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
13483 : $is_chain_operator{$type_next}; # and, or
13485 next unless ($has_leading_op_next);
13487 # next line must not be at lesser depth
13489 if ( $nesting_depth_to_go[$ibeg] >
13490 $nesting_depth_to_go[$ibeg_next] );
13492 # identify the token in this line to be padded on the left
13495 # handle lines at same depth...
13496 if ( $nesting_depth_to_go[$ibeg] ==
13497 $nesting_depth_to_go[$ibeg_next] )
13500 # if this is not first line of the batch ...
13503 # and we have leading operator..
13504 next if $has_leading_op;
13506 # Introduce padding if..
13507 # 1. the previous line is at lesser depth, or
13508 # 2. the previous line ends in an assignment
13509 # 3. the previous line ends in a 'return'
13510 # 4. the previous line ends in a comma
13511 # Example 1: previous line at lesser depth
13512 # if ( ( $Year < 1601 ) # <- we are here but
13513 # || ( $Year > 2899 ) # list has not yet
13514 # || ( $EndYear < 1601 ) # collapsed vertically
13515 # || ( $EndYear > 2899 ) )
13518 # Example 2: previous line ending in assignment:
13520 # $year % 4 ? 0 # <- We are here
13521 # : $year % 100 ? 1
13522 # : $year % 400 ? 0
13525 # Example 3: previous line ending in comma:
13532 # be sure levels agree (do not indent after an indented 'if')
13534 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
13536 # allow padding on first line after a comma but only if:
13537 # (1) this is line 2 and
13538 # (2) there are at more than three lines and
13539 # (3) lines 3 and 4 have the same leading operator
13540 # These rules try to prevent padding within a long
13541 # comma-separated list.
13543 if ( $types_to_go[$iendm] eq ','
13547 my $ibeg_next_next = $ri_first->[ $line + 2 ];
13548 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
13549 $ok_comma = $tok_next_next eq $tok_next;
13554 $is_assignment{ $types_to_go[$iendm] }
13556 || ( $nesting_depth_to_go[$ibegm] <
13557 $nesting_depth_to_go[$ibeg] )
13558 || ( $types_to_go[$iendm] eq 'k'
13559 && $tokens_to_go[$iendm] eq 'return' )
13562 # we will add padding before the first token
13566 # for first line of the batch..
13569 # WARNING: Never indent if first line is starting in a
13570 # continued quote, which would change the quote.
13571 next if $starting_in_quote;
13573 # if this is text after closing '}'
13574 # then look for an interior token to pad
13575 if ( $types_to_go[$ibeg] eq '}' ) {
13579 # otherwise, we might pad if it looks really good
13582 # we might pad token $ibeg, so be sure that it
13583 # is at the same depth as the next line.
13585 if ( $nesting_depth_to_go[$ibeg] !=
13586 $nesting_depth_to_go[$ibeg_next] );
13588 # We can pad on line 1 of a statement if at least 3
13589 # lines will be aligned. Otherwise, it
13590 # can look very confusing.
13592 # We have to be careful not to pad if there are too few
13593 # lines. The current rule is:
13594 # (1) in general we require at least 3 consecutive lines
13595 # with the same leading chain operator token,
13596 # (2) but an exception is that we only require two lines
13597 # with leading colons if there are no more lines. For example,
13598 # the first $i in the following snippet would get padding
13599 # by the second rule:
13601 # $i == 1 ? ( "First", "Color" )
13602 # : $i == 2 ? ( "Then", "Rarity" )
13603 # : ( "Then", "Name" );
13605 if ( $max_line > 1 ) {
13606 my $leading_token = $tokens_to_go[$ibeg_next];
13609 # never indent line 1 of a '.' series because
13610 # previous line is most likely at same level.
13611 # TODO: we should also look at the leasing_spaces
13612 # of the last output line and skip if it is same
13614 next if ( $leading_token eq '.' );
13617 foreach my $l ( 2 .. 3 ) {
13618 last if ( $line + $l > $max_line );
13619 my $ibeg_next_next = $ri_first->[ $line + $l ];
13620 if ( $tokens_to_go[$ibeg_next_next] ne
13623 $tokens_differ = 1;
13628 next if ($tokens_differ);
13629 next if ( $count < 3 && $leading_token ne ':' );
13639 # find interior token to pad if necessary
13640 if ( !defined($ipad) ) {
13642 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
13644 # find any unclosed container
13646 unless ( $type_sequence_to_go[$i]
13647 && $mate_index_to_go[$i] > $iend );
13649 # find next nonblank token to pad
13650 $ipad = $inext_to_go[$i];
13651 last if ( $ipad > $iend );
13656 # We cannot pad the first leading token of a file because
13657 # it could cause a bug in which the starting indentation
13658 # level is guessed incorrectly each time the code is run
13659 # though perltidy, thus causing the code to march off to
13660 # the right. For example, the following snippet would have
13663 ## ov_method mycan( $package, '(""' ), $package
13664 ## or ov_method mycan( $package, '(0+' ), $package
13665 ## or ov_method mycan( $package, '(bool' ), $package
13666 ## or ov_method mycan( $package, '(nomethod' ), $package;
13668 # If this snippet is within a block this won't happen
13669 # unless the user just processes the snippet alone within
13670 # an editor. In that case either the user will see and
13671 # fix the problem or it will be corrected next time the
13672 # entire file is processed with perltidy.
13673 ##next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
13674 next if ( $ipad == 0 && $peak_batch_size <= 1 );
13676 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
13677 ## IT DID MORE HARM THAN GOOD
13679 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
13682 ##? # do not put leading padding for just 2 lines of math
13683 ##? if ( $ipad == $ibeg
13685 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
13686 ##? && $is_math_op{$type_next}
13687 ##? && $line + 2 <= $max_line )
13689 ##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
13690 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
13691 ##? next if !$is_math_op{$type_next_next};
13694 # next line must not be at greater depth
13695 my $iend_next = $ri_last->[ $line + 1 ];
13697 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
13698 $nesting_depth_to_go[$ipad] );
13700 # lines must be somewhat similar to be padded..
13701 my $inext_next = $inext_to_go[$ibeg_next];
13702 my $type = $types_to_go[$ipad];
13703 my $type_next = $types_to_go[ $ipad + 1 ];
13705 # see if there are multiple continuation lines
13706 my $logical_continuation_lines = 1;
13707 if ( $line + 2 <= $max_line ) {
13708 my $leading_token = $tokens_to_go[$ibeg_next];
13709 my $ibeg_next_next = $ri_first->[ $line + 2 ];
13710 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
13711 && $nesting_depth_to_go[$ibeg_next] eq
13712 $nesting_depth_to_go[$ibeg_next_next] )
13714 $logical_continuation_lines++;
13718 # see if leading types match
13719 my $types_match = $types_to_go[$inext_next] eq $type;
13720 my $matches_without_bang;
13722 # if first line has leading ! then compare the following token
13723 if ( !$types_match && $type eq '!' ) {
13724 $types_match = $matches_without_bang =
13725 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
13730 # either we have multiple continuation lines to follow
13731 # and we are not padding the first token
13732 ( $logical_continuation_lines > 1 && $ipad > 0 )
13740 # and keywords must match if keyword
13743 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
13749 #----------------------begin special checks--------------
13752 # A check is needed before we can make the pad.
13753 # If we are in a list with some long items, we want each
13754 # item to stand out. So in the following example, the
13755 # first line beginning with '$casefold->' would look good
13756 # padded to align with the next line, but then it
13757 # would be indented more than the last line, so we
13761 # $casefold->{code} eq '0041'
13762 # && $casefold->{status} eq 'C'
13763 # && $casefold->{mapping} eq '0061',
13768 # It would be faster, and almost as good, to use a comma
13769 # count, and not pad if comma_count > 1 and the previous
13770 # line did not end with a comma.
13774 my $ibg = $ri_first->[ $line + 1 ];
13775 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
13777 # just use simplified formula for leading spaces to avoid
13778 # needless sub calls
13779 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
13781 # look at each line beyond the next ..
13783 foreach my $ltest ( $line + 2 .. $max_line ) {
13785 my $ibg = $ri_first->[$l];
13787 # quit looking at the end of this container
13789 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
13790 || ( $nesting_depth_to_go[$ibg] < $depth );
13792 # cannot do the pad if a later line would be
13794 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
13800 # don't pad if we end in a broken list
13801 if ( $l == $max_line ) {
13802 my $i2 = $ri_last->[$l];
13803 if ( $types_to_go[$i2] eq '#' ) {
13804 my $i1 = $ri_first->[$l];
13807 terminal_type( \@types_to_go, \@block_type_to_go,
13814 # a minus may introduce a quoted variable, and we will
13815 # add the pad only if this line begins with a bare word,
13816 # such as for the word 'Button' here:
13818 # Button => "Print letter \"~$_\"",
13819 # -command => [ sub { print "$_[0]\n" }, $_ ],
13820 # -accelerator => "Meta+$_"
13823 # On the other hand, if 'Button' is quoted, it looks best
13826 # 'Button' => "Print letter \"~$_\"",
13827 # -command => [ sub { print "$_[0]\n" }, $_ ],
13828 # -accelerator => "Meta+$_"
13830 if ( $types_to_go[$ibeg_next] eq 'm' ) {
13831 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
13834 next unless $ok_to_pad;
13836 #----------------------end special check---------------
13838 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
13839 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
13840 $pad_spaces = $length_2 - $length_1;
13842 # If the first line has a leading ! and the second does
13843 # not, then remove one space to try to align the next
13844 # leading characters, which are often the same. For example:
13846 # || $ts == $self->Holder
13847 # || $self->Holder->Type eq "Arena" )
13849 # This usually helps readability, but if there are subsequent
13850 # ! operators things will still get messed up. For example:
13852 # if ( !exists $Net::DNS::typesbyname{$qtype}
13853 # && exists $Net::DNS::classesbyname{$qtype}
13854 # && !exists $Net::DNS::classesbyname{$qclass}
13855 # && exists $Net::DNS::typesbyname{$qclass} )
13856 # We can't fix that.
13857 if ($matches_without_bang) { $pad_spaces-- }
13859 # make sure this won't change if -lp is used
13860 my $indentation_1 = $leading_spaces_to_go[$ibeg];
13861 if ( ref($indentation_1) ) {
13862 if ( $indentation_1->get_recoverable_spaces() == 0 ) {
13863 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
13864 unless ( $indentation_2->get_recoverable_spaces() == 0 )
13871 # we might be able to handle a pad of -1 by removing a blank
13873 if ( $pad_spaces < 0 ) {
13875 if ( $pad_spaces == -1 ) {
13876 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
13878 pad_token( $ipad - 1, $pad_spaces );
13884 # now apply any padding for alignment
13885 if ( $ipad >= 0 && $pad_spaces ) {
13887 my $length_t = total_line_length( $ibeg, $iend );
13888 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
13890 pad_token( $ipad, $pad_spaces );
13898 $has_leading_op = $has_leading_op_next;
13899 } # end of loop over lines
13904 sub correct_lp_indentation {
13906 # When the -lp option is used, we need to make a last pass through
13907 # each line to correct the indentation positions in case they differ
13908 # from the predictions. This is necessary because perltidy uses a
13909 # predictor/corrector method for aligning with opening parens. The
13910 # predictor is usually good, but sometimes stumbles. The corrector
13911 # tries to patch things up once the actual opening paren locations
13913 my ( $ri_first, $ri_last ) = @_;
13914 my $do_not_pad = 0;
13916 # Note on flag '$do_not_pad':
13917 # We want to avoid a situation like this, where the aligner inserts
13918 # whitespace before the '=' to align it with a previous '=', because
13919 # otherwise the parens might become mis-aligned in a situation like
13920 # this, where the '=' has become aligned with the previous line,
13921 # pushing the opening '(' forward beyond where we want it.
13923 # $mkFloor::currentRoom = '';
13924 # $mkFloor::c_entry = $c->Entry(
13926 # -relief => 'sunken',
13930 # We leave it to the aligner to decide how to do this.
13932 # first remove continuation indentation if appropriate
13933 my $max_line = @{$ri_first} - 1;
13935 # looking at each line of this batch..
13936 my ( $ibeg, $iend );
13937 foreach my $line ( 0 .. $max_line ) {
13938 $ibeg = $ri_first->[$line];
13939 $iend = $ri_last->[$line];
13941 # looking at each token in this output line..
13942 foreach my $i ( $ibeg .. $iend ) {
13944 # How many space characters to place before this token
13945 # for special alignment. Actual padding is done in the
13948 # looking for next unvisited indentation item
13949 my $indentation = $leading_spaces_to_go[$i];
13950 if ( !$indentation->get_marked() ) {
13951 $indentation->set_marked(1);
13953 # looking for indentation item for which we are aligning
13954 # with parens, braces, and brackets
13955 next unless ( $indentation->get_align_paren() );
13957 # skip closed container on this line
13958 if ( $i > $ibeg ) {
13959 my $im = max( $ibeg, $iprev_to_go[$i] );
13960 if ( $type_sequence_to_go[$im]
13961 && $mate_index_to_go[$im] <= $iend )
13967 if ( $line == 1 && $i == $ibeg ) {
13971 # Ok, let's see what the error is and try to fix it
13973 my $predicted_pos = $indentation->get_spaces();
13974 if ( $i > $ibeg ) {
13976 # token is mid-line - use length to previous token
13977 $actual_pos = total_line_length( $ibeg, $i - 1 );
13979 # for mid-line token, we must check to see if all
13980 # additional lines have continuation indentation,
13981 # and remove it if so. Otherwise, we do not get
13983 my $closing_index = $indentation->get_closed();
13984 if ( $closing_index > $iend ) {
13985 my $ibeg_next = $ri_first->[ $line + 1 ];
13986 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
13987 undo_lp_ci( $line, $i, $closing_index, $ri_first,
13992 elsif ( $line > 0 ) {
13994 # handle case where token starts a new line;
13995 # use length of previous line
13996 my $ibegm = $ri_first->[ $line - 1 ];
13997 my $iendm = $ri_last->[ $line - 1 ];
13998 $actual_pos = total_line_length( $ibegm, $iendm );
14002 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
14006 # token is first character of first line of batch
14007 $actual_pos = $predicted_pos;
14010 my $move_right = $actual_pos - $predicted_pos;
14012 # done if no error to correct (gnu2.t)
14013 if ( $move_right == 0 ) {
14014 $indentation->set_recoverable_spaces($move_right);
14018 # if we have not seen closure for this indentation in
14019 # this batch, we can only pass on a request to the
14021 my $closing_index = $indentation->get_closed();
14023 if ( $closing_index < 0 ) {
14024 $indentation->set_recoverable_spaces($move_right);
14028 # If necessary, look ahead to see if there is really any
14029 # leading whitespace dependent on this whitespace, and
14030 # also find the longest line using this whitespace.
14031 # Since it is always safe to move left if there are no
14032 # dependents, we only need to do this if we may have
14033 # dependent nodes or need to move right.
14035 my $right_margin = 0;
14036 my $have_child = $indentation->get_have_child();
14038 my %saw_indentation;
14039 my $line_count = 1;
14040 $saw_indentation{$indentation} = $indentation;
14042 if ( $have_child || $move_right > 0 ) {
14044 my $max_length = 0;
14045 if ( $i == $ibeg ) {
14046 $max_length = total_line_length( $ibeg, $iend );
14049 # look ahead at the rest of the lines of this batch..
14050 foreach my $line_t ( $line + 1 .. $max_line ) {
14051 my $ibeg_t = $ri_first->[$line_t];
14052 my $iend_t = $ri_last->[$line_t];
14053 last if ( $closing_index <= $ibeg_t );
14055 # remember all different indentation objects
14056 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
14057 $saw_indentation{$indentation_t} = $indentation_t;
14060 # remember longest line in the group
14061 my $length_t = total_line_length( $ibeg_t, $iend_t );
14062 if ( $length_t > $max_length ) {
14063 $max_length = $length_t;
14066 $right_margin = maximum_line_length($ibeg) - $max_length;
14067 if ( $right_margin < 0 ) { $right_margin = 0 }
14070 my $first_line_comma_count =
14071 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
14072 my $comma_count = $indentation->get_comma_count();
14073 my $arrow_count = $indentation->get_arrow_count();
14075 # This is a simple approximate test for vertical alignment:
14076 # if we broke just after an opening paren, brace, bracket,
14077 # and there are 2 or more commas in the first line,
14078 # and there are no '=>'s,
14079 # then we are probably vertically aligned. We could set
14080 # an exact flag in sub scan_list, but this is good
14082 my $indentation_count = keys %saw_indentation;
14083 my $is_vertically_aligned =
14085 && $first_line_comma_count > 1
14086 && $indentation_count == 1
14087 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
14089 # Make the move if possible ..
14092 # we can always move left
14095 # but we should only move right if we are sure it will
14096 # not spoil vertical alignment
14097 || ( $comma_count == 0 )
14098 || ( $comma_count > 0 && !$is_vertically_aligned )
14102 ( $move_right <= $right_margin )
14106 foreach ( keys %saw_indentation ) {
14107 $saw_indentation{$_}
14108 ->permanently_decrease_available_spaces( -$move );
14112 # Otherwise, record what we want and the vertical aligner
14113 # will try to recover it.
14115 $indentation->set_recoverable_spaces($move_right);
14120 return $do_not_pad;
14123 # flush is called to output any tokens in the pipeline, so that
14124 # an alternate source of lines can be written in the correct order
14128 destroy_one_line_block();
14129 $self->output_line_to_go();
14130 Perl::Tidy::VerticalAligner::flush();
14134 sub reset_block_text_accumulator {
14136 # save text after 'if' and 'elsif' to append after 'else'
14137 if ($accumulating_text_for_block) {
14139 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
14140 push @{$rleading_block_if_elsif_text}, $leading_block_text;
14143 $accumulating_text_for_block = "";
14144 $leading_block_text = "";
14145 $leading_block_text_level = 0;
14146 $leading_block_text_length_exceeded = 0;
14147 $leading_block_text_line_number = 0;
14148 $leading_block_text_line_length = 0;
14152 sub set_block_text_accumulator {
14154 $accumulating_text_for_block = $tokens_to_go[$i];
14155 if ( $accumulating_text_for_block !~ /^els/ ) {
14156 $rleading_block_if_elsif_text = [];
14158 $leading_block_text = "";
14159 $leading_block_text_level = $levels_to_go[$i];
14160 $leading_block_text_line_number = get_output_line_number();
14161 ##$vertical_aligner_object->get_output_line_number();
14162 $leading_block_text_length_exceeded = 0;
14164 # this will contain the column number of the last character
14165 # of the closing side comment
14166 $leading_block_text_line_length =
14167 length($csc_last_label) +
14168 length($accumulating_text_for_block) +
14169 length( $rOpts->{'closing-side-comment-prefix'} ) +
14170 $leading_block_text_level * $rOpts_indent_columns + 3;
14174 sub accumulate_block_text {
14177 # accumulate leading text for -csc, ignoring any side comments
14178 if ( $accumulating_text_for_block
14179 && !$leading_block_text_length_exceeded
14180 && $types_to_go[$i] ne '#' )
14183 my $added_length = $token_lengths_to_go[$i];
14184 $added_length += 1 if $i == 0;
14185 my $new_line_length = $leading_block_text_line_length + $added_length;
14187 # we can add this text if we don't exceed some limits..
14190 # we must not have already exceeded the text length limit
14191 length($leading_block_text) <
14192 $rOpts_closing_side_comment_maximum_text
14195 # the new total line length must be below the line length limit
14196 # or the new length must be below the text length limit
14197 # (ie, we may allow one token to exceed the text length limit)
14200 maximum_line_length_for_level($leading_block_text_level)
14202 || length($leading_block_text) + $added_length <
14203 $rOpts_closing_side_comment_maximum_text
14206 # UNLESS: we are adding a closing paren before the brace we seek.
14207 # This is an attempt to avoid situations where the ... to be
14208 # added are longer than the omitted right paren, as in:
14210 # foreach my $item (@a_rather_long_variable_name_here) {
14212 # } ## end foreach my $item (@a_rather_long_variable_name_here...
14215 $tokens_to_go[$i] eq ')'
14218 $i + 1 <= $max_index_to_go
14219 && $block_type_to_go[ $i + 1 ] eq
14220 $accumulating_text_for_block
14222 || ( $i + 2 <= $max_index_to_go
14223 && $block_type_to_go[ $i + 2 ] eq
14224 $accumulating_text_for_block )
14230 # add an extra space at each newline
14231 if ( $i == 0 ) { $leading_block_text .= ' ' }
14233 # add the token text
14234 $leading_block_text .= $tokens_to_go[$i];
14235 $leading_block_text_line_length = $new_line_length;
14238 # show that text was truncated if necessary
14239 elsif ( $types_to_go[$i] ne 'b' ) {
14240 $leading_block_text_length_exceeded = 1;
14241 $leading_block_text .= '...';
14248 my %is_if_elsif_else_unless_while_until_for_foreach;
14252 # These block types may have text between the keyword and opening
14253 # curly. Note: 'else' does not, but must be included to allow trailing
14254 # if/elsif text to be appended.
14255 # patch for SWITCH/CASE: added 'case' and 'when'
14257 qw(if elsif else unless while until for foreach case when catch);
14258 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
14262 sub accumulate_csc_text {
14264 # called once per output buffer when -csc is used. Accumulates
14265 # the text placed after certain closing block braces.
14266 # Defines and returns the following for this buffer:
14268 my $block_leading_text = ""; # the leading text of the last '}'
14269 my $rblock_leading_if_elsif_text;
14270 my $i_block_leading_text =
14271 -1; # index of token owning block_leading_text
14272 my $block_line_count = 100; # how many lines the block spans
14273 my $terminal_type = 'b'; # type of last nonblank token
14274 my $i_terminal = 0; # index of last nonblank token
14275 my $terminal_block_type = "";
14277 # update most recent statement label
14278 $csc_last_label = "" unless ($csc_last_label);
14279 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
14280 my $block_label = $csc_last_label;
14282 # Loop over all tokens of this batch
14283 for my $i ( 0 .. $max_index_to_go ) {
14284 my $type = $types_to_go[$i];
14285 my $block_type = $block_type_to_go[$i];
14286 my $token = $tokens_to_go[$i];
14288 # remember last nonblank token type
14289 if ( $type ne '#' && $type ne 'b' ) {
14290 $terminal_type = $type;
14291 $terminal_block_type = $block_type;
14295 my $type_sequence = $type_sequence_to_go[$i];
14296 if ( $block_type && $type_sequence ) {
14298 if ( $token eq '}' ) {
14300 # restore any leading text saved when we entered this block
14301 if ( defined( $block_leading_text{$type_sequence} ) ) {
14302 ( $block_leading_text, $rblock_leading_if_elsif_text )
14303 = @{ $block_leading_text{$type_sequence} };
14304 $i_block_leading_text = $i;
14305 delete $block_leading_text{$type_sequence};
14306 $rleading_block_if_elsif_text =
14307 $rblock_leading_if_elsif_text;
14310 if ( defined( $csc_block_label{$type_sequence} ) ) {
14311 $block_label = $csc_block_label{$type_sequence};
14312 delete $csc_block_label{$type_sequence};
14315 # if we run into a '}' then we probably started accumulating
14316 # at something like a trailing 'if' clause..no harm done.
14317 if ( $accumulating_text_for_block
14318 && $levels_to_go[$i] <= $leading_block_text_level )
14320 my $lev = $levels_to_go[$i];
14321 reset_block_text_accumulator();
14324 if ( defined( $block_opening_line_number{$type_sequence} ) )
14326 my $output_line_number = get_output_line_number();
14327 ##$vertical_aligner_object->get_output_line_number();
14328 $block_line_count =
14329 $output_line_number -
14330 $block_opening_line_number{$type_sequence} + 1;
14331 delete $block_opening_line_number{$type_sequence};
14335 # Error: block opening line undefined for this line..
14336 # This shouldn't be possible, but it is not a
14337 # significant problem.
14341 elsif ( $token eq '{' ) {
14343 my $line_number = get_output_line_number();
14344 ##$vertical_aligner_object->get_output_line_number();
14345 $block_opening_line_number{$type_sequence} = $line_number;
14347 # set a label for this block, except for
14348 # a bare block which already has the label
14349 # A label can only be used on the next {
14350 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
14351 $csc_block_label{$type_sequence} = $csc_last_label;
14352 $csc_last_label = "";
14354 if ( $accumulating_text_for_block
14355 && $levels_to_go[$i] == $leading_block_text_level )
14358 if ( $accumulating_text_for_block eq $block_type ) {
14360 # save any leading text before we enter this block
14361 $block_leading_text{$type_sequence} = [
14362 $leading_block_text,
14363 $rleading_block_if_elsif_text
14365 $block_opening_line_number{$type_sequence} =
14366 $leading_block_text_line_number;
14367 reset_block_text_accumulator();
14371 # shouldn't happen, but not a serious error.
14372 # We were accumulating -csc text for block type
14373 # $accumulating_text_for_block and unexpectedly
14374 # encountered a '{' for block type $block_type.
14381 && $csc_new_statement_ok
14382 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
14383 && $token =~ /$closing_side_comment_list_pattern/o )
14385 set_block_text_accumulator($i);
14389 # note: ignoring type 'q' because of tricks being played
14390 # with 'q' for hanging side comments
14391 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
14392 $csc_new_statement_ok =
14393 ( $block_type || $type eq 'J' || $type eq ';' );
14396 && $accumulating_text_for_block
14397 && $levels_to_go[$i] == $leading_block_text_level )
14399 reset_block_text_accumulator();
14402 accumulate_block_text($i);
14407 # Treat an 'else' block specially by adding preceding 'if' and
14408 # 'elsif' text. Otherwise, the 'end else' is not helpful,
14409 # especially for cuddled-else formatting.
14410 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
14411 $block_leading_text =
14412 make_else_csc_text( $i_terminal, $terminal_block_type,
14413 $block_leading_text, $rblock_leading_if_elsif_text );
14416 # if this line ends in a label then remember it for the next pass
14417 $csc_last_label = "";
14418 if ( $terminal_type eq 'J' ) {
14419 $csc_last_label = $tokens_to_go[$i_terminal];
14422 return ( $terminal_type, $i_terminal, $i_block_leading_text,
14423 $block_leading_text, $block_line_count, $block_label );
14427 sub make_else_csc_text {
14429 # create additional -csc text for an 'else' and optionally 'elsif',
14430 # depending on the value of switch
14431 # $rOpts_closing_side_comment_else_flag:
14433 # = 0 add 'if' text to trailing else
14434 # = 1 same as 0 plus:
14435 # add 'if' to 'elsif's if can fit in line length
14436 # add last 'elsif' to trailing else if can fit in one line
14437 # = 2 same as 1 but do not check if exceed line length
14439 # $rif_elsif_text = a reference to a list of all previous closing
14440 # side comments created for this if block
14442 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
14443 my $csc_text = $block_leading_text;
14445 if ( $block_type eq 'elsif'
14446 && $rOpts_closing_side_comment_else_flag == 0 )
14451 my $count = @{$rif_elsif_text};
14452 return $csc_text unless ($count);
14454 my $if_text = '[ if' . $rif_elsif_text->[0];
14456 # always show the leading 'if' text on 'else'
14457 if ( $block_type eq 'else' ) {
14458 $csc_text .= $if_text;
14461 # see if that's all
14462 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
14466 my $last_elsif_text = "";
14467 if ( $count > 1 ) {
14468 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
14469 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
14472 # tentatively append one more item
14473 my $saved_text = $csc_text;
14474 if ( $block_type eq 'else' ) {
14475 $csc_text .= $last_elsif_text;
14478 $csc_text .= ' ' . $if_text;
14481 # all done if no length checks requested
14482 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
14486 # undo it if line length exceeded
14488 length($csc_text) +
14489 length($block_type) +
14490 length( $rOpts->{'closing-side-comment-prefix'} ) +
14491 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
14492 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
14493 $csc_text = $saved_text;
14498 { # sub balance_csc_text
14513 sub balance_csc_text {
14515 # Append characters to balance a closing side comment so that editors
14516 # such as vim can correctly jump through code.
14518 # input = ## end foreach my $foo ( sort { $b ...
14519 # output = ## end foreach my $foo ( sort { $b ...})
14521 # NOTE: This routine does not currently filter out structures within
14522 # quoted text because the bounce algorithms in text editors do not
14523 # necessarily do this either (a version of vim was checked and
14524 # did not do this).
14526 # Some complex examples which will cause trouble for some editors:
14527 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
14528 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
14529 # if ( $1 eq '{' ) {
14530 # test file test1/braces.pl has many such examples.
14534 # loop to examine characters one-by-one, RIGHT to LEFT and
14535 # build a balancing ending, LEFT to RIGHT.
14536 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
14538 my $char = substr( $csc, $pos, 1 );
14540 # ignore everything except structural characters
14541 next unless ( $matching_char{$char} );
14543 # pop most recently appended character
14544 my $top = chop($csc);
14546 # push it back plus the mate to the newest character
14547 # unless they balance each other.
14548 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
14551 # return the balanced string
14556 sub add_closing_side_comment {
14560 # add closing side comments after closing block braces if -csc used
14561 my $cscw_block_comment;
14563 #---------------------------------------------------------------
14564 # Step 1: loop through all tokens of this line to accumulate
14565 # the text needed to create the closing side comments. Also see
14566 # how the line ends.
14567 #---------------------------------------------------------------
14569 my ( $terminal_type, $i_terminal, $i_block_leading_text,
14570 $block_leading_text, $block_line_count, $block_label )
14571 = accumulate_csc_text();
14573 #---------------------------------------------------------------
14574 # Step 2: make the closing side comment if this ends a block
14575 #---------------------------------------------------------------
14576 ##my $have_side_comment = $i_terminal != $max_index_to_go;
14577 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
14579 # if this line might end in a block closure..
14581 $terminal_type eq '}'
14586 # the block is long enough
14587 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
14589 # or there is an existing comment to check
14590 || ( $have_side_comment
14591 && $rOpts->{'closing-side-comment-warnings'} )
14594 # .. and if this is one of the types of interest
14595 && $block_type_to_go[$i_terminal] =~
14596 /$closing_side_comment_list_pattern/o
14598 # .. but not an anonymous sub
14599 # These are not normally of interest, and their closing braces are
14600 # often followed by commas or semicolons anyway. This also avoids
14601 # possible erratic output due to line numbering inconsistencies
14602 # in the cases where their closing braces terminate a line.
14603 && $block_type_to_go[$i_terminal] ne 'sub'
14605 # ..and the corresponding opening brace must is not in this batch
14606 # (because we do not need to tag one-line blocks, although this
14607 # should also be caught with a positive -csci value)
14608 && $mate_index_to_go[$i_terminal] < 0
14613 # this is the last token (line doesn't have a side comment)
14614 !$have_side_comment
14616 # or the old side comment is a closing side comment
14617 || $tokens_to_go[$max_index_to_go] =~
14618 /$closing_side_comment_prefix_pattern/o
14623 # then make the closing side comment text
14624 if ($block_label) { $block_label .= " " }
14626 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
14628 # append any extra descriptive text collected above
14629 if ( $i_block_leading_text == $i_terminal ) {
14630 $token .= $block_leading_text;
14633 $token = balance_csc_text($token)
14634 if $rOpts->{'closing-side-comments-balanced'};
14636 $token =~ s/\s*$//; # trim any trailing whitespace
14638 # handle case of existing closing side comment
14639 if ($have_side_comment) {
14641 # warn if requested and tokens differ significantly
14642 if ( $rOpts->{'closing-side-comment-warnings'} ) {
14643 my $old_csc = $tokens_to_go[$max_index_to_go];
14644 my $new_csc = $token;
14645 $new_csc =~ s/\s+//g; # trim all whitespace
14646 $old_csc =~ s/\s+//g; # trim all whitespace
14647 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
14648 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
14649 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
14650 my $new_trailing_dots = $1;
14651 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
14653 # Patch to handle multiple closing side comments at
14654 # else and elsif's. These have become too complicated
14655 # to check, so if we see an indication of
14656 # '[ if' or '[ # elsif', then assume they were made
14658 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
14659 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
14661 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
14662 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
14665 # if old comment is contained in new comment,
14666 # only compare the common part.
14667 if ( length($new_csc) > length($old_csc) ) {
14668 $new_csc = substr( $new_csc, 0, length($old_csc) );
14671 # if the new comment is shorter and has been limited,
14672 # only compare the common part.
14673 if ( length($new_csc) < length($old_csc)
14674 && $new_trailing_dots )
14676 $old_csc = substr( $old_csc, 0, length($new_csc) );
14679 # any remaining difference?
14680 if ( $new_csc ne $old_csc ) {
14682 # just leave the old comment if we are below the threshold
14683 # for creating side comments
14684 if ( $block_line_count <
14685 $rOpts->{'closing-side-comment-interval'} )
14690 # otherwise we'll make a note of it
14694 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
14697 # save the old side comment in a new trailing block comment
14698 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
14701 $cscw_block_comment =
14702 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
14707 # No differences.. we can safely delete old comment if we
14708 # are below the threshold
14709 if ( $block_line_count <
14710 $rOpts->{'closing-side-comment-interval'} )
14713 $self->unstore_token_to_go()
14714 if ( $types_to_go[$max_index_to_go] eq '#' );
14715 $self->unstore_token_to_go()
14716 if ( $types_to_go[$max_index_to_go] eq 'b' );
14721 # switch to the new csc (unless we deleted it!)
14722 $tokens_to_go[$max_index_to_go] = $token if $token;
14725 # handle case of NO existing closing side comment
14728 # Remove any existing blank and add another below.
14729 # This is a tricky point. A side comment needs to have the same level
14730 # as the preceding closing brace or else the line will not get the right
14731 # indentation. So even if we have a blank, we are going to replace it.
14732 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
14733 unstore_token_to_go();
14736 # insert the new side comment into the output token stream
14738 my $block_type = '';
14739 my $type_sequence = '';
14740 my $container_environment =
14741 $container_environment_to_go[$max_index_to_go];
14742 my $level = $levels_to_go[$max_index_to_go];
14743 my $slevel = $nesting_depth_to_go[$max_index_to_go];
14744 my $no_internal_newlines = 0;
14746 my $ci_level = $ci_levels_to_go[$max_index_to_go];
14747 my $in_continued_quote = 0;
14749 # insert a blank token
14750 $self->insert_new_token_to_go( ' ', 'b', $slevel,
14751 $no_internal_newlines );
14753 # then the side comment
14754 $self->insert_new_token_to_go( $token, $type, $slevel,
14755 $no_internal_newlines );
14758 return $cscw_block_comment;
14761 sub previous_nonblank_token {
14765 return "" if ( $im < 0 );
14766 if ( $types_to_go[$im] eq 'b' ) { $im--; }
14767 return "" if ( $im < 0 );
14768 $name = $tokens_to_go[$im];
14770 # prepend any sub name to an isolated -> to avoid unwanted alignments
14771 # [test case is test8/penco.pl]
14772 if ( $name eq '->' ) {
14774 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
14775 $name = $tokens_to_go[$im] . $name;
14781 sub send_lines_to_vertical_aligner {
14783 my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_;
14785 my $rindentation_list = [0]; # ref to indentations for each line
14787 # define the array @matching_token_to_go for the output tokens
14788 # which will be non-blank for each special token (such as =>)
14789 # for which alignment is required.
14790 set_vertical_alignment_markers( $ri_first, $ri_last );
14792 # flush if necessary to avoid unwanted alignment
14793 my $must_flush = 0;
14794 if ( @{$ri_first} > 1 ) {
14796 # flush before a long if statement
14797 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
14802 Perl::Tidy::VerticalAligner::flush();
14805 undo_ci( $ri_first, $ri_last );
14807 set_logical_padding( $ri_first, $ri_last );
14809 # loop to prepare each line for shipment
14810 my $n_last_line = @{$ri_first} - 1;
14812 for my $n ( 0 .. $n_last_line ) {
14813 my $ibeg = $ri_first->[$n];
14814 my $iend = $ri_last->[$n];
14816 my ( $rtokens, $rfields, $rpatterns ) =
14817 make_alignment_patterns( $ibeg, $iend );
14819 # Set flag to show how much level changes between this line
14820 # and the next line, if we have it.
14822 if ( $n < $n_last_line ) {
14823 my $ibegp = $ri_first->[ $n + 1 ];
14824 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
14827 my ( $indentation, $lev, $level_end, $terminal_type,
14828 $is_semicolon_terminated, $is_outdented_line )
14829 = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
14830 $ri_first, $ri_last, $rindentation_list, $ljump );
14832 # we will allow outdenting of long lines..
14833 my $outdent_long_lines = (
14835 # which are long quotes, if allowed
14836 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
14838 # which are long block comments, if allowed
14840 $types_to_go[$ibeg] eq '#'
14841 && $rOpts->{'outdent-long-comments'}
14843 # but not if this is a static block comment
14844 && !$is_static_block_comment
14849 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
14851 my $rvertical_tightness_flags =
14852 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
14853 $ri_first, $ri_last );
14855 # flush an outdented line to avoid any unwanted vertical alignment
14856 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
14858 # Set a flag at the final ':' of a ternary chain to request
14859 # vertical alignment of the final term. Here is a
14860 # slightly complex example:
14862 # $self->{_text} = (
14864 # : $type eq 'item' ? "the $section entry"
14865 # : "the section on $section"
14869 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
14870 # : ' elsewhere in this document'
14873 my $is_terminal_ternary = 0;
14874 if ( $tokens_to_go[$ibeg] eq ':'
14875 || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' )
14877 my $last_leading_type = ":";
14879 my $iprev = $ri_first->[ $n - 1 ];
14880 $last_leading_type = $types_to_go[$iprev];
14882 if ( $terminal_type ne ';'
14883 && $n_last_line > $n
14884 && $level_end == $lev )
14886 my $inext = $ri_first->[ $n + 1 ];
14887 $level_end = $levels_to_go[$inext];
14888 $terminal_type = $types_to_go[$inext];
14891 $is_terminal_ternary = $last_leading_type eq ':'
14892 && ( ( $terminal_type eq ';' && $level_end <= $lev )
14893 || ( $terminal_type ne ':' && $level_end < $lev ) )
14895 # the terminal term must not contain any ternary terms, as in
14897 # $Is_MSWin32 ? ".\\echo$$"
14898 # : $Is_MacOS ? ":echo$$"
14899 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
14901 && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
14904 # send this new line down the pipe
14905 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
14906 Perl::Tidy::VerticalAligner::valign_input(
14913 $forced_breakpoint_to_go[$iend] || $in_comma_list,
14914 $outdent_long_lines,
14915 $is_terminal_ternary,
14916 $is_semicolon_terminated,
14918 $rvertical_tightness_flags,
14922 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
14924 # flush an outdented line to avoid any unwanted vertical alignment
14925 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
14929 # Set flag indicating if this line ends in an opening
14930 # token and is very short, so that a blank line is not
14931 # needed if the subsequent line is a comment.
14932 # Examples of what we are looking for:
14938 $last_output_short_opening_token
14940 # line ends in opening token
14941 = $types_to_go[$iend] =~ /^[\{\(\[L]$/
14945 # line has either single opening token
14948 # or is a single token followed by opening token.
14949 # Note that sub identifiers have blanks like 'sub doit'
14950 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
14953 # and limit total to 10 character widths
14954 && token_sequence_length( $ibeg, $iend ) <= 10;
14956 } # end of loop to output each line
14958 # remember indentation of lines containing opening containers for
14959 # later use by sub set_adjusted_indentation
14960 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
14964 { # begin make_alignment_patterns
14966 my %block_type_map;
14971 # map related block names into a common name to
14973 %block_type_map = (
14984 # map certain keywords to the same 'if' class to align
14985 # long if/elsif sequences. [elsif.pl]
14991 'default' => 'given',
14992 'case' => 'switch',
14994 # treat an 'undef' similar to numbers and quotes
14999 sub make_alignment_patterns {
15001 # Here we do some important preliminary work for the
15002 # vertical aligner. We create three arrays for one
15003 # output line. These arrays contain strings that can
15004 # be tested by the vertical aligner to see if
15005 # consecutive lines can be aligned vertically.
15007 # The three arrays are indexed on the vertical
15008 # alignment fields and are:
15009 # @tokens - a list of any vertical alignment tokens for this line.
15010 # These are tokens, such as '=' '&&' '#' etc which
15011 # we want to might align vertically. These are
15012 # decorated with various information such as
15013 # nesting depth to prevent unwanted vertical
15014 # alignment matches.
15015 # @fields - the actual text of the line between the vertical alignment
15017 # @patterns - a modified list of token types, one for each alignment
15018 # field. These should normally each match before alignment is
15019 # allowed, even when the alignment tokens match.
15020 my ( $ibeg, $iend ) = @_;
15024 my $i_start = $ibeg;
15027 my @container_name = ("");
15028 my @multiple_comma_arrows = (undef);
15030 my $j = 0; # field index
15033 for my $i ( $ibeg .. $iend ) {
15035 # Keep track of containers balanced on this line only.
15036 # These are used below to prevent unwanted cross-line alignments.
15037 # Unbalanced containers already avoid aligning across
15038 # container boundaries.
15039 if ( $tokens_to_go[$i] eq '(' ) {
15041 # if container is balanced on this line...
15042 my $i_mate = $mate_index_to_go[$i];
15043 if ( $i_mate > $i && $i_mate <= $iend ) {
15045 my $seqno = $type_sequence_to_go[$i];
15046 my $count = comma_arrow_count($seqno);
15047 $multiple_comma_arrows[$depth] = $count && $count > 1;
15049 # Append the previous token name to make the container name
15050 # more unique. This name will also be given to any commas
15051 # within this container, and it helps avoid undesirable
15052 # alignments of different types of containers.
15053 my $name = previous_nonblank_token($i);
15055 $container_name[$depth] = "+" . $name;
15057 # Make the container name even more unique if necessary.
15058 # If we are not vertically aligning this opening paren,
15059 # append a character count to avoid bad alignment because
15060 # it usually looks bad to align commas within containers
15061 # for which the opening parens do not align. Here
15062 # is an example very BAD alignment of commas (because
15063 # the atan2 functions are not all aligned):
15065 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
15066 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
15067 # $X * atan2( $X, 1 ) -
15068 # $Y * atan2( $Y, 1 );
15070 # On the other hand, it is usually okay to align commas if
15071 # opening parens align, such as:
15072 # glVertex3d( $cx + $s * $xs, $cy, $z );
15073 # glVertex3d( $cx, $cy + $s * $ys, $z );
15074 # glVertex3d( $cx - $s * $xs, $cy, $z );
15075 # glVertex3d( $cx, $cy - $s * $ys, $z );
15077 # To distinguish between these situations, we will
15078 # append the length of the line from the previous matching
15079 # token, or beginning of line, to the function name. This
15080 # will allow the vertical aligner to reject undesirable
15083 # if we are not aligning on this paren...
15084 if ( $matching_token_to_go[$i] eq '' ) {
15086 # Sum length from previous alignment, or start of line.
15088 ( $i_start == $ibeg )
15089 ? total_line_length( $i_start, $i - 1 )
15090 : token_sequence_length( $i_start, $i - 1 );
15092 # tack length onto the container name to make unique
15093 $container_name[$depth] .= "-" . $len;
15097 elsif ( $tokens_to_go[$i] eq ')' ) {
15098 $depth-- if $depth > 0;
15101 # if we find a new synchronization token, we are done with
15103 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
15105 my $tok = my $raw_tok = $matching_token_to_go[$i];
15107 # make separators in different nesting depths unique
15108 # by appending the nesting depth digit.
15109 if ( $raw_tok ne '#' ) {
15110 $tok .= "$nesting_depth_to_go[$i]";
15113 # also decorate commas with any container name to avoid
15114 # unwanted cross-line alignments.
15115 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
15116 if ( $container_name[$depth] ) {
15117 $tok .= $container_name[$depth];
15121 # Patch to avoid aligning leading and trailing if, unless.
15122 # Mark trailing if, unless statements with container names.
15123 # This makes them different from leading if, unless which
15124 # are not so marked at present. If we ever need to name
15125 # them too, we could use ci to distinguish them.
15126 # Example problem to avoid:
15127 # return ( 2, "DBERROR" )
15128 # if ( $retval == 2 );
15129 # if ( scalar @_ ) {
15130 # my ( $a, $b, $c, $d, $e, $f ) = @_;
15132 if ( $raw_tok eq '(' ) {
15133 my $ci = $ci_levels_to_go[$ibeg];
15134 if ( $container_name[$depth] =~ /^\+(if|unless)/
15137 $tok .= $container_name[$depth];
15141 # Decorate block braces with block types to avoid
15142 # unwanted alignments such as the following:
15143 # foreach ( @{$routput_array} ) { $fh->print($_) }
15144 # eval { $fh->close() };
15145 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
15146 my $block_type = $block_type_to_go[$i];
15148 # map certain related block types to allow
15149 # else blocks to align
15150 $block_type = $block_type_map{$block_type}
15151 if ( defined( $block_type_map{$block_type} ) );
15153 # remove sub names to allow one-line sub braces to align
15154 # regardless of name
15155 #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
15156 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
15158 # allow all control-type blocks to align
15159 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
15161 $tok .= $block_type;
15164 # concatenate the text of the consecutive tokens to form
15167 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
15169 # store the alignment token for this field
15170 push( @tokens, $tok );
15172 # get ready for the next batch
15175 $patterns[$j] = "";
15178 # continue accumulating tokens
15179 # handle non-keywords..
15180 if ( $types_to_go[$i] ne 'k' ) {
15181 my $type = $types_to_go[$i];
15183 # Mark most things before arrows as a quote to
15184 # get them to line up. Testfile: mixed.pl.
15185 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
15186 my $next_type = $types_to_go[ $i + 1 ];
15187 my $i_next_nonblank =
15188 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
15190 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
15193 # Patch to ignore leading minus before words,
15194 # by changing pattern 'mQ' into just 'Q',
15195 # so that we can align things like this:
15196 # Button => "Print letter \"~$_\"",
15197 # -command => [ sub { print "$_[0]\n" }, $_ ],
15198 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
15202 # patch to make numbers and quotes align
15203 if ( $type eq 'n' ) { $type = 'Q' }
15205 # patch to ignore any ! in patterns
15206 if ( $type eq '!' ) { $type = '' }
15208 $patterns[$j] .= $type;
15211 # for keywords we have to use the actual text
15214 my $tok = $tokens_to_go[$i];
15216 # but map certain keywords to a common string to allow
15218 $tok = $keyword_map{$tok}
15219 if ( defined( $keyword_map{$tok} ) );
15220 $patterns[$j] .= $tok;
15224 # done with this line .. join text of tokens to make the last field
15225 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
15226 return ( \@tokens, \@fields, \@patterns );
15229 } # end make_alignment_patterns
15231 { # begin unmatched_indexes
15233 # closure to keep track of unbalanced containers.
15234 # arrays shared by the routines in this block:
15235 my @unmatched_opening_indexes_in_this_batch;
15236 my @unmatched_closing_indexes_in_this_batch;
15237 my %comma_arrow_count;
15239 sub is_unbalanced_batch {
15240 return @unmatched_opening_indexes_in_this_batch +
15241 @unmatched_closing_indexes_in_this_batch;
15244 sub comma_arrow_count {
15246 return $comma_arrow_count{$seqno};
15249 sub match_opening_and_closing_tokens {
15251 # Match up indexes of opening and closing braces, etc, in this batch.
15252 # This has to be done after all tokens are stored because unstoring
15253 # of tokens would otherwise cause trouble.
15255 @unmatched_opening_indexes_in_this_batch = ();
15256 @unmatched_closing_indexes_in_this_batch = ();
15257 %comma_arrow_count = ();
15258 my $comma_arrow_count_contained = 0;
15260 foreach my $i ( 0 .. $max_index_to_go ) {
15261 if ( $type_sequence_to_go[$i] ) {
15262 my $token = $tokens_to_go[$i];
15263 if ( $token =~ /^[\(\[\{\?]$/ ) {
15264 push @unmatched_opening_indexes_in_this_batch, $i;
15266 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
15268 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
15269 if ( defined($i_mate) && $i_mate >= 0 ) {
15270 if ( $type_sequence_to_go[$i_mate] ==
15271 $type_sequence_to_go[$i] )
15273 $mate_index_to_go[$i] = $i_mate;
15274 $mate_index_to_go[$i_mate] = $i;
15275 my $seqno = $type_sequence_to_go[$i];
15276 if ( $comma_arrow_count{$seqno} ) {
15277 $comma_arrow_count_contained +=
15278 $comma_arrow_count{$seqno};
15282 push @unmatched_opening_indexes_in_this_batch,
15284 push @unmatched_closing_indexes_in_this_batch, $i;
15288 push @unmatched_closing_indexes_in_this_batch, $i;
15292 elsif ( $tokens_to_go[$i] eq '=>' ) {
15293 if (@unmatched_opening_indexes_in_this_batch) {
15294 my $j = $unmatched_opening_indexes_in_this_batch[-1];
15295 my $seqno = $type_sequence_to_go[$j];
15296 $comma_arrow_count{$seqno}++;
15300 return $comma_arrow_count_contained;
15303 sub save_opening_indentation {
15305 # This should be called after each batch of tokens is output. It
15306 # saves indentations of lines of all unmatched opening tokens.
15307 # These will be used by sub get_opening_indentation.
15309 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
15311 # we no longer need indentations of any saved indentations which
15312 # are unmatched closing tokens in this batch, because we will
15313 # never encounter them again. So we can delete them to keep
15314 # the hash size down.
15315 foreach (@unmatched_closing_indexes_in_this_batch) {
15316 my $seqno = $type_sequence_to_go[$_];
15317 delete $saved_opening_indentation{$seqno};
15320 # we need to save indentations of any unmatched opening tokens
15321 # in this batch because we may need them in a subsequent batch.
15322 foreach (@unmatched_opening_indexes_in_this_batch) {
15323 my $seqno = $type_sequence_to_go[$_];
15324 $saved_opening_indentation{$seqno} = [
15325 lookup_opening_indentation(
15326 $_, $ri_first, $ri_last, $rindentation_list
15332 } # end unmatched_indexes
15334 sub get_opening_indentation {
15336 # get the indentation of the line which output the opening token
15337 # corresponding to a given closing token in the current output batch.
15340 # $i_closing - index in this line of a closing token ')' '}' or ']'
15342 # $ri_first - reference to list of the first index $i for each output
15343 # line in this batch
15344 # $ri_last - reference to list of the last index $i for each output line
15346 # $rindentation_list - reference to a list containing the indentation
15347 # used for each line.
15350 # -the indentation of the line which contained the opening token
15351 # which matches the token at index $i_opening
15352 # -and its offset (number of columns) from the start of the line
15354 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
15356 # first, see if the opening token is in the current batch
15357 my $i_opening = $mate_index_to_go[$i_closing];
15358 my ( $indent, $offset, $is_leading, $exists );
15360 if ( $i_opening >= 0 ) {
15362 # it is..look up the indentation
15363 ( $indent, $offset, $is_leading ) =
15364 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
15365 $rindentation_list );
15368 # if not, it should have been stored in the hash by a previous batch
15370 my $seqno = $type_sequence_to_go[$i_closing];
15372 if ( $saved_opening_indentation{$seqno} ) {
15373 ( $indent, $offset, $is_leading ) =
15374 @{ $saved_opening_indentation{$seqno} };
15377 # some kind of serious error
15378 # (example is badfile.t)
15387 # if no sequence number it must be an unbalanced container
15395 return ( $indent, $offset, $is_leading, $exists );
15398 sub lookup_opening_indentation {
15400 # get the indentation of the line in the current output batch
15401 # which output a selected opening token
15404 # $i_opening - index of an opening token in the current output batch
15405 # whose line indentation we need
15406 # $ri_first - reference to list of the first index $i for each output
15407 # line in this batch
15408 # $ri_last - reference to list of the last index $i for each output line
15410 # $rindentation_list - reference to a list containing the indentation
15411 # used for each line. (NOTE: the first slot in
15412 # this list is the last returned line number, and this is
15413 # followed by the list of indentations).
15416 # -the indentation of the line which contained token $i_opening
15417 # -and its offset (number of columns) from the start of the line
15419 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
15421 my $nline = $rindentation_list->[0]; # line number of previous lookup
15423 # reset line location if necessary
15424 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
15426 # find the correct line
15427 unless ( $i_opening > $ri_last->[-1] ) {
15428 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
15431 # error - token index is out of bounds - shouldn't happen
15434 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
15436 report_definite_bug();
15437 $nline = $#{$ri_last};
15440 $rindentation_list->[0] =
15441 $nline; # save line number to start looking next call
15442 my $ibeg = $ri_start->[$nline];
15443 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
15444 my $is_leading = ( $ibeg == $i_opening );
15445 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
15449 my %is_if_elsif_else_unless_while_until_for_foreach;
15453 # These block types may have text between the keyword and opening
15454 # curly. Note: 'else' does not, but must be included to allow trailing
15455 # if/elsif text to be appended.
15456 # patch for SWITCH/CASE: added 'case' and 'when'
15457 my @q = qw(if elsif else unless while until for foreach case when);
15458 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
15462 sub set_adjusted_indentation {
15464 # This routine has the final say regarding the actual indentation of
15465 # a line. It starts with the basic indentation which has been
15466 # defined for the leading token, and then takes into account any
15467 # options that the user has set regarding special indenting and
15470 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
15471 $rindentation_list, $level_jump )
15474 # we need to know the last token of this line
15475 my ( $terminal_type, $i_terminal ) =
15476 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
15478 my $is_outdented_line = 0;
15480 my $is_semicolon_terminated = $terminal_type eq ';'
15481 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
15483 # NOTE: A future improvement would be to make it semicolon terminated
15484 # even if it does not have a semicolon but is followed by a closing
15485 # block brace. This would undo ci even for something like the
15486 # following, in which the final paren does not have a semicolon because
15487 # it is a possible weld location:
15489 # if ($BOLD_MATH) {
15491 # $labels, $comment,
15492 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
15497 # MOJO: Set a flag if this lines begins with ')->'
15498 my $leading_paren_arrow = (
15499 $types_to_go[$ibeg] eq '}'
15500 && $tokens_to_go[$ibeg] eq ')'
15502 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
15503 || ( $ibeg < $i_terminal - 1
15504 && $types_to_go[ $ibeg + 1 ] eq 'b'
15505 && $types_to_go[ $ibeg + 2 ] eq '->' )
15509 ##########################################################
15510 # Section 1: set a flag and a default indentation
15512 # Most lines are indented according to the initial token.
15513 # But it is common to outdent to the level just after the
15514 # terminal token in certain cases...
15515 # adjust_indentation flag:
15516 # 0 - do not adjust
15518 # 2 - vertically align with opening token
15520 ##########################################################
15521 my $adjust_indentation = 0;
15522 my $default_adjust_indentation = $adjust_indentation;
15525 $opening_indentation, $opening_offset,
15526 $is_leading, $opening_exists
15529 # if we are at a closing token of some type..
15530 if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
15532 # get the indentation of the line containing the corresponding
15535 $opening_indentation, $opening_offset,
15536 $is_leading, $opening_exists
15538 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
15539 $rindentation_list );
15541 # First set the default behavior:
15544 # default behavior is to outdent closing lines
15545 # of the form: "); }; ]; )->xxx;"
15546 $is_semicolon_terminated
15548 # and 'cuddled parens' of the form: ")->pack("
15549 # Bug fix for RT #123749]: the types here were
15550 # incorrectly '(' and ')'. Corrected to be '{' and '}'
15552 $terminal_type eq '{'
15553 && $types_to_go[$ibeg] eq '}'
15554 && ( $nesting_depth_to_go[$iend] + 1 ==
15555 $nesting_depth_to_go[$ibeg] )
15558 # remove continuation indentation for any line like
15560 # or without ending '{' and unbalanced, such as
15561 # such as '}->{$operator}'
15563 $types_to_go[$ibeg] eq '}'
15565 && ( $types_to_go[$iend] eq '{'
15566 || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
15569 # and when the next line is at a lower indentation level
15570 # PATCH: and only if the style allows undoing continuation
15571 # for all closing token types. We should really wait until
15572 # the indentation of the next line is known and then make
15573 # a decision, but that would require another pass.
15574 || ( $level_jump < 0 && !$some_closing_token_indentation )
15576 # Patch for -wn=2, multiple welded closing tokens
15577 || ( $i_terminal > $ibeg
15578 && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
15582 $adjust_indentation = 1;
15585 # outdent something like '),'
15587 $terminal_type eq ','
15589 # Removed this constraint for -wn
15590 # OLD: allow just one character before the comma
15591 # && $i_terminal == $ibeg + 1
15593 # require LIST environment; otherwise, we may outdent too much -
15594 # this can happen in calls without parentheses (overload.t);
15595 && $container_environment_to_go[$i_terminal] eq 'LIST'
15598 $adjust_indentation = 1;
15601 # undo continuation indentation of a terminal closing token if
15602 # it is the last token before a level decrease. This will allow
15603 # a closing token to line up with its opening counterpart, and
15604 # avoids a indentation jump larger than 1 level.
15605 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
15606 && $i_terminal == $ibeg )
15608 my $ci = $ci_levels_to_go[$ibeg];
15609 my $lev = $levels_to_go[$ibeg];
15610 my $next_type = $types_to_go[ $ibeg + 1 ];
15611 my $i_next_nonblank =
15612 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
15613 if ( $i_next_nonblank <= $max_index_to_go
15614 && $levels_to_go[$i_next_nonblank] < $lev )
15616 $adjust_indentation = 1;
15619 # Patch for RT #96101, in which closing brace of anonymous subs
15620 # was not outdented. We should look ahead and see if there is
15621 # a level decrease at the next token (i.e., a closing token),
15622 # but right now we do not have that information. For now
15623 # we see if we are in a list, and this works well.
15624 # See test files 'sub*.t' for good test cases.
15625 if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
15626 && $container_environment_to_go[$i_terminal] eq 'LIST'
15627 && !$rOpts->{'indent-closing-brace'} )
15630 $opening_indentation, $opening_offset,
15631 $is_leading, $opening_exists
15633 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
15634 $rindentation_list );
15635 my $indentation = $leading_spaces_to_go[$ibeg];
15636 if ( defined($opening_indentation)
15637 && get_spaces($indentation) >
15638 get_spaces($opening_indentation) )
15640 $adjust_indentation = 1;
15645 # YVES patch 1 of 2:
15646 # Undo ci of line with leading closing eval brace,
15647 # but not beyond the indention of the line with
15648 # the opening brace.
15649 if ( $block_type_to_go[$ibeg] eq 'eval'
15650 && !$rOpts->{'line-up-parentheses'}
15651 && !$rOpts->{'indent-closing-brace'} )
15654 $opening_indentation, $opening_offset,
15655 $is_leading, $opening_exists
15657 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
15658 $rindentation_list );
15659 my $indentation = $leading_spaces_to_go[$ibeg];
15660 if ( defined($opening_indentation)
15661 && get_spaces($indentation) >
15662 get_spaces($opening_indentation) )
15664 $adjust_indentation = 1;
15668 $default_adjust_indentation = $adjust_indentation;
15670 # Now modify default behavior according to user request:
15671 # handle option to indent non-blocks of the form ); }; ];
15672 # But don't do special indentation to something like ')->pack('
15673 if ( !$block_type_to_go[$ibeg] ) {
15674 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
15676 if ( $i_terminal <= $ibeg + 1
15677 || $is_semicolon_terminated )
15679 $adjust_indentation = 2;
15682 $adjust_indentation = 0;
15685 elsif ( $cti == 2 ) {
15686 if ($is_semicolon_terminated) {
15687 $adjust_indentation = 3;
15690 $adjust_indentation = 0;
15693 elsif ( $cti == 3 ) {
15694 $adjust_indentation = 3;
15698 # handle option to indent blocks
15701 $rOpts->{'indent-closing-brace'}
15703 $i_terminal == $ibeg # isolated terminal '}'
15704 || $is_semicolon_terminated
15708 $adjust_indentation = 3;
15713 # if at ');', '};', '>;', and '];' of a terminal qw quote
15714 elsif ($rpatterns->[0] =~ /^qb*;$/
15715 && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
15717 if ( $closing_token_indentation{$1} == 0 ) {
15718 $adjust_indentation = 1;
15721 $adjust_indentation = 3;
15725 # if line begins with a ':', align it with any
15726 # previous line leading with corresponding ?
15727 elsif ( $types_to_go[$ibeg] eq ':' ) {
15729 $opening_indentation, $opening_offset,
15730 $is_leading, $opening_exists
15732 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
15733 $rindentation_list );
15734 if ($is_leading) { $adjust_indentation = 2; }
15737 ##########################################################
15738 # Section 2: set indentation according to flag set above
15740 # Select the indentation object to define leading
15741 # whitespace. If we are outdenting something like '} } );'
15742 # then we want to use one level below the last token
15743 # ($i_terminal) in order to get it to fully outdent through
15745 ##########################################################
15748 my $level_end = $levels_to_go[$iend];
15750 if ( $adjust_indentation == 0 ) {
15751 $indentation = $leading_spaces_to_go[$ibeg];
15752 $lev = $levels_to_go[$ibeg];
15754 elsif ( $adjust_indentation == 1 ) {
15756 # Change the indentation to be that of a different token on the line
15757 # Previously, the indentation of the terminal token was used:
15759 # $indentation = $reduced_spaces_to_go[$i_terminal];
15760 # $lev = $levels_to_go[$i_terminal];
15762 # Generalization for MOJO:
15763 # Use the lowest level indentation of the tokens on the line.
15764 # For example, here we can use the indentation of the ending ';':
15765 # } until ($selection > 0 and $selection < 10); # ok to use ';'
15766 # But this will not outdent if we use the terminal indentation:
15767 # )->then( sub { # use indentation of the ->, not the {
15768 # Warning: reduced_spaces_to_go[] may be a reference, do not
15769 # do numerical checks with it
15772 $indentation = $reduced_spaces_to_go[$i_ind];
15773 $lev = $levels_to_go[$i_ind];
15774 while ( $i_ind < $i_terminal ) {
15776 if ( $levels_to_go[$i_ind] < $lev ) {
15777 $indentation = $reduced_spaces_to_go[$i_ind];
15778 $lev = $levels_to_go[$i_ind];
15783 # handle indented closing token which aligns with opening token
15784 elsif ( $adjust_indentation == 2 ) {
15786 # handle option to align closing token with opening token
15787 $lev = $levels_to_go[$ibeg];
15789 # calculate spaces needed to align with opening token
15791 get_spaces($opening_indentation) + $opening_offset;
15793 # Indent less than the previous line.
15795 # Problem: For -lp we don't exactly know what it was if there
15796 # were recoverable spaces sent to the aligner. A good solution
15797 # would be to force a flush of the vertical alignment buffer, so
15798 # that we would know. For now, this rule is used for -lp:
15800 # When the last line did not start with a closing token we will
15801 # be optimistic that the aligner will recover everything wanted.
15803 # This rule will prevent us from breaking a hierarchy of closing
15804 # tokens, and in a worst case will leave a closing paren too far
15805 # indented, but this is better than frequently leaving it not
15807 my $last_spaces = get_spaces($last_indentation_written);
15808 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
15810 get_recoverable_spaces($last_indentation_written);
15813 # reset the indentation to the new space count if it works
15814 # only options are all or none: nothing in-between looks good
15815 $lev = $levels_to_go[$ibeg];
15816 if ( $space_count < $last_spaces ) {
15817 if ($rOpts_line_up_parentheses) {
15818 my $lev = $levels_to_go[$ibeg];
15820 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
15823 $indentation = $space_count;
15827 # revert to default if it doesn't work
15829 $space_count = leading_spaces_to_go($ibeg);
15830 if ( $default_adjust_indentation == 0 ) {
15831 $indentation = $leading_spaces_to_go[$ibeg];
15833 elsif ( $default_adjust_indentation == 1 ) {
15834 $indentation = $reduced_spaces_to_go[$i_terminal];
15835 $lev = $levels_to_go[$i_terminal];
15840 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
15843 # handle -icb (indented closing code block braces)
15844 # Updated method for indented block braces: indent one full level if
15845 # there is no continuation indentation. This will occur for major
15846 # structures such as sub, if, else, but not for things like map
15849 # Note: only code blocks without continuation indentation are
15850 # handled here (if, else, unless, ..). In the following snippet,
15851 # the terminal brace of the sort block will have continuation
15852 # indentation as shown so it will not be handled by the coding
15853 # here. We would have to undo the continuation indentation to do
15854 # this, but it probably looks ok as is. This is a possible future
15855 # update for semicolon terminated lines.
15857 # if ($sortby eq 'date' or $sortby eq 'size') {
15859 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
15864 if ( $block_type_to_go[$ibeg]
15865 && $ci_levels_to_go[$i_terminal] == 0 )
15867 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
15868 $indentation = $spaces + $rOpts_indent_columns;
15870 # NOTE: for -lp we could create a new indentation object, but
15871 # there is probably no need to do it
15874 # handle -icp and any -icb block braces which fall through above
15875 # test such as the 'sort' block mentioned above.
15878 # There are currently two ways to handle -icp...
15879 # One way is to use the indentation of the previous line:
15880 # $indentation = $last_indentation_written;
15882 # The other way is to use the indentation that the previous line
15883 # would have had if it hadn't been adjusted:
15884 $indentation = $last_unadjusted_indentation;
15886 # Current method: use the minimum of the two. This avoids
15887 # inconsistent indentation.
15888 if ( get_spaces($last_indentation_written) <
15889 get_spaces($indentation) )
15891 $indentation = $last_indentation_written;
15895 # use previous indentation but use own level
15896 # to cause list to be flushed properly
15897 $lev = $levels_to_go[$ibeg];
15900 # remember indentation except for multi-line quotes, which get
15902 unless ( $ibeg == 0 && $starting_in_quote ) {
15903 $last_indentation_written = $indentation;
15904 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
15905 $last_leading_token = $tokens_to_go[$ibeg];
15908 # be sure lines with leading closing tokens are not outdented more
15909 # than the line which contained the corresponding opening token.
15911 #############################################################
15912 # updated per bug report in alex_bug.pl: we must not
15913 # mess with the indentation of closing logical braces so
15914 # we must treat something like '} else {' as if it were
15915 # an isolated brace my $is_isolated_block_brace = (
15916 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
15917 #############################################################
15918 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
15919 && ( $iend == $ibeg
15920 || $is_if_elsif_else_unless_while_until_for_foreach{
15921 $block_type_to_go[$ibeg]
15924 # only do this for a ':; which is aligned with its leading '?'
15925 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
15928 defined($opening_indentation)
15929 && !$leading_paren_arrow # MOJO
15930 && !$is_isolated_block_brace
15931 && !$is_unaligned_colon
15934 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
15935 $indentation = $opening_indentation;
15939 # remember the indentation of each line of this batch
15940 push @{$rindentation_list}, $indentation;
15942 # outdent lines with certain leading tokens...
15945 # must be first word of this batch
15951 # certain leading keywords if requested
15953 $rOpts->{'outdent-keywords'}
15954 && $types_to_go[$ibeg] eq 'k'
15955 && $outdent_keyword{ $tokens_to_go[$ibeg] }
15958 # or labels if requested
15959 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
15961 # or static block comments if requested
15962 || ( $types_to_go[$ibeg] eq '#'
15963 && $rOpts->{'outdent-static-block-comments'}
15964 && $is_static_block_comment )
15969 my $space_count = leading_spaces_to_go($ibeg);
15970 if ( $space_count > 0 ) {
15971 $space_count -= $rOpts_continuation_indentation;
15972 $is_outdented_line = 1;
15973 if ( $space_count < 0 ) { $space_count = 0 }
15975 # do not promote a spaced static block comment to non-spaced;
15976 # this is not normally necessary but could be for some
15977 # unusual user inputs (such as -ci = -i)
15978 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
15982 if ($rOpts_line_up_parentheses) {
15984 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
15987 $indentation = $space_count;
15992 return ( $indentation, $lev, $level_end, $terminal_type,
15993 $is_semicolon_terminated, $is_outdented_line );
15997 sub set_vertical_tightness_flags {
15999 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
16001 # Define vertical tightness controls for the nth line of a batch.
16002 # We create an array of parameters which tell the vertical aligner
16003 # if we should combine this line with the next line to achieve the
16004 # desired vertical tightness. The array of parameters contains:
16006 # [0] type: 1=opening non-block 2=closing non-block
16007 # 3=opening block brace 4=closing block brace
16009 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
16010 # if closing: spaces of padding to use
16011 # [2] sequence number of container
16012 # [3] valid flag: do not append if this flag is false. Will be
16013 # true if appropriate -vt flag is set. Otherwise, Will be
16014 # made true only for 2 line container in parens with -lp
16016 # These flags are used by sub set_leading_whitespace in
16017 # the vertical aligner
16019 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
16021 #--------------------------------------------------------------
16022 # Vertical Tightness Flags Section 1:
16023 # Handle Lines 1 .. n-1 but not the last line
16024 # For non-BLOCK tokens, we will need to examine the next line
16025 # too, so we won't consider the last line.
16026 #--------------------------------------------------------------
16027 if ( $n < $n_last_line ) {
16029 #--------------------------------------------------------------
16030 # Vertical Tightness Flags Section 1a:
16031 # Look for Type 1, last token of this line is a non-block opening token
16032 #--------------------------------------------------------------
16033 my $ibeg_next = $ri_first->[ $n + 1 ];
16034 my $token_end = $tokens_to_go[$iend];
16035 my $iend_next = $ri_last->[ $n + 1 ];
16037 $type_sequence_to_go[$iend]
16038 && !$block_type_to_go[$iend]
16039 && $is_opening_token{$token_end}
16041 $opening_vertical_tightness{$token_end} > 0
16043 # allow 2-line method call to be closed up
16044 || ( $rOpts_line_up_parentheses
16045 && $token_end eq '('
16047 && $types_to_go[ $iend - 1 ] ne 'b' )
16052 # avoid multiple jumps in nesting depth in one line if
16054 my $ovt = $opening_vertical_tightness{$token_end};
16055 my $iend_next = $ri_last->[ $n + 1 ];
16058 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
16059 $nesting_depth_to_go[$ibeg_next] )
16063 # If -vt flag has not been set, mark this as invalid
16064 # and aligner will validate it if it sees the closing paren
16066 my $valid_flag = $ovt;
16067 @{$rvertical_tightness_flags} =
16068 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
16072 #--------------------------------------------------------------
16073 # Vertical Tightness Flags Section 1b:
16074 # Look for Type 2, first token of next line is a non-block closing
16075 # token .. and be sure this line does not have a side comment
16076 #--------------------------------------------------------------
16077 my $token_next = $tokens_to_go[$ibeg_next];
16078 if ( $type_sequence_to_go[$ibeg_next]
16079 && !$block_type_to_go[$ibeg_next]
16080 && $is_closing_token{$token_next}
16081 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
16083 my $ovt = $opening_vertical_tightness{$token_next};
16084 my $cvt = $closing_vertical_tightness{$token_next};
16087 # never append a trailing line like )->pack(
16088 # because it will throw off later alignment
16090 $nesting_depth_to_go[$ibeg_next] ==
16091 $nesting_depth_to_go[ $iend_next + 1 ] + 1
16096 $container_environment_to_go[$ibeg_next] ne 'LIST'
16100 # allow closing up 2-line method calls
16101 || ( $rOpts_line_up_parentheses
16102 && $token_next eq ')' )
16109 # decide which trailing closing tokens to append..
16111 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
16113 my $str = join( '',
16114 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
16116 # append closing token if followed by comment or ';'
16117 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
16121 my $valid_flag = $cvt;
16122 @{$rvertical_tightness_flags} = (
16124 $tightness{$token_next} == 2 ? 0 : 1,
16125 $type_sequence_to_go[$ibeg_next], $valid_flag,
16131 #--------------------------------------------------------------
16132 # Vertical Tightness Flags Section 1c:
16133 # Implement the Opening Token Right flag (Type 2)..
16134 # If requested, move an isolated trailing opening token to the end of
16135 # the previous line which ended in a comma. We could do this
16136 # in sub recombine_breakpoints but that would cause problems
16137 # with -lp formatting. The problem is that indentation will
16138 # quickly move far to the right in nested expressions. By
16139 # doing it after indentation has been set, we avoid changes
16140 # to the indentation. Actual movement of the token takes place
16141 # in sub valign_output_step_B.
16142 #--------------------------------------------------------------
16144 $opening_token_right{ $tokens_to_go[$ibeg_next] }
16146 # previous line is not opening
16147 # (use -sot to combine with it)
16148 && !$is_opening_token{$token_end}
16150 # previous line ended in one of these
16151 # (add other cases if necessary; '=>' and '.' are not necessary
16152 && !$block_type_to_go[$ibeg_next]
16154 # this is a line with just an opening token
16155 && ( $iend_next == $ibeg_next
16156 || $iend_next == $ibeg_next + 2
16157 && $types_to_go[$iend_next] eq '#' )
16159 # looks bad if we align vertically with the wrong container
16160 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
16163 my $valid_flag = 1;
16164 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
16165 @{$rvertical_tightness_flags} =
16166 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
16169 #--------------------------------------------------------------
16170 # Vertical Tightness Flags Section 1d:
16171 # Stacking of opening and closing tokens (Type 2)
16172 #--------------------------------------------------------------
16174 my $token_beg_next = $tokens_to_go[$ibeg_next];
16176 # patch to make something like 'qw(' behave like an opening paren
16178 if ( $types_to_go[$ibeg_next] eq 'q' ) {
16179 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
16180 $token_beg_next = $1;
16184 if ( $is_closing_token{$token_end}
16185 && $is_closing_token{$token_beg_next} )
16187 $stackable = $stack_closing_token{$token_beg_next}
16188 unless ( $block_type_to_go[$ibeg_next] )
16189 ; # shouldn't happen; just checking
16191 elsif ($is_opening_token{$token_end}
16192 && $is_opening_token{$token_beg_next} )
16194 $stackable = $stack_opening_token{$token_beg_next}
16195 unless ( $block_type_to_go[$ibeg_next] )
16196 ; # shouldn't happen; just checking
16201 my $is_semicolon_terminated;
16202 if ( $n + 1 == $n_last_line ) {
16203 my ( $terminal_type, $i_terminal ) = terminal_type(
16204 \@types_to_go, \@block_type_to_go,
16205 $ibeg_next, $iend_next
16207 $is_semicolon_terminated = $terminal_type eq ';'
16208 && $nesting_depth_to_go[$iend_next] <
16209 $nesting_depth_to_go[$ibeg_next];
16212 # this must be a line with just an opening token
16213 # or end in a semicolon
16215 $is_semicolon_terminated
16216 || ( $iend_next == $ibeg_next
16217 || $iend_next == $ibeg_next + 2
16218 && $types_to_go[$iend_next] eq '#' )
16221 my $valid_flag = 1;
16222 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
16223 @{$rvertical_tightness_flags} =
16224 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
16230 #--------------------------------------------------------------
16231 # Vertical Tightness Flags Section 2:
16232 # Handle type 3, opening block braces on last line of the batch
16233 # Check for a last line with isolated opening BLOCK curly
16234 #--------------------------------------------------------------
16235 elsif ($rOpts_block_brace_vertical_tightness
16237 && $types_to_go[$iend] eq '{'
16238 && $block_type_to_go[$iend] =~
16239 /$block_brace_vertical_tightness_pattern/o )
16241 @{$rvertical_tightness_flags} =
16242 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
16245 #--------------------------------------------------------------
16246 # Vertical Tightness Flags Section 3:
16247 # Handle type 4, a closing block brace on the last line of the batch Check
16248 # for a last line with isolated closing BLOCK curly
16249 #--------------------------------------------------------------
16250 elsif ($rOpts_stack_closing_block_brace
16252 && $block_type_to_go[$iend]
16253 && $types_to_go[$iend] eq '}' )
16255 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
16256 @{$rvertical_tightness_flags} =
16257 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
16260 # pack in the sequence numbers of the ends of this line
16261 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
16262 $rvertical_tightness_flags->[5] = get_seqno($iend);
16263 return $rvertical_tightness_flags;
16268 # get opening and closing sequence numbers of a token for the vertical
16269 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
16270 # to be treated somewhat like opening and closing tokens for stacking
16271 # tokens by the vertical aligner.
16273 my $seqno = $type_sequence_to_go[$ii];
16274 if ( $types_to_go[$ii] eq 'q' ) {
16277 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
16280 if ( !$ending_in_quote ) {
16281 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
16289 my %is_vertical_alignment_type;
16290 my %is_vertical_alignment_keyword;
16291 my %is_terminal_alignment_type;
16297 # Removed =~ from list to improve chances of alignment
16298 # Removed // from list to improve chances of alignment (RT# 119588)
16300 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
16301 { ? : => && || ~~ !~~
16303 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
16305 # only align these at end of line
16307 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
16309 # eq and ne were removed from this list to improve alignment chances
16310 @q = qw(if unless and or err for foreach while until);
16311 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
16314 sub set_vertical_alignment_markers {
16316 # This routine takes the first step toward vertical alignment of the
16317 # lines of output text. It looks for certain tokens which can serve as
16318 # vertical alignment markers (such as an '=').
16320 # Method: We look at each token $i in this output batch and set
16321 # $matching_token_to_go[$i] equal to those tokens at which we would
16322 # accept vertical alignment.
16324 my ( $ri_first, $ri_last ) = @_;
16326 # nothing to do if we aren't allowed to change whitespace
16327 if ( !$rOpts_add_whitespace ) {
16328 for my $i ( 0 .. $max_index_to_go ) {
16329 $matching_token_to_go[$i] = '';
16334 # remember the index of last nonblank token before any sidecomment
16335 my $i_terminal = $max_index_to_go;
16336 if ( $types_to_go[$i_terminal] eq '#' ) {
16337 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
16338 if ( $i_terminal > 0 ) { --$i_terminal }
16342 # look at each line of this batch..
16343 my $last_vertical_alignment_before_index;
16344 my $vert_last_nonblank_type;
16345 my $vert_last_nonblank_token;
16346 my $vert_last_nonblank_block_type;
16347 my $max_line = @{$ri_first} - 1;
16349 foreach my $line ( 0 .. $max_line ) {
16350 my $ibeg = $ri_first->[$line];
16351 my $iend = $ri_last->[$line];
16352 $last_vertical_alignment_before_index = -1;
16353 $vert_last_nonblank_type = '';
16354 $vert_last_nonblank_token = '';
16355 $vert_last_nonblank_block_type = '';
16357 # look at each token in this output line..
16358 foreach my $i ( $ibeg .. $iend ) {
16359 my $alignment_type = '';
16360 my $type = $types_to_go[$i];
16361 my $block_type = $block_type_to_go[$i];
16362 my $token = $tokens_to_go[$i];
16364 # check for flag indicating that we should not align
16366 if ( $matching_token_to_go[$i] ) {
16367 $matching_token_to_go[$i] = '';
16371 #--------------------------------------------------------
16372 # First see if we want to align BEFORE this token
16373 #--------------------------------------------------------
16375 # The first possible token that we can align before
16376 # is index 2 because: 1) it doesn't normally make sense to
16377 # align before the first token and 2) the second
16378 # token must be a blank if we are to align before
16380 if ( $i < $ibeg + 2 ) { }
16382 # must follow a blank token
16383 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
16385 # align a side comment --
16386 elsif ( $type eq '#' ) {
16390 # it is a static side comment
16392 $rOpts->{'static-side-comments'}
16393 && $token =~ /$static_side_comment_pattern/o
16396 # or a closing side comment
16397 || ( $vert_last_nonblank_block_type
16399 /$closing_side_comment_prefix_pattern/o )
16402 $alignment_type = $type;
16403 } ## Example of a static side comment
16406 # otherwise, do not align two in a row to create a
16408 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
16410 # align before one of these keywords
16411 # (within a line, since $i>1)
16412 elsif ( $type eq 'k' ) {
16414 # /^(if|unless|and|or|eq|ne)$/
16415 if ( $is_vertical_alignment_keyword{$token} ) {
16416 $alignment_type = $token;
16420 # align before one of these types..
16421 # Note: add '.' after new vertical aligner is operational
16422 elsif ( $is_vertical_alignment_type{$type} ) {
16423 $alignment_type = $token;
16425 # Do not align a terminal token. Although it might
16426 # occasionally look ok to do this, this has been found to be
16427 # a good general rule. The main problems are:
16428 # (1) that the terminal token (such as an = or :) might get
16429 # moved far to the right where it is hard to see because
16430 # nothing follows it, and
16431 # (2) doing so may prevent other good alignments.
16432 # Current exceptions are && and ||
16433 if ( $i == $iend || $i >= $i_terminal ) {
16434 $alignment_type = ""
16435 unless ( $is_terminal_alignment_type{$type} );
16438 # Do not align leading ': (' or '. ('. This would prevent
16439 # alignment in something like the following:
16441 # ( $input_line_number < 10 ) ? " "
16442 # : ( $input_line_number < 100 ) ? " "
16446 # ( $case_matters ? $accessor : " lc($accessor) " )
16447 # . ( $yesno ? " eq " : " ne " )
16448 if ( $i == $ibeg + 2
16449 && $types_to_go[$ibeg] =~ /^[\.\:]$/
16450 && $types_to_go[ $i - 1 ] eq 'b' )
16452 $alignment_type = "";
16455 # For a paren after keyword, only align something like this:
16457 # elsif ( $b ) { &b }
16458 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
16459 $alignment_type = ""
16460 unless $vert_last_nonblank_token =~
16461 /^(if|unless|elsif)$/;
16464 # be sure the alignment tokens are unique
16465 # This didn't work well: reason not determined
16466 # if ($token ne $type) {$alignment_type .= $type}
16469 # NOTE: This is deactivated because it causes the previous
16470 # if/elsif alignment to fail
16471 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
16472 #{ $alignment_type = $type; }
16474 if ($alignment_type) {
16475 $last_vertical_alignment_before_index = $i;
16478 #--------------------------------------------------------
16479 # Next see if we want to align AFTER the previous nonblank
16480 #--------------------------------------------------------
16482 # We want to line up ',' and interior ';' tokens, with the added
16483 # space AFTER these tokens. (Note: interior ';' is included
16484 # because it may occur in short blocks).
16487 # we haven't already set it
16490 # and its not the first token of the line
16493 # and it follows a blank
16494 && $types_to_go[ $i - 1 ] eq 'b'
16496 # and previous token IS one of these:
16497 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
16499 # and it's NOT one of these
16500 && ( $type !~ /^[b\#\)\]\}]$/ )
16502 # then go ahead and align
16506 $alignment_type = $vert_last_nonblank_type;
16509 #--------------------------------------------------------
16510 # then store the value
16511 #--------------------------------------------------------
16512 $matching_token_to_go[$i] = $alignment_type;
16513 if ( $type ne 'b' ) {
16514 $vert_last_nonblank_type = $type;
16515 $vert_last_nonblank_token = $token;
16516 $vert_last_nonblank_block_type = $block_type;
16524 sub terminal_type {
16526 # returns type of last token on this line (terminal token), as follows:
16527 # returns # for a full-line comment
16528 # returns ' ' for a blank line
16529 # otherwise returns final token type
16531 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
16533 # check for full-line comment..
16534 if ( $rtype->[$ibeg] eq '#' ) {
16535 return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg];
16539 # start at end and walk backwards..
16540 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
16542 # skip past any side comment and blanks
16543 next if ( $rtype->[$i] eq 'b' );
16544 next if ( $rtype->[$i] eq '#' );
16546 # found it..make sure it is a BLOCK termination,
16547 # but hide a terminal } after sort/grep/map because it is not
16548 # necessarily the end of the line. (terminal.t)
16549 my $terminal_type = $rtype->[$i];
16551 $terminal_type eq '}'
16552 && ( !$rblock_type->[$i]
16553 || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) )
16556 $terminal_type = 'b';
16558 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
16562 return wantarray ? ( ' ', $ibeg ) : ' ';
16566 { # set_bond_strengths
16568 my %is_good_keyword_breakpoint;
16569 my %is_lt_gt_le_ge;
16571 my %binary_bond_strength;
16578 sub bias_table_key {
16579 my ( $type, $token ) = @_;
16580 my $bias_table_key = $type;
16581 if ( $type eq 'k' ) {
16582 $bias_table_key = $token;
16583 if ( $token eq 'err' ) { $bias_table_key = 'or' }
16585 return $bias_table_key;
16588 sub set_bond_strengths {
16593 @q = qw(if unless while until for foreach);
16594 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
16596 @q = qw(lt gt le ge);
16597 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
16599 # The decision about where to break a line depends upon a "bond
16600 # strength" between tokens. The LOWER the bond strength, the MORE
16601 # likely a break. A bond strength may be any value but to simplify
16602 # things there are several pre-defined strength levels:
16604 # NO_BREAK => 10000;
16605 # VERY_STRONG => 100;
16609 # VERY_WEAK => 0.55;
16611 # The strength values are based on trial-and-error, and need to be
16612 # tweaked occasionally to get desired results. Some comments:
16614 # 1. Only relative strengths are important. small differences
16615 # in strengths can make big formatting differences.
16616 # 2. Each indentation level adds one unit of bond strength.
16617 # 3. A value of NO_BREAK makes an unbreakable bond
16618 # 4. A value of VERY_WEAK is the strength of a ','
16619 # 5. Values below NOMINAL are considered ok break points.
16620 # 6. Values above NOMINAL are considered poor break points.
16622 # The bond strengths should roughly follow precedence order where
16623 # possible. If you make changes, please check the results very
16624 # carefully on a variety of scripts. Testing with the -extrude
16625 # options is particularly helpful in exercising all of the rules.
16627 # Wherever possible, bond strengths are defined in the following
16628 # tables. There are two main stages to setting bond strengths and
16629 # two types of tables:
16631 # The first stage involves looking at each token individually and
16632 # defining left and right bond strengths, according to if we want
16633 # to break to the left or right side, and how good a break point it
16634 # is. For example tokens like =, ||, && make good break points and
16635 # will have low strengths, but one might want to break on either
16636 # side to put them at the end of one line or beginning of the next.
16638 # The second stage involves looking at certain pairs of tokens and
16639 # defining a bond strength for that particular pair. This second
16640 # stage has priority.
16642 #---------------------------------------------------------------
16643 # Bond Strength BEGIN Section 1.
16644 # Set left and right bond strengths of individual tokens.
16645 #---------------------------------------------------------------
16647 # NOTE: NO_BREAK's set in this section first are HINTS which will
16648 # probably not be honored. Essential NO_BREAKS's should be set in
16649 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
16650 # of this subroutine.
16652 # Note that we are setting defaults in this section. The user
16653 # cannot change bond strengths but can cause the left and right
16654 # bond strengths of any token type to be swapped through the use of
16655 # the -wba and -wbb flags. In this way the user can determine if a
16656 # breakpoint token should appear at the end of one line or the
16657 # beginning of the next line.
16659 # The hash keys in this section are token types, plus the text of
16660 # certain keywords like 'or', 'and'.
16662 # no break around possible filehandle
16663 $left_bond_strength{'Z'} = NO_BREAK;
16664 $right_bond_strength{'Z'} = NO_BREAK;
16666 # never put a bare word on a new line:
16667 # example print (STDERR, "bla"); will fail with break after (
16668 $left_bond_strength{'w'} = NO_BREAK;
16670 # blanks always have infinite strength to force breaks after
16672 $right_bond_strength{'b'} = NO_BREAK;
16674 # try not to break on exponentation
16675 @q = qw" ** .. ... <=> ";
16676 @left_bond_strength{@q} = (STRONG) x scalar(@q);
16677 @right_bond_strength{@q} = (STRONG) x scalar(@q);
16679 # The comma-arrow has very low precedence but not a good break point
16680 $left_bond_strength{'=>'} = NO_BREAK;
16681 $right_bond_strength{'=>'} = NOMINAL;
16683 # ok to break after label
16684 $left_bond_strength{'J'} = NO_BREAK;
16685 $right_bond_strength{'J'} = NOMINAL;
16686 $left_bond_strength{'j'} = STRONG;
16687 $right_bond_strength{'j'} = STRONG;
16688 $left_bond_strength{'A'} = STRONG;
16689 $right_bond_strength{'A'} = STRONG;
16691 $left_bond_strength{'->'} = STRONG;
16692 $right_bond_strength{'->'} = VERY_STRONG;
16694 $left_bond_strength{'CORE::'} = NOMINAL;
16695 $right_bond_strength{'CORE::'} = NO_BREAK;
16697 # breaking AFTER modulus operator is ok:
16699 @left_bond_strength{@q} = (STRONG) x scalar(@q);
16700 @right_bond_strength{@q} =
16701 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
16703 # Break AFTER math operators * and /
16705 @left_bond_strength{@q} = (STRONG) x scalar(@q);
16706 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
16708 # Break AFTER weakest math operators + and -
16709 # Make them weaker than * but a bit stronger than '.'
16711 @left_bond_strength{@q} = (STRONG) x scalar(@q);
16712 @right_bond_strength{@q} =
16713 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
16715 # breaking BEFORE these is just ok:
16717 @right_bond_strength{@q} = (STRONG) x scalar(@q);
16718 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
16720 # breaking before the string concatenation operator seems best
16721 # because it can be hard to see at the end of a line
16722 $right_bond_strength{'.'} = STRONG;
16723 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
16726 @left_bond_strength{@q} = (STRONG) x scalar(@q);
16727 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
16729 # make these a little weaker than nominal so that they get
16730 # favored for end-of-line characters
16731 @q = qw"!= == =~ !~ ~~ !~~";
16732 @left_bond_strength{@q} = (STRONG) x scalar(@q);
16733 @right_bond_strength{@q} =
16734 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
16736 # break AFTER these
16737 @q = qw" < > | & >= <=";
16738 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
16739 @right_bond_strength{@q} =
16740 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
16742 # breaking either before or after a quote is ok
16743 # but bias for breaking before a quote
16744 $left_bond_strength{'Q'} = NOMINAL;
16745 $right_bond_strength{'Q'} = NOMINAL + 0.02;
16746 $left_bond_strength{'q'} = NOMINAL;
16747 $right_bond_strength{'q'} = NOMINAL;
16749 # starting a line with a keyword is usually ok
16750 $left_bond_strength{'k'} = NOMINAL;
16752 # we usually want to bond a keyword strongly to what immediately
16753 # follows, rather than leaving it stranded at the end of a line
16754 $right_bond_strength{'k'} = STRONG;
16756 $left_bond_strength{'G'} = NOMINAL;
16757 $right_bond_strength{'G'} = STRONG;
16759 # assignment operators
16761 = **= += *= &= <<= &&=
16762 -= /= |= >>= ||= //=
16767 # Default is to break AFTER various assignment operators
16768 @left_bond_strength{@q} = (STRONG) x scalar(@q);
16769 @right_bond_strength{@q} =
16770 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
16772 # Default is to break BEFORE '&&' and '||' and '//'
16773 # set strength of '||' to same as '=' so that chains like
16774 # $a = $b || $c || $d will break before the first '||'
16775 $right_bond_strength{'||'} = NOMINAL;
16776 $left_bond_strength{'||'} = $right_bond_strength{'='};
16778 # same thing for '//'
16779 $right_bond_strength{'//'} = NOMINAL;
16780 $left_bond_strength{'//'} = $right_bond_strength{'='};
16782 # set strength of && a little higher than ||
16783 $right_bond_strength{'&&'} = NOMINAL;
16784 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
16786 $left_bond_strength{';'} = VERY_STRONG;
16787 $right_bond_strength{';'} = VERY_WEAK;
16788 $left_bond_strength{'f'} = VERY_STRONG;
16790 # make right strength of for ';' a little less than '='
16791 # to make for contents break after the ';' to avoid this:
16792 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
16793 # $number_of_fields )
16794 # and make it weaker than ',' and 'and' too
16795 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
16797 # The strengths of ?/: should be somewhere between
16798 # an '=' and a quote (NOMINAL),
16799 # make strength of ':' slightly less than '?' to help
16800 # break long chains of ? : after the colons
16801 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
16802 $right_bond_strength{':'} = NO_BREAK;
16803 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
16804 $right_bond_strength{'?'} = NO_BREAK;
16806 $left_bond_strength{','} = VERY_STRONG;
16807 $right_bond_strength{','} = VERY_WEAK;
16809 # remaining digraphs and trigraphs not defined above
16810 @q = qw( :: <> ++ --);
16811 @left_bond_strength{@q} = (WEAK) x scalar(@q);
16812 @right_bond_strength{@q} = (STRONG) x scalar(@q);
16814 # Set bond strengths of certain keywords
16815 # make 'or', 'err', 'and' slightly weaker than a ','
16816 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
16817 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
16818 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
16819 $left_bond_strength{'xor'} = NOMINAL;
16820 $right_bond_strength{'and'} = NOMINAL;
16821 $right_bond_strength{'or'} = NOMINAL;
16822 $right_bond_strength{'err'} = NOMINAL;
16823 $right_bond_strength{'xor'} = STRONG;
16825 #---------------------------------------------------------------
16826 # Bond Strength BEGIN Section 2.
16827 # Set binary rules for bond strengths between certain token types.
16828 #---------------------------------------------------------------
16830 # We have a little problem making tables which apply to the
16831 # container tokens. Here is a list of container tokens and
16834 # type tokens // meaning
16835 # { {, [, ( // indent
16836 # } }, ], ) // outdent
16837 # [ [ // left non-structural [ (enclosing an array index)
16838 # ] ] // right non-structural square bracket
16839 # ( ( // left non-structural paren
16840 # ) ) // right non-structural paren
16841 # L { // left non-structural curly brace (enclosing a key)
16842 # R } // right non-structural curly brace
16844 # Some rules apply to token types and some to just the token
16845 # itself. We solve the problem by combining type and token into a
16846 # new hash key for the container types.
16848 # If a rule applies to a token 'type' then we need to make rules
16849 # for each of these 'type.token' combinations:
16860 # If a rule applies to a token then we need to make rules for
16861 # these 'type.token' combinations:
16870 # allow long lines before final { in an if statement, as in:
16875 # Otherwise, the line before the { tends to be too short.
16877 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
16878 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
16880 # break on something like '} (', but keep this stronger than a ','
16881 # example is in 'howe.pl'
16882 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
16883 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
16885 # keep matrix and hash indices together
16886 # but make them a little below STRONG to allow breaking open
16887 # something like {'some-word'}{'some-very-long-word'} at the }{
16889 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
16890 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
16891 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
16892 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
16894 # increase strength to the point where a break in the following
16895 # will be after the opening paren rather than at the arrow:
16897 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
16899 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16900 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16901 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16902 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16903 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16904 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16906 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
16907 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
16908 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
16909 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
16911 #---------------------------------------------------------------
16912 # Binary NO_BREAK rules
16913 #---------------------------------------------------------------
16915 # use strict requires that bare word and => not be separated
16916 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
16917 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
16919 # Never break between a bareword and a following paren because
16920 # perl may give an error. For example, if a break is placed
16921 # between 'to_filehandle' and its '(' the following line will
16922 # give a syntax error [Carp.pm]: my( $no) =fileno(
16923 # to_filehandle( $in)) ;
16924 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
16925 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
16926 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
16927 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
16929 # use strict requires that bare word within braces not start new
16931 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
16933 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
16935 # use strict requires that bare word and => not be separated
16936 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
16938 # use strict does not allow separating type info from trailing { }
16939 # testfile is readmail.pl
16940 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
16941 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
16943 # As a defensive measure, do not break between a '(' and a
16944 # filehandle. In some cases, this can cause an error. For
16945 # example, the following program works:
16952 # But this program fails:
16960 # This is normally only a problem with the 'extrude' option
16961 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
16962 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
16964 # never break between sub name and opening paren
16965 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
16966 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
16968 # keep '}' together with ';'
16969 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
16971 # Breaking before a ++ can cause perl to guess wrong. For
16972 # example the following line will cause a syntax error
16973 # with -extrude if we break between '$i' and '++' [fixstyle2]
16974 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
16975 $nobreak_lhs{'++'} = NO_BREAK;
16977 # Do not break before a possible file handle
16978 $nobreak_lhs{'Z'} = NO_BREAK;
16980 # use strict hates bare words on any new line. For
16981 # example, a break before the underscore here provokes the
16982 # wrath of use strict:
16983 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
16984 $nobreak_rhs{'F'} = NO_BREAK;
16985 $nobreak_rhs{'CORE::'} = NO_BREAK;
16987 #---------------------------------------------------------------
16988 # Bond Strength BEGIN Section 3.
16989 # Define tables and values for applying a small bias to the above
16991 #---------------------------------------------------------------
16992 # Adding a small 'bias' to strengths is a simple way to make a line
16993 # break at the first of a sequence of identical terms. For
16994 # example, to force long string of conditional operators to break
16995 # with each line ending in a ':', we can add a small number to the
16996 # bond strength of each ':' (colon.t)
16997 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
16998 $delta_bias = 0.0001; # a very small strength level
17002 # patch-its always ok to break at end of line
17003 $nobreak_to_go[$max_index_to_go] = 0;
17005 # we start a new set of bias values for each line
17007 @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
17008 my $code_bias = -.01; # bias for closing block braces
17013 my $last_nonblank_type = $type;
17014 my $last_nonblank_token = $token;
17015 my $list_str = $left_bond_strength{'?'};
17017 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
17018 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
17021 # main loop to compute bond strengths between each pair of tokens
17022 foreach my $i ( 0 .. $max_index_to_go ) {
17023 $last_type = $type;
17024 if ( $type ne 'b' ) {
17025 $last_nonblank_type = $type;
17026 $last_nonblank_token = $token;
17028 $type = $types_to_go[$i];
17030 # strength on both sides of a blank is the same
17031 if ( $type eq 'b' && $last_type ne 'b' ) {
17032 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
17036 $token = $tokens_to_go[$i];
17037 $block_type = $block_type_to_go[$i];
17039 $next_type = $types_to_go[$i_next];
17040 $next_token = $tokens_to_go[$i_next];
17041 $total_nesting_depth = $nesting_depth_to_go[$i_next];
17042 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
17043 $next_nonblank_type = $types_to_go[$i_next_nonblank];
17044 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17046 # We are computing the strength of the bond between the current
17047 # token and the NEXT token.
17049 #---------------------------------------------------------------
17050 # Bond Strength Section 1:
17051 # First Approximation.
17052 # Use minimum of individual left and right tabulated bond
17054 #---------------------------------------------------------------
17055 my $bsr = $right_bond_strength{$type};
17056 my $bsl = $left_bond_strength{$next_nonblank_type};
17058 # define right bond strengths of certain keywords
17059 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
17060 $bsr = $right_bond_strength{$token};
17062 elsif ( $token eq 'ne' or $token eq 'eq' ) {
17066 # set terminal bond strength to the nominal value
17067 # this will cause good preceding breaks to be retained
17068 if ( $i_next_nonblank > $max_index_to_go ) {
17072 # define right bond strengths of certain keywords
17073 if ( $next_nonblank_type eq 'k'
17074 && defined( $left_bond_strength{$next_nonblank_token} ) )
17076 $bsl = $left_bond_strength{$next_nonblank_token};
17078 elsif ($next_nonblank_token eq 'ne'
17079 or $next_nonblank_token eq 'eq' )
17083 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
17084 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
17087 # Use the minimum of the left and right strengths. Note: it might
17088 # seem that we would want to keep a NO_BREAK if either token has
17089 # this value. This didn't work, for example because in an arrow
17090 # list, it prevents the comma from separating from the following
17091 # bare word (which is probably quoted by its arrow). So necessary
17092 # NO_BREAK's have to be handled as special cases in the final
17094 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
17095 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
17096 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
17097 my $bond_str_1 = $bond_str;
17099 #---------------------------------------------------------------
17100 # Bond Strength Section 2:
17101 # Apply hardwired rules..
17102 #---------------------------------------------------------------
17104 # Patch to put terminal or clauses on a new line: Weaken the bond
17105 # at an || followed by die or similar keyword to make the terminal
17106 # or clause fall on a new line, like this:
17108 # my $class = shift
17109 # || die "Cannot add broadcast: No class identifier found";
17111 # Otherwise the break will be at the previous '=' since the || and
17112 # = have the same starting strength and the or is biased, like
17116 # shift || die "Cannot add broadcast: No class identifier found";
17118 # In any case if the user places a break at either the = or the ||
17119 # it should remain there.
17120 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
17121 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
17122 if ( $want_break_before{$token} && $i > 0 ) {
17123 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
17126 $bond_str -= $delta_bias;
17131 # good to break after end of code blocks
17132 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
17134 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
17135 $code_bias += $delta_bias;
17138 if ( $type eq 'k' ) {
17140 # allow certain control keywords to stand out
17141 if ( $next_nonblank_type eq 'k'
17142 && $is_last_next_redo_return{$token} )
17144 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
17147 # Don't break after keyword my. This is a quick fix for a
17148 # rare problem with perl. An example is this line from file
17151 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
17152 # $this->{'question'} ) )
17154 if ( $token eq 'my' ) {
17155 $bond_str = NO_BREAK;
17160 # good to break before 'if', 'unless', etc
17161 if ( $is_if_brace_follower{$next_nonblank_token} ) {
17162 $bond_str = VERY_WEAK;
17165 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
17167 # FIXME: needs more testing
17168 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
17169 $bond_str = $list_str if ( $bond_str > $list_str );
17172 # keywords like 'unless', 'if', etc, within statements
17174 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
17175 $bond_str = VERY_WEAK / 1.05;
17179 # try not to break before a comma-arrow
17180 elsif ( $next_nonblank_type eq '=>' ) {
17181 if ( $bond_str < STRONG ) { $bond_str = STRONG }
17184 #---------------------------------------------------------------
17185 # Additional hardwired NOBREAK rules
17186 #---------------------------------------------------------------
17188 # map1.t -- correct for a quirk in perl
17190 && $next_nonblank_type eq 'i'
17191 && $last_nonblank_type eq 'k'
17192 && $is_sort_map_grep{$last_nonblank_token} )
17194 # /^(sort|map|grep)$/ )
17196 $bond_str = NO_BREAK;
17199 # extrude.t: do not break before paren at:
17201 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
17202 $bond_str = NO_BREAK;
17205 # in older version of perl, use strict can cause problems with
17206 # breaks before bare words following opening parens. For example,
17207 # this will fail under older versions if a break is made between
17208 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
17209 # command"); close MAIL;
17210 if ( $type eq '{' ) {
17212 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
17214 # but it's fine to break if the word is followed by a '=>'
17215 # or if it is obviously a sub call
17216 my $i_next_next_nonblank = $i_next_nonblank + 1;
17217 my $next_next_type = $types_to_go[$i_next_next_nonblank];
17218 if ( $next_next_type eq 'b'
17219 && $i_next_nonblank < $max_index_to_go )
17221 $i_next_next_nonblank++;
17222 $next_next_type = $types_to_go[$i_next_next_nonblank];
17225 # We'll check for an old breakpoint and keep a leading
17226 # bareword if it was that way in the input file.
17227 # Presumably it was ok that way. For example, the
17228 # following would remain unchanged:
17231 # January, February, March, April,
17232 # May, June, July, August,
17233 # September, October, November, December,
17236 # This should be sufficient:
17238 !$old_breakpoint_to_go[$i]
17239 && ( $next_next_type eq ','
17240 || $next_next_type eq '}' )
17243 $bond_str = NO_BREAK;
17248 # Do not break between a possible filehandle and a ? or / and do
17249 # not introduce a break after it if there is no blank
17251 elsif ( $type eq 'Z' ) {
17256 # if there is no blank and we do not want one. Examples:
17257 # print $x++ # do not break after $x
17258 # print HTML"HELLO" # break ok after HTML
17261 && defined( $want_left_space{$next_type} )
17262 && $want_left_space{$next_type} == WS_NO
17265 # or we might be followed by the start of a quote
17266 || $next_nonblank_type =~ /^[\/\?]$/
17269 $bond_str = NO_BREAK;
17273 # Breaking before a ? before a quote can cause trouble if
17274 # they are not separated by a blank.
17275 # Example: a syntax error occurs if you break before the ? here
17276 # my$logic=join$all?' && ':' || ',@regexps;
17277 # From: Professional_Perl_Programming_Code/multifind.pl
17278 if ( $next_nonblank_type eq '?' ) {
17279 $bond_str = NO_BREAK
17280 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
17283 # Breaking before a . followed by a number
17284 # can cause trouble if there is no intervening space
17285 # Example: a syntax error occurs if you break before the .2 here
17286 # $str .= pack($endian.2, ensurrogate($ord));
17287 # From: perl58/Unicode.pm
17288 elsif ( $next_nonblank_type eq '.' ) {
17289 $bond_str = NO_BREAK
17290 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
17293 # patch to put cuddled elses back together when on multiple
17294 # lines, as in: } \n else \n { \n
17295 if ($rOpts_cuddled_else) {
17297 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
17298 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
17300 $bond_str = NO_BREAK;
17303 my $bond_str_2 = $bond_str;
17305 #---------------------------------------------------------------
17306 # End of hardwired rules
17307 #---------------------------------------------------------------
17309 #---------------------------------------------------------------
17310 # Bond Strength Section 3:
17311 # Apply table rules. These have priority over the above
17313 #---------------------------------------------------------------
17315 my $tabulated_bond_str;
17317 my $rtype = $next_nonblank_type;
17318 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
17319 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
17320 $rtype = $next_nonblank_type . $next_nonblank_token;
17323 if ( $binary_bond_strength{$ltype}{$rtype} ) {
17324 $bond_str = $binary_bond_strength{$ltype}{$rtype};
17325 $tabulated_bond_str = $bond_str;
17328 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
17329 $bond_str = NO_BREAK;
17330 $tabulated_bond_str = $bond_str;
17332 my $bond_str_3 = $bond_str;
17334 # If the hardwired rules conflict with the tabulated bond
17335 # strength then there is an inconsistency that should be fixed
17336 FORMATTER_DEBUG_FLAG_BOND_TABLES
17337 && $tabulated_bond_str
17339 && $bond_str_1 != $bond_str_2
17340 && $bond_str_2 != $tabulated_bond_str
17343 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
17346 #-----------------------------------------------------------------
17347 # Bond Strength Section 4:
17348 # Modify strengths of certain tokens which often occur in sequence
17349 # by adding a small bias to each one in turn so that the breaks
17350 # occur from left to right.
17352 # Note that we only changing strengths by small amounts here,
17353 # and usually increasing, so we should not be altering any NO_BREAKs.
17354 # Other routines which check for NO_BREAKs will use a tolerance
17355 # of one to avoid any problem.
17356 #-----------------------------------------------------------------
17358 # The bias tables use special keys
17359 my $left_key = bias_table_key( $type, $token );
17361 bias_table_key( $next_nonblank_type, $next_nonblank_token );
17363 # add any bias set by sub scan_list at old comma break points.
17364 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
17367 elsif ( defined( $bias{$left_key} ) ) {
17368 if ( !$want_break_before{$left_key} ) {
17369 $bias{$left_key} += $delta_bias;
17370 $bond_str += $bias{$left_key};
17375 if ( defined( $bias{$right_key} ) ) {
17376 if ( $want_break_before{$right_key} ) {
17378 # for leading '.' align all but 'short' quotes; the idea
17379 # is to not place something like "\n" on a single line.
17380 if ( $right_key eq '.' ) {
17382 $last_nonblank_type eq '.'
17385 $rOpts_short_concatenation_item_length )
17386 && ( !$is_closing_token{$token} )
17389 $bias{$right_key} += $delta_bias;
17393 $bias{$right_key} += $delta_bias;
17395 $bond_str += $bias{$right_key};
17398 my $bond_str_4 = $bond_str;
17400 #---------------------------------------------------------------
17401 # Bond Strength Section 5:
17402 # Fifth Approximation.
17403 # Take nesting depth into account by adding the nesting depth
17404 # to the bond strength.
17405 #---------------------------------------------------------------
17408 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
17409 if ( $total_nesting_depth > 0 ) {
17410 $strength = $bond_str + $total_nesting_depth;
17413 $strength = $bond_str;
17417 $strength = NO_BREAK;
17420 #---------------------------------------------------------------
17421 # Bond Strength Section 6:
17422 # Sixth Approximation. Welds.
17423 #---------------------------------------------------------------
17425 # Do not allow a break within welds,
17426 if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
17428 # But encourage breaking after opening welded tokens
17429 elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
17433 ## # TESTING: weaken before first weld closing token
17434 ## # This did not help
17435 ## elsif ($i_next_nonblank <= $max_index_to_go
17436 ## && weld_len_right_to_go($i_next_nonblank)
17437 ## && $next_nonblank_token =~ /^[\}\]\)]$/ )
17439 ## $strength -= 0.9;
17442 # always break after side comment
17443 if ( $type eq '#' ) { $strength = 0 }
17445 $bond_strength_to_go[$i] = $strength;
17447 FORMATTER_DEBUG_FLAG_BOND && do {
17448 my $str = substr( $token, 0, 15 );
17449 $str .= ' ' x ( 16 - length($str) );
17451 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
17455 } ## end sub set_bond_strengths
17458 sub pad_array_to_go {
17460 # to simplify coding in scan_list and set_bond_strengths, it helps
17461 # to create some extra blank tokens at the end of the arrays
17462 $tokens_to_go[ $max_index_to_go + 1 ] = '';
17463 $tokens_to_go[ $max_index_to_go + 2 ] = '';
17464 $types_to_go[ $max_index_to_go + 1 ] = 'b';
17465 $types_to_go[ $max_index_to_go + 2 ] = 'b';
17466 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
17467 $nesting_depth_to_go[$max_index_to_go];
17470 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
17471 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
17473 # shouldn't happen:
17474 unless ( get_saw_brace_error() ) {
17476 "Program bug in scan_list: hit nesting error which should have been caught\n"
17478 report_definite_bug();
17482 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
17487 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
17488 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
17493 { # begin scan_list
17496 $block_type, $current_depth,
17498 $i_last_nonblank_token, $last_colon_sequence_number,
17499 $last_nonblank_token, $last_nonblank_type,
17500 $last_nonblank_block_type, $last_old_breakpoint_count,
17501 $minimum_depth, $next_nonblank_block_type,
17502 $next_nonblank_token, $next_nonblank_type,
17503 $old_breakpoint_count, $starting_breakpoint_count,
17504 $starting_depth, $token,
17505 $type, $type_sequence,
17509 @breakpoint_stack, @breakpoint_undo_stack,
17510 @comma_index, @container_type,
17511 @identifier_count_stack, @index_before_arrow,
17512 @interrupted_list, @item_count_stack,
17513 @last_comma_index, @last_dot_index,
17514 @last_nonblank_type, @old_breakpoint_count_stack,
17515 @opening_structure_index_stack, @rfor_semicolon_list,
17516 @has_old_logical_breakpoints, @rand_or_list,
17520 # routine to define essential variables when we go 'up' to
17522 sub check_for_new_minimum_depth {
17524 if ( $depth < $minimum_depth ) {
17526 $minimum_depth = $depth;
17528 # these arrays need not retain values between calls
17529 $breakpoint_stack[$depth] = $starting_breakpoint_count;
17530 $container_type[$depth] = "";
17531 $identifier_count_stack[$depth] = 0;
17532 $index_before_arrow[$depth] = -1;
17533 $interrupted_list[$depth] = 1;
17534 $item_count_stack[$depth] = 0;
17535 $last_nonblank_type[$depth] = "";
17536 $opening_structure_index_stack[$depth] = -1;
17538 $breakpoint_undo_stack[$depth] = undef;
17539 $comma_index[$depth] = undef;
17540 $last_comma_index[$depth] = undef;
17541 $last_dot_index[$depth] = undef;
17542 $old_breakpoint_count_stack[$depth] = undef;
17543 $has_old_logical_breakpoints[$depth] = 0;
17544 $rand_or_list[$depth] = [];
17545 $rfor_semicolon_list[$depth] = [];
17546 $i_equals[$depth] = -1;
17548 # these arrays must retain values between calls
17549 if ( !defined( $has_broken_sublist[$depth] ) ) {
17550 $dont_align[$depth] = 0;
17551 $has_broken_sublist[$depth] = 0;
17552 $want_comma_break[$depth] = 0;
17558 # routine to decide which commas to break at within a container;
17560 # $bp_count = number of comma breakpoints set
17561 # $do_not_break_apart = a flag indicating if container need not
17563 sub set_comma_breakpoints {
17567 my $do_not_break_apart = 0;
17570 if ( $item_count_stack[$dd] ) {
17572 # handle commas not in containers...
17573 if ( $dont_align[$dd] ) {
17574 do_uncontained_comma_breaks($dd);
17577 # handle commas within containers...
17579 my $fbc = $forced_breakpoint_count;
17581 # always open comma lists not preceded by keywords,
17582 # barewords, identifiers (that is, anything that doesn't
17583 # look like a function call)
17584 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
17586 set_comma_breakpoints_do(
17588 $opening_structure_index_stack[$dd],
17590 $item_count_stack[$dd],
17591 $identifier_count_stack[$dd],
17593 $next_nonblank_type,
17594 $container_type[$dd],
17595 $interrupted_list[$dd],
17596 \$do_not_break_apart,
17599 $bp_count = $forced_breakpoint_count - $fbc;
17600 $do_not_break_apart = 0 if $must_break_open;
17603 return ( $bp_count, $do_not_break_apart );
17606 sub do_uncontained_comma_breaks {
17608 # Handle commas not in containers...
17609 # This is a catch-all routine for commas that we
17610 # don't know what to do with because the don't fall
17611 # within containers. We will bias the bond strength
17612 # to break at commas which ended lines in the input
17613 # file. This usually works better than just trying
17614 # to put as many items on a line as possible. A
17615 # downside is that if the input file is garbage it
17616 # won't work very well. However, the user can always
17617 # prevent following the old breakpoints with the
17621 my $old_comma_break_count = 0;
17622 foreach my $ii ( @{ $comma_index[$dd] } ) {
17623 if ( $old_breakpoint_to_go[$ii] ) {
17624 $old_comma_break_count++;
17625 $bond_strength_to_go[$ii] = $bias;
17627 # reduce bias magnitude to force breaks in order
17632 # Also put a break before the first comma if
17633 # (1) there was a break there in the input, and
17634 # (2) there was exactly one old break before the first comma break
17635 # (3) OLD: there are multiple old comma breaks
17636 # (3) NEW: there are one or more old comma breaks (see return example)
17638 # For example, we will follow the user and break after
17639 # 'print' in this snippet:
17641 # "conformability (Not the same dimension)\n",
17642 # "\t", $have, " is ", text_unit($hu), "\n",
17643 # "\t", $want, " is ", text_unit($wu), "\n",
17646 # Another example, just one comma, where we will break after
17649 # $x * cos($a) - $y * sin($a),
17650 # $x * sin($a) + $y * cos($a);
17652 # Breaking a print statement:
17654 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
17655 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
17657 # But we will not force a break after the opening paren here
17658 # (causes a blinker):
17659 # $heap->{stream}->set_output_filter(
17660 # poe::filter::reference->new('myotherfreezer') ),
17663 my $i_first_comma = $comma_index[$dd]->[0];
17664 if ( $old_breakpoint_to_go[$i_first_comma] ) {
17665 my $level_comma = $levels_to_go[$i_first_comma];
17668 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
17669 if ( $old_breakpoint_to_go[$ii] ) {
17671 last if ( $obp_count > 1 );
17673 if ( $levels_to_go[$ii] == $level_comma );
17677 # Changed rule from multiple old commas to just one here:
17678 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
17680 # Do not to break before an opening token because
17681 # it can lead to "blinkers".
17682 my $ibreakm = $ibreak;
17683 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
17684 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
17686 set_forced_breakpoint($ibreak);
17693 my %is_logical_container;
17696 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
17697 @is_logical_container{@q} = (1) x scalar(@q);
17700 sub set_for_semicolon_breakpoints {
17702 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
17703 set_forced_breakpoint($_);
17708 sub set_logical_breakpoints {
17711 $item_count_stack[$dd] == 0
17712 && $is_logical_container{ $container_type[$dd] }
17714 || $has_old_logical_breakpoints[$dd]
17718 # Look for breaks in this order:
17721 foreach my $i ( 0 .. 3 ) {
17722 if ( $rand_or_list[$dd][$i] ) {
17723 foreach ( @{ $rand_or_list[$dd][$i] } ) {
17724 set_forced_breakpoint($_);
17727 # break at any 'if' and 'unless' too
17728 foreach ( @{ $rand_or_list[$dd][4] } ) {
17729 set_forced_breakpoint($_);
17731 $rand_or_list[$dd] = [];
17739 sub is_unbreakable_container {
17741 # never break a container of one of these types
17742 # because bad things can happen (map1.t)
17744 return $is_sort_map_grep{ $container_type[$dd] };
17749 # This routine is responsible for setting line breaks for all lists,
17750 # so that hierarchical structure can be displayed and so that list
17751 # items can be vertically aligned. The output of this routine is
17752 # stored in the array @forced_breakpoint_to_go, which is used to set
17753 # final breakpoints.
17755 $starting_depth = $nesting_depth_to_go[0];
17758 $current_depth = $starting_depth;
17760 $last_colon_sequence_number = -1;
17761 $last_nonblank_token = ';';
17762 $last_nonblank_type = ';';
17763 $last_nonblank_block_type = ' ';
17764 $last_old_breakpoint_count = 0;
17765 $minimum_depth = $current_depth + 1; # forces update in check below
17766 $old_breakpoint_count = 0;
17767 $starting_breakpoint_count = $forced_breakpoint_count;
17770 $type_sequence = '';
17772 my $total_depth_variation = 0;
17773 my $i_old_assignment_break;
17774 my $depth_last = $starting_depth;
17776 check_for_new_minimum_depth($current_depth);
17778 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
17779 my $want_previous_breakpoint = -1;
17781 my $saw_good_breakpoint;
17782 my $i_line_end = -1;
17783 my $i_line_start = -1;
17785 # loop over all tokens in this batch
17786 while ( ++$i <= $max_index_to_go ) {
17787 if ( $type ne 'b' ) {
17788 $i_last_nonblank_token = $i - 1;
17789 $last_nonblank_type = $type;
17790 $last_nonblank_token = $token;
17791 $last_nonblank_block_type = $block_type;
17792 } ## end if ( $type ne 'b' )
17793 $type = $types_to_go[$i];
17794 $block_type = $block_type_to_go[$i];
17795 $token = $tokens_to_go[$i];
17796 $type_sequence = $type_sequence_to_go[$i];
17797 my $next_type = $types_to_go[ $i + 1 ];
17798 my $next_token = $tokens_to_go[ $i + 1 ];
17799 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
17800 $next_nonblank_type = $types_to_go[$i_next_nonblank];
17801 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17802 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17804 # set break if flag was set
17805 if ( $want_previous_breakpoint >= 0 ) {
17806 set_forced_breakpoint($want_previous_breakpoint);
17807 $want_previous_breakpoint = -1;
17810 $last_old_breakpoint_count = $old_breakpoint_count;
17811 if ( $old_breakpoint_to_go[$i] ) {
17813 $i_line_start = $i_next_nonblank;
17815 $old_breakpoint_count++;
17817 # Break before certain keywords if user broke there and
17818 # this is a 'safe' break point. The idea is to retain
17819 # any preferred breaks for sequential list operations,
17820 # like a schwartzian transform.
17821 if ($rOpts_break_at_old_keyword_breakpoints) {
17823 $next_nonblank_type eq 'k'
17824 && $is_keyword_returning_list{$next_nonblank_token}
17825 && ( $type =~ /^[=\)\]\}Riw]$/
17827 && $is_keyword_returning_list{$token} )
17831 # we actually have to set this break next time through
17832 # the loop because if we are at a closing token (such
17833 # as '}') which forms a one-line block, this break might
17835 $want_previous_breakpoint = $i;
17836 } ## end if ( $next_nonblank_type...)
17837 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
17839 # Break before attributes if user broke there
17840 if ($rOpts_break_at_old_attribute_breakpoints) {
17841 if ( $next_nonblank_type eq 'A' ) {
17842 $want_previous_breakpoint = $i;
17846 # remember an = break as possible good break point
17847 if ( $is_assignment{$type} ) {
17848 $i_old_assignment_break = $i;
17850 elsif ( $is_assignment{$next_nonblank_type} ) {
17851 $i_old_assignment_break = $i_next_nonblank;
17853 } ## end if ( $old_breakpoint_to_go...)
17855 next if ( $type eq 'b' );
17856 $depth = $nesting_depth_to_go[ $i + 1 ];
17858 $total_depth_variation += abs( $depth - $depth_last );
17859 $depth_last = $depth;
17861 # safety check - be sure we always break after a comment
17862 # Shouldn't happen .. an error here probably means that the
17863 # nobreak flag did not get turned off correctly during
17865 if ( $type eq '#' ) {
17866 if ( $i != $max_index_to_go ) {
17868 "Non-fatal program bug: backup logic needed to break after a comment\n"
17870 report_definite_bug();
17871 $nobreak_to_go[$i] = 0;
17872 set_forced_breakpoint($i);
17873 } ## end if ( $i != $max_index_to_go)
17874 } ## end if ( $type eq '#' )
17876 # Force breakpoints at certain tokens in long lines.
17877 # Note that such breakpoints will be undone later if these tokens
17878 # are fully contained within parens on a line.
17881 # break before a keyword within a line
17885 # if one of these keywords:
17886 && $token =~ /^(if|unless|while|until|for)$/
17888 # but do not break at something like '1 while'
17889 && ( $last_nonblank_type ne 'n' || $i > 2 )
17891 # and let keywords follow a closing 'do' brace
17892 && $last_nonblank_block_type ne 'do'
17897 # or container is broken (by side-comment, etc)
17898 || ( $next_nonblank_token eq '('
17899 && $mate_index_to_go[$i_next_nonblank] < $i )
17903 set_forced_breakpoint( $i - 1 );
17904 } ## end if ( $type eq 'k' && $i...)
17906 # remember locations of '||' and '&&' for possible breaks if we
17907 # decide this is a long logical expression.
17908 if ( $type eq '||' ) {
17909 push @{ $rand_or_list[$depth][2] }, $i;
17910 ++$has_old_logical_breakpoints[$depth]
17911 if ( ( $i == $i_line_start || $i == $i_line_end )
17912 && $rOpts_break_at_old_logical_breakpoints );
17913 } ## end if ( $type eq '||' )
17914 elsif ( $type eq '&&' ) {
17915 push @{ $rand_or_list[$depth][3] }, $i;
17916 ++$has_old_logical_breakpoints[$depth]
17917 if ( ( $i == $i_line_start || $i == $i_line_end )
17918 && $rOpts_break_at_old_logical_breakpoints );
17919 } ## end elsif ( $type eq '&&' )
17920 elsif ( $type eq 'f' ) {
17921 push @{ $rfor_semicolon_list[$depth] }, $i;
17923 elsif ( $type eq 'k' ) {
17924 if ( $token eq 'and' ) {
17925 push @{ $rand_or_list[$depth][1] }, $i;
17926 ++$has_old_logical_breakpoints[$depth]
17927 if ( ( $i == $i_line_start || $i == $i_line_end )
17928 && $rOpts_break_at_old_logical_breakpoints );
17929 } ## end if ( $token eq 'and' )
17931 # break immediately at 'or's which are probably not in a logical
17932 # block -- but we will break in logical breaks below so that
17933 # they do not add to the forced_breakpoint_count
17934 elsif ( $token eq 'or' ) {
17935 push @{ $rand_or_list[$depth][0] }, $i;
17936 ++$has_old_logical_breakpoints[$depth]
17937 if ( ( $i == $i_line_start || $i == $i_line_end )
17938 && $rOpts_break_at_old_logical_breakpoints );
17939 if ( $is_logical_container{ $container_type[$depth] } ) {
17942 if ($is_long_line) { set_forced_breakpoint($i) }
17943 elsif ( ( $i == $i_line_start || $i == $i_line_end )
17944 && $rOpts_break_at_old_logical_breakpoints )
17946 $saw_good_breakpoint = 1;
17948 } ## end else [ if ( $is_logical_container...)]
17949 } ## end elsif ( $token eq 'or' )
17950 elsif ( $token eq 'if' || $token eq 'unless' ) {
17951 push @{ $rand_or_list[$depth][4] }, $i;
17952 if ( ( $i == $i_line_start || $i == $i_line_end )
17953 && $rOpts_break_at_old_logical_breakpoints )
17955 set_forced_breakpoint($i);
17957 } ## end elsif ( $token eq 'if' ||...)
17958 } ## end elsif ( $type eq 'k' )
17959 elsif ( $is_assignment{$type} ) {
17960 $i_equals[$depth] = $i;
17963 if ($type_sequence) {
17965 # handle any postponed closing breakpoints
17966 if ( $token =~ /^[\)\]\}\:]$/ ) {
17967 if ( $type eq ':' ) {
17968 $last_colon_sequence_number = $type_sequence;
17970 # retain break at a ':' line break
17971 if ( ( $i == $i_line_start || $i == $i_line_end )
17972 && $rOpts_break_at_old_ternary_breakpoints )
17975 set_forced_breakpoint($i);
17977 # break at previous '='
17978 if ( $i_equals[$depth] > 0 ) {
17979 set_forced_breakpoint( $i_equals[$depth] );
17980 $i_equals[$depth] = -1;
17982 } ## end if ( ( $i == $i_line_start...))
17983 } ## end if ( $type eq ':' )
17984 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
17985 my $inc = ( $type eq ':' ) ? 0 : 1;
17986 set_forced_breakpoint( $i - $inc );
17987 delete $postponed_breakpoint{$type_sequence};
17989 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
17991 # set breaks at ?/: if they will get separated (and are
17992 # not a ?/: chain), or if the '?' is at the end of the
17994 elsif ( $token eq '?' ) {
17995 my $i_colon = $mate_index_to_go[$i];
17997 $i_colon <= 0 # the ':' is not in this batch
17998 || $i == 0 # this '?' is the first token of the line
18000 $max_index_to_go # or this '?' is the last token
18004 # don't break at a '?' if preceded by ':' on
18005 # this line of previous ?/: pair on this line.
18006 # This is an attempt to preserve a chain of ?/:
18007 # expressions (elsif2.t). And don't break if
18008 # this has a side comment.
18009 set_forced_breakpoint($i)
18011 $type_sequence == (
18012 $last_colon_sequence_number +
18013 TYPE_SEQUENCE_INCREMENT
18015 || $tokens_to_go[$max_index_to_go] eq '#'
18017 set_closing_breakpoint($i);
18018 } ## end if ( $i_colon <= 0 ||...)
18019 } ## end elsif ( $token eq '?' )
18020 } ## end if ($type_sequence)
18022 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
18024 #------------------------------------------------------------
18025 # Handle Increasing Depth..
18027 # prepare for a new list when depth increases
18028 # token $i is a '(','{', or '['
18029 #------------------------------------------------------------
18030 if ( $depth > $current_depth ) {
18032 $breakpoint_stack[$depth] = $forced_breakpoint_count;
18033 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
18034 $has_broken_sublist[$depth] = 0;
18035 $identifier_count_stack[$depth] = 0;
18036 $index_before_arrow[$depth] = -1;
18037 $interrupted_list[$depth] = 0;
18038 $item_count_stack[$depth] = 0;
18039 $last_comma_index[$depth] = undef;
18040 $last_dot_index[$depth] = undef;
18041 $last_nonblank_type[$depth] = $last_nonblank_type;
18042 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
18043 $opening_structure_index_stack[$depth] = $i;
18044 $rand_or_list[$depth] = [];
18045 $rfor_semicolon_list[$depth] = [];
18046 $i_equals[$depth] = -1;
18047 $want_comma_break[$depth] = 0;
18048 $container_type[$depth] =
18049 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
18050 ? $last_nonblank_token
18052 $has_old_logical_breakpoints[$depth] = 0;
18054 # if line ends here then signal closing token to break
18055 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
18057 set_closing_breakpoint($i);
18060 # Not all lists of values should be vertically aligned..
18061 $dont_align[$depth] =
18063 # code BLOCKS are handled at a higher level
18064 ( $block_type ne "" )
18066 # certain paren lists
18067 || ( $type eq '(' ) && (
18069 # it does not usually look good to align a list of
18070 # identifiers in a parameter list, as in:
18071 # my($var1, $var2, ...)
18072 # (This test should probably be refined, for now I'm just
18073 # testing for any keyword)
18074 ( $last_nonblank_type eq 'k' )
18076 # a trailing '(' usually indicates a non-list
18077 || ( $next_nonblank_type eq '(' )
18080 # patch to outdent opening brace of long if/for/..
18081 # statements (like this one). See similar coding in
18082 # set_continuation breaks. We have also catch it here for
18083 # short line fragments which otherwise will not go through
18084 # set_continuation_breaks.
18088 # if we have the ')' but not its '(' in this batch..
18089 && ( $last_nonblank_token eq ')' )
18090 && $mate_index_to_go[$i_last_nonblank_token] < 0
18092 # and user wants brace to left
18093 && !$rOpts->{'opening-brace-always-on-right'}
18095 && ( $type eq '{' ) # should be true
18096 && ( $token eq '{' ) # should be true
18099 set_forced_breakpoint( $i - 1 );
18100 } ## end if ( $block_type && ( ...))
18101 } ## end if ( $depth > $current_depth)
18103 #------------------------------------------------------------
18104 # Handle Decreasing Depth..
18106 # finish off any old list when depth decreases
18107 # token $i is a ')','}', or ']'
18108 #------------------------------------------------------------
18109 elsif ( $depth < $current_depth ) {
18111 check_for_new_minimum_depth($depth);
18113 # force all outer logical containers to break after we see on
18115 $has_old_logical_breakpoints[$depth] ||=
18116 $has_old_logical_breakpoints[$current_depth];
18118 # Patch to break between ') {' if the paren list is broken.
18119 # There is similar logic in set_continuation_breaks for
18120 # non-broken lists.
18122 && $next_nonblank_block_type
18123 && $interrupted_list[$current_depth]
18124 && $next_nonblank_type eq '{'
18125 && !$rOpts->{'opening-brace-always-on-right'} )
18127 set_forced_breakpoint($i);
18128 } ## end if ( $token eq ')' && ...
18130 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
18132 # set breaks at commas if necessary
18133 my ( $bp_count, $do_not_break_apart ) =
18134 set_comma_breakpoints($current_depth);
18136 my $i_opening = $opening_structure_index_stack[$current_depth];
18137 my $saw_opening_structure = ( $i_opening >= 0 );
18139 # this term is long if we had to break at interior commas..
18140 my $is_long_term = $bp_count > 0;
18142 # If this is a short container with one or more comma arrows,
18143 # then we will mark it as a long term to open it if requested.
18144 # $rOpts_comma_arrow_breakpoints =
18145 # 0 - open only if comma precedes closing brace
18146 # 1 - stable: except for one line blocks
18147 # 2 - try to form 1 line blocks
18149 # 4 - always open up if vt=0
18150 # 5 - stable: even for one line blocks if vt=0
18151 if ( !$is_long_term
18152 && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
18153 && $index_before_arrow[ $depth + 1 ] > 0
18154 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
18157 $is_long_term = $rOpts_comma_arrow_breakpoints == 4
18158 || ( $rOpts_comma_arrow_breakpoints == 0
18159 && $last_nonblank_token eq ',' )
18160 || ( $rOpts_comma_arrow_breakpoints == 5
18161 && $old_breakpoint_to_go[$i_opening] );
18162 } ## end if ( !$is_long_term &&...)
18164 # mark term as long if the length between opening and closing
18165 # parens exceeds allowed line length
18166 if ( !$is_long_term && $saw_opening_structure ) {
18167 my $i_opening_minus = find_token_starting_list($i_opening);
18169 # Note: we have to allow for one extra space after a
18170 # closing token so that we do not strand a comma or
18171 # semicolon, hence the '>=' here (oneline.t)
18172 # Note: we ignore left weld lengths here for best results
18174 excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
18175 } ## end if ( !$is_long_term &&...)
18177 # We've set breaks after all comma-arrows. Now we have to
18178 # undo them if this can be a one-line block
18179 # (the only breakpoints set will be due to comma-arrows)
18182 # user doesn't require breaking after all comma-arrows
18183 ( $rOpts_comma_arrow_breakpoints != 0 )
18184 && ( $rOpts_comma_arrow_breakpoints != 4 )
18186 # and if the opening structure is in this batch
18187 && $saw_opening_structure
18189 # and either on the same old line
18191 $old_breakpoint_count_stack[$current_depth] ==
18192 $last_old_breakpoint_count
18194 # or user wants to form long blocks with arrows
18195 || $rOpts_comma_arrow_breakpoints == 2
18198 # and we made some breakpoints between the opening and closing
18199 && ( $breakpoint_undo_stack[$current_depth] <
18200 $forced_breakpoint_undo_count )
18202 # and this block is short enough to fit on one line
18203 # Note: use < because need 1 more space for possible comma
18208 undo_forced_breakpoint_stack(
18209 $breakpoint_undo_stack[$current_depth] );
18210 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
18212 # now see if we have any comma breakpoints left
18213 my $has_comma_breakpoints =
18214 ( $breakpoint_stack[$current_depth] !=
18215 $forced_breakpoint_count );
18217 # update broken-sublist flag of the outer container
18218 $has_broken_sublist[$depth] =
18219 $has_broken_sublist[$depth]
18220 || $has_broken_sublist[$current_depth]
18222 || $has_comma_breakpoints;
18224 # Having come to the closing ')', '}', or ']', now we have to decide if we
18225 # should 'open up' the structure by placing breaks at the opening and
18226 # closing containers. This is a tricky decision. Here are some of the
18227 # basic considerations:
18229 # -If this is a BLOCK container, then any breakpoints will have already
18230 # been set (and according to user preferences), so we need do nothing here.
18232 # -If we have a comma-separated list for which we can align the list items,
18233 # then we need to do so because otherwise the vertical aligner cannot
18234 # currently do the alignment.
18236 # -If this container does itself contain a container which has been broken
18237 # open, then it should be broken open to properly show the structure.
18239 # -If there is nothing to align, and no other reason to break apart,
18240 # then do not do it.
18242 # We will not break open the parens of a long but 'simple' logical expression.
18245 # This is an example of a simple logical expression and its formatting:
18247 # if ( $bigwasteofspace1 && $bigwasteofspace2
18248 # || $bigwasteofspace3 && $bigwasteofspace4 )
18250 # Most people would prefer this than the 'spacey' version:
18253 # $bigwasteofspace1 && $bigwasteofspace2
18254 # || $bigwasteofspace3 && $bigwasteofspace4
18257 # To illustrate the rules for breaking logical expressions, consider:
18261 # and ( exists $ids_excl_uc{$id_uc}
18262 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
18264 # This is on the verge of being difficult to read. The current default is to
18265 # open it up like this:
18270 # and ( exists $ids_excl_uc{$id_uc}
18271 # or grep $id_uc =~ /$_/, @ids_excl_uc )
18274 # This is a compromise which tries to avoid being too dense and to spacey.
18275 # A more spaced version would be:
18281 # exists $ids_excl_uc{$id_uc}
18282 # or grep $id_uc =~ /$_/, @ids_excl_uc
18286 # Some people might prefer the spacey version -- an option could be added. The
18287 # innermost expression contains a long block '( exists $ids_... ')'.
18289 # Here is how the logic goes: We will force a break at the 'or' that the
18290 # innermost expression contains, but we will not break apart its opening and
18291 # closing containers because (1) it contains no multi-line sub-containers itself,
18292 # and (2) there is no alignment to be gained by breaking it open like this
18295 # exists $ids_excl_uc{$id_uc}
18296 # or grep $id_uc =~ /$_/, @ids_excl_uc
18299 # (although this looks perfectly ok and might be good for long expressions). The
18300 # outer 'if' container, though, contains a broken sub-container, so it will be
18301 # broken open to avoid too much density. Also, since it contains no 'or's, there
18302 # will be a forced break at its 'and'.
18304 # set some flags telling something about this container..
18305 my $is_simple_logical_expression = 0;
18306 if ( $item_count_stack[$current_depth] == 0
18307 && $saw_opening_structure
18308 && $tokens_to_go[$i_opening] eq '('
18309 && $is_logical_container{ $container_type[$current_depth] }
18313 # This seems to be a simple logical expression with
18314 # no existing breakpoints. Set a flag to prevent
18316 if ( !$has_comma_breakpoints ) {
18317 $is_simple_logical_expression = 1;
18320 # This seems to be a simple logical expression with
18321 # breakpoints (broken sublists, for example). Break
18322 # at all 'or's and '||'s.
18324 set_logical_breakpoints($current_depth);
18326 } ## end if ( $item_count_stack...)
18329 && @{ $rfor_semicolon_list[$current_depth] } )
18331 set_for_semicolon_breakpoints($current_depth);
18333 # open up a long 'for' or 'foreach' container to allow
18334 # leading term alignment unless -lp is used.
18335 $has_comma_breakpoints = 1
18336 unless $rOpts_line_up_parentheses;
18337 } ## end if ( $is_long_term && ...)
18341 # breaks for code BLOCKS are handled at a higher level
18344 # we do not need to break at the top level of an 'if'
18346 && !$is_simple_logical_expression
18348 ## modification to keep ': (' containers vertically tight;
18349 ## but probably better to let user set -vt=1 to avoid
18350 ## inconsistency with other paren types
18351 ## && ($container_type[$current_depth] ne ':')
18353 # otherwise, we require one of these reasons for breaking:
18356 # - this term has forced line breaks
18357 $has_comma_breakpoints
18359 # - the opening container is separated from this batch
18360 # for some reason (comment, blank line, code block)
18361 # - this is a non-paren container spanning multiple lines
18362 || !$saw_opening_structure
18364 # - this is a long block contained in another breakable
18367 && $container_environment_to_go[$i_opening] ne
18373 # For -lp option, we must put a breakpoint before
18374 # the token which has been identified as starting
18375 # this indentation level. This is necessary for
18376 # proper alignment.
18377 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
18379 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
18380 if ( $i_opening + 1 < $max_index_to_go
18381 && $types_to_go[ $i_opening + 1 ] eq 'b' )
18383 $item = $leading_spaces_to_go[ $i_opening + 2 ];
18385 if ( defined($item) ) {
18386 my $i_start_2 = $item->get_starting_index();
18388 defined($i_start_2)
18390 # we are breaking after an opening brace, paren,
18391 # so don't break before it too
18392 && $i_start_2 ne $i_opening
18396 # Only break for breakpoints at the same
18397 # indentation level as the opening paren
18398 my $test1 = $nesting_depth_to_go[$i_opening];
18399 my $test2 = $nesting_depth_to_go[$i_start_2];
18400 if ( $test2 == $test1 ) {
18401 set_forced_breakpoint( $i_start_2 - 1 );
18403 } ## end if ( defined($i_start_2...))
18404 } ## end if ( defined($item) )
18405 } ## end if ( $rOpts_line_up_parentheses...)
18407 # break after opening structure.
18408 # note: break before closing structure will be automatic
18409 if ( $minimum_depth <= $current_depth ) {
18411 set_forced_breakpoint($i_opening)
18412 unless ( $do_not_break_apart
18413 || is_unbreakable_container($current_depth) );
18415 # break at ',' of lower depth level before opening token
18416 if ( $last_comma_index[$depth] ) {
18417 set_forced_breakpoint( $last_comma_index[$depth] );
18420 # break at '.' of lower depth level before opening token
18421 if ( $last_dot_index[$depth] ) {
18422 set_forced_breakpoint( $last_dot_index[$depth] );
18425 # break before opening structure if preceded by another
18426 # closing structure and a comma. This is normally
18427 # done by the previous closing brace, but not
18428 # if it was a one-line block.
18429 if ( $i_opening > 2 ) {
18431 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
18435 if ( $types_to_go[$i_prev] eq ','
18436 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
18438 set_forced_breakpoint($i_prev);
18441 # also break before something like ':(' or '?('
18444 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
18446 my $token_prev = $tokens_to_go[$i_prev];
18447 if ( $want_break_before{$token_prev} ) {
18448 set_forced_breakpoint($i_prev);
18450 } ## end elsif ( $types_to_go[$i_prev...])
18451 } ## end if ( $i_opening > 2 )
18452 } ## end if ( $minimum_depth <=...)
18454 # break after comma following closing structure
18455 if ( $next_type eq ',' ) {
18456 set_forced_breakpoint( $i + 1 );
18459 # break before an '=' following closing structure
18461 $is_assignment{$next_nonblank_type}
18462 && ( $breakpoint_stack[$current_depth] !=
18463 $forced_breakpoint_count )
18466 set_forced_breakpoint($i);
18467 } ## end if ( $is_assignment{$next_nonblank_type...})
18469 # break at any comma before the opening structure Added
18470 # for -lp, but seems to be good in general. It isn't
18471 # obvious how far back to look; the '5' below seems to
18472 # work well and will catch the comma in something like
18473 # push @list, myfunc( $param, $param, ..
18475 my $icomma = $last_comma_index[$depth];
18476 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
18477 unless ( $forced_breakpoint_to_go[$icomma] ) {
18478 set_forced_breakpoint($icomma);
18481 } # end logic to open up a container
18483 # Break open a logical container open if it was already open
18484 elsif ($is_simple_logical_expression
18485 && $has_old_logical_breakpoints[$current_depth] )
18487 set_logical_breakpoints($current_depth);
18490 # Handle long container which does not get opened up
18491 elsif ($is_long_term) {
18493 # must set fake breakpoint to alert outer containers that
18495 set_fake_breakpoint();
18496 } ## end elsif ($is_long_term)
18498 } ## end elsif ( $depth < $current_depth)
18500 #------------------------------------------------------------
18501 # Handle this token
18502 #------------------------------------------------------------
18504 $current_depth = $depth;
18506 # handle comma-arrow
18507 if ( $type eq '=>' ) {
18508 next if ( $last_nonblank_type eq '=>' );
18509 next if $rOpts_break_at_old_comma_breakpoints;
18510 next if $rOpts_comma_arrow_breakpoints == 3;
18511 $want_comma_break[$depth] = 1;
18512 $index_before_arrow[$depth] = $i_last_nonblank_token;
18514 } ## end if ( $type eq '=>' )
18516 elsif ( $type eq '.' ) {
18517 $last_dot_index[$depth] = $i;
18520 # Turn off alignment if we are sure that this is not a list
18521 # environment. To be safe, we will do this if we see certain
18522 # non-list tokens, such as ';', and also the environment is
18523 # not a list. Note that '=' could be in any of the = operators
18524 # (lextest.t). We can't just use the reported environment
18525 # because it can be incorrect in some cases.
18526 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
18527 && $container_environment_to_go[$i] ne 'LIST' )
18529 $dont_align[$depth] = 1;
18530 $want_comma_break[$depth] = 0;
18531 $index_before_arrow[$depth] = -1;
18532 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
18534 # now just handle any commas
18535 next unless ( $type eq ',' );
18537 $last_dot_index[$depth] = undef;
18538 $last_comma_index[$depth] = $i;
18540 # break here if this comma follows a '=>'
18541 # but not if there is a side comment after the comma
18542 if ( $want_comma_break[$depth] ) {
18544 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
18545 if ($rOpts_comma_arrow_breakpoints) {
18546 $want_comma_break[$depth] = 0;
18547 ##$index_before_arrow[$depth] = -1;
18552 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
18554 # break before the previous token if it looks safe
18555 # Example of something that we will not try to break before:
18556 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
18557 # Also we don't want to break at a binary operator (like +):
18561 # $y - $R, -fill => 'black',
18563 my $ibreak = $index_before_arrow[$depth] - 1;
18565 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
18567 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
18568 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
18569 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
18571 # don't break pointer calls, such as the following:
18572 # File::Spec->curdir => 1,
18573 # (This is tokenized as adjacent 'w' tokens)
18574 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
18576 # And don't break before a comma, as in the following:
18577 # ( LONGER_THAN,=> 1,
18578 # EIGHTY_CHARACTERS,=> 2,
18579 # CAUSES_FORMATTING,=> 3,
18582 # This example is for -tso but should be general rule
18583 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
18584 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
18586 set_forced_breakpoint($ibreak);
18588 } ## end if ( $types_to_go[$ibreak...])
18589 } ## end if ( $ibreak > 0 && $tokens_to_go...)
18591 $want_comma_break[$depth] = 0;
18592 $index_before_arrow[$depth] = -1;
18594 # handle list which mixes '=>'s and ','s:
18595 # treat any list items so far as an interrupted list
18596 $interrupted_list[$depth] = 1;
18598 } ## end if ( $want_comma_break...)
18600 # break after all commas above starting depth
18601 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
18602 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
18606 # add this comma to the list..
18607 my $item_count = $item_count_stack[$depth];
18608 if ( $item_count == 0 ) {
18610 # but do not form a list with no opening structure
18613 # open INFILE_COPY, ">$input_file_copy"
18614 # or die ("very long message");
18616 if ( ( $opening_structure_index_stack[$depth] < 0 )
18617 && $container_environment_to_go[$i] eq 'BLOCK' )
18619 $dont_align[$depth] = 1;
18621 } ## end if ( $item_count == 0 )
18623 $comma_index[$depth][$item_count] = $i;
18624 ++$item_count_stack[$depth];
18625 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
18626 $identifier_count_stack[$depth]++;
18628 } ## end while ( ++$i <= $max_index_to_go)
18630 #-------------------------------------------
18631 # end of loop over all tokens in this batch
18632 #-------------------------------------------
18634 # set breaks for any unfinished lists ..
18635 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
18637 $interrupted_list[$dd] = 1;
18638 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
18639 set_comma_breakpoints($dd);
18640 set_logical_breakpoints($dd)
18641 if ( $has_old_logical_breakpoints[$dd] );
18642 set_for_semicolon_breakpoints($dd);
18644 # break open container...
18645 my $i_opening = $opening_structure_index_stack[$dd];
18646 set_forced_breakpoint($i_opening)
18648 is_unbreakable_container($dd)
18650 # Avoid a break which would place an isolated ' or "
18653 && $i_opening >= $max_index_to_go - 2
18654 && $token =~ /^['"]$/ )
18656 } ## end for ( my $dd = $current_depth...)
18658 # Return a flag indicating if the input file had some good breakpoints.
18659 # This flag will be used to force a break in a line shorter than the
18660 # allowed line length.
18661 if ( $has_old_logical_breakpoints[$current_depth] ) {
18662 $saw_good_breakpoint = 1;
18665 # A complex line with one break at an = has a good breakpoint.
18666 # This is not complex ($total_depth_variation=0):
18670 # This is complex ($total_depth_variation=6):
18672 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
18673 elsif ($i_old_assignment_break
18674 && $total_depth_variation > 4
18675 && $old_breakpoint_count == 1 )
18677 $saw_good_breakpoint = 1;
18678 } ## end elsif ( $i_old_assignment_break...)
18680 return $saw_good_breakpoint;
18681 } ## end sub scan_list
18684 sub find_token_starting_list {
18686 # When testing to see if a block will fit on one line, some
18687 # previous token(s) may also need to be on the line; particularly
18688 # if this is a sub call. So we will look back at least one
18689 # token. NOTE: This isn't perfect, but not critical, because
18690 # if we mis-identify a block, it will be wrapped and therefore
18691 # fixed the next time it is formatted.
18692 my $i_opening_paren = shift;
18693 my $i_opening_minus = $i_opening_paren;
18694 my $im1 = $i_opening_paren - 1;
18695 my $im2 = $i_opening_paren - 2;
18696 my $im3 = $i_opening_paren - 3;
18697 my $typem1 = $types_to_go[$im1];
18698 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
18699 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
18700 $i_opening_minus = $i_opening_paren;
18702 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
18703 $i_opening_minus = $im1 if $im1 >= 0;
18705 # walk back to improve length estimate
18706 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
18707 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
18708 $i_opening_minus = $j;
18710 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
18712 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
18713 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
18714 $i_opening_minus = $im2;
18716 return $i_opening_minus;
18719 { # begin set_comma_breakpoints_do
18721 my %is_keyword_with_special_leading_term;
18725 # These keywords have prototypes which allow a special leading item
18726 # followed by a list
18728 qw(formline grep kill map printf sprintf push chmod join pack unshift);
18729 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
18732 sub set_comma_breakpoints_do {
18734 # Given a list with some commas, set breakpoints at some of the
18735 # commas, if necessary, to make it easy to read. This list is
18738 $depth, $i_opening_paren, $i_closing_paren,
18739 $item_count, $identifier_count, $rcomma_index,
18740 $next_nonblank_type, $list_type, $interrupted,
18741 $rdo_not_break_apart, $must_break_open,
18744 # nothing to do if no commas seen
18745 return if ( $item_count < 1 );
18746 my $i_first_comma = $rcomma_index->[0];
18747 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
18748 my $i_last_comma = $i_true_last_comma;
18749 if ( $i_last_comma >= $max_index_to_go ) {
18750 $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
18751 return if ( $item_count < 1 );
18754 #---------------------------------------------------------------
18755 # find lengths of all items in the list to calculate page layout
18756 #---------------------------------------------------------------
18757 my $comma_count = $item_count;
18763 my @max_length = ( 0, 0 );
18764 my $first_term_length;
18765 my $i = $i_opening_paren;
18768 foreach my $j ( 0 .. $comma_count - 1 ) {
18769 $is_odd = 1 - $is_odd;
18770 $i_prev_plus = $i + 1;
18771 $i = $rcomma_index->[$j];
18774 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
18776 ( $types_to_go[$i_prev_plus] eq 'b' )
18779 push @i_term_begin, $i_term_begin;
18780 push @i_term_end, $i_term_end;
18781 push @i_term_comma, $i;
18783 # note: currently adding 2 to all lengths (for comma and space)
18785 2 + token_sequence_length( $i_term_begin, $i_term_end );
18786 push @item_lengths, $length;
18789 $first_term_length = $length;
18793 if ( $length > $max_length[$is_odd] ) {
18794 $max_length[$is_odd] = $length;
18799 # now we have to make a distinction between the comma count and item
18800 # count, because the item count will be one greater than the comma
18801 # count if the last item is not terminated with a comma
18803 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
18804 ? $i_last_comma + 1
18807 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
18808 ? $i_closing_paren - 2
18809 : $i_closing_paren - 1;
18810 my $i_effective_last_comma = $i_last_comma;
18812 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
18814 if ( $last_item_length > 0 ) {
18816 # add 2 to length because other lengths include a comma and a blank
18817 $last_item_length += 2;
18818 push @item_lengths, $last_item_length;
18819 push @i_term_begin, $i_b + 1;
18820 push @i_term_end, $i_e;
18821 push @i_term_comma, undef;
18823 my $i_odd = $item_count % 2;
18825 if ( $last_item_length > $max_length[$i_odd] ) {
18826 $max_length[$i_odd] = $last_item_length;
18830 $i_effective_last_comma = $i_e + 1;
18832 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
18833 $identifier_count++;
18837 #---------------------------------------------------------------
18838 # End of length calculations
18839 #---------------------------------------------------------------
18841 #---------------------------------------------------------------
18842 # Compound List Rule 1:
18843 # Break at (almost) every comma for a list containing a broken
18844 # sublist. This has higher priority than the Interrupted List
18846 #---------------------------------------------------------------
18847 if ( $has_broken_sublist[$depth] ) {
18849 # Break at every comma except for a comma between two
18850 # simple, small terms. This prevents long vertical
18851 # columns of, say, just 0's.
18852 my $small_length = 10; # 2 + actual maximum length wanted
18854 # We'll insert a break in long runs of small terms to
18855 # allow alignment in uniform tables.
18856 my $skipped_count = 0;
18857 my $columns = table_columns_available($i_first_comma);
18858 my $fields = int( $columns / $small_length );
18859 if ( $rOpts_maximum_fields_per_table
18860 && $fields > $rOpts_maximum_fields_per_table )
18862 $fields = $rOpts_maximum_fields_per_table;
18864 my $max_skipped_count = $fields - 1;
18866 my $is_simple_last_term = 0;
18867 my $is_simple_next_term = 0;
18868 foreach my $j ( 0 .. $item_count ) {
18869 $is_simple_last_term = $is_simple_next_term;
18870 $is_simple_next_term = 0;
18871 if ( $j < $item_count
18872 && $i_term_end[$j] == $i_term_begin[$j]
18873 && $item_lengths[$j] <= $small_length )
18875 $is_simple_next_term = 1;
18878 if ( $is_simple_last_term
18879 && $is_simple_next_term
18880 && $skipped_count < $max_skipped_count )
18885 $skipped_count = 0;
18886 my $i = $i_term_comma[ $j - 1 ];
18887 last unless defined $i;
18888 set_forced_breakpoint($i);
18892 # always break at the last comma if this list is
18893 # interrupted; we wouldn't want to leave a terminal '{', for
18895 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
18899 #my ( $a, $b, $c ) = caller();
18900 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
18901 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
18902 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
18904 #---------------------------------------------------------------
18905 # Interrupted List Rule:
18906 # A list is forced to use old breakpoints if it was interrupted
18907 # by side comments or blank lines, or requested by user.
18908 #---------------------------------------------------------------
18909 if ( $rOpts_break_at_old_comma_breakpoints
18911 || $i_opening_paren < 0 )
18913 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
18917 #---------------------------------------------------------------
18918 # Looks like a list of items. We have to look at it and size it up.
18919 #---------------------------------------------------------------
18921 my $opening_token = $tokens_to_go[$i_opening_paren];
18922 my $opening_environment =
18923 $container_environment_to_go[$i_opening_paren];
18925 #-------------------------------------------------------------------
18926 # Return if this will fit on one line
18927 #-------------------------------------------------------------------
18929 my $i_opening_minus = find_token_starting_list($i_opening_paren);
18931 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
18933 #-------------------------------------------------------------------
18934 # Now we know that this block spans multiple lines; we have to set
18935 # at least one breakpoint -- real or fake -- as a signal to break
18936 # open any outer containers.
18937 #-------------------------------------------------------------------
18938 set_fake_breakpoint();
18940 # be sure we do not extend beyond the current list length
18941 if ( $i_effective_last_comma >= $max_index_to_go ) {
18942 $i_effective_last_comma = $max_index_to_go - 1;
18945 # Set a flag indicating if we need to break open to keep -lp
18946 # items aligned. This is necessary if any of the list terms
18947 # exceeds the available space after the '('.
18948 my $need_lp_break_open = $must_break_open;
18949 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
18950 my $columns_if_unbroken =
18951 maximum_line_length($i_opening_minus) -
18952 total_line_length( $i_opening_minus, $i_opening_paren );
18953 $need_lp_break_open =
18954 ( $max_length[0] > $columns_if_unbroken )
18955 || ( $max_length[1] > $columns_if_unbroken )
18956 || ( $first_term_length > $columns_if_unbroken );
18959 # Specify if the list must have an even number of fields or not.
18960 # It is generally safest to assume an even number, because the
18961 # list items might be a hash list. But if we can be sure that
18962 # it is not a hash, then we can allow an odd number for more
18964 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
18966 if ( $identifier_count >= $item_count - 1
18967 || $is_assignment{$next_nonblank_type}
18968 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
18974 # do we have a long first term which should be
18975 # left on a line by itself?
18976 my $use_separate_first_term = (
18977 $odd_or_even == 1 # only if we can use 1 field/line
18978 && $item_count > 3 # need several items
18979 && $first_term_length >
18980 2 * $max_length[0] - 2 # need long first term
18981 && $first_term_length >
18982 2 * $max_length[1] - 2 # need long first term
18985 # or do we know from the type of list that the first term should
18987 if ( !$use_separate_first_term ) {
18988 if ( $is_keyword_with_special_leading_term{$list_type} ) {
18989 $use_separate_first_term = 1;
18991 # should the container be broken open?
18992 if ( $item_count < 3 ) {
18993 if ( $i_first_comma - $i_opening_paren < 4 ) {
18994 ${$rdo_not_break_apart} = 1;
18997 elsif ($first_term_length < 20
18998 && $i_first_comma - $i_opening_paren < 4 )
19000 my $columns = table_columns_available($i_first_comma);
19001 if ( $first_term_length < $columns ) {
19002 ${$rdo_not_break_apart} = 1;
19009 if ($use_separate_first_term) {
19011 # ..set a break and update starting values
19012 $use_separate_first_term = 1;
19013 set_forced_breakpoint($i_first_comma);
19014 $i_opening_paren = $i_first_comma;
19015 $i_first_comma = $rcomma_index->[1];
19017 return if $comma_count == 1;
19018 shift @item_lengths;
19019 shift @i_term_begin;
19021 shift @i_term_comma;
19024 # if not, update the metrics to include the first term
19026 if ( $first_term_length > $max_length[0] ) {
19027 $max_length[0] = $first_term_length;
19031 # Field width parameters
19032 my $pair_width = ( $max_length[0] + $max_length[1] );
19034 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
19036 # Number of free columns across the page width for laying out tables
19037 my $columns = table_columns_available($i_first_comma);
19039 # Estimated maximum number of fields which fit this space
19040 # This will be our first guess
19041 my $number_of_fields_max =
19042 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
19044 my $number_of_fields = $number_of_fields_max;
19046 # Find the best-looking number of fields
19047 # and make this our second guess if possible
19048 my ( $number_of_fields_best, $ri_ragged_break_list,
19049 $new_identifier_count )
19050 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
19053 if ( $number_of_fields_best != 0
19054 && $number_of_fields_best < $number_of_fields_max )
19056 $number_of_fields = $number_of_fields_best;
19059 # ----------------------------------------------------------------------
19060 # If we are crowded and the -lp option is being used, try to
19061 # undo some indentation
19062 # ----------------------------------------------------------------------
19064 $rOpts_line_up_parentheses
19066 $number_of_fields == 0
19067 || ( $number_of_fields == 1
19068 && $number_of_fields != $number_of_fields_best )
19072 my $available_spaces = get_available_spaces_to_go($i_first_comma);
19073 if ( $available_spaces > 0 ) {
19075 my $spaces_wanted = $max_width - $columns; # for 1 field
19077 if ( $number_of_fields_best == 0 ) {
19078 $number_of_fields_best =
19079 get_maximum_fields_wanted( \@item_lengths );
19082 if ( $number_of_fields_best != 1 ) {
19083 my $spaces_wanted_2 =
19084 1 + $pair_width - $columns; # for 2 fields
19085 if ( $available_spaces > $spaces_wanted_2 ) {
19086 $spaces_wanted = $spaces_wanted_2;
19090 if ( $spaces_wanted > 0 ) {
19091 my $deleted_spaces =
19092 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
19095 if ( $deleted_spaces > 0 ) {
19096 $columns = table_columns_available($i_first_comma);
19097 $number_of_fields_max =
19098 maximum_number_of_fields( $columns, $odd_or_even,
19099 $max_width, $pair_width );
19100 $number_of_fields = $number_of_fields_max;
19102 if ( $number_of_fields_best == 1
19103 && $number_of_fields >= 1 )
19105 $number_of_fields = $number_of_fields_best;
19112 # try for one column if two won't work
19113 if ( $number_of_fields <= 0 ) {
19114 $number_of_fields = int( $columns / $max_width );
19117 # The user can place an upper bound on the number of fields,
19118 # which can be useful for doing maintenance on tables
19119 if ( $rOpts_maximum_fields_per_table
19120 && $number_of_fields > $rOpts_maximum_fields_per_table )
19122 $number_of_fields = $rOpts_maximum_fields_per_table;
19125 # How many columns (characters) and lines would this container take
19126 # if no additional whitespace were added?
19127 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
19128 $i_effective_last_comma + 1 );
19129 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
19130 my $packed_lines = 1 + int( $packed_columns / $columns );
19132 # are we an item contained in an outer list?
19133 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
19135 if ( $number_of_fields <= 0 ) {
19137 # #---------------------------------------------------------------
19138 # # We're in trouble. We can't find a single field width that works.
19139 # # There is no simple answer here; we may have a single long list
19141 # #---------------------------------------------------------------
19143 # In many cases, it may be best to not force a break if there is just one
19144 # comma, because the standard continuation break logic will do a better
19147 # In the common case that all but one of the terms can fit
19148 # on a single line, it may look better not to break open the
19149 # containing parens. Consider, for example
19153 # sort { $color_value{$::a} <=> $color_value{$::b}; }
19156 # which will look like this with the container broken:
19160 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
19163 # Here is an example of this rule for a long last term:
19165 # log_message( 0, 256, 128,
19166 # "Number of routes in adj-RIB-in to be considered: $peercount" );
19168 # And here is an example with a long first term:
19171 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
19172 # $r, $pu, $ps, $cu, $cs, $tt
19174 # if $style eq 'all';
19176 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
19177 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
19178 my $long_first_term =
19179 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
19181 # break at every comma ...
19184 # if requested by user or is best looking
19185 $number_of_fields_best == 1
19187 # or if this is a sublist of a larger list
19188 || $in_hierarchical_list
19190 # or if multiple commas and we don't have a long first or last
19192 || ( $comma_count > 1
19193 && !( $long_last_term || $long_first_term ) )
19196 foreach ( 0 .. $comma_count - 1 ) {
19197 set_forced_breakpoint( $rcomma_index->[$_] );
19200 elsif ($long_last_term) {
19202 set_forced_breakpoint($i_last_comma);
19203 ${$rdo_not_break_apart} = 1 unless $must_break_open;
19205 elsif ($long_first_term) {
19207 set_forced_breakpoint($i_first_comma);
19211 # let breaks be defined by default bond strength logic
19216 # --------------------------------------------------------
19217 # We have a tentative field count that seems to work.
19218 # How many lines will this require?
19219 # --------------------------------------------------------
19220 my $formatted_lines = $item_count / ($number_of_fields);
19221 if ( $formatted_lines != int $formatted_lines ) {
19222 $formatted_lines = 1 + int $formatted_lines;
19225 # So far we've been trying to fill out to the right margin. But
19226 # compact tables are easier to read, so let's see if we can use fewer
19227 # fields without increasing the number of lines.
19228 $number_of_fields =
19229 compactify_table( $item_count, $number_of_fields, $formatted_lines,
19232 # How many spaces across the page will we fill?
19233 my $columns_per_line =
19234 ( int $number_of_fields / 2 ) * $pair_width +
19235 ( $number_of_fields % 2 ) * $max_width;
19237 my $formatted_columns;
19239 if ( $number_of_fields > 1 ) {
19240 $formatted_columns =
19241 ( $pair_width * ( int( $item_count / 2 ) ) +
19242 ( $item_count % 2 ) * $max_width );
19245 $formatted_columns = $max_width * $item_count;
19247 if ( $formatted_columns < $packed_columns ) {
19248 $formatted_columns = $packed_columns;
19251 my $unused_columns = $formatted_columns - $packed_columns;
19253 # set some empirical parameters to help decide if we should try to
19254 # align; high sparsity does not look good, especially with few lines
19255 my $sparsity = ($unused_columns) / ($formatted_columns);
19256 my $max_allowed_sparsity =
19257 ( $item_count < 3 ) ? 0.1
19258 : ( $packed_lines == 1 ) ? 0.15
19259 : ( $packed_lines == 2 ) ? 0.4
19262 # Begin check for shortcut methods, which avoid treating a list
19263 # as a table for relatively small parenthesized lists. These
19264 # are usually easier to read if not formatted as tables.
19266 $packed_lines <= 2 # probably can fit in 2 lines
19267 && $item_count < 9 # doesn't have too many items
19268 && $opening_environment eq 'BLOCK' # not a sub-container
19269 && $opening_token eq '(' # is paren list
19273 # Shortcut method 1: for -lp and just one comma:
19274 # This is a no-brainer, just break at the comma.
19276 $rOpts_line_up_parentheses # -lp
19277 && $item_count == 2 # two items, one comma
19278 && !$must_break_open
19281 my $i_break = $rcomma_index->[0];
19282 set_forced_breakpoint($i_break);
19283 ${$rdo_not_break_apart} = 1;
19284 set_non_alignment_flags( $comma_count, $rcomma_index );
19289 # method 2 is for most small ragged lists which might look
19290 # best if not displayed as a table.
19292 ( $number_of_fields == 2 && $item_count == 3 )
19294 $new_identifier_count > 0 # isn't all quotes
19295 && $sparsity > 0.15
19296 ) # would be fairly spaced gaps if aligned
19300 my $break_count = set_ragged_breakpoints( \@i_term_comma,
19301 $ri_ragged_break_list );
19302 ++$break_count if ($use_separate_first_term);
19304 # NOTE: we should really use the true break count here,
19305 # which can be greater if there are large terms and
19306 # little space, but usually this will work well enough.
19307 unless ($must_break_open) {
19309 if ( $break_count <= 1 ) {
19310 ${$rdo_not_break_apart} = 1;
19312 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
19314 ${$rdo_not_break_apart} = 1;
19317 set_non_alignment_flags( $comma_count, $rcomma_index );
19321 } # end shortcut methods
19325 FORMATTER_DEBUG_FLAG_SPARSE && do {
19327 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
19331 #---------------------------------------------------------------
19332 # Compound List Rule 2:
19333 # If this list is too long for one line, and it is an item of a
19334 # larger list, then we must format it, regardless of sparsity
19335 # (ian.t). One reason that we have to do this is to trigger
19336 # Compound List Rule 1, above, which causes breaks at all commas of
19337 # all outer lists. In this way, the structure will be properly
19339 #---------------------------------------------------------------
19341 # Decide if this list is too long for one line unless broken
19342 my $total_columns = table_columns_available($i_opening_paren);
19343 my $too_long = $packed_columns > $total_columns;
19345 # For a paren list, include the length of the token just before the
19346 # '(' because this is likely a sub call, and we would have to
19347 # include the sub name on the same line as the list. This is still
19348 # imprecise, but not too bad. (steve.t)
19349 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
19351 $too_long = excess_line_length( $i_opening_minus,
19352 $i_effective_last_comma + 1 ) > 0;
19355 # FIXME: For an item after a '=>', try to include the length of the
19356 # thing before the '=>'. This is crude and should be improved by
19357 # actually looking back token by token.
19358 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
19359 my $i_opening_minus = $i_opening_paren - 4;
19360 if ( $i_opening_minus >= 0 ) {
19361 $too_long = excess_line_length( $i_opening_minus,
19362 $i_effective_last_comma + 1 ) > 0;
19366 # Always break lists contained in '[' and '{' if too long for 1 line,
19367 # and always break lists which are too long and part of a more complex
19369 my $must_break_open_container = $must_break_open
19371 && ( $in_hierarchical_list || $opening_token ne '(' ) );
19373 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
19375 #---------------------------------------------------------------
19376 # The main decision:
19377 # Now decide if we will align the data into aligned columns. Do not
19378 # attempt to align columns if this is a tiny table or it would be
19379 # too spaced. It seems that the more packed lines we have, the
19380 # sparser the list that can be allowed and still look ok.
19381 #---------------------------------------------------------------
19383 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
19384 || ( $formatted_lines < 2 )
19385 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
19389 #---------------------------------------------------------------
19390 # too sparse: would look ugly if aligned in a table;
19391 #---------------------------------------------------------------
19393 # use old breakpoints if this is a 'big' list
19394 # FIXME: goal is to improve set_ragged_breakpoints so that
19395 # this is not necessary.
19396 if ( $packed_lines > 2 && $item_count > 10 ) {
19397 write_logfile_entry("List sparse: using old breakpoints\n");
19398 copy_old_breakpoints( $i_first_comma, $i_last_comma );
19401 # let the continuation logic handle it if 2 lines
19404 my $break_count = set_ragged_breakpoints( \@i_term_comma,
19405 $ri_ragged_break_list );
19406 ++$break_count if ($use_separate_first_term);
19408 unless ($must_break_open_container) {
19409 if ( $break_count <= 1 ) {
19410 ${$rdo_not_break_apart} = 1;
19412 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
19414 ${$rdo_not_break_apart} = 1;
19417 set_non_alignment_flags( $comma_count, $rcomma_index );
19422 #---------------------------------------------------------------
19423 # go ahead and format as a table
19424 #---------------------------------------------------------------
19425 write_logfile_entry(
19426 "List: auto formatting with $number_of_fields fields/row\n");
19428 my $j_first_break =
19429 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
19432 my $j = $j_first_break ;
19433 $j < $comma_count ;
19434 $j += $number_of_fields
19437 my $i = $rcomma_index->[$j];
19438 set_forced_breakpoint($i);
19444 sub set_non_alignment_flags {
19446 # set flag which indicates that these commas should not be
19448 my ( $comma_count, $rcomma_index ) = @_;
19449 foreach ( 0 .. $comma_count - 1 ) {
19450 $matching_token_to_go[ $rcomma_index->[$_] ] = 1;
19455 sub study_list_complexity {
19457 # Look for complex tables which should be formatted with one term per line.
19458 # Returns the following:
19460 # \@i_ragged_break_list = list of good breakpoints to avoid lines
19461 # which are hard to read
19462 # $number_of_fields_best = suggested number of fields based on
19463 # complexity; = 0 if any number may be used.
19465 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
19466 my $item_count = @{$ri_term_begin};
19467 my $complex_item_count = 0;
19468 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
19469 my $i_max = @{$ritem_lengths} - 1;
19470 ##my @item_complexity;
19472 my $i_last_last_break = -3;
19473 my $i_last_break = -2;
19474 my @i_ragged_break_list;
19476 my $definitely_complex = 30;
19477 my $definitely_simple = 12;
19478 my $quote_count = 0;
19480 for my $i ( 0 .. $i_max ) {
19481 my $ib = $ri_term_begin->[$i];
19482 my $ie = $ri_term_end->[$i];
19484 # define complexity: start with the actual term length
19485 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
19487 ##TBD: join types here and check for variations
19488 ##my $str=join "", @tokens_to_go[$ib..$ie];
19491 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
19495 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
19499 if ( $ib eq $ie ) {
19500 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
19501 $complex_item_count++;
19502 $weighted_length *= 2;
19508 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
19509 $complex_item_count++;
19510 $weighted_length *= 2;
19512 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
19513 $weighted_length += 4;
19517 # add weight for extra tokens.
19518 $weighted_length += 2 * ( $ie - $ib );
19520 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
19521 ## print "# COMPLEXITY:$weighted_length $BUB\n";
19523 ##push @item_complexity, $weighted_length;
19525 # now mark a ragged break after this item it if it is 'long and
19527 if ( $weighted_length >= $definitely_complex ) {
19529 # if we broke after the previous term
19530 # then break before it too
19531 if ( $i_last_break == $i - 1
19533 && $i_last_last_break != $i - 2 )
19536 ## FIXME: don't strand a small term
19537 pop @i_ragged_break_list;
19538 push @i_ragged_break_list, $i - 2;
19539 push @i_ragged_break_list, $i - 1;
19542 push @i_ragged_break_list, $i;
19543 $i_last_last_break = $i_last_break;
19544 $i_last_break = $i;
19547 # don't break before a small last term -- it will
19548 # not look good on a line by itself.
19549 elsif ($i == $i_max
19550 && $i_last_break == $i - 1
19551 && $weighted_length <= $definitely_simple )
19553 pop @i_ragged_break_list;
19557 my $identifier_count = $i_max + 1 - $quote_count;
19559 # Need more tuning here..
19560 if ( $max_width > 12
19561 && $complex_item_count > $item_count / 2
19562 && $number_of_fields_best != 2 )
19564 $number_of_fields_best = 1;
19567 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
19570 sub get_maximum_fields_wanted {
19572 # Not all tables look good with more than one field of items.
19573 # This routine looks at a table and decides if it should be
19574 # formatted with just one field or not.
19575 # This coding is still under development.
19576 my ($ritem_lengths) = @_;
19578 my $number_of_fields_best = 0;
19580 # For just a few items, we tentatively assume just 1 field.
19581 my $item_count = @{$ritem_lengths};
19582 if ( $item_count <= 5 ) {
19583 $number_of_fields_best = 1;
19586 # For larger tables, look at it both ways and see what looks best
19590 my @max_length = ( 0, 0 );
19591 my @last_length_2 = ( undef, undef );
19592 my @first_length_2 = ( undef, undef );
19593 my $last_length = undef;
19594 my $total_variation_1 = 0;
19595 my $total_variation_2 = 0;
19596 my @total_variation_2 = ( 0, 0 );
19597 foreach my $j ( 0 .. $item_count - 1 ) {
19599 $is_odd = 1 - $is_odd;
19600 my $length = $ritem_lengths->[$j];
19601 if ( $length > $max_length[$is_odd] ) {
19602 $max_length[$is_odd] = $length;
19605 if ( defined($last_length) ) {
19606 my $dl = abs( $length - $last_length );
19607 $total_variation_1 += $dl;
19609 $last_length = $length;
19611 my $ll = $last_length_2[$is_odd];
19612 if ( defined($ll) ) {
19613 my $dl = abs( $length - $ll );
19614 $total_variation_2[$is_odd] += $dl;
19617 $first_length_2[$is_odd] = $length;
19619 $last_length_2[$is_odd] = $length;
19621 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
19623 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
19624 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
19625 $number_of_fields_best = 1;
19628 return ($number_of_fields_best);
19631 sub table_columns_available {
19632 my $i_first_comma = shift;
19634 maximum_line_length($i_first_comma) -
19635 leading_spaces_to_go($i_first_comma);
19637 # Patch: the vertical formatter does not line up lines whose lengths
19638 # exactly equal the available line length because of allowances
19639 # that must be made for side comments. Therefore, the number of
19640 # available columns is reduced by 1 character.
19645 sub maximum_number_of_fields {
19647 # how many fields will fit in the available space?
19648 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
19649 my $max_pairs = int( $columns / $pair_width );
19650 my $number_of_fields = $max_pairs * 2;
19651 if ( $odd_or_even == 1
19652 && $max_pairs * $pair_width + $max_width <= $columns )
19654 $number_of_fields++;
19656 return $number_of_fields;
19659 sub compactify_table {
19661 # given a table with a certain number of fields and a certain number
19662 # of lines, see if reducing the number of fields will make it look
19664 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
19665 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
19669 $min_fields = $number_of_fields ;
19670 $min_fields >= $odd_or_even
19671 && $min_fields * $formatted_lines >= $item_count ;
19672 $min_fields -= $odd_or_even
19675 $number_of_fields = $min_fields;
19678 return $number_of_fields;
19681 sub set_ragged_breakpoints {
19683 # Set breakpoints in a list that cannot be formatted nicely as a
19685 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
19687 my $break_count = 0;
19688 foreach ( @{$ri_ragged_break_list} ) {
19689 my $j = $ri_term_comma->[$_];
19691 set_forced_breakpoint($j);
19695 return $break_count;
19698 sub copy_old_breakpoints {
19699 my ( $i_first_comma, $i_last_comma ) = @_;
19700 for my $i ( $i_first_comma .. $i_last_comma ) {
19701 if ( $old_breakpoint_to_go[$i] ) {
19702 set_forced_breakpoint($i);
19709 my ( $i, $j ) = @_;
19710 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
19712 FORMATTER_DEBUG_FLAG_NOBREAK && do {
19713 my ( $a, $b, $c ) = caller();
19715 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
19718 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
19721 # shouldn't happen; non-critical error
19723 FORMATTER_DEBUG_FLAG_NOBREAK && do {
19724 my ( $a, $b, $c ) = caller();
19726 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
19732 sub set_fake_breakpoint {
19734 # Just bump up the breakpoint count as a signal that there are breaks.
19735 # This is useful if we have breaks but may want to postpone deciding where
19737 $forced_breakpoint_count++;
19741 sub set_forced_breakpoint {
19744 return unless defined $i && $i >= 0;
19746 # no breaks between welded tokens
19747 return if ( weld_len_right_to_go($i) );
19749 # when called with certain tokens, use bond strengths to decide
19750 # if we break before or after it
19751 my $token = $tokens_to_go[$i];
19753 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
19754 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
19757 # breaks are forced before 'if' and 'unless'
19758 elsif ( $is_if_unless{$token} ) { $i-- }
19760 if ( $i >= 0 && $i <= $max_index_to_go ) {
19761 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
19763 FORMATTER_DEBUG_FLAG_FORCE && do {
19764 my ( $a, $b, $c ) = caller();
19766 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
19769 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
19770 $forced_breakpoint_to_go[$i_nonblank] = 1;
19772 if ( $i_nonblank > $index_max_forced_break ) {
19773 $index_max_forced_break = $i_nonblank;
19775 $forced_breakpoint_count++;
19776 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
19779 # if we break at an opening container..break at the closing
19780 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
19781 set_closing_breakpoint($i_nonblank);
19788 sub clear_breakpoint_undo_stack {
19789 $forced_breakpoint_undo_count = 0;
19793 sub undo_forced_breakpoint_stack {
19795 my $i_start = shift;
19796 if ( $i_start < 0 ) {
19798 my ( $a, $b, $c ) = caller();
19800 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
19804 while ( $forced_breakpoint_undo_count > $i_start ) {
19806 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
19807 if ( $i >= 0 && $i <= $max_index_to_go ) {
19808 $forced_breakpoint_to_go[$i] = 0;
19809 $forced_breakpoint_count--;
19811 FORMATTER_DEBUG_FLAG_UNDOBP && do {
19812 my ( $a, $b, $c ) = caller();
19814 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
19818 # shouldn't happen, but not a critical error
19820 FORMATTER_DEBUG_FLAG_UNDOBP && do {
19821 my ( $a, $b, $c ) = caller();
19823 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
19830 { # begin recombine_breakpoints
19842 @is_amp_amp{@q} = (1) x scalar(@q);
19845 @is_ternary{@q} = (1) x scalar(@q);
19847 @q = qw( + - * / );
19848 @is_math_op{@q} = (1) x scalar(@q);
19851 @is_plus_minus{@q} = (1) x scalar(@q);
19854 @is_mult_div{@q} = (1) x scalar(@q);
19857 sub DUMP_BREAKPOINTS {
19859 # Debug routine to dump current breakpoints...not normally called
19860 # We are given indexes to the current lines:
19861 # $ri_beg = ref to array of BEGinning indexes of each line
19862 # $ri_end = ref to array of ENDing indexes of each line
19863 my ( $ri_beg, $ri_end, $msg ) = @_;
19864 print STDERR "----Dumping breakpoints from: $msg----\n";
19865 for my $n ( 0 .. @{$ri_end} - 1 ) {
19866 my $ibeg = $ri_beg->[$n];
19867 my $iend = $ri_end->[$n];
19869 foreach my $i ( $ibeg .. $iend ) {
19870 $text .= $tokens_to_go[$i];
19872 print STDERR "$n ($ibeg:$iend) $text\n";
19874 print STDERR "----\n";
19878 sub unmask_phantom_semicolons {
19880 my ( $self, $ri_beg, $ri_end ) = @_;
19882 # Walk down the lines of this batch and unmask any invisible line-ending
19883 # semicolons. They were placed by sub respace_tokens but we only now
19884 # know if we actually need them.
19886 my $nmax = @{$ri_end} - 1;
19887 foreach my $n ( 0 .. $nmax ) {
19889 my $i = $ri_end->[$n];
19890 if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
19892 $tokens_to_go[$i] = $rtoken_vars_to_go[$i]->[_TOKEN_] =
19893 $want_left_space{';'} == WS_NO ? ';' : ' ;';
19894 my $line_number = $rtoken_vars_to_go[$i]->[_LINE_INDEX_] + 1;
19895 note_added_semicolon($line_number);
19901 sub recombine_breakpoints {
19903 # sub set_continuation_breaks is very liberal in setting line breaks
19904 # for long lines, always setting breaks at good breakpoints, even
19905 # when that creates small lines. Sometimes small line fragments
19906 # are produced which would look better if they were combined.
19907 # That's the task of this routine.
19909 # We are given indexes to the current lines:
19910 # $ri_beg = ref to array of BEGinning indexes of each line
19911 # $ri_end = ref to array of ENDing indexes of each line
19912 my ( $ri_beg, $ri_end ) = @_;
19914 # Make a list of all good joining tokens between the lines
19917 my $nmax = @{$ri_end} - 1;
19918 for my $n ( 1 .. $nmax ) {
19919 my $ibeg_1 = $ri_beg->[ $n - 1 ];
19920 my $iend_1 = $ri_end->[ $n - 1 ];
19921 my $iend_2 = $ri_end->[$n];
19922 my $ibeg_2 = $ri_beg->[$n];
19924 my ( $itok, $itokp, $itokm );
19926 foreach my $itest ( $iend_1, $ibeg_2 ) {
19927 my $type = $types_to_go[$itest];
19928 if ( $is_math_op{$type}
19929 || $is_amp_amp{$type}
19930 || $is_assignment{$type}
19936 $joint[$n] = [$itok];
19939 my $more_to_do = 1;
19941 # We keep looping over all of the lines of this batch
19942 # until there are no more possible recombinations
19943 my $nmax_last = @{$ri_end};
19945 while ($more_to_do) {
19948 my $nmax = @{$ri_end} - 1;
19950 # Safety check for infinite loop
19951 unless ( $nmax < $nmax_last ) {
19953 # Shouldn't happen because splice below decreases nmax on each
19955 Fault("Program bug-infinite loop in recombine breakpoints\n");
19957 $nmax_last = $nmax;
19959 my $skip_Section_3;
19960 my $leading_amp_count = 0;
19961 my $this_line_is_semicolon_terminated;
19963 # loop over all remaining lines in this batch
19964 for my $iter ( 1 .. $nmax ) {
19966 # alternating sweep direction gives symmetric results
19967 # for recombining lines which exceed the line length
19968 # such as eval {{{{.... }}}}
19970 if ($reverse) { $n = 1 + $nmax - $iter; }
19971 else { $n = $iter }
19973 #----------------------------------------------------------
19974 # If we join the current pair of lines,
19975 # line $n-1 will become the left part of the joined line
19976 # line $n will become the right part of the joined line
19978 # Here are Indexes of the endpoint tokens of the two lines:
19980 # -----line $n-1--- | -----line $n-----
19981 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
19984 # We want to decide if we should remove the line break
19985 # between the tokens at $iend_1 and $ibeg_2
19987 # We will apply a number of ad-hoc tests to see if joining
19988 # here will look ok. The code will just issue a 'next'
19989 # command if the join doesn't look good. If we get through
19990 # the gauntlet of tests, the lines will be recombined.
19991 #----------------------------------------------------------
19993 # beginning and ending tokens of the lines we are working on
19994 my $ibeg_1 = $ri_beg->[ $n - 1 ];
19995 my $iend_1 = $ri_end->[ $n - 1 ];
19996 my $iend_2 = $ri_end->[$n];
19997 my $ibeg_2 = $ri_beg->[$n];
19998 my $ibeg_nmax = $ri_beg->[$nmax];
20000 # combined line cannot be too long
20001 my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
20002 next if ( $excess > 0 );
20004 my $type_iend_1 = $types_to_go[$iend_1];
20005 my $type_iend_2 = $types_to_go[$iend_2];
20006 my $type_ibeg_1 = $types_to_go[$ibeg_1];
20007 my $type_ibeg_2 = $types_to_go[$ibeg_2];
20009 # terminal token of line 2 if any side comment is ignored:
20010 my $iend_2t = $iend_2;
20011 my $type_iend_2t = $type_iend_2;
20013 # some beginning indexes of other lines, which may not exist
20014 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
20015 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
20016 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
20020 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
20021 # $nesting_depth_to_go[$ibeg_1] );
20023 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
20025 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
20028 # If line $n is the last line, we set some flags and
20029 # do any special checks for it
20030 if ( $n == $nmax ) {
20032 # a terminal '{' should stay where it is
20033 next if $type_ibeg_2 eq '{';
20035 if ( $type_iend_2 eq '#'
20036 && $iend_2 - $ibeg_2 >= 2
20037 && $types_to_go[ $iend_2 - 1 ] eq 'b' )
20039 $iend_2t = $iend_2 - 2;
20040 $type_iend_2t = $types_to_go[$iend_2t];
20043 $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
20046 #----------------------------------------------------------
20047 # Recombine Section 0:
20048 # Examine the special token joining this line pair, if any.
20049 # Put as many tests in this section to avoid duplicate code and
20050 # to make formatting independent of whether breaks are to the
20051 # left or right of an operator.
20052 #----------------------------------------------------------
20054 my ($itok) = @{ $joint[$n] };
20057 # FIXME: Patch - may not be necessary
20059 $type_iend_1 eq 'b'
20064 $type_iend_2 eq 'b'
20069 my $type = $types_to_go[$itok];
20071 if ( $type eq ':' ) {
20073 # do not join at a colon unless it disobeys the break request
20074 if ( $itok eq $iend_1 ) {
20075 next unless $want_break_before{$type};
20078 $leading_amp_count++;
20079 next if $want_break_before{$type};
20083 # handle math operators + - * /
20084 elsif ( $is_math_op{$type} ) {
20086 # Combine these lines if this line is a single
20087 # number, or if it is a short term with same
20088 # operator as the previous line. For example, in
20089 # the following code we will combine all of the
20090 # short terms $A, $B, $C, $D, $E, $F, together
20091 # instead of leaving them one per line:
20093 # $A * $B * $C * $D * $E * $F *
20094 # ( 2. * $eps * $sigma * $area ) *
20095 # ( 1. / $tcold**3 - 1. / $thot**3 );
20097 # This can be important in math-intensive code.
20101 my $itokp = min( $inext_to_go[$itok], $iend_2 );
20102 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
20103 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
20104 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
20106 # check for a number on the right
20107 if ( $types_to_go[$itokp] eq 'n' ) {
20109 # ok if nothing else on right
20110 if ( $itokp == $iend_2 ) {
20115 # look one more token to right..
20116 # okay if math operator or some termination
20118 ( ( $itokpp == $iend_2 )
20119 && $is_math_op{ $types_to_go[$itokpp] } )
20120 || $types_to_go[$itokpp] =~ /^[#,;]$/;
20124 # check for a number on the left
20125 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
20127 # okay if nothing else to left
20128 if ( $itokm == $ibeg_1 ) {
20132 # otherwise look one more token to left
20135 # okay if math operator, comma, or assignment
20136 $good_combo = ( $itokmm == $ibeg_1 )
20137 && ( $is_math_op{ $types_to_go[$itokmm] }
20138 || $types_to_go[$itokmm] =~ /^[,]$/
20139 || $is_assignment{ $types_to_go[$itokmm] }
20144 # look for a single short token either side of the
20146 if ( !$good_combo ) {
20148 # Slight adjustment factor to make results
20149 # independent of break before or after operator in
20150 # long summed lists. (An operator and a space make
20152 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
20156 # numbers or id's on both sides of this joint
20157 $types_to_go[$itokp] =~ /^[in]$/
20158 && $types_to_go[$itokm] =~ /^[in]$/
20160 # one of the two lines must be short:
20163 # no more than 2 nonblank tokens right of
20168 && token_sequence_length( $itokp, $iend_2 )
20170 $rOpts_short_concatenation_item_length
20173 # no more than 2 nonblank tokens left of
20178 && token_sequence_length( $ibeg_1, $itokm )
20180 $rOpts_short_concatenation_item_length
20185 # keep pure terms; don't mix +- with */
20187 $is_plus_minus{$type}
20188 && ( $is_mult_div{ $types_to_go[$itokmm] }
20189 || $is_mult_div{ $types_to_go[$itokpp] } )
20192 $is_mult_div{$type}
20193 && ( $is_plus_minus{ $types_to_go[$itokmm] }
20194 || $is_plus_minus{ $types_to_go[$itokpp] } )
20200 # it is also good to combine if we can reduce to 2 lines
20201 if ( !$good_combo ) {
20203 # index on other line where same token would be in a
20206 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
20211 && $types_to_go[$iother] ne $type;
20214 next unless ($good_combo);
20218 elsif ( $is_amp_amp{$type} ) {
20222 elsif ( $is_assignment{$type} ) {
20224 } ## end assignment
20227 #----------------------------------------------------------
20228 # Recombine Section 1:
20229 # Join welded nested containers immediately
20230 # use alternating sweep direction until all are welds
20231 # are done. This produces more symmetric opening and
20232 # closing joins when lines exceed line length.
20233 #----------------------------------------------------------
20234 if ( weld_len_right_to_go($iend_1)
20235 || weld_len_left_to_go($ibeg_2) )
20238 $reverse = 1 - $reverse;
20243 #----------------------------------------------------------
20244 # Recombine Section 2:
20245 # Examine token at $iend_1 (right end of first line of pair)
20246 #----------------------------------------------------------
20248 # an isolated '}' may join with a ';' terminated segment
20249 if ( $type_iend_1 eq '}' ) {
20251 # Check for cases where combining a semicolon terminated
20252 # statement with a previous isolated closing paren will
20253 # allow the combined line to be outdented. This is
20254 # generally a good move. For example, we can join up
20255 # the last two lines here:
20257 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
20258 # $size, $atime, $mtime, $ctime, $blksize, $blocks
20264 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
20265 # $size, $atime, $mtime, $ctime, $blksize, $blocks
20268 # which makes the parens line up.
20270 # Another example, from Joe Matarazzo, probably looks best
20271 # with the 'or' clause appended to the trailing paren:
20272 # $self->some_method(
20275 # ) or die "Some_method didn't work";
20277 # But we do not want to do this for something like the -lp
20278 # option where the paren is not outdentable because the
20279 # trailing clause will be far to the right.
20281 # The logic here is synchronized with the logic in sub
20282 # sub set_adjusted_indentation, which actually does
20285 $skip_Section_3 ||= $this_line_is_semicolon_terminated
20287 # only one token on last line
20288 && $ibeg_1 == $iend_1
20290 # must be structural paren
20291 && $tokens_to_go[$iend_1] eq ')'
20293 # style must allow outdenting,
20294 && !$closing_token_indentation{')'}
20296 # only leading '&&', '||', and ':' if no others seen
20297 # (but note: our count made below could be wrong
20298 # due to intervening comments)
20299 && ( $leading_amp_count == 0
20300 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
20302 # but leading colons probably line up with a
20303 # previous colon or question (count could be wrong).
20304 && $type_ibeg_2 ne ':'
20306 # only one step in depth allowed. this line must not
20307 # begin with a ')' itself.
20308 && ( $nesting_depth_to_go[$iend_1] ==
20309 $nesting_depth_to_go[$iend_2] + 1 );
20311 # YVES patch 2 of 2:
20312 # Allow cuddled eval chains, like this:
20319 # This patch works together with a patch in
20320 # setting adjusted indentation (where the closing eval
20321 # brace is outdented if possible).
20322 # The problem is that an 'eval' block has continuation
20323 # indentation and it looks better to undo it in some
20324 # cases. If we do not use this patch we would get:
20332 # The alternative, for uncuddled style, is to create
20333 # a patch in set_adjusted_indentation which undoes
20334 # the indentation of a leading line like 'or do {'.
20335 # This doesn't work well with -icb through
20337 $block_type_to_go[$iend_1] eq 'eval'
20338 && !$rOpts->{'line-up-parentheses'}
20339 && !$rOpts->{'indent-closing-brace'}
20340 && $tokens_to_go[$iend_2] eq '{'
20342 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
20343 || ( $type_ibeg_2 eq 'k'
20344 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
20345 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
20349 $skip_Section_3 ||= 1;
20356 # handle '.' and '?' specially below
20357 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
20361 elsif ( $type_iend_1 eq '{' ) {
20364 # honor breaks at opening brace
20365 # Added to prevent recombining something like this:
20366 # } || eval { package main;
20367 next if $forced_breakpoint_to_go[$iend_1];
20370 # do not recombine lines with ending &&, ||,
20371 elsif ( $is_amp_amp{$type_iend_1} ) {
20372 next unless $want_break_before{$type_iend_1};
20375 # Identify and recombine a broken ?/: chain
20376 elsif ( $type_iend_1 eq '?' ) {
20378 # Do not recombine different levels
20380 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
20382 # do not recombine unless next line ends in :
20383 next unless $type_iend_2 eq ':';
20386 # for lines ending in a comma...
20387 elsif ( $type_iend_1 eq ',' ) {
20389 # Do not recombine at comma which is following the
20391 # TODO: might be best to make a special flag
20392 next if ( $old_breakpoint_to_go[$iend_1] );
20394 # an isolated '},' may join with an identifier + ';'
20395 # this is useful for the class of a 'bless' statement (bless.t)
20396 if ( $type_ibeg_1 eq '}'
20397 && $type_ibeg_2 eq 'i' )
20400 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
20401 && ( $iend_2 == ( $ibeg_2 + 1 ) )
20402 && $this_line_is_semicolon_terminated );
20404 # override breakpoint
20405 $forced_breakpoint_to_go[$iend_1] = 0;
20411 # do not recombine after a comma unless this will leave
20413 next unless ( $n + 1 >= $nmax );
20415 # do not recombine if there is a change in indentation depth
20418 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
20420 # do not recombine a "complex expression" after a
20421 # comma. "complex" means no parens.
20423 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
20424 if ( $tokens_to_go[$ii] eq '(' ) {
20429 next if $saw_paren;
20434 elsif ( $type_iend_1 eq '(' ) {
20436 # No longer doing this
20439 elsif ( $type_iend_1 eq ')' ) {
20441 # No longer doing this
20444 # keep a terminal for-semicolon
20445 elsif ( $type_iend_1 eq 'f' ) {
20449 # if '=' at end of line ...
20450 elsif ( $is_assignment{$type_iend_1} ) {
20452 # keep break after = if it was in input stream
20453 # this helps prevent 'blinkers'
20454 next if $old_breakpoint_to_go[$iend_1]
20456 # don't strand an isolated '='
20457 && $iend_1 != $ibeg_1;
20459 my $is_short_quote =
20460 ( $type_ibeg_2 eq 'Q'
20461 && $ibeg_2 == $iend_2
20462 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
20463 $rOpts_short_concatenation_item_length );
20465 ( $type_ibeg_1 eq '?'
20466 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
20468 # always join an isolated '=', a short quote, or if this
20469 # will put ?/: at start of adjacent lines
20470 if ( $ibeg_1 != $iend_1
20471 && !$is_short_quote
20478 # unless we can reduce this to two lines
20481 # or three lines, the last with a leading semicolon
20482 || ( $nmax == $n + 2
20483 && $types_to_go[$ibeg_nmax] eq ';' )
20485 # or the next line ends with a here doc
20486 || $type_iend_2 eq 'h'
20488 # or the next line ends in an open paren or brace
20489 # and the break hasn't been forced [dima.t]
20490 || ( !$forced_breakpoint_to_go[$iend_1]
20491 && $type_iend_2 eq '{' )
20494 # do not recombine if the two lines might align well
20495 # this is a very approximate test for this
20497 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
20502 # Recombine if we can make two lines
20505 # -lp users often prefer this:
20506 # my $title = function($env, $env, $sysarea,
20507 # "bubba Borrower Entry");
20508 # so we will recombine if -lp is used we have
20510 && ( !$rOpts_line_up_parentheses
20511 || $type_iend_2 ne ',' )
20515 # otherwise, scan the rhs line up to last token for
20516 # complexity. Note that we are not counting the last
20517 # token in case it is an opening paren.
20519 my $depth = $nesting_depth_to_go[$ibeg_2];
20520 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
20521 if ( $nesting_depth_to_go[$i] != $depth ) {
20523 last if ( $tv > 1 );
20525 $depth = $nesting_depth_to_go[$i];
20528 # ok to recombine if no level changes before last token
20531 # otherwise, do not recombine if more than two
20533 next if ( $tv > 1 );
20535 # check total complexity of the two adjacent lines
20536 # that will occur if we do this join
20539 ? $ri_end->[ $n + 1 ]
20541 foreach my $i ( $iend_2 .. $istop ) {
20542 if ( $nesting_depth_to_go[$i] != $depth ) {
20544 last if ( $tv > 2 );
20546 $depth = $nesting_depth_to_go[$i];
20549 # do not recombine if total is more than 2 level changes
20550 next if ( $tv > 2 );
20555 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
20556 $forced_breakpoint_to_go[$iend_1] = 0;
20561 elsif ( $type_iend_1 eq 'k' ) {
20563 # make major control keywords stand out
20568 #/^(last|next|redo|return)$/
20569 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
20571 # but only if followed by multiple lines
20575 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
20577 unless $want_break_before{ $tokens_to_go[$iend_1] };
20581 #----------------------------------------------------------
20582 # Recombine Section 3:
20583 # Examine token at $ibeg_2 (left end of second line of pair)
20584 #----------------------------------------------------------
20586 # join lines identified above as capable of
20587 # causing an outdented line with leading closing paren
20588 # Note that we are skipping the rest of this section
20589 # and the rest of the loop to do the join
20590 if ($skip_Section_3) {
20591 $forced_breakpoint_to_go[$iend_1] = 0;
20596 # handle lines with leading &&, ||
20597 elsif ( $is_amp_amp{$type_ibeg_2} ) {
20599 $leading_amp_count++;
20601 # ok to recombine if it follows a ? or :
20602 # and is followed by an open paren..
20604 ( $is_ternary{$type_ibeg_1}
20605 && $tokens_to_go[$iend_2] eq '(' )
20607 # or is followed by a ? or : at same depth
20609 # We are looking for something like this. We can
20610 # recombine the && line with the line above to make the
20611 # structure more clear:
20613 # exists $G->{Attr}->{V}
20614 # && exists $G->{Attr}->{V}->{$u}
20615 # ? %{ $G->{Attr}->{V}->{$u} }
20618 # We should probably leave something like this alone:
20620 # exists $G->{Attr}->{E}
20621 # && exists $G->{Attr}->{E}->{$u}
20622 # && exists $G->{Attr}->{E}->{$u}->{$v}
20623 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
20625 # so that we either have all of the &&'s (or ||'s)
20626 # on one line, as in the first example, or break at
20627 # each one as in the second example. However, it
20628 # sometimes makes things worse to check for this because
20629 # it prevents multiple recombinations. So this is not done.
20631 && $is_ternary{ $types_to_go[$ibeg_3] }
20632 && $nesting_depth_to_go[$ibeg_3] ==
20633 $nesting_depth_to_go[$ibeg_2] );
20635 next if !$ok && $want_break_before{$type_ibeg_2};
20636 $forced_breakpoint_to_go[$iend_1] = 0;
20638 # tweak the bond strength to give this joint priority
20643 # Identify and recombine a broken ?/: chain
20644 elsif ( $type_ibeg_2 eq '?' ) {
20646 # Do not recombine different levels
20647 my $lev = $levels_to_go[$ibeg_2];
20648 next if ( $lev ne $levels_to_go[$ibeg_1] );
20650 # Do not recombine a '?' if either next line or
20651 # previous line does not start with a ':'. The reasons
20652 # are that (1) no alignment of the ? will be possible
20653 # and (2) the expression is somewhat complex, so the
20654 # '?' is harder to see in the interior of the line.
20655 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
20656 my $precedes_colon =
20657 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
20658 next unless ( $follows_colon || $precedes_colon );
20660 # we will always combining a ? line following a : line
20661 if ( !$follows_colon ) {
20663 # ...otherwise recombine only if it looks like a chain.
20664 # we will just look at a few nearby lines to see if
20665 # this looks like a chain.
20666 my $local_count = 0;
20667 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
20670 && $types_to_go[$ii] eq ':'
20671 && $levels_to_go[$ii] == $lev;
20673 next unless ( $local_count > 1 );
20675 $forced_breakpoint_to_go[$iend_1] = 0;
20678 # do not recombine lines with leading '.'
20679 elsif ( $type_ibeg_2 eq '.' ) {
20680 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
20684 # ... unless there is just one and we can reduce
20685 # this to two lines if we do. For example, this
20689 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
20691 # looks better than this:
20692 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
20693 # . '$args .= $pat;'
20698 && $type_ibeg_1 ne $type_ibeg_2
20701 # ... or this would strand a short quote , like this
20702 # . "some long quote"
20705 || ( $types_to_go[$i_next_nonblank] eq 'Q'
20706 && $i_next_nonblank >= $iend_2 - 1
20707 && $token_lengths_to_go[$i_next_nonblank] <
20708 $rOpts_short_concatenation_item_length )
20712 # handle leading keyword..
20713 elsif ( $type_ibeg_2 eq 'k' ) {
20715 # handle leading "or"
20716 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
20719 $this_line_is_semicolon_terminated
20722 # following 'if' or 'unless' or 'or'
20723 $type_ibeg_1 eq 'k'
20724 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
20726 # important: only combine a very simple or
20727 # statement because the step below may have
20728 # combined a trailing 'and' with this or,
20729 # and we do not want to then combine
20730 # everything together
20731 && ( $iend_2 - $ibeg_2 <= 7 )
20736 $forced_breakpoint_to_go[$iend_1] = 0
20737 unless $old_breakpoint_to_go[$iend_1];
20740 # handle leading 'and'
20741 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
20743 # Decide if we will combine a single terminal 'and'
20744 # after an 'if' or 'unless'.
20746 # This looks best with the 'and' on the same
20747 # line as the 'if':
20750 # if $seconds and $nu < 2;
20752 # But this looks better as shown:
20755 # if !$this->{Parents}{$_}
20756 # or $this->{Parents}{$_} eq $_;
20760 $this_line_is_semicolon_terminated
20763 # following 'if' or 'unless' or 'or'
20764 $type_ibeg_1 eq 'k'
20765 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
20766 || $tokens_to_go[$ibeg_1] eq 'or' )
20771 # handle leading "if" and "unless"
20772 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
20774 # FIXME: This is still experimental..may not be too useful
20777 $this_line_is_semicolon_terminated
20779 # previous line begins with 'and' or 'or'
20780 && $type_ibeg_1 eq 'k'
20781 && $is_and_or{ $tokens_to_go[$ibeg_1] }
20786 # handle all other leading keywords
20789 # keywords look best at start of lines,
20790 # but combine things like "1 while"
20791 unless ( $is_assignment{$type_iend_1} ) {
20793 if ( ( $type_iend_1 ne 'k' )
20794 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
20799 # similar treatment of && and || as above for 'and' and 'or':
20800 # NOTE: This block of code is currently bypassed because
20801 # of a previous block but is retained for possible future use.
20802 elsif ( $is_amp_amp{$type_ibeg_2} ) {
20804 # maybe looking at something like:
20805 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
20809 $this_line_is_semicolon_terminated
20811 # previous line begins with an 'if' or 'unless' keyword
20812 && $type_ibeg_1 eq 'k'
20813 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
20818 # handle line with leading = or similar
20819 elsif ( $is_assignment{$type_ibeg_2} ) {
20820 next unless ( $n == 1 || $n == $nmax );
20821 next if $old_breakpoint_to_go[$iend_1];
20825 # unless we can reduce this to two lines
20828 # or three lines, the last with a leading semicolon
20829 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
20831 # or the next line ends with a here doc
20832 || $type_iend_2 eq 'h'
20834 # or this is a short line ending in ;
20835 || ( $n == $nmax && $this_line_is_semicolon_terminated )
20837 $forced_breakpoint_to_go[$iend_1] = 0;
20840 #----------------------------------------------------------
20841 # Recombine Section 4:
20842 # Combine the lines if we arrive here and it is possible
20843 #----------------------------------------------------------
20845 # honor hard breakpoints
20846 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
20848 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
20850 # Require a few extra spaces before recombining lines if we are
20851 # at an old breakpoint unless this is a simple list or terminal
20852 # line. The goal is to avoid oscillating between two
20853 # quasi-stable end states. For example this snippet caused
20857 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
20861 if ( $old_breakpoint_to_go[$iend_1]
20862 && !$this_line_is_semicolon_terminated
20865 && $type_iend_2 ne ',' );
20867 # do not recombine if we would skip in indentation levels
20868 if ( $n < $nmax ) {
20869 my $if_next = $ri_beg->[ $n + 1 ];
20872 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
20873 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
20875 # but an isolated 'if (' is undesirable
20878 && $iend_1 - $ibeg_1 <= 2
20879 && $type_ibeg_1 eq 'k'
20880 && $tokens_to_go[$ibeg_1] eq 'if'
20881 && $tokens_to_go[$iend_1] ne '('
20887 next if ( $bs >= NO_BREAK - 1 );
20889 # remember the pair with the greatest bond strength
20896 if ( $bs > $bs_best ) {
20903 # recombine the pair with the greatest bond strength
20905 splice @{$ri_beg}, $n_best, 1;
20906 splice @{$ri_end}, $n_best - 1, 1;
20907 splice @joint, $n_best, 1;
20909 # keep going if we are still making progress
20913 return ( $ri_beg, $ri_end );
20915 } # end recombine_breakpoints
20917 sub break_all_chain_tokens {
20919 # scan the current breakpoints looking for breaks at certain "chain
20920 # operators" (. : && || + etc) which often occur repeatedly in a long
20921 # statement. If we see a break at any one, break at all similar tokens
20922 # within the same container.
20924 my ( $ri_left, $ri_right ) = @_;
20926 my %saw_chain_type;
20927 my %left_chain_type;
20928 my %right_chain_type;
20929 my %interior_chain_type;
20930 my $nmax = @{$ri_right} - 1;
20932 # scan the left and right end tokens of all lines
20934 for my $n ( 0 .. $nmax ) {
20935 my $il = $ri_left->[$n];
20936 my $ir = $ri_right->[$n];
20937 my $typel = $types_to_go[$il];
20938 my $typer = $types_to_go[$ir];
20939 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
20940 $typer = '+' if ( $typer eq '-' );
20941 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
20942 $typer = '*' if ( $typer eq '/' );
20943 my $tokenl = $tokens_to_go[$il];
20944 my $tokenr = $tokens_to_go[$ir];
20946 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
20947 next if ( $typel eq '?' );
20948 push @{ $left_chain_type{$typel} }, $il;
20949 $saw_chain_type{$typel} = 1;
20952 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
20953 next if ( $typer eq '?' );
20954 push @{ $right_chain_type{$typer} }, $ir;
20955 $saw_chain_type{$typer} = 1;
20959 return unless $count;
20961 # now look for any interior tokens of the same types
20963 for my $n ( 0 .. $nmax ) {
20964 my $il = $ri_left->[$n];
20965 my $ir = $ri_right->[$n];
20966 foreach my $i ( $il + 1 .. $ir - 1 ) {
20967 my $type = $types_to_go[$i];
20968 $type = '+' if ( $type eq '-' );
20969 $type = '*' if ( $type eq '/' );
20970 if ( $saw_chain_type{$type} ) {
20971 push @{ $interior_chain_type{$type} }, $i;
20976 return unless $count;
20978 # now make a list of all new break points
20981 # loop over all chain types
20982 foreach my $type ( keys %saw_chain_type ) {
20984 # quit if just ONE continuation line with leading . For example--
20985 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
20987 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
20989 # loop over all interior chain tokens
20990 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
20992 # loop over all left end tokens of same type
20993 if ( $left_chain_type{$type} ) {
20994 next if $nobreak_to_go[ $itest - 1 ];
20995 foreach my $i ( @{ $left_chain_type{$type} } ) {
20996 next unless in_same_container( $i, $itest );
20997 push @insert_list, $itest - 1;
20999 # Break at matching ? if this : is at a different level.
21000 # For example, the ? before $THRf_DEAD in the following
21001 # should get a break if its : gets a break.
21004 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
21005 # : ( $_ & 4 ) ? $THRf_R_DETACHED
21006 # : $THRf_R_JOINABLE;
21008 && $levels_to_go[$i] != $levels_to_go[$itest] )
21010 my $i_question = $mate_index_to_go[$itest];
21011 if ( $i_question > 0 ) {
21012 push @insert_list, $i_question - 1;
21019 # loop over all right end tokens of same type
21020 if ( $right_chain_type{$type} ) {
21021 next if $nobreak_to_go[$itest];
21022 foreach my $i ( @{ $right_chain_type{$type} } ) {
21023 next unless in_same_container( $i, $itest );
21024 push @insert_list, $itest;
21026 # break at matching ? if this : is at a different level
21028 && $levels_to_go[$i] != $levels_to_go[$itest] )
21030 my $i_question = $mate_index_to_go[$itest];
21031 if ( $i_question >= 0 ) {
21032 push @insert_list, $i_question;
21041 # insert any new break points
21042 if (@insert_list) {
21043 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
21050 # Look for assignment operators that could use a breakpoint.
21051 # For example, in the following snippet
21053 # $HOME = $ENV{HOME}
21056 # || die "no home directory for user $<";
21058 # we could break at the = to get this, which is a little nicer:
21063 # || die "no home directory for user $<";
21065 # The logic here follows the logic in set_logical_padding, which
21066 # will add the padding in the second line to improve alignment.
21068 my ( $ri_left, $ri_right ) = @_;
21069 my $nmax = @{$ri_right} - 1;
21070 return unless ( $nmax >= 2 );
21072 # scan the left ends of first two lines
21075 for my $n ( 1 .. 2 ) {
21076 my $il = $ri_left->[$n];
21077 my $typel = $types_to_go[$il];
21078 my $tokenl = $tokens_to_go[$il];
21080 my $has_leading_op = ( $tokenl =~ /^\w/ )
21081 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
21082 : $is_chain_operator{$typel}; # and, or
21083 return unless ($has_leading_op);
21086 unless ( $tokenl eq $tokbeg
21087 && $nesting_depth_to_go[$il] eq $depth_beg );
21090 $depth_beg = $nesting_depth_to_go[$il];
21093 # now look for any interior tokens of the same types
21094 my $il = $ri_left->[0];
21095 my $ir = $ri_right->[0];
21097 # now make a list of all new break points
21099 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
21100 my $type = $types_to_go[$i];
21101 if ( $is_assignment{$type}
21102 && $nesting_depth_to_go[$i] eq $depth_beg )
21104 if ( $want_break_before{$type} ) {
21105 push @insert_list, $i - 1;
21108 push @insert_list, $i;
21113 # Break after a 'return' followed by a chain of operators
21114 # return ( $^O !~ /win32|dos/i )
21115 # && ( $^O ne 'VMS' )
21116 # && ( $^O ne 'OS2' )
21117 # && ( $^O ne 'MacOS' );
21120 # ( $^O !~ /win32|dos/i )
21121 # && ( $^O ne 'VMS' )
21122 # && ( $^O ne 'OS2' )
21123 # && ( $^O ne 'MacOS' );
21125 if ( $types_to_go[$i] eq 'k'
21126 && $tokens_to_go[$i] eq 'return'
21128 && $nesting_depth_to_go[$i] eq $depth_beg )
21130 push @insert_list, $i;
21133 return unless (@insert_list);
21135 # One final check...
21136 # scan second and third lines and be sure there are no assignments
21137 # we want to avoid breaking at an = to make something like this:
21139 # $html_icons{"$type-$state"}
21140 # or $icon = $html_icons{$type}
21141 # or $icon = $html_icons{$state} )
21142 for my $n ( 1 .. 2 ) {
21143 my $il = $ri_left->[$n];
21144 my $ir = $ri_right->[$n];
21145 foreach my $i ( $il + 1 .. $ir ) {
21146 my $type = $types_to_go[$i];
21148 if ( $is_assignment{$type}
21149 && $nesting_depth_to_go[$i] eq $depth_beg );
21153 # ok, insert any new break point
21154 if (@insert_list) {
21155 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
21160 sub insert_final_breaks {
21162 my ( $ri_left, $ri_right ) = @_;
21164 my $nmax = @{$ri_right} - 1;
21166 # scan the left and right end tokens of all lines
21168 my $i_first_colon = -1;
21169 for my $n ( 0 .. $nmax ) {
21170 my $il = $ri_left->[$n];
21171 my $ir = $ri_right->[$n];
21172 my $typel = $types_to_go[$il];
21173 my $typer = $types_to_go[$ir];
21174 return if ( $typel eq '?' );
21175 return if ( $typer eq '?' );
21176 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
21177 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
21180 # For long ternary chains,
21181 # if the first : we see has its # ? is in the interior
21182 # of a preceding line, then see if there are any good
21183 # breakpoints before the ?.
21184 if ( $i_first_colon > 0 ) {
21185 my $i_question = $mate_index_to_go[$i_first_colon];
21186 if ( $i_question > 0 ) {
21188 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
21189 my $token = $tokens_to_go[$ii];
21190 my $type = $types_to_go[$ii];
21192 # For now, a good break is either a comma or a 'return'.
21193 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
21194 && in_same_container( $ii, $i_question ) )
21196 push @insert_list, $ii;
21201 # insert any new break points
21202 if (@insert_list) {
21203 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
21210 sub in_same_container {
21212 # check to see if tokens at i1 and i2 are in the
21213 # same container, and not separated by a comma, ? or :
21214 my ( $i1, $i2 ) = @_;
21215 my $type = $types_to_go[$i1];
21216 my $depth = $nesting_depth_to_go[$i1];
21217 return unless ( $nesting_depth_to_go[$i2] == $depth );
21218 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
21220 ###########################################################
21221 # This is potentially a very slow routine and not critical.
21222 # For safety just give up for large differences.
21223 # See test file 'infinite_loop.txt'
21224 # TODO: replace this loop with a data structure
21225 ###########################################################
21226 return if ( $i2 - $i1 > 200 );
21228 foreach my $i ( $i1 + 1 .. $i2 - 1 ) {
21229 next if ( $nesting_depth_to_go[$i] > $depth );
21230 return if ( $nesting_depth_to_go[$i] < $depth );
21232 my $tok = $tokens_to_go[$i];
21233 $tok = ',' if $tok eq '=>'; # treat => same as ,
21235 # Example: we would not want to break at any of these .'s
21236 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
21237 if ( $type ne ':' ) {
21238 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
21241 return if ( $tok =~ /^[\,]$/ );
21247 sub set_continuation_breaks {
21249 # Define an array of indexes for inserting newline characters to
21250 # keep the line lengths below the maximum desired length. There is
21251 # an implied break after the last token, so it need not be included.
21254 # This routine is part of series of routines which adjust line
21255 # lengths. It is only called if a statement is longer than the
21256 # maximum line length, or if a preliminary scanning located
21257 # desirable break points. Sub scan_list has already looked at
21258 # these tokens and set breakpoints (in array
21259 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
21260 # after commas, after opening parens, and before closing parens).
21261 # This routine will honor these breakpoints and also add additional
21262 # breakpoints as necessary to keep the line length below the maximum
21263 # requested. It bases its decision on where the 'bond strength' is
21266 # Output: returns references to the arrays:
21269 # which contain the indexes $i of the first and last tokens on each
21272 # In addition, the array:
21273 # $forced_breakpoint_to_go[$i]
21274 # may be updated to be =1 for any index $i after which there must be
21275 # a break. This signals later routines not to undo the breakpoint.
21277 my $saw_good_break = shift;
21278 my @i_first = (); # the first index to output
21279 my @i_last = (); # the last index to output
21280 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
21281 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
21283 set_bond_strengths();
21286 my $imax = $max_index_to_go;
21287 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
21288 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
21289 my $i_begin = $imin; # index for starting next iteration
21291 my $leading_spaces = leading_spaces_to_go($imin);
21292 my $line_count = 0;
21293 my $last_break_strength = NO_BREAK;
21294 my $i_last_break = -1;
21295 my $max_bias = 0.001;
21296 my $tiny_bias = 0.0001;
21297 my $leading_alignment_token = "";
21298 my $leading_alignment_type = "";
21300 # see if any ?/:'s are in order
21301 my $colons_in_order = 1;
21303 my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
21304 my $colon_count = @colon_list;
21305 foreach (@colon_list) {
21306 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
21310 # This is a sufficient but not necessary condition for colon chain
21311 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
21313 #-------------------------------------------------------
21314 # BEGINNING of main loop to set continuation breakpoints
21315 # Keep iterating until we reach the end
21316 #-------------------------------------------------------
21317 while ( $i_begin <= $imax ) {
21318 my $lowest_strength = NO_BREAK;
21319 my $starting_sum = $summed_lengths_to_go[$i_begin];
21322 my $lowest_next_token = '';
21323 my $lowest_next_type = 'b';
21324 my $i_lowest_next_nonblank = -1;
21326 #-------------------------------------------------------
21327 # BEGINNING of inner loop to find the best next breakpoint
21328 #-------------------------------------------------------
21329 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
21330 my $type = $types_to_go[$i_test];
21331 my $token = $tokens_to_go[$i_test];
21332 my $next_type = $types_to_go[ $i_test + 1 ];
21333 my $next_token = $tokens_to_go[ $i_test + 1 ];
21334 my $i_next_nonblank = $inext_to_go[$i_test];
21335 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
21336 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
21337 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
21338 my $strength = $bond_strength_to_go[$i_test];
21339 my $maximum_line_length = maximum_line_length($i_begin);
21341 # use old breaks as a tie-breaker. For example to
21342 # prevent blinkers with -pbp in this code:
21345 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
21348 # At the same time try to prevent a leading * in this code
21349 # with the default formatting:
21352 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
21353 ## * ( $x**( $a - 1 ) )
21354 ## * ( ( 1 - $x )**( $b - 1 ) );
21356 # reduce strength a bit to break ties at an old breakpoint ...
21358 $old_breakpoint_to_go[$i_test]
21360 # which is a 'good' breakpoint, meaning ...
21361 # we don't want to break before it
21362 && !$want_break_before{$type}
21364 # and either we want to break before the next token
21365 # or the next token is not short (i.e. not a '*', '/' etc.)
21366 && $i_next_nonblank <= $imax
21367 && ( $want_break_before{$next_nonblank_type}
21368 || $token_lengths_to_go[$i_next_nonblank] > 2
21369 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
21372 $strength -= $tiny_bias;
21375 # otherwise increase strength a bit if this token would be at the
21376 # maximum line length. This is necessary to avoid blinking
21377 # in the above example when the -iob flag is added.
21381 $summed_lengths_to_go[ $i_test + 1 ] -
21383 if ( $len >= $maximum_line_length ) {
21384 $strength += $tiny_bias;
21388 my $must_break = 0;
21390 # Force an immediate break at certain operators
21391 # with lower level than the start of the line,
21392 # unless we've already seen a better break.
21394 ##############################################
21395 # Note on an issue with a preceding ?
21396 ##############################################
21397 # We don't include a ? in the above list, but there may
21398 # be a break at a previous ? if the line is long.
21399 # Because of this we do not want to force a break if
21400 # there is a previous ? on this line. For now the best way
21401 # to do this is to not break if we have seen a lower strength
21402 # point, which is probably a ?.
21404 # Example of unwanted breaks we are avoiding at a '.' following a ?
21405 # from pod2html using perltidy -gnu:
21407 # ? "\n<A NAME=\""
21409 # . "\">\n$text</A>\n"
21410 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
21413 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
21414 || ( $next_nonblank_type eq 'k'
21415 && $next_nonblank_token =~ /^(and|or)$/ )
21417 && ( $nesting_depth_to_go[$i_begin] >
21418 $nesting_depth_to_go[$i_next_nonblank] )
21419 && ( $strength <= $lowest_strength )
21422 set_forced_breakpoint($i_next_nonblank);
21427 # Try to put a break where requested by scan_list
21428 $forced_breakpoint_to_go[$i_test]
21430 # break between ) { in a continued line so that the '{' can
21432 # See similar logic in scan_list which catches instances
21433 # where a line is just something like ') {'. We have to
21434 # be careful because the corresponding block keyword might
21435 # not be on the first line, such as 'for' here:
21439 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
21445 && ( $token eq ')' )
21446 && ( $next_nonblank_type eq '{' )
21447 && ($next_nonblank_block_type)
21448 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
21450 # RT #104427: Dont break before opening sub brace because
21451 # sub block breaks handled at higher level, unless
21452 # it looks like the preceeding list is long and broken
21454 $next_nonblank_block_type =~ /^sub\b/
21455 && ( $nesting_depth_to_go[$i_begin] ==
21456 $nesting_depth_to_go[$i_next_nonblank] )
21459 && !$rOpts->{'opening-brace-always-on-right'}
21462 # There is an implied forced break at a terminal opening brace
21463 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
21467 # Forced breakpoints must sometimes be overridden, for example
21468 # because of a side comment causing a NO_BREAK. It is easier
21469 # to catch this here than when they are set.
21470 if ( $strength < NO_BREAK - 1 ) {
21471 $strength = $lowest_strength - $tiny_bias;
21476 # quit if a break here would put a good terminal token on
21477 # the next line and we already have a possible break
21480 && ( $next_nonblank_type =~ /^[\;\,]$/ )
21484 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
21486 ) > $maximum_line_length
21490 last if ( $i_lowest >= 0 );
21493 # Avoid a break which would strand a single punctuation
21494 # token. For example, we do not want to strand a leading
21495 # '.' which is followed by a long quoted string.
21496 # But note that we do want to do this with -extrude (l=1)
21497 # so please test any changes to this code on -extrude.
21500 && ( $i_test == $i_begin )
21501 && ( $i_test < $imax )
21502 && ( $token eq $type )
21506 $summed_lengths_to_go[ $i_test + 1 ] -
21508 ) < $maximum_line_length
21512 $i_test = min( $imax, $inext_to_go[$i_test] );
21516 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
21519 # break at previous best break if it would have produced
21520 # a leading alignment of certain common tokens, and it
21521 # is different from the latest candidate break
21523 if ($leading_alignment_type);
21525 # Force at least one breakpoint if old code had good
21526 # break It is only called if a breakpoint is required or
21527 # desired. This will probably need some adjustments
21528 # over time. A goal is to try to be sure that, if a new
21529 # side comment is introduced into formatted text, then
21530 # the same breakpoints will occur. scbreak.t
21533 $i_test == $imax # we are at the end
21534 && !$forced_breakpoint_count #
21535 && $saw_good_break # old line had good break
21536 && $type =~ /^[#;\{]$/ # and this line ends in
21537 # ';' or side comment
21538 && $i_last_break < 0 # and we haven't made a break
21539 && $i_lowest >= 0 # and we saw a possible break
21540 && $i_lowest < $imax - 1 # (but not just before this ;)
21541 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
21544 # Do not skip past an important break point in a short final
21545 # segment. For example, without this check we would miss the
21546 # break at the final / in the following code:
21549 # ( $tau * $mass_pellet * $q_0 *
21550 # ( 1. - exp( -$t_stop / $tau ) ) -
21551 # 4. * $pi * $factor * $k_ice *
21552 # ( $t_melt - $t_ice ) *
21555 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
21557 if ( $line_count > 2
21558 && $i_lowest < $i_test
21559 && $i_test > $imax - 2
21560 && $nesting_depth_to_go[$i_begin] >
21561 $nesting_depth_to_go[$i_lowest]
21562 && $lowest_strength < $last_break_strength - .5 * WEAK )
21564 # Make this break for math operators for now
21565 my $ir = $inext_to_go[$i_lowest];
21566 my $il = $iprev_to_go[$ir];
21568 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
21569 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
21572 # Update the minimum bond strength location
21573 $lowest_strength = $strength;
21574 $i_lowest = $i_test;
21575 $lowest_next_token = $next_nonblank_token;
21576 $lowest_next_type = $next_nonblank_type;
21577 $i_lowest_next_nonblank = $i_next_nonblank;
21578 last if $must_break;
21580 # set flags to remember if a break here will produce a
21581 # leading alignment of certain common tokens
21582 if ( $line_count > 0
21584 && ( $lowest_strength - $last_break_strength <= $max_bias )
21587 my $i_last_end = $iprev_to_go[$i_begin];
21588 my $tok_beg = $tokens_to_go[$i_begin];
21589 my $type_beg = $types_to_go[$i_begin];
21592 # check for leading alignment of certain tokens
21594 $tok_beg eq $next_nonblank_token
21595 && $is_chain_operator{$tok_beg}
21596 && ( $type_beg eq 'k'
21597 || $type_beg eq $tok_beg )
21598 && $nesting_depth_to_go[$i_begin] >=
21599 $nesting_depth_to_go[$i_next_nonblank]
21602 || ( $tokens_to_go[$i_last_end] eq $token
21603 && $is_chain_operator{$token}
21604 && ( $type eq 'k' || $type eq $token )
21605 && $nesting_depth_to_go[$i_last_end] >=
21606 $nesting_depth_to_go[$i_test] )
21609 $leading_alignment_token = $next_nonblank_token;
21610 $leading_alignment_type = $next_nonblank_type;
21615 my $too_long = ( $i_test >= $imax );
21616 if ( !$too_long ) {
21619 $summed_lengths_to_go[ $i_test + 2 ] -
21621 $too_long = $next_length > $maximum_line_length;
21623 # To prevent blinkers we will avoid leaving a token exactly at
21624 # the line length limit unless it is the last token or one of
21625 # several "good" types.
21627 # The following code was a blinker with -pbp before this
21629 ## $last_nonblank_token eq '('
21630 ## && $is_indirect_object_taker{ $paren_type
21631 ## [$paren_depth] }
21632 # The issue causing the problem is that if the
21633 # term [$paren_depth] gets broken across a line then
21634 # the whitespace routine doesn't see both opening and closing
21635 # brackets and will format like '[ $paren_depth ]'. This
21636 # leads to an oscillation in length depending if we break
21637 # before the closing bracket or not.
21639 && $i_test + 1 < $imax
21640 && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
21642 $too_long = $next_length >= $maximum_line_length;
21646 FORMATTER_DEBUG_FLAG_BREAK
21649 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
21650 my $i_testp2 = $i_test + 2;
21651 if ( $i_testp2 > $max_index_to_go + 1 ) {
21652 $i_testp2 = $max_index_to_go + 1;
21654 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
21655 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
21657 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
21660 # allow one extra terminal token after exceeding line length
21661 # if it would strand this token.
21662 if ( $rOpts_fuzzy_line_length
21664 && $i_lowest == $i_test
21665 && $token_lengths_to_go[$i_test] > 1
21666 && $next_nonblank_type =~ /^[\;\,]$/ )
21673 ( $i_test == $imax ) # we're done if no more tokens,
21675 ( $i_lowest >= 0 ) # or no more space and we have a break
21681 #-------------------------------------------------------
21682 # END of inner loop to find the best next breakpoint
21683 # Now decide exactly where to put the breakpoint
21684 #-------------------------------------------------------
21686 # it's always ok to break at imax if no other break was found
21687 if ( $i_lowest < 0 ) { $i_lowest = $imax }
21689 # semi-final index calculation
21690 my $i_next_nonblank = $inext_to_go[$i_lowest];
21691 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
21692 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
21694 #-------------------------------------------------------
21695 # ?/: rule 1 : if a break here will separate a '?' on this
21696 # line from its closing ':', then break at the '?' instead.
21697 #-------------------------------------------------------
21698 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
21699 next unless ( $tokens_to_go[$i] eq '?' );
21701 # do not break if probable sequence of ?/: statements
21702 next if ($is_colon_chain);
21704 # do not break if statement is broken by side comment
21707 $tokens_to_go[$max_index_to_go] eq '#'
21708 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
21709 $max_index_to_go ) !~ /^[\;\}]$/
21712 # no break needed if matching : is also on the line
21714 if ( $mate_index_to_go[$i] >= 0
21715 && $mate_index_to_go[$i] <= $i_next_nonblank );
21718 if ( $want_break_before{'?'} ) { $i_lowest-- }
21722 #-------------------------------------------------------
21723 # END of inner loop to find the best next breakpoint:
21724 # Break the line after the token with index i=$i_lowest
21725 #-------------------------------------------------------
21727 # final index calculation
21728 $i_next_nonblank = $inext_to_go[$i_lowest];
21729 $next_nonblank_type = $types_to_go[$i_next_nonblank];
21730 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
21732 FORMATTER_DEBUG_FLAG_BREAK
21734 "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
21736 #-------------------------------------------------------
21737 # ?/: rule 2 : if we break at a '?', then break at its ':'
21739 # Note: this rule is also in sub scan_list to handle a break
21740 # at the start and end of a line (in case breaks are dictated
21741 # by side comments).
21742 #-------------------------------------------------------
21743 if ( $next_nonblank_type eq '?' ) {
21744 set_closing_breakpoint($i_next_nonblank);
21746 elsif ( $types_to_go[$i_lowest] eq '?' ) {
21747 set_closing_breakpoint($i_lowest);
21750 #-------------------------------------------------------
21751 # ?/: rule 3 : if we break at a ':' then we save
21752 # its location for further work below. We may need to go
21753 # back and break at its '?'.
21754 #-------------------------------------------------------
21755 if ( $next_nonblank_type eq ':' ) {
21756 push @i_colon_breaks, $i_next_nonblank;
21758 elsif ( $types_to_go[$i_lowest] eq ':' ) {
21759 push @i_colon_breaks, $i_lowest;
21762 # here we should set breaks for all '?'/':' pairs which are
21763 # separated by this line
21767 # save this line segment, after trimming blanks at the ends
21769 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
21771 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
21773 # set a forced breakpoint at a container opening, if necessary, to
21774 # signal a break at a closing container. Excepting '(' for now.
21775 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
21776 && !$forced_breakpoint_to_go[$i_lowest] )
21778 set_closing_breakpoint($i_lowest);
21781 # get ready to go again
21782 $i_begin = $i_lowest + 1;
21783 $last_break_strength = $lowest_strength;
21784 $i_last_break = $i_lowest;
21785 $leading_alignment_token = "";
21786 $leading_alignment_type = "";
21787 $lowest_next_token = '';
21788 $lowest_next_type = 'b';
21790 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
21794 # update indentation size
21795 if ( $i_begin <= $imax ) {
21796 $leading_spaces = leading_spaces_to_go($i_begin);
21800 #-------------------------------------------------------
21801 # END of main loop to set continuation breakpoints
21802 # Now go back and make any necessary corrections
21803 #-------------------------------------------------------
21805 #-------------------------------------------------------
21806 # ?/: rule 4 -- if we broke at a ':', then break at
21807 # corresponding '?' unless this is a chain of ?: expressions
21808 #-------------------------------------------------------
21809 if (@i_colon_breaks) {
21811 # using a simple method for deciding if we are in a ?/: chain --
21812 # this is a chain if it has multiple ?/: pairs all in order;
21814 # Note that if line starts in a ':' we count that above as a break
21815 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
21817 unless ($is_chain) {
21818 my @insert_list = ();
21819 foreach (@i_colon_breaks) {
21820 my $i_question = $mate_index_to_go[$_];
21821 if ( $i_question >= 0 ) {
21822 if ( $want_break_before{'?'} ) {
21823 $i_question = $iprev_to_go[$i_question];
21826 if ( $i_question >= 0 ) {
21827 push @insert_list, $i_question;
21830 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
21834 return ( \@i_first, \@i_last, $colon_count );
21837 sub insert_additional_breaks {
21839 # this routine will add line breaks at requested locations after
21840 # sub set_continuation_breaks has made preliminary breaks.
21842 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
21845 my $line_number = 0;
21846 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
21848 $i_f = $ri_first->[$line_number];
21849 $i_l = $ri_last->[$line_number];
21850 while ( $i_break_left >= $i_l ) {
21853 # shouldn't happen unless caller passes bad indexes
21854 if ( $line_number >= @{$ri_last} ) {
21856 "Non-fatal program bug: couldn't set break at $i_break_left\n"
21858 report_definite_bug();
21861 $i_f = $ri_first->[$line_number];
21862 $i_l = $ri_last->[$line_number];
21865 # Do not leave a blank at the end of a line; back up if necessary
21866 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
21868 my $i_break_right = $inext_to_go[$i_break_left];
21869 if ( $i_break_left >= $i_f
21870 && $i_break_left < $i_l
21871 && $i_break_right > $i_f
21872 && $i_break_right <= $i_l )
21874 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
21875 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
21881 sub set_closing_breakpoint {
21883 # set a breakpoint at a matching closing token
21884 # at present, this is only used to break at a ':' which matches a '?'
21885 my $i_break = shift;
21887 if ( $mate_index_to_go[$i_break] >= 0 ) {
21889 # CAUTION: infinite recursion possible here:
21890 # set_closing_breakpoint calls set_forced_breakpoint, and
21891 # set_forced_breakpoint call set_closing_breakpoint
21892 # ( test files attrib.t, BasicLyx.pm.html).
21893 # Don't reduce the '2' in the statement below
21894 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
21896 # break before } ] and ), but sub set_forced_breakpoint will decide
21897 # to break before or after a ? and :
21898 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
21899 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
21903 my $type_sequence = $type_sequence_to_go[$i_break];
21904 if ($type_sequence) {
21905 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
21906 $postponed_breakpoint{$type_sequence} = 1;
21912 sub compare_indentation_levels {
21914 # check to see if output line tabbing agrees with input line
21915 # this can be very useful for debugging a script which has an extra
21917 my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
21918 if ( $guessed_indentation_level ne $structural_indentation_level ) {
21919 $last_tabbing_disagreement = $input_line_number;
21921 if ($in_tabbing_disagreement) {
21924 $tabbing_disagreement_count++;
21926 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
21927 write_logfile_entry(
21928 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
21931 $in_tabbing_disagreement = $input_line_number;
21932 $first_tabbing_disagreement = $in_tabbing_disagreement
21933 unless ($first_tabbing_disagreement);
21938 if ($in_tabbing_disagreement) {
21940 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
21941 write_logfile_entry(
21942 "End indentation disagreement from input line $in_tabbing_disagreement\n"
21945 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
21946 write_logfile_entry(
21947 "No further tabbing disagreements will be noted\n");
21950 $in_tabbing_disagreement = 0;
21956 #####################################################################
21958 # the Perl::Tidy::IndentationItem class supplies items which contain
21959 # how much whitespace should be used at the start of a line
21961 #####################################################################
21963 package Perl::Tidy::IndentationItem;
21967 # Create an 'indentation_item' which describes one level of leading
21968 # whitespace when the '-lp' indentation is used.
21970 $class, $spaces, $level,
21971 $ci_level, $available_spaces, $index,
21972 $gnu_sequence_number, $align_paren, $stack_depth,
21977 my $arrow_count = 0;
21978 my $comma_count = 0;
21979 my $have_child = 0;
21980 my $want_right_spaces = 0;
21984 # spaces => # total leading white spaces
21985 # level => # the indentation 'level'
21986 # ci_level => # the 'continuation level'
21987 # available_spaces => # how many left spaces available
21989 # closed => # index where we saw closing '}'
21990 # comma_count => # how many commas at this level?
21991 # sequence_number => # output batch number
21992 # index => # index in output batch list
21993 # have_child => # any dependents?
21994 # recoverable_spaces => # how many spaces to the right
21995 # # we would like to move to get
21996 # # alignment (negative if left)
21997 # align_paren => # do we want to try to align
21998 # # with an opening structure?
21999 # marked => # if visited by corrector logic
22000 # stack_depth => # indentation nesting depth
22001 # starting_index => # first token index of this level
22002 # arrow_count => # how many =>'s
22005 _spaces => $spaces,
22007 _ci_level => $ci_level,
22008 _available_spaces => $available_spaces,
22009 _closed => $closed,
22010 _comma_count => $comma_count,
22011 _sequence_number => $gnu_sequence_number,
22013 _have_child => $have_child,
22014 _recoverable_spaces => $want_right_spaces,
22015 _align_paren => $align_paren,
22016 _marked => $marked,
22017 _stack_depth => $stack_depth,
22018 _starting_index => $starting_index,
22019 _arrow_count => $arrow_count,
22023 sub permanently_decrease_available_spaces {
22025 # make a permanent reduction in the available indentation spaces
22026 # at one indentation item. NOTE: if there are child nodes, their
22027 # total SPACES must be reduced by the caller.
22029 my ( $item, $spaces_needed ) = @_;
22030 my $available_spaces = $item->get_available_spaces();
22031 my $deleted_spaces =
22032 ( $available_spaces > $spaces_needed )
22034 : $available_spaces;
22035 $item->decrease_available_spaces($deleted_spaces);
22036 $item->decrease_SPACES($deleted_spaces);
22037 $item->set_recoverable_spaces(0);
22039 return $deleted_spaces;
22042 sub tentatively_decrease_available_spaces {
22044 # We are asked to tentatively delete $spaces_needed of indentation
22045 # for a indentation item. We may want to undo this later. NOTE: if
22046 # there are child nodes, their total SPACES must be reduced by the
22048 my ( $item, $spaces_needed ) = @_;
22049 my $available_spaces = $item->get_available_spaces();
22050 my $deleted_spaces =
22051 ( $available_spaces > $spaces_needed )
22053 : $available_spaces;
22054 $item->decrease_available_spaces($deleted_spaces);
22055 $item->decrease_SPACES($deleted_spaces);
22056 $item->increase_recoverable_spaces($deleted_spaces);
22057 return $deleted_spaces;
22060 sub get_stack_depth {
22062 return $self->{_stack_depth};
22067 return $self->{_spaces};
22072 return $self->{_marked};
22076 my ( $self, $value ) = @_;
22077 if ( defined($value) ) {
22078 $self->{_marked} = $value;
22080 return $self->{_marked};
22083 sub get_available_spaces {
22085 return $self->{_available_spaces};
22088 sub decrease_SPACES {
22089 my ( $self, $value ) = @_;
22090 if ( defined($value) ) {
22091 $self->{_spaces} -= $value;
22093 return $self->{_spaces};
22096 sub decrease_available_spaces {
22097 my ( $self, $value ) = @_;
22098 if ( defined($value) ) {
22099 $self->{_available_spaces} -= $value;
22101 return $self->{_available_spaces};
22104 sub get_align_paren {
22106 return $self->{_align_paren};
22109 sub get_recoverable_spaces {
22111 return $self->{_recoverable_spaces};
22114 sub set_recoverable_spaces {
22115 my ( $self, $value ) = @_;
22116 if ( defined($value) ) {
22117 $self->{_recoverable_spaces} = $value;
22119 return $self->{_recoverable_spaces};
22122 sub increase_recoverable_spaces {
22123 my ( $self, $value ) = @_;
22124 if ( defined($value) ) {
22125 $self->{_recoverable_spaces} += $value;
22127 return $self->{_recoverable_spaces};
22132 return $self->{_ci_level};
22137 return $self->{_level};
22140 sub get_sequence_number {
22142 return $self->{_sequence_number};
22147 return $self->{_index};
22150 sub get_starting_index {
22152 return $self->{_starting_index};
22155 sub set_have_child {
22156 my ( $self, $value ) = @_;
22157 if ( defined($value) ) {
22158 $self->{_have_child} = $value;
22160 return $self->{_have_child};
22163 sub get_have_child {
22165 return $self->{_have_child};
22168 sub set_arrow_count {
22169 my ( $self, $value ) = @_;
22170 if ( defined($value) ) {
22171 $self->{_arrow_count} = $value;
22173 return $self->{_arrow_count};
22176 sub get_arrow_count {
22178 return $self->{_arrow_count};
22181 sub set_comma_count {
22182 my ( $self, $value ) = @_;
22183 if ( defined($value) ) {
22184 $self->{_comma_count} = $value;
22186 return $self->{_comma_count};
22189 sub get_comma_count {
22191 return $self->{_comma_count};
22195 my ( $self, $value ) = @_;
22196 if ( defined($value) ) {
22197 $self->{_closed} = $value;
22199 return $self->{_closed};
22204 return $self->{_closed};
22207 #####################################################################
22209 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
22210 # contain a single output line
22212 #####################################################################
22214 package Perl::Tidy::VerticalAligner::Line;
22221 my %default_data = (
22223 jmax_original_line => undef,
22226 rpatterns => undef,
22227 indentation => undef,
22228 leading_space_count => undef,
22229 outdent_long_lines => undef,
22230 list_type => undef,
22231 is_hanging_side_comment => undef,
22233 maximum_line_length => undef,
22234 rvertical_tightness_flags => undef
22238 # methods to count object population
22240 sub get_count { return $_count; }
22241 sub _increment_count { return ++$_count }
22242 sub _decrement_count { return --$_count }
22245 # Constructor may be called as a class method
22247 my ( $caller, %arg ) = @_;
22248 my $caller_is_obj = ref($caller);
22249 my $class = $caller_is_obj || $caller;
22250 ##no strict "refs";
22251 my $self = bless {}, $class;
22253 $self->{_ralignments} = [];
22255 foreach my $key ( keys %default_data ) {
22256 my $_key = '_' . $key;
22258 # Caller keys do not have an underscore
22259 if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} }
22260 elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} }
22261 else { $self->{$_key} = $default_data{$_key} }
22264 $self->_increment_count();
22270 $self->_decrement_count();
22274 sub get_jmax { my $self = shift; return $self->{_jmax} }
22276 sub get_jmax_original_line {
22278 return $self->{_jmax_original_line};
22280 sub get_rtokens { my $self = shift; return $self->{_rtokens} }
22281 sub get_rfields { my $self = shift; return $self->{_rfields} }
22282 sub get_rpatterns { my $self = shift; return $self->{_rpatterns} }
22283 sub get_indentation { my $self = shift; return $self->{_indentation} }
22285 sub get_leading_space_count {
22287 return $self->{_leading_space_count};
22290 sub get_outdent_long_lines {
22292 return $self->{_outdent_long_lines};
22294 sub get_list_type { my $self = shift; return $self->{_list_type} }
22296 sub get_is_hanging_side_comment {
22298 return $self->{_is_hanging_side_comment};
22301 sub get_rvertical_tightness_flags {
22303 return $self->{_rvertical_tightness_flags};
22307 ## FIXME: does caller ever supply $val??
22308 my ( $self, $j, $val ) = @_;
22309 return $self->{_ralignments}->[$j]->set_column($val);
22312 sub get_alignment {
22313 my ( $self, $j ) = @_;
22314 return $self->{_ralignments}->[$j];
22316 sub get_alignments { my $self = shift; return @{ $self->{_ralignments} } }
22319 my ( $self, $j ) = @_;
22320 return $self->{_ralignments}->[$j]->get_column();
22323 sub get_starting_column {
22324 my ( $self, $j ) = @_;
22325 return $self->{_ralignments}->[$j]->get_starting_column();
22328 sub increment_column {
22329 my ( $self, $k, $pad ) = @_;
22330 $self->{_ralignments}->[$k]->increment_column($pad);
22334 sub set_alignments {
22336 @{ $self->{_ralignments} } = @_;
22340 sub current_field_width {
22341 my ( $self, $j ) = @_;
22343 return $self->get_column($j);
22346 return $self->get_column($j) - $self->get_column( $j - 1 );
22350 sub field_width_growth {
22351 my ( $self, $j ) = @_;
22352 return $self->get_column($j) - $self->get_starting_column($j);
22355 sub starting_field_width {
22356 my ( $self, $j ) = @_;
22358 return $self->get_starting_column($j);
22361 return $self->get_starting_column($j) -
22362 $self->get_starting_column( $j - 1 );
22366 sub increase_field_width {
22368 my ( $self, $j, $pad ) = @_;
22369 my $jmax = $self->get_jmax();
22370 for my $k ( $j .. $jmax ) {
22371 $self->increment_column( $k, $pad );
22376 sub get_available_space_on_right {
22378 my $jmax = $self->get_jmax();
22379 return $self->{_maximum_line_length} - $self->get_column($jmax);
22382 sub set_jmax { my ( $self, $val ) = @_; $self->{_jmax} = $val; return }
22384 sub set_jmax_original_line {
22385 my ( $self, $val ) = @_;
22386 $self->{_jmax_original_line} = $val;
22391 my ( $self, $val ) = @_;
22392 $self->{_rtokens} = $val;
22397 my ( $self, $val ) = @_;
22398 $self->{_rfields} = $val;
22402 sub set_rpatterns {
22403 my ( $self, $val ) = @_;
22404 $self->{_rpatterns} = $val;
22408 sub set_indentation {
22409 my ( $self, $val ) = @_;
22410 $self->{_indentation} = $val;
22414 sub set_leading_space_count {
22415 my ( $self, $val ) = @_;
22416 $self->{_leading_space_count} = $val;
22420 sub set_outdent_long_lines {
22421 my ( $self, $val ) = @_;
22422 $self->{_outdent_long_lines} = $val;
22426 sub set_list_type {
22427 my ( $self, $val ) = @_;
22428 $self->{_list_type} = $val;
22432 sub set_is_hanging_side_comment {
22433 my ( $self, $val ) = @_;
22434 $self->{_is_hanging_side_comment} = $val;
22438 sub set_alignment {
22439 my ( $self, $j, $val ) = @_;
22440 $self->{_ralignments}->[$j] = $val;
22446 #####################################################################
22448 # the Perl::Tidy::VerticalAligner::Alignment class holds information
22449 # on a single column being aligned
22451 #####################################################################
22452 package Perl::Tidy::VerticalAligner::Alignment;
22460 # _column # the current column number
22461 # _starting_column # column number when created
22462 # _matching_token # what token we are matching
22463 # _starting_line # the line index of creation
22465 # the most recent line to use it
22467 # _serial_number # unique number for this alignment
22469 my %default_data = (
22471 starting_column => undef,
22472 matching_token => undef,
22473 starting_line => undef,
22474 ending_line => undef,
22475 saved_column => undef,
22476 serial_number => undef,
22479 # class population count
22482 sub get_count { return $_count }
22483 sub _increment_count { return ++$_count }
22484 sub _decrement_count { return --$_count }
22489 my ( $caller, %arg ) = @_;
22490 my $caller_is_obj = ref($caller);
22491 my $class = $caller_is_obj || $caller;
22492 ##no strict "refs";
22493 my $self = bless {}, $class;
22495 foreach my $key ( keys %default_data ) {
22496 my $_key = '_' . $key;
22497 if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} }
22498 elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} }
22499 else { $self->{$_key} = $default_data{$_key} }
22501 $self->_increment_count();
22507 $self->_decrement_count();
22511 sub get_column { my $self = shift; return $self->{_column} }
22513 sub get_starting_column {
22515 return $self->{_starting_column};
22517 sub get_matching_token { my $self = shift; return $self->{_matching_token} }
22518 sub get_starting_line { my $self = shift; return $self->{_starting_line} }
22519 sub get_ending_line { my $self = shift; return $self->{_ending_line} }
22520 sub get_serial_number { my $self = shift; return $self->{_serial_number} }
22522 sub set_column { my ( $self, $val ) = @_; $self->{_column} = $val; return }
22524 sub set_starting_column {
22525 my ( $self, $val ) = @_;
22526 $self->{_starting_column} = $val;
22530 sub set_matching_token {
22531 my ( $self, $val ) = @_;
22532 $self->{_matching_token} = $val;
22536 sub set_starting_line {
22537 my ( $self, $val ) = @_;
22538 $self->{_starting_line} = $val;
22542 sub set_ending_line {
22543 my ( $self, $val ) = @_;
22544 $self->{_ending_line} = $val;
22548 sub increment_column {
22549 my ( $self, $val ) = @_;
22550 $self->{_column} += $val;
22556 $self->{_saved_column} = $self->{_column};
22560 sub restore_column {
22562 $self->{_column} = $self->{_saved_column};
22567 package Perl::Tidy::VerticalAligner;
22569 # The Perl::Tidy::VerticalAligner package collects output lines and
22570 # attempts to line up certain common tokens, such as => and #, which are
22571 # identified by the calling routine.
22573 # There are two main routines: valign_input and flush. Append acts as a
22574 # storage buffer, collecting lines into a group which can be vertically
22575 # aligned. When alignment is no longer possible or desirable, it dumps
22576 # the group to flush.
22578 # valign_input -----> flush
22586 # Caution: these debug flags produce a lot of output
22587 # They should all be 0 except when debugging small scripts
22589 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
22590 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
22591 use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
22592 use constant VALIGN_DEBUG_FLAG_TABS => 0;
22594 my $debug_warning = sub {
22595 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
22599 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
22600 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
22601 VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
22602 VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
22607 $vertical_aligner_self
22609 $maximum_alignment_index
22613 $previous_minimum_jmax_seen
22614 $previous_maximum_jmax_seen
22615 $maximum_line_index
22620 $last_level_written
22621 $last_leading_space_count
22625 $last_comment_column
22626 $last_side_comment_line_number
22627 $last_side_comment_length
22628 $last_side_comment_level
22629 $outdented_line_count
22630 $first_outdented_line_at
22631 $last_outdented_line_at
22632 $diagnostics_object
22634 $file_writer_object
22635 @side_comment_history
22636 $comment_leading_space_count
22637 $is_matching_terminal_line
22638 $consecutive_block_comments
22645 $cached_line_leading_space_count
22646 $cached_seqno_string
22648 $valign_buffer_filling
22652 $last_nonblank_seqno_string
22656 $rOpts_maximum_line_length
22657 $rOpts_variable_maximum_line_length
22658 $rOpts_continuation_indentation
22659 $rOpts_indent_columns
22661 $rOpts_entab_leading_whitespace
22664 $rOpts_fixed_position_side_comment
22665 $rOpts_minimum_space_to_comment
22672 my $class, $rOpts, $file_writer_object, $logger_object,
22673 $diagnostics_object
22676 # variables describing the entire space group:
22677 $ralignment_list = [];
22679 $last_level_written = -1;
22680 $extra_indent_ok = 0; # can we move all lines to the right?
22681 $last_side_comment_length = 0;
22682 $maximum_jmax_seen = 0;
22683 $minimum_jmax_seen = 0;
22684 $previous_minimum_jmax_seen = 0;
22685 $previous_maximum_jmax_seen = 0;
22687 # variables describing each line of the group
22688 @group_lines = (); # list of all lines in group
22690 $outdented_line_count = 0;
22691 $first_outdented_line_at = 0;
22692 $last_outdented_line_at = 0;
22693 $last_side_comment_line_number = 0;
22694 $last_side_comment_level = -1;
22695 $is_matching_terminal_line = 0;
22697 # most recent 3 side comments; [ line number, column ]
22698 $side_comment_history[0] = [ -300, 0 ];
22699 $side_comment_history[1] = [ -200, 0 ];
22700 $side_comment_history[2] = [ -100, 0 ];
22702 # valign_output_step_B cache:
22703 $cached_line_text = "";
22704 $cached_line_type = 0;
22705 $cached_line_flag = 0;
22707 $cached_line_valid = 0;
22708 $cached_line_leading_space_count = 0;
22709 $cached_seqno_string = "";
22711 # string of sequence numbers joined together
22712 $seqno_string = "";
22713 $last_nonblank_seqno_string = "";
22715 # frequently used parameters
22716 $rOpts_indent_columns = $rOpts->{'indent-columns'};
22717 $rOpts_tabs = $rOpts->{'tabs'};
22718 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
22719 $rOpts_fixed_position_side_comment =
22720 $rOpts->{'fixed-position-side-comment'};
22721 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
22722 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
22723 $rOpts_variable_maximum_line_length =
22724 $rOpts->{'variable-maximum-line-length'};
22725 $rOpts_valign = $rOpts->{'valign'};
22727 $consecutive_block_comments = 0;
22728 forget_side_comment();
22730 initialize_for_new_group();
22732 $vertical_aligner_self = {};
22733 bless $vertical_aligner_self, $class;
22734 return $vertical_aligner_self;
22737 sub initialize_for_new_group {
22738 $maximum_line_index = -1; # lines in the current group
22739 $maximum_alignment_index = -1; # alignments in current group
22740 $zero_count = 0; # count consecutive lines without tokens
22741 $current_line = undef; # line being matched for alignment
22742 $group_maximum_gap = 0; # largest gap introduced
22744 $marginal_match = 0;
22745 $comment_leading_space_count = 0;
22746 $last_leading_space_count = 0;
22750 # interface to Perl::Tidy::Diagnostics routines
22751 sub write_diagnostics {
22753 if ($diagnostics_object) {
22754 $diagnostics_object->write_diagnostics($msg);
22759 # interface to Perl::Tidy::Logger routines
22762 if ($logger_object) {
22763 $logger_object->warning($msg);
22768 sub write_logfile_entry {
22770 if ($logger_object) {
22771 $logger_object->write_logfile_entry($msg);
22776 sub report_definite_bug {
22777 if ($logger_object) {
22778 $logger_object->report_definite_bug();
22785 # return the number of leading spaces associated with an indentation
22786 # variable $indentation is either a constant number of spaces or an
22787 # object with a get_spaces method.
22788 my $indentation = shift;
22789 return ref($indentation) ? $indentation->get_spaces() : $indentation;
22792 sub get_recoverable_spaces {
22794 # return the number of spaces (+ means shift right, - means shift left)
22795 # that we would like to shift a group of lines with the same indentation
22796 # to get them to line up with their opening parens
22797 my $indentation = shift;
22798 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
22801 sub get_stack_depth {
22803 my $indentation = shift;
22804 return ref($indentation) ? $indentation->get_stack_depth() : 0;
22807 sub make_alignment {
22808 my ( $col, $token ) = @_;
22810 # make one new alignment at column $col which aligns token $token
22811 ++$maximum_alignment_index;
22812 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
22814 starting_column => $col,
22815 matching_token => $token,
22816 starting_line => $maximum_line_index,
22817 ending_line => $maximum_line_index,
22818 serial_number => $maximum_alignment_index,
22820 $ralignment_list->[$maximum_alignment_index] = $alignment;
22824 sub dump_alignments {
22826 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
22827 for my $i ( 0 .. $maximum_alignment_index ) {
22828 my $column = $ralignment_list->[$i]->get_column();
22829 my $starting_column = $ralignment_list->[$i]->get_starting_column();
22830 my $matching_token = $ralignment_list->[$i]->get_matching_token();
22831 my $starting_line = $ralignment_list->[$i]->get_starting_line();
22832 my $ending_line = $ralignment_list->[$i]->get_ending_line();
22834 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
22839 sub save_alignment_columns {
22840 for my $i ( 0 .. $maximum_alignment_index ) {
22841 $ralignment_list->[$i]->save_column();
22846 sub restore_alignment_columns {
22847 for my $i ( 0 .. $maximum_alignment_index ) {
22848 $ralignment_list->[$i]->restore_column();
22853 sub forget_side_comment {
22854 $last_comment_column = 0;
22858 sub maximum_line_length_for_level {
22860 # return maximum line length for line starting with a given level
22861 my $maximum_line_length = $rOpts_maximum_line_length;
22862 if ($rOpts_variable_maximum_line_length) {
22864 if ( $level < 0 ) { $level = 0 }
22865 $maximum_line_length += $level * $rOpts_indent_columns;
22867 return $maximum_line_length;
22872 # Place one line in the current vertical group.
22874 # The input parameters are:
22875 # $level = indentation level of this line
22876 # $rfields = reference to array of fields
22877 # $rpatterns = reference to array of patterns, one per field
22878 # $rtokens = reference to array of tokens starting fields 1,2,..
22880 # Here is an example of what this package does. In this example,
22881 # we are trying to line up both the '=>' and the '#'.
22883 # '18' => 'grave', # \`
22884 # '19' => 'acute', # `'
22885 # '20' => 'caron', # \v
22886 # <-tabs-><f1-><--field 2 ---><-f3->
22889 # col1 col2 col3 col4
22891 # The calling routine has already broken the entire line into 3 fields as
22892 # indicated. (So the work of identifying promising common tokens has
22893 # already been done).
22895 # In this example, there will be 2 tokens being matched: '=>' and '#'.
22896 # They are the leading parts of fields 2 and 3, but we do need to know
22897 # what they are so that we can dump a group of lines when these tokens
22900 # The fields contain the actual characters of each field. The patterns
22901 # are like the fields, but they contain mainly token types instead
22902 # of tokens, so they have fewer characters. They are used to be
22903 # sure we are matching fields of similar type.
22905 # In this example, there will be 4 column indexes being adjusted. The
22906 # first one is always at zero. The interior columns are at the start of
22907 # the matching tokens, and the last one tracks the maximum line length.
22909 # Each time a new line comes in, it joins the current vertical
22910 # group if possible. Otherwise it causes the current group to be dumped
22911 # and a new group is started.
22913 # For each new group member, the column locations are increased, as
22914 # necessary, to make room for the new fields. When the group is finally
22915 # output, these column numbers are used to compute the amount of spaces of
22916 # padding needed for each field.
22918 # Programming note: the fields are assumed not to have any tab characters.
22919 # Tabs have been previously removed except for tabs in quoted strings and
22920 # side comments. Tabs in these fields can mess up the column counting.
22921 # The log file warns the user if there are any such tabs.
22924 $level, $level_end,
22925 $indentation, $rfields,
22926 $rtokens, $rpatterns,
22927 $is_forced_break, $outdent_long_lines,
22928 $is_terminal_ternary, $is_terminal_statement,
22929 $do_not_pad, $rvertical_tightness_flags,
22933 # number of fields is $jmax
22934 # number of tokens between fields is $jmax-1
22935 my $jmax = $#{$rfields};
22937 my $leading_space_count = get_spaces($indentation);
22939 # set outdented flag to be sure we either align within statements or
22940 # across statement boundaries, but not both.
22941 my $is_outdented = $last_leading_space_count > $leading_space_count;
22942 $last_leading_space_count = $leading_space_count;
22944 # Patch: undo for hanging side comment
22945 my $is_hanging_side_comment =
22946 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
22947 $is_outdented = 0 if $is_hanging_side_comment;
22949 # Forget side comment alignment after seeing 2 or more block comments
22950 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
22951 if ($is_block_comment) {
22952 $consecutive_block_comments++;
22955 if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
22956 $consecutive_block_comments = 0;
22959 VALIGN_DEBUG_FLAG_APPEND0 && do {
22961 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
22964 # Validate cached line if necessary: If we can produce a container
22965 # with just 2 lines total by combining an existing cached opening
22966 # token with the closing token to follow, then we will mark both
22967 # cached flags as valid.
22968 if ($rvertical_tightness_flags) {
22969 if ( $maximum_line_index <= 0
22970 && $cached_line_type
22972 && $rvertical_tightness_flags->[2]
22973 && $rvertical_tightness_flags->[2] == $cached_seqno )
22975 $rvertical_tightness_flags->[3] ||= 1;
22976 $cached_line_valid ||= 1;
22980 # do not join an opening block brace with an unbalanced line
22981 # unless requested with a flag value of 2
22982 if ( $cached_line_type == 3
22983 && $maximum_line_index < 0
22984 && $cached_line_flag < 2
22985 && $level_jump != 0 )
22987 $cached_line_valid = 0;
22990 # patch until new aligner is finished
22991 if ($do_not_pad) { my_flush() }
22993 # shouldn't happen:
22994 if ( $level < 0 ) { $level = 0 }
22996 # do not align code across indentation level changes
22997 # or if vertical alignment is turned off for debugging
22998 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
23000 # we are allowed to shift a group of lines to the right if its
23001 # level is greater than the previous and next group
23003 ( $level < $group_level && $last_level_written < $group_level );
23007 # If we know that this line will get flushed out by itself because
23008 # of level changes, we can leave the extra_indent_ok flag set.
23009 # That way, if we get an external flush call, we will still be
23010 # able to do some -lp alignment if necessary.
23011 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
23013 $group_level = $level;
23015 # wait until after the above flush to get the leading space
23016 # count because it may have been changed if the -icp flag is in
23018 $leading_space_count = get_spaces($indentation);
23022 # --------------------------------------------------------------------
23023 # Patch to collect outdentable block COMMENTS
23024 # --------------------------------------------------------------------
23025 my $is_blank_line = "";
23026 if ( $group_type eq 'COMMENT' ) {
23030 && $outdent_long_lines
23031 && $leading_space_count == $comment_leading_space_count
23036 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
23044 # --------------------------------------------------------------------
23045 # add dummy fields for terminal ternary
23046 # --------------------------------------------------------------------
23047 my $j_terminal_match;
23048 if ( $is_terminal_ternary && $current_line ) {
23049 $j_terminal_match =
23050 fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
23051 $jmax = @{$rfields} - 1;
23054 # --------------------------------------------------------------------
23055 # add dummy fields for else statement
23056 # --------------------------------------------------------------------
23057 if ( $rfields->[0] =~ /^else\s*$/
23059 && $level_jump == 0 )
23061 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
23062 $jmax = @{$rfields} - 1;
23065 # --------------------------------------------------------------------
23066 # Step 1. Handle simple line of code with no fields to match.
23067 # --------------------------------------------------------------------
23068 if ( $jmax <= 0 ) {
23071 if ( $maximum_line_index >= 0
23072 && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) )
23075 # flush the current group if it has some aligned columns..
23076 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
23078 # flush current group if we are just collecting side comments..
23081 # ...and we haven't seen a comment lately
23082 ( $zero_count > 3 )
23084 # ..or if this new line doesn't fit to the left of the comments
23085 || ( ( $leading_space_count + length( $rfields->[0] ) ) >
23086 $group_lines[0]->get_column(0) )
23093 # patch to start new COMMENT group if this comment may be outdented
23094 if ( $is_block_comment
23095 && $outdent_long_lines
23096 && $maximum_line_index < 0 )
23098 $group_type = 'COMMENT';
23099 $comment_leading_space_count = $leading_space_count;
23100 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
23104 # just write this line directly if no current group, no side comment,
23105 # and no space recovery is needed.
23106 if ( $maximum_line_index < 0 && !get_recoverable_spaces($indentation) )
23108 valign_output_step_B( $leading_space_count, $rfields->[0], 0,
23109 $outdent_long_lines, $rvertical_tightness_flags, $level );
23117 # programming check: (shouldn't happen)
23118 # an error here implies an incorrect call was made
23119 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
23121 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
23123 report_definite_bug();
23126 # --------------------------------------------------------------------
23127 # create an object to hold this line
23128 # --------------------------------------------------------------------
23129 my $new_line = new Perl::Tidy::VerticalAligner::Line(
23131 jmax_original_line => $jmax,
23132 rtokens => $rtokens,
23133 rfields => $rfields,
23134 rpatterns => $rpatterns,
23135 indentation => $indentation,
23136 leading_space_count => $leading_space_count,
23137 outdent_long_lines => $outdent_long_lines,
23139 is_hanging_side_comment => $is_hanging_side_comment,
23140 maximum_line_length => maximum_line_length_for_level($level),
23141 rvertical_tightness_flags => $rvertical_tightness_flags,
23144 # Initialize a global flag saying if the last line of the group should
23145 # match end of group and also terminate the group. There should be no
23146 # returns between here and where the flag is handled at the bottom.
23147 my $col_matching_terminal = 0;
23148 if ( defined($j_terminal_match) ) {
23150 # remember the column of the terminal ? or { to match with
23151 $col_matching_terminal = $current_line->get_column($j_terminal_match);
23153 # set global flag for sub decide_if_aligned
23154 $is_matching_terminal_line = 1;
23157 # --------------------------------------------------------------------
23158 # It simplifies things to create a zero length side comment
23160 # --------------------------------------------------------------------
23161 make_side_comment( $new_line, $level_end );
23163 # --------------------------------------------------------------------
23164 # Decide if this is a simple list of items.
23165 # There are 3 list types: none, comma, comma-arrow.
23166 # We use this below to be less restrictive in deciding what to align.
23167 # --------------------------------------------------------------------
23168 if ($is_forced_break) {
23169 decide_if_list($new_line);
23172 if ($current_line) {
23174 # --------------------------------------------------------------------
23175 # Allow hanging side comment to join current group, if any
23176 # This will help keep side comments aligned, because otherwise we
23177 # will have to start a new group, making alignment less likely.
23178 # --------------------------------------------------------------------
23179 join_hanging_comment( $new_line, $current_line )
23180 if $is_hanging_side_comment;
23182 # --------------------------------------------------------------------
23183 # If there is just one previous line, and it has more fields
23184 # than the new line, try to join fields together to get a match with
23185 # the new line. At the present time, only a single leading '=' is
23186 # allowed to be compressed out. This is useful in rare cases where
23187 # a table is forced to use old breakpoints because of side comments,
23188 # and the table starts out something like this:
23189 # my %MonthChars = ('0', 'Jan', # side comment
23192 # Eliminating the '=' field will allow the remaining fields to line up.
23193 # This situation does not occur if there are no side comments
23194 # because scan_list would put a break after the opening '('.
23195 # --------------------------------------------------------------------
23196 eliminate_old_fields( $new_line, $current_line );
23198 # --------------------------------------------------------------------
23199 # If the new line has more fields than the current group,
23200 # see if we can match the first fields and combine the remaining
23201 # fields of the new line.
23202 # --------------------------------------------------------------------
23203 eliminate_new_fields( $new_line, $current_line );
23205 # --------------------------------------------------------------------
23206 # Flush previous group unless all common tokens and patterns match..
23207 # --------------------------------------------------------------------
23208 check_match( $new_line, $current_line );
23210 # --------------------------------------------------------------------
23211 # See if there is space for this line in the current group (if any)
23212 # --------------------------------------------------------------------
23213 if ($current_line) {
23214 check_fit( $new_line, $current_line );
23218 # --------------------------------------------------------------------
23219 # Append this line to the current group (or start new group)
23220 # --------------------------------------------------------------------
23221 add_to_group($new_line);
23223 # Future update to allow this to vary:
23224 $current_line = $new_line if ( $maximum_line_index == 0 );
23226 # output this group if it ends in a terminal else or ternary line
23227 if ( defined($j_terminal_match) ) {
23229 # if there is only one line in the group (maybe due to failure to match
23230 # perfectly with previous lines), then align the ? or { of this
23231 # terminal line with the previous one unless that would make the line
23233 if ( $maximum_line_index == 0 ) {
23234 my $col_now = $current_line->get_column($j_terminal_match);
23235 my $pad = $col_matching_terminal - $col_now;
23236 my $padding_available =
23237 $current_line->get_available_space_on_right();
23238 if ( $pad > 0 && $pad <= $padding_available ) {
23239 $current_line->increase_field_width( $j_terminal_match, $pad );
23243 $is_matching_terminal_line = 0;
23246 # --------------------------------------------------------------------
23247 # Step 8. Some old debugging stuff
23248 # --------------------------------------------------------------------
23249 VALIGN_DEBUG_FLAG_APPEND && do {
23250 print STDOUT "APPEND fields:";
23251 dump_array( @{$rfields} );
23252 print STDOUT "APPEND tokens:";
23253 dump_array( @{$rtokens} );
23254 print STDOUT "APPEND patterns:";
23255 dump_array( @{$rpatterns} );
23262 sub join_hanging_comment {
23265 my $jmax = $line->get_jmax();
23266 return 0 unless $jmax == 1; # must be 2 fields
23267 my $rtokens = $line->get_rtokens();
23268 return 0 unless $rtokens->[0] eq '#'; # the second field is a comment..
23269 my $rfields = $line->get_rfields();
23270 return 0 unless $rfields->[0] =~ /^\s*$/; # the first field is empty...
23271 my $old_line = shift;
23272 my $maximum_field_index = $old_line->get_jmax();
23274 unless $maximum_field_index > $jmax; # the current line has more fields
23275 my $rpatterns = $line->get_rpatterns();
23277 $line->set_is_hanging_side_comment(1);
23278 $jmax = $maximum_field_index;
23279 $line->set_jmax($jmax);
23280 $rfields->[$jmax] = $rfields->[1];
23281 $rtokens->[ $jmax - 1 ] = $rtokens->[0];
23282 $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
23283 foreach my $j ( 1 .. $jmax - 1 ) {
23284 $rfields->[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
23285 $rtokens->[ $j - 1 ] = "";
23286 $rpatterns->[ $j - 1 ] = "";
23291 sub eliminate_old_fields {
23293 my $new_line = shift;
23294 my $jmax = $new_line->get_jmax();
23295 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
23296 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
23298 # there must be one previous line
23299 return unless ( $maximum_line_index == 0 );
23301 my $old_line = shift;
23302 my $maximum_field_index = $old_line->get_jmax();
23304 ###############################################
23305 # Moved below to allow new coding for => matches
23306 # return unless $maximum_field_index > $jmax;
23307 ###############################################
23309 # Identify specific cases where field elimination is allowed:
23310 # case=1: both lines have comma-separated lists, and the first
23311 # line has an equals
23312 # case=2: both lines have leading equals
23314 # case 1 is the default
23317 # See if case 2: both lines have leading '='
23318 # We'll require similar leading patterns in this case
23319 my $old_rtokens = $old_line->get_rtokens();
23320 my $rtokens = $new_line->get_rtokens();
23321 my $rpatterns = $new_line->get_rpatterns();
23322 my $old_rpatterns = $old_line->get_rpatterns();
23323 if ( $rtokens->[0] =~ /^=>?\d*$/
23324 && $old_rtokens->[0] eq $rtokens->[0]
23325 && $old_rpatterns->[0] eq $rpatterns->[0] )
23330 # not too many fewer fields in new line for case 1
23331 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
23333 # case 1 must have side comment
23334 my $old_rfields = $old_line->get_rfields();
23337 && length( $old_rfields->[$maximum_field_index] ) == 0 );
23339 my $rfields = $new_line->get_rfields();
23341 my $hid_equals = 0;
23343 my @new_alignments = ();
23344 my @new_fields = ();
23345 my @new_matching_patterns = ();
23346 my @new_matching_tokens = ();
23349 my $current_field = '';
23350 my $current_pattern = '';
23352 # loop over all old tokens
23354 foreach my $k ( 0 .. $maximum_field_index - 1 ) {
23355 $current_field .= $old_rfields->[$k];
23356 $current_pattern .= $old_rpatterns->[$k];
23357 last if ( $j > $jmax - 1 );
23359 if ( $old_rtokens->[$k] eq $rtokens->[$j] ) {
23361 $new_fields[$j] = $current_field;
23362 $new_matching_patterns[$j] = $current_pattern;
23363 $current_field = '';
23364 $current_pattern = '';
23365 $new_matching_tokens[$j] = $old_rtokens->[$k];
23366 $new_alignments[$j] = $old_line->get_alignment($k);
23371 if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) {
23372 last if ( $case == 2 ); # avoid problems with stuff
23373 # like: $a=$b=$c=$d;
23377 if ( $in_match && $case == 1 )
23378 ; # disallow gaps in matching field types in case 1
23382 # Modify the current state if we are successful.
23383 # We must exactly reach the ends of the new list for success, and the old
23384 # pattern must have more fields. Here is an example where the first and
23385 # second lines have the same number, and we should not align:
23386 # my @a = map chr, 0 .. 255;
23387 # my @b = grep /\W/, @a;
23388 # my @c = grep /[^\w]/, @a;
23390 # Otherwise, we would get all of the commas aligned, which doesn't work as
23392 # my @a = map chr, 0 .. 255;
23393 # my @b = grep /\W/, @a;
23394 # my @c = grep /[^\w]/, @a;
23396 if ( ( $j == $jmax )
23397 && ( $current_field eq '' )
23398 && ( $case != 1 || $hid_equals )
23399 && ( $maximum_field_index > $jmax ) )
23401 my $k = $maximum_field_index;
23402 $current_field .= $old_rfields->[$k];
23403 $current_pattern .= $old_rpatterns->[$k];
23404 $new_fields[$j] = $current_field;
23405 $new_matching_patterns[$j] = $current_pattern;
23407 $new_alignments[$j] = $old_line->get_alignment($k);
23408 $maximum_field_index = $j;
23410 $old_line->set_alignments(@new_alignments);
23411 $old_line->set_jmax($jmax);
23412 $old_line->set_rtokens( \@new_matching_tokens );
23413 $old_line->set_rfields( \@new_fields );
23414 $old_line->set_rpatterns( \@{$rpatterns} );
23417 # Dumb Down starting match if necessary:
23419 # Consider the following two lines:
23422 # $a => 20 > 3 ? 1 : 0,
23426 # We would like to get alignment regardless of the order of the two lines.
23427 # If the lines come in in this order, then we will simplify the patterns of the first line
23428 # in sub eliminate_new_fields.
23429 # If the lines come in reverse order, then we achieve this with eliminate_new_fields.
23431 # This update is currently restricted to leading '=>' matches. Although we
23432 # could do this for both '=' and '=>', overall the results for '=' come out
23433 # better without this step because this step can eliminate some other good
23434 # matches. For example, with the '=' we get:
23436 # my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
23437 # my @dsf = map "$_\x{FFFE}Fred", @disilva;
23438 # my @dsj = map "$_\x{FFFE}John", @disilva;
23439 # my @dsJ = map "$_ John", @disilva;
23441 # without including '=' we get:
23443 # my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
23444 # my @dsf = map "$_\x{FFFE}Fred", @disilva;
23445 # my @dsj = map "$_\x{FFFE}John", @disilva;
23446 # my @dsJ = map "$_ John", @disilva;
23450 && @new_matching_tokens == 1
23451 ##&& $new_matching_tokens[0] =~ /^=/ # see note above
23452 && $new_matching_tokens[0] =~ /^=>/
23453 && $maximum_field_index > 2
23456 my $jmaxm = $jmax - 1;
23457 my $kmaxm = $maximum_field_index - 1;
23458 my $have_side_comment = $old_rtokens->[$kmaxm] eq '#';
23460 # We need to reduce the group pattern to be just two tokens,
23461 # the leading equality or => and the final side comment
23463 my $mid_field = join "",
23464 @{$old_rfields}[ 1 .. $maximum_field_index - 1 ];
23465 my $mid_patterns = join "",
23466 @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ];
23467 my @new_alignments = (
23468 $old_line->get_alignment(0),
23469 $old_line->get_alignment( $maximum_field_index - 1 )
23472 ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] );
23474 $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index]
23476 my @new_patterns = (
23477 $old_rpatterns->[0], $mid_patterns,
23478 $old_rpatterns->[$maximum_field_index]
23481 $maximum_field_index = 2;
23482 $old_line->set_jmax($maximum_field_index);
23483 $old_line->set_rtokens( \@new_tokens );
23484 $old_line->set_rfields( \@new_fields );
23485 $old_line->set_rpatterns( \@new_patterns );
23487 initialize_for_new_group();
23488 add_to_group($old_line);
23489 $current_line = $old_line;
23494 # create an empty side comment if none exists
23495 sub make_side_comment {
23496 my ( $new_line, $level_end ) = @_;
23497 my $jmax = $new_line->get_jmax();
23498 my $rtokens = $new_line->get_rtokens();
23500 # if line does not have a side comment...
23501 if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
23502 my $rfields = $new_line->get_rfields();
23503 my $rpatterns = $new_line->get_rpatterns();
23504 $rtokens->[$jmax] = '#';
23505 $rfields->[ ++$jmax ] = '';
23506 $rpatterns->[$jmax] = '#';
23507 $new_line->set_jmax($jmax);
23508 $new_line->set_jmax_original_line($jmax);
23511 # line has a side comment..
23514 # don't remember old side comment location for very long
23515 my $line_number = $vertical_aligner_self->get_output_line_number();
23516 my $rfields = $new_line->get_rfields();
23518 $line_number - $last_side_comment_line_number > 12
23520 # and don't remember comment location across block level changes
23521 || ( $level_end < $last_side_comment_level
23522 && $rfields->[0] =~ /^}/ )
23525 forget_side_comment();
23527 $last_side_comment_line_number = $line_number;
23528 $last_side_comment_level = $level_end;
23533 sub decide_if_list {
23537 # A list will be taken to be a line with a forced break in which all
23538 # of the field separators are commas or comma-arrows (except for the
23541 # List separator tokens are things like ',3' or '=>2',
23542 # where the trailing digit is the nesting depth. Allow braces
23543 # to allow nested list items.
23544 my $rtokens = $line->get_rtokens();
23545 my $test_token = $rtokens->[0];
23546 if ( $test_token =~ /^(\,|=>)/ ) {
23547 my $list_type = $test_token;
23548 my $jmax = $line->get_jmax();
23550 foreach ( 1 .. $jmax - 2 ) {
23551 if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) {
23556 $line->set_list_type($list_type);
23561 sub eliminate_new_fields {
23563 my ( $new_line, $old_line ) = @_;
23564 return unless ( $maximum_line_index >= 0 );
23565 my $jmax = $new_line->get_jmax();
23567 my $old_rtokens = $old_line->get_rtokens();
23568 my $rtokens = $new_line->get_rtokens();
23569 my $is_assignment =
23570 ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
23572 # must be monotonic variation
23573 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
23575 # must be more fields in the new line
23576 my $maximum_field_index = $old_line->get_jmax();
23577 return unless ( $maximum_field_index < $jmax );
23579 unless ($is_assignment) {
23581 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
23582 ; # only if monotonic
23584 # never combine fields of a comma list
23586 unless ( $maximum_field_index > 1 )
23587 && ( $new_line->get_list_type() !~ /^,/ );
23590 my $rfields = $new_line->get_rfields();
23591 my $rpatterns = $new_line->get_rpatterns();
23592 my $old_rpatterns = $old_line->get_rpatterns();
23594 # loop over all OLD tokens except comment and check match
23596 foreach my $k ( 0 .. $maximum_field_index - 2 ) {
23597 if ( ( $old_rtokens->[$k] ne $rtokens->[$k] )
23598 || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) )
23605 # first tokens agree, so combine extra new tokens
23607 ##for my $k ( $maximum_field_index .. $jmax - 1 ) {
23608 foreach my $k ( $maximum_field_index .. $jmax - 1 ) {
23610 $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k];
23611 $rfields->[$k] = "";
23612 $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k];
23613 $rpatterns->[$k] = "";
23616 $rtokens->[ $maximum_field_index - 1 ] = '#';
23617 $rfields->[$maximum_field_index] = $rfields->[$jmax];
23618 $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax];
23619 $jmax = $maximum_field_index;
23621 $new_line->set_jmax($jmax);
23625 sub fix_terminal_ternary {
23627 # Add empty fields as necessary to align a ternary term
23632 # : $year % 100 ? 1
23633 # : $year % 400 ? 0
23636 # returns 1 if the terminal item should be indented
23638 my ( $rfields, $rtokens, $rpatterns ) = @_;
23640 my $jmax = @{$rfields} - 1;
23641 my $old_line = $group_lines[$maximum_line_index];
23642 my $rfields_old = $old_line->get_rfields();
23644 my $rpatterns_old = $old_line->get_rpatterns();
23645 my $rtokens_old = $old_line->get_rtokens();
23646 my $maximum_field_index = $old_line->get_jmax();
23648 # look for the question mark after the :
23650 my $depth_question;
23652 foreach my $j ( 0 .. $maximum_field_index - 1 ) {
23653 my $tok = $rtokens_old->[$j];
23654 if ( $tok =~ /^\?(\d+)$/ ) {
23655 $depth_question = $1;
23657 # depth must be correct
23658 next unless ( $depth_question eq $group_level );
23661 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
23662 $pad = " " x length($1);
23665 return; # shouldn't happen
23670 return unless ( defined($jquestion) ); # shouldn't happen
23672 # Now splice the tokens and patterns of the previous line
23673 # into the else line to insure a match. Add empty fields
23675 my $jadd = $jquestion;
23677 # Work on copies of the actual arrays in case we have
23678 # to return due to an error
23679 my @fields = @{$rfields};
23680 my @patterns = @{$rpatterns};
23681 my @tokens = @{$rtokens};
23683 VALIGN_DEBUG_FLAG_TERNARY && do {
23685 print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
23686 print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
23687 print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
23688 print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
23689 print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
23690 print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
23693 # handle cases of leading colon on this line
23694 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
23696 my ( $colon, $therest ) = ( $1, $2 );
23698 # Handle sub-case of first field with leading colon plus additional code
23699 # This is the usual situation as at the '1' below:
23701 # : $year % 400 ? 0
23705 # Split the first field after the leading colon and insert padding.
23706 # Note that this padding will remain even if the terminal value goes
23707 # out on a separate line. This does not seem to look to bad, so no
23708 # mechanism has been included to undo it.
23709 my $field1 = shift @fields;
23710 unshift @fields, ( $colon, $pad . $therest );
23712 # change the leading pattern from : to ?
23713 return unless ( $patterns[0] =~ s/^\:/?/ );
23715 # install leading tokens and patterns of existing line
23716 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
23717 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
23719 # insert appropriate number of empty fields
23720 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
23723 # handle sub-case of first field just equal to leading colon.
23724 # This can happen for example in the example below where
23725 # the leading '(' would create a new alignment token
23726 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
23727 # : ( $mname = $name . '->' );
23730 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
23732 # prepend a leading ? onto the second pattern
23733 $patterns[1] = "?b" . $patterns[1];
23735 # pad the second field
23736 $fields[1] = $pad . $fields[1];
23738 # install leading tokens and patterns of existing line, replacing
23739 # leading token and inserting appropriate number of empty fields
23740 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
23741 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
23742 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
23746 # Handle case of no leading colon on this line. This will
23747 # be the case when -wba=':' is used. For example,
23748 # $year % 400 ? 0 :
23752 # install leading tokens and patterns of existing line
23753 $patterns[0] = '?' . 'b' . $patterns[0];
23754 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
23755 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
23757 # insert appropriate number of empty fields
23758 $jadd = $jquestion + 1;
23759 $fields[0] = $pad . $fields[0];
23760 splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
23763 VALIGN_DEBUG_FLAG_TERNARY && do {
23765 print STDOUT "MODIFIED TOKENS=<@tokens>\n";
23766 print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
23767 print STDOUT "MODIFIED FIELDS=<@fields>\n";
23770 # all ok .. update the arrays
23771 @{$rfields} = @fields;
23772 @{$rtokens} = @tokens;
23773 @{$rpatterns} = @patterns;
23775 # force a flush after this line
23779 sub fix_terminal_else {
23781 # Add empty fields as necessary to align a balanced terminal
23782 # else block to a previous if/elsif/unless block,
23785 # if ( 1 || $x ) { print "ok 13\n"; }
23786 # else { print "not ok 13\n"; }
23788 # returns 1 if the else block should be indented
23790 my ( $rfields, $rtokens, $rpatterns ) = @_;
23791 my $jmax = @{$rfields} - 1;
23792 return unless ( $jmax > 0 );
23794 # check for balanced else block following if/elsif/unless
23795 my $rfields_old = $current_line->get_rfields();
23797 # TBD: add handling for 'case'
23798 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
23800 # look for the opening brace after the else, and extract the depth
23801 my $tok_brace = $rtokens->[0];
23803 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
23805 # probably: "else # side_comment"
23808 my $rpatterns_old = $current_line->get_rpatterns();
23809 my $rtokens_old = $current_line->get_rtokens();
23810 my $maximum_field_index = $current_line->get_jmax();
23812 # be sure the previous if/elsif is followed by an opening paren
23814 my $tok_paren = '(' . $depth_brace;
23815 my $tok_test = $rtokens_old->[$jparen];
23816 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
23818 # Now find the opening block brace
23820 foreach my $j ( 1 .. $maximum_field_index - 1 ) {
23821 my $tok = $rtokens_old->[$j];
23822 if ( $tok eq $tok_brace ) {
23827 return unless ( defined($jbrace) ); # shouldn't happen
23829 # Now splice the tokens and patterns of the previous line
23830 # into the else line to insure a match. Add empty fields
23832 my $jadd = $jbrace - $jparen;
23833 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
23834 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
23835 splice( @{$rfields}, 1, 0, ('') x $jadd );
23837 # force a flush after this line if it does not follow a case
23838 if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
23839 else { return $jbrace }
23842 { # sub check_match
23843 my %is_good_alignment;
23847 # Vertically aligning on certain "good" tokens is usually okay
23848 # so we can be less restrictive in marginal cases.
23849 my @q = qw( { ? => = );
23851 @is_good_alignment{@q} = (1) x scalar(@q);
23856 # See if the current line matches the current vertical alignment group.
23857 # If not, flush the current group.
23858 my ( $new_line, $old_line ) = @_;
23860 # uses global variables:
23861 # $previous_minimum_jmax_seen
23862 # $maximum_jmax_seen
23863 # $maximum_line_index
23865 my $jmax = $new_line->get_jmax();
23866 my $maximum_field_index = $old_line->get_jmax();
23868 # flush if this line has too many fields
23869 if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
23871 # flush if adding this line would make a non-monotonic field count
23873 ( $maximum_field_index > $jmax ) # this has too few fields
23875 ( $previous_minimum_jmax_seen <
23876 $jmax ) # and wouldn't be monotonic
23877 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
23884 # otherwise see if this line matches the current group
23885 my $jmax_original_line = $new_line->get_jmax_original_line();
23886 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
23887 my $rtokens = $new_line->get_rtokens();
23888 my $rfields = $new_line->get_rfields();
23889 my $rpatterns = $new_line->get_rpatterns();
23890 my $list_type = $new_line->get_list_type();
23892 my $group_list_type = $old_line->get_list_type();
23893 my $old_rpatterns = $old_line->get_rpatterns();
23894 my $old_rtokens = $old_line->get_rtokens();
23896 my $jlimit = $jmax - 1;
23897 if ( $maximum_field_index > $jmax ) {
23898 $jlimit = $jmax_original_line;
23899 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
23902 # handle comma-separated lists ..
23903 if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
23904 for my $j ( 0 .. $jlimit ) {
23905 my $old_tok = $old_rtokens->[$j];
23906 next unless $old_tok;
23907 my $new_tok = $rtokens->[$j];
23908 next unless $new_tok;
23910 # lists always match ...
23911 # unless they would align any '=>'s with ','s
23913 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
23914 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
23918 # do detailed check for everything else except hanging side comments
23919 elsif ( !$is_hanging_side_comment ) {
23921 my $leading_space_count = $new_line->get_leading_space_count();
23925 my $saw_good_alignment;
23927 for my $j ( 0 .. $jlimit ) {
23929 my $old_tok = $old_rtokens->[$j];
23930 my $new_tok = $rtokens->[$j];
23932 # Note on encoding used for alignment tokens:
23933 # -------------------------------------------
23934 # Tokens are "decorated" with information which can help
23935 # prevent unwanted alignments. Consider for example the
23936 # following two lines:
23937 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
23938 # local ( $i, $f ) = &'bdiv( $xn, $xd );
23939 # There are three alignment tokens in each line, a comma,
23940 # an =, and a comma. In the first line these three tokens
23942 # ,4+local-18 =3 ,4+split-7
23943 # and in the second line they are encoded as
23944 # ,4+local-18 =3 ,4+&'bdiv-8
23945 # Tokens always at least have token name and nesting
23946 # depth. So in this example the ='s are at depth 3 and
23947 # the ,'s are at depth 4. This prevents aligning tokens
23948 # of different depths. Commas contain additional
23949 # information, as follows:
23950 # , {depth} + {container name} - {spaces to opening paren}
23951 # This allows us to reject matching the rightmost commas
23952 # in the above two lines, since they are for different
23953 # function calls. This encoding is done in
23954 # 'sub send_lines_to_vertical_aligner'.
23956 # Pick off actual token.
23957 # Everything up to the first digit is the actual token.
23958 my $alignment_token = $new_tok;
23959 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
23961 # see if the decorated tokens match
23962 my $tokens_match = $new_tok eq $old_tok
23964 # Exception for matching terminal : of ternary statement..
23965 # consider containers prefixed by ? and : a match
23966 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
23968 # No match if the alignment tokens differ...
23969 if ( !$tokens_match ) {
23971 # ...Unless this is a side comment
23975 # and there is either at least one alignment token
23976 # or this is a single item following a list. This
23977 # latter rule is required for 'December' to join
23978 # the following list:
23980 # '', 'January', 'February', 'March',
23981 # 'April', 'May', 'June', 'July',
23982 # 'August', 'September', 'October', 'November',
23985 # If it doesn't then the -lp formatting will fail.
23986 && ( $j > 0 || $old_tok =~ /^,/ )
23989 $marginal_match = 1
23990 if ( $marginal_match == 0
23991 && $maximum_line_index == 0 );
23998 # Calculate amount of padding required to fit this in.
23999 # $pad is the number of spaces by which we must increase
24000 # the current field to squeeze in this field.
24002 length( $rfields->[$j] ) - $old_line->current_field_width($j);
24003 if ( $j == 0 ) { $pad += $leading_space_count; }
24005 # remember max pads to limit marginal cases
24006 if ( $alignment_token ne '#' ) {
24007 if ( $pad > $max_pad ) { $max_pad = $pad }
24008 if ( $pad < $min_pad ) { $min_pad = $pad }
24010 if ( $is_good_alignment{$alignment_token} ) {
24011 $saw_good_alignment = 1;
24014 # If patterns don't match, we have to be careful...
24015 if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) {
24017 # flag this as a marginal match since patterns differ
24018 $marginal_match = 1
24019 if ( $marginal_match == 0 && $maximum_line_index == 0 );
24021 # We have to be very careful about aligning commas
24022 # when the pattern's don't match, because it can be
24023 # worse to create an alignment where none is needed
24024 # than to omit one. Here's an example where the ','s
24025 # are not in named containers. The first line below
24026 # should not match the next two:
24027 # ( $a, $b ) = ( $b, $r );
24028 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
24029 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
24030 if ( $alignment_token eq ',' ) {
24032 # do not align commas unless they are in named containers
24033 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
24036 # do not align parens unless patterns match;
24037 # large ugly spaces can occur in math expressions.
24038 elsif ( $alignment_token eq '(' ) {
24040 # But we can allow a match if the parens don't
24041 # require any padding.
24042 if ( $pad != 0 ) { goto NO_MATCH }
24045 # Handle an '=' alignment with different patterns to
24047 elsif ( $alignment_token eq '=' ) {
24049 # It is best to be a little restrictive when
24050 # aligning '=' tokens. Here is an example of
24051 # two lines that we will not align:
24054 # The problem is that one is a 'my' declaration,
24055 # and the other isn't, so they're not very similar.
24056 # We will filter these out by comparing the first
24057 # letter of the pattern. This is crude, but works
24060 substr( $old_rpatterns->[$j], 0, 1 ) ne
24061 substr( $rpatterns->[$j], 0, 1 ) )
24066 # If we pass that test, we'll call it a marginal match.
24067 # Here is an example of a marginal match:
24069 # $op = compile_bblock($op);
24070 # The left tokens are both identifiers, but
24071 # one accesses a hash and the other doesn't.
24072 # We'll let this be a tentative match and undo
24073 # it later if we don't find more than 2 lines
24075 elsif ( $maximum_line_index == 0 ) {
24077 2; # =2 prevents being undone below
24082 # Don't let line with fewer fields increase column widths
24084 if ( $maximum_field_index > $jmax ) {
24086 # Exception: suspend this rule to allow last lines to join
24087 if ( $pad > 0 ) { goto NO_MATCH; }
24089 } ## end for my $j ( 0 .. $jlimit)
24091 # Turn off the "marginal match" flag in some cases...
24092 # A "marginal match" occurs when the alignment tokens agree
24093 # but there are differences in the other tokens (patterns).
24094 # If we leave the marginal match flag set, then the rule is that we
24095 # will align only if there are more than two lines in the group.
24096 # We will turn of the flag if we almost have a match
24097 # and either we have seen a good alignment token or we
24098 # just need a small pad (2 spaces) to fit. These rules are
24099 # the result of experimentation. Tokens which misaligned by just
24100 # one or two characters are annoying. On the other hand,
24101 # large gaps to less important alignment tokens are also annoying.
24102 if ( $marginal_match == 1
24103 && $jmax == $maximum_field_index
24104 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
24107 $marginal_match = 0;
24109 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
24112 # We have a match (even if marginal).
24113 # If the current line has fewer fields than the current group
24114 # but otherwise matches, copy the remaining group fields to
24115 # make it a perfect match.
24116 if ( $maximum_field_index > $jmax ) {
24118 ##########################################################
24119 # FIXME: The previous version had a bug which made side comments
24120 # become regular fields, so for now the program does not allow a
24121 # line with side comment to match. This should eventually be done.
24122 # The best test file for experimenting is 'lista.t'
24123 ##########################################################
24125 my $comment = $rfields->[$jmax];
24126 goto NO_MATCH if ($comment);
24129 for my $jj ( $jlimit .. $maximum_field_index ) {
24130 $rtokens->[$jj] = $old_rtokens->[$jj];
24131 $rfields->[ $jj + 1 ] = '';
24132 $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ];
24135 ## THESE DO NOT GIVE CORRECT RESULTS
24136 ## $rfields->[$jmax] = $comment;
24137 ## $new_line->set_jmax($jmax);
24143 ##print "no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$old_rtokens->[0]\n";
24151 my ( $new_line, $old_line ) = @_;
24152 return unless ( $maximum_line_index >= 0 );
24154 my $jmax = $new_line->get_jmax();
24155 my $leading_space_count = $new_line->get_leading_space_count();
24156 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
24157 my $rtokens = $new_line->get_rtokens();
24158 my $rfields = $new_line->get_rfields();
24159 my $rpatterns = $new_line->get_rpatterns();
24161 my $group_list_type = $group_lines[0]->get_list_type();
24163 my $padding_so_far = 0;
24164 my $padding_available = $old_line->get_available_space_on_right();
24166 # save current columns in case this doesn't work
24167 save_alignment_columns();
24169 my $maximum_field_index = $old_line->get_jmax();
24170 for my $j ( 0 .. $jmax ) {
24172 my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j);
24175 $pad += $leading_space_count;
24178 # remember largest gap of the group, excluding gap to side comment
24180 && $group_maximum_gap < -$pad
24182 && $j < $jmax - 1 )
24184 $group_maximum_gap = -$pad;
24190 ## This patch helps sometimes, but it doesn't check to see if
24191 ## the line is too long even without the side comment. It needs
24193 ##don't let a long token with no trailing side comment push
24194 ##side comments out, or end a group. (sidecmt1.t)
24195 ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0);
24197 # BEGIN PATCH for keith1.txt.
24198 # If the group began matching multiple tokens but later this got
24199 # reduced to a fewer number of matching tokens, then the fields
24200 # of the later lines will still have to fit into their corresponding
24201 # fields. So a large later field will "push" the other fields to
24202 # the right, including previous side comments, and if there is no room
24203 # then there is no match.
24204 # For example, look at the last line in the following snippet:
24206 # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false;
24207 # my $env = ($b_prod_db) ? "prd" : "val";
24208 # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL";
24209 # my $task = $OPT{t};
24210 # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash";
24212 # The long term will push the '?' to the right to fit in, and in this
24213 # case there is not enough room so it will not match the equals unless
24214 # we do something special.
24216 # Usually it looks good to keep an initial alignment of '=' going, and
24217 # we can do this if the long term can fit in the space taken up by the
24218 # remaining fields (the ? : fields here).
24220 # Allowing any matching token for now, but it could be restricted
24221 # to an '='-like token if necessary.
24224 $pad > $padding_available
24225 && $jmax == 2 # matching one thing (plus #)
24226 && $j == $jmax - 1 # at last field
24227 && $maximum_line_index > 0 # more than 1 line in group now
24228 && $jmax < $maximum_field_index # other lines have more fields
24229 && length( $rfields->[$jmax] ) == 0 # no side comment
24231 # Uncomment to match only equals (but this does not seem necessary)
24232 # && $rtokens->[0] =~ /^=\d/ # matching an equals
24235 my $extra_padding = 0;
24236 foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) {
24237 $extra_padding += $old_line->current_field_width($jj);
24240 next if ( $pad <= $padding_available + $extra_padding );
24243 # END PATCH for keith1.pl
24245 # This line will need space; lets see if we want to accept it..
24248 # not if this won't fit
24249 ( $pad > $padding_available )
24251 # previously, there were upper bounds placed on padding here
24252 # (maximum_whitespace_columns), but they were not really helpful
24257 # revert to starting state then flush; things didn't work out
24258 restore_alignment_columns();
24263 # patch to avoid excessive gaps in previous lines,
24264 # due to a line of fewer fields.
24265 # return join( ".",
24266 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
24267 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
24268 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
24270 # looks ok, squeeze this field in
24271 $old_line->increase_field_width( $j, $pad );
24272 $padding_available -= $pad;
24274 # remember largest gap of the group, excluding gap to side comment
24275 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
24276 $group_maximum_gap = $pad;
24284 # The current line either starts a new alignment group or is
24285 # accepted into the current alignment group.
24286 my $new_line = shift;
24287 $group_lines[ ++$maximum_line_index ] = $new_line;
24289 # initialize field lengths if starting new group
24290 if ( $maximum_line_index == 0 ) {
24292 my $jmax = $new_line->get_jmax();
24293 my $rfields = $new_line->get_rfields();
24294 my $rtokens = $new_line->get_rtokens();
24295 my $col = $new_line->get_leading_space_count();
24297 for my $j ( 0 .. $jmax ) {
24298 $col += length( $rfields->[$j] );
24300 # create initial alignments for the new group
24302 if ( $j < $jmax ) { $token = $rtokens->[$j] }
24303 my $alignment = make_alignment( $col, $token );
24304 $new_line->set_alignment( $j, $alignment );
24307 $maximum_jmax_seen = $jmax;
24308 $minimum_jmax_seen = $jmax;
24311 # use previous alignments otherwise
24313 my @new_alignments =
24314 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
24315 $new_line->set_alignments(@new_alignments);
24318 # remember group jmax extremes for next call to valign_input
24319 $previous_minimum_jmax_seen = $minimum_jmax_seen;
24320 $previous_maximum_jmax_seen = $maximum_jmax_seen;
24326 # debug routine to dump array contents
24328 print STDOUT "(@_)\n";
24332 # flush() sends the current Perl::Tidy::VerticalAligner group down the
24333 # pipeline to Perl::Tidy::FileWriter.
24335 # This is the external flush, which also empties the buffer and cache
24338 # the buffer must be emptied first, then any cached text
24339 dump_valign_buffer();
24341 if ( $maximum_line_index < 0 ) {
24342 if ($cached_line_type) {
24343 $seqno_string = $cached_seqno_string;
24344 valign_output_step_C( $cached_line_text,
24345 $cached_line_leading_space_count,
24346 $last_level_written );
24347 $cached_line_type = 0;
24348 $cached_line_text = "";
24349 $cached_seqno_string = "";
24358 sub reduce_valign_buffer_indentation {
24361 if ( $valign_buffer_filling && $diff ) {
24362 my $max_valign_buffer = @valign_buffer;
24363 foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
24364 my ( $line, $leading_space_count, $level ) =
24365 @{ $valign_buffer[$i] };
24366 my $ws = substr( $line, 0, $diff );
24367 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
24368 $line = substr( $line, $diff );
24370 if ( $leading_space_count >= $diff ) {
24371 $leading_space_count -= $diff;
24372 $level = level_change( $leading_space_count, $diff, $level );
24374 $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
24382 # compute decrease in level when we remove $diff spaces from the
24384 my ( $leading_space_count, $diff, $level ) = @_;
24385 if ($rOpts_indent_columns) {
24387 int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
24388 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
24389 $level -= ( $olev - $nlev );
24390 if ( $level < 0 ) { $level = 0 }
24395 sub dump_valign_buffer {
24396 if (@valign_buffer) {
24397 foreach (@valign_buffer) {
24398 valign_output_step_D( @{$_} );
24400 @valign_buffer = ();
24402 $valign_buffer_filling = "";
24406 # This is the internal flush, which leaves the cache intact
24409 return if ( $maximum_line_index < 0 );
24411 # handle a group of comment lines
24412 if ( $group_type eq 'COMMENT' ) {
24414 VALIGN_DEBUG_FLAG_APPEND0 && do {
24415 my ( $a, $b, $c ) = caller();
24417 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
24420 my $leading_space_count = $comment_leading_space_count;
24421 my $leading_string = get_leading_string($leading_space_count);
24423 # zero leading space count if any lines are too long
24424 my $max_excess = 0;
24425 for my $i ( 0 .. $maximum_line_index ) {
24426 my $str = $group_lines[$i];
24429 $leading_space_count -
24430 maximum_line_length_for_level($group_level);
24431 if ( $excess > $max_excess ) {
24432 $max_excess = $excess;
24436 if ( $max_excess > 0 ) {
24437 $leading_space_count -= $max_excess;
24438 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
24439 $last_outdented_line_at =
24440 $file_writer_object->get_output_line_number();
24441 unless ($outdented_line_count) {
24442 $first_outdented_line_at = $last_outdented_line_at;
24444 $outdented_line_count += ( $maximum_line_index + 1 );
24447 # write the group of lines
24448 my $outdent_long_lines = 0;
24449 for my $i ( 0 .. $maximum_line_index ) {
24450 valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
24451 $outdent_long_lines, "", $group_level );
24455 # handle a group of code lines
24458 VALIGN_DEBUG_FLAG_APPEND0 && do {
24459 my $group_list_type = $group_lines[0]->get_list_type();
24460 my ( $a, $b, $c ) = caller();
24461 my $maximum_field_index = $group_lines[0]->get_jmax();
24463 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
24467 # some small groups are best left unaligned
24468 my $do_not_align = decide_if_aligned();
24470 # optimize side comment location
24471 $do_not_align = adjust_side_comment($do_not_align);
24473 # recover spaces for -lp option if possible
24474 my $extra_leading_spaces = get_extra_leading_spaces();
24476 # all lines of this group have the same basic leading spacing
24477 my $group_leader_length = $group_lines[0]->get_leading_space_count();
24479 # add extra leading spaces if helpful
24480 # NOTE: Use zero; this did not work well
24481 my $min_ci_gap = 0;
24483 # loop to output all lines
24484 for my $i ( 0 .. $maximum_line_index ) {
24485 my $line = $group_lines[$i];
24486 valign_output_step_A( $line, $min_ci_gap, $do_not_align,
24487 $group_leader_length, $extra_leading_spaces );
24490 initialize_for_new_group();
24494 sub decide_if_aligned {
24496 # Do not try to align two lines which are not really similar
24497 return unless $maximum_line_index == 1;
24498 return if ($is_matching_terminal_line);
24500 my $group_list_type = $group_lines[0]->get_list_type();
24502 my $do_not_align = (
24504 # always align lists
24509 # don't align if it was just a marginal match
24512 # don't align two lines with big gap
24513 || $group_maximum_gap > 12
24515 # or lines with differing number of alignment tokens
24516 # TODO: this could be improved. It occasionally rejects
24518 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
24522 # But try to convert them into a simple comment group if the first line
24523 # a has side comment
24524 my $rfields = $group_lines[0]->get_rfields();
24525 my $maximum_field_index = $group_lines[0]->get_jmax();
24527 && ( $maximum_line_index > 0 )
24528 && ( length( $rfields->[$maximum_field_index] ) > 0 ) )
24533 return $do_not_align;
24536 sub adjust_side_comment {
24538 my $do_not_align = shift;
24540 # let's see if we can move the side comment field out a little
24541 # to improve readability (the last field is always a side comment field)
24542 my $have_side_comment = 0;
24543 my $first_side_comment_line = -1;
24544 my $maximum_field_index = $group_lines[0]->get_jmax();
24545 for my $i ( 0 .. $maximum_line_index ) {
24546 my $line = $group_lines[$i];
24548 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
24549 $have_side_comment = 1;
24550 $first_side_comment_line = $i;
24555 my $kmax = $maximum_field_index + 1;
24557 if ($have_side_comment) {
24559 my $line = $group_lines[0];
24561 # the maximum space without exceeding the line length:
24562 my $avail = $line->get_available_space_on_right();
24564 # try to use the previous comment column
24565 my $side_comment_column = $line->get_column( $kmax - 2 );
24566 my $move = $last_comment_column - $side_comment_column;
24568 ## my $sc_line0 = $side_comment_history[0]->[0];
24569 ## my $sc_col0 = $side_comment_history[0]->[1];
24570 ## my $sc_line1 = $side_comment_history[1]->[0];
24571 ## my $sc_col1 = $side_comment_history[1]->[1];
24572 ## my $sc_line2 = $side_comment_history[2]->[0];
24573 ## my $sc_col2 = $side_comment_history[2]->[1];
24575 ## # FUTURE UPDATES:
24576 ## # Be sure to ignore 'do not align' and '} # end comments'
24577 ## # Find first $move > 0 and $move <= $avail as follows:
24578 ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
24579 ## # 2. try sc_col2 if (line-sc_line2) < 12
24580 ## # 3. try min possible space, plus up to 8,
24581 ## # 4. try min possible space
24583 if ( $kmax > 0 && !$do_not_align ) {
24585 # but if this doesn't work, give up and use the minimum space
24586 if ( $move > $avail ) {
24587 $move = $rOpts_minimum_space_to_comment - 1;
24590 # but we want some minimum space to the comment
24591 my $min_move = $rOpts_minimum_space_to_comment - 1;
24593 && $last_side_comment_length > 0
24594 && ( $first_side_comment_line == 0 )
24595 && $group_level == $last_level_written )
24600 if ( $move < $min_move ) {
24604 # previously, an upper bound was placed on $move here,
24605 # (maximum_space_to_comment), but it was not helpful
24607 # don't exceed the available space
24608 if ( $move > $avail ) { $move = $avail }
24610 # we can only increase space, never decrease
24612 $line->increase_field_width( $maximum_field_index - 1, $move );
24615 # remember this column for the next group
24616 $last_comment_column = $line->get_column( $kmax - 2 );
24620 # try to at least line up the existing side comment location
24621 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
24622 $line->increase_field_width( $maximum_field_index - 1, $move );
24626 # reset side comment column if we can't align
24628 forget_side_comment();
24632 return $do_not_align;
24635 sub valign_output_step_A {
24637 ###############################################################
24638 # This is Step A in writing vertically aligned lines.
24639 # The line is prepared according to the alignments which have
24640 # been found and shipped to the next step.
24641 ###############################################################
24643 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
24644 $extra_leading_spaces )
24646 my $rfields = $line->get_rfields();
24647 my $leading_space_count = $line->get_leading_space_count();
24648 my $outdent_long_lines = $line->get_outdent_long_lines();
24649 my $maximum_field_index = $line->get_jmax();
24650 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
24652 # add any extra spaces
24653 if ( $leading_space_count > $group_leader_length ) {
24654 $leading_space_count += $min_ci_gap;
24657 my $str = $rfields->[0];
24659 # loop to concatenate all fields of this line and needed padding
24660 my $total_pad_count = 0;
24661 for my $j ( 1 .. $maximum_field_index ) {
24663 # skip zero-length side comments
24666 ( $j == $maximum_field_index )
24667 && ( !defined( $rfields->[$j] )
24668 || ( length( $rfields->[$j] ) == 0 ) )
24671 # compute spaces of padding before this field
24672 my $col = $line->get_column( $j - 1 );
24673 my $pad = $col - ( length($str) + $leading_space_count );
24675 if ($do_not_align) {
24677 ( $j < $maximum_field_index )
24679 : $rOpts_minimum_space_to_comment - 1;
24682 # if the -fpsc flag is set, move the side comment to the selected
24683 # column if and only if it is possible, ignoring constraints on
24684 # line length and minimum space to comment
24685 if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
24687 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
24688 if ( $newpad >= 0 ) { $pad = $newpad; }
24691 # accumulate the padding
24692 if ( $pad > 0 ) { $total_pad_count += $pad; }
24695 if ( !defined $rfields->[$j] ) {
24696 write_diagnostics("UNDEFined field at j=$j\n");
24699 # only add padding when we have a finite field;
24700 # this avoids extra terminal spaces if we have empty fields
24701 if ( length( $rfields->[$j] ) > 0 ) {
24702 $str .= ' ' x $total_pad_count;
24703 $total_pad_count = 0;
24704 $str .= $rfields->[$j];
24707 $total_pad_count = 0;
24710 # update side comment history buffer
24711 if ( $j == $maximum_field_index ) {
24712 my $lineno = $file_writer_object->get_output_line_number();
24713 shift @side_comment_history;
24714 push @side_comment_history, [ $lineno, $col ];
24718 my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) );
24720 # ship this line off
24721 valign_output_step_B( $leading_space_count + $extra_leading_spaces,
24722 $str, $side_comment_length, $outdent_long_lines,
24723 $rvertical_tightness_flags, $group_level );
24727 sub get_extra_leading_spaces {
24729 #----------------------------------------------------------
24730 # Define any extra indentation space (for the -lp option).
24732 # If a list has side comments, sub scan_list must dump the
24733 # list before it sees everything. When this happens, it sets
24734 # the indentation to the standard scheme, but notes how
24735 # many spaces it would have liked to use. We may be able
24736 # to recover that space here in the event that all of the
24737 # lines of a list are back together again.
24738 #----------------------------------------------------------
24740 my $extra_leading_spaces = 0;
24741 if ($extra_indent_ok) {
24742 my $object = $group_lines[0]->get_indentation();
24743 if ( ref($object) ) {
24744 my $extra_indentation_spaces_wanted =
24745 get_recoverable_spaces($object);
24747 # all indentation objects must be the same
24748 for my $i ( 1 .. $maximum_line_index ) {
24749 if ( $object != $group_lines[$i]->get_indentation() ) {
24750 $extra_indentation_spaces_wanted = 0;
24755 if ($extra_indentation_spaces_wanted) {
24757 # the maximum space without exceeding the line length:
24758 my $avail = $group_lines[0]->get_available_space_on_right();
24759 $extra_leading_spaces =
24760 ( $avail > $extra_indentation_spaces_wanted )
24761 ? $extra_indentation_spaces_wanted
24764 # update the indentation object because with -icp the terminal
24765 # ');' will use the same adjustment.
24766 $object->permanently_decrease_available_spaces(
24767 -$extra_leading_spaces );
24771 return $extra_leading_spaces;
24774 sub combine_fields {
24776 # combine all fields except for the comment field ( sidecmt.t )
24777 # Uses global variables:
24779 # $maximum_line_index
24780 my $maximum_field_index = $group_lines[0]->get_jmax();
24781 foreach my $j ( 0 .. $maximum_line_index ) {
24782 my $line = $group_lines[$j];
24783 my $rfields = $line->get_rfields();
24784 foreach ( 1 .. $maximum_field_index - 1 ) {
24785 $rfields->[0] .= $rfields->[$_];
24787 $rfields->[1] = $rfields->[$maximum_field_index];
24789 $line->set_jmax(1);
24790 $line->set_column( 0, 0 );
24791 $line->set_column( 1, 0 );
24794 $maximum_field_index = 1;
24796 for my $j ( 0 .. $maximum_line_index ) {
24797 my $line = $group_lines[$j];
24798 my $rfields = $line->get_rfields();
24799 for my $k ( 0 .. $maximum_field_index ) {
24800 my $pad = length( $rfields->[$k] ) - $line->current_field_width($k);
24802 $pad += $group_lines[$j]->get_leading_space_count();
24805 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
24812 sub get_output_line_number {
24814 # the output line number reported to a caller is the number of items
24815 # written plus the number of items in the buffer
24817 return 1 + $maximum_line_index +
24818 $file_writer_object->get_output_line_number();
24821 sub valign_output_step_B {
24823 ###############################################################
24824 # This is Step B in writing vertically aligned lines.
24825 # Vertical tightness is applied according to preset flags.
24826 # In particular this routine handles stacking of opening
24827 # and closing tokens.
24828 ###############################################################
24830 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
24831 $rvertical_tightness_flags, $level )
24834 # handle outdenting of long lines:
24835 if ($outdent_long_lines) {
24838 $side_comment_length +
24839 $leading_space_count -
24840 maximum_line_length_for_level($level);
24841 if ( $excess > 0 ) {
24842 $leading_space_count = 0;
24843 $last_outdented_line_at =
24844 $file_writer_object->get_output_line_number();
24846 unless ($outdented_line_count) {
24847 $first_outdented_line_at = $last_outdented_line_at;
24849 $outdented_line_count++;
24853 # Make preliminary leading whitespace. It could get changed
24854 # later by entabbing, so we have to keep track of any changes
24855 # to the leading_space_count from here on.
24856 my $leading_string =
24857 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
24859 # Unpack any recombination data; it was packed by
24860 # sub send_lines_to_vertical_aligner. Contents:
24862 # [0] type: 1=opening non-block 2=closing non-block
24863 # 3=opening block brace 4=closing block brace
24864 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
24865 # if closing: spaces of padding to use
24866 # [2] sequence number of container
24867 # [3] valid flag: do not append if this flag is false
24869 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
24871 if ($rvertical_tightness_flags) {
24873 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
24875 ) = @{$rvertical_tightness_flags};
24878 $seqno_string = $seqno_end;
24880 # handle any cached line ..
24881 # either append this line to it or write it out
24882 if ( length($cached_line_text) ) {
24884 # Dump an invalid cached line
24885 if ( !$cached_line_valid ) {
24886 valign_output_step_C( $cached_line_text,
24887 $cached_line_leading_space_count,
24888 $last_level_written );
24891 # Handle cached line ending in OPENING tokens
24892 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
24894 my $gap = $leading_space_count - length($cached_line_text);
24896 # handle option of just one tight opening per line:
24897 if ( $cached_line_flag == 1 ) {
24898 if ( defined($open_or_close) && $open_or_close == 1 ) {
24903 if ( $gap >= 0 && defined($seqno_beg) ) {
24904 $leading_string = $cached_line_text . ' ' x $gap;
24905 $leading_space_count = $cached_line_leading_space_count;
24906 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
24907 $level = $last_level_written;
24910 valign_output_step_C( $cached_line_text,
24911 $cached_line_leading_space_count,
24912 $last_level_written );
24916 # Handle cached line ending in CLOSING tokens
24918 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
24921 # The new line must start with container
24924 # The container combination must be okay..
24927 # okay to combine like types
24928 ( $open_or_close == $cached_line_type )
24930 # closing block brace may append to non-block
24931 || ( $cached_line_type == 2 && $open_or_close == 4 )
24933 # something like ');'
24934 || ( !$open_or_close && $cached_line_type == 2 )
24938 # The combined line must fit
24940 length($test_line) <=
24941 maximum_line_length_for_level($last_level_written) )
24945 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
24947 # Patch to outdent closing tokens ending # in ');'
24948 # If we are joining a line like ');' to a previous stacked
24949 # set of closing tokens, then decide if we may outdent the
24950 # combined stack to the indentation of the ');'. Since we
24951 # should not normally outdent any of the other tokens more than
24952 # the indentation of the lines that contained them, we will
24953 # only do this if all of the corresponding opening
24954 # tokens were on the same line. This can happen with
24955 # -sot and -sct. For example, it is ok here:
24956 # __PACKAGE__->load_components( qw(
24961 # But, for example, we do not outdent in this example because
24962 # that would put the closing sub brace out farther than the
24963 # opening sub brace:
24965 # perltidy -sot -sct
24967 # '<Control-f>' => sub {
24969 # my $e = $c->XEvent;
24970 # itemsUnderArea $c;
24973 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
24975 # The way to tell this is if the stacked sequence numbers
24976 # of this output line are the reverse of the stacked
24977 # sequence numbers of the previous non-blank line of
24978 # sequence numbers. So we can join if the previous
24979 # nonblank string of tokens is the mirror image. For
24980 # example if stack )}] is 13:8:6 then we are looking for a
24981 # leading stack like [{( which is 6:8:13 We only need to
24982 # check the two ends, because the intermediate tokens must
24983 # fall in order. Note on speed: having to split on colons
24984 # and eliminate multiple colons might appear to be slow,
24985 # but it's not an issue because we almost never come
24986 # through here. In a typical file we don't.
24987 $seqno_string =~ s/^:+//;
24988 $last_nonblank_seqno_string =~ s/^:+//;
24989 $seqno_string =~ s/:+/:/g;
24990 $last_nonblank_seqno_string =~ s/:+/:/g;
24992 # how many spaces can we outdent?
24994 $cached_line_leading_space_count - $leading_space_count;
24996 && length($seqno_string)
24997 && length($last_nonblank_seqno_string) ==
24998 length($seqno_string) )
25001 ( split ':', $last_nonblank_seqno_string );
25002 my @seqno_now = ( split ':', $seqno_string );
25003 if ( $seqno_now[-1] == $seqno_last[0]
25004 && $seqno_now[0] == $seqno_last[-1] )
25008 # for absolute safety, be sure we only remove
25010 my $ws = substr( $test_line, 0, $diff );
25011 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
25013 $test_line = substr( $test_line, $diff );
25014 $cached_line_leading_space_count -= $diff;
25015 $last_level_written =
25017 $cached_line_leading_space_count,
25018 $diff, $last_level_written );
25019 reduce_valign_buffer_indentation($diff);
25022 # shouldn't happen, but not critical:
25024 ## ERROR transferring indentation here
25031 $leading_string = "";
25032 $leading_space_count = $cached_line_leading_space_count;
25033 $level = $last_level_written;
25036 valign_output_step_C( $cached_line_text,
25037 $cached_line_leading_space_count,
25038 $last_level_written );
25042 $cached_line_type = 0;
25043 $cached_line_text = "";
25045 # make the line to be written
25046 my $line = $leading_string . $str;
25048 # write or cache this line
25049 if ( !$open_or_close || $side_comment_length > 0 ) {
25050 valign_output_step_C( $line, $leading_space_count, $level );
25053 $cached_line_text = $line;
25054 $cached_line_type = $open_or_close;
25055 $cached_line_flag = $tightness_flag;
25056 $cached_seqno = $seqno;
25057 $cached_line_valid = $valid;
25058 $cached_line_leading_space_count = $leading_space_count;
25059 $cached_seqno_string = $seqno_string;
25062 $last_level_written = $level;
25063 $last_side_comment_length = $side_comment_length;
25064 $extra_indent_ok = 0;
25068 sub valign_output_step_C {
25070 ###############################################################
25071 # This is Step C in writing vertically aligned lines.
25072 # Lines are either stored in a buffer or passed along to the next step.
25073 # The reason for storing lines is that we may later want to reduce their
25074 # indentation when -sot and -sct are both used.
25075 ###############################################################
25078 # Dump any saved lines if we see a line with an unbalanced opening or
25080 dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
25082 # Either store or write this line
25083 if ($valign_buffer_filling) {
25084 push @valign_buffer, [@args];
25087 valign_output_step_D(@args);
25090 # For lines starting or ending with opening or closing tokens..
25091 if ($seqno_string) {
25092 $last_nonblank_seqno_string = $seqno_string;
25094 # Start storing lines when we see a line with multiple stacked opening
25096 # patch for RT #94354, requested by Colin Williams
25097 if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
25100 # This test is efficient but a little subtle: The first test says
25101 # that we have multiple sequence numbers and hence multiple opening
25102 # or closing tokens in this line. The second part of the test
25103 # rejects stacked closing and ternary tokens. So if we get here
25104 # then we should have stacked unbalanced opening tokens.
25106 # Here is a complex example:
25108 # Foo($Bar[0], { # (side comment)
25112 # The first line has sequence 6::4. It does not begin with
25113 # a closing token or ternary, so it passes the test and must be
25114 # stacked opening tokens.
25116 # The last line has sequence 4:6 but is a stack of closing tokens,
25117 # so it gets rejected.
25119 # Note that the sequence number of an opening token for a qw quote
25120 # is a negative number and will be rejected.
25121 # For example, for the following line:
25122 # skip_symbols([qw(
25123 # $seqno_string='10:5:-1'. It would be okay to accept it but
25124 # I decided not to do this after testing.
25126 $valign_buffer_filling = $seqno_string;
25133 sub valign_output_step_D {
25135 ###############################################################
25136 # This is Step D in writing vertically aligned lines.
25137 # Write one vertically aligned line of code to the output object.
25138 ###############################################################
25140 my ( $line, $leading_space_count, $level ) = @_;
25142 # The line is currently correct if there is no tabbing (recommended!)
25143 # We may have to lop off some leading spaces and replace with tabs.
25144 if ( $leading_space_count > 0 ) {
25146 # Nothing to do if no tabs
25147 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
25148 || $rOpts_indent_columns <= 0 )
25154 # Handle entab option
25155 elsif ($rOpts_entab_leading_whitespace) {
25157 $leading_space_count % $rOpts_entab_leading_whitespace;
25159 int( $leading_space_count / $rOpts_entab_leading_whitespace );
25160 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
25161 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
25162 substr( $line, 0, $leading_space_count ) = $leading_string;
25166 # shouldn't happen - program error counting whitespace
25168 VALIGN_DEBUG_FLAG_TABS
25170 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
25175 # Handle option of one tab per level
25177 my $leading_string = ( "\t" x $level );
25179 $leading_space_count - $level * $rOpts_indent_columns;
25181 # shouldn't happen:
25182 if ( $space_count < 0 ) {
25184 # But it could be an outdented comment
25185 if ( $line !~ /^\s*#/ ) {
25186 VALIGN_DEBUG_FLAG_TABS
25188 "Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
25191 $leading_string = ( ' ' x $leading_space_count );
25194 $leading_string .= ( ' ' x $space_count );
25196 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
25197 substr( $line, 0, $leading_space_count ) = $leading_string;
25201 # shouldn't happen - program error counting whitespace
25202 # we'll skip entabbing
25203 VALIGN_DEBUG_FLAG_TABS
25205 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
25210 $file_writer_object->write_code_line( $line . "\n" );
25214 { # begin get_leading_string
25216 my @leading_string_cache;
25218 sub get_leading_string {
25220 # define the leading whitespace string for this line..
25221 my $leading_whitespace_count = shift;
25223 # Handle case of zero whitespace, which includes multi-line quotes
25224 # (which may have a finite level; this prevents tab problems)
25225 if ( $leading_whitespace_count <= 0 ) {
25229 # look for previous result
25230 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
25231 return $leading_string_cache[$leading_whitespace_count];
25234 # must compute a string for this number of spaces
25235 my $leading_string;
25237 # Handle simple case of no tabs
25238 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
25239 || $rOpts_indent_columns <= 0 )
25241 $leading_string = ( ' ' x $leading_whitespace_count );
25244 # Handle entab option
25245 elsif ($rOpts_entab_leading_whitespace) {
25247 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
25248 my $tab_count = int(
25249 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
25250 $leading_string = "\t" x $tab_count . ' ' x $space_count;
25253 # Handle option of one tab per level
25255 $leading_string = ( "\t" x $group_level );
25257 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
25259 # shouldn't happen:
25260 if ( $space_count < 0 ) {
25261 VALIGN_DEBUG_FLAG_TABS
25263 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
25266 # -- skip entabbing
25267 $leading_string = ( ' ' x $leading_whitespace_count );
25270 $leading_string .= ( ' ' x $space_count );
25273 $leading_string_cache[$leading_whitespace_count] = $leading_string;
25274 return $leading_string;
25276 } # end get_leading_string
25278 sub report_anything_unusual {
25280 if ( $outdented_line_count > 0 ) {
25281 write_logfile_entry(
25282 "$outdented_line_count long lines were outdented:\n");
25283 write_logfile_entry(
25284 " First at output line $first_outdented_line_at\n");
25286 if ( $outdented_line_count > 1 ) {
25287 write_logfile_entry(
25288 " Last at output line $last_outdented_line_at\n");
25290 write_logfile_entry(
25291 " use -noll to prevent outdenting, -l=n to increase line length\n"
25293 write_logfile_entry("\n");
25298 #####################################################################
25300 # the Perl::Tidy::FileWriter class writes the output file
25302 #####################################################################
25304 package Perl::Tidy::FileWriter;
25306 # Maximum number of little messages; probably need not be changed.
25307 use constant MAX_NAG_MESSAGES => 6;
25309 sub write_logfile_entry {
25310 my ( $self, $msg ) = @_;
25311 my $logger_object = $self->{_logger_object};
25312 if ($logger_object) {
25313 $logger_object->write_logfile_entry($msg);
25319 my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_;
25322 _line_sink_object => $line_sink_object,
25323 _logger_object => $logger_object,
25325 _output_line_number => 1,
25326 _consecutive_blank_lines => 0,
25327 _consecutive_nonblank_lines => 0,
25328 _first_line_length_error => 0,
25329 _max_line_length_error => 0,
25330 _last_line_length_error => 0,
25331 _first_line_length_error_at => 0,
25332 _max_line_length_error_at => 0,
25333 _last_line_length_error_at => 0,
25334 _line_length_error_count => 0,
25335 _max_output_line_length => 0,
25336 _max_output_line_length_at => 0,
25342 $self->{_line_sink_object}->tee_on();
25348 $self->{_line_sink_object}->tee_off();
25352 sub get_output_line_number {
25354 return $self->{_output_line_number};
25357 sub decrement_output_line_number {
25359 $self->{_output_line_number}--;
25363 sub get_consecutive_nonblank_lines {
25365 return $self->{_consecutive_nonblank_lines};
25368 sub reset_consecutive_blank_lines {
25370 $self->{_consecutive_blank_lines} = 0;
25374 sub want_blank_line {
25376 unless ( $self->{_consecutive_blank_lines} ) {
25377 $self->write_blank_code_line();
25382 sub require_blank_code_lines {
25384 # write out the requested number of blanks regardless of the value of -mbl
25385 # unless -mbl=0. This allows extra blank lines to be written for subs and
25386 # packages even with the default -mbl=1
25387 my ( $self, $count ) = @_;
25388 my $need = $count - $self->{_consecutive_blank_lines};
25389 my $rOpts = $self->{_rOpts};
25390 my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
25391 foreach my $i ( 0 .. $need - 1 ) {
25392 $self->write_blank_code_line($forced);
25397 sub write_blank_code_line {
25399 my $forced = shift;
25400 my $rOpts = $self->{_rOpts};
25403 && $self->{_consecutive_blank_lines} >=
25404 $rOpts->{'maximum-consecutive-blank-lines'} );
25405 $self->{_consecutive_blank_lines}++;
25406 $self->{_consecutive_nonblank_lines} = 0;
25407 $self->write_line("\n");
25411 sub write_code_line {
25415 if ( $a =~ /^\s*$/ ) {
25416 my $rOpts = $self->{_rOpts};
25418 if ( $self->{_consecutive_blank_lines} >=
25419 $rOpts->{'maximum-consecutive-blank-lines'} );
25420 $self->{_consecutive_blank_lines}++;
25421 $self->{_consecutive_nonblank_lines} = 0;
25424 $self->{_consecutive_blank_lines} = 0;
25425 $self->{_consecutive_nonblank_lines}++;
25427 $self->write_line($a);
25432 my ( $self, $a ) = @_;
25434 # TODO: go through and see if the test is necessary here
25435 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
25437 $self->{_line_sink_object}->write_line($a);
25439 # This calculation of excess line length ignores any internal tabs
25440 my $rOpts = $self->{_rOpts};
25441 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
25442 if ( $a =~ /^\t+/g ) {
25443 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
25446 # Note that we just incremented output line number to future value
25447 # so we must subtract 1 for current line number
25448 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
25449 $self->{_max_output_line_length} = length($a) - 1;
25450 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
25453 if ( $exceed > 0 ) {
25454 my $output_line_number = $self->{_output_line_number};
25455 $self->{_last_line_length_error} = $exceed;
25456 $self->{_last_line_length_error_at} = $output_line_number - 1;
25457 if ( $self->{_line_length_error_count} == 0 ) {
25458 $self->{_first_line_length_error} = $exceed;
25459 $self->{_first_line_length_error_at} = $output_line_number - 1;
25463 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
25465 $self->{_max_line_length_error} = $exceed;
25466 $self->{_max_line_length_error_at} = $output_line_number - 1;
25469 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
25470 $self->write_logfile_entry(
25471 "Line length exceeded by $exceed characters\n");
25473 $self->{_line_length_error_count}++;
25478 sub report_line_length_errors {
25480 my $rOpts = $self->{_rOpts};
25481 my $line_length_error_count = $self->{_line_length_error_count};
25482 if ( $line_length_error_count == 0 ) {
25483 $self->write_logfile_entry(
25484 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
25485 my $max_output_line_length = $self->{_max_output_line_length};
25486 my $max_output_line_length_at = $self->{_max_output_line_length_at};
25487 $self->write_logfile_entry(
25488 " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
25494 my $word = ( $line_length_error_count > 1 ) ? "s" : "";
25495 $self->write_logfile_entry(
25496 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
25499 $word = ( $line_length_error_count > 1 ) ? "First" : "";
25500 my $first_line_length_error = $self->{_first_line_length_error};
25501 my $first_line_length_error_at = $self->{_first_line_length_error_at};
25502 $self->write_logfile_entry(
25503 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
25506 if ( $line_length_error_count > 1 ) {
25507 my $max_line_length_error = $self->{_max_line_length_error};
25508 my $max_line_length_error_at = $self->{_max_line_length_error_at};
25509 my $last_line_length_error = $self->{_last_line_length_error};
25510 my $last_line_length_error_at = $self->{_last_line_length_error_at};
25511 $self->write_logfile_entry(
25512 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
25514 $self->write_logfile_entry(
25515 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
25522 #####################################################################
25524 # The Perl::Tidy::Debugger class shows line tokenization
25526 #####################################################################
25528 package Perl::Tidy::Debugger;
25532 my ( $class, $filename ) = @_;
25535 _debug_file => $filename,
25536 _debug_file_opened => 0,
25541 sub really_open_debug_file {
25544 my $debug_file = $self->{_debug_file};
25546 unless ( $fh = IO::File->new("> $debug_file") ) {
25547 Perl::Tidy::Warn("can't open $debug_file: $!\n");
25549 $self->{_debug_file_opened} = 1;
25550 $self->{_fh} = $fh;
25552 "Use -dump-token-types (-dtt) to get a list of token type codes\n";
25556 sub close_debug_file {
25559 my $fh = $self->{_fh};
25560 if ( $self->{_debug_file_opened} ) {
25561 eval { $self->{_fh}->close() };
25566 sub write_debug_entry {
25568 # This is a debug dump routine which may be modified as necessary
25569 # to dump tokens on a line-by-line basis. The output will be written
25570 # to the .DEBUG file when the -D flag is entered.
25571 my ( $self, $line_of_tokens ) = @_;
25573 my $input_line = $line_of_tokens->{_line_text};
25575 my $rtoken_type = $line_of_tokens->{_rtoken_type};
25576 my $rtokens = $line_of_tokens->{_rtokens};
25577 my $rlevels = $line_of_tokens->{_rlevels};
25578 my $rslevels = $line_of_tokens->{_rslevels};
25579 my $rblock_type = $line_of_tokens->{_rblock_type};
25581 my $input_line_number = $line_of_tokens->{_line_number};
25582 my $line_type = $line_of_tokens->{_line_type};
25583 ##my $rtoken_array = $line_of_tokens->{_token_array};
25587 my $token_str = "$input_line_number: ";
25588 my $reconstructed_original = "$input_line_number: ";
25589 my $block_str = "$input_line_number: ";
25591 #$token_str .= "$line_type: ";
25592 #$reconstructed_original .= "$line_type: ";
25595 my @next_char = ( '"', '"' );
25597 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
25598 my $fh = $self->{_fh};
25600 # FIXME: could convert to use of token_array instead
25601 foreach my $j ( 0 .. @{$rtoken_type} - 1 ) {
25604 if ( $rtoken_type->[$j] eq 'k' ) {
25605 $pattern .= $rtokens->[$j];
25608 $pattern .= $rtoken_type->[$j];
25610 $reconstructed_original .= $rtokens->[$j];
25611 $block_str .= "($rblock_type->[$j])";
25612 $num = length( $rtokens->[$j] );
25613 my $type_str = $rtoken_type->[$j];
25615 # be sure there are no blank tokens (shouldn't happen)
25616 # This can only happen if a programming error has been made
25617 # because all valid tokens are non-blank
25618 if ( $type_str eq ' ' ) {
25619 print $fh "BLANK TOKEN on the next line\n";
25620 $type_str = $next_char[$i_next];
25621 $i_next = 1 - $i_next;
25624 if ( length($type_str) == 1 ) {
25625 $type_str = $type_str x $num;
25627 $token_str .= $type_str;
25630 # Write what you want here ...
25631 # print $fh "$input_line\n";
25632 # print $fh "$pattern\n";
25633 print $fh "$reconstructed_original\n";
25634 print $fh "$token_str\n";
25636 #print $fh "$block_str\n";
25640 #####################################################################
25642 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
25643 # method for returning the next line to be parsed, as well as a
25644 # 'peek_ahead()' method
25646 # The input parameter is an object with a 'get_line()' method
25647 # which returns the next line to be parsed
25649 #####################################################################
25651 package Perl::Tidy::LineBuffer;
25655 my ( $class, $line_source_object ) = @_;
25658 _line_source_object => $line_source_object,
25659 _rlookahead_buffer => [],
25664 my ( $self, $buffer_index ) = @_;
25666 my $line_source_object = $self->{_line_source_object};
25667 my $rlookahead_buffer = $self->{_rlookahead_buffer};
25668 if ( $buffer_index < scalar( @{$rlookahead_buffer} ) ) {
25669 $line = $rlookahead_buffer->[$buffer_index];
25672 $line = $line_source_object->get_line();
25673 push( @{$rlookahead_buffer}, $line );
25681 my $line_source_object = $self->{_line_source_object};
25682 my $rlookahead_buffer = $self->{_rlookahead_buffer};
25684 if ( scalar( @{$rlookahead_buffer} ) ) {
25685 $line = shift @{$rlookahead_buffer};
25688 $line = $line_source_object->get_line();
25693 ########################################################################
25695 # the Perl::Tidy::Tokenizer package is essentially a filter which
25696 # reads lines of perl source code from a source object and provides
25697 # corresponding tokenized lines through its get_line() method. Lines
25698 # flow from the source_object to the caller like this:
25700 # source_object --> LineBuffer_object --> Tokenizer --> calling routine
25701 # get_line() get_line() get_line() line_of_tokens
25703 # The source object can be any object with a get_line() method which
25704 # supplies one line (a character string) perl call.
25705 # The LineBuffer object is created by the Tokenizer.
25706 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
25707 # containing one tokenized line for each call to its get_line() method.
25709 # WARNING: This is not a real class yet. Only one tokenizer my be used.
25711 ########################################################################
25713 package Perl::Tidy::Tokenizer;
25717 # Caution: these debug flags produce a lot of output
25718 # They should all be 0 except when debugging small scripts
25720 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
25721 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
25722 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
25723 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
25724 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
25726 my $debug_warning = sub {
25727 print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
25730 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
25731 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
25732 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
25733 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
25734 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
25740 # PACKAGE VARIABLES for processing an entire FILE.
25744 $last_nonblank_token
25745 $last_nonblank_type
25746 $last_nonblank_block_type
25754 %user_function_prototype
25756 %is_block_list_function
25757 %saw_function_definition
25761 $square_bracket_depth
25766 @nesting_sequence_number
25767 @current_sequence_number
25769 @paren_semicolon_count
25770 @paren_structural_type
25772 @brace_structural_type
25775 @square_bracket_type
25776 @square_bracket_structural_type
25778 @nested_ternary_flag
25779 @nested_statement_type
25780 @starting_line_of_current_depth
25783 # GLOBAL CONSTANTS for routines in this package
25785 %is_indirect_object_taker
25787 %expecting_operator_token
25788 %expecting_operator_types
25789 %expecting_term_types
25790 %expecting_term_token
25792 %is_file_test_operator
25795 %is_valid_token_type
25797 %is_code_block_token
25799 @opening_brace_names
25800 @closing_brace_names
25801 %is_keyword_taking_list
25802 %is_q_qq_qw_qx_qr_s_y_tr_m
25805 # possible values of operator_expected()
25806 use constant TERM => -1;
25807 use constant UNKNOWN => 0;
25808 use constant OPERATOR => 1;
25810 # possible values of context
25811 use constant SCALAR_CONTEXT => -1;
25812 use constant UNKNOWN_CONTEXT => 0;
25813 use constant LIST_CONTEXT => 1;
25815 # Maximum number of little messages; probably need not be changed.
25816 use constant MAX_NAG_MESSAGES => 6;
25820 # methods to count instances
25822 sub get_count { return $_count; }
25823 sub _increment_count { return ++$_count }
25824 sub _decrement_count { return --$_count }
25829 $self->_decrement_count();
25837 # Note: 'tabs' and 'indent_columns' are temporary and should be
25840 source_object => undef,
25841 debugger_object => undef,
25842 diagnostics_object => undef,
25843 logger_object => undef,
25844 starting_level => undef,
25845 indent_columns => 4,
25847 look_for_hash_bang => 0,
25849 look_for_autoloader => 1,
25850 look_for_selfloader => 1,
25851 starting_line_number => 1,
25852 extended_syntax => 0,
25854 my %args = ( %defaults, @_ );
25856 # we are given an object with a get_line() method to supply source lines
25857 my $source_object = $args{source_object};
25859 # we create another object with a get_line() and peek_ahead() method
25860 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
25862 # Tokenizer state data is as follows:
25863 # _rhere_target_list reference to list of here-doc targets
25864 # _here_doc_target the target string for a here document
25865 # _here_quote_character the type of here-doc quoting (" ' ` or none)
25866 # to determine if interpolation is done
25867 # _quote_target character we seek if chasing a quote
25868 # _line_start_quote line where we started looking for a long quote
25869 # _in_here_doc flag indicating if we are in a here-doc
25870 # _in_pod flag set if we are in pod documentation
25871 # _in_error flag set if we saw severe error (binary in script)
25872 # _in_data flag set if we are in __DATA__ section
25873 # _in_end flag set if we are in __END__ section
25874 # _in_format flag set if we are in a format description
25875 # _in_attribute_list flag telling if we are looking for attributes
25876 # _in_quote flag telling if we are chasing a quote
25877 # _starting_level indentation level of first line
25878 # _line_buffer_object object with get_line() method to supply source code
25879 # _diagnostics_object place to write debugging information
25880 # _unexpected_error_count error count used to limit output
25881 # _lower_case_labels_at line numbers where lower case labels seen
25882 # _hit_bug program bug detected
25883 $tokenizer_self = {
25884 _rhere_target_list => [],
25886 _here_doc_target => "",
25887 _here_quote_character => "",
25893 _in_attribute_list => 0,
25895 _quote_target => "",
25896 _line_start_quote => -1,
25897 _starting_level => $args{starting_level},
25898 _know_starting_level => defined( $args{starting_level} ),
25899 _tabsize => $args{tabsize},
25900 _indent_columns => $args{indent_columns},
25901 _look_for_hash_bang => $args{look_for_hash_bang},
25902 _trim_qw => $args{trim_qw},
25903 _continuation_indentation => $args{continuation_indentation},
25904 _outdent_labels => $args{outdent_labels},
25905 _last_line_number => $args{starting_line_number} - 1,
25906 _saw_perl_dash_P => 0,
25907 _saw_perl_dash_w => 0,
25908 _saw_use_strict => 0,
25909 _saw_v_string => 0,
25911 _look_for_autoloader => $args{look_for_autoloader},
25912 _look_for_selfloader => $args{look_for_selfloader},
25913 _saw_autoloader => 0,
25914 _saw_selfloader => 0,
25915 _saw_hash_bang => 0,
25918 _saw_negative_indentation => 0,
25919 _started_tokenizing => 0,
25920 _line_buffer_object => $line_buffer_object,
25921 _debugger_object => $args{debugger_object},
25922 _diagnostics_object => $args{diagnostics_object},
25923 _logger_object => $args{logger_object},
25924 _unexpected_error_count => 0,
25925 _started_looking_for_here_target_at => 0,
25926 _nearly_matched_here_target_at => undef,
25928 _rlower_case_labels_at => undef,
25929 _extended_syntax => $args{extended_syntax},
25932 prepare_for_a_new_file();
25933 find_starting_indentation_level();
25935 bless $tokenizer_self, $class;
25937 # This is not a full class yet, so die if an attempt is made to
25938 # create more than one object.
25940 if ( _increment_count() > 1 ) {
25942 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
25945 return $tokenizer_self;
25949 # interface to Perl::Tidy::Logger routines
25952 my $logger_object = $tokenizer_self->{_logger_object};
25953 if ($logger_object) {
25954 $logger_object->warning($msg);
25961 my $logger_object = $tokenizer_self->{_logger_object};
25962 if ($logger_object) {
25963 $logger_object->complain($msg);
25968 sub write_logfile_entry {
25970 my $logger_object = $tokenizer_self->{_logger_object};
25971 if ($logger_object) {
25972 $logger_object->write_logfile_entry($msg);
25977 sub interrupt_logfile {
25978 my $logger_object = $tokenizer_self->{_logger_object};
25979 if ($logger_object) {
25980 $logger_object->interrupt_logfile();
25985 sub resume_logfile {
25986 my $logger_object = $tokenizer_self->{_logger_object};
25987 if ($logger_object) {
25988 $logger_object->resume_logfile();
25993 sub increment_brace_error {
25994 my $logger_object = $tokenizer_self->{_logger_object};
25995 if ($logger_object) {
25996 $logger_object->increment_brace_error();
26001 sub report_definite_bug {
26002 $tokenizer_self->{_hit_bug} = 1;
26003 my $logger_object = $tokenizer_self->{_logger_object};
26004 if ($logger_object) {
26005 $logger_object->report_definite_bug();
26010 sub brace_warning {
26012 my $logger_object = $tokenizer_self->{_logger_object};
26013 if ($logger_object) {
26014 $logger_object->brace_warning($msg);
26019 sub get_saw_brace_error {
26020 my $logger_object = $tokenizer_self->{_logger_object};
26021 if ($logger_object) {
26022 return $logger_object->get_saw_brace_error();
26029 # interface to Perl::Tidy::Diagnostics routines
26030 sub write_diagnostics {
26032 if ( $tokenizer_self->{_diagnostics_object} ) {
26033 $tokenizer_self->{_diagnostics_object}->write_diagnostics($msg);
26038 sub report_tokenization_errors {
26041 my $severe_error = $self->{_in_error};
26043 my $level = get_indentation_level();
26044 if ( $level != $tokenizer_self->{_starting_level} ) {
26045 warning("final indentation level: $level\n");
26048 check_final_nesting_depths();
26050 if ( $tokenizer_self->{_look_for_hash_bang}
26051 && !$tokenizer_self->{_saw_hash_bang} )
26054 "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
26057 if ( $tokenizer_self->{_in_format} ) {
26058 warning("hit EOF while in format description\n");
26061 if ( $tokenizer_self->{_in_pod} ) {
26063 # Just write log entry if this is after __END__ or __DATA__
26064 # because this happens to often, and it is not likely to be
26066 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
26067 write_logfile_entry(
26068 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
26074 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
26080 if ( $tokenizer_self->{_in_here_doc} ) {
26082 my $here_doc_target = $tokenizer_self->{_here_doc_target};
26083 my $started_looking_for_here_target_at =
26084 $tokenizer_self->{_started_looking_for_here_target_at};
26085 if ($here_doc_target) {
26087 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
26092 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
26095 my $nearly_matched_here_target_at =
26096 $tokenizer_self->{_nearly_matched_here_target_at};
26097 if ($nearly_matched_here_target_at) {
26099 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
26104 if ( $tokenizer_self->{_in_quote} ) {
26106 my $line_start_quote = $tokenizer_self->{_line_start_quote};
26107 my $quote_target = $tokenizer_self->{_quote_target};
26109 ( $tokenizer_self->{_in_attribute_list} )
26113 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
26117 if ( $tokenizer_self->{_hit_bug} ) {
26121 my $logger_object = $tokenizer_self->{_logger_object};
26123 # TODO: eventually may want to activate this to cause file to be output verbatim
26126 # Set the severe error for a fairly high warning count because
26127 # some of the warnings do not harm formatting, such as duplicate
26129 my $warning_count = $logger_object->{_warning_count};
26130 if ( $warning_count > 50 ) {
26134 # Brace errors are significant, so set the severe error flag at
26136 my $saw_brace_error = $logger_object->{_saw_brace_error};
26137 if ( $saw_brace_error > 2 ) {
26142 unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
26143 if ( $] < 5.006 ) {
26144 write_logfile_entry("Suggest including '-w parameter'\n");
26147 write_logfile_entry("Suggest including 'use warnings;'\n");
26151 if ( $tokenizer_self->{_saw_perl_dash_P} ) {
26152 write_logfile_entry("Use of -P parameter for defines is discouraged\n");
26155 unless ( $tokenizer_self->{_saw_use_strict} ) {
26156 write_logfile_entry("Suggest including 'use strict;'\n");
26159 # it is suggested that labels have at least one upper case character
26160 # for legibility and to avoid code breakage as new keywords are introduced
26161 if ( $tokenizer_self->{_rlower_case_labels_at} ) {
26162 my @lower_case_labels_at =
26163 @{ $tokenizer_self->{_rlower_case_labels_at} };
26164 write_logfile_entry(
26165 "Suggest using upper case characters in label(s)\n");
26167 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
26169 return $severe_error;
26172 sub report_v_string {
26174 # warn if this version can't handle v-strings
26176 unless ( $tokenizer_self->{_saw_v_string} ) {
26177 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
26179 if ( $] < 5.006 ) {
26181 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
26187 sub get_input_line_number {
26188 return $tokenizer_self->{_last_line_number};
26191 # returns the next tokenized line
26196 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
26197 # $square_bracket_depth, $paren_depth
26199 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
26200 $tokenizer_self->{_line_text} = $input_line;
26202 return unless ($input_line);
26204 my $input_line_number = ++$tokenizer_self->{_last_line_number};
26206 # Find and remove what characters terminate this line, including any
26208 my $input_line_separator = "";
26209 if ( chomp($input_line) ) { $input_line_separator = $/ }
26211 # TODO: what other characters should be included here?
26212 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
26213 $input_line_separator = $2 . $input_line_separator;
26216 # for backwards compatibility we keep the line text terminated with
26217 # a newline character
26218 $input_line .= "\n";
26219 $tokenizer_self->{_line_text} = $input_line; # update
26221 # create a data structure describing this line which will be
26222 # returned to the caller.
26224 # _line_type codes are:
26225 # SYSTEM - system-specific code before hash-bang line
26226 # CODE - line of perl code (including comments)
26227 # POD_START - line starting pod, such as '=head'
26228 # POD - pod documentation text
26229 # POD_END - last line of pod section, '=cut'
26230 # HERE - text of here-document
26231 # HERE_END - last line of here-doc (target word)
26232 # FORMAT - format section
26233 # FORMAT_END - last line of format section, '.'
26234 # DATA_START - __DATA__ line
26235 # DATA - unidentified text following __DATA__
26236 # END_START - __END__ line
26237 # END - unidentified text following __END__
26238 # ERROR - we are in big trouble, probably not a perl script
26241 # _curly_brace_depth - depth of curly braces at start of line
26242 # _square_bracket_depth - depth of square brackets at start of line
26243 # _paren_depth - depth of parens at start of line
26244 # _starting_in_quote - this line continues a multi-line quote
26245 # (so don't trim leading blanks!)
26246 # _ending_in_quote - this line ends in a multi-line quote
26247 # (so don't trim trailing blanks!)
26248 my $line_of_tokens = {
26249 _line_type => 'EOF',
26250 _line_text => $input_line,
26251 _line_number => $input_line_number,
26252 _rtoken_type => undef,
26255 _rslevels => undef,
26256 _rblock_type => undef,
26257 _rcontainer_type => undef,
26258 _rcontainer_environment => undef,
26259 _rtype_sequence => undef,
26260 _rnesting_tokens => undef,
26261 _rci_levels => undef,
26262 _rnesting_blocks => undef,
26263 _guessed_indentation_level => 0,
26264 _starting_in_quote => 0, # to be set by subroutine
26265 _ending_in_quote => 0,
26266 _curly_brace_depth => $brace_depth,
26267 _square_bracket_depth => $square_bracket_depth,
26268 _paren_depth => $paren_depth,
26269 _quote_character => '',
26272 # must print line unchanged if we are in a here document
26273 if ( $tokenizer_self->{_in_here_doc} ) {
26275 $line_of_tokens->{_line_type} = 'HERE';
26276 my $here_doc_target = $tokenizer_self->{_here_doc_target};
26277 my $here_quote_character = $tokenizer_self->{_here_quote_character};
26278 my $candidate_target = $input_line;
26279 chomp $candidate_target;
26281 # Handle <<~ targets, which are indicated here by a leading space on
26282 # the here quote character
26283 if ( $here_quote_character =~ /^\s/ ) {
26284 $candidate_target =~ s/^\s*//;
26286 if ( $candidate_target eq $here_doc_target ) {
26287 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
26288 $line_of_tokens->{_line_type} = 'HERE_END';
26289 write_logfile_entry("Exiting HERE document $here_doc_target\n");
26291 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
26292 if ( @{$rhere_target_list} ) { # there can be multiple here targets
26293 ( $here_doc_target, $here_quote_character ) =
26294 @{ shift @{$rhere_target_list} };
26295 $tokenizer_self->{_here_doc_target} = $here_doc_target;
26296 $tokenizer_self->{_here_quote_character} =
26297 $here_quote_character;
26298 write_logfile_entry(
26299 "Entering HERE document $here_doc_target\n");
26300 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
26301 $tokenizer_self->{_started_looking_for_here_target_at} =
26302 $input_line_number;
26305 $tokenizer_self->{_in_here_doc} = 0;
26306 $tokenizer_self->{_here_doc_target} = "";
26307 $tokenizer_self->{_here_quote_character} = "";
26311 # check for error of extra whitespace
26312 # note for PERL6: leading whitespace is allowed
26314 $candidate_target =~ s/\s*$//;
26315 $candidate_target =~ s/^\s*//;
26316 if ( $candidate_target eq $here_doc_target ) {
26317 $tokenizer_self->{_nearly_matched_here_target_at} =
26318 $input_line_number;
26321 return $line_of_tokens;
26324 # must print line unchanged if we are in a format section
26325 elsif ( $tokenizer_self->{_in_format} ) {
26327 if ( $input_line =~ /^\.[\s#]*$/ ) {
26328 write_logfile_entry("Exiting format section\n");
26329 $tokenizer_self->{_in_format} = 0;
26330 $line_of_tokens->{_line_type} = 'FORMAT_END';
26333 $line_of_tokens->{_line_type} = 'FORMAT';
26335 return $line_of_tokens;
26338 # must print line unchanged if we are in pod documentation
26339 elsif ( $tokenizer_self->{_in_pod} ) {
26341 $line_of_tokens->{_line_type} = 'POD';
26342 if ( $input_line =~ /^=cut/ ) {
26343 $line_of_tokens->{_line_type} = 'POD_END';
26344 write_logfile_entry("Exiting POD section\n");
26345 $tokenizer_self->{_in_pod} = 0;
26347 if ( $input_line =~ /^\#\!.*perl\b/ ) {
26349 "Hash-bang in pod can cause older versions of perl to fail! \n"
26353 return $line_of_tokens;
26356 # must print line unchanged if we have seen a severe error (i.e., we
26357 # are seeing illegal tokens and cannot continue. Syntax errors do
26358 # not pass this route). Calling routine can decide what to do, but
26359 # the default can be to just pass all lines as if they were after __END__
26360 elsif ( $tokenizer_self->{_in_error} ) {
26361 $line_of_tokens->{_line_type} = 'ERROR';
26362 return $line_of_tokens;
26365 # print line unchanged if we are __DATA__ section
26366 elsif ( $tokenizer_self->{_in_data} ) {
26368 # ...but look for POD
26369 # Note that the _in_data and _in_end flags remain set
26370 # so that we return to that state after seeing the
26371 # end of a pod section
26372 if ( $input_line =~ /^=(?!cut)/ ) {
26373 $line_of_tokens->{_line_type} = 'POD_START';
26374 write_logfile_entry("Entering POD section\n");
26375 $tokenizer_self->{_in_pod} = 1;
26376 return $line_of_tokens;
26379 $line_of_tokens->{_line_type} = 'DATA';
26380 return $line_of_tokens;
26384 # print line unchanged if we are in __END__ section
26385 elsif ( $tokenizer_self->{_in_end} ) {
26387 # ...but look for POD
26388 # Note that the _in_data and _in_end flags remain set
26389 # so that we return to that state after seeing the
26390 # end of a pod section
26391 if ( $input_line =~ /^=(?!cut)/ ) {
26392 $line_of_tokens->{_line_type} = 'POD_START';
26393 write_logfile_entry("Entering POD section\n");
26394 $tokenizer_self->{_in_pod} = 1;
26395 return $line_of_tokens;
26398 $line_of_tokens->{_line_type} = 'END';
26399 return $line_of_tokens;
26403 # check for a hash-bang line if we haven't seen one
26404 if ( !$tokenizer_self->{_saw_hash_bang} ) {
26405 if ( $input_line =~ /^\#\!.*perl\b/ ) {
26406 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
26408 # check for -w and -P flags
26409 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
26410 $tokenizer_self->{_saw_perl_dash_P} = 1;
26413 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
26414 $tokenizer_self->{_saw_perl_dash_w} = 1;
26418 ( $input_line_number > 1 )
26420 # leave any hash bang in a BEGIN block alone
26421 # i.e. see 'debugger-duck_type.t'
26423 $last_nonblank_block_type
26424 && $last_nonblank_block_type eq 'BEGIN'
26426 && ( !$tokenizer_self->{_look_for_hash_bang} )
26430 # this is helpful for VMS systems; we may have accidentally
26431 # tokenized some DCL commands
26432 if ( $tokenizer_self->{_started_tokenizing} ) {
26434 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
26438 complain("Useless hash-bang after line 1\n");
26442 # Report the leading hash-bang as a system line
26443 # This will prevent -dac from deleting it
26445 $line_of_tokens->{_line_type} = 'SYSTEM';
26446 return $line_of_tokens;
26451 # wait for a hash-bang before parsing if the user invoked us with -x
26452 if ( $tokenizer_self->{_look_for_hash_bang}
26453 && !$tokenizer_self->{_saw_hash_bang} )
26455 $line_of_tokens->{_line_type} = 'SYSTEM';
26456 return $line_of_tokens;
26459 # a first line of the form ': #' will be marked as SYSTEM
26460 # since lines of this form may be used by tcsh
26461 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
26462 $line_of_tokens->{_line_type} = 'SYSTEM';
26463 return $line_of_tokens;
26466 # now we know that it is ok to tokenize the line...
26467 # the line tokenizer will modify any of these private variables:
26468 # _rhere_target_list
26475 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
26476 tokenize_this_line($line_of_tokens);
26478 # Now finish defining the return structure and return it
26479 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
26481 # handle severe error (binary data in script)
26482 if ( $tokenizer_self->{_in_error} ) {
26483 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
26484 warning("Giving up after error\n");
26485 $line_of_tokens->{_line_type} = 'ERROR';
26486 reset_indentation_level(0); # avoid error messages
26487 return $line_of_tokens;
26490 # handle start of pod documentation
26491 if ( $tokenizer_self->{_in_pod} ) {
26493 # This gets tricky..above a __DATA__ or __END__ section, perl
26494 # accepts '=cut' as the start of pod section. But afterwards,
26495 # only pod utilities see it and they may ignore an =cut without
26496 # leading =head. In any case, this isn't good.
26497 if ( $input_line =~ /^=cut\b/ ) {
26498 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
26499 complain("=cut while not in pod ignored\n");
26500 $tokenizer_self->{_in_pod} = 0;
26501 $line_of_tokens->{_line_type} = 'POD_END';
26504 $line_of_tokens->{_line_type} = 'POD_START';
26506 "=cut starts a pod section .. this can fool pod utilities.\n"
26508 write_logfile_entry("Entering POD section\n");
26513 $line_of_tokens->{_line_type} = 'POD_START';
26514 write_logfile_entry("Entering POD section\n");
26517 return $line_of_tokens;
26520 # update indentation levels for log messages
26521 if ( $input_line !~ /^\s*$/ ) {
26522 my $rlevels = $line_of_tokens->{_rlevels};
26523 $line_of_tokens->{_guessed_indentation_level} =
26524 guess_old_indentation_level($input_line);
26527 # see if this line contains here doc targets
26528 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
26529 if ( @{$rhere_target_list} ) {
26531 my ( $here_doc_target, $here_quote_character ) =
26532 @{ shift @{$rhere_target_list} };
26533 $tokenizer_self->{_in_here_doc} = 1;
26534 $tokenizer_self->{_here_doc_target} = $here_doc_target;
26535 $tokenizer_self->{_here_quote_character} = $here_quote_character;
26536 write_logfile_entry("Entering HERE document $here_doc_target\n");
26537 $tokenizer_self->{_started_looking_for_here_target_at} =
26538 $input_line_number;
26541 # NOTE: __END__ and __DATA__ statements are written unformatted
26542 # because they can theoretically contain additional characters
26543 # which are not tokenized (and cannot be read with <DATA> either!).
26544 if ( $tokenizer_self->{_in_data} ) {
26545 $line_of_tokens->{_line_type} = 'DATA_START';
26546 write_logfile_entry("Starting __DATA__ section\n");
26547 $tokenizer_self->{_saw_data} = 1;
26549 # keep parsing after __DATA__ if use SelfLoader was seen
26550 if ( $tokenizer_self->{_saw_selfloader} ) {
26551 $tokenizer_self->{_in_data} = 0;
26552 write_logfile_entry(
26553 "SelfLoader seen, continuing; -nlsl deactivates\n");
26556 return $line_of_tokens;
26559 elsif ( $tokenizer_self->{_in_end} ) {
26560 $line_of_tokens->{_line_type} = 'END_START';
26561 write_logfile_entry("Starting __END__ section\n");
26562 $tokenizer_self->{_saw_end} = 1;
26564 # keep parsing after __END__ if use AutoLoader was seen
26565 if ( $tokenizer_self->{_saw_autoloader} ) {
26566 $tokenizer_self->{_in_end} = 0;
26567 write_logfile_entry(
26568 "AutoLoader seen, continuing; -nlal deactivates\n");
26570 return $line_of_tokens;
26573 # now, finally, we know that this line is type 'CODE'
26574 $line_of_tokens->{_line_type} = 'CODE';
26576 # remember if we have seen any real code
26577 if ( !$tokenizer_self->{_started_tokenizing}
26578 && $input_line !~ /^\s*$/
26579 && $input_line !~ /^\s*#/ )
26581 $tokenizer_self->{_started_tokenizing} = 1;
26584 if ( $tokenizer_self->{_debugger_object} ) {
26585 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
26588 # Note: if keyword 'format' occurs in this line code, it is still CODE
26589 # (keyword 'format' need not start a line)
26590 if ( $tokenizer_self->{_in_format} ) {
26591 write_logfile_entry("Entering format section\n");
26594 if ( $tokenizer_self->{_in_quote}
26595 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
26598 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
26600 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
26602 $tokenizer_self->{_line_start_quote} = $input_line_number;
26603 write_logfile_entry(
26604 "Start multi-line quote or pattern ending in $quote_target\n");
26607 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
26608 && !$tokenizer_self->{_in_quote} )
26610 $tokenizer_self->{_line_start_quote} = -1;
26611 write_logfile_entry("End of multi-line quote or pattern\n");
26614 # we are returning a line of CODE
26615 return $line_of_tokens;
26618 sub find_starting_indentation_level {
26620 # We need to find the indentation level of the first line of the
26621 # script being formatted. Often it will be zero for an entire file,
26622 # but if we are formatting a local block of code (within an editor for
26623 # example) it may not be zero. The user may specify this with the
26624 # -sil=n parameter but normally doesn't so we have to guess.
26626 # USES GLOBAL VARIABLES: $tokenizer_self
26627 my $starting_level = 0;
26629 # use value if given as parameter
26630 if ( $tokenizer_self->{_know_starting_level} ) {
26631 $starting_level = $tokenizer_self->{_starting_level};
26634 # if we know there is a hash_bang line, the level must be zero
26635 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
26636 $tokenizer_self->{_know_starting_level} = 1;
26639 # otherwise figure it out from the input file
26644 # keep looking at lines until we find a hash bang or piece of code
26647 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
26650 # if first line is #! then assume starting level is zero
26651 if ( $i == 1 && $line =~ /^\#\!/ ) {
26652 $starting_level = 0;
26655 next if ( $line =~ /^\s*#/ ); # skip past comments
26656 next if ( $line =~ /^\s*$/ ); # skip past blank lines
26657 $starting_level = guess_old_indentation_level($line);
26660 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
26661 write_logfile_entry("$msg");
26663 $tokenizer_self->{_starting_level} = $starting_level;
26664 reset_indentation_level($starting_level);
26668 sub guess_old_indentation_level {
26671 # Guess the indentation level of an input line.
26673 # For the first line of code this result will define the starting
26674 # indentation level. It will mainly be non-zero when perltidy is applied
26675 # within an editor to a local block of code.
26677 # This is an impossible task in general because we can't know what tabs
26678 # meant for the old script and how many spaces were used for one
26679 # indentation level in the given input script. For example it may have
26680 # been previously formatted with -i=7 -et=3. But we can at least try to
26681 # make sure that perltidy guesses correctly if it is applied repeatedly to
26682 # a block of code within an editor, so that the block stays at the same
26683 # level when perltidy is applied repeatedly.
26685 # USES GLOBAL VARIABLES: $tokenizer_self
26688 # find leading tabs, spaces, and any statement label
26690 if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
26692 # If there are leading tabs, we use the tab scheme for this run, if
26693 # any, so that the code will remain stable when editing.
26694 if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
26696 if ($2) { $spaces += length($2) }
26698 # correct for outdented labels
26699 if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
26700 $spaces += $tokenizer_self->{_continuation_indentation};
26704 # compute indentation using the value of -i for this run.
26705 # If -i=0 is used for this run (which is possible) it doesn't matter
26706 # what we do here but we'll guess that the old run used 4 spaces per level.
26707 my $indent_columns = $tokenizer_self->{_indent_columns};
26708 $indent_columns = 4 if ( !$indent_columns );
26709 $level = int( $spaces / $indent_columns );
26713 # This is a currently unused debug routine
26714 sub dump_functions {
26717 foreach my $pkg ( keys %is_user_function ) {
26718 print $fh "\nnon-constant subs in package $pkg\n";
26720 foreach my $sub ( keys %{ $is_user_function{$pkg} } ) {
26722 if ( $is_block_list_function{$pkg}{$sub} ) {
26723 $msg = 'block_list';
26726 if ( $is_block_function{$pkg}{$sub} ) {
26729 print $fh "$sub $msg\n";
26733 foreach my $pkg ( keys %is_constant ) {
26734 print $fh "\nconstants and constant subs in package $pkg\n";
26736 foreach my $sub ( keys %{ $is_constant{$pkg} } ) {
26737 print $fh "$sub\n";
26745 # count number of 1's in a string of 1's and 0's
26746 # example: ones_count("010101010101") gives 6
26748 return $str =~ tr/1/0/;
26751 sub prepare_for_a_new_file {
26753 # previous tokens needed to determine what to expect next
26754 $last_nonblank_token = ';'; # the only possible starting state which
26755 $last_nonblank_type = ';'; # will make a leading brace a code block
26756 $last_nonblank_block_type = '';
26758 # scalars for remembering statement types across multiple lines
26759 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
26760 $in_attribute_list = 0;
26762 # scalars for remembering where we are in the file
26763 $current_package = "main";
26764 $context = UNKNOWN_CONTEXT;
26766 # hashes used to remember function information
26767 %is_constant = (); # user-defined constants
26768 %is_user_function = (); # user-defined functions
26769 %user_function_prototype = (); # their prototypes
26770 %is_block_function = ();
26771 %is_block_list_function = ();
26772 %saw_function_definition = ();
26774 # variables used to track depths of various containers
26775 # and report nesting errors
26778 $square_bracket_depth = 0;
26779 @current_depth[ 0 .. $#closing_brace_names ] =
26780 (0) x scalar @closing_brace_names;
26783 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
26784 ( 0 .. $#closing_brace_names );
26785 @current_sequence_number = ();
26786 $paren_type[$paren_depth] = '';
26787 $paren_semicolon_count[$paren_depth] = 0;
26788 $paren_structural_type[$brace_depth] = '';
26789 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
26790 $brace_structural_type[$brace_depth] = '';
26791 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
26792 $brace_package[$paren_depth] = $current_package;
26793 $square_bracket_type[$square_bracket_depth] = '';
26794 $square_bracket_structural_type[$square_bracket_depth] = '';
26796 initialize_tokenizer_state();
26800 { # begin tokenize_this_line
26802 use constant BRACE => 0;
26803 use constant SQUARE_BRACKET => 1;
26804 use constant PAREN => 2;
26805 use constant QUESTION_COLON => 3;
26807 # TV1: scalars for processing one LINE.
26808 # Re-initialized on each entry to sub tokenize_this_line.
26810 $block_type, $container_type, $expecting,
26811 $i, $i_tok, $input_line,
26812 $input_line_number, $last_nonblank_i, $max_token_index,
26813 $next_tok, $next_type, $peeked_ahead,
26814 $prototype, $rhere_target_list, $rtoken_map,
26815 $rtoken_type, $rtokens, $tok,
26816 $type, $type_sequence, $indent_flag,
26819 # TV2: refs to ARRAYS for processing one LINE
26820 # Re-initialized on each call.
26821 my $routput_token_list = []; # stack of output token indexes
26822 my $routput_token_type = []; # token types
26823 my $routput_block_type = []; # types of code block
26824 my $routput_container_type = []; # paren types, such as if, elsif, ..
26825 my $routput_type_sequence = []; # nesting sequential number
26826 my $routput_indent_flag = []; #
26828 # TV3: SCALARS for quote variables. These are initialized with a
26829 # subroutine call and continually updated as lines are processed.
26830 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
26831 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
26833 # TV4: SCALARS for multi-line identifiers and
26834 # statements. These are initialized with a subroutine call
26835 # and continually updated as lines are processed.
26836 my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
26838 # TV5: SCALARS for tracking indentation level.
26839 # Initialized once and continually updated as lines are
26842 $nesting_token_string, $nesting_type_string,
26843 $nesting_block_string, $nesting_block_flag,
26844 $nesting_list_string, $nesting_list_flag,
26845 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
26846 $in_statement_continuation, $level_in_tokenizer,
26847 $slevel_in_tokenizer, $rslevel_stack,
26850 # TV6: SCALARS for remembering several previous
26851 # tokens. Initialized once and continually updated as
26852 # lines are processed.
26854 $last_nonblank_container_type, $last_nonblank_type_sequence,
26855 $last_last_nonblank_token, $last_last_nonblank_type,
26856 $last_last_nonblank_block_type, $last_last_nonblank_container_type,
26857 $last_last_nonblank_type_sequence, $last_nonblank_prototype,
26860 # ----------------------------------------------------------------
26861 # beginning of tokenizer variable access and manipulation routines
26862 # ----------------------------------------------------------------
26864 sub initialize_tokenizer_state {
26866 # TV1: initialized on each call
26867 # TV2: initialized on each call
26871 $quote_character = "";
26874 $quoted_string_1 = "";
26875 $quoted_string_2 = "";
26876 $allowed_quote_modifiers = "";
26879 $id_scan_state = '';
26882 $indented_if_level = 0;
26885 $nesting_token_string = "";
26886 $nesting_type_string = "";
26887 $nesting_block_string = '1'; # initially in a block
26888 $nesting_block_flag = 1;
26889 $nesting_list_string = '0'; # initially not in a list
26890 $nesting_list_flag = 0; # initially not in a list
26891 $ci_string_in_tokenizer = "";
26892 $continuation_string_in_tokenizer = "0";
26893 $in_statement_continuation = 0;
26894 $level_in_tokenizer = 0;
26895 $slevel_in_tokenizer = 0;
26896 $rslevel_stack = [];
26899 $last_nonblank_container_type = '';
26900 $last_nonblank_type_sequence = '';
26901 $last_last_nonblank_token = ';';
26902 $last_last_nonblank_type = ';';
26903 $last_last_nonblank_block_type = '';
26904 $last_last_nonblank_container_type = '';
26905 $last_last_nonblank_type_sequence = '';
26906 $last_nonblank_prototype = "";
26910 sub save_tokenizer_state {
26913 $block_type, $container_type, $expecting,
26914 $i, $i_tok, $input_line,
26915 $input_line_number, $last_nonblank_i, $max_token_index,
26916 $next_tok, $next_type, $peeked_ahead,
26917 $prototype, $rhere_target_list, $rtoken_map,
26918 $rtoken_type, $rtokens, $tok,
26919 $type, $type_sequence, $indent_flag,
26923 $routput_token_list, $routput_token_type,
26924 $routput_block_type, $routput_container_type,
26925 $routput_type_sequence, $routput_indent_flag,
26929 $in_quote, $quote_type,
26930 $quote_character, $quote_pos,
26931 $quote_depth, $quoted_string_1,
26932 $quoted_string_2, $allowed_quote_modifiers,
26936 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
26939 $nesting_token_string, $nesting_type_string,
26940 $nesting_block_string, $nesting_block_flag,
26941 $nesting_list_string, $nesting_list_flag,
26942 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
26943 $in_statement_continuation, $level_in_tokenizer,
26944 $slevel_in_tokenizer, $rslevel_stack,
26948 $last_nonblank_container_type,
26949 $last_nonblank_type_sequence,
26950 $last_last_nonblank_token,
26951 $last_last_nonblank_type,
26952 $last_last_nonblank_block_type,
26953 $last_last_nonblank_container_type,
26954 $last_last_nonblank_type_sequence,
26955 $last_nonblank_prototype,
26957 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
26960 sub restore_tokenizer_state {
26962 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
26964 $block_type, $container_type, $expecting,
26965 $i, $i_tok, $input_line,
26966 $input_line_number, $last_nonblank_i, $max_token_index,
26967 $next_tok, $next_type, $peeked_ahead,
26968 $prototype, $rhere_target_list, $rtoken_map,
26969 $rtoken_type, $rtokens, $tok,
26970 $type, $type_sequence, $indent_flag,
26974 $routput_token_list, $routput_token_type,
26975 $routput_block_type, $routput_container_type,
26976 $routput_type_sequence, $routput_type_sequence,
26980 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
26981 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
26984 ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
26988 $nesting_token_string, $nesting_type_string,
26989 $nesting_block_string, $nesting_block_flag,
26990 $nesting_list_string, $nesting_list_flag,
26991 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
26992 $in_statement_continuation, $level_in_tokenizer,
26993 $slevel_in_tokenizer, $rslevel_stack,
26997 $last_nonblank_container_type,
26998 $last_nonblank_type_sequence,
26999 $last_last_nonblank_token,
27000 $last_last_nonblank_type,
27001 $last_last_nonblank_block_type,
27002 $last_last_nonblank_container_type,
27003 $last_last_nonblank_type_sequence,
27004 $last_nonblank_prototype,
27009 sub get_indentation_level {
27011 # patch to avoid reporting error if indented if is not terminated
27012 if ($indented_if_level) { return $level_in_tokenizer - 1 }
27013 return $level_in_tokenizer;
27016 sub reset_indentation_level {
27017 $level_in_tokenizer = $slevel_in_tokenizer = shift;
27018 push @{$rslevel_stack}, $slevel_in_tokenizer;
27024 $peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
27025 return $peeked_ahead;
27028 # ------------------------------------------------------------
27029 # end of tokenizer variable access and manipulation routines
27030 # ------------------------------------------------------------
27032 # ------------------------------------------------------------
27033 # beginning of various scanner interface routines
27034 # ------------------------------------------------------------
27035 sub scan_replacement_text {
27037 # check for here-docs in replacement text invoked by
27038 # a substitution operator with executable modifier 'e'.
27041 # $replacement_text
27043 # $rht = reference to any here-doc targets
27044 my ($replacement_text) = @_;
27047 return unless ( $replacement_text =~ /<</ );
27049 write_logfile_entry("scanning replacement text for here-doc targets\n");
27051 # save the logger object for error messages
27052 my $logger_object = $tokenizer_self->{_logger_object};
27054 # localize all package variables
27056 $tokenizer_self, $last_nonblank_token,
27057 $last_nonblank_type, $last_nonblank_block_type,
27058 $statement_type, $in_attribute_list,
27059 $current_package, $context,
27060 %is_constant, %is_user_function,
27061 %user_function_prototype, %is_block_function,
27062 %is_block_list_function, %saw_function_definition,
27063 $brace_depth, $paren_depth,
27064 $square_bracket_depth, @current_depth,
27065 @total_depth, $total_depth,
27066 @nesting_sequence_number, @current_sequence_number,
27067 @paren_type, @paren_semicolon_count,
27068 @paren_structural_type, @brace_type,
27069 @brace_structural_type, @brace_context,
27070 @brace_package, @square_bracket_type,
27071 @square_bracket_structural_type, @depth_array,
27072 @starting_line_of_current_depth, @nested_ternary_flag,
27073 @nested_statement_type,
27076 # save all lexical variables
27077 my $rstate = save_tokenizer_state();
27078 _decrement_count(); # avoid error check for multiple tokenizers
27080 # make a new tokenizer
27082 my $rpending_logfile_message;
27083 my $source_object =
27084 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
27085 $rpending_logfile_message );
27086 my $tokenizer = Perl::Tidy::Tokenizer->new(
27087 source_object => $source_object,
27088 logger_object => $logger_object,
27089 starting_line_number => $input_line_number,
27092 # scan the replacement text
27093 1 while ( $tokenizer->get_line() );
27095 # remove any here doc targets
27097 if ( $tokenizer_self->{_in_here_doc} ) {
27101 $tokenizer_self->{_here_doc_target},
27102 $tokenizer_self->{_here_quote_character}
27104 if ( $tokenizer_self->{_rhere_target_list} ) {
27105 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
27106 $tokenizer_self->{_rhere_target_list} = undef;
27108 $tokenizer_self->{_in_here_doc} = undef;
27111 # now its safe to report errors
27112 my $severe_error = $tokenizer->report_tokenization_errors();
27114 # TODO: Could propagate a severe error up
27116 # restore all tokenizer lexical variables
27117 restore_tokenizer_state($rstate);
27119 # return the here doc targets
27123 sub scan_bare_identifier {
27124 ( $i, $tok, $type, $prototype ) =
27125 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
27126 $rtoken_map, $max_token_index );
27130 sub scan_identifier {
27131 ( $i, $tok, $type, $id_scan_state, $identifier ) =
27132 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
27133 $max_token_index, $expecting, $paren_type[$paren_depth] );
27138 ( $i, $tok, $type, $id_scan_state ) =
27139 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
27140 $id_scan_state, $max_token_index );
27146 ( $i, $type, $number ) =
27147 scan_number_do( $input_line, $i, $rtoken_map, $type,
27148 $max_token_index );
27152 # a sub to warn if token found where term expected
27153 sub error_if_expecting_TERM {
27154 if ( $expecting == TERM ) {
27155 if ( $really_want_term{$last_nonblank_type} ) {
27156 report_unexpected( $tok, "term", $i_tok, $last_nonblank_i,
27157 $rtoken_map, $rtoken_type, $input_line );
27164 # a sub to warn if token found where operator expected
27165 sub error_if_expecting_OPERATOR {
27167 if ( $expecting == OPERATOR ) {
27168 if ( !defined($thing) ) { $thing = $tok }
27169 report_unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
27170 $rtoken_map, $rtoken_type, $input_line );
27171 if ( $i_tok == 0 ) {
27172 interrupt_logfile();
27173 warning("Missing ';' above?\n");
27181 # ------------------------------------------------------------
27182 # end scanner interfaces
27183 # ------------------------------------------------------------
27185 my %is_for_foreach;
27186 @_ = qw(for foreach);
27187 @is_for_foreach{@_} = (1) x scalar(@_);
27191 @is_my_our{@_} = (1) x scalar(@_);
27193 # These keywords may introduce blocks after parenthesized expressions,
27195 # keyword ( .... ) { BLOCK }
27196 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
27197 my %is_blocktype_with_paren;
27199 qw(if elsif unless while until for foreach switch case given when catch);
27200 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
27202 # ------------------------------------------------------------
27203 # begin hash of code for handling most token types
27204 # ------------------------------------------------------------
27205 my $tokenization_code = {
27207 # no special code for these types yet, but syntax checks
27242 error_if_expecting_TERM()
27243 if ( $expecting == TERM );
27246 error_if_expecting_TERM()
27247 if ( $expecting == TERM );
27251 # start looking for a scalar
27252 error_if_expecting_OPERATOR("Scalar")
27253 if ( $expecting == OPERATOR );
27256 if ( $identifier eq '$^W' ) {
27257 $tokenizer_self->{_saw_perl_dash_w} = 1;
27260 # Check for identifier in indirect object slot
27261 # (vorboard.pl, sort.t). Something like:
27262 # /^(print|printf|sort|exec|system)$/
27264 $is_indirect_object_taker{$last_nonblank_token}
27266 || ( ( $last_nonblank_token eq '(' )
27267 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
27268 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
27277 $paren_semicolon_count[$paren_depth] = 0;
27279 $container_type = $want_paren;
27282 elsif ( $statement_type =~ /^sub\b/ ) {
27283 $container_type = $statement_type;
27286 $container_type = $last_nonblank_token;
27288 # We can check for a syntax error here of unexpected '(',
27289 # but this is going to get messy...
27291 $expecting == OPERATOR
27293 # be sure this is not a method call of the form
27294 # &method(...), $method->(..), &{method}(...),
27295 # $ref[2](list) is ok & short for $ref[2]->(list)
27296 # NOTE: at present, braces in something like &{ xxx }
27297 # are not marked as a block, we might have a method call
27298 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
27303 # ref: camel 3 p 703.
27304 if ( $last_last_nonblank_token eq 'do' ) {
27306 "do SUBROUTINE is deprecated; consider & or -> notation\n"
27311 # if this is an empty list, (), then it is not an
27312 # error; for example, we might have a constant pi and
27313 # invoke it with pi() or just pi;
27314 my ( $next_nonblank_token, $i_next ) =
27315 find_next_nonblank_token( $i, $rtokens,
27316 $max_token_index );
27317 if ( $next_nonblank_token ne ')' ) {
27319 error_if_expecting_OPERATOR('(');
27321 if ( $last_nonblank_type eq 'C' ) {
27323 "$last_nonblank_token has a void prototype\n";
27325 elsif ( $last_nonblank_type eq 'i' ) {
27327 && $last_nonblank_token =~ /^\$/ )
27330 "Do you mean '$last_nonblank_token->(' ?\n";
27334 interrupt_logfile();
27338 } ## end if ( $next_nonblank_token...
27339 } ## end else [ if ( $last_last_nonblank_token...
27340 } ## end if ( $expecting == OPERATOR...
27342 $paren_type[$paren_depth] = $container_type;
27343 ( $type_sequence, $indent_flag ) =
27344 increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
27346 # propagate types down through nested parens
27347 # for example: the second paren in 'if ((' would be structural
27348 # since the first is.
27350 if ( $last_nonblank_token eq '(' ) {
27351 $type = $last_nonblank_type;
27354 # We exclude parens as structural after a ',' because it
27355 # causes subtle problems with continuation indentation for
27356 # something like this, where the first 'or' will not get
27361 # ( not defined $check )
27363 # or $check eq "new"
27364 # or $check eq "old",
27367 # Likewise, we exclude parens where a statement can start
27368 # because of problems with continuation indentation, like
27371 # ($firstline =~ /^#\!.*perl/)
27372 # and (print $File::Find::name, "\n")
27375 # (ref($usage_fref) =~ /CODE/)
27377 # : (&blast_usage, &blast_params, &blast_general_params);
27383 if ( $last_nonblank_type eq ')' ) {
27385 "Syntax error? found token '$last_nonblank_type' then '('\n"
27388 $paren_structural_type[$paren_depth] = $type;
27392 ( $type_sequence, $indent_flag ) =
27393 decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
27395 if ( $paren_structural_type[$paren_depth] eq '{' ) {
27399 $container_type = $paren_type[$paren_depth];
27401 # restore statement type as 'sub' at closing paren of a signature
27402 # so that a subsequent ':' is identified as an attribute
27403 if ( $container_type =~ /^sub\b/ ) {
27404 $statement_type = $container_type;
27407 # /^(for|foreach)$/
27408 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
27409 my $num_sc = $paren_semicolon_count[$paren_depth];
27410 if ( $num_sc > 0 && $num_sc != 2 ) {
27411 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
27415 if ( $paren_depth > 0 ) { $paren_depth-- }
27418 if ( $last_nonblank_type eq ',' ) {
27419 complain("Repeated ','s \n");
27422 # patch for operator_expected: note if we are in the list (use.t)
27423 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
27424 ## FIXME: need to move this elsewhere, perhaps check after a '('
27425 ## elsif ($last_nonblank_token eq '(') {
27426 ## warning("Leading ','s illegal in some versions of perl\n");
27430 $context = UNKNOWN_CONTEXT;
27431 $statement_type = '';
27434 # /^(for|foreach)$/
27435 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
27436 { # mark ; in for loop
27438 # Be careful: we do not want a semicolon such as the
27439 # following to be included:
27441 # for (sort {strcoll($a,$b);} keys %investments) {
27443 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
27444 && $square_bracket_depth ==
27445 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
27449 $paren_semicolon_count[$paren_depth]++;
27455 error_if_expecting_OPERATOR("String")
27456 if ( $expecting == OPERATOR );
27459 $allowed_quote_modifiers = "";
27462 error_if_expecting_OPERATOR("String")
27463 if ( $expecting == OPERATOR );
27466 $allowed_quote_modifiers = "";
27469 error_if_expecting_OPERATOR("String")
27470 if ( $expecting == OPERATOR );
27473 $allowed_quote_modifiers = "";
27478 if ( $expecting == UNKNOWN ) { # indeterminate, must guess..
27480 ( $is_pattern, $msg ) =
27481 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
27482 $max_token_index );
27485 write_diagnostics("DIVIDE:$msg\n");
27486 write_logfile_entry($msg);
27489 else { $is_pattern = ( $expecting == TERM ) }
27494 $allowed_quote_modifiers = '[msixpodualngc]';
27496 else { # not a pattern; check for a /= token
27498 if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
27504 #DEBUG - collecting info on what tokens follow a divide
27505 # for development of guessing algorithm
27506 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
27507 # #write_diagnostics( "DIVIDE? $input_line\n" );
27513 # if we just saw a ')', we will label this block with
27514 # its type. We need to do this to allow sub
27515 # code_block_type to determine if this brace starts a
27516 # code block or anonymous hash. (The type of a paren
27517 # pair is the preceding token, such as 'if', 'else',
27519 $container_type = "";
27521 # ATTRS: for a '{' following an attribute list, reset
27522 # things to look like we just saw the sub name
27523 if ( $statement_type =~ /^sub/ ) {
27524 $last_nonblank_token = $statement_type;
27525 $last_nonblank_type = 'i';
27526 $statement_type = "";
27529 # patch for SWITCH/CASE: hide these keywords from an immediately
27530 # following opening brace
27531 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
27532 && $statement_type eq $last_nonblank_token )
27534 $last_nonblank_token = ";";
27537 elsif ( $last_nonblank_token eq ')' ) {
27538 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
27540 # defensive move in case of a nesting error (pbug.t)
27541 # in which this ')' had no previous '('
27542 # this nesting error will have been caught
27543 if ( !defined($last_nonblank_token) ) {
27544 $last_nonblank_token = 'if';
27547 # check for syntax error here;
27548 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
27549 if ( $tokenizer_self->{'_extended_syntax'} ) {
27551 # we append a trailing () to mark this as an unknown
27552 # block type. This allows perltidy to format some
27553 # common extensions of perl syntax.
27554 # This is used by sub code_block_type
27555 $last_nonblank_token .= '()';
27559 join( ' ', sort keys %is_blocktype_with_paren );
27561 "syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
27567 # patch for paren-less for/foreach glitch, part 2.
27568 # see note below under 'qw'
27569 elsif ($last_nonblank_token eq 'qw'
27570 && $is_for_foreach{$want_paren} )
27572 $last_nonblank_token = $want_paren;
27573 if ( $last_last_nonblank_token eq $want_paren ) {
27575 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
27582 # now identify which of the three possible types of
27583 # curly braces we have: hash index container, anonymous
27584 # hash reference, or code block.
27586 # non-structural (hash index) curly brace pair
27587 # get marked 'L' and 'R'
27588 if ( is_non_structural_brace() ) {
27591 # patch for SWITCH/CASE:
27592 # allow paren-less identifier after 'when'
27593 # if the brace is preceded by a space
27594 if ( $statement_type eq 'when'
27595 && $last_nonblank_type eq 'i'
27596 && $last_last_nonblank_type eq 'k'
27597 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
27600 $block_type = $statement_type;
27604 # code and anonymous hash have the same type, '{', but are
27605 # distinguished by 'block_type',
27606 # which will be blank for an anonymous hash
27609 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
27610 $max_token_index );
27612 # patch to promote bareword type to function taking block
27614 && $last_nonblank_type eq 'w'
27615 && $last_nonblank_i >= 0 )
27617 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
27618 $routput_token_type->[$last_nonblank_i] = 'G';
27622 # patch for SWITCH/CASE: if we find a stray opening block brace
27623 # where we might accept a 'case' or 'when' block, then take it
27624 if ( $statement_type eq 'case'
27625 || $statement_type eq 'when' )
27627 if ( !$block_type || $block_type eq '}' ) {
27628 $block_type = $statement_type;
27633 $brace_type[ ++$brace_depth ] = $block_type;
27634 $brace_package[$brace_depth] = $current_package;
27635 $brace_structural_type[$brace_depth] = $type;
27636 $brace_context[$brace_depth] = $context;
27637 ( $type_sequence, $indent_flag ) =
27638 increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
27641 $block_type = $brace_type[$brace_depth];
27642 if ($block_type) { $statement_type = '' }
27643 if ( defined( $brace_package[$brace_depth] ) ) {
27644 $current_package = $brace_package[$brace_depth];
27647 # can happen on brace error (caught elsewhere)
27650 ( $type_sequence, $indent_flag ) =
27651 decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
27653 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
27657 # propagate type information for 'do' and 'eval' blocks, and also
27658 # for smartmatch operator. This is necessary to enable us to know
27659 # if an operator or term is expected next.
27660 if ( $is_block_operator{$block_type} ) {
27661 $tok = $block_type;
27664 $context = $brace_context[$brace_depth];
27665 if ( $brace_depth > 0 ) { $brace_depth--; }
27667 '&' => sub { # maybe sub call? start looking
27669 # We have to check for sub call unless we are sure we
27670 # are expecting an operator. This example from s2p
27671 # got mistaken as a q operator in an early version:
27672 # print BODY &q(<<'EOT');
27673 if ( $expecting != OPERATOR ) {
27675 # But only look for a sub call if we are expecting a term or
27676 # if there is no existing space after the &.
27677 # For example we probably don't want & as sub call here:
27678 # Fcntl::S_IRUSR & $mode;
27679 if ( $expecting == TERM || $next_type ne 'b' ) {
27686 '<' => sub { # angle operator or less than?
27688 if ( $expecting != OPERATOR ) {
27690 find_angle_operator_termination( $input_line, $i, $rtoken_map,
27691 $expecting, $max_token_index );
27693 if ( $type eq '<' && $expecting == TERM ) {
27694 error_if_expecting_TERM();
27695 interrupt_logfile();
27696 warning("Unterminated <> operator?\n");
27703 '?' => sub { # ?: conditional or starting pattern?
27707 if ( $expecting == UNKNOWN ) {
27710 ( $is_pattern, $msg ) =
27711 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
27712 $max_token_index );
27714 if ($msg) { write_logfile_entry($msg) }
27716 else { $is_pattern = ( $expecting == TERM ) }
27721 $allowed_quote_modifiers = '[msixpodualngc]';
27724 ( $type_sequence, $indent_flag ) =
27725 increase_nesting_depth( QUESTION_COLON,
27726 $rtoken_map->[$i_tok] );
27729 '*' => sub { # typeglob, or multiply?
27731 if ( $expecting == TERM ) {
27736 if ( $rtokens->[ $i + 1 ] eq '=' ) {
27741 elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
27745 if ( $rtokens->[ $i + 1 ] eq '=' ) {
27753 '.' => sub { # what kind of . ?
27755 if ( $expecting != OPERATOR ) {
27757 if ( $type eq '.' ) {
27758 error_if_expecting_TERM()
27759 if ( $expecting == TERM );
27767 # if this is the first nonblank character, call it a label
27768 # since perl seems to just swallow it
27769 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
27773 # ATTRS: check for a ':' which introduces an attribute list
27774 # (this might eventually get its own token type)
27775 elsif ( $statement_type =~ /^sub\b/ ) {
27777 $in_attribute_list = 1;
27780 # check for scalar attribute, such as
27781 # my $foo : shared = 1;
27782 elsif ($is_my_our{$statement_type}
27783 && $current_depth[QUESTION_COLON] == 0 )
27786 $in_attribute_list = 1;
27789 # otherwise, it should be part of a ?/: operator
27791 ( $type_sequence, $indent_flag ) =
27792 decrease_nesting_depth( QUESTION_COLON,
27793 $rtoken_map->[$i_tok] );
27794 if ( $last_nonblank_token eq '?' ) {
27795 warning("Syntax error near ? :\n");
27799 '+' => sub { # what kind of plus?
27801 if ( $expecting == TERM ) {
27802 my $number = scan_number();
27804 # unary plus is safest assumption if not a number
27805 if ( !defined($number) ) { $type = 'p'; }
27807 elsif ( $expecting == OPERATOR ) {
27810 if ( $next_type eq 'w' ) { $type = 'p' }
27815 error_if_expecting_OPERATOR("Array")
27816 if ( $expecting == OPERATOR );
27819 '%' => sub { # hash or modulo?
27821 # first guess is hash if no following blank
27822 if ( $expecting == UNKNOWN ) {
27823 if ( $next_type ne 'b' ) { $expecting = TERM }
27825 if ( $expecting == TERM ) {
27830 $square_bracket_type[ ++$square_bracket_depth ] =
27831 $last_nonblank_token;
27832 ( $type_sequence, $indent_flag ) =
27833 increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
27835 # It may seem odd, but structural square brackets have
27836 # type '{' and '}'. This simplifies the indentation logic.
27837 if ( !is_non_structural_brace() ) {
27840 $square_bracket_structural_type[$square_bracket_depth] = $type;
27843 ( $type_sequence, $indent_flag ) =
27844 decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
27846 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
27851 # propagate type information for smartmatch operator. This is
27852 # necessary to enable us to know if an operator or term is expected
27854 if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
27855 $tok = $square_bracket_type[$square_bracket_depth];
27858 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
27860 '-' => sub { # what kind of minus?
27862 if ( ( $expecting != OPERATOR )
27863 && $is_file_test_operator{$next_tok} )
27865 my ( $next_nonblank_token, $i_next ) =
27866 find_next_nonblank_token( $i + 1, $rtokens,
27867 $max_token_index );
27869 # check for a quoted word like "-w=>xx";
27870 # it is sufficient to just check for a following '='
27871 if ( $next_nonblank_token eq '=' ) {
27880 elsif ( $expecting == TERM ) {
27881 my $number = scan_number();
27883 # maybe part of bareword token? unary is safest
27884 if ( !defined($number) ) { $type = 'm'; }
27887 elsif ( $expecting == OPERATOR ) {
27891 if ( $next_type eq 'w' ) {
27899 # check for special variables like ${^WARNING_BITS}
27900 if ( $expecting == TERM ) {
27902 # FIXME: this should work but will not catch errors
27903 # because we also have to be sure that previous token is
27904 # a type character ($,@,%).
27905 if ( $last_nonblank_token eq '{'
27906 && ( $next_tok =~ /^[A-Za-z_]/ ) )
27909 if ( $next_tok eq 'W' ) {
27910 $tokenizer_self->{_saw_perl_dash_w} = 1;
27912 $tok = $tok . $next_tok;
27918 unless ( error_if_expecting_TERM() ) {
27920 # Something like this is valid but strange:
27922 complain("The '^' seems unusual here\n");
27928 '::' => sub { # probably a sub call
27929 scan_bare_identifier();
27931 '<<' => sub { # maybe a here-doc?
27933 unless ( $i < $max_token_index )
27934 ; # here-doc not possible if end of line
27936 if ( $expecting != OPERATOR ) {
27937 my ( $found_target, $here_doc_target, $here_quote_character,
27940 $found_target, $here_doc_target, $here_quote_character, $i,
27943 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
27944 $max_token_index );
27946 if ($found_target) {
27947 push @{$rhere_target_list},
27948 [ $here_doc_target, $here_quote_character ];
27950 if ( length($here_doc_target) > 80 ) {
27951 my $truncated = substr( $here_doc_target, 0, 80 );
27952 complain("Long here-target: '$truncated' ...\n");
27954 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
27956 "Unconventional here-target: '$here_doc_target'\n");
27959 elsif ( $expecting == TERM ) {
27960 unless ($saw_error) {
27962 # shouldn't happen..
27963 warning("Program bug; didn't find here doc target\n");
27964 report_definite_bug();
27971 '<<~' => sub { # a here-doc, new type added in v26
27973 unless ( $i < $max_token_index )
27974 ; # here-doc not possible if end of line
27975 if ( $expecting != OPERATOR ) {
27976 my ( $found_target, $here_doc_target, $here_quote_character,
27979 $found_target, $here_doc_target, $here_quote_character, $i,
27982 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
27983 $max_token_index );
27985 if ($found_target) {
27987 if ( length($here_doc_target) > 80 ) {
27988 my $truncated = substr( $here_doc_target, 0, 80 );
27989 complain("Long here-target: '$truncated' ...\n");
27991 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
27993 "Unconventional here-target: '$here_doc_target'\n");
27996 # Note that we put a leading space on the here quote
27997 # character indicate that it may be preceded by spaces
27998 $here_quote_character = " " . $here_quote_character;
27999 push @{$rhere_target_list},
28000 [ $here_doc_target, $here_quote_character ];
28003 elsif ( $expecting == TERM ) {
28004 unless ($saw_error) {
28006 # shouldn't happen..
28007 warning("Program bug; didn't find here doc target\n");
28008 report_definite_bug();
28017 # if -> points to a bare word, we must scan for an identifier,
28018 # otherwise something like ->y would look like the y operator
28022 # type = 'pp' for pre-increment, '++' for post-increment
28024 if ( $expecting == TERM ) { $type = 'pp' }
28025 elsif ( $expecting == UNKNOWN ) {
28026 my ( $next_nonblank_token, $i_next ) =
28027 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28028 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
28033 if ( $last_nonblank_type eq $tok ) {
28034 complain("Repeated '=>'s \n");
28037 # patch for operator_expected: note if we are in the list (use.t)
28038 # TODO: make version numbers a new token type
28039 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
28042 # type = 'mm' for pre-decrement, '--' for post-decrement
28045 if ( $expecting == TERM ) { $type = 'mm' }
28046 elsif ( $expecting == UNKNOWN ) {
28047 my ( $next_nonblank_token, $i_next ) =
28048 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28049 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
28054 error_if_expecting_TERM()
28055 if ( $expecting == TERM );
28059 error_if_expecting_TERM()
28060 if ( $expecting == TERM );
28064 error_if_expecting_TERM()
28065 if ( $expecting == TERM );
28069 # ------------------------------------------------------------
28070 # end hash of code for handling individual token types
28071 # ------------------------------------------------------------
28073 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
28075 # These block types terminate statements and do not need a trailing
28077 # patched for SWITCH/CASE/
28078 my %is_zero_continuation_block_type;
28079 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
28080 if elsif else unless while until for foreach switch case given when);
28081 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
28083 my %is_not_zero_continuation_block_type;
28084 @_ = qw(sort grep map do eval);
28085 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
28087 my %is_logical_container;
28088 @_ = qw(if elsif unless while and or err not && ! || for foreach);
28089 @is_logical_container{@_} = (1) x scalar(@_);
28091 my %is_binary_type;
28093 @is_binary_type{@_} = (1) x scalar(@_);
28095 my %is_binary_keyword;
28096 @_ = qw(and or err eq ne cmp);
28097 @is_binary_keyword{@_} = (1) x scalar(@_);
28099 # 'L' is token for opening { at hash key
28100 my %is_opening_type;
28101 @_ = qw" L { ( [ ";
28102 @is_opening_type{@_} = (1) x scalar(@_);
28104 # 'R' is token for closing } at hash key
28105 my %is_closing_type;
28106 @_ = qw" R } ) ] ";
28107 @is_closing_type{@_} = (1) x scalar(@_);
28109 my %is_redo_last_next_goto;
28110 @_ = qw(redo last next goto);
28111 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
28113 my %is_use_require;
28114 @_ = qw(use require);
28115 @is_use_require{@_} = (1) x scalar(@_);
28117 my %is_sub_package;
28118 @_ = qw(sub package);
28119 @is_sub_package{@_} = (1) x scalar(@_);
28121 # This hash holds the hash key in $tokenizer_self for these keywords:
28122 my %is_format_END_DATA = (
28123 'format' => '_in_format',
28124 '__END__' => '_in_end',
28125 '__DATA__' => '_in_data',
28128 # original ref: camel 3 p 147,
28129 # but perl may accept undocumented flags
28130 # perl 5.10 adds 'p' (preserve)
28131 # Perl version 5.22 added 'n'
28132 # From http://perldoc.perl.org/perlop.html we have
28133 # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
28134 # s/PATTERN/REPLACEMENT/msixpodualngcer
28135 # y/SEARCHLIST/REPLACEMENTLIST/cdsr
28136 # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
28137 # qr/STRING/msixpodualn
28138 my %quote_modifiers = (
28139 's' => '[msixpodualngcer]',
28142 'm' => '[msixpodualngc]',
28143 'qr' => '[msixpodualn]',
28150 # table showing how many quoted things to look for after quote operator..
28151 # s, y, tr have 2 (pattern and replacement)
28152 # others have 1 (pattern only)
28153 my %quote_items = (
28165 sub tokenize_this_line {
28167 # This routine breaks a line of perl code into tokens which are of use in
28168 # indentation and reformatting. One of my goals has been to define tokens
28169 # such that a newline may be inserted between any pair of tokens without
28170 # changing or invalidating the program. This version comes close to this,
28171 # although there are necessarily a few exceptions which must be caught by
28172 # the formatter. Many of these involve the treatment of bare words.
28174 # The tokens and their types are returned in arrays. See previous
28175 # routine for their names.
28177 # See also the array "valid_token_types" in the BEGIN section for an
28180 # To simplify things, token types are either a single character, or they
28181 # are identical to the tokens themselves.
28183 # As a debugging aid, the -D flag creates a file containing a side-by-side
28184 # comparison of the input string and its tokenization for each line of a file.
28185 # This is an invaluable debugging aid.
28187 # In addition to tokens, and some associated quantities, the tokenizer
28188 # also returns flags indication any special line types. These include
28189 # quotes, here_docs, formats.
28191 # -----------------------------------------------------------------------
28193 # How to add NEW_TOKENS:
28195 # New token types will undoubtedly be needed in the future both to keep up
28196 # with changes in perl and to help adapt the tokenizer to other applications.
28198 # Here are some notes on the minimal steps. I wrote these notes while
28199 # adding the 'v' token type for v-strings, which are things like version
28200 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
28201 # can use your editor to search for the string "NEW_TOKENS" to find the
28202 # appropriate sections to change):
28204 # *. Try to talk somebody else into doing it! If not, ..
28206 # *. Make a backup of your current version in case things don't work out!
28208 # *. Think of a new, unused character for the token type, and add to
28209 # the array @valid_token_types in the BEGIN section of this package.
28210 # For example, I used 'v' for v-strings.
28212 # *. Implement coding to recognize the $type of the token in this routine.
28213 # This is the hardest part, and is best done by imitating or modifying
28214 # some of the existing coding. For example, to recognize v-strings, I
28215 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
28216 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
28218 # *. Update sub operator_expected. This update is critically important but
28219 # the coding is trivial. Look at the comments in that routine for help.
28220 # For v-strings, which should behave like numbers, I just added 'v' to the
28221 # regex used to handle numbers and strings (types 'n' and 'Q').
28223 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
28224 # Perl::Tidy::Formatter for breaking lines around this token type. You can
28225 # skip this step and take the default at first, then adjust later to get
28226 # desired results. For adding type 'v', I looked at sub bond_strength and
28227 # saw that number type 'n' was using default strengths, so I didn't do
28228 # anything. I may tune it up someday if I don't like the way line
28229 # breaks with v-strings look.
28231 # *. Implement a 'whitespace' rule in sub set_whitespace_flags in
28232 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
28233 # and saw that type 'n' used spaces on both sides, so I just added 'v'
28234 # to the array @spaces_both_sides.
28236 # *. Update HtmlWriter package so that users can colorize the token as
28237 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
28238 # that package. For v-strings, I initially chose to use a default color
28239 # equal to the default for numbers, but it might be nice to change that
28242 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
28244 # *. Run lots and lots of debug tests. Start with special files designed
28245 # to test the new token type. Run with the -D flag to create a .DEBUG
28246 # file which shows the tokenization. When these work ok, test as many old
28247 # scripts as possible. Start with all of the '.t' files in the 'test'
28248 # directory of the distribution file. Compare .tdy output with previous
28249 # version and updated version to see the differences. Then include as
28250 # many more files as possible. My own technique has been to collect a huge
28251 # number of perl scripts (thousands!) into one directory and run perltidy
28252 # *, then run diff between the output of the previous version and the
28255 # *. For another example, search for the smartmatch operator '~~'
28256 # with your editor to see where updates were made for it.
28258 # -----------------------------------------------------------------------
28260 my $line_of_tokens = shift;
28261 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
28263 # patch while coding change is underway
28264 # make callers private data to allow access
28265 # $tokenizer_self = $caller_tokenizer_self;
28267 # extract line number for use in error messages
28268 $input_line_number = $line_of_tokens->{_line_number};
28270 # reinitialize for multi-line quote
28271 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
28273 # check for pod documentation
28274 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
28276 # must not be in multi-line quote
28277 # and must not be in an equation
28278 if ( !$in_quote && ( operator_expected( 'b', '=', 'b' ) == TERM ) )
28280 $tokenizer_self->{_in_pod} = 1;
28285 $input_line = $untrimmed_input_line;
28289 # trim start of this line unless we are continuing a quoted line
28290 # do not trim end because we might end in a quote (test: deken4.pl)
28291 # Perl::Tidy::Formatter will delete needless trailing blanks
28292 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
28293 $input_line =~ s/^\s*//; # trim left end
28296 # Set a flag to indicate if we might be at an __END__ or __DATA__ line
28297 # This will be used below to avoid quoting a bare word followed by
28299 my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/;
28301 # update the copy of the line for use in error messages
28302 # This must be exactly what we give the pre_tokenizer
28303 $tokenizer_self->{_line_text} = $input_line;
28305 # re-initialize for the main loop
28306 $routput_token_list = []; # stack of output token indexes
28307 $routput_token_type = []; # token types
28308 $routput_block_type = []; # types of code block
28309 $routput_container_type = []; # paren types, such as if, elsif, ..
28310 $routput_type_sequence = []; # nesting sequential number
28312 $rhere_target_list = [];
28314 $tok = $last_nonblank_token;
28315 $type = $last_nonblank_type;
28316 $prototype = $last_nonblank_prototype;
28317 $last_nonblank_i = -1;
28318 $block_type = $last_nonblank_block_type;
28319 $container_type = $last_nonblank_container_type;
28320 $type_sequence = $last_nonblank_type_sequence;
28324 # tokenization is done in two stages..
28325 # stage 1 is a very simple pre-tokenization
28326 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
28328 # a little optimization for a full-line comment
28329 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
28330 $max_tokens_wanted = 1 # no use tokenizing a comment
28333 # start by breaking the line into pre-tokens
28334 ( $rtokens, $rtoken_map, $rtoken_type ) =
28335 pre_tokenize( $input_line, $max_tokens_wanted );
28337 $max_token_index = scalar( @{$rtokens} ) - 1;
28338 push( @{$rtokens}, ' ', ' ', ' ' ); # extra whitespace simplifies logic
28339 push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
28340 push( @{$rtoken_type}, 'b', 'b', 'b' );
28342 # initialize for main loop
28343 foreach my $ii ( 0 .. $max_token_index + 3 ) {
28344 $routput_token_type->[$ii] = "";
28345 $routput_block_type->[$ii] = "";
28346 $routput_container_type->[$ii] = "";
28347 $routput_type_sequence->[$ii] = "";
28348 $routput_indent_flag->[$ii] = 0;
28353 # ------------------------------------------------------------
28354 # begin main tokenization loop
28355 # ------------------------------------------------------------
28357 # we are looking at each pre-token of one line and combining them
28359 while ( ++$i <= $max_token_index ) {
28361 if ($in_quote) { # continue looking for end of a quote
28362 $type = $quote_type;
28364 unless ( @{$routput_token_list} )
28365 { # initialize if continuation line
28366 push( @{$routput_token_list}, $i );
28367 $routput_token_type->[$i] = $type;
28370 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
28372 # scan for the end of the quote or pattern
28374 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
28375 $quoted_string_1, $quoted_string_2
28378 $i, $in_quote, $quote_character,
28379 $quote_pos, $quote_depth, $quoted_string_1,
28380 $quoted_string_2, $rtokens, $rtoken_map,
28384 # all done if we didn't find it
28385 last if ($in_quote);
28387 # save pattern and replacement text for rescanning
28388 my $qs1 = $quoted_string_1;
28389 my $qs2 = $quoted_string_2;
28391 # re-initialize for next search
28392 $quote_character = '';
28395 $quoted_string_1 = "";
28396 $quoted_string_2 = "";
28397 last if ( ++$i > $max_token_index );
28399 # look for any modifiers
28400 if ($allowed_quote_modifiers) {
28402 # check for exact quote modifiers
28403 if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
28404 my $str = $rtokens->[$i];
28405 my $saw_modifier_e;
28406 while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
28407 my $pos = pos($str);
28408 my $char = substr( $str, $pos - 1, 1 );
28409 $saw_modifier_e ||= ( $char eq 'e' );
28412 # For an 'e' quote modifier we must scan the replacement
28413 # text for here-doc targets.
28414 if ($saw_modifier_e) {
28416 my $rht = scan_replacement_text($qs1);
28418 # Change type from 'Q' to 'h' for quotes with
28419 # here-doc targets so that the formatter (see sub
28420 # print_line_of_tokens) will not make any line
28421 # breaks after this point.
28423 push @{$rhere_target_list}, @{$rht};
28425 if ( $i_tok < 0 ) {
28426 my $ilast = $routput_token_list->[-1];
28427 $routput_token_type->[$ilast] = $type;
28432 if ( defined( pos($str) ) ) {
28435 if ( pos($str) == length($str) ) {
28436 last if ( ++$i > $max_token_index );
28439 # Looks like a joined quote modifier
28440 # and keyword, maybe something like
28441 # s/xxx/yyy/gefor @k=...
28442 # Example is "galgen.pl". Would have to split
28443 # the word and insert a new token in the
28444 # pre-token list. This is so rare that I haven't
28445 # done it. Will just issue a warning citation.
28447 # This error might also be triggered if my quote
28448 # modifier characters are incomplete
28452 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
28453 Please put a space between quote modifiers and trailing keywords.
28456 # print "token $rtokens->[$i]\n";
28457 # my $num = length($str) - pos($str);
28458 # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
28459 # print "continuing with new token $rtokens->[$i]\n";
28461 # skipping past this token does least damage
28462 last if ( ++$i > $max_token_index );
28467 # example file: rokicki4.pl
28468 # This error might also be triggered if my quote
28469 # modifier characters are incomplete
28470 write_logfile_entry(
28471 "Note: found word $str at quote modifier location\n"
28477 $allowed_quote_modifiers = "";
28481 unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
28483 # try to catch some common errors
28484 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
28486 if ( $last_nonblank_token eq 'eq' ) {
28487 complain("Should 'eq' be '==' here ?\n");
28489 elsif ( $last_nonblank_token eq 'ne' ) {
28490 complain("Should 'ne' be '!=' here ?\n");
28494 $last_last_nonblank_token = $last_nonblank_token;
28495 $last_last_nonblank_type = $last_nonblank_type;
28496 $last_last_nonblank_block_type = $last_nonblank_block_type;
28497 $last_last_nonblank_container_type =
28498 $last_nonblank_container_type;
28499 $last_last_nonblank_type_sequence =
28500 $last_nonblank_type_sequence;
28501 $last_nonblank_token = $tok;
28502 $last_nonblank_type = $type;
28503 $last_nonblank_prototype = $prototype;
28504 $last_nonblank_block_type = $block_type;
28505 $last_nonblank_container_type = $container_type;
28506 $last_nonblank_type_sequence = $type_sequence;
28507 $last_nonblank_i = $i_tok;
28510 # store previous token type
28511 if ( $i_tok >= 0 ) {
28512 $routput_token_type->[$i_tok] = $type;
28513 $routput_block_type->[$i_tok] = $block_type;
28514 $routput_container_type->[$i_tok] = $container_type;
28515 $routput_type_sequence->[$i_tok] = $type_sequence;
28516 $routput_indent_flag->[$i_tok] = $indent_flag;
28518 my $pre_tok = $rtokens->[$i]; # get the next pre-token
28519 my $pre_type = $rtoken_type->[$i]; # and type
28521 $type = $pre_type; # to be modified as necessary
28522 $block_type = ""; # blank for all tokens except code block braces
28523 $container_type = ""; # blank for all tokens except some parens
28524 $type_sequence = ""; # blank for all tokens except ?/:
28526 $prototype = ""; # blank for all tokens except user defined subs
28529 # this pre-token will start an output token
28530 push( @{$routput_token_list}, $i_tok );
28532 # continue gathering identifier if necessary
28533 # but do not start on blanks and comments
28534 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
28536 if ( $id_scan_state =~ /^(sub|package)/ ) {
28543 last if ($id_scan_state);
28544 next if ( ( $i > 0 ) || $type );
28546 # didn't find any token; start over
28551 # handle whitespace tokens..
28552 next if ( $type eq 'b' );
28553 my $prev_tok = $i > 0 ? $rtokens->[ $i - 1 ] : ' ';
28554 my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b';
28556 # Build larger tokens where possible, since we are not in a quote.
28558 # First try to assemble digraphs. The following tokens are
28559 # excluded and handled specially:
28560 # '/=' is excluded because the / might start a pattern.
28561 # 'x=' is excluded since it might be $x=, with $ on previous line
28562 # '**' and *= might be typeglobs of punctuation variables
28563 # I have allowed tokens starting with <, such as <=,
28564 # because I don't think these could be valid angle operators.
28565 # test file: storrs4.pl
28566 my $test_tok = $tok . $rtokens->[ $i + 1 ];
28567 my $combine_ok = $is_digraph{$test_tok};
28569 # check for special cases which cannot be combined
28572 # '//' must be defined_or operator if an operator is expected.
28573 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
28574 # could be migrated here for clarity
28576 # Patch for RT#102371, misparsing a // in the following snippet:
28577 # state $b //= ccc();
28578 # The solution is to always accept the digraph (or trigraph) after
28579 # token type 'Z' (possible file handle). The reason is that
28580 # sub operator_expected gives TERM expected here, which is
28581 # wrong in this case.
28582 if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
28583 my $next_type = $rtokens->[ $i + 1 ];
28585 operator_expected( $prev_type, $tok, $next_type );
28587 # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
28588 $combine_ok = 0 if ( $expecting == TERM );
28591 # Patch for RT #114359: Missparsing of "print $x ** 0.5;
28592 # Accept the digraphs '**' only after type 'Z'
28593 # Otherwise postpone the decision.
28594 if ( $test_tok eq '**' ) {
28595 if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
28602 && ( $test_tok ne '/=' ) # might be pattern
28603 && ( $test_tok ne 'x=' ) # might be $x
28604 && ( $test_tok ne '*=' ) # typeglob?
28606 # Moved above as part of fix for
28607 # RT #114359: Missparsing of "print $x ** 0.5;
28608 # && ( $test_tok ne '**' ) # typeglob?
28614 # Now try to assemble trigraphs. Note that all possible
28615 # perl trigraphs can be constructed by appending a character
28617 $test_tok = $tok . $rtokens->[ $i + 1 ];
28619 if ( $is_trigraph{$test_tok} ) {
28624 # The only current tetragraph is the double diamond operator
28625 # and its first three characters are not a trigraph, so
28626 # we do can do a special test for it
28627 elsif ( $test_tok eq '<<>' ) {
28628 $test_tok .= $rtokens->[ $i + 2 ];
28629 if ( $is_tetragraph{$test_tok} ) {
28637 $next_tok = $rtokens->[ $i + 1 ];
28638 $next_type = $rtoken_type->[ $i + 1 ];
28640 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
28643 $last_nonblank_token, $tok,
28644 $next_tok, $brace_depth,
28645 $brace_type[$brace_depth], $paren_depth,
28646 $paren_type[$paren_depth]
28648 print STDOUT "TOKENIZE:(@debug_list)\n";
28651 # turn off attribute list on first non-blank, non-bareword
28652 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
28654 ###############################################################
28655 # We have the next token, $tok.
28656 # Now we have to examine this token and decide what it is
28657 # and define its $type
28659 # section 1: bare words
28660 ###############################################################
28662 if ( $pre_type eq 'w' ) {
28663 $expecting = operator_expected( $prev_type, $tok, $next_type );
28664 my ( $next_nonblank_token, $i_next ) =
28665 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28667 # ATTRS: handle sub and variable attributes
28668 if ($in_attribute_list) {
28670 # treat bare word followed by open paren like qw(
28671 if ( $next_nonblank_token eq '(' ) {
28672 $in_quote = $quote_items{'q'};
28673 $allowed_quote_modifiers = $quote_modifiers{'q'};
28679 # handle bareword not followed by open paren
28686 # quote a word followed by => operator
28687 # unless the word __END__ or __DATA__ and the only word on
28689 if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) {
28691 if ( $rtokens->[ $i_next + 1 ] eq '>' ) {
28692 if ( $is_constant{$current_package}{$tok} ) {
28695 elsif ( $is_user_function{$current_package}{$tok} ) {
28698 $user_function_prototype{$current_package}{$tok};
28700 elsif ( $tok =~ /^v\d+$/ ) {
28702 report_v_string($tok);
28704 else { $type = 'w' }
28710 # quote a bare word within braces..like xxx->{s}; note that we
28711 # must be sure this is not a structural brace, to avoid
28712 # mistaking {s} in the following for a quoted bare word:
28713 # for(@[){s}bla}BLA}
28714 # Also treat q in something like var{-q} as a bare word, not qoute operator
28716 $next_nonblank_token eq '}'
28718 $last_nonblank_type eq 'L'
28719 || ( $last_nonblank_type eq 'm'
28720 && $last_last_nonblank_type eq 'L' )
28728 # a bare word immediately followed by :: is not a keyword;
28729 # use $tok_kw when testing for keywords to avoid a mistake
28731 if ( $rtokens->[ $i + 1 ] eq ':'
28732 && $rtokens->[ $i + 2 ] eq ':' )
28737 # handle operator x (now we know it isn't $x=)
28738 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
28739 if ( $tok eq 'x' ) {
28741 if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
28751 # FIXME: Patch: mark something like x4 as an integer for now
28752 # It gets fixed downstream. This is easier than
28753 # splitting the pretoken.
28758 elsif ( $tok_kw eq 'CORE::' ) {
28759 $type = $tok = $tok_kw;
28762 elsif ( ( $tok eq 'strict' )
28763 and ( $last_nonblank_token eq 'use' ) )
28765 $tokenizer_self->{_saw_use_strict} = 1;
28766 scan_bare_identifier();
28769 elsif ( ( $tok eq 'warnings' )
28770 and ( $last_nonblank_token eq 'use' ) )
28772 $tokenizer_self->{_saw_perl_dash_w} = 1;
28774 # scan as identifier, so that we pick up something like:
28775 # use warnings::register
28776 scan_bare_identifier();
28780 $tok eq 'AutoLoader'
28781 && $tokenizer_self->{_look_for_autoloader}
28783 $last_nonblank_token eq 'use'
28785 # these regexes are from AutoSplit.pm, which we want
28787 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
28788 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
28792 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
28793 $tokenizer_self->{_saw_autoloader} = 1;
28794 $tokenizer_self->{_look_for_autoloader} = 0;
28795 scan_bare_identifier();
28799 $tok eq 'SelfLoader'
28800 && $tokenizer_self->{_look_for_selfloader}
28801 && ( $last_nonblank_token eq 'use'
28802 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
28803 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
28806 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
28807 $tokenizer_self->{_saw_selfloader} = 1;
28808 $tokenizer_self->{_look_for_selfloader} = 0;
28809 scan_bare_identifier();
28812 elsif ( ( $tok eq 'constant' )
28813 and ( $last_nonblank_token eq 'use' ) )
28815 scan_bare_identifier();
28816 my ( $next_nonblank_token, $i_next ) =
28817 find_next_nonblank_token( $i, $rtokens,
28818 $max_token_index );
28820 if ($next_nonblank_token) {
28822 if ( $is_keyword{$next_nonblank_token} ) {
28824 # Assume qw is used as a quote and okay, as in:
28825 # use constant qw{ DEBUG 0 };
28826 # Not worth trying to parse for just a warning
28828 # NOTE: This warning is deactivated because recent
28829 # versions of perl do not complain here, but
28830 # the coding is retained for reference.
28831 if ( 0 && $next_nonblank_token ne 'qw' ) {
28833 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
28838 # FIXME: could check for error in which next token is
28839 # not a word (number, punctuation, ..)
28841 $is_constant{$current_package}{$next_nonblank_token}
28847 # various quote operators
28848 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
28850 if ( $expecting == OPERATOR ) {
28852 # Be careful not to call an error for a qw quote
28853 # where a parenthesized list is allowed. For example,
28854 # it could also be a for/foreach construct such as
28856 # foreach my $key qw\Uno Due Tres Quadro\ {
28857 # print "Set $key\n";
28861 # Or it could be a function call.
28862 # NOTE: Braces in something like &{ xxx } are not
28863 # marked as a block, we might have a method call.
28864 # &method(...), $method->(..), &{method}(...),
28865 # $ref[2](list) is ok & short for $ref[2]->(list)
28867 # See notes in 'sub code_block_type' and
28868 # 'sub is_non_structural_brace'
28872 && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
28873 || $is_for_foreach{$want_paren} )
28876 error_if_expecting_OPERATOR();
28879 $in_quote = $quote_items{$tok};
28880 $allowed_quote_modifiers = $quote_modifiers{$tok};
28882 # All quote types are 'Q' except possibly qw quotes.
28883 # qw quotes are special in that they may generally be trimmed
28884 # of leading and trailing whitespace. So they are given a
28885 # separate type, 'q', unless requested otherwise.
28887 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
28890 $quote_type = $type;
28893 # check for a statement label
28895 ( $next_nonblank_token eq ':' )
28896 && ( $rtokens->[ $i_next + 1 ] ne ':' )
28897 && ( $i_next <= $max_token_index ) # colon on same line
28901 if ( $tok !~ /[A-Z]/ ) {
28902 push @{ $tokenizer_self->{_rlower_case_labels_at} },
28903 $input_line_number;
28911 # 'sub' || 'package'
28912 elsif ( $is_sub_package{$tok_kw} ) {
28913 error_if_expecting_OPERATOR()
28914 if ( $expecting == OPERATOR );
28918 # Note on token types for format, __DATA__, __END__:
28919 # It simplifies things to give these type ';', so that when we
28920 # start rescanning we will be expecting a token of type TERM.
28921 # We will switch to type 'k' before outputting the tokens.
28922 elsif ( $is_format_END_DATA{$tok_kw} ) {
28923 $type = ';'; # make tokenizer look for TERM next
28924 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
28928 elsif ( $is_keyword{$tok_kw} ) {
28931 # Since for and foreach may not be followed immediately
28932 # by an opening paren, we have to remember which keyword
28933 # is associated with the next '('
28934 if ( $is_for_foreach{$tok} ) {
28935 if ( new_statement_ok() ) {
28936 $want_paren = $tok;
28940 # recognize 'use' statements, which are special
28941 elsif ( $is_use_require{$tok} ) {
28942 $statement_type = $tok;
28943 error_if_expecting_OPERATOR()
28944 if ( $expecting == OPERATOR );
28947 # remember my and our to check for trailing ": shared"
28948 elsif ( $is_my_our{$tok} ) {
28949 $statement_type = $tok;
28952 # Check for misplaced 'elsif' and 'else', but allow isolated
28953 # else or elsif blocks to be formatted. This is indicated
28954 # by a last noblank token of ';'
28955 elsif ( $tok eq 'elsif' ) {
28956 if ( $last_nonblank_token ne ';'
28957 && $last_nonblank_block_type !~
28958 /^(if|elsif|unless)$/ )
28961 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
28965 elsif ( $tok eq 'else' ) {
28967 # patched for SWITCH/CASE
28969 $last_nonblank_token ne ';'
28970 && $last_nonblank_block_type !~
28971 /^(if|elsif|unless|case|when)$/
28973 # patch to avoid an unwanted error message for
28974 # the case of a parenless 'case' (RT 105484):
28975 # switch ( 1 ) { case x { 2 } else { } }
28976 && $statement_type !~
28977 /^(if|elsif|unless|case|when)$/
28981 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
28985 elsif ( $tok eq 'continue' ) {
28986 if ( $last_nonblank_token ne ';'
28987 && $last_nonblank_block_type !~
28988 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
28991 # note: ';' '{' and '}' in list above
28992 # because continues can follow bare blocks;
28993 # ':' is labeled block
28995 ############################################
28996 # NOTE: This check has been deactivated because
28997 # continue has an alternative usage for given/when
28998 # blocks in perl 5.10
28999 ## warning("'$tok' should follow a block\n");
29000 ############################################
29004 # patch for SWITCH/CASE if 'case' and 'when are
29005 # treated as keywords.
29006 elsif ( $tok eq 'when' || $tok eq 'case' ) {
29007 $statement_type = $tok; # next '{' is block
29011 # indent trailing if/unless/while/until
29012 # outdenting will be handled by later indentation loop
29013 ## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
29025 ## if ( $tok =~ /^(if|unless|while|until)$/
29026 ## && $next_nonblank_token ne '(' )
29028 ## $indent_flag = 1;
29032 # check for inline label following
29033 # /^(redo|last|next|goto)$/
29034 elsif (( $last_nonblank_type eq 'k' )
29035 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
29041 # something else --
29044 scan_bare_identifier();
29045 if ( $type eq 'w' ) {
29047 if ( $expecting == OPERATOR ) {
29049 # don't complain about possible indirect object
29053 # sub new($) { ... }
29054 # $b = new A::; # calls A::new
29055 # $c = new A; # same thing but suspicious
29056 # This will call A::new but we have a 'new' in
29057 # main:: which looks like a constant.
29059 if ( $last_nonblank_type eq 'C' ) {
29060 if ( $tok !~ /::$/ ) {
29062 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
29063 Maybe indirectet object notation?
29068 error_if_expecting_OPERATOR("bareword");
29072 # mark bare words immediately followed by a paren as
29074 $next_tok = $rtokens->[ $i + 1 ];
29075 if ( $next_tok eq '(' ) {
29079 # underscore after file test operator is file handle
29080 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
29084 # patch for SWITCH/CASE if 'case' and 'when are
29085 # not treated as keywords:
29089 && $brace_type[$brace_depth] eq 'switch'
29091 || ( $tok eq 'when'
29092 && $brace_type[$brace_depth] eq 'given' )
29095 $statement_type = $tok; # next '{' is block
29096 $type = 'k'; # for keyword syntax coloring
29099 # patch for SWITCH/CASE if switch and given not keywords
29100 # Switch is not a perl 5 keyword, but we will gamble
29101 # and mark switch followed by paren as a keyword. This
29102 # is only necessary to get html syntax coloring nice,
29103 # and does not commit this as being a switch/case.
29104 if ( $next_nonblank_token eq '('
29105 && ( $tok eq 'switch' || $tok eq 'given' ) )
29107 $type = 'k'; # for keyword syntax coloring
29113 ###############################################################
29114 # section 2: strings of digits
29115 ###############################################################
29116 elsif ( $pre_type eq 'd' ) {
29117 $expecting = operator_expected( $prev_type, $tok, $next_type );
29118 error_if_expecting_OPERATOR("Number")
29119 if ( $expecting == OPERATOR );
29120 my $number = scan_number();
29121 if ( !defined($number) ) {
29123 # shouldn't happen - we should always get a number
29124 warning("non-number beginning with digit--program bug\n");
29125 report_definite_bug();
29129 ###############################################################
29130 # section 3: all other tokens
29131 ###############################################################
29134 last if ( $tok eq '#' );
29135 my $code = $tokenization_code->{$tok};
29138 operator_expected( $prev_type, $tok, $next_type );
29145 # -----------------------------
29146 # end of main tokenization loop
29147 # -----------------------------
29149 if ( $i_tok >= 0 ) {
29150 $routput_token_type->[$i_tok] = $type;
29151 $routput_block_type->[$i_tok] = $block_type;
29152 $routput_container_type->[$i_tok] = $container_type;
29153 $routput_type_sequence->[$i_tok] = $type_sequence;
29154 $routput_indent_flag->[$i_tok] = $indent_flag;
29157 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
29158 $last_last_nonblank_token = $last_nonblank_token;
29159 $last_last_nonblank_type = $last_nonblank_type;
29160 $last_last_nonblank_block_type = $last_nonblank_block_type;
29161 $last_last_nonblank_container_type = $last_nonblank_container_type;
29162 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
29163 $last_nonblank_token = $tok;
29164 $last_nonblank_type = $type;
29165 $last_nonblank_block_type = $block_type;
29166 $last_nonblank_container_type = $container_type;
29167 $last_nonblank_type_sequence = $type_sequence;
29168 $last_nonblank_prototype = $prototype;
29171 # reset indentation level if necessary at a sub or package
29172 # in an attempt to recover from a nesting error
29173 if ( $level_in_tokenizer < 0 ) {
29174 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
29175 reset_indentation_level(0);
29176 brace_warning("resetting level to 0 at $1 $2\n");
29180 # all done tokenizing this line ...
29181 # now prepare the final list of tokens and types
29183 my @token_type = (); # stack of output token types
29184 my @block_type = (); # stack of output code block types
29185 my @container_type = (); # stack of output code container types
29186 my @type_sequence = (); # stack of output type sequence numbers
29187 my @tokens = (); # output tokens
29188 my @levels = (); # structural brace levels of output tokens
29189 my @slevels = (); # secondary nesting levels of output tokens
29190 my @nesting_tokens = (); # string of tokens leading to this depth
29191 my @nesting_types = (); # string of token types leading to this depth
29192 my @nesting_blocks = (); # string of block types leading to this depth
29193 my @nesting_lists = (); # string of list types leading to this depth
29194 my @ci_string = (); # string needed to compute continuation indentation
29195 my @container_environment = (); # BLOCK or LIST
29196 my $container_environment = '';
29197 my $im = -1; # previous $i value
29199 my $ci_string_sum = ones_count($ci_string_in_tokenizer);
29201 # Computing Token Indentation
29203 # The final section of the tokenizer forms tokens and also computes
29204 # parameters needed to find indentation. It is much easier to do it
29205 # in the tokenizer than elsewhere. Here is a brief description of how
29206 # indentation is computed. Perl::Tidy computes indentation as the sum
29209 # (1) structural indentation, such as if/else/elsif blocks
29210 # (2) continuation indentation, such as long parameter call lists.
29212 # These are occasionally called primary and secondary indentation.
29214 # Structural indentation is introduced by tokens of type '{', although
29215 # the actual tokens might be '{', '(', or '['. Structural indentation
29216 # is of two types: BLOCK and non-BLOCK. Default structural indentation
29217 # is 4 characters if the standard indentation scheme is used.
29219 # Continuation indentation is introduced whenever a line at BLOCK level
29220 # is broken before its termination. Default continuation indentation
29221 # is 2 characters in the standard indentation scheme.
29223 # Both types of indentation may be nested arbitrarily deep and
29224 # interlaced. The distinction between the two is somewhat arbitrary.
29226 # For each token, we will define two variables which would apply if
29227 # the current statement were broken just before that token, so that
29228 # that token started a new line:
29230 # $level = the structural indentation level,
29231 # $ci_level = the continuation indentation level
29233 # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
29234 # assuming defaults. However, in some special cases it is customary
29235 # to modify $ci_level from this strict value.
29237 # The total structural indentation is easy to compute by adding and
29238 # subtracting 1 from a saved value as types '{' and '}' are seen. The
29239 # running value of this variable is $level_in_tokenizer.
29241 # The total continuation is much more difficult to compute, and requires
29242 # several variables. These variables are:
29244 # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
29245 # each indentation level, if there are intervening open secondary
29246 # structures just prior to that level.
29247 # $continuation_string_in_tokenizer = a string of 1's and 0's indicating
29248 # if the last token at that level is "continued", meaning that it
29249 # is not the first token of an expression.
29250 # $nesting_block_string = a string of 1's and 0's indicating, for each
29251 # indentation level, if the level is of type BLOCK or not.
29252 # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
29253 # $nesting_list_string = a string of 1's and 0's indicating, for each
29254 # indentation level, if it is appropriate for list formatting.
29255 # If so, continuation indentation is used to indent long list items.
29256 # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
29257 # @{$rslevel_stack} = a stack of total nesting depths at each
29258 # structural indentation level, where "total nesting depth" means
29259 # the nesting depth that would occur if every nesting token -- '{', '[',
29260 # and '(' -- , regardless of context, is used to compute a nesting
29263 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
29264 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
29266 my ( $ci_string_i, $level_i, $nesting_block_string_i,
29267 $nesting_list_string_i, $nesting_token_string_i,
29268 $nesting_type_string_i, );
29270 foreach my $i ( @{$routput_token_list} )
29271 { # scan the list of pre-tokens indexes
29273 # self-checking for valid token types
29274 my $type = $routput_token_type->[$i];
29275 my $forced_indentation_flag = $routput_indent_flag->[$i];
29277 # See if we should undo the $forced_indentation_flag.
29278 # Forced indentation after 'if', 'unless', 'while' and 'until'
29279 # expressions without trailing parens is optional and doesn't
29280 # always look good. It is usually okay for a trailing logical
29281 # expression, but if the expression is a function call, code block,
29282 # or some kind of list it puts in an unwanted extra indentation
29283 # level which is hard to remove.
29285 # Example where extra indentation looks ok:
29287 # if $det_a < 0 and $det_b > 0
29288 # or $det_a > 0 and $det_b < 0;
29290 # Example where extra indentation is not needed because
29291 # the eval brace also provides indentation:
29292 # print "not " if defined eval {
29293 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
29296 # The following rule works fairly well:
29297 # Undo the flag if the end of this line, or start of the next
29298 # line, is an opening container token or a comma.
29299 # This almost always works, but if not after another pass it will
29301 if ( $forced_indentation_flag && $type eq 'k' ) {
29303 my $ilast = $routput_token_list->[$ixlast];
29304 my $toklast = $routput_token_type->[$ilast];
29305 if ( $toklast eq '#' ) {
29307 $ilast = $routput_token_list->[$ixlast];
29308 $toklast = $routput_token_type->[$ilast];
29310 if ( $toklast eq 'b' ) {
29312 $ilast = $routput_token_list->[$ixlast];
29313 $toklast = $routput_token_type->[$ilast];
29315 if ( $toklast =~ /^[\{,]$/ ) {
29316 $forced_indentation_flag = 0;
29319 ( $toklast, my $i_next ) =
29320 find_next_nonblank_token( $max_token_index, $rtokens,
29321 $max_token_index );
29322 if ( $toklast =~ /^[\{,]$/ ) {
29323 $forced_indentation_flag = 0;
29328 # if we are already in an indented if, see if we should outdent
29329 if ($indented_if_level) {
29331 # don't try to nest trailing if's - shouldn't happen
29332 if ( $type eq 'k' ) {
29333 $forced_indentation_flag = 0;
29336 # check for the normal case - outdenting at next ';'
29337 elsif ( $type eq ';' ) {
29338 if ( $level_in_tokenizer == $indented_if_level ) {
29339 $forced_indentation_flag = -1;
29340 $indented_if_level = 0;
29344 # handle case of missing semicolon
29345 elsif ( $type eq '}' ) {
29346 if ( $level_in_tokenizer == $indented_if_level ) {
29347 $indented_if_level = 0;
29349 # TBD: This could be a subroutine call
29350 $level_in_tokenizer--;
29351 if ( @{$rslevel_stack} > 1 ) {
29352 pop( @{$rslevel_stack} );
29354 if ( length($nesting_block_string) > 1 )
29355 { # true for valid script
29356 chop $nesting_block_string;
29357 chop $nesting_list_string;
29364 my $tok = $rtokens->[$i]; # the token, but ONLY if same as pretoken
29365 $level_i = $level_in_tokenizer;
29367 # This can happen by running perltidy on non-scripts
29368 # although it could also be bug introduced by programming change.
29369 # Perl silently accepts a 032 (^Z) and takes it as the end
29370 if ( !$is_valid_token_type{$type} ) {
29371 my $val = ord($type);
29373 "unexpected character decimal $val ($type) in script\n");
29374 $tokenizer_self->{_in_error} = 1;
29377 # ----------------------------------------------------------------
29378 # TOKEN TYPE PATCHES
29379 # output __END__, __DATA__, and format as type 'k' instead of ';'
29380 # to make html colors correct, etc.
29381 my $fix_type = $type;
29382 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
29384 # output anonymous 'sub' as keyword
29385 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
29387 # -----------------------------------------------------------------
29389 $nesting_token_string_i = $nesting_token_string;
29390 $nesting_type_string_i = $nesting_type_string;
29391 $nesting_block_string_i = $nesting_block_string;
29392 $nesting_list_string_i = $nesting_list_string;
29394 # set primary indentation levels based on structural braces
29395 # Note: these are set so that the leading braces have a HIGHER
29396 # level than their CONTENTS, which is convenient for indentation
29397 # Also, define continuation indentation for each token.
29398 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
29401 # use environment before updating
29402 $container_environment =
29403 $nesting_block_flag ? 'BLOCK'
29404 : $nesting_list_flag ? 'LIST'
29407 # if the difference between total nesting levels is not 1,
29408 # there are intervening non-structural nesting types between
29409 # this '{' and the previous unclosed '{'
29410 my $intervening_secondary_structure = 0;
29411 if ( @{$rslevel_stack} ) {
29412 $intervening_secondary_structure =
29413 $slevel_in_tokenizer - $rslevel_stack->[-1];
29416 # Continuation Indentation
29418 # Having tried setting continuation indentation both in the formatter and
29419 # in the tokenizer, I can say that setting it in the tokenizer is much,
29420 # much easier. The formatter already has too much to do, and can't
29421 # make decisions on line breaks without knowing what 'ci' will be at
29422 # arbitrary locations.
29424 # But a problem with setting the continuation indentation (ci) here
29425 # in the tokenizer is that we do not know where line breaks will actually
29426 # be. As a result, we don't know if we should propagate continuation
29427 # indentation to higher levels of structure.
29429 # For nesting of only structural indentation, we never need to do this.
29430 # For example, in a long if statement, like this
29432 # if ( !$output_block_type[$i]
29433 # && ($in_statement_continuation) )
29438 # the second line has ci but we do normally give the lines within the BLOCK
29439 # any ci. This would be true if we had blocks nested arbitrarily deeply.
29441 # But consider something like this, where we have created a break after
29442 # an opening paren on line 1, and the paren is not (currently) a
29443 # structural indentation token:
29445 # my $file = $menubar->Menubutton(
29446 # qw/-text File -underline 0 -menuitems/ => [
29448 # Cascade => '~View',
29452 # The second line has ci, so it would seem reasonable to propagate it
29453 # down, giving the third line 1 ci + 1 indentation. This suggests the
29454 # following rule, which is currently used to propagating ci down: if there
29455 # are any non-structural opening parens (or brackets, or braces), before
29456 # an opening structural brace, then ci is propagated down, and otherwise
29457 # not. The variable $intervening_secondary_structure contains this
29458 # information for the current token, and the string
29459 # "$ci_string_in_tokenizer" is a stack of previous values of this
29462 # save the current states
29463 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
29464 $level_in_tokenizer++;
29466 if ($forced_indentation_flag) {
29468 # break BEFORE '?' when there is forced indentation
29469 if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
29470 if ( $type eq 'k' ) {
29471 $indented_if_level = $level_in_tokenizer;
29474 # do not change container environment here if we are not
29475 # at a real list. Adding this check prevents "blinkers"
29476 # often near 'unless" clauses, such as in the following
29481 ## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
29484 $nesting_block_string .= "$nesting_block_flag";
29488 if ( $routput_block_type->[$i] ) {
29489 $nesting_block_flag = 1;
29490 $nesting_block_string .= '1';
29493 $nesting_block_flag = 0;
29494 $nesting_block_string .= '0';
29498 # we will use continuation indentation within containers
29499 # which are not blocks and not logical expressions
29501 if ( !$routput_block_type->[$i] ) {
29503 # propagate flag down at nested open parens
29504 if ( $routput_container_type->[$i] eq '(' ) {
29505 $bit = 1 if $nesting_list_flag;
29508 # use list continuation if not a logical grouping
29509 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
29513 $is_logical_container{ $routput_container_type->[$i]
29517 $nesting_list_string .= $bit;
29518 $nesting_list_flag = $bit;
29520 $ci_string_in_tokenizer .=
29521 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
29522 $ci_string_sum = ones_count($ci_string_in_tokenizer);
29523 $continuation_string_in_tokenizer .=
29524 ( $in_statement_continuation > 0 ) ? '1' : '0';
29526 # Sometimes we want to give an opening brace continuation indentation,
29527 # and sometimes not. For code blocks, we don't do it, so that the leading
29528 # '{' gets outdented, like this:
29530 # if ( !$output_block_type[$i]
29531 # && ($in_statement_continuation) )
29534 # For other types, we will give them continuation indentation. For example,
29535 # here is how a list looks with the opening paren indented:
29538 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
29539 # [ "homer", "marge", "bart" ], );
29541 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
29543 my $total_ci = $ci_string_sum;
29545 !$routput_block_type->[$i] # patch: skip for BLOCK
29546 && ($in_statement_continuation)
29547 && !( $forced_indentation_flag && $type eq ':' )
29550 $total_ci += $in_statement_continuation
29551 unless ( $ci_string_in_tokenizer =~ /1$/ );
29554 $ci_string_i = $total_ci;
29555 $in_statement_continuation = 0;
29558 elsif ($type eq '}'
29560 || $forced_indentation_flag < 0 )
29563 # only a nesting error in the script would prevent popping here
29564 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
29566 $level_i = --$level_in_tokenizer;
29568 # restore previous level values
29569 if ( length($nesting_block_string) > 1 )
29570 { # true for valid script
29571 chop $nesting_block_string;
29572 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
29573 chop $nesting_list_string;
29574 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
29576 chop $ci_string_in_tokenizer;
29577 $ci_string_sum = ones_count($ci_string_in_tokenizer);
29579 $in_statement_continuation =
29580 chop $continuation_string_in_tokenizer;
29582 # zero continuation flag at terminal BLOCK '}' which
29583 # ends a statement.
29584 if ( $routput_block_type->[$i] ) {
29586 # ...These include non-anonymous subs
29587 # note: could be sub ::abc { or sub 'abc
29588 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
29590 # note: older versions of perl require the /gc modifier
29591 # here or else the \G does not work.
29592 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
29594 $in_statement_continuation = 0;
29598 # ...and include all block types except user subs with
29599 # block prototypes and these: (sort|grep|map|do|eval)
29600 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
29602 $is_zero_continuation_block_type{
29603 $routput_block_type->[$i]
29606 $in_statement_continuation = 0;
29609 # ..but these are not terminal types:
29610 # /^(sort|grep|map|do|eval)$/ )
29612 $is_not_zero_continuation_block_type{
29613 $routput_block_type->[$i]
29618 # ..and a block introduced by a label
29619 # /^\w+\s*:$/gc ) {
29620 elsif ( $routput_block_type->[$i] =~ /:$/ ) {
29621 $in_statement_continuation = 0;
29624 # user function with block prototype
29626 $in_statement_continuation = 0;
29630 # If we are in a list, then
29631 # we must set continuation indentation at the closing
29632 # paren of something like this (paren after $check):
29635 # ( not defined $check )
29637 # or $check eq "new"
29638 # or $check eq "old",
29640 elsif ( $tok eq ')' ) {
29641 $in_statement_continuation = 1
29642 if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
29645 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
29648 # use environment after updating
29649 $container_environment =
29650 $nesting_block_flag ? 'BLOCK'
29651 : $nesting_list_flag ? 'LIST'
29653 $ci_string_i = $ci_string_sum + $in_statement_continuation;
29654 $nesting_block_string_i = $nesting_block_string;
29655 $nesting_list_string_i = $nesting_list_string;
29658 # not a structural indentation type..
29661 $container_environment =
29662 $nesting_block_flag ? 'BLOCK'
29663 : $nesting_list_flag ? 'LIST'
29666 # zero the continuation indentation at certain tokens so
29667 # that they will be at the same level as its container. For
29668 # commas, this simplifies the -lp indentation logic, which
29669 # counts commas. For ?: it makes them stand out.
29670 if ($nesting_list_flag) {
29671 if ( $type =~ /^[,\?\:]$/ ) {
29672 $in_statement_continuation = 0;
29676 # be sure binary operators get continuation indentation
29678 $container_environment
29679 && ( $type eq 'k' && $is_binary_keyword{$tok}
29680 || $is_binary_type{$type} )
29683 $in_statement_continuation = 1;
29686 # continuation indentation is sum of any open ci from previous
29687 # levels plus the current level
29688 $ci_string_i = $ci_string_sum + $in_statement_continuation;
29690 # update continuation flag ...
29691 # if this isn't a blank or comment..
29692 if ( $type ne 'b' && $type ne '#' ) {
29694 # and we are in a BLOCK
29695 if ($nesting_block_flag) {
29697 # the next token after a ';' and label starts a new stmt
29698 if ( $type eq ';' || $type eq 'J' ) {
29699 $in_statement_continuation = 0;
29702 # otherwise, we are continuing the current statement
29704 $in_statement_continuation = 1;
29708 # if we are not in a BLOCK..
29711 # do not use continuation indentation if not list
29712 # environment (could be within if/elsif clause)
29713 if ( !$nesting_list_flag ) {
29714 $in_statement_continuation = 0;
29717 # otherwise, the token after a ',' starts a new term
29719 # Patch FOR RT#99961; no continuation after a ';'
29720 # This is needed because perltidy currently marks
29721 # a block preceded by a type character like % or @
29722 # as a non block, to simplify formatting. But these
29723 # are actually blocks and can have semicolons.
29724 # See code_block_type() and is_non_structural_brace().
29725 elsif ( $type eq ',' || $type eq ';' ) {
29726 $in_statement_continuation = 0;
29729 # otherwise, we are continuing the current term
29731 $in_statement_continuation = 1;
29737 if ( $level_in_tokenizer < 0 ) {
29738 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
29739 $tokenizer_self->{_saw_negative_indentation} = 1;
29740 warning("Starting negative indentation\n");
29744 # set secondary nesting levels based on all containment token types
29745 # Note: these are set so that the nesting depth is the depth
29746 # of the PREVIOUS TOKEN, which is convenient for setting
29747 # the strength of token bonds
29748 my $slevel_i = $slevel_in_tokenizer;
29751 if ( $is_opening_type{$type} ) {
29752 $slevel_in_tokenizer++;
29753 $nesting_token_string .= $tok;
29754 $nesting_type_string .= $type;
29758 elsif ( $is_closing_type{$type} ) {
29759 $slevel_in_tokenizer--;
29760 my $char = chop $nesting_token_string;
29762 if ( $char ne $matching_start_token{$tok} ) {
29763 $nesting_token_string .= $char . $tok;
29764 $nesting_type_string .= $type;
29767 chop $nesting_type_string;
29771 push( @block_type, $routput_block_type->[$i] );
29772 push( @ci_string, $ci_string_i );
29773 push( @container_environment, $container_environment );
29774 push( @container_type, $routput_container_type->[$i] );
29775 push( @levels, $level_i );
29776 push( @nesting_tokens, $nesting_token_string_i );
29777 push( @nesting_types, $nesting_type_string_i );
29778 push( @slevels, $slevel_i );
29779 push( @token_type, $fix_type );
29780 push( @type_sequence, $routput_type_sequence->[$i] );
29781 push( @nesting_blocks, $nesting_block_string );
29782 push( @nesting_lists, $nesting_list_string );
29784 # now form the previous token
29787 $rtoken_map->[$i] - $rtoken_map->[$im]; # how many characters
29791 substr( $input_line, $rtoken_map->[$im], $num ) );
29797 $num = length($input_line) - $rtoken_map->[$im]; # make the last token
29799 push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) );
29802 $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
29803 $tokenizer_self->{_in_quote} = $in_quote;
29804 $tokenizer_self->{_quote_target} =
29805 $in_quote ? matching_end_token($quote_character) : "";
29806 $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
29808 $line_of_tokens->{_rtoken_type} = \@token_type;
29809 $line_of_tokens->{_rtokens} = \@tokens;
29810 $line_of_tokens->{_rblock_type} = \@block_type;
29811 $line_of_tokens->{_rcontainer_type} = \@container_type;
29812 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
29813 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
29814 $line_of_tokens->{_rlevels} = \@levels;
29815 $line_of_tokens->{_rslevels} = \@slevels;
29816 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
29817 $line_of_tokens->{_rci_levels} = \@ci_string;
29818 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
29822 } # end tokenize_this_line
29824 #########i#############################################################
29825 # Tokenizer routines which assist in identifying token types
29826 #######################################################################
29828 sub operator_expected {
29830 # Many perl symbols have two or more meanings. For example, '<<'
29831 # can be a shift operator or a here-doc operator. The
29832 # interpretation of these symbols depends on the current state of
29833 # the tokenizer, which may either be expecting a term or an
29834 # operator. For this example, a << would be a shift if an operator
29835 # is expected, and a here-doc if a term is expected. This routine
29836 # is called to make this decision for any current token. It returns
29837 # one of three possible values:
29839 # OPERATOR - operator expected (or at least, not a term)
29840 # UNKNOWN - can't tell
29841 # TERM - a term is expected (or at least, not an operator)
29843 # The decision is based on what has been seen so far. This
29844 # information is stored in the "$last_nonblank_type" and
29845 # "$last_nonblank_token" variables. For example, if the
29846 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
29847 # if $last_nonblank_type is 'n' (numeric), we are expecting an
29850 # If a UNKNOWN is returned, the calling routine must guess. A major
29851 # goal of this tokenizer is to minimize the possibility of returning
29852 # UNKNOWN, because a wrong guess can spoil the formatting of a
29855 # adding NEW_TOKENS: it is critically important that this routine be
29856 # updated to allow it to determine if an operator or term is to be
29857 # expected after the new token. Doing this simply involves adding
29858 # the new token character to one of the regexes in this routine or
29859 # to one of the hash lists
29860 # that it uses, which are initialized in the BEGIN section.
29861 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
29864 my ( $prev_type, $tok, $next_type ) = @_;
29866 my $op_expected = UNKNOWN;
29868 ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
29870 # Note: function prototype is available for token type 'U' for future
29871 # program development. It contains the leading and trailing parens,
29872 # and no blanks. It might be used to eliminate token type 'C', for
29873 # example (prototype = '()'). Thus:
29874 # if ($last_nonblank_type eq 'U') {
29875 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
29878 # A possible filehandle (or object) requires some care...
29879 if ( $last_nonblank_type eq 'Z' ) {
29882 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
29883 $op_expected = UNKNOWN;
29886 # For possible file handle like "$a", Perl uses weird parsing rules.
29888 # print $a/2,"/hi"; - division
29889 # print $a / 2,"/hi"; - division
29890 # print $a/ 2,"/hi"; - division
29891 # print $a /2,"/hi"; - pattern (and error)!
29892 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
29893 $op_expected = TERM;
29896 # Note when an operation is being done where a
29897 # filehandle might be expected, since a change in whitespace
29898 # could change the interpretation of the statement.
29900 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
29901 complain("operator in print statement not recommended\n");
29902 $op_expected = OPERATOR;
29907 # Check for smartmatch operator before preceding brace or square bracket.
29908 # For example, at the ? after the ] in the following expressions we are
29909 # expecting an operator:
29911 # qr/3/ ~~ ['1234'] ? 1 : 0;
29912 # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
29913 elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
29914 $op_expected = OPERATOR;
29917 # handle something after 'do' and 'eval'
29918 elsif ( $is_block_operator{$last_nonblank_token} ) {
29920 # something like $a = eval "expression";
29922 if ( $last_nonblank_type eq 'k' ) {
29923 $op_expected = TERM; # expression or list mode following keyword
29926 # something like $a = do { BLOCK } / 2;
29927 # or this ? after a smartmatch anonynmous hash or array reference:
29928 # qr/3/ ~~ ['1234'] ? 1 : 0;
29931 $op_expected = OPERATOR; # block mode following }
29935 # handle bare word..
29936 elsif ( $last_nonblank_type eq 'w' ) {
29938 # unfortunately, we can't tell what type of token to expect next
29939 # after most bare words
29940 $op_expected = UNKNOWN;
29943 # operator, but not term possible after these types
29944 # Note: moved ')' from type to token because parens in list context
29945 # get marked as '{' '}' now. This is a minor glitch in the following:
29946 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
29948 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
29949 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
29951 $op_expected = OPERATOR;
29953 # in a 'use' statement, numbers and v-strings are not true
29954 # numbers, so to avoid incorrect error messages, we will
29955 # mark them as unknown for now (use.t)
29956 # TODO: it would be much nicer to create a new token V for VERSION
29957 # number in a use statement. Then this could be a check on type V
29958 # and related patches which change $statement_type for '=>'
29959 # and ',' could be removed. Further, it would clean things up to
29960 # scan the 'use' statement with a separate subroutine.
29961 if ( ( $statement_type eq 'use' )
29962 && ( $last_nonblank_type =~ /^[nv]$/ ) )
29964 $op_expected = UNKNOWN;
29967 # expecting VERSION or {} after package NAMESPACE
29968 elsif ($statement_type =~ /^package\b/
29969 && $last_nonblank_token =~ /^package\b/ )
29971 $op_expected = TERM;
29975 # no operator after many keywords, such as "die", "warn", etc
29976 elsif ( $expecting_term_token{$last_nonblank_token} ) {
29978 # patch for dor.t (defined or).
29979 # perl functions which may be unary operators
29980 # TODO: This list is incomplete, and these should be put
29983 && $next_type eq '/'
29984 && $last_nonblank_type eq 'k'
29985 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
29987 $op_expected = OPERATOR;
29990 $op_expected = TERM;
29994 # no operator after things like + - ** (i.e., other operators)
29995 elsif ( $expecting_term_types{$last_nonblank_type} ) {
29996 $op_expected = TERM;
29999 # a few operators, like "time", have an empty prototype () and so
30000 # take no parameters but produce a value to operate on
30001 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
30002 $op_expected = OPERATOR;
30005 # post-increment and decrement produce values to be operated on
30006 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
30007 $op_expected = OPERATOR;
30010 # no value to operate on after sub block
30011 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
30013 # a right brace here indicates the end of a simple block.
30014 # all non-structural right braces have type 'R'
30015 # all braces associated with block operator keywords have been given those
30016 # keywords as "last_nonblank_token" and caught above.
30017 # (This statement is order dependent, and must come after checking
30018 # $last_nonblank_token).
30019 elsif ( $last_nonblank_type eq '}' ) {
30021 # patch for dor.t (defined or).
30023 && $next_type eq '/'
30024 && $last_nonblank_token eq ']' )
30026 $op_expected = OPERATOR;
30029 # Patch for RT #116344: misparse a ternary operator after an anonymous
30031 # return ref {} ? 1 : 0;
30032 # The right brace should really be marked type 'R' in this case, and
30033 # it is safest to return an UNKNOWN here. Expecting a TERM will
30034 # cause the '?' to always be interpreted as a pattern delimiter
30035 # rather than introducing a ternary operator.
30036 elsif ( $tok eq '?' ) {
30037 $op_expected = UNKNOWN;
30040 $op_expected = TERM;
30044 # something else..what did I forget?
30047 # collecting diagnostics on unknown operator types..see what was missed
30048 $op_expected = UNKNOWN;
30050 "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
30054 TOKENIZER_DEBUG_FLAG_EXPECT && do {
30056 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
30058 return $op_expected;
30061 sub new_statement_ok {
30063 # return true if the current token can start a new statement
30064 # USES GLOBAL VARIABLES: $last_nonblank_type
30066 return label_ok() # a label would be ok here
30068 || $last_nonblank_type eq 'J'; # or we follow a label
30074 # Decide if a bare word followed by a colon here is a label
30075 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
30076 # $brace_depth, @brace_type
30078 # if it follows an opening or closing code block curly brace..
30079 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
30080 && $last_nonblank_type eq $last_nonblank_token )
30083 # it is a label if and only if the curly encloses a code block
30084 return $brace_type[$brace_depth];
30087 # otherwise, it is a label if and only if it follows a ';' (real or fake)
30090 return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
30094 sub code_block_type {
30096 # Decide if this is a block of code, and its type.
30097 # Must be called only when $type = $token = '{'
30098 # The problem is to distinguish between the start of a block of code
30099 # and the start of an anonymous hash reference
30100 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
30101 # to indicate the type of code block. (For example, 'last_nonblank_token'
30102 # might be 'if' for an if block, 'else' for an else block, etc).
30103 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
30104 # $last_nonblank_block_type, $brace_depth, @brace_type
30106 # handle case of multiple '{'s
30108 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
30110 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
30111 if ( $last_nonblank_token eq '{'
30112 && $last_nonblank_type eq $last_nonblank_token )
30115 # opening brace where a statement may appear is probably
30116 # a code block but might be and anonymous hash reference
30117 if ( $brace_type[$brace_depth] ) {
30118 return decide_if_code_block( $i, $rtokens, $rtoken_type,
30119 $max_token_index );
30122 # cannot start a code block within an anonymous hash
30128 elsif ( $last_nonblank_token eq ';' ) {
30130 # an opening brace where a statement may appear is probably
30131 # a code block but might be and anonymous hash reference
30132 return decide_if_code_block( $i, $rtokens, $rtoken_type,
30133 $max_token_index );
30136 # handle case of '}{'
30137 elsif ($last_nonblank_token eq '}'
30138 && $last_nonblank_type eq $last_nonblank_token )
30141 # a } { situation ...
30142 # could be hash reference after code block..(blktype1.t)
30143 if ($last_nonblank_block_type) {
30144 return decide_if_code_block( $i, $rtokens, $rtoken_type,
30145 $max_token_index );
30148 # must be a block if it follows a closing hash reference
30150 return $last_nonblank_token;
30154 ################################################################
30155 # NOTE: braces after type characters start code blocks, but for
30156 # simplicity these are not identified as such. See also
30157 # sub is_non_structural_brace.
30158 ################################################################
30160 ## elsif ( $last_nonblank_type eq 't' ) {
30161 ## return $last_nonblank_token;
30164 # brace after label:
30165 elsif ( $last_nonblank_type eq 'J' ) {
30166 return $last_nonblank_token;
30169 # otherwise, look at previous token. This must be a code block if
30170 # it follows any of these:
30171 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
30172 elsif ( $is_code_block_token{$last_nonblank_token} ) {
30174 # Bug Patch: Note that the opening brace after the 'if' in the following
30175 # snippet is an anonymous hash ref and not a code block!
30176 # print 'hi' if { x => 1, }->{x};
30177 # We can identify this situation because the last nonblank type
30178 # will be a keyword (instead of a closing peren)
30179 if ( $last_nonblank_token =~ /^(if|unless)$/
30180 && $last_nonblank_type eq 'k' )
30185 return $last_nonblank_token;
30189 # or a sub or package BLOCK
30190 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
30191 && $last_nonblank_token =~ /^(sub|package)\b/ )
30193 return $last_nonblank_token;
30196 elsif ( $statement_type =~ /^(sub|package)\b/ ) {
30197 return $statement_type;
30200 # user-defined subs with block parameters (like grep/map/eval)
30201 elsif ( $last_nonblank_type eq 'G' ) {
30202 return $last_nonblank_token;
30206 elsif ( $last_nonblank_type eq 'w' ) {
30207 return decide_if_code_block( $i, $rtokens, $rtoken_type,
30208 $max_token_index );
30211 # Patch for bug # RT #94338 reported by Daniel Trizen
30212 # for-loop in a parenthesized block-map triggering an error message:
30213 # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
30214 # Check for a code block within a parenthesized function call
30215 elsif ( $last_nonblank_token eq '(' ) {
30216 my $paren_type = $paren_type[$paren_depth];
30217 if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
30219 # We will mark this as a code block but use type 't' instead
30220 # of the name of the contining function. This will allow for
30221 # correct parsing but will usually produce better formatting.
30222 # Braces with block type 't' are not broken open automatically
30223 # in the formatter as are other code block types, and this usually
30225 return 't'; # (Not $paren_type)
30232 # handle unknown syntax ') {'
30233 # we previously appended a '()' to mark this case
30234 elsif ( $last_nonblank_token =~ /\(\)$/ ) {
30235 return $last_nonblank_token;
30238 # anything else must be anonymous hash reference
30244 sub decide_if_code_block {
30246 # USES GLOBAL VARIABLES: $last_nonblank_token
30247 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
30249 my ( $next_nonblank_token, $i_next ) =
30250 find_next_nonblank_token( $i, $rtokens, $max_token_index );
30252 # we are at a '{' where a statement may appear.
30253 # We must decide if this brace starts an anonymous hash or a code
30255 # return "" if anonymous hash, and $last_nonblank_token otherwise
30257 # initialize to be code BLOCK
30258 my $code_block_type = $last_nonblank_token;
30260 # Check for the common case of an empty anonymous hash reference:
30261 # Maybe something like sub { { } }
30262 if ( $next_nonblank_token eq '}' ) {
30263 $code_block_type = "";
30268 # To guess if this '{' is an anonymous hash reference, look ahead
30269 # and test as follows:
30271 # it is a hash reference if next come:
30272 # - a string or digit followed by a comma or =>
30273 # - bareword followed by =>
30274 # otherwise it is a code block
30276 # Examples of anonymous hash ref:
30280 # Examples of code blocks:
30281 # {1; print "hello\n", 1;}
30284 # We are only going to look ahead one more (nonblank/comment) line.
30285 # Strange formatting could cause a bad guess, but that's unlikely.
30289 # Ignore the rest of this line if it is a side comment
30290 if ( $next_nonblank_token ne '#' ) {
30291 @pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
30292 @pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
30294 my ( $rpre_tokens, $rpre_types ) =
30295 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
30296 # generous, and prevents
30298 # time in mangled files
30299 if ( defined($rpre_types) && @{$rpre_types} ) {
30300 push @pre_types, @{$rpre_types};
30301 push @pre_tokens, @{$rpre_tokens};
30304 # put a sentinel token to simplify stopping the search
30305 push @pre_types, '}';
30306 push @pre_types, '}';
30309 $jbeg = 1 if $pre_types[0] eq 'b';
30311 # first look for one of these
30313 # - bareword with leading -
30317 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
30319 # find the closing quote; don't worry about escapes
30320 my $quote_mark = $pre_types[$j];
30321 foreach my $k ( $j + 1 .. $#pre_types - 1 ) {
30322 if ( $pre_types[$k] eq $quote_mark ) {
30324 my $next = $pre_types[$j];
30329 elsif ( $pre_types[$j] eq 'd' ) {
30332 elsif ( $pre_types[$j] eq 'w' ) {
30335 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
30338 if ( $j > $jbeg ) {
30340 $j++ if $pre_types[$j] eq 'b';
30342 # Patched for RT #95708
30345 # it is a comma which is not a pattern delimeter except for qw
30347 $pre_types[$j] eq ','
30348 && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
30352 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
30355 $code_block_type = "";
30360 return $code_block_type;
30363 sub report_unexpected {
30365 # report unexpected token type and show where it is
30366 # USES GLOBAL VARIABLES: $tokenizer_self
30367 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
30368 $rpretoken_type, $input_line )
30371 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
30372 my $msg = "found $found where $expecting expected";
30373 my $pos = $rpretoken_map->[$i_tok];
30374 interrupt_logfile();
30375 my $input_line_number = $tokenizer_self->{_last_line_number};
30376 my ( $offset, $numbered_line, $underline ) =
30377 make_numbered_line( $input_line_number, $input_line, $pos );
30378 $underline = write_on_underline( $underline, $pos - $offset, '^' );
30381 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
30382 my $pos_prev = $rpretoken_map->[$last_nonblank_i];
30384 if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) {
30385 $num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev;
30388 $num = $pos - $pos_prev;
30390 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
30393 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
30394 $trailer = " (previous token underlined)";
30396 warning( $numbered_line . "\n" );
30397 warning( $underline . "\n" );
30398 warning( $msg . $trailer . "\n" );
30404 sub is_non_structural_brace {
30406 # Decide if a brace or bracket is structural or non-structural
30407 # by looking at the previous token and type
30408 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
30410 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
30411 # Tentatively deactivated because it caused the wrong operator expectation
30413 # $user = @vars[1] / 100;
30414 # Must update sub operator_expected before re-implementing.
30415 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
30419 ################################################################
30420 # NOTE: braces after type characters start code blocks, but for
30421 # simplicity these are not identified as such. See also
30422 # sub code_block_type
30423 ################################################################
30425 ##if ($last_nonblank_type eq 't') {return 0}
30427 # otherwise, it is non-structural if it is decorated
30428 # by type information.
30429 # For example, the '{' here is non-structural: ${xxx}
30431 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
30433 # or if we follow a hash or array closing curly brace or bracket
30434 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
30435 # because the first '}' would have been given type 'R'
30436 || $last_nonblank_type =~ /^([R\]])$/
30440 #########i#############################################################
30441 # Tokenizer routines for tracking container nesting depths
30442 #######################################################################
30444 # The following routines keep track of nesting depths of the nesting
30445 # types, ( [ { and ?. This is necessary for determining the indentation
30446 # level, and also for debugging programs. Not only do they keep track of
30447 # nesting depths of the individual brace types, but they check that each
30448 # of the other brace types is balanced within matching pairs. For
30449 # example, if the program sees this sequence:
30453 # then it can determine that there is an extra left paren somewhere
30454 # between the { and the }. And so on with every other possible
30455 # combination of outer and inner brace types. For another
30460 # which has an extra ] within the parens.
30462 # The brace types have indexes 0 .. 3 which are indexes into
30465 # The pair ? : are treated as just another nesting type, with ? acting
30466 # as the opening brace and : acting as the closing brace.
30470 # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
30472 # saves the nesting depth of brace type $b (where $b is either of the other
30473 # nesting types) when brace type $a enters a new depth. When this depth
30474 # decreases, a check is made that the current depth of brace types $b is
30475 # unchanged, or otherwise there must have been an error. This can
30476 # be very useful for localizing errors, particularly when perl runs to
30477 # the end of a large file (such as this one) and announces that there
30478 # is a problem somewhere.
30480 # A numerical sequence number is maintained for every nesting type,
30481 # so that each matching pair can be uniquely identified in a simple
30484 sub increase_nesting_depth {
30485 my ( $aa, $pos ) = @_;
30487 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
30488 # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
30490 $current_depth[$aa]++;
30492 $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
30493 my $input_line_number = $tokenizer_self->{_last_line_number};
30494 my $input_line = $tokenizer_self->{_line_text};
30496 # Sequence numbers increment by number of items. This keeps
30497 # a unique set of numbers but still allows the relative location
30498 # of any type to be determined.
30499 $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
30500 my $seqno = $nesting_sequence_number[$aa];
30501 $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
30503 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
30504 [ $input_line_number, $input_line, $pos ];
30506 for my $bb ( 0 .. $#closing_brace_names ) {
30507 next if ( $bb == $aa );
30508 $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
30511 # set a flag for indenting a nested ternary statement
30513 if ( $aa == QUESTION_COLON ) {
30514 $nested_ternary_flag[ $current_depth[$aa] ] = 0;
30515 if ( $current_depth[$aa] > 1 ) {
30516 if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
30517 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
30518 if ( $pdepth == $total_depth - 1 ) {
30520 $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
30525 $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
30526 $statement_type = "";
30527 return ( $seqno, $indent );
30530 sub decrease_nesting_depth {
30532 my ( $aa, $pos ) = @_;
30534 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
30535 # @current_sequence_number, @depth_array, @starting_line_of_current_depth
30538 my $input_line_number = $tokenizer_self->{_last_line_number};
30539 my $input_line = $tokenizer_self->{_line_text};
30543 if ( $current_depth[$aa] > 0 ) {
30545 # set a flag for un-indenting after seeing a nested ternary statement
30546 $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
30547 if ( $aa == QUESTION_COLON ) {
30548 $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
30550 $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
30552 # check that any brace types $bb contained within are balanced
30553 for my $bb ( 0 .. $#closing_brace_names ) {
30554 next if ( $bb == $aa );
30556 unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
30557 $current_depth[$bb] )
30560 $current_depth[$bb] -
30561 $depth_array[$aa][$bb][ $current_depth[$aa] ];
30563 # don't whine too many times
30564 my $saw_brace_error = get_saw_brace_error();
30566 $saw_brace_error <= MAX_NAG_MESSAGES
30568 # if too many closing types have occurred, we probably
30569 # already caught this error
30570 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
30573 interrupt_logfile();
30575 $starting_line_of_current_depth[$aa]
30576 [ $current_depth[$aa] ];
30577 my $sl = $rsl->[0];
30578 my $rel = [ $input_line_number, $input_line, $pos ];
30579 my $el = $rel->[0];
30582 if ( $diff == 1 || $diff == -1 ) {
30590 ? $opening_brace_names[$bb]
30591 : $closing_brace_names[$bb];
30592 write_error_indicator_pair( @{$rsl}, '^' );
30594 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
30599 $starting_line_of_current_depth[$bb]
30600 [ $current_depth[$bb] ];
30601 my $ml = $rml->[0];
30603 " The most recent un-matched $bname is on line $ml\n";
30604 write_error_indicator_pair( @{$rml}, '^' );
30606 write_error_indicator_pair( @{$rel}, '^' );
30610 increment_brace_error();
30613 $current_depth[$aa]--;
30617 my $saw_brace_error = get_saw_brace_error();
30618 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
30620 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
30622 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
30624 increment_brace_error();
30626 return ( $seqno, $outdent );
30629 sub check_final_nesting_depths {
30631 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
30633 for my $aa ( 0 .. $#closing_brace_names ) {
30635 if ( $current_depth[$aa] ) {
30637 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
30638 my $sl = $rsl->[0];
30640 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
30641 The most recent un-matched $opening_brace_names[$aa] is on line $sl
30643 indicate_error( $msg, @{$rsl}, '^' );
30644 increment_brace_error();
30650 #########i#############################################################
30651 # Tokenizer routines for looking ahead in input stream
30652 #######################################################################
30654 sub peek_ahead_for_n_nonblank_pre_tokens {
30656 # returns next n pretokens if they exist
30657 # returns undef's if hits eof without seeing any pretokens
30658 # USES GLOBAL VARIABLES: $tokenizer_self
30659 my $max_pretokens = shift;
30662 my ( $rpre_tokens, $rmap, $rpre_types );
30664 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
30666 $line =~ s/^\s*//; # trim leading blanks
30667 next if ( length($line) <= 0 ); # skip blank
30668 next if ( $line =~ /^#/ ); # skip comment
30669 ( $rpre_tokens, $rmap, $rpre_types ) =
30670 pre_tokenize( $line, $max_pretokens );
30673 return ( $rpre_tokens, $rpre_types );
30676 # look ahead for next non-blank, non-comment line of code
30677 sub peek_ahead_for_nonblank_token {
30679 # USES GLOBAL VARIABLES: $tokenizer_self
30680 my ( $rtokens, $max_token_index ) = @_;
30684 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
30686 $line =~ s/^\s*//; # trim leading blanks
30687 next if ( length($line) <= 0 ); # skip blank
30688 next if ( $line =~ /^#/ ); # skip comment
30689 my ( $rtok, $rmap, $rtype ) =
30690 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
30691 my $j = $max_token_index + 1;
30693 foreach my $tok ( @{$rtok} ) {
30694 last if ( $tok =~ "\n" );
30695 $rtokens->[ ++$j ] = $tok;
30702 #########i#############################################################
30703 # Tokenizer guessing routines for ambiguous situations
30704 #######################################################################
30706 sub guess_if_pattern_or_conditional {
30708 # this routine is called when we have encountered a ? following an
30709 # unknown bareword, and we must decide if it starts a pattern or not
30710 # input parameters:
30711 # $i - token index of the ? starting possible pattern
30712 # output parameters:
30713 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
30714 # msg = a warning or diagnostic message
30715 # USES GLOBAL VARIABLES: $last_nonblank_token
30717 # FIXME: this needs to be rewritten
30719 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
30720 my $is_pattern = 0;
30721 my $msg = "guessing that ? after $last_nonblank_token starts a ";
30723 if ( $i >= $max_token_index ) {
30724 $msg .= "conditional (no end to pattern found on the line)\n";
30729 my $next_token = $rtokens->[$i]; # first token after ?
30731 # look for a possible ending ? on this line..
30733 my $quote_depth = 0;
30734 my $quote_character = '';
30738 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
30741 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
30742 $quote_pos, $quote_depth, $max_token_index );
30746 # we didn't find an ending ? on this line,
30747 # so we bias towards conditional
30749 $msg .= "conditional (no ending ? on this line)\n";
30751 # we found an ending ?, so we bias towards a pattern
30755 # Watch out for an ending ? in quotes, like this
30756 # my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
30760 foreach my $ii ( $ibeg + 1 .. $i - 1 ) {
30761 my $tok = $rtokens->[$ii];
30762 if ( $tok eq ":" ) { $colons++ }
30763 if ( $tok eq "'" ) { $s_quote++ }
30764 if ( $tok eq '"' ) { $d_quote++ }
30766 if ( $s_quote % 2 || $d_quote % 2 || $colons ) {
30768 $msg .= "found ending ? but unbalanced quote chars\n";
30770 elsif ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
30772 $msg .= "pattern (found ending ? and pattern expected)\n";
30775 $msg .= "pattern (uncertain, but found ending ?)\n";
30779 return ( $is_pattern, $msg );
30782 sub guess_if_pattern_or_division {
30784 # this routine is called when we have encountered a / following an
30785 # unknown bareword, and we must decide if it starts a pattern or is a
30787 # input parameters:
30788 # $i - token index of the / starting possible pattern
30789 # output parameters:
30790 # $is_pattern = 0 if probably division, =1 if probably a pattern
30791 # msg = a warning or diagnostic message
30792 # USES GLOBAL VARIABLES: $last_nonblank_token
30793 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
30794 my $is_pattern = 0;
30795 my $msg = "guessing that / after $last_nonblank_token starts a ";
30797 if ( $i >= $max_token_index ) {
30798 $msg .= "division (no end to pattern found on the line)\n";
30802 my $divide_expected =
30803 numerator_expected( $i, $rtokens, $max_token_index );
30805 my $next_token = $rtokens->[$i]; # first token after slash
30807 # look for a possible ending / on this line..
30809 my $quote_depth = 0;
30810 my $quote_character = '';
30814 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
30817 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
30818 $quote_pos, $quote_depth, $max_token_index );
30822 # we didn't find an ending / on this line,
30823 # so we bias towards division
30824 if ( $divide_expected >= 0 ) {
30826 $msg .= "division (no ending / on this line)\n";
30829 $msg = "multi-line pattern (division not possible)\n";
30835 # we found an ending /, so we bias towards a pattern
30838 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
30840 if ( $divide_expected >= 0 ) {
30842 if ( $i - $ibeg > 60 ) {
30843 $msg .= "division (matching / too distant)\n";
30847 $msg .= "pattern (but division possible too)\n";
30853 $msg .= "pattern (division not possible)\n";
30858 if ( $divide_expected >= 0 ) {
30860 $msg .= "division (pattern not possible)\n";
30865 "pattern (uncertain, but division would not work here)\n";
30870 return ( $is_pattern, $msg );
30873 # try to resolve here-doc vs. shift by looking ahead for
30874 # non-code or the end token (currently only looks for end token)
30875 # returns 1 if it is probably a here doc, 0 if not
30876 sub guess_if_here_doc {
30878 # This is how many lines we will search for a target as part of the
30879 # guessing strategy. It is a constant because there is probably
30880 # little reason to change it.
30881 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
30883 my $HERE_DOC_WINDOW = 40;
30885 my $next_token = shift;
30886 my $here_doc_expected = 0;
30889 my $msg = "checking <<";
30891 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
30895 if ( $line =~ /^$next_token$/ ) {
30896 $msg .= " -- found target $next_token ahead $k lines\n";
30897 $here_doc_expected = 1; # got it
30900 last if ( $k >= $HERE_DOC_WINDOW );
30903 unless ($here_doc_expected) {
30905 if ( !defined($line) ) {
30906 $here_doc_expected = -1; # hit eof without seeing target
30907 $msg .= " -- must be shift; target $next_token not in file\n";
30910 else { # still unsure..taking a wild guess
30912 if ( !$is_constant{$current_package}{$next_token} ) {
30913 $here_doc_expected = 1;
30915 " -- guessing it's a here-doc ($next_token not a constant)\n";
30919 " -- guessing it's a shift ($next_token is a constant)\n";
30923 write_logfile_entry($msg);
30924 return $here_doc_expected;
30927 #########i#############################################################
30928 # Tokenizer Routines for scanning identifiers and related items
30929 #######################################################################
30931 sub scan_bare_identifier_do {
30933 # this routine is called to scan a token starting with an alphanumeric
30934 # variable or package separator, :: or '.
30935 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
30936 # $last_nonblank_type,@paren_type, $paren_depth
30938 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
30942 my $package = undef;
30946 # we have to back up one pretoken at a :: since each : is one pretoken
30947 if ( $tok eq '::' ) { $i_beg-- }
30948 if ( $tok eq '->' ) { $i_beg-- }
30949 my $pos_beg = $rtoken_map->[$i_beg];
30950 pos($input_line) = $pos_beg;
30957 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
30959 my $pos = pos($input_line);
30960 my $numc = $pos - $pos_beg;
30961 $tok = substr( $input_line, $pos_beg, $numc );
30963 # type 'w' includes anything without leading type info
30964 # ($,%,@,*) including something like abc::def::ghi
30968 if ( defined($2) ) { $sub_name = $2; }
30969 if ( defined($1) ) {
30972 # patch: don't allow isolated package name which just ends
30973 # in the old style package separator (single quote). Example:
30975 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
30979 $package =~ s/\'/::/g;
30980 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
30981 $package =~ s/::$//;
30984 $package = $current_package;
30986 if ( $is_keyword{$tok} ) {
30991 # if it is a bareword..
30992 if ( $type eq 'w' ) {
30994 # check for v-string with leading 'v' type character
30995 # (This seems to have precedence over filehandle, type 'Y')
30996 if ( $tok =~ /^v\d[_\d]*$/ ) {
30998 # we only have the first part - something like 'v101' -
31000 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
31001 $pos = pos($input_line);
31002 $numc = $pos - $pos_beg;
31003 $tok = substr( $input_line, $pos_beg, $numc );
31007 # warn if this version can't handle v-strings
31008 report_v_string($tok);
31011 elsif ( $is_constant{$package}{$sub_name} ) {
31015 # bareword after sort has implied empty prototype; for example:
31016 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
31017 # This has priority over whatever the user has specified.
31018 elsif ($last_nonblank_token eq 'sort'
31019 && $last_nonblank_type eq 'k' )
31024 # Note: strangely, perl does not seem to really let you create
31025 # functions which act like eval and do, in the sense that eval
31026 # and do may have operators following the final }, but any operators
31027 # that you create with prototype (&) apparently do not allow
31028 # trailing operators, only terms. This seems strange.
31029 # If this ever changes, here is the update
31030 # to make perltidy behave accordingly:
31032 # elsif ( $is_block_function{$package}{$tok} ) {
31033 # $tok='eval'; # patch to do braces like eval - doesn't work
31036 # FIXME: This could become a separate type to allow for different
31038 elsif ( $is_block_function{$package}{$sub_name} ) {
31042 elsif ( $is_block_list_function{$package}{$sub_name} ) {
31045 elsif ( $is_user_function{$package}{$sub_name} ) {
31047 $prototype = $user_function_prototype{$package}{$sub_name};
31050 # check for indirect object
31053 # added 2001-03-27: must not be followed immediately by '('
31055 ( $input_line !~ m/\G\(/gc )
31060 # preceded by keyword like 'print', 'printf' and friends
31061 $is_indirect_object_taker{$last_nonblank_token}
31063 # or preceded by something like 'print(' or 'printf('
31065 ( $last_nonblank_token eq '(' )
31066 && $is_indirect_object_taker{ $paren_type[$paren_depth]
31074 # may not be indirect object unless followed by a space
31075 if ( $input_line =~ m/\G\s+/gc ) {
31079 # Perl's indirect object notation is a very bad
31080 # thing and can cause subtle bugs, especially for
31081 # beginning programmers. And I haven't even been
31082 # able to figure out a sane warning scheme which
31083 # doesn't get in the way of good scripts.
31085 # Complain if a filehandle has any lower case
31086 # letters. This is suggested good practice.
31087 # Use 'sub_name' because something like
31088 # main::MYHANDLE is ok for filehandle
31089 if ( $sub_name =~ /[a-z]/ ) {
31091 # could be bug caused by older perltidy if
31093 if ( $input_line =~ m/\G\s*\(/gc ) {
31095 "Caution: unknown word '$tok' in indirect object slot\n"
31101 # bareword not followed by a space -- may not be filehandle
31102 # (may be function call defined in a 'use' statement)
31109 # Now we must convert back from character position
31110 # to pre_token index.
31111 # I don't think an error flag can occur here ..but who knows
31114 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
31116 warning("scan_bare_identifier: Possibly invalid tokenization\n");
31120 # no match but line not blank - could be syntax error
31121 # perl will take '::' alone without complaint
31125 # change this warning to log message if it becomes annoying
31126 warning("didn't find identifier after leading ::\n");
31128 return ( $i, $tok, $type, $prototype );
31133 # This is the new scanner and will eventually replace scan_identifier.
31134 # Only type 'sub' and 'package' are implemented.
31135 # Token types $ * % @ & -> are not yet implemented.
31137 # Scan identifier following a type token.
31138 # The type of call depends on $id_scan_state: $id_scan_state = ''
31139 # for starting call, in which case $tok must be the token defining
31142 # If the type token is the last nonblank token on the line, a value
31143 # of $id_scan_state = $tok is returned, indicating that further
31144 # calls must be made to get the identifier. If the type token is
31145 # not the last nonblank token on the line, the identifier is
31146 # scanned and handled and a value of '' is returned.
31147 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
31148 # $statement_type, $tokenizer_self
31150 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
31154 my ( $i_beg, $pos_beg );
31156 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
31157 #my ($a,$b,$c) = caller;
31158 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
31160 # on re-entry, start scanning at first token on the line
31161 if ($id_scan_state) {
31166 # on initial entry, start scanning just after type token
31169 $id_scan_state = $tok;
31173 # find $i_beg = index of next nonblank token,
31174 # and handle empty lines
31175 my $blank_line = 0;
31176 my $next_nonblank_token = $rtokens->[$i_beg];
31177 if ( $i_beg > $max_token_index ) {
31182 # only a '#' immediately after a '$' is not a comment
31183 if ( $next_nonblank_token eq '#' ) {
31184 unless ( $tok eq '$' ) {
31189 if ( $next_nonblank_token =~ /^\s/ ) {
31190 ( $next_nonblank_token, $i_beg ) =
31191 find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
31192 $max_token_index );
31193 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
31199 # handle non-blank line; identifier, if any, must follow
31200 unless ($blank_line) {
31202 if ( $id_scan_state eq 'sub' ) {
31203 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
31204 $input_line, $i, $i_beg,
31205 $tok, $type, $rtokens,
31206 $rtoken_map, $id_scan_state, $max_token_index
31210 elsif ( $id_scan_state eq 'package' ) {
31211 ( $i, $tok, $type ) =
31212 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
31213 $rtoken_map, $max_token_index );
31214 $id_scan_state = '';
31218 warning("invalid token in scan_id: $tok\n");
31219 $id_scan_state = '';
31223 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
31225 # shouldn't happen:
31227 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
31229 report_definite_bug();
31232 TOKENIZER_DEBUG_FLAG_NSCAN && do {
31234 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
31236 return ( $i, $tok, $type, $id_scan_state );
31239 sub check_prototype {
31240 my ( $proto, $package, $subname ) = @_;
31241 return unless ( defined($package) && defined($subname) );
31242 if ( defined($proto) ) {
31243 $proto =~ s/^\s*\(\s*//;
31244 $proto =~ s/\s*\)$//;
31246 $is_user_function{$package}{$subname} = 1;
31247 $user_function_prototype{$package}{$subname} = "($proto)";
31249 # prototypes containing '&' must be treated specially..
31250 if ( $proto =~ /\&/ ) {
31252 # right curly braces of prototypes ending in
31253 # '&' may be followed by an operator
31254 if ( $proto =~ /\&$/ ) {
31255 $is_block_function{$package}{$subname} = 1;
31258 # right curly braces of prototypes NOT ending in
31259 # '&' may NOT be followed by an operator
31260 elsif ( $proto !~ /\&$/ ) {
31261 $is_block_list_function{$package}{$subname} = 1;
31266 $is_constant{$package}{$subname} = 1;
31270 $is_user_function{$package}{$subname} = 1;
31275 sub do_scan_package {
31277 # do_scan_package parses a package name
31278 # it is called with $i_beg equal to the index of the first nonblank
31279 # token following a 'package' token.
31280 # USES GLOBAL VARIABLES: $current_package,
31282 # package NAMESPACE
31283 # package NAMESPACE VERSION
31284 # package NAMESPACE BLOCK
31285 # package NAMESPACE VERSION BLOCK
31287 # If VERSION is provided, package sets the $VERSION variable in the given
31288 # namespace to a version object with the VERSION provided. VERSION must be
31289 # a "strict" style version number as defined by the version module: a
31290 # positive decimal number (integer or decimal-fraction) without
31291 # exponentiation or else a dotted-decimal v-string with a leading 'v'
31292 # character and at least three components.
31293 # reference http://perldoc.perl.org/functions/package.html
31295 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
31298 my $package = undef;
31299 my $pos_beg = $rtoken_map->[$i_beg];
31300 pos($input_line) = $pos_beg;
31302 # handle non-blank line; package name, if any, must follow
31303 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
31305 $package = ( defined($1) && $1 ) ? $1 : 'main';
31306 $package =~ s/\'/::/g;
31307 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
31308 $package =~ s/::$//;
31309 my $pos = pos($input_line);
31310 my $numc = $pos - $pos_beg;
31311 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
31314 # Now we must convert back from character position
31315 # to pre_token index.
31316 # I don't think an error flag can occur here ..but ?
31319 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
31320 if ($error) { warning("Possibly invalid package\n") }
31321 $current_package = $package;
31323 # we should now have package NAMESPACE
31324 # now expecting VERSION, BLOCK, or ; to follow ...
31325 # package NAMESPACE VERSION
31326 # package NAMESPACE BLOCK
31327 # package NAMESPACE VERSION BLOCK
31328 my ( $next_nonblank_token, $i_next ) =
31329 find_next_nonblank_token( $i, $rtokens, $max_token_index );
31331 # check that something recognizable follows, but do not parse.
31332 # A VERSION number will be parsed later as a number or v-string in the
31333 # normal way. What is important is to set the statement type if
31334 # everything looks okay so that the operator_expected() routine
31335 # knows that the number is in a package statement.
31336 # Examples of valid primitive tokens that might follow are:
31337 # 1235 . ; { } v3 v
31338 if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
31339 $statement_type = $tok;
31343 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
31348 # no match but line not blank --
31349 # could be a label with name package, like package: , for example.
31354 return ( $i, $tok, $type );
31357 sub scan_identifier_do {
31359 # This routine assembles tokens into identifiers. It maintains a
31360 # scan state, id_scan_state. It updates id_scan_state based upon
31361 # current id_scan_state and token, and returns an updated
31362 # id_scan_state and the next index after the identifier.
31363 # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
31364 # $last_nonblank_type
31366 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
31367 $expecting, $container_type )
31371 my $tok_begin = $rtokens->[$i_begin];
31372 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
31373 my $id_scan_state_begin = $id_scan_state;
31374 my $identifier_begin = $identifier;
31375 my $tok = $tok_begin;
31378 my $in_prototype_or_signature = $container_type =~ /^sub/;
31380 # these flags will be used to help figure out the type:
31381 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
31384 # allow old package separator (') except in 'use' statement
31385 my $allow_tick = ( $last_nonblank_token ne 'use' );
31387 # get started by defining a type and a state if necessary
31388 unless ($id_scan_state) {
31389 $context = UNKNOWN_CONTEXT;
31391 # fixup for digraph
31392 if ( $tok eq '>' ) {
31396 $identifier = $tok;
31398 if ( $tok eq '$' || $tok eq '*' ) {
31399 $id_scan_state = '$';
31400 $context = SCALAR_CONTEXT;
31402 elsif ( $tok eq '%' || $tok eq '@' ) {
31403 $id_scan_state = '$';
31404 $context = LIST_CONTEXT;
31406 elsif ( $tok eq '&' ) {
31407 $id_scan_state = '&';
31409 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
31410 $saw_alpha = 0; # 'sub' is considered type info here
31411 $id_scan_state = '$';
31412 $identifier .= ' '; # need a space to separate sub from sub name
31414 elsif ( $tok eq '::' ) {
31415 $id_scan_state = 'A';
31417 elsif ( $tok =~ /^[A-Za-z_]/ ) {
31418 $id_scan_state = ':';
31420 elsif ( $tok eq '->' ) {
31421 $id_scan_state = '$';
31426 my ( $a, $b, $c ) = caller;
31427 warning("Program Bug: scan_identifier given bad token = $tok \n");
31428 warning(" called from sub $a line: $c\n");
31429 report_definite_bug();
31431 $saw_type = !$saw_alpha;
31435 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
31438 # now loop to gather the identifier
31441 while ( $i < $max_token_index ) {
31442 $i_save = $i unless ( $tok =~ /^\s*$/ );
31443 $tok = $rtokens->[ ++$i ];
31445 if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
31450 if ( $id_scan_state eq '$' ) { # starting variable name
31452 if ( $tok eq '$' ) {
31454 $identifier .= $tok;
31456 # we've got a punctuation variable if end of line (punct.t)
31457 if ( $i == $max_token_index ) {
31459 $id_scan_state = '';
31464 # POSTDEFREF ->@ ->% ->& ->*
31465 elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
31466 $identifier .= $tok;
31468 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
31470 $id_scan_state = ':'; # now need ::
31471 $identifier .= $tok;
31473 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
31475 $id_scan_state = ':'; # now need ::
31476 $identifier .= $tok;
31478 # Perl will accept leading digits in identifiers,
31479 # although they may not always produce useful results.
31480 # Something like $main::0 is ok. But this also works:
31482 # sub howdy::123::bubba{ print "bubba $54321!\n" }
31483 # howdy::123::bubba();
31486 elsif ( $tok =~ /^[0-9]/ ) { # numeric
31488 $id_scan_state = ':'; # now need ::
31489 $identifier .= $tok;
31491 elsif ( $tok eq '::' ) {
31492 $id_scan_state = 'A';
31493 $identifier .= $tok;
31496 # $# and POSTDEFREF ->$#
31497 elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
31498 $identifier .= $tok; # keep same state, a $ could follow
31500 elsif ( $tok eq '{' ) {
31502 # check for something like ${#} or ${©}
31506 || $identifier eq '@'
31507 || $identifier eq '$#'
31509 && $i + 2 <= $max_token_index
31510 && $rtokens->[ $i + 2 ] eq '}'
31511 && $rtokens->[ $i + 1 ] !~ /[\s\w]/
31514 my $next2 = $rtokens->[ $i + 2 ];
31515 my $next1 = $rtokens->[ $i + 1 ];
31516 $identifier .= $tok . $next1 . $next2;
31518 $id_scan_state = '';
31522 # skip something like ${xxx} or ->{
31523 $id_scan_state = '';
31525 # if this is the first token of a line, any tokens for this
31526 # identifier have already been accumulated
31527 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
31532 # space ok after leading $ % * & @
31533 elsif ( $tok =~ /^\s*$/ ) {
31535 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
31537 if ( length($identifier) > 1 ) {
31538 $id_scan_state = '';
31540 $type = 'i'; # probably punctuation variable
31545 # spaces after $'s are common, and space after @
31546 # is harmless, so only complain about space
31547 # after other type characters. Space after $ and
31548 # @ will be removed in formatting. Report space
31549 # after % and * because they might indicate a
31550 # parsing error. In other words '% ' might be a
31551 # modulo operator. Delete this warning if it
31553 if ( $identifier !~ /^[\@\$]$/ ) {
31555 "Space in identifier, following $identifier\n";
31561 # space after '->' is ok
31563 elsif ( $tok eq '^' ) {
31565 # check for some special variables like $^W
31566 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
31567 $identifier .= $tok;
31568 $id_scan_state = 'A';
31570 # Perl accepts '$^]' or '@^]', but
31571 # there must not be a space before the ']'.
31572 my $next1 = $rtokens->[ $i + 1 ];
31573 if ( $next1 eq ']' ) {
31575 $identifier .= $next1;
31576 $id_scan_state = "";
31581 $id_scan_state = '';
31584 else { # something else
31586 if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
31587 $id_scan_state = '';
31589 $type = 'i'; # probably punctuation variable
31593 # check for various punctuation variables
31594 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
31595 $identifier .= $tok;
31598 # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
31599 elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
31600 $identifier .= $tok;
31603 elsif ( $identifier eq '$#' ) {
31605 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
31607 # perl seems to allow just these: $#: $#- $#+
31608 elsif ( $tok =~ /^[\:\-\+]$/ ) {
31610 $identifier .= $tok;
31614 write_logfile_entry( 'Use of $# is deprecated' . "\n" );
31617 elsif ( $identifier eq '$$' ) {
31619 # perl does not allow references to punctuation
31620 # variables without braces. For example, this
31624 # You would have to use
31628 if ( $tok eq '{' ) { $type = 't' }
31629 else { $type = 'i' }
31631 elsif ( $identifier eq '->' ) {
31636 if ( length($identifier) == 1 ) { $identifier = ''; }
31638 $id_scan_state = '';
31642 elsif ( $id_scan_state eq '&' ) { # starting sub call?
31644 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
31645 $id_scan_state = ':'; # now need ::
31647 $identifier .= $tok;
31649 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
31650 $id_scan_state = ':'; # now need ::
31652 $identifier .= $tok;
31654 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
31655 $id_scan_state = ':'; # now need ::
31657 $identifier .= $tok;
31659 elsif ( $tok =~ /^\s*$/ ) { # allow space
31661 elsif ( $tok eq '::' ) { # leading ::
31662 $id_scan_state = 'A'; # accept alpha next
31663 $identifier .= $tok;
31665 elsif ( $tok eq '{' ) {
31666 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
31668 $id_scan_state = '';
31673 # punctuation variable?
31674 # testfile: cunningham4.pl
31676 # We have to be careful here. If we are in an unknown state,
31677 # we will reject the punctuation variable. In the following
31678 # example the '&' is a binary operator but we are in an unknown
31679 # state because there is no sigil on 'Prima', so we don't
31680 # know what it is. But it is a bad guess that
31681 # '&~' is a function variable.
31682 # $self->{text}->{colorMap}->[
31683 # Prima::PodView::COLOR_CODE_FOREGROUND
31684 # & ~tb::COLOR_INDEX ] =
31685 # $sec->{ColorCode}
31686 if ( $identifier eq '&' && $expecting ) {
31687 $identifier .= $tok;
31694 $id_scan_state = '';
31698 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
31700 if ( $tok =~ /^[A-Za-z_]/ ) { # found it
31701 $identifier .= $tok;
31702 $id_scan_state = ':'; # now need ::
31705 elsif ( $tok eq "'" && $allow_tick ) {
31706 $identifier .= $tok;
31707 $id_scan_state = ':'; # now need ::
31710 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
31711 $identifier .= $tok;
31712 $id_scan_state = ':'; # now need ::
31715 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
31716 $id_scan_state = '(';
31717 $identifier .= $tok;
31719 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
31720 $id_scan_state = ')';
31721 $identifier .= $tok;
31724 $id_scan_state = '';
31729 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
31731 if ( $tok eq '::' ) { # got it
31732 $identifier .= $tok;
31733 $id_scan_state = 'A'; # now require alpha
31735 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
31736 $identifier .= $tok;
31737 $id_scan_state = ':'; # now need ::
31740 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
31741 $identifier .= $tok;
31742 $id_scan_state = ':'; # now need ::
31745 elsif ( $tok eq "'" && $allow_tick ) { # tick
31747 if ( $is_keyword{$identifier} ) {
31748 $id_scan_state = ''; # that's all
31752 $identifier .= $tok;
31755 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
31756 $id_scan_state = '(';
31757 $identifier .= $tok;
31759 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
31760 $id_scan_state = ')';
31761 $identifier .= $tok;
31764 $id_scan_state = ''; # that's all
31769 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
31771 if ( $tok eq '(' ) { # got it
31772 $identifier .= $tok;
31773 $id_scan_state = ')'; # now find the end of it
31775 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
31776 $identifier .= $tok;
31779 $id_scan_state = ''; # that's all - no prototype
31784 elsif ( $id_scan_state eq ')' ) { # looking for ) to end
31786 if ( $tok eq ')' ) { # got it
31787 $identifier .= $tok;
31788 $id_scan_state = ''; # all done
31791 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
31792 $identifier .= $tok;
31794 else { # probable error in script, but keep going
31795 warning("Unexpected '$tok' while seeking end of prototype\n");
31796 $identifier .= $tok;
31799 else { # can get here due to error in initialization
31800 $id_scan_state = '';
31806 if ( $id_scan_state eq ')' ) {
31807 warning("Hit end of line while seeking ) to end prototype\n");
31810 # once we enter the actual identifier, it may not extend beyond
31811 # the end of the current line
31812 if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
31813 $id_scan_state = '';
31815 if ( $i < 0 ) { $i = 0 }
31822 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
31825 else { $type = 'i' }
31827 elsif ( $identifier eq '->' ) {
31831 ( length($identifier) > 1 )
31833 # In something like '@$=' we have an identifier '@$'
31834 # In something like '$${' we have type '$$' (and only
31835 # part of an identifier)
31836 && !( $identifier =~ /\$$/ && $tok eq '{' )
31837 && ( $identifier !~ /^(sub |package )$/ )
31842 else { $type = 't' }
31844 elsif ($saw_alpha) {
31846 # type 'w' includes anything without leading type info
31847 # ($,%,@,*) including something like abc::def::ghi
31852 } # this can happen on a restart
31856 $tok = $identifier;
31857 if ($message) { write_logfile_entry($message) }
31864 TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
31865 my ( $a, $b, $c ) = caller;
31867 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
31869 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
31871 return ( $i, $tok, $type, $id_scan_state, $identifier );
31876 # saved package and subnames in case prototype is on separate line
31877 my ( $package_saved, $subname_saved );
31881 # do_scan_sub parses a sub name and prototype
31882 # it is called with $i_beg equal to the index of the first nonblank
31883 # token following a 'sub' token.
31885 # TODO: add future error checks to be sure we have a valid
31886 # sub name. For example, 'sub &doit' is wrong. Also, be sure
31887 # a name is given if and only if a non-anonymous sub is
31889 # USES GLOBAL VARS: $current_package, $last_nonblank_token,
31890 # $in_attribute_list, %saw_function_definition,
31894 $input_line, $i, $i_beg,
31895 $tok, $type, $rtokens,
31896 $rtoken_map, $id_scan_state, $max_token_index
31898 $id_scan_state = ""; # normally we get everything in one call
31899 my $subname = undef;
31900 my $package = undef;
31905 my $pos_beg = $rtoken_map->[$i_beg];
31906 pos($input_line) = $pos_beg;
31908 # Look for the sub NAME
31910 $input_line =~ m/\G\s*
31911 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
31912 (\w+) # NAME - required
31919 $package = ( defined($1) && $1 ) ? $1 : $current_package;
31920 $package =~ s/\'/::/g;
31921 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
31922 $package =~ s/::$//;
31923 my $pos = pos($input_line);
31924 my $numc = $pos - $pos_beg;
31925 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
31929 # Now look for PROTO ATTRS
31930 # Look for prototype/attributes which are usually on the same
31931 # line as the sub name but which might be on a separate line.
31932 # For example, we might have an anonymous sub with attributes,
31933 # or a prototype on a separate line from its sub name
31935 # NOTE: We only want to parse PROTOTYPES here. If we see anything that
31936 # does not look like a prototype, we assume it is a SIGNATURE and we
31937 # will stop and let the the standard tokenizer handle it. In
31938 # particular, we stop if we see any nested parens, braces, or commas.
31939 my $saw_opening_paren = $input_line =~ /\G\s*\(/;
31941 $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
31942 (\s*:)? # ATTRS leading ':'
31950 # If we also found the sub name on this call then append PROTO.
31951 # This is not necessary but for compatability with previous
31952 # versions when the -csc flag is used:
31953 if ( $match && $proto ) {
31958 # Handle prototype on separate line from subname
31959 if ($subname_saved) {
31960 $package = $package_saved;
31961 $subname = $subname_saved;
31962 $tok = $last_nonblank_token;
31969 # ATTRS: if there are attributes, back up and let the ':' be
31970 # found later by the scanner.
31971 my $pos = pos($input_line);
31973 $pos -= length($attrs);
31976 my $next_nonblank_token = $tok;
31978 # catch case of line with leading ATTR ':' after anonymous sub
31979 if ( $pos == $pos_beg && $tok eq ':' ) {
31981 $in_attribute_list = 1;
31984 # Otherwise, if we found a match we must convert back from
31985 # string position to the pre_token index for continued parsing.
31988 # I don't think an error flag can occur here ..but ?
31990 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
31991 $max_token_index );
31992 if ($error) { warning("Possibly invalid sub\n") }
31994 # check for multiple definitions of a sub
31995 ( $next_nonblank_token, my $i_next ) =
31996 find_next_nonblank_token_on_this_line( $i, $rtokens,
31997 $max_token_index );
32000 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
32001 { # skip blank or side comment
32002 my ( $rpre_tokens, $rpre_types ) =
32003 peek_ahead_for_n_nonblank_pre_tokens(1);
32004 if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
32005 $next_nonblank_token = $rpre_tokens->[0];
32008 $next_nonblank_token = '}';
32011 $package_saved = "";
32012 $subname_saved = "";
32014 # See what's next...
32015 if ( $next_nonblank_token eq '{' ) {
32018 # Check for multiple definitions of a sub, but
32019 # it is ok to have multiple sub BEGIN, etc,
32020 # so we do not complain if name is all caps
32021 if ( $saw_function_definition{$package}{$subname}
32022 && $subname !~ /^[A-Z]+$/ )
32024 my $lno = $saw_function_definition{$package}{$subname};
32026 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
32029 $saw_function_definition{$package}{$subname} =
32030 $tokenizer_self->{_last_line_number};
32033 elsif ( $next_nonblank_token eq ';' ) {
32035 elsif ( $next_nonblank_token eq '}' ) {
32038 # ATTRS - if an attribute list follows, remember the name
32039 # of the sub so the next opening brace can be labeled.
32040 # Setting 'statement_type' causes any ':'s to introduce
32042 elsif ( $next_nonblank_token eq ':' ) {
32043 $statement_type = $tok;
32046 # if we stopped before an open paren ...
32047 elsif ( $next_nonblank_token eq '(' ) {
32049 # If we DID NOT see this paren above then it must be on the
32050 # next line so we will set a flag to come back here and see if
32051 # it is a PROTOTYPE
32053 # Otherwise, we assume it is a SIGNATURE rather than a
32054 # PROTOTYPE and let the normal tokenizer handle it as a list
32055 if ( !$saw_opening_paren ) {
32056 $id_scan_state = 'sub'; # we must come back to get proto
32057 $package_saved = $package;
32058 $subname_saved = $subname;
32060 $statement_type = $tok;
32062 elsif ($next_nonblank_token) { # EOF technically ok
32064 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
32067 check_prototype( $proto, $package, $subname );
32070 # no match but line not blank
32073 return ( $i, $tok, $type, $id_scan_state );
32077 #########i###############################################################
32078 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
32079 #########################################################################
32081 sub find_next_nonblank_token {
32082 my ( $i, $rtokens, $max_token_index ) = @_;
32084 if ( $i >= $max_token_index ) {
32085 if ( !peeked_ahead() ) {
32088 peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
32091 my $next_nonblank_token = $rtokens->[ ++$i ];
32093 if ( $next_nonblank_token =~ /^\s*$/ ) {
32094 $next_nonblank_token = $rtokens->[ ++$i ];
32096 return ( $next_nonblank_token, $i );
32099 sub numerator_expected {
32101 # this is a filter for a possible numerator, in support of guessing
32102 # for the / pattern delimiter token.
32107 # Note: I am using the convention that variables ending in
32108 # _expected have these 3 possible values.
32109 my ( $i, $rtokens, $max_token_index ) = @_;
32110 my $numerator_expected = 0;
32112 my $next_token = $rtokens->[ $i + 1 ];
32113 if ( $next_token eq '=' ) { $i++; } # handle /=
32114 my ( $next_nonblank_token, $i_next ) =
32115 find_next_nonblank_token( $i, $rtokens, $max_token_index );
32117 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
32118 $numerator_expected = 1;
32122 if ( $next_nonblank_token =~ /^\s*$/ ) {
32123 $numerator_expected = 0;
32126 $numerator_expected = -1;
32129 return $numerator_expected;
32132 sub pattern_expected {
32134 # This is the start of a filter for a possible pattern.
32135 # It looks at the token after a possible pattern and tries to
32136 # determine if that token could end a pattern.
32141 my ( $i, $rtokens, $max_token_index ) = @_;
32142 my $is_pattern = 0;
32144 my $next_token = $rtokens->[ $i + 1 ];
32145 if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
32146 my ( $next_nonblank_token, $i_next ) =
32147 find_next_nonblank_token( $i, $rtokens, $max_token_index );
32149 # list of tokens which may follow a pattern
32150 # (can probably be expanded)
32151 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
32157 if ( $next_nonblank_token =~ /^\s*$/ ) {
32164 return $is_pattern;
32167 sub find_next_nonblank_token_on_this_line {
32168 my ( $i, $rtokens, $max_token_index ) = @_;
32169 my $next_nonblank_token;
32171 if ( $i < $max_token_index ) {
32172 $next_nonblank_token = $rtokens->[ ++$i ];
32174 if ( $next_nonblank_token =~ /^\s*$/ ) {
32176 if ( $i < $max_token_index ) {
32177 $next_nonblank_token = $rtokens->[ ++$i ];
32182 $next_nonblank_token = "";
32184 return ( $next_nonblank_token, $i );
32187 sub find_angle_operator_termination {
32189 # We are looking at a '<' and want to know if it is an angle operator.
32190 # We are to return:
32191 # $i = pretoken index of ending '>' if found, current $i otherwise
32192 # $type = 'Q' if found, '>' otherwise
32193 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
32196 pos($input_line) = 1 + $rtoken_map->[$i];
32200 # we just have to find the next '>' if a term is expected
32201 if ( $expecting == TERM ) { $filter = '[\>]' }
32203 # we have to guess if we don't know what is expected
32204 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
32206 # shouldn't happen - we shouldn't be here if operator is expected
32207 else { warning("Program Bug in find_angle_operator_termination\n") }
32209 # To illustrate what we might be looking at, in case we are
32210 # guessing, here are some examples of valid angle operators
32217 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
32218 # <${PREFIX}*img*.$IMAGE_TYPE>
32219 # <img*.$IMAGE_TYPE>
32220 # <Timg*.$IMAGE_TYPE>
32221 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
32223 # Here are some examples of lines which do not have angle operators:
32224 # return undef unless $self->[2]++ < $#{$self->[1]};
32227 # the following line from dlister.pl caused trouble:
32228 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
32230 # If the '<' starts an angle operator, it must end on this line and
32231 # it must not have certain characters like ';' and '=' in it. I use
32232 # this to limit the testing. This filter should be improved if
32235 if ( $input_line =~ /($filter)/g ) {
32239 # We MAY have found an angle operator termination if we get
32240 # here, but we need to do more to be sure we haven't been
32242 my $pos = pos($input_line);
32244 my $pos_beg = $rtoken_map->[$i];
32245 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
32247 # Reject if the closing '>' follows a '-' as in:
32248 # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
32249 if ( $expecting eq UNKNOWN ) {
32250 my $check = substr( $input_line, $pos - 2, 1 );
32251 if ( $check eq '-' ) {
32252 return ( $i, $type );
32256 ######################################debug#####
32257 #write_diagnostics( "ANGLE? :$str\n");
32258 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
32259 ######################################debug#####
32263 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
32265 # It may be possible that a quote ends midway in a pretoken.
32266 # If this happens, it may be necessary to split the pretoken.
32269 "Possible tokinization error..please check this line\n");
32270 report_possible_bug();
32273 # Now let's see where we stand....
32274 # OK if math op not possible
32275 if ( $expecting == TERM ) {
32278 # OK if there are no more than 2 pre-tokens inside
32279 # (not possible to write 2 token math between < and >)
32280 # This catches most common cases
32281 elsif ( $i <= $i_beg + 3 ) {
32282 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
32288 # Let's try a Brace Test: any braces inside must balance
32290 while ( $str =~ /\{/g ) { $br++ }
32291 while ( $str =~ /\}/g ) { $br-- }
32293 while ( $str =~ /\[/g ) { $sb++ }
32294 while ( $str =~ /\]/g ) { $sb-- }
32296 while ( $str =~ /\(/g ) { $pr++ }
32297 while ( $str =~ /\)/g ) { $pr-- }
32299 # if braces do not balance - not angle operator
32300 if ( $br || $sb || $pr ) {
32304 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
32307 # we should keep doing more checks here...to be continued
32308 # Tentatively accepting this as a valid angle operator.
32309 # There are lots more things that can be checked.
32312 "ANGLE-Guessing yes: $str expecting=$expecting\n");
32313 write_logfile_entry("Guessing angle operator here: $str\n");
32318 # didn't find ending >
32320 if ( $expecting == TERM ) {
32321 warning("No ending > for angle operator\n");
32325 return ( $i, $type );
32328 sub scan_number_do {
32330 # scan a number in any of the formats that Perl accepts
32331 # Underbars (_) are allowed in decimal numbers.
32332 # input parameters -
32333 # $input_line - the string to scan
32334 # $i - pre_token index to start scanning
32335 # $rtoken_map - reference to the pre_token map giving starting
32336 # character position in $input_line of token $i
32337 # output parameters -
32338 # $i - last pre_token index of the number just scanned
32339 # number - the number (characters); or undef if not a number
32341 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
32342 my $pos_beg = $rtoken_map->[$i];
32345 my $number = undef;
32346 my $type = $input_type;
32348 my $first_char = substr( $input_line, $pos_beg, 1 );
32350 # Look for bad starting characters; Shouldn't happen..
32351 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
32352 warning("Program bug - scan_number given character $first_char\n");
32353 report_definite_bug();
32354 return ( $i, $type, $number );
32357 # handle v-string without leading 'v' character ('Two Dot' rule)
32359 # TODO: v-strings may contain underscores
32360 pos($input_line) = $pos_beg;
32361 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
32362 $pos = pos($input_line);
32363 my $numc = $pos - $pos_beg;
32364 $number = substr( $input_line, $pos_beg, $numc );
32366 report_v_string($number);
32369 # handle octal, hex, binary
32370 if ( !defined($number) ) {
32371 pos($input_line) = $pos_beg;
32372 if ( $input_line =~
32373 /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
32375 $pos = pos($input_line);
32376 my $numc = $pos - $pos_beg;
32377 $number = substr( $input_line, $pos_beg, $numc );
32383 if ( !defined($number) ) {
32384 pos($input_line) = $pos_beg;
32386 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
32387 $pos = pos($input_line);
32389 # watch out for things like 0..40 which would give 0. by this;
32390 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
32391 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
32395 my $numc = $pos - $pos_beg;
32396 $number = substr( $input_line, $pos_beg, $numc );
32401 # filter out non-numbers like e + - . e2 .e3 +e6
32402 # the rule: at least one digit, and any 'e' must be preceded by a digit
32404 $number !~ /\d/ # no digits
32405 || ( $number =~ /^(.*)[eE]/
32406 && $1 !~ /\d/ ) # or no digits before the 'e'
32410 $type = $input_type;
32411 return ( $i, $type, $number );
32414 # Found a number; now we must convert back from character position
32415 # to pre_token index. An error here implies user syntax error.
32416 # An example would be an invalid octal number like '009'.
32419 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
32420 if ($error) { warning("Possibly invalid number\n") }
32422 return ( $i, $type, $number );
32425 sub inverse_pretoken_map {
32427 # Starting with the current pre_token index $i, scan forward until
32428 # finding the index of the next pre_token whose position is $pos.
32429 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
32432 while ( ++$i <= $max_token_index ) {
32434 if ( $pos <= $rtoken_map->[$i] ) {
32436 # Let the calling routine handle errors in which we do not
32437 # land on a pre-token boundary. It can happen by running
32438 # perltidy on some non-perl scripts, for example.
32439 if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
32444 return ( $i, $error );
32447 sub find_here_doc {
32449 # find the target of a here document, if any
32450 # input parameters:
32451 # $i - token index of the second < of <<
32452 # ($i must be less than the last token index if this is called)
32453 # output parameters:
32454 # $found_target = 0 didn't find target; =1 found target
32455 # HERE_TARGET - the target string (may be empty string)
32456 # $i - unchanged if not here doc,
32457 # or index of the last token of the here target
32458 # $saw_error - flag noting unbalanced quote on here target
32459 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
32461 my $found_target = 0;
32462 my $here_doc_target = '';
32463 my $here_quote_character = '';
32465 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
32466 $next_token = $rtokens->[ $i + 1 ];
32468 # perl allows a backslash before the target string (heredoc.t)
32470 if ( $next_token eq '\\' ) {
32472 $next_token = $rtokens->[ $i + 2 ];
32475 ( $next_nonblank_token, $i_next_nonblank ) =
32476 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
32478 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
32481 my $quote_depth = 0;
32486 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
32489 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
32490 $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
32492 if ($in_quote) { # didn't find end of quote, so no target found
32494 if ( $expecting == TERM ) {
32496 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
32501 else { # found ending quote
32506 foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
32507 $tokj = $rtokens->[$j];
32509 # we have to remove any backslash before the quote character
32510 # so that the here-doc-target exactly matches this string
32514 && $rtokens->[ $j + 1 ] eq $here_quote_character );
32515 $here_doc_target .= $tokj;
32520 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
32522 write_logfile_entry(
32523 "found blank here-target after <<; suggest using \"\"\n");
32526 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
32528 my $here_doc_expected;
32529 if ( $expecting == UNKNOWN ) {
32530 $here_doc_expected = guess_if_here_doc($next_token);
32533 $here_doc_expected = 1;
32536 if ($here_doc_expected) {
32538 $here_doc_target = $next_token;
32545 if ( $expecting == TERM ) {
32547 write_logfile_entry("Note: bare here-doc operator <<\n");
32554 # patch to neglect any prepended backslash
32555 if ( $found_target && $backslash ) { $i++ }
32557 return ( $found_target, $here_doc_target, $here_quote_character, $i,
32563 # follow (or continue following) quoted string(s)
32564 # $in_quote return code:
32565 # 0 - ok, found end
32566 # 1 - still must find end of quote whose target is $quote_character
32567 # 2 - still looking for end of first of two quotes
32569 # Returns updated strings:
32570 # $quoted_string_1 = quoted string seen while in_quote=1
32571 # $quoted_string_2 = quoted string seen while in_quote=2
32573 $i, $in_quote, $quote_character,
32574 $quote_pos, $quote_depth, $quoted_string_1,
32575 $quoted_string_2, $rtokens, $rtoken_map,
32579 my $in_quote_starting = $in_quote;
32582 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
32585 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
32588 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
32589 $quote_pos, $quote_depth, $max_token_index );
32590 $quoted_string_2 .= $quoted_string;
32591 if ( $in_quote == 1 ) {
32592 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
32593 $quote_character = '';
32596 $quoted_string_2 .= "\n";
32600 if ( $in_quote == 1 ) { # one (more) quote to follow
32603 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
32606 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
32607 $quote_pos, $quote_depth, $max_token_index );
32608 $quoted_string_1 .= $quoted_string;
32609 if ( $in_quote == 1 ) {
32610 $quoted_string_1 .= "\n";
32613 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
32614 $quoted_string_1, $quoted_string_2 );
32617 sub follow_quoted_string {
32619 # scan for a specific token, skipping escaped characters
32620 # if the quote character is blank, use the first non-blank character
32621 # input parameters:
32622 # $rtokens = reference to the array of tokens
32623 # $i = the token index of the first character to search
32624 # $in_quote = number of quoted strings being followed
32625 # $beginning_tok = the starting quote character
32626 # $quote_pos = index to check next for alphanumeric delimiter
32627 # output parameters:
32628 # $i = the token index of the ending quote character
32629 # $in_quote = decremented if found end, unchanged if not
32630 # $beginning_tok = the starting quote character
32631 # $quote_pos = index to check next for alphanumeric delimiter
32632 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
32633 # $quoted_string = the text of the quote (without quotation tokens)
32634 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
32637 my ( $tok, $end_tok );
32638 my $i = $i_beg - 1;
32639 my $quoted_string = "";
32641 TOKENIZER_DEBUG_FLAG_QUOTE && do {
32643 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
32646 # get the corresponding end token
32647 if ( $beginning_tok !~ /^\s*$/ ) {
32648 $end_tok = matching_end_token($beginning_tok);
32651 # a blank token means we must find and use the first non-blank one
32653 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
32655 while ( $i < $max_token_index ) {
32656 $tok = $rtokens->[ ++$i ];
32658 if ( $tok !~ /^\s*$/ ) {
32660 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
32661 $i = $max_token_index;
32665 if ( length($tok) > 1 ) {
32666 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
32667 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
32670 $beginning_tok = $tok;
32673 $end_tok = matching_end_token($beginning_tok);
32679 $allow_quote_comments = 1;
32684 # There are two different loops which search for the ending quote
32685 # character. In the rare case of an alphanumeric quote delimiter, we
32686 # have to look through alphanumeric tokens character-by-character, since
32687 # the pre-tokenization process combines multiple alphanumeric
32688 # characters, whereas for a non-alphanumeric delimiter, only tokens of
32689 # length 1 can match.
32691 ###################################################################
32692 # Case 1 (rare): loop for case of alphanumeric quote delimiter..
32693 # "quote_pos" is the position the current word to begin searching
32694 ###################################################################
32695 if ( $beginning_tok =~ /\w/ ) {
32697 # Note this because it is not recommended practice except
32698 # for obfuscated perl contests
32699 if ( $in_quote == 1 ) {
32700 write_logfile_entry(
32701 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
32704 while ( $i < $max_token_index ) {
32706 if ( $quote_pos == 0 || ( $i < 0 ) ) {
32707 $tok = $rtokens->[ ++$i ];
32709 if ( $tok eq '\\' ) {
32711 # retain backslash unless it hides the end token
32712 $quoted_string .= $tok
32713 unless $rtokens->[ $i + 1 ] eq $end_tok;
32715 last if ( $i >= $max_token_index );
32716 $tok = $rtokens->[ ++$i ];
32719 my $old_pos = $quote_pos;
32721 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
32725 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
32727 if ( $quote_pos > 0 ) {
32730 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
32734 if ( $quote_depth == 0 ) {
32740 $quoted_string .= substr( $tok, $old_pos );
32745 ########################################################################
32746 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
32747 ########################################################################
32750 while ( $i < $max_token_index ) {
32751 $tok = $rtokens->[ ++$i ];
32753 if ( $tok eq $end_tok ) {
32756 if ( $quote_depth == 0 ) {
32761 elsif ( $tok eq $beginning_tok ) {
32764 elsif ( $tok eq '\\' ) {
32766 # retain backslash unless it hides the beginning or end token
32767 $tok = $rtokens->[ ++$i ];
32768 $quoted_string .= '\\'
32769 unless ( $tok eq $end_tok || $tok eq $beginning_tok );
32771 $quoted_string .= $tok;
32774 if ( $i > $max_token_index ) { $i = $max_token_index }
32775 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
32779 sub indicate_error {
32780 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
32781 interrupt_logfile();
32783 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
32788 sub write_error_indicator_pair {
32789 my ( $line_number, $input_line, $pos, $carrat ) = @_;
32790 my ( $offset, $numbered_line, $underline ) =
32791 make_numbered_line( $line_number, $input_line, $pos );
32792 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
32793 warning( $numbered_line . "\n" );
32794 $underline =~ s/\s*$//;
32795 warning( $underline . "\n" );
32799 sub make_numbered_line {
32801 # Given an input line, its line number, and a character position of
32802 # interest, create a string not longer than 80 characters of the form
32803 # $lineno: sub_string
32804 # such that the sub_string of $str contains the position of interest
32806 # Here is an example of what we want, in this case we add trailing
32807 # '...' because the line is long.
32809 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
32811 # Here is another example, this time in which we used leading '...'
32812 # because of excessive length:
32814 # 2: ... er of the World Wide Web Consortium's
32816 # input parameters are:
32817 # $lineno = line number
32818 # $str = the text of the line
32819 # $pos = position of interest (the error) : 0 = first character
32822 # - $offset = an offset which corrects the position in case we only
32823 # display part of a line, such that $pos-$offset is the effective
32824 # position from the start of the displayed line.
32825 # - $numbered_line = the numbered line as above,
32826 # - $underline = a blank 'underline' which is all spaces with the same
32827 # number of characters as the numbered line.
32829 my ( $lineno, $str, $pos ) = @_;
32830 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
32831 my $excess = length($str) - $offset - 68;
32832 my $numc = ( $excess > 0 ) ? 68 : undef;
32834 if ( defined($numc) ) {
32835 if ( $offset == 0 ) {
32836 $str = substr( $str, $offset, $numc - 4 ) . " ...";
32839 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
32844 if ( $offset == 0 ) {
32847 $str = "... " . substr( $str, $offset + 4 );
32851 my $numbered_line = sprintf( "%d: ", $lineno );
32852 $offset -= length($numbered_line);
32853 $numbered_line .= $str;
32854 my $underline = " " x length($numbered_line);
32855 return ( $offset, $numbered_line, $underline );
32858 sub write_on_underline {
32860 # The "underline" is a string that shows where an error is; it starts
32861 # out as a string of blanks with the same length as the numbered line of
32862 # code above it, and we have to add marking to show where an error is.
32863 # In the example below, we want to write the string '--^' just below
32864 # the line of bad code:
32866 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
32868 # We are given the current underline string, plus a position and a
32869 # string to write on it.
32871 # In the above example, there will be 2 calls to do this:
32872 # First call: $pos=19, pos_chr=^
32873 # Second call: $pos=16, pos_chr=---
32875 # This is a trivial thing to do with substr, but there is some
32878 my ( $underline, $pos, $pos_chr ) = @_;
32880 # check for error..shouldn't happen
32881 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
32884 my $excess = length($pos_chr) + $pos - length($underline);
32885 if ( $excess > 0 ) {
32886 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
32888 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
32889 return ($underline);
32894 # Break a string, $str, into a sequence of preliminary tokens. We
32895 # are interested in these types of tokens:
32896 # words (type='w'), example: 'max_tokens_wanted'
32897 # digits (type = 'd'), example: '0755'
32898 # whitespace (type = 'b'), example: ' '
32899 # any other single character (i.e. punct; type = the character itself).
32900 # We cannot do better than this yet because we might be in a quoted
32901 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
32903 my ( $str, $max_tokens_wanted ) = @_;
32905 # we return references to these 3 arrays:
32906 my @tokens = (); # array of the tokens themselves
32907 my @token_map = (0); # string position of start of each token
32908 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
32913 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
32916 # note that this must come before words!
32917 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
32920 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
32922 # single-character punctuation
32923 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
32927 return ( \@tokens, \@token_map, \@type );
32931 push @token_map, pos($str);
32933 } while ( --$max_tokens_wanted != 0 );
32935 return ( \@tokens, \@token_map, \@type );
32940 # this is an old debug routine
32941 # not called, but saved for reference
32942 my ( $rtokens, $rtoken_map ) = @_;
32943 my $num = scalar( @{$rtokens} );
32945 foreach my $i ( 0 .. $num - 1 ) {
32946 my $len = length( $rtokens->[$i] );
32947 print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
32953 my %matching_end_token;
32956 %matching_end_token = (
32964 sub matching_end_token {
32966 # return closing character for a pattern
32967 my $beginning_token = shift;
32968 if ( $matching_end_token{$beginning_token} ) {
32969 return $matching_end_token{$beginning_token};
32971 return ($beginning_token);
32975 sub dump_token_types {
32976 my ( $class, $fh ) = @_;
32978 # This should be the latest list of token types in use
32979 # adding NEW_TOKENS: add a comment here
32980 print $fh <<'END_OF_LIST';
32982 Here is a list of the token types currently used for lines of type 'CODE'.
32983 For the following tokens, the "type" of a token is just the token itself.
32985 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
32986 ( ) <= >= == =~ !~ != ++ -- /= x=
32987 ... **= <<= >>= &&= ||= //= <=>
32988 , + - / * | % ! x ~ = \ ? : . < > ^ &
32990 The following additional token types are defined:
32993 b blank (white space)
32994 { indent: opening structural curly brace or square bracket or paren
32995 (code block, anonymous hash reference, or anonymous array reference)
32996 } outdent: right structural curly brace or square bracket or paren
32997 [ left non-structural square bracket (enclosing an array index)
32998 ] right non-structural square bracket
32999 ( left non-structural paren (all but a list right of an =)
33000 ) right non-structural paren
33001 L left non-structural curly brace (enclosing a key)
33002 R right non-structural curly brace
33003 ; terminal semicolon
33004 f indicates a semicolon in a "for" statement
33005 h here_doc operator <<
33007 Q indicates a quote or pattern
33008 q indicates a qw quote block
33010 C user-defined constant or constant function (with void prototype = ())
33011 U user-defined function taking parameters
33012 G user-defined function taking block parameter (like grep/map/eval)
33013 M (unused, but reserved for subroutine definition name)
33014 P (unused, but -html uses it to label pod text)
33015 t type indicater such as %,$,@,*,&,sub
33016 w bare word (perhaps a subroutine call)
33017 i identifier of some type (with leading %, $, @, *, &, sub, -> )
33020 F a file test operator (like -e)
33022 Z identifier in indirect object slot: may be file handle, object
33023 J LABEL: code block label
33024 j LABEL after next, last, redo, goto
33027 pp pre-increment operator ++
33028 mm pre-decrement operator --
33029 A : used as attribute separator
33031 Here are the '_line_type' codes used internally:
33032 SYSTEM - system-specific code before hash-bang line
33033 CODE - line of perl code (including comments)
33034 POD_START - line starting pod, such as '=head'
33035 POD - pod documentation text
33036 POD_END - last line of pod section, '=cut'
33037 HERE - text of here-document
33038 HERE_END - last line of here-doc (target word)
33039 FORMAT - format section
33040 FORMAT_END - last line of format section, '.'
33041 DATA_START - __DATA__ line
33042 DATA - unidentified text following __DATA__
33043 END_START - __END__ line
33044 END - unidentified text following __END__
33045 ERROR - we are in big trouble, probably not a perl script
33053 # These names are used in error messages
33054 @opening_brace_names = qw# '{' '[' '(' '?' #;
33055 @closing_brace_names = qw# '}' ']' ')' ':' #;
33060 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
33061 <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
33063 @is_digraph{@digraphs} = (1) x scalar(@digraphs);
33065 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
33066 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
33068 my @tetragraphs = qw( <<>> );
33069 @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
33071 # make a hash of all valid token types for self-checking the tokenizer
33072 # (adding NEW_TOKENS : select a new character and add to this list)
33073 my @valid_token_types = qw#
33074 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
33075 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
33077 push( @valid_token_types, @digraphs );
33078 push( @valid_token_types, @trigraphs );
33079 push( @valid_token_types, @tetragraphs );
33080 push( @valid_token_types, ( '#', ',', 'CORE::' ) );
33081 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
33083 # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
33084 my @file_test_operators =
33085 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
33086 @is_file_test_operator{@file_test_operators} =
33087 (1) x scalar(@file_test_operators);
33089 # these functions have prototypes of the form (&), so when they are
33090 # followed by a block, that block MAY BE followed by an operator.
33091 # Smartmatch operator ~~ may be followed by anonymous hash or array ref
33092 @q = qw( do eval );
33093 @is_block_operator{@q} = (1) x scalar(@q);
33095 # these functions allow an identifier in the indirect object slot
33096 @q = qw( print printf sort exec system say);
33097 @is_indirect_object_taker{@q} = (1) x scalar(@q);
33099 # These tokens may precede a code block
33100 # patched for SWITCH/CASE/CATCH. Actually these could be removed
33101 # now and we could let the extended-syntax coding handle them
33103 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
33104 unless do while until eval for foreach map grep sort
33105 switch case given when catch try finally);
33106 @is_code_block_token{@q} = (1) x scalar(@q);
33108 # I'll build the list of keywords incrementally
33111 # keywords and tokens after which a value or pattern is expected,
33112 # but not an operator. In other words, these should consume terms
33113 # to their right, or at least they are not expected to be followed
33114 # immediately by operators.
33115 my @value_requestor = qw(
33338 # patched above for SWITCH/CASE given/when err say
33339 # 'err' is a fairly safe addition.
33340 # TODO: 'default' still needed if appropriate
33341 # 'use feature' seen, but perltidy works ok without it.
33342 # Concerned that 'default' could break code.
33343 push( @Keywords, @value_requestor );
33345 # These are treated the same but are not keywords:
33350 push( @value_requestor, @extra_vr );
33352 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
33354 # this list contains keywords which do not look for arguments,
33355 # so that they might be followed by an operator, or at least
33357 my @operator_requestor = qw(
33381 push( @Keywords, @operator_requestor );
33383 # These are treated the same but are not considered keywords:
33390 push( @operator_requestor, @extra_or );
33392 @expecting_operator_token{@operator_requestor} =
33393 (1) x scalar(@operator_requestor);
33395 # these token TYPES expect trailing operator but not a term
33396 # note: ++ and -- are post-increment and decrement, 'C' = constant
33397 my @operator_requestor_types = qw( ++ -- C <> q );
33398 @expecting_operator_types{@operator_requestor_types} =
33399 (1) x scalar(@operator_requestor_types);
33401 # these token TYPES consume values (terms)
33402 # note: pp and mm are pre-increment and decrement
33403 # f=semicolon in for, F=file test operator
33404 my @value_requestor_type = qw#
33405 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
33406 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
33407 <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
33408 f F pp mm Y p m U J G j >> << ^ t
33409 ~. ^. |. &. ^.= |.= &.=
33411 push( @value_requestor_type, ',' )
33412 ; # (perl doesn't like a ',' in a qw block)
33413 @expecting_term_types{@value_requestor_type} =
33414 (1) x scalar(@value_requestor_type);
33416 # Note: the following valid token types are not assigned here to
33417 # hashes requesting to be followed by values or terms, but are
33418 # instead currently hard-coded into sub operator_expected:
33419 # ) -> :: Q R Z ] b h i k n v w } #
33421 # For simple syntax checking, it is nice to have a list of operators which
33422 # will really be unhappy if not followed by a term. This includes most
33424 %really_want_term = %expecting_term_types;
33426 # with these exceptions...
33427 delete $really_want_term{'U'}; # user sub, depends on prototype
33428 delete $really_want_term{'F'}; # file test works on $_ if no following term
33429 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
33432 @q = qw(q qq qw qx qr s y tr m);
33433 @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
33435 # These keywords are handled specially in the tokenizer code:
33436 my @special_keywords = qw(
33452 push( @Keywords, @special_keywords );
33454 # Keywords after which list formatting may be used
33455 # WARNING: do not include |map|grep|eval or perl may die on
33456 # syntax errors (map1.t).
33457 my @keyword_taking_list = qw(
33531 @is_keyword_taking_list{@keyword_taking_list} =
33532 (1) x scalar(@keyword_taking_list);
33534 # These are not used in any way yet
33535 # my @unused_keywords = qw(
33541 # The list of keywords was originally extracted from function 'keyword' in
33542 # perl file toke.c version 5.005.03, using this utility, plus a
33543 # little editing: (file getkwd.pl):
33544 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
33545 # Add 'get' prefix where necessary, then split into the above lists.
33546 # This list should be updated as necessary.
33547 # The list should not contain these special variables:
33548 # ARGV DATA ENV SIG STDERR STDIN STDOUT
33551 @is_keyword{@Keywords} = (1) x scalar(@Keywords);