1 ############################################################
3 # perltidy - a perl script indenter and formatter
5 # Copyright (c) 2000-2006 by Steve Hancock
6 # Distributed under the GPL license agreement; see file COPYING
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 # For brief instructions instructions, try 'perltidy -h'.
23 # For more complete documentation, try 'man perltidy'
24 # or visit http://perltidy.sourceforge.net
26 # This script is an example of the default style. It was formatted with:
31 # Michael Cartmell supplied code for adaptation to VMS and helped with
33 # Hugh S. Myers supplied sub streamhandle and the supporting code to
34 # create a Perl::Tidy module which can operate on strings, arrays, etc.
35 # Yves Orton supplied coding to help detect Windows versions.
36 # Axel Rose supplied a patch for MacPerl.
37 # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
38 # Many others have supplied key ideas, suggestions, and bug reports;
39 # see the CHANGES file.
41 ############################################################
44 use 5.004; # need IO::File from 5.004 or later
45 BEGIN { $^W = 1; } # turn on warnings
59 @ISA = qw( Exporter );
60 @EXPORT = qw( &perltidy );
66 ( $VERSION = q($Id: Tidy.pm,v 1.56 2006/07/19 23:13:33 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
71 # given filename and mode (r or w), create an object which:
72 # has a 'getline' method if mode='r', and
73 # has a 'print' method if mode='w'.
74 # The objects also need a 'close' method.
76 # How the object is made:
78 # if $filename is: Make object using:
79 # ---------------- -----------------
80 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
82 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
83 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
85 # (check for 'print' method for 'w' mode)
86 # (check for 'getline' method for 'r' mode)
87 my $ref = ref( my $filename = shift );
94 if ( $ref eq 'ARRAY' ) {
95 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
97 elsif ( $ref eq 'SCALAR' ) {
98 $New = sub { Perl::Tidy::IOScalar->new(@_) };
102 # Accept an object with a getline method for reading. Note:
103 # IO::File is built-in and does not respond to the defined
104 # operator. If this causes trouble, the check can be
105 # skipped and we can just let it crash if there is no
107 if ( $mode =~ /[rR]/ ) {
108 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
109 $New = sub { $filename };
112 $New = sub { undef };
114 ------------------------------------------------------------------------
115 No 'getline' method is defined for object of class $ref
116 Please check your call to Perl::Tidy::perltidy. Trace follows.
117 ------------------------------------------------------------------------
122 # Accept an object with a print method for writing.
123 # See note above about IO::File
124 if ( $mode =~ /[wW]/ ) {
125 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
126 $New = sub { $filename };
129 $New = sub { undef };
131 ------------------------------------------------------------------------
132 No 'print' method is defined for object of class $ref
133 Please check your call to Perl::Tidy::perltidy. Trace follows.
134 ------------------------------------------------------------------------
143 if ( $filename eq '-' ) {
144 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
147 $New = sub { IO::File->new(@_) };
150 $fh = $New->( $filename, $mode )
151 or warn "Couldn't open file:$filename in mode:$mode : $!\n";
152 return $fh, ( $ref or $filename );
155 sub find_input_line_ending {
157 # Peek at a file and return first line ending character.
158 # Quietly return undef in case of any trouble.
159 my ($input_file) = @_;
162 # silently ignore input from object or stdin
163 if ( ref($input_file) || $input_file eq '-' ) {
166 open( INFILE, $input_file ) || return $ending;
170 read( INFILE, $buf, 1024 );
172 if ( $buf && $buf =~ /([\012\015]+)/ ) {
176 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
179 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
182 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
196 # concatenate a path and file basename
197 # returns undef in case of error
199 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
201 # use File::Spec if we can
202 unless ($missing_file_spec) {
203 return File::Spec->catfile(@_);
206 # Perl 5.004 systems may not have File::Spec so we'll make
207 # a simple try. We assume File::Basename is available.
208 # return undef if not successful.
210 my $path = join '/', @_;
211 my $test_file = $path . $name;
212 my ( $test_name, $test_path ) = fileparse($test_file);
213 return $test_file if ( $test_name eq $name );
214 return undef if ( $^O eq 'VMS' );
216 # this should work at least for Windows and Unix:
217 $test_file = $path . '/' . $name;
218 ( $test_name, $test_path ) = fileparse($test_file);
219 return $test_file if ( $test_name eq $name );
223 sub make_temporary_filename {
225 # Make a temporary filename.
227 # The POSIX tmpnam() function tends to be unreliable for non-unix
228 # systems (at least for the win32 systems that I've tested), so use
229 # a pre-defined name. A slight disadvantage of this is that two
230 # perltidy runs in the same working directory may conflict.
231 # However, the chance of that is small and managable by the user.
232 # An alternative would be to check for the file's existance and use,
233 # say .TMP0, .TMP1, etc, but that scheme has its own problems. So,
235 my $name = "perltidy.TMP";
236 if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
239 eval "use POSIX qw(tmpnam)";
240 if ($@) { return $name }
243 # just make a couple of tries before giving up and using the default
245 my $tmpname = tmpnam();
246 my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
256 # Here is a map of the flow of data from the input source to the output
259 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
260 # input groups output
261 # lines tokens lines of lines lines
264 # The names correspond to the package names responsible for the unit processes.
266 # The overall process is controlled by the "main" package.
268 # LineSource is the stream of input lines
270 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
271 # if necessary. A token is any section of the input line which should be
272 # manipulated as a single entity during formatting. For example, a single
273 # ',' character is a token, and so is an entire side comment. It handles
274 # the complexities of Perl syntax, such as distinguishing between '<<' as
275 # a shift operator and as a here-document, or distinguishing between '/'
276 # as a divide symbol and as a pattern delimiter.
278 # Formatter inserts and deletes whitespace between tokens, and breaks
279 # sequences of tokens at appropriate points as output lines. It bases its
280 # decisions on the default rules as modified by any command-line options.
282 # VerticalAligner collects groups of lines together and tries to line up
283 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
285 # FileWriter simply writes lines to the output stream.
287 # The Logger package, not shown, records significant events and warning
288 # messages. It writes a .LOG file, which may be saved with a
289 # '-log' or a '-g' flag.
293 # variables needed by interrupt handler:
297 # this routine may be called to give a status report if interrupted. If a
298 # parameter is given, it will call exit with that parameter. This is no
299 # longer used because it works under Unix but not under Windows.
300 sub interrupt_handler {
302 my $exit_flag = shift;
303 print STDERR "perltidy interrupted";
305 my $input_line_number =
306 Perl::Tidy::Tokenizer::get_input_line_number();
307 print STDERR " at line $input_line_number";
311 if ( ref $input_file ) { print STDERR " of reference to:" }
312 else { print STDERR " of file:" }
313 print STDERR " $input_file";
316 exit $exit_flag if defined($exit_flag);
323 destination => undef,
330 dump_options => undef,
331 dump_options_type => undef,
332 dump_getopt_flags => undef,
333 dump_options_category => undef,
334 dump_options_range => undef,
335 dump_abbreviations => undef,
338 # don't overwrite callers ARGV
343 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
345 my @good_keys = sort keys %defaults;
346 @bad_keys = sort @bad_keys;
348 ------------------------------------------------------------------------
349 Unknown perltidy parameter : (@bad_keys)
350 perltidy only understands : (@good_keys)
351 ------------------------------------------------------------------------
356 my $get_hash_ref = sub {
358 my $hash_ref = $input_hash{$key};
359 if ( defined($hash_ref) ) {
360 unless ( ref($hash_ref) eq 'HASH' ) {
361 my $what = ref($hash_ref);
363 $what ? "but is ref to $what" : "but is not a reference";
365 ------------------------------------------------------------------------
366 error in call to perltidy:
367 -$key must be reference to HASH $but_is
368 ------------------------------------------------------------------------
375 %input_hash = ( %defaults, %input_hash );
376 my $argv = $input_hash{'argv'};
377 my $destination_stream = $input_hash{'destination'};
378 my $errorfile_stream = $input_hash{'errorfile'};
379 my $logfile_stream = $input_hash{'logfile'};
380 my $perltidyrc_stream = $input_hash{'perltidyrc'};
381 my $source_stream = $input_hash{'source'};
382 my $stderr_stream = $input_hash{'stderr'};
383 my $user_formatter = $input_hash{'formatter'};
385 # various dump parameters
386 my $dump_options_type = $input_hash{'dump_options_type'};
387 my $dump_options = $get_hash_ref->('dump_options');
388 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
389 my $dump_options_category = $get_hash_ref->('dump_options_category');
390 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
391 my $dump_options_range = $get_hash_ref->('dump_options_range');
393 # validate dump_options_type
394 if ( defined($dump_options) ) {
395 unless ( defined($dump_options_type) ) {
396 $dump_options_type = 'perltidyrc';
398 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
400 ------------------------------------------------------------------------
401 Please check value of -dump_options_type in call to perltidy;
402 saw: '$dump_options_type'
403 expecting: 'perltidyrc' or 'full'
404 ------------------------------------------------------------------------
410 $dump_options_type = "";
413 if ($user_formatter) {
415 # if the user defines a formatter, there is no output stream,
416 # but we need a null stream to keep coding simple
417 $destination_stream = Perl::Tidy::DevNull->new();
420 # see if ARGV is overridden
421 if ( defined($argv) ) {
423 my $rargv = ref $argv;
424 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
428 if ( $rargv eq 'ARRAY' ) {
433 ------------------------------------------------------------------------
434 Please check value of -argv in call to perltidy;
435 it must be a string or ref to ARRAY but is: $rargv
436 ------------------------------------------------------------------------
443 my ( $rargv, $msg ) = parse_args($argv);
446 Error parsing this string passed to to perltidy with 'argv':
454 # redirect STDERR if requested
455 if ($stderr_stream) {
456 my ( $fh_stderr, $stderr_file ) =
457 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
458 if ($fh_stderr) { *STDERR = $fh_stderr }
461 ------------------------------------------------------------------------
462 Unable to redirect STDERR to $stderr_stream
463 Please check value of -stderr in call to perltidy
464 ------------------------------------------------------------------------
469 my $rpending_complaint;
470 $$rpending_complaint = "";
471 my $rpending_logfile_message;
472 $$rpending_logfile_message = "";
474 my ( $is_Windows, $Windows_type ) =
475 look_for_Windows($rpending_complaint);
477 # VMS file names are restricted to a 40.40 format, so we append _tdy
478 # instead of .tdy, etc. (but see also sub check_vms_filename)
481 if ( $^O eq 'VMS' ) {
487 $dot_pattern = '\.'; # must escape for use in regex
490 # handle command line options
491 my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
492 $rexpansion, $roption_category, $roption_range )
493 = process_command_line(
494 $perltidyrc_stream, $is_Windows, $Windows_type,
495 $rpending_complaint, $dump_options_type,
498 # return or exit immediately after all dumps
501 # Getopt parameters and their flags
502 if ( defined($dump_getopt_flags) ) {
504 foreach my $op ( @{$roption_string} ) {
507 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
511 $dump_getopt_flags->{$opt} = $flag;
515 if ( defined($dump_options_category) ) {
517 %{$dump_options_category} = %{$roption_category};
520 if ( defined($dump_options_range) ) {
522 %{$dump_options_range} = %{$roption_range};
525 if ( defined($dump_abbreviations) ) {
527 %{$dump_abbreviations} = %{$rexpansion};
530 if ( defined($dump_options) ) {
532 %{$dump_options} = %{$rOpts};
535 return if ($quit_now);
537 # dump from command line
538 if ( $rOpts->{'dump-options'} ) {
539 dump_options( $rOpts, $roption_string );
543 check_options( $rOpts, $is_Windows, $Windows_type,
544 $rpending_complaint );
546 if ($user_formatter) {
547 $rOpts->{'format'} = 'user';
550 # there must be one entry here for every possible format
551 my %default_file_extension = (
557 # be sure we have a valid output format
558 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
559 my $formats = join ' ',
560 sort map { "'" . $_ . "'" } keys %default_file_extension;
561 my $fmt = $rOpts->{'format'};
562 die "-format='$fmt' but must be one of: $formats\n";
565 my $output_extension =
566 make_extension( $rOpts->{'output-file-extension'},
567 $default_file_extension{ $rOpts->{'format'} }, $dot );
569 my $backup_extension =
570 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
572 my $html_toc_extension =
573 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
575 my $html_src_extension =
576 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
578 # check for -b option;
579 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
580 && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
581 && @ARGV > 0; # silently ignore if standard input;
582 # this allows -b to be in a .perltidyrc file
583 # without error messages when running from an editor
585 # turn off -b with warnings in case of conflicts with other options
586 if ($in_place_modify) {
587 if ( $rOpts->{'standard-output'} ) {
588 warn "Ignoring -b; you may not use -b and -st together\n";
589 $in_place_modify = 0;
591 if ($destination_stream) {
593 "Ignoring -b; you may not specify a destination array and -b together\n";
594 $in_place_modify = 0;
596 if ($source_stream) {
598 "Ignoring -b; you may not specify a source array and -b together\n";
599 $in_place_modify = 0;
601 if ( $rOpts->{'outfile'} ) {
602 warn "Ignoring -b; you may not use -b and -o together\n";
603 $in_place_modify = 0;
605 if ( defined( $rOpts->{'output-path'} ) ) {
606 warn "Ignoring -b; you may not use -b and -opath together\n";
607 $in_place_modify = 0;
611 Perl::Tidy::Formatter::check_options($rOpts);
612 if ( $rOpts->{'format'} eq 'html' ) {
613 Perl::Tidy::HtmlWriter->check_options($rOpts);
616 # make the pattern of file extensions that we shouldn't touch
617 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
618 if ($output_extension) {
619 $_ = quotemeta($output_extension);
620 $forbidden_file_extensions .= "|$_";
622 if ( $in_place_modify && $backup_extension ) {
623 $_ = quotemeta($backup_extension);
624 $forbidden_file_extensions .= "|$_";
626 $forbidden_file_extensions .= ')$';
628 # Create a diagnostics object if requested;
629 # This is only useful for code development
630 my $diagnostics_object = undef;
631 if ( $rOpts->{'DIAGNOSTICS'} ) {
632 $diagnostics_object = Perl::Tidy::Diagnostics->new();
635 # no filenames should be given if input is from an array
636 if ($source_stream) {
639 "You may not specify any filenames when a source array is given\n";
642 # we'll stuff the source array into ARGV
643 unshift( @ARGV, $source_stream );
645 # No special treatment for source stream which is a filename.
646 # This will enable checks for binary files and other bad stuff.
647 $source_stream = undef unless ref($source_stream);
650 # use stdin by default if no source array and no args
652 unshift( @ARGV, '-' ) unless @ARGV;
655 # loop to process all files in argument list
656 my $number_of_files = @ARGV;
657 my $formatter = undef;
659 while ( $input_file = shift @ARGV ) {
661 my $input_file_permissions;
663 #---------------------------------------------------------------
664 # determine the input file name
665 #---------------------------------------------------------------
666 if ($source_stream) {
667 $fileroot = "perltidy";
669 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
670 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
671 $in_place_modify = 0;
674 $fileroot = $input_file;
675 unless ( -e $input_file ) {
677 # file doesn't exist - check for a file glob
678 if ( $input_file =~ /([\?\*\[\{])/ ) {
680 # Windows shell may not remove quotes, so do it
681 my $input_file = $input_file;
682 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
683 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
684 my $pattern = fileglob_to_re($input_file);
686 if ( !$@ && opendir( DIR, './' ) ) {
688 grep { /$pattern/ && !-d $_ } readdir(DIR);
691 unshift @ARGV, @files;
696 print "skipping file: '$input_file': no matches found\n";
700 unless ( -f $input_file ) {
701 print "skipping file: $input_file: not a regular file\n";
705 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
707 "skipping file: $input_file: Non-text (override with -f)\n";
711 # we should have a valid filename now
712 $fileroot = $input_file;
713 $input_file_permissions = ( stat $input_file )[2] & 07777;
715 if ( $^O eq 'VMS' ) {
716 ( $fileroot, $dot ) = check_vms_filename($fileroot);
719 # add option to change path here
720 if ( defined( $rOpts->{'output-path'} ) ) {
722 my ( $base, $old_path ) = fileparse($fileroot);
723 my $new_path = $rOpts->{'output-path'};
724 unless ( -d $new_path ) {
725 unless ( mkdir $new_path, 0777 ) {
726 die "unable to create directory $new_path: $!\n";
729 my $path = $new_path;
730 $fileroot = catfile( $path, $base );
733 ------------------------------------------------------------------------
734 Problem combining $new_path and $base to make a filename; check -opath
735 ------------------------------------------------------------------------
741 # Skip files with same extension as the output files because
742 # this can lead to a messy situation with files like
743 # script.tdy.tdy.tdy ... or worse problems ... when you
744 # rerun perltidy over and over with wildcard input.
747 && ( $input_file =~ /$forbidden_file_extensions/o
748 || $input_file eq 'DIAGNOSTICS' )
751 print "skipping file: $input_file: wrong extension\n";
755 # the 'source_object' supplies a method to read the input file
757 Perl::Tidy::LineSource->new( $input_file, $rOpts,
758 $rpending_logfile_message );
759 next unless ($source_object);
761 # register this file name with the Diagnostics package
762 $diagnostics_object->set_input_file($input_file)
763 if $diagnostics_object;
765 #---------------------------------------------------------------
766 # determine the output file name
767 #---------------------------------------------------------------
768 my $output_file = undef;
769 my $actual_output_extension;
771 if ( $rOpts->{'outfile'} ) {
773 if ( $number_of_files <= 1 ) {
775 if ( $rOpts->{'standard-output'} ) {
776 die "You may not use -o and -st together\n";
778 elsif ($destination_stream) {
780 "You may not specify a destination array and -o together\n";
782 elsif ( defined( $rOpts->{'output-path'} ) ) {
783 die "You may not specify -o and -opath together\n";
785 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
786 die "You may not specify -o and -oext together\n";
788 $output_file = $rOpts->{outfile};
790 # make sure user gives a file name after -o
791 if ( $output_file =~ /^-/ ) {
792 die "You must specify a valid filename after -o\n";
795 # do not overwrite input file with -o
796 if ( defined($input_file_permissions)
797 && ( $output_file eq $input_file ) )
800 "Use 'perltidy -b $input_file' to modify in-place\n";
804 die "You may not use -o with more than one input file\n";
807 elsif ( $rOpts->{'standard-output'} ) {
808 if ($destination_stream) {
810 "You may not specify a destination array and -st together\n";
814 if ( $number_of_files <= 1 ) {
817 die "You may not use -st with more than one input file\n";
820 elsif ($destination_stream) {
821 $output_file = $destination_stream;
823 elsif ($source_stream) { # source but no destination goes to stdout
826 elsif ( $input_file eq '-' ) {
830 if ($in_place_modify) {
831 $output_file = IO::File->new_tmpfile()
832 or die "cannot open temp file for -b option: $!\n";
835 $actual_output_extension = $output_extension;
836 $output_file = $fileroot . $output_extension;
840 # the 'sink_object' knows how to write the output file
841 my $tee_file = $fileroot . $dot . "TEE";
843 my $line_separator = $rOpts->{'output-line-ending'};
844 if ( $rOpts->{'preserve-line-endings'} ) {
845 $line_separator = find_input_line_ending($input_file);
847 $line_separator = "\n" unless defined($line_separator);
850 Perl::Tidy::LineSink->new( $output_file, $tee_file,
851 $line_separator, $rOpts, $rpending_logfile_message );
853 #---------------------------------------------------------------
854 # initialize the error logger
855 #---------------------------------------------------------------
856 my $warning_file = $fileroot . $dot . "ERR";
857 if ($errorfile_stream) { $warning_file = $errorfile_stream }
858 my $log_file = $fileroot . $dot . "LOG";
859 if ($logfile_stream) { $log_file = $logfile_stream }
862 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
864 write_logfile_header(
865 $rOpts, $logger_object, $config_file,
866 $rraw_options, $Windows_type
868 if ($$rpending_logfile_message) {
869 $logger_object->write_logfile_entry($$rpending_logfile_message);
871 if ($$rpending_complaint) {
872 $logger_object->complain($$rpending_complaint);
875 #---------------------------------------------------------------
876 # initialize the debug object, if any
877 #---------------------------------------------------------------
878 my $debugger_object = undef;
879 if ( $rOpts->{DEBUG} ) {
881 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
884 #---------------------------------------------------------------
885 # create a formatter for this file : html writer or pretty printer
886 #---------------------------------------------------------------
888 # we have to delete any old formatter because, for safety,
889 # the formatter will check to see that there is only one.
892 if ($user_formatter) {
893 $formatter = $user_formatter;
895 elsif ( $rOpts->{'format'} eq 'html' ) {
897 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
898 $actual_output_extension, $html_toc_extension,
899 $html_src_extension );
901 elsif ( $rOpts->{'format'} eq 'tidy' ) {
902 $formatter = Perl::Tidy::Formatter->new(
903 logger_object => $logger_object,
904 diagnostics_object => $diagnostics_object,
905 sink_object => $sink_object,
909 die "I don't know how to do -format=$rOpts->{'format'}\n";
912 unless ($formatter) {
913 die "Unable to continue with $rOpts->{'format'} formatting\n";
916 #---------------------------------------------------------------
917 # create the tokenizer for this file
918 #---------------------------------------------------------------
919 $tokenizer = undef; # must destroy old tokenizer
920 $tokenizer = Perl::Tidy::Tokenizer->new(
921 source_object => $source_object,
922 logger_object => $logger_object,
923 debugger_object => $debugger_object,
924 diagnostics_object => $diagnostics_object,
925 starting_level => $rOpts->{'starting-indentation-level'},
926 tabs => $rOpts->{'tabs'},
927 indent_columns => $rOpts->{'indent-columns'},
928 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
929 look_for_autoloader => $rOpts->{'look-for-autoloader'},
930 look_for_selfloader => $rOpts->{'look-for-selfloader'},
931 trim_qw => $rOpts->{'trim-qw'},
934 #---------------------------------------------------------------
936 #---------------------------------------------------------------
937 process_this_file( $tokenizer, $formatter );
939 #---------------------------------------------------------------
940 # close the input source and report errors
941 #---------------------------------------------------------------
942 $source_object->close_input_file();
944 # get file names to use for syntax check
945 my $ifname = $source_object->get_input_file_copy_name();
946 my $ofname = $sink_object->get_output_file_copy();
948 #---------------------------------------------------------------
949 # handle the -b option (backup and modify in-place)
950 #---------------------------------------------------------------
951 if ($in_place_modify) {
952 unless ( -f $input_file ) {
954 # oh, oh, no real file to backup ..
955 # shouldn't happen because of numerous preliminary checks
957 "problem with -b backing up input file '$input_file': not a file\n";
959 my $backup_name = $input_file . $backup_extension;
960 if ( -f $backup_name ) {
963 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
965 rename( $input_file, $backup_name )
967 "problem renaming $input_file to $backup_name for -b option: $!\n";
968 $ifname = $backup_name;
970 seek( $output_file, 0, 0 )
971 or die "unable to rewind tmp file for -b option: $!\n";
973 my $fout = IO::File->new("> $input_file")
975 "problem opening $input_file for write for -b option; check directory permissions: $!\n";
977 while ( $line = $output_file->getline() ) {
981 $output_file = $input_file;
982 $ofname = $input_file;
985 #---------------------------------------------------------------
986 # clean up and report errors
987 #---------------------------------------------------------------
988 $sink_object->close_output_file() if $sink_object;
989 $debugger_object->close_debug_file() if $debugger_object;
991 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
994 if ($input_file_permissions) {
996 # give output script same permissions as input script, but
997 # make it user-writable or else we can't run perltidy again.
998 # Thus we retain whatever executable flags were set.
999 if ( $rOpts->{'format'} eq 'tidy' ) {
1000 chmod( $input_file_permissions | 0600, $output_file );
1003 # else use default permissions for html and any other format
1006 if ( $logger_object && $rOpts->{'check-syntax'} ) {
1008 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1012 $logger_object->finish( $infile_syntax_ok, $formatter )
1014 } # end of loop to process all files
1015 } # end of main program
1018 sub fileglob_to_re {
1020 # modified (corrected) from version in find2perl
1022 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
1023 $x =~ s#\*#.*#g; # '*' -> '.*'
1024 $x =~ s#\?#.#g; # '?' -> '.'
1025 "^$x\\z"; # match whole word
1028 sub make_extension {
1030 # Make a file extension, including any leading '.' if necessary
1031 # The '.' may actually be an '_' under VMS
1032 my ( $extension, $default, $dot ) = @_;
1034 # Use the default if none specified
1035 $extension = $default unless ($extension);
1037 # Only extensions with these leading characters get a '.'
1038 # This rule gives the user some freedom
1039 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1040 $extension = $dot . $extension;
1045 sub write_logfile_header {
1046 my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
1048 $logger_object->write_logfile_entry(
1049 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1051 if ($Windows_type) {
1052 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1054 my $options_string = join( ' ', @$rraw_options );
1057 $logger_object->write_logfile_entry(
1058 "Found Configuration File >>> $config_file \n");
1060 $logger_object->write_logfile_entry(
1061 "Configuration and command line parameters for this run:\n");
1062 $logger_object->write_logfile_entry("$options_string\n");
1064 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1065 $rOpts->{'logfile'} = 1; # force logfile to be saved
1066 $logger_object->write_logfile_entry(
1067 "Final parameter set for this run\n");
1068 $logger_object->write_logfile_entry(
1069 "------------------------------------\n");
1071 foreach ( keys %{$rOpts} ) {
1072 $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
1074 $logger_object->write_logfile_entry(
1075 "------------------------------------\n");
1077 $logger_object->write_logfile_entry(
1078 "To find error messages search for 'WARNING' with your editor\n");
1081 sub generate_options {
1083 ######################################################################
1084 # Generate and return references to:
1085 # @option_string - the list of options to be passed to Getopt::Long
1086 # @defaults - the list of default options
1087 # %expansion - a hash showing how all abbreviations are expanded
1088 # %category - a hash giving the general category of each option
1089 # %option_range - a hash giving the valid ranges of certain options
1091 # Note: a few options are not documented in the man page and usage
1092 # message. This is because these are experimental or debug options and
1093 # may or may not be retained in future versions.
1095 # Here are the undocumented flags as far as I know. Any of them
1096 # may disappear at any time. They are mainly for fine-tuning
1099 # fll --> fuzzy-line-length # a trivial parameter which gets
1100 # turned off for the extrude option
1101 # which is mainly for debugging
1102 # chk --> check-multiline-quotes # check for old bug; to be deleted
1103 # scl --> short-concatenation-item-length # helps break at '.'
1104 # recombine # for debugging line breaks
1105 # valign # for debugging vertical alignment
1106 # I --> DIAGNOSTICS # for debugging
1107 ######################################################################
1109 # here is a summary of the Getopt codes:
1110 # <none> does not take an argument
1111 # =s takes a mandatory string
1112 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1113 # =i takes a mandatory integer
1114 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1115 # ! does not take an argument and may be negated
1116 # i.e., -foo and -nofoo are allowed
1117 # a double dash signals the end of the options list
1119 #---------------------------------------------------------------
1120 # Define the option string passed to GetOptions.
1121 #---------------------------------------------------------------
1123 my @option_string = ();
1125 my %option_category = ();
1126 my %option_range = ();
1127 my $rexpansion = \%expansion;
1129 # names of categories in manual
1130 # leading integers will allow sorting
1131 my @category_name = (
1133 '1. Basic formatting options',
1134 '2. Code indentation control',
1135 '3. Whitespace control',
1136 '4. Comment controls',
1137 '5. Linebreak controls',
1138 '6. Controlling list formatting',
1139 '7. Retaining or ignoring existing line breaks',
1140 '8. Blank line control',
1141 '9. Other controls',
1143 '11. pod2html options',
1144 '12. Controlling HTML properties',
1148 # These options are parsed directly by perltidy:
1151 # However, they are included in the option set so that they will
1152 # be seen in the options dump.
1154 # These long option names have no abbreviations or are treated specially
1155 @option_string = qw(
1164 my $category = 13; # Debugging
1165 foreach (@option_string) {
1166 my $opt = $_; # must avoid changing the actual flag
1168 $option_category{$opt} = $category_name[$category];
1171 $category = 11; # HTML
1172 $option_category{html} = $category_name[$category];
1174 # routine to install and check options
1175 my $add_option = sub {
1176 my ( $long_name, $short_name, $flag ) = @_;
1177 push @option_string, $long_name . $flag;
1178 $option_category{$long_name} = $category_name[$category];
1180 if ( $expansion{$short_name} ) {
1181 my $existing_name = $expansion{$short_name}[0];
1183 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1185 $expansion{$short_name} = [$long_name];
1186 if ( $flag eq '!' ) {
1187 my $nshort_name = 'n' . $short_name;
1188 my $nolong_name = 'no' . $long_name;
1189 if ( $expansion{$nshort_name} ) {
1190 my $existing_name = $expansion{$nshort_name}[0];
1192 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1194 $expansion{$nshort_name} = [$nolong_name];
1199 # Install long option names which have a simple abbreviation.
1200 # Options with code '!' get standard negation ('no' for long names,
1201 # 'n' for abbreviations). Categories follow the manual.
1203 ###########################
1204 $category = 0; # I/O_Control
1205 ###########################
1206 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1207 $add_option->( 'backup-file-extension', 'bext', '=s' );
1208 $add_option->( 'force-read-binary', 'f', '!' );
1209 $add_option->( 'format', 'fmt', '=s' );
1210 $add_option->( 'logfile', 'log', '!' );
1211 $add_option->( 'logfile-gap', 'g', ':i' );
1212 $add_option->( 'outfile', 'o', '=s' );
1213 $add_option->( 'output-file-extension', 'oext', '=s' );
1214 $add_option->( 'output-path', 'opath', '=s' );
1215 $add_option->( 'profile', 'pro', '=s' );
1216 $add_option->( 'quiet', 'q', '!' );
1217 $add_option->( 'standard-error-output', 'se', '!' );
1218 $add_option->( 'standard-output', 'st', '!' );
1219 $add_option->( 'warning-output', 'w', '!' );
1221 ########################################
1222 $category = 1; # Basic formatting options
1223 ########################################
1224 $add_option->( 'check-syntax', 'syn', '!' );
1225 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1226 $add_option->( 'indent-columns', 'i', '=i' );
1227 $add_option->( 'maximum-line-length', 'l', '=i' );
1228 $add_option->( 'output-line-ending', 'ole', '=s' );
1229 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1230 $add_option->( 'preserve-line-endings', 'ple', '!' );
1231 $add_option->( 'tabs', 't', '!' );
1233 ########################################
1234 $category = 2; # Code indentation control
1235 ########################################
1236 $add_option->( 'continuation-indentation', 'ci', '=i' );
1237 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1238 $add_option->( 'line-up-parentheses', 'lp', '!' );
1239 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1240 $add_option->( 'outdent-keywords', 'okw', '!' );
1241 $add_option->( 'outdent-labels', 'ola', '!' );
1242 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1243 $add_option->( 'indent-closing-brace', 'icb', '!' );
1244 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1245 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1246 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1247 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1248 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1249 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1251 ########################################
1252 $category = 3; # Whitespace control
1253 ########################################
1254 $add_option->( 'add-semicolons', 'asc', '!' );
1255 $add_option->( 'add-whitespace', 'aws', '!' );
1256 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1257 $add_option->( 'brace-tightness', 'bt', '=i' );
1258 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1259 $add_option->( 'delete-semicolons', 'dsm', '!' );
1260 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1261 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1262 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1263 $add_option->( 'paren-tightness', 'pt', '=i' );
1264 $add_option->( 'space-after-keyword', 'sak', '=s' );
1265 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1266 $add_option->( 'space-function-paren', 'sfp', '!' );
1267 $add_option->( 'space-keyword-paren', 'skp', '!' );
1268 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1269 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1270 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1271 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1272 $add_option->( 'trim-qw', 'tqw', '!' );
1273 $add_option->( 'want-left-space', 'wls', '=s' );
1274 $add_option->( 'want-right-space', 'wrs', '=s' );
1276 ########################################
1277 $category = 4; # Comment controls
1278 ########################################
1279 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1280 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1281 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1282 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1283 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1284 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1285 $add_option->( 'closing-side-comments', 'csc', '!' );
1286 $add_option->( 'format-skipping', 'fs', '!' );
1287 $add_option->( 'format-skipping-begin', 'fsb', '=s' );
1288 $add_option->( 'format-skipping-end', 'fse', '=s' );
1289 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1290 $add_option->( 'indent-block-comments', 'ibc', '!' );
1291 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1292 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1293 $add_option->( 'outdent-long-comments', 'olc', '!' );
1294 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1295 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1296 $add_option->( 'static-block-comments', 'sbc', '!' );
1297 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1298 $add_option->( 'static-side-comments', 'ssc', '!' );
1300 ########################################
1301 $category = 5; # Linebreak controls
1302 ########################################
1303 $add_option->( 'add-newlines', 'anl', '!' );
1304 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1305 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1306 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1307 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1308 $add_option->( 'cuddled-else', 'ce', '!' );
1309 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1310 $add_option->( 'opening-brace-always-on-right', 'bar', '' );
1311 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1312 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1313 $add_option->( 'opening-paren-right', 'opr', '!' );
1314 $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1315 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1316 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1317 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1318 $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1319 $add_option->( 'stack-closing-paren', 'scp', '!' );
1320 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1321 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1322 $add_option->( 'stack-opening-paren', 'sop', '!' );
1323 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1324 $add_option->( 'vertical-tightness', 'vt', '=i' );
1325 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1326 $add_option->( 'want-break-after', 'wba', '=s' );
1327 $add_option->( 'want-break-before', 'wbb', '=s' );
1329 ########################################
1330 $category = 6; # Controlling list formatting
1331 ########################################
1332 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1333 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1334 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1336 ########################################
1337 $category = 7; # Retaining or ignoring existing line breaks
1338 ########################################
1339 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1340 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1341 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1342 $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1344 ########################################
1345 $category = 8; # Blank line control
1346 ########################################
1347 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1348 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1349 $add_option->( 'blanks-before-subs', 'bbs', '!' );
1350 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1351 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1352 $add_option->( 'swallow-optional-blank-lines', 'sob', '!' );
1354 ########################################
1355 $category = 9; # Other controls
1356 ########################################
1357 $add_option->( 'delete-block-comments', 'dbc', '!' );
1358 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1359 $add_option->( 'delete-pod', 'dp', '!' );
1360 $add_option->( 'delete-side-comments', 'dsc', '!' );
1361 $add_option->( 'tee-block-comments', 'tbc', '!' );
1362 $add_option->( 'tee-pod', 'tp', '!' );
1363 $add_option->( 'tee-side-comments', 'tsc', '!' );
1364 $add_option->( 'look-for-autoloader', 'lal', '!' );
1365 $add_option->( 'look-for-hash-bang', 'x', '!' );
1366 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1367 $add_option->( 'pass-version-line', 'pvl', '!' );
1369 ########################################
1370 $category = 13; # Debugging
1371 ########################################
1372 $add_option->( 'DEBUG', 'D', '!' );
1373 $add_option->( 'DIAGNOSTICS', 'I', '!' );
1374 $add_option->( 'check-multiline-quotes', 'chk', '!' );
1375 $add_option->( 'dump-defaults', 'ddf', '!' );
1376 $add_option->( 'dump-long-names', 'dln', '!' );
1377 $add_option->( 'dump-options', 'dop', '!' );
1378 $add_option->( 'dump-profile', 'dpro', '!' );
1379 $add_option->( 'dump-short-names', 'dsn', '!' );
1380 $add_option->( 'dump-token-types', 'dtt', '!' );
1381 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1382 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1383 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1384 $add_option->( 'help', 'h', '' );
1385 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1386 $add_option->( 'show-options', 'opt', '!' );
1387 $add_option->( 'version', 'v', '' );
1389 #---------------------------------------------------------------------
1391 # The Perl::Tidy::HtmlWriter will add its own options to the string
1392 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1394 ########################################
1395 # Set categories 10, 11, 12
1396 ########################################
1397 # Based on their known order
1398 $category = 12; # HTML properties
1399 foreach my $opt (@option_string) {
1400 my $long_name = $opt;
1401 $long_name =~ s/(!|=.*|:.*)$//;
1402 unless ( defined( $option_category{$long_name} ) ) {
1403 if ( $long_name =~ /^html-linked/ ) {
1404 $category = 10; # HTML options
1406 elsif ( $long_name =~ /^pod2html/ ) {
1407 $category = 11; # Pod2html
1409 $option_category{$long_name} = $category_name[$category];
1413 #---------------------------------------------------------------
1414 # Assign valid ranges to certain options
1415 #---------------------------------------------------------------
1416 # In the future, these may be used to make preliminary checks
1417 # hash keys are long names
1418 # If key or value is undefined:
1419 # strings may have any value
1420 # integer ranges are >=0
1421 # If value is defined:
1422 # value is [qw(any valid words)] for strings
1423 # value is [min, max] for integers
1424 # if min is undefined, there is no lower limit
1425 # if max is undefined, there is no upper limit
1426 # Parameters not listed here have defaults
1427 $option_range{'format'} = [qw(tidy html user)];
1428 $option_range{'output-line-ending'} = [qw(dos win mac unix)];
1430 $option_range{'block-brace-tightness'} = [ 0, 2 ];
1431 $option_range{'brace-tightness'} = [ 0, 2 ];
1432 $option_range{'paren-tightness'} = [ 0, 2 ];
1433 $option_range{'square-bracket-tightness'} = [ 0, 2 ];
1435 $option_range{'block-brace-vertical-tightness'} = [ 0, 2 ];
1436 $option_range{'brace-vertical-tightness'} = [ 0, 2 ];
1437 $option_range{'brace-vertical-tightness-closing'} = [ 0, 2 ];
1438 $option_range{'paren-vertical-tightness'} = [ 0, 2 ];
1439 $option_range{'paren-vertical-tightness-closing'} = [ 0, 2 ];
1440 $option_range{'square-bracket-vertical-tightness'} = [ 0, 2 ];
1441 $option_range{'square-bracket-vertical-tightness-closing'} = [ 0, 2 ];
1442 $option_range{'vertical-tightness'} = [ 0, 2 ];
1443 $option_range{'vertical-tightness-closing'} = [ 0, 2 ];
1445 $option_range{'closing-brace-indentation'} = [ 0, 3 ];
1446 $option_range{'closing-paren-indentation'} = [ 0, 3 ];
1447 $option_range{'closing-square-bracket-indentation'} = [ 0, 3 ];
1448 $option_range{'closing-token-indentation'} = [ 0, 3 ];
1450 $option_range{'closing-side-comment-else-flag'} = [ 0, 2 ];
1451 $option_range{'comma-arrow-breakpoints'} = [ 0, 3 ];
1453 # Note: we could actually allow negative ci if someone really wants it:
1454 # $option_range{'continuation-indentation'} = [ undef, undef ];
1456 #---------------------------------------------------------------
1457 # Assign default values to the above options here, except
1458 # for 'outfile' and 'help'.
1459 # These settings should approximate the perlstyle(1) suggestions.
1460 #---------------------------------------------------------------
1465 blanks-before-blocks
1466 blanks-before-comments
1468 block-brace-tightness=0
1469 block-brace-vertical-tightness=0
1471 brace-vertical-tightness-closing=0
1472 brace-vertical-tightness=0
1473 break-at-old-logical-breakpoints
1474 break-at-old-ternary-breakpoints
1475 break-at-old-keyword-breakpoints
1476 comma-arrow-breakpoints=1
1478 closing-side-comment-interval=6
1479 closing-side-comment-maximum-text=20
1480 closing-side-comment-else-flag=0
1481 closing-paren-indentation=0
1482 closing-brace-indentation=0
1483 closing-square-bracket-indentation=0
1484 continuation-indentation=2
1488 hanging-side-comments
1489 indent-block-comments
1491 long-block-line-count=8
1494 maximum-consecutive-blank-lines=1
1495 maximum-fields-per-table=0
1496 maximum-line-length=80
1497 minimum-space-to-comment=4
1498 nobrace-left-and-indent
1500 nodelete-old-whitespace
1505 nostatic-side-comments
1506 noswallow-optional-blank-lines
1511 outdent-long-comments
1513 paren-vertical-tightness-closing=0
1514 paren-vertical-tightness=0
1518 short-concatenation-item-length=8
1520 square-bracket-tightness=1
1521 square-bracket-vertical-tightness-closing=0
1522 square-bracket-vertical-tightness=0
1523 static-block-comments
1526 backup-file-extension=bak
1530 html-table-of-contents
1534 push @defaults, "perl-syntax-check-flags=-c -T";
1536 #---------------------------------------------------------------
1537 # Define abbreviations which will be expanded into the above primitives.
1538 # These may be defined recursively.
1539 #---------------------------------------------------------------
1542 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1543 'fnl' => [qw(freeze-newlines)],
1544 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1545 'fws' => [qw(freeze-whitespace)],
1546 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1547 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1548 'nooutdent-long-lines' =>
1549 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1550 'noll' => [qw(nooutdent-long-lines)],
1551 'io' => [qw(indent-only)],
1552 'delete-all-comments' =>
1553 [qw(delete-block-comments delete-side-comments delete-pod)],
1554 'nodelete-all-comments' =>
1555 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1556 'dac' => [qw(delete-all-comments)],
1557 'ndac' => [qw(nodelete-all-comments)],
1558 'gnu' => [qw(gnu-style)],
1559 'pbp' => [qw(perl-best-practices)],
1560 'tee-all-comments' =>
1561 [qw(tee-block-comments tee-side-comments tee-pod)],
1562 'notee-all-comments' =>
1563 [qw(notee-block-comments notee-side-comments notee-pod)],
1564 'tac' => [qw(tee-all-comments)],
1565 'ntac' => [qw(notee-all-comments)],
1566 'html' => [qw(format=html)],
1567 'nhtml' => [qw(format=tidy)],
1568 'tidy' => [qw(format=tidy)],
1570 'break-after-comma-arrows' => [qw(cab=0)],
1571 'nobreak-after-comma-arrows' => [qw(cab=1)],
1572 'baa' => [qw(cab=0)],
1573 'nbaa' => [qw(cab=1)],
1575 'break-at-old-trinary-breakpoints' => [qw(bot)],
1577 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1578 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1579 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1580 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1581 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1583 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1584 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1585 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1586 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1587 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1589 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1590 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1591 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1593 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1594 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1595 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1597 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1598 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1599 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1601 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1602 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1603 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1605 'otr' => [qw(opr ohbr osbr)],
1606 'opening-token-right' => [qw(opr ohbr osbr)],
1607 'notr' => [qw(nopr nohbr nosbr)],
1608 'noopening-token-right' => [qw(nopr nohbr nosbr)],
1610 'sot' => [qw(sop sohb sosb)],
1611 'nsot' => [qw(nsop nsohb nsosb)],
1612 'stack-opening-tokens' => [qw(sop sohb sosb)],
1613 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1615 'sct' => [qw(scp schb scsb)],
1616 'stack-closing-tokens' => => [qw(scp schb scsb)],
1617 'nsct' => [qw(nscp nschb nscsb)],
1618 'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1620 # 'mangle' originally deleted pod and comments, but to keep it
1621 # reversible, it no longer does. But if you really want to
1622 # delete them, just use:
1625 # An interesting use for 'mangle' is to do this:
1626 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1627 # which will form as many one-line blocks as possible
1633 delete-old-whitespace
1636 maximum-consecutive-blank-lines=0
1637 maximum-line-length=100000
1641 noblanks-before-blocks
1642 noblanks-before-subs
1647 # 'extrude' originally deleted pod and comments, but to keep it
1648 # reversible, it no longer does. But if you really want to
1649 # delete them, just use
1652 # An interesting use for 'extrude' is to do this:
1653 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1654 # which will break up all one-line blocks.
1661 delete-old-whitespace
1664 maximum-consecutive-blank-lines=0
1665 maximum-line-length=1
1668 noblanks-before-blocks
1669 noblanks-before-subs
1675 # this style tries to follow the GNU Coding Standards (which do
1676 # not really apply to perl but which are followed by some perl
1680 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1684 # Style suggested in Damian Conway's Perl Best Practices
1685 'perl-best-practices' => [
1686 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
1687 q(wbb=% + - * / x != == >= <= =~ !~ < > | & >= < = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=)
1690 # Additional styles can be added here
1693 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1695 # Uncomment next line to dump all expansions for debugging:
1696 # dump_short_names(\%expansion);
1698 \@option_string, \@defaults, \%expansion,
1699 \%option_category, \%option_range
1702 } # end of generate_options
1704 sub process_command_line {
1707 $perltidyrc_stream, $is_Windows, $Windows_type,
1708 $rpending_complaint, $dump_options_type
1714 $roption_string, $rdefaults, $rexpansion,
1715 $roption_category, $roption_range
1716 ) = generate_options();
1718 #---------------------------------------------------------------
1719 # set the defaults by passing the above list through GetOptions
1720 #---------------------------------------------------------------
1726 # do not load the defaults if we are just dumping perltidyrc
1727 unless ( $dump_options_type eq 'perltidyrc' ) {
1728 for $i (@$rdefaults) { push @ARGV, "--" . $i }
1731 # Patch to save users Getopt::Long configuration
1732 # and set to Getopt::Long defaults. Use eval to avoid
1733 # breaking old versions of Perl without these routines.
1735 eval { $glc = Getopt::Long::Configure() };
1737 eval { Getopt::Long::ConfigDefaults() };
1739 else { $glc = undef }
1741 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1742 die "Programming Bug: error in setting default options";
1745 # Patch to put the previous Getopt::Long configuration back
1746 eval { Getopt::Long::Configure($glc) } if defined $glc;
1750 my @raw_options = ();
1751 my $config_file = "";
1752 my $saw_ignore_profile = 0;
1753 my $saw_extrude = 0;
1754 my $saw_dump_profile = 0;
1757 #---------------------------------------------------------------
1758 # Take a first look at the command-line parameters. Do as many
1759 # immediate dumps as possible, which can avoid confusion if the
1760 # perltidyrc file has an error.
1761 #---------------------------------------------------------------
1762 foreach $i (@ARGV) {
1765 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1766 $saw_ignore_profile = 1;
1769 # note: this must come before -pro and -profile, below:
1770 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1771 $saw_dump_profile = 1;
1773 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1776 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1779 unless ( -e $config_file ) {
1780 warn "cannot find file given with -pro=$config_file: $!\n";
1784 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1785 die "usage: -pro=filename or --profile=filename, no spaces\n";
1787 elsif ( $i =~ /^-extrude$/ ) {
1790 elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1794 elsif ( $i =~ /^-(version|v)$/ ) {
1798 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1799 dump_defaults(@$rdefaults);
1802 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1803 dump_long_names(@$roption_string);
1806 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1807 dump_short_names($rexpansion);
1810 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1811 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1816 if ( $saw_dump_profile && $saw_ignore_profile ) {
1817 warn "No profile to dump because of -npro\n";
1821 #---------------------------------------------------------------
1822 # read any .perltidyrc configuration file
1823 #---------------------------------------------------------------
1824 unless ($saw_ignore_profile) {
1826 # resolve possible conflict between $perltidyrc_stream passed
1827 # as call parameter to perltidy and -pro=filename on command
1829 if ($perltidyrc_stream) {
1832 Conflict: a perltidyrc configuration file was specified both as this
1833 perltidy call parameter: $perltidyrc_stream
1834 and with this -profile=$config_file.
1835 Using -profile=$config_file.
1839 $config_file = $perltidyrc_stream;
1843 # look for a config file if we don't have one yet
1844 my $rconfig_file_chatter;
1845 $$rconfig_file_chatter = "";
1847 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
1848 $rpending_complaint )
1849 unless $config_file;
1851 # open any config file
1854 ( $fh_config, $config_file ) =
1855 Perl::Tidy::streamhandle( $config_file, 'r' );
1856 unless ($fh_config) {
1857 $$rconfig_file_chatter .=
1858 "# $config_file exists but cannot be opened\n";
1862 if ($saw_dump_profile) {
1863 if ($saw_dump_profile) {
1864 dump_config_file( $fh_config, $config_file,
1865 $rconfig_file_chatter );
1872 my ( $rconfig_list, $death_message ) =
1873 read_config_file( $fh_config, $config_file, $rexpansion );
1874 die $death_message if ($death_message);
1876 # process any .perltidyrc parameters right now so we can
1878 if (@$rconfig_list) {
1879 local @ARGV = @$rconfig_list;
1881 expand_command_abbreviations( $rexpansion, \@raw_options,
1884 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1886 "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
1889 # Anything left in this local @ARGV is an error and must be
1890 # invalid bare words from the configuration file. We cannot
1891 # check this earlier because bare words may have been valid
1892 # values for parameters. We had to wait for GetOptions to have
1896 my $str = "\'" . pop(@ARGV) . "\'";
1897 while ( my $param = pop(@ARGV) ) {
1898 if ( length($str) < 70 ) {
1899 $str .= ", '$param'";
1907 There are $count unrecognized values in the configuration file '$config_file':
1909 Use leading dashes for parameters. Use -npro to ignore this file.
1913 # Undo any options which cause premature exit. They are not
1914 # appropriate for a config file, and it could be hard to
1915 # diagnose the cause of the premature exit.
1924 dump-want-left-space
1925 dump-want-right-space
1933 if ( defined( $Opts{$_} ) ) {
1935 warn "ignoring --$_ in config file: $config_file\n";
1942 #---------------------------------------------------------------
1943 # now process the command line parameters
1944 #---------------------------------------------------------------
1945 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
1947 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1948 die "Error on command line; for help try 'perltidy -h'\n";
1951 return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
1952 $rexpansion, $roption_category, $roption_range );
1953 } # end of process_command_line
1957 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
1959 #---------------------------------------------------------------
1960 # check and handle any interactions among the basic options..
1961 #---------------------------------------------------------------
1963 # Since -vt, -vtc, and -cti are abbreviations, but under
1964 # msdos, an unquoted input parameter like vtc=1 will be
1965 # seen as 2 parameters, vtc and 1, so the abbreviations
1966 # won't be seen. Therefore, we will catch them here if
1969 if ( defined $rOpts->{'vertical-tightness'} ) {
1970 my $vt = $rOpts->{'vertical-tightness'};
1971 $rOpts->{'paren-vertical-tightness'} = $vt;
1972 $rOpts->{'square-bracket-vertical-tightness'} = $vt;
1973 $rOpts->{'brace-vertical-tightness'} = $vt;
1976 if ( defined $rOpts->{'vertical-tightness-closing'} ) {
1977 my $vtc = $rOpts->{'vertical-tightness-closing'};
1978 $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
1979 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
1980 $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
1983 if ( defined $rOpts->{'closing-token-indentation'} ) {
1984 my $cti = $rOpts->{'closing-token-indentation'};
1985 $rOpts->{'closing-square-bracket-indentation'} = $cti;
1986 $rOpts->{'closing-brace-indentation'} = $cti;
1987 $rOpts->{'closing-paren-indentation'} = $cti;
1990 # In quiet mode, there is no log file and hence no way to report
1991 # results of syntax check, so don't do it.
1992 if ( $rOpts->{'quiet'} ) {
1993 $rOpts->{'check-syntax'} = 0;
1996 # can't check syntax if no output
1997 if ( $rOpts->{'format'} ne 'tidy' ) {
1998 $rOpts->{'check-syntax'} = 0;
2001 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2002 # wide variety of nasty problems on these systems, because they cannot
2003 # reliably run backticks. Don't even think about changing this!
2004 if ( $rOpts->{'check-syntax'}
2006 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2008 $rOpts->{'check-syntax'} = 0;
2011 # It's really a bad idea to check syntax as root unless you wrote
2012 # the script yourself. FIXME: not sure if this works with VMS
2013 unless ($is_Windows) {
2015 if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2016 $rOpts->{'check-syntax'} = 0;
2017 $$rpending_complaint .=
2018 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2022 # see if user set a non-negative logfile-gap
2023 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2025 # a zero gap will be taken as a 1
2026 if ( $rOpts->{'logfile-gap'} == 0 ) {
2027 $rOpts->{'logfile-gap'} = 1;
2030 # setting a non-negative logfile gap causes logfile to be saved
2031 $rOpts->{'logfile'} = 1;
2034 # not setting logfile gap, or setting it negative, causes default of 50
2036 $rOpts->{'logfile-gap'} = 50;
2039 # set short-cut flag when only indentation is to be done.
2040 # Note that the user may or may not have already set the
2042 if ( !$rOpts->{'add-whitespace'}
2043 && !$rOpts->{'delete-old-whitespace'}
2044 && !$rOpts->{'add-newlines'}
2045 && !$rOpts->{'delete-old-newlines'} )
2047 $rOpts->{'indent-only'} = 1;
2050 # -isbc implies -ibc
2051 if ( $rOpts->{'indent-spaced-block-comments'} ) {
2052 $rOpts->{'indent-block-comments'} = 1;
2055 # -bli flag implies -bl
2056 if ( $rOpts->{'brace-left-and-indent'} ) {
2057 $rOpts->{'opening-brace-on-new-line'} = 1;
2060 if ( $rOpts->{'opening-brace-always-on-right'}
2061 && $rOpts->{'opening-brace-on-new-line'} )
2064 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2065 'opening-brace-on-new-line' (-bl). Ignoring -bl.
2067 $rOpts->{'opening-brace-on-new-line'} = 0;
2070 # it simplifies things if -bl is 0 rather than undefined
2071 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2072 $rOpts->{'opening-brace-on-new-line'} = 0;
2075 # -sbl defaults to -bl if not defined
2076 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2077 $rOpts->{'opening-sub-brace-on-new-line'} =
2078 $rOpts->{'opening-brace-on-new-line'};
2081 # set shortcut flag if no blanks to be written
2082 unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
2083 $rOpts->{'swallow-optional-blank-lines'} = 1;
2086 if ( $rOpts->{'entab-leading-whitespace'} ) {
2087 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2088 warn "-et=n must use a positive integer; ignoring -et\n";
2089 $rOpts->{'entab-leading-whitespace'} = undef;
2092 # entab leading whitespace has priority over the older 'tabs' option
2093 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2096 if ( $rOpts->{'output-line-ending'} ) {
2097 unless ( is_unix() ) {
2098 warn "ignoring -ole; only works under unix\n";
2099 $rOpts->{'output-line-ending'} = undef;
2102 if ( $rOpts->{'preserve-line-endings'} ) {
2103 unless ( is_unix() ) {
2104 warn "ignoring -ple; only works under unix\n";
2105 $rOpts->{'preserve-line-endings'} = undef;
2111 sub expand_command_abbreviations {
2113 # go through @ARGV and expand any abbreviations
2115 my ( $rexpansion, $rraw_options, $config_file ) = @_;
2118 # set a pass limit to prevent an infinite loop;
2119 # 10 should be plenty, but it may be increased to allow deeply
2120 # nested expansions.
2121 my $max_passes = 10;
2124 # keep looping until all expansions have been converted into actual
2126 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2128 my $abbrev_count = 0;
2130 # loop over each item in @ARGV..
2131 foreach $word (@ARGV) {
2133 # convert any leading 'no-' to just 'no'
2134 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2136 # if it is a dash flag (instead of a file name)..
2137 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2142 # save the raw input for debug output in case of circular refs
2143 if ( $pass_count == 0 ) {
2144 push( @$rraw_options, $word );
2147 # recombine abbreviation and flag, if necessary,
2148 # to allow abbreviations with arguments such as '-vt=1'
2149 if ( $rexpansion->{ $abr . $flags } ) {
2150 $abr = $abr . $flags;
2154 # if we see this dash item in the expansion hash..
2155 if ( $rexpansion->{$abr} ) {
2158 # stuff all of the words that it expands to into the
2159 # new arg list for the next pass
2160 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2161 next unless $abbrev; # for safety; shouldn't happen
2162 push( @new_argv, '--' . $abbrev . $flags );
2166 # not in expansion hash, must be actual long name
2168 push( @new_argv, $word );
2172 # not a dash item, so just save it for the next pass
2174 push( @new_argv, $word );
2176 } # end of this pass
2178 # update parameter list @ARGV to the new one
2180 last unless ( $abbrev_count > 0 );
2182 # make sure we are not in an infinite loop
2183 if ( $pass_count == $max_passes ) {
2185 "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2186 print STDERR "Here are the raw options\n";
2188 print STDERR "(@$rraw_options)\n";
2189 my $num = @new_argv;
2192 print STDERR "After $max_passes passes here is ARGV\n";
2193 print STDERR "(@new_argv)\n";
2196 print STDERR "After $max_passes passes ARGV has $num entries\n";
2201 Please check your configuration file $config_file for circular-references.
2202 To deactivate it, use -npro.
2207 Program bug - circular-references in the %expansion hash, probably due to
2208 a recent program change.
2211 } # end of check for circular references
2212 } # end of loop over all passes
2215 # Debug routine -- this will dump the expansion hash
2216 sub dump_short_names {
2217 my $rexpansion = shift;
2219 List of short names. This list shows how all abbreviations are
2220 translated into other abbreviations and, eventually, into long names.
2221 New abbreviations may be defined in a .perltidyrc file.
2222 For a list of all long names, use perltidy --dump-long-names (-dln).
2223 --------------------------------------------------------------------------
2225 foreach my $abbrev ( sort keys %$rexpansion ) {
2226 my @list = @{ $$rexpansion{$abbrev} };
2227 print STDOUT "$abbrev --> @list\n";
2231 sub check_vms_filename {
2233 # given a valid filename (the perltidy input file)
2234 # create a modified filename and separator character
2237 # Contributed by Michael Cartmell
2239 my ( $base, $path ) = fileparse( $_[0] );
2241 # remove explicit ; version
2242 $base =~ s/;-?\d*$//
2244 # remove explicit . version ie two dots in filename NB ^ escapes a dot
2245 or $base =~ s/( # begin capture $1
2246 (?:^|[^^])\. # match a dot not preceded by a caret
2247 (?: # followed by nothing
2249 .*[^^] # anything ending in a non caret
2252 \.-?\d*$ # match . version number
2255 # normalise filename, if there are no unescaped dots then append one
2256 $base .= '.' unless $base =~ /(?:^|[^^])\./;
2258 # if we don't already have an extension then we just append the extention
2259 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2260 return ( $path . $base, $separator );
2265 # TODO: are these more standard names?
2266 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2268 # Returns a string that determines what MS OS we are on.
2269 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2270 # Returns blank string if not an MS system.
2271 # Original code contributed by: Yves Orton
2272 # We need to know this to decide where to look for config files
2274 my $rpending_complaint = shift;
2276 return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2278 # Systems built from Perl source may not have Win32.pm
2279 # But probably have Win32::GetOSVersion() anyway so the
2280 # following line is not 'required':
2281 # return $os unless eval('require Win32');
2283 # Use the standard API call to determine the version
2284 my ( $undef, $major, $minor, $build, $id );
2285 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2288 # NAME ID MAJOR MINOR
2289 # Windows NT 4 2 4 0
2290 # Windows 2000 2 5 0
2292 # Windows Server 2003 2 5 2
2294 return "win32s" unless $id; # If id==0 then its a win32s box.
2295 $os = { # Magic numbers from MSDN
2296 # documentation of GetOSVersion
2303 0 => "2000", # or NT 4, see below
2310 # If $os is undefined, the above code is out of date. Suggested updates
2312 unless ( defined $os ) {
2314 $$rpending_complaint .= <<EOS;
2315 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2316 We won't be able to look for a system-wide config file.
2320 # Unfortunately the logic used for the various versions isnt so clever..
2321 # so we have to handle an outside case.
2322 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2326 return ( $^O !~ /win32|dos/i )
2329 && ( $^O ne 'MacOS' );
2332 sub look_for_Windows {
2334 # determine Windows sub-type and location of
2335 # system-wide configuration files
2336 my $rpending_complaint = shift;
2337 my $is_Windows = ( $^O =~ /win32|dos/i );
2338 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
2339 return ( $is_Windows, $Windows_type );
2342 sub find_config_file {
2344 # look for a .perltidyrc configuration file
2345 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2346 $rpending_complaint ) = @_;
2348 $$rconfig_file_chatter .= "# Config file search...system reported as:";
2350 $$rconfig_file_chatter .= "Windows $Windows_type\n";
2353 $$rconfig_file_chatter .= " $^O\n";
2356 # sub to check file existance and record all tests
2357 my $exists_config_file = sub {
2358 my $config_file = shift;
2359 return 0 unless $config_file;
2360 $$rconfig_file_chatter .= "# Testing: $config_file\n";
2361 return -f $config_file;
2366 # look in current directory first
2367 $config_file = ".perltidyrc";
2368 return $config_file if $exists_config_file->($config_file);
2370 # Default environment vars.
2371 my @envs = qw(PERLTIDY HOME);
2373 # Check the NT/2k/XP locations, first a local machine def, then a
2375 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2377 # Now go through the enviornment ...
2378 foreach my $var (@envs) {
2379 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2380 if ( defined( $ENV{$var} ) ) {
2381 $$rconfig_file_chatter .= " = $ENV{$var}\n";
2383 # test ENV{ PERLTIDY } as file:
2384 if ( $var eq 'PERLTIDY' ) {
2385 $config_file = "$ENV{$var}";
2386 return $config_file if $exists_config_file->($config_file);
2389 # test ENV as directory:
2390 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2391 return $config_file if $exists_config_file->($config_file);
2394 $$rconfig_file_chatter .= "\n";
2398 # then look for a system-wide definition
2399 # where to look varies with OS
2402 if ($Windows_type) {
2403 my ( $os, $system, $allusers ) =
2404 Win_Config_Locs( $rpending_complaint, $Windows_type );
2406 # Check All Users directory, if there is one.
2408 $config_file = catfile( $allusers, ".perltidyrc" );
2409 return $config_file if $exists_config_file->($config_file);
2412 # Check system directory.
2413 $config_file = catfile( $system, ".perltidyrc" );
2414 return $config_file if $exists_config_file->($config_file);
2418 # Place to add customization code for other systems
2419 elsif ( $^O eq 'OS2' ) {
2421 elsif ( $^O eq 'MacOS' ) {
2423 elsif ( $^O eq 'VMS' ) {
2426 # Assume some kind of Unix
2429 $config_file = "/usr/local/etc/perltidyrc";
2430 return $config_file if $exists_config_file->($config_file);
2432 $config_file = "/etc/perltidyrc";
2433 return $config_file if $exists_config_file->($config_file);
2436 # Couldn't find a config file
2440 sub Win_Config_Locs {
2442 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2443 # or undef if its not a win32 OS. In list context returns OS, System
2444 # Directory, and All Users Directory. All Users will be empty on a
2445 # 9x/Me box. Contributed by: Yves Orton.
2447 my $rpending_complaint = shift;
2448 my $os = (@_) ? shift: Win_OS_Type();
2454 if ( $os =~ /9[58]|Me/ ) {
2455 $system = "C:/Windows";
2457 elsif ( $os =~ /NT|XP|200?/ ) {
2458 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2461 ? "C:/WinNT/profiles/All Users/"
2462 : "C:/Documents and Settings/All Users/";
2466 # This currently would only happen on a win32s computer. I dont have
2467 # one to test, so I am unsure how to proceed. Suggestions welcome!
2468 $$rpending_complaint .=
2469 "I dont know a sensible place to look for config files on an $os system.\n";
2472 return wantarray ? ( $os, $system, $allusers ) : $os;
2475 sub dump_config_file {
2477 my $config_file = shift;
2478 my $rconfig_file_chatter = shift;
2479 print STDOUT "$$rconfig_file_chatter";
2481 print STDOUT "# Dump of file: '$config_file'\n";
2482 while ( $_ = $fh->getline() ) { print STDOUT }
2483 eval { $fh->close() };
2486 print STDOUT "# ...no config file found\n";
2490 sub read_config_file {
2492 my ( $fh, $config_file, $rexpansion ) = @_;
2493 my @config_list = ();
2495 # file is bad if non-empty $death_message is returned
2496 my $death_message = "";
2500 while ( $_ = $fh->getline() ) {
2503 next if /^\s*#/; # skip full-line comment
2504 ( $_, $death_message ) = strip_comment( $_, $config_file, $line_no );
2505 last if ($death_message);
2506 s/^\s*(.*?)\s*$/$1/; # trim both ends
2509 # look for something of the general form
2514 if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2515 my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2517 # handle a new alias definition
2521 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
2526 if ( ${$rexpansion}{$name} ) {
2528 my @names = sort keys %$rexpansion;
2530 "Here is a list of all installed aliases\n(@names)\n"
2531 . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2534 ${$rexpansion}{$name} = [];
2540 my ( $rbody_parts, $msg ) = parse_args($body);
2542 $death_message = <<EOM;
2543 Error reading file '$config_file' at line number $line_no.
2545 Please fix this line or use -npro to avoid reading this file
2552 # remove leading dashes if this is an alias
2553 foreach (@$rbody_parts) { s/^\-+//; }
2554 push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2557 push( @config_list, @$rbody_parts );
2564 "Unexpected '}' seen in config file $config_file line $.\n";
2571 eval { $fh->close() };
2572 return ( \@config_list, $death_message );
2577 my ( $instr, $config_file, $line_no ) = @_;
2580 # nothing to do if no comments
2581 if ( $instr !~ /#/ ) {
2582 return ( $instr, $msg );
2585 # use simple method of no quotes
2586 elsif ( $instr !~ /['"]/ ) {
2587 $instr =~ s/\s*\#.*$//; # simple trim
2588 return ( $instr, $msg );
2591 # handle comments and quotes
2593 my $quote_char = "";
2596 # looking for ending quote character
2598 if ( $instr =~ /\G($quote_char)/gc ) {
2602 elsif ( $instr =~ /\G(.)/gc ) {
2606 # error..we reached the end without seeing the ending quote char
2609 Error reading file $config_file at line number $line_no.
2610 Did not see ending quote character <$quote_char> in this text:
2612 Please fix this line or use -npro to avoid reading this file
2618 # accumulating characters and looking for start of a quoted string
2620 if ( $instr =~ /\G([\"\'])/gc ) {
2624 elsif ( $instr =~ /\G#/gc ) {
2627 elsif ( $instr =~ /\G(.)/gc ) {
2635 return ( $outstr, $msg );
2640 # Parse a command string containing multiple string with possible
2641 # quotes, into individual commands. It might look like this, for example:
2643 # -wba=" + - " -some-thing -wbb='. && ||'
2645 # There is no need, at present, to handle escaped quote characters.
2646 # (They are not perltidy tokens, so needn't be in strings).
2649 my @body_parts = ();
2650 my $quote_char = "";
2655 # looking for ending quote character
2657 if ( $body =~ /\G($quote_char)/gc ) {
2660 elsif ( $body =~ /\G(.)/gc ) {
2664 # error..we reached the end without seeing the ending quote char
2666 if ( length($part) ) { push @body_parts, $part; }
2668 Did not see ending quote character <$quote_char> in this text:
2675 # accumulating characters and looking for start of a quoted string
2677 if ( $body =~ /\G([\"\'])/gc ) {
2680 elsif ( $body =~ /\G(\s+)/gc ) {
2681 if ( length($part) ) { push @body_parts, $part; }
2684 elsif ( $body =~ /\G(.)/gc ) {
2688 if ( length($part) ) { push @body_parts, $part; }
2693 return ( \@body_parts, $msg );
2696 sub dump_long_names {
2698 my @names = sort @_;
2700 # Command line long names (passed to GetOptions)
2701 #---------------------------------------------------------------
2702 # here is a summary of the Getopt codes:
2703 # <none> does not take an argument
2704 # =s takes a mandatory string
2705 # :s takes an optional string
2706 # =i takes a mandatory integer
2707 # :i takes an optional integer
2708 # ! does not take an argument and may be negated
2709 # i.e., -foo and -nofoo are allowed
2710 # a double dash signals the end of the options list
2712 #---------------------------------------------------------------
2715 foreach (@names) { print STDOUT "$_\n" }
2719 my @defaults = sort @_;
2720 print STDOUT "Default command line options:\n";
2721 foreach (@_) { print STDOUT "$_\n" }
2726 # write the options back out as a valid .perltidyrc file
2727 my ( $rOpts, $roption_string ) = @_;
2729 my $rGetopt_flags = \%Getopt_flags;
2730 foreach my $opt ( @{$roption_string} ) {
2732 if ( $opt =~ /(.*)(!|=.*)$/ ) {
2736 if ( defined( $rOpts->{$opt} ) ) {
2737 $rGetopt_flags->{$opt} = $flag;
2740 print STDOUT "# Final parameter set for this run:\n";
2741 foreach my $key ( sort keys %{$rOpts} ) {
2742 my $flag = $rGetopt_flags->{$key};
2743 my $value = $rOpts->{$key};
2747 if ( $flag =~ /^=/ ) {
2748 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
2749 $suffix = "=" . $value;
2751 elsif ( $flag =~ /^!/ ) {
2752 $prefix .= "no" unless ($value);
2758 "# ERROR in dump_options: unrecognized flag $flag for $key\n";
2761 print STDOUT $prefix . $key . $suffix . "\n";
2767 This is perltidy, v$VERSION
2769 Copyright 2000-2006, Steve Hancock
2771 Perltidy is free software and may be copied under the terms of the GNU
2772 General Public License, which is included in the distribution files.
2774 Complete documentation for perltidy can be found using 'man perltidy'
2775 or on the internet at http://perltidy.sourceforge.net.
2782 This is perltidy version $VERSION, a perl script indenter. Usage:
2784 perltidy [ options ] file1 file2 file3 ...
2785 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2786 perltidy [ options ] file1 -o outfile
2787 perltidy [ options ] file1 -st >outfile
2788 perltidy [ options ] <infile >outfile
2790 Options have short and long forms. Short forms are shown; see
2791 man pages for long forms. Note: '=s' indicates a required string,
2792 and '=n' indicates a required integer.
2796 -o=file name of the output file (only if single input file)
2797 -oext=s change output extension from 'tdy' to s
2798 -opath=path change path to be 'path' for output files
2799 -b backup original to .bak and modify file in-place
2800 -bext=s change default backup extension from 'bak' to s
2801 -q deactivate error messages (for running under editor)
2802 -w include non-critical warning messages in the .ERR error output
2803 -syn run perl -c to check syntax (default under unix systems)
2804 -log save .LOG file, which has useful diagnostics
2805 -f force perltidy to read a binary file
2806 -g like -log but writes more detailed .LOG file, for debugging scripts
2807 -opt write the set of options actually used to a .LOG file
2808 -npro ignore .perltidyrc configuration command file
2809 -pro=file read configuration commands from file instead of .perltidyrc
2810 -st send output to standard output, STDOUT
2811 -se send error output to standard error output, STDERR
2812 -v display version number to standard output and quit
2815 -i=n use n columns per indentation level (default n=4)
2816 -t tabs: use one tab character per indentation level, not recommeded
2817 -nt no tabs: use n spaces per indentation level (default)
2818 -et=n entab leading whitespace n spaces per tab; not recommended
2819 -io "indent only": just do indentation, no other formatting.
2820 -sil=n set starting indentation level to n; use if auto detection fails
2821 -ole=s specify output line ending (s=dos or win, mac, unix)
2822 -ple keep output line endings same as input (input must be filename)
2825 -fws freeze whitespace; this disables all whitespace changes
2826 and disables the following switches:
2827 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
2828 -bbt same as -bt but for code block braces; same as -bt if not given
2829 -bbvt block braces vertically tight; use with -bl or -bli
2830 -bbvtl=s make -bbvt to apply to selected list of block types
2831 -pt=n paren tightness (n=0, 1 or 2)
2832 -sbt=n square bracket tightness (n=0, 1, or 2)
2833 -bvt=n brace vertical tightness,
2834 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
2835 -pvt=n paren vertical tightness (see -bvt for n)
2836 -sbvt=n square bracket vertical tightness (see -bvt for n)
2837 -bvtc=n closing brace vertical tightness:
2838 n=(0=open, 1=sometimes close, 2=always close)
2839 -pvtc=n closing paren vertical tightness, see -bvtc for n.
2840 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
2841 -ci=n sets continuation indentation=n, default is n=2 spaces
2842 -lp line up parentheses, brackets, and non-BLOCK braces
2843 -sfs add space before semicolon in for( ; ; )
2844 -aws allow perltidy to add whitespace (default)
2845 -dws delete all old non-essential whitespace
2846 -icb indent closing brace of a code block
2847 -cti=n closing indentation of paren, square bracket, or non-block brace:
2848 n=0 none, =1 align with opening, =2 one full indentation level
2849 -icp equivalent to -cti=2
2850 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
2851 -wrs=s want space right of tokens in string;
2852 -sts put space before terminal semicolon of a statement
2853 -sak=s put space between keywords given in s and '(';
2854 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
2857 -fnl freeze newlines; this disables all line break changes
2858 and disables the following switches:
2859 -anl add newlines; ok to introduce new line breaks
2860 -bbs add blank line before subs and packages
2861 -bbc add blank line before block comments
2862 -bbb add blank line between major blocks
2863 -sob swallow optional blank lines
2864 -ce cuddled else; use this style: '} else {'
2865 -dnl delete old newlines (default)
2866 -mbl=n maximum consecutive blank lines (default=1)
2867 -l=n maximum line length; default n=80
2868 -bl opening brace on new line
2869 -sbl opening sub brace on new line. value of -bl is used if not given.
2870 -bli opening brace on new line and indented
2871 -bar opening brace always on right, even for long clauses
2872 -vt=n vertical tightness (requires -lp); n controls break after opening
2873 token: 0=never 1=no break if next line balanced 2=no break
2874 -vtc=n vertical tightness of closing container; n controls if closing
2875 token starts new line: 0=always 1=not unless list 1=never
2876 -wba=s want break after tokens in string; i.e. wba=': .'
2877 -wbb=s want break before tokens in string
2879 Following Old Breakpoints
2880 -boc break at old comma breaks: turns off all automatic list formatting
2881 -bol break at old logical breakpoints: or, and, ||, && (default)
2882 -bok break at old list keyword breakpoints such as map, sort (default)
2883 -bot break at old conditional (ternary ?:) operator breakpoints (default)
2884 -cab=n break at commas after a comma-arrow (=>):
2885 n=0 break at all commas after =>
2886 n=1 stable: break unless this breaks an existing one-line container
2887 n=2 break only if a one-line container cannot be formed
2888 n=3 do not treat commas after => specially at all
2891 -ibc indent block comments (default)
2892 -isbc indent spaced block comments; may indent unless no leading space
2893 -msc=n minimum desired spaces to side comment, default 4
2894 -csc add or update closing side comments after closing BLOCK brace
2895 -dcsc delete closing side comments created by a -csc command
2896 -cscp=s change closing side comment prefix to be other than '## end'
2897 -cscl=s change closing side comment to apply to selected list of blocks
2898 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
2899 -csct=n maximum number of columns of appended text, default n=20
2900 -cscw causes warning if old side comment is overwritten with -csc
2902 -sbc use 'static block comments' identified by leading '##' (default)
2903 -sbcp=s change static block comment identifier to be other than '##'
2904 -osbc outdent static block comments
2906 -ssc use 'static side comments' identified by leading '##' (default)
2907 -sscp=s change static side comment identifier to be other than '##'
2909 Delete selected text
2910 -dac delete all comments AND pod
2911 -dbc delete block comments
2912 -dsc delete side comments
2915 Send selected text to a '.TEE' file
2916 -tac tee all comments AND pod
2917 -tbc tee block comments
2918 -tsc tee side comments
2922 -olq outdent long quoted strings (default)
2923 -olc outdent a long block comment line
2924 -ola outdent statement labels
2925 -okw outdent control keywords (redo, next, last, goto, return)
2926 -okwl=s specify alternative keywords for -okw command
2929 -mft=n maximum fields per table; default n=40
2930 -x do not format lines before hash-bang line (i.e., for VMS)
2931 -asc allows perltidy to add a ';' when missing (default)
2932 -dsm allows perltidy to delete an unnecessary ';' (default)
2934 Combinations of other parameters
2935 -gnu attempt to follow GNU Coding Standards as applied to perl
2936 -mangle remove as many newlines as possible (but keep comments and pods)
2937 -extrude insert as many newlines as possible
2939 Dump and die, debugging
2940 -dop dump options used in this run to standard output and quit
2941 -ddf dump default options to standard output and quit
2942 -dsn dump all option short names to standard output and quit
2943 -dln dump option long names to standard output and quit
2944 -dpro dump whatever configuration file is in effect to standard output
2945 -dtt dump all token types to standard output and quit
2948 -html write an html file (see 'man perl2web' for many options)
2949 Note: when -html is used, no indentation or formatting are done.
2950 Hint: try perltidy -html -css=mystyle.css filename.pl
2951 and edit mystyle.css to change the appearance of filename.html.
2952 -nnn gives line numbers
2953 -pre only writes out <pre>..</pre> code section
2954 -toc places a table of contents to subs at the top (default)
2955 -pod passes pod text through pod2html (default)
2956 -frm write html as a frame (3 files)
2957 -text=s extra extension for table of contents if -frm, default='toc'
2958 -sext=s extra extension for file content if -frm, default='src'
2960 A prefix of "n" negates short form toggle switches, and a prefix of "no"
2961 negates the long forms. For example, -nasc means don't add missing
2964 If you are unable to see this entire text, try "perltidy -h | more"
2965 For more detailed information, and additional options, try "man perltidy",
2966 or go to the perltidy home page at http://perltidy.sourceforge.net
2971 sub process_this_file {
2973 my ( $truth, $beauty ) = @_;
2975 # loop to process each line of this file
2976 while ( my $line_of_tokens = $truth->get_line() ) {
2977 $beauty->write_line($line_of_tokens);
2981 eval { $beauty->finish_formatting() };
2982 $truth->report_tokenization_errors();
2987 # Use 'perl -c' to make sure that we did not create bad syntax
2988 # This is a very good independent check for programming errors
2990 # Given names of the input and output files, ($ifname, $ofname),
2991 # we do the following:
2992 # - check syntax of the input file
2993 # - if bad, all done (could be an incomplete code snippet)
2994 # - if infile syntax ok, then check syntax of the output file;
2995 # - if outfile syntax bad, issue warning; this implies a code bug!
2996 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
2998 my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
2999 my $infile_syntax_ok = 0;
3000 my $line_of_dashes = '-' x 42 . "\n";
3002 my $flags = $rOpts->{'perl-syntax-check-flags'};
3004 # be sure we invoke perl with -c
3005 # note: perl will accept repeated flags like '-c -c'. It is safest
3006 # to append another -c than try to find an interior bundled c, as
3007 # in -Tc, because such a 'c' might be in a quoted string, for example.
3008 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3010 # be sure we invoke perl with -x if requested
3011 # same comments about repeated parameters applies
3012 if ( $rOpts->{'look-for-hash-bang'} ) {
3013 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3016 # this shouldn't happen unless a termporary file couldn't be made
3017 if ( $ifname eq '-' ) {
3018 $logger_object->write_logfile_entry(
3019 "Cannot run perl -c on STDIN and STDOUT\n");
3020 return $infile_syntax_ok;
3023 $logger_object->write_logfile_entry(
3024 "checking input file syntax with perl $flags\n");
3025 $logger_object->write_logfile_entry($line_of_dashes);
3027 # Not all operating systems/shells support redirection of the standard
3029 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3031 my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
3032 $logger_object->write_logfile_entry("$perl_output\n");
3034 if ( $perl_output =~ /syntax\s*OK/ ) {
3035 $infile_syntax_ok = 1;
3036 $logger_object->write_logfile_entry($line_of_dashes);
3037 $logger_object->write_logfile_entry(
3038 "checking output file syntax with perl $flags ...\n");
3039 $logger_object->write_logfile_entry($line_of_dashes);
3042 do_syntax_check( $ofname, $flags, $error_redirection );
3043 $logger_object->write_logfile_entry("$perl_output\n");
3045 unless ( $perl_output =~ /syntax\s*OK/ ) {
3046 $logger_object->write_logfile_entry($line_of_dashes);
3047 $logger_object->warning(
3048 "The output file has a syntax error when tested with perl $flags $ofname !\n"
3050 $logger_object->warning(
3051 "This implies an error in perltidy; the file $ofname is bad\n");
3052 $logger_object->report_definite_bug();
3054 # the perl version number will be helpful for diagnosing the problem
3055 $logger_object->write_logfile_entry(
3056 qx/perl -v $error_redirection/ . "\n" );
3061 # Only warn of perl -c syntax errors. Other messages,
3062 # such as missing modules, are too common. They can be
3063 # seen by running with perltidy -w
3064 $logger_object->complain("A syntax check using perl $flags gives: \n");
3065 $logger_object->complain($line_of_dashes);
3066 $logger_object->complain("$perl_output\n");
3067 $logger_object->complain($line_of_dashes);
3068 $infile_syntax_ok = -1;
3069 $logger_object->write_logfile_entry($line_of_dashes);
3070 $logger_object->write_logfile_entry(
3071 "The output file will not be checked because of input file problems\n"
3074 return $infile_syntax_ok;
3077 sub do_syntax_check {
3078 my ( $fname, $flags, $error_redirection ) = @_;
3080 # We have to quote the filename in case it has unusual characters
3081 # or spaces. Example: this filename #CM11.pm# gives trouble.
3082 $fname = '"' . $fname . '"';
3084 # Under VMS something like -T will become -t (and an error) so we
3085 # will put quotes around the flags. Double quotes seem to work on
3086 # Unix/Windows/VMS, but this may not work on all systems. (Single
3087 # quotes do not work under Windows). It could become necessary to
3088 # put double quotes around each flag, such as: -"c" -"T"
3089 # We may eventually need some system-dependent coding here.
3090 $flags = '"' . $flags . '"';
3092 # now wish for luck...
3093 return qx/perl $flags $fname $error_redirection/;
3096 #####################################################################
3098 # This is a stripped down version of IO::Scalar
3099 # Given a reference to a scalar, it supplies either:
3100 # a getline method which reads lines (mode='r'), or
3101 # a print method which reads lines (mode='w')
3103 #####################################################################
3104 package Perl::Tidy::IOScalar;
3108 my ( $package, $rscalar, $mode ) = @_;
3109 my $ref = ref $rscalar;
3110 if ( $ref ne 'SCALAR' ) {
3112 ------------------------------------------------------------------------
3113 expecting ref to SCALAR but got ref to ($ref); trace follows:
3114 ------------------------------------------------------------------------
3118 if ( $mode eq 'w' ) {
3120 return bless [ $rscalar, $mode ], $package;
3122 elsif ( $mode eq 'r' ) {
3124 # Convert a scalar to an array.
3125 # This avoids looking for "\n" on each call to getline
3126 my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
3128 return bless [ \@array, $mode, $i_next ], $package;
3132 ------------------------------------------------------------------------
3133 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3134 ------------------------------------------------------------------------
3141 my $mode = $self->[1];
3142 if ( $mode ne 'r' ) {
3144 ------------------------------------------------------------------------
3145 getline call requires mode = 'r' but mode = ($mode); trace follows:
3146 ------------------------------------------------------------------------
3149 my $i = $self->[2]++;
3150 ##my $line = $self->[0]->[$i];
3151 return $self->[0]->[$i];
3156 my $mode = $self->[1];
3157 if ( $mode ne 'w' ) {
3159 ------------------------------------------------------------------------
3160 print call requires mode = 'w' but mode = ($mode); trace follows:
3161 ------------------------------------------------------------------------
3164 ${ $self->[0] } .= $_[0];
3166 sub close { return }
3168 #####################################################################
3170 # This is a stripped down version of IO::ScalarArray
3171 # Given a reference to an array, it supplies either:
3172 # a getline method which reads lines (mode='r'), or
3173 # a print method which reads lines (mode='w')
3175 # NOTE: this routine assumes that that there aren't any embedded
3176 # newlines within any of the array elements. There are no checks
3179 #####################################################################
3180 package Perl::Tidy::IOScalarArray;
3184 my ( $package, $rarray, $mode ) = @_;
3185 my $ref = ref $rarray;
3186 if ( $ref ne 'ARRAY' ) {
3188 ------------------------------------------------------------------------
3189 expecting ref to ARRAY but got ref to ($ref); trace follows:
3190 ------------------------------------------------------------------------
3194 if ( $mode eq 'w' ) {
3196 return bless [ $rarray, $mode ], $package;
3198 elsif ( $mode eq 'r' ) {
3200 return bless [ $rarray, $mode, $i_next ], $package;
3204 ------------------------------------------------------------------------
3205 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3206 ------------------------------------------------------------------------
3213 my $mode = $self->[1];
3214 if ( $mode ne 'r' ) {
3216 ------------------------------------------------------------------------
3217 getline requires mode = 'r' but mode = ($mode); trace follows:
3218 ------------------------------------------------------------------------
3221 my $i = $self->[2]++;
3222 ##my $line = $self->[0]->[$i];
3223 return $self->[0]->[$i];
3228 my $mode = $self->[1];
3229 if ( $mode ne 'w' ) {
3231 ------------------------------------------------------------------------
3232 print requires mode = 'w' but mode = ($mode); trace follows:
3233 ------------------------------------------------------------------------
3236 push @{ $self->[0] }, $_[0];
3238 sub close { return }
3240 #####################################################################
3242 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3243 # which returns the next line to be parsed
3245 #####################################################################
3247 package Perl::Tidy::LineSource;
3251 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3252 my $input_file_copy = undef;
3255 my $input_line_ending;
3256 if ( $rOpts->{'preserve-line-endings'} ) {
3257 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3260 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3261 return undef unless $fh;
3263 # in order to check output syntax when standard output is used,
3264 # or when it is an object, we have to make a copy of the file
3265 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3268 # Turning off syntax check when input output is used.
3269 # The reason is that temporary files cause problems on
3271 $rOpts->{'check-syntax'} = 0;
3272 $input_file_copy = '-';
3274 $$rpending_logfile_message .= <<EOM;
3275 Note: --syntax check will be skipped because standard input is used
3282 _fh_copy => $fh_copy,
3283 _filename => $input_file,
3284 _input_file_copy => $input_file_copy,
3285 _input_line_ending => $input_line_ending,
3286 _rinput_buffer => [],
3291 sub get_input_file_copy_name {
3293 my $ifname = $self->{_input_file_copy};
3295 $ifname = $self->{_filename};
3300 sub close_input_file {
3302 eval { $self->{_fh}->close() };
3303 eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
3309 my $fh = $self->{_fh};
3310 my $fh_copy = $self->{_fh_copy};
3311 my $rinput_buffer = $self->{_rinput_buffer};
3313 if ( scalar(@$rinput_buffer) ) {
3314 $line = shift @$rinput_buffer;
3317 $line = $fh->getline();
3319 # patch to read raw mac files under unix, dos
3320 # see if the first line has embedded \r's
3321 if ( $line && !$self->{_started} ) {
3322 if ( $line =~ /[\015][^\015\012]/ ) {
3324 # found one -- break the line up and store in a buffer
3325 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3326 my $count = @$rinput_buffer;
3327 $line = shift @$rinput_buffer;
3329 $self->{_started}++;
3332 if ( $line && $fh_copy ) { $fh_copy->print($line); }
3339 my $fh = $self->{_fh};
3340 my $fh_copy = $self->{_fh_copy};
3341 $line = $fh->getline();
3342 if ( $line && $fh_copy ) { $fh_copy->print($line); }
3346 #####################################################################
3348 # the Perl::Tidy::LineSink class supplies a write_line method for
3349 # actual file writing
3351 #####################################################################
3353 package Perl::Tidy::LineSink;
3357 my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3358 $rpending_logfile_message )
3361 my $fh_copy = undef;
3363 my $output_file_copy = "";
3364 my $output_file_open = 0;
3366 if ( $rOpts->{'format'} eq 'tidy' ) {
3367 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3368 unless ($fh) { die "Cannot write to output stream\n"; }
3369 $output_file_open = 1;
3372 # in order to check output syntax when standard output is used,
3373 # or when it is an object, we have to make a copy of the file
3374 if ( $output_file eq '-' || ref $output_file ) {
3375 if ( $rOpts->{'check-syntax'} ) {
3377 # Turning off syntax check when standard output is used.
3378 # The reason is that temporary files cause problems on
3380 $rOpts->{'check-syntax'} = 0;
3381 $output_file_copy = '-';
3382 $$rpending_logfile_message .= <<EOM;
3383 Note: --syntax check will be skipped because standard output is used
3391 _fh_copy => $fh_copy,
3393 _output_file => $output_file,
3394 _output_file_open => $output_file_open,
3395 _output_file_copy => $output_file_copy,
3397 _tee_file => $tee_file,
3398 _tee_file_opened => 0,
3399 _line_separator => $line_separator,
3406 my $fh = $self->{_fh};
3407 my $fh_copy = $self->{_fh_copy};
3409 my $output_file_open = $self->{_output_file_open};
3411 $_[0] .= $self->{_line_separator};
3413 $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3414 print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3416 if ( $self->{_tee_flag} ) {
3417 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3418 my $fh_tee = $self->{_fh_tee};
3419 print $fh_tee $_[0];
3423 sub get_output_file_copy {
3425 my $ofname = $self->{_output_file_copy};
3427 $ofname = $self->{_output_file};
3434 $self->{_tee_flag} = 1;
3439 $self->{_tee_flag} = 0;
3442 sub really_open_tee_file {
3444 my $tee_file = $self->{_tee_file};
3446 $fh_tee = IO::File->new(">$tee_file")
3447 or die("couldn't open TEE file $tee_file: $!\n");
3448 $self->{_tee_file_opened} = 1;
3449 $self->{_fh_tee} = $fh_tee;
3452 sub close_output_file {
3454 eval { $self->{_fh}->close() } if $self->{_output_file_open};
3455 eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3456 $self->close_tee_file();
3459 sub close_tee_file {
3462 if ( $self->{_tee_file_opened} ) {
3463 eval { $self->{_fh_tee}->close() };
3464 $self->{_tee_file_opened} = 0;
3468 #####################################################################
3470 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3471 # useful for program development.
3473 # Only one such file is created regardless of the number of input
3474 # files processed. This allows the results of processing many files
3475 # to be summarized in a single file.
3477 #####################################################################
3479 package Perl::Tidy::Diagnostics;
3485 _write_diagnostics_count => 0,
3486 _last_diagnostic_file => "",
3492 sub set_input_file {
3494 $self->{_input_file} = $_[0];
3497 # This is a diagnostic routine which is useful for program development.
3498 # Output from debug messages go to a file named DIAGNOSTICS, where
3499 # they are labeled by file and line. This allows many files to be
3500 # scanned at once for some particular condition of interest.
3501 sub write_diagnostics {
3504 unless ( $self->{_write_diagnostics_count} ) {
3505 open DIAGNOSTICS, ">DIAGNOSTICS"
3506 or death("couldn't open DIAGNOSTICS: $!\n");
3509 my $last_diagnostic_file = $self->{_last_diagnostic_file};
3510 my $input_file = $self->{_input_file};
3511 if ( $last_diagnostic_file ne $input_file ) {
3512 print DIAGNOSTICS "\nFILE:$input_file\n";
3514 $self->{_last_diagnostic_file} = $input_file;
3515 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3516 print DIAGNOSTICS "$input_line_number:\t@_";
3517 $self->{_write_diagnostics_count}++;
3520 #####################################################################
3522 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3524 #####################################################################
3526 package Perl::Tidy::Logger;
3531 my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3533 # remove any old error output file
3534 unless ( ref($warning_file) ) {
3535 if ( -e $warning_file ) { unlink($warning_file) }
3539 _log_file => $log_file,
3540 _fh_warnings => undef,
3542 _fh_warnings => undef,
3543 _last_input_line_written => 0,
3544 _at_end_of_file => 0,
3546 _block_log_output => 0,
3547 _line_of_tokens => undef,
3548 _output_line_number => undef,
3549 _wrote_line_information_string => 0,
3550 _wrote_column_headings => 0,
3551 _warning_file => $warning_file,
3552 _warning_count => 0,
3553 _complaint_count => 0,
3554 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
3555 _saw_brace_error => 0,
3556 _saw_extrude => $saw_extrude,
3557 _output_array => [],
3561 sub close_log_file {
3564 if ( $self->{_fh_warnings} ) {
3565 eval { $self->{_fh_warnings}->close() };
3566 $self->{_fh_warnings} = undef;
3570 sub get_warning_count {
3572 return $self->{_warning_count};
3575 sub get_use_prefix {
3577 return $self->{_use_prefix};
3580 sub block_log_output {
3582 $self->{_block_log_output} = 1;
3585 sub unblock_log_output {
3587 $self->{_block_log_output} = 0;
3590 sub interrupt_logfile {
3592 $self->{_use_prefix} = 0;
3593 $self->warning("\n");
3594 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
3597 sub resume_logfile {
3599 $self->write_logfile_entry( '#' x 60 . "\n" );
3600 $self->{_use_prefix} = 1;
3603 sub we_are_at_the_last_line {
3605 unless ( $self->{_wrote_line_information_string} ) {
3606 $self->write_logfile_entry("Last line\n\n");
3608 $self->{_at_end_of_file} = 1;
3611 # record some stuff in case we go down in flames
3614 my ( $line_of_tokens, $output_line_number ) = @_;
3615 my $input_line = $line_of_tokens->{_line_text};
3616 my $input_line_number = $line_of_tokens->{_line_number};
3618 # save line information in case we have to write a logfile message
3619 $self->{_line_of_tokens} = $line_of_tokens;
3620 $self->{_output_line_number} = $output_line_number;
3621 $self->{_wrote_line_information_string} = 0;
3623 my $last_input_line_written = $self->{_last_input_line_written};
3624 my $rOpts = $self->{_rOpts};
3627 ( $input_line_number - $last_input_line_written ) >=
3628 $rOpts->{'logfile-gap'}
3630 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3633 my $rlevels = $line_of_tokens->{_rlevels};
3634 my $structural_indentation_level = $$rlevels[0];
3635 $self->{_last_input_line_written} = $input_line_number;
3636 ( my $out_str = $input_line ) =~ s/^\s*//;
3639 $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3641 if ( length($out_str) > 35 ) {
3642 $out_str = substr( $out_str, 0, 35 ) . " ....";
3644 $self->logfile_output( "", "$out_str\n" );
3648 sub write_logfile_entry {
3651 # add leading >>> to avoid confusing error mesages and code
3652 $self->logfile_output( ">>>", "@_" );
3655 sub write_column_headings {
3658 $self->{_wrote_column_headings} = 1;
3659 my $routput_array = $self->{_output_array};
3660 push @{$routput_array}, <<EOM;
3661 The nesting depths in the table below are at the start of the lines.
3662 The indicated output line numbers are not always exact.
3663 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3665 in:out indent c b nesting code + messages; (messages begin with >>>)
3666 lines levels i k (code begins with one '.' per indent level)
3667 ------ ----- - - -------- -------------------------------------------
3671 sub make_line_information_string {
3673 # make columns of information when a logfile message needs to go out
3675 my $line_of_tokens = $self->{_line_of_tokens};
3676 my $input_line_number = $line_of_tokens->{_line_number};
3677 my $line_information_string = "";
3678 if ($input_line_number) {
3680 my $output_line_number = $self->{_output_line_number};
3681 my $brace_depth = $line_of_tokens->{_curly_brace_depth};
3682 my $paren_depth = $line_of_tokens->{_paren_depth};
3683 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3684 my $python_indentation_level =
3685 $line_of_tokens->{_python_indentation_level};
3686 my $rlevels = $line_of_tokens->{_rlevels};
3687 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3688 my $rci_levels = $line_of_tokens->{_rci_levels};
3689 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3691 my $structural_indentation_level = $$rlevels[0];
3693 $self->write_column_headings() unless $self->{_wrote_column_headings};
3695 # keep logfile columns aligned for scripts up to 999 lines;
3696 # for longer scripts it doesn't really matter
3697 my $extra_space = "";
3699 ( $input_line_number < 10 ) ? " "
3700 : ( $input_line_number < 100 ) ? " "
3703 ( $output_line_number < 10 ) ? " "
3704 : ( $output_line_number < 100 ) ? " "
3707 # there are 2 possible nesting strings:
3708 # the original which looks like this: (0 [1 {2
3709 # the new one, which looks like this: {{[
3710 # the new one is easier to read, and shows the order, but
3711 # could be arbitrarily long, so we use it unless it is too long
3712 my $nesting_string =
3713 "($paren_depth [$square_bracket_depth {$brace_depth";
3714 my $nesting_string_new = $$rnesting_tokens[0];
3716 my $ci_level = $$rci_levels[0];
3717 if ( $ci_level > 9 ) { $ci_level = '*' }
3718 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3720 if ( length($nesting_string_new) <= 8 ) {
3722 $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3724 if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3725 $line_information_string =
3726 "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3728 return $line_information_string;
3731 sub logfile_output {
3733 my ( $prompt, $msg ) = @_;
3734 return if ( $self->{_block_log_output} );
3736 my $routput_array = $self->{_output_array};
3737 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3738 push @{$routput_array}, "$msg";
3741 my $line_information_string = $self->make_line_information_string();
3742 $self->{_wrote_line_information_string} = 1;
3744 if ($line_information_string) {
3745 push @{$routput_array}, "$line_information_string $prompt$msg";
3748 push @{$routput_array}, "$msg";
3753 sub get_saw_brace_error {
3755 return $self->{_saw_brace_error};
3758 sub increment_brace_error {
3760 $self->{_saw_brace_error}++;
3765 use constant BRACE_WARNING_LIMIT => 10;
3766 my $saw_brace_error = $self->{_saw_brace_error};
3768 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3772 $self->{_saw_brace_error} = $saw_brace_error;
3774 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3775 $self->warning("No further warnings of this type will be given\n");
3781 # handle non-critical warning messages based on input flag
3783 my $rOpts = $self->{_rOpts};
3785 # these appear in .ERR output only if -w flag is used
3786 if ( $rOpts->{'warning-output'} ) {
3790 # otherwise, they go to the .LOG file
3792 $self->{_complaint_count}++;
3793 $self->write_logfile_entry(@_);
3799 # report errors to .ERR file (or stdout)
3801 use constant WARNING_LIMIT => 50;
3803 my $rOpts = $self->{_rOpts};
3804 unless ( $rOpts->{'quiet'} ) {
3806 my $warning_count = $self->{_warning_count};
3807 unless ($warning_count) {
3808 my $warning_file = $self->{_warning_file};
3810 if ( $rOpts->{'standard-error-output'} ) {
3811 $fh_warnings = *STDERR;
3814 ( $fh_warnings, my $filename ) =
3815 Perl::Tidy::streamhandle( $warning_file, 'w' );
3816 $fh_warnings or die("couldn't open $filename $!\n");
3817 warn "## Please see file $filename\n";
3819 $self->{_fh_warnings} = $fh_warnings;
3822 my $fh_warnings = $self->{_fh_warnings};
3823 if ( $warning_count < WARNING_LIMIT ) {
3824 if ( $self->get_use_prefix() > 0 ) {
3825 my $input_line_number =
3826 Perl::Tidy::Tokenizer::get_input_line_number();
3827 print $fh_warnings "$input_line_number:\t@_";
3828 $self->write_logfile_entry("WARNING: @_");
3831 print $fh_warnings @_;
3832 $self->write_logfile_entry(@_);
3836 $self->{_warning_count} = $warning_count;
3838 if ( $warning_count == WARNING_LIMIT ) {
3839 print $fh_warnings "No further warnings will be given";
3844 # programming bug codes:
3846 # 0 = maybe, not sure.
3848 sub report_possible_bug {
3850 my $saw_code_bug = $self->{_saw_code_bug};
3851 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
3854 sub report_definite_bug {
3856 $self->{_saw_code_bug} = 1;
3859 sub ask_user_for_bug_report {
3862 my ( $infile_syntax_ok, $formatter ) = @_;
3863 my $saw_code_bug = $self->{_saw_code_bug};
3864 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
3865 $self->warning(<<EOM);
3867 You may have encountered a code bug in perltidy. If you think so, and
3868 the problem is not listed in the BUGS file at
3869 http://perltidy.sourceforge.net, please report it so that it can be
3870 corrected. Include the smallest possible script which has the problem,
3871 along with the .LOG file. See the manual pages for contact information.
3876 elsif ( $saw_code_bug == 1 ) {
3877 if ( $self->{_saw_extrude} ) {
3878 $self->warning(<<EOM);
3880 You may have encountered a bug in perltidy. However, since you are using the
3881 -extrude option, the problem may be with perl or one of its modules, which have
3882 occasional problems with this type of file. If you believe that the
3883 problem is with perltidy, and the problem is not listed in the BUGS file at
3884 http://perltidy.sourceforge.net, please report it so that it can be corrected.
3885 Include the smallest possible script which has the problem, along with the .LOG
3886 file. See the manual pages for contact information.
3891 $self->warning(<<EOM);
3893 Oops, you seem to have encountered a bug in perltidy. Please check the
3894 BUGS file at http://perltidy.sourceforge.net. If the problem is not
3895 listed there, please report it so that it can be corrected. Include the
3896 smallest possible script which produces this message, along with the
3897 .LOG file if appropriate. See the manual pages for contact information.
3898 Your efforts are appreciated.
3901 my $added_semicolon_count = 0;
3903 $added_semicolon_count =
3904 $formatter->get_added_semicolon_count();
3906 if ( $added_semicolon_count > 0 ) {
3907 $self->warning(<<EOM);
3909 The log file shows that perltidy added $added_semicolon_count semicolons.
3910 Please rerun with -nasc to see if that is the cause of the syntax error. Even
3911 if that is the problem, please report it so that it can be fixed.
3921 # called after all formatting to summarize errors
3923 my ( $infile_syntax_ok, $formatter ) = @_;
3925 my $rOpts = $self->{_rOpts};
3926 my $warning_count = $self->{_warning_count};
3927 my $saw_code_bug = $self->{_saw_code_bug};
3929 my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
3930 || $saw_code_bug == 1
3931 || $rOpts->{'logfile'};
3932 my $log_file = $self->{_log_file};
3933 if ($warning_count) {
3934 if ($save_logfile) {
3935 $self->block_log_output(); # avoid echoing this to the logfile
3937 "The logfile $log_file may contain useful information\n");
3938 $self->unblock_log_output();
3941 if ( $self->{_complaint_count} > 0 ) {
3943 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
3947 if ( $self->{_saw_brace_error}
3948 && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
3950 $self->warning("To save a full .LOG file rerun with -g\n");
3953 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
3955 if ($save_logfile) {
3956 my $log_file = $self->{_log_file};
3957 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
3959 my $routput_array = $self->{_output_array};
3960 foreach ( @{$routput_array} ) { $fh->print($_) }
3961 eval { $fh->close() };
3966 #####################################################################
3968 # The Perl::Tidy::DevNull class supplies a dummy print method
3970 #####################################################################
3972 package Perl::Tidy::DevNull;
3973 sub new { return bless {}, $_[0] }
3974 sub print { return }
3975 sub close { return }
3977 #####################################################################
3979 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
3981 #####################################################################
3983 package Perl::Tidy::HtmlWriter;
3993 %short_to_long_names
3997 $missing_html_entities
4000 # replace unsafe characters with HTML entity representation if HTML::Entities
4002 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4006 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4007 $html_src_extension )
4010 my $html_file_opened = 0;
4012 ( $html_fh, my $html_filename ) =
4013 Perl::Tidy::streamhandle( $html_file, 'w' );
4015 warn("can't open $html_file: $!\n");
4018 $html_file_opened = 1;
4020 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4021 $input_file = "NONAME";
4024 # write the table of contents to a string
4026 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4029 my @pre_string_stack;
4030 if ( $rOpts->{'html-pre-only'} ) {
4032 # pre section goes directly to the output stream
4033 $html_pre_fh = $html_fh;
4034 $html_pre_fh->print( <<"PRE_END");
4040 # pre section go out to a temporary string
4042 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4043 push @pre_string_stack, \$pre_string;
4046 # pod text gets diverted if the 'pod2html' is used
4049 if ( $rOpts->{'pod2html'} ) {
4050 if ( $rOpts->{'html-pre-only'} ) {
4051 undef $rOpts->{'pod2html'};
4054 eval "use Pod::Html";
4057 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4058 undef $rOpts->{'pod2html'};
4061 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4068 if ( $rOpts->{'frames'} ) {
4069 unless ($extension) {
4071 "cannot use frames without a specified output extension; ignoring -frm\n";
4072 undef $rOpts->{'frames'};
4075 $toc_filename = $input_file . $html_toc_extension . $extension;
4076 $src_filename = $input_file . $html_src_extension . $extension;
4080 # ----------------------------------------------------------
4081 # Output is now directed as follows:
4082 # html_toc_fh <-- table of contents items
4083 # html_pre_fh <-- the <pre> section of formatted code, except:
4084 # html_pod_fh <-- pod goes here with the pod2html option
4085 # ----------------------------------------------------------
4087 my $title = $rOpts->{'title'};
4089 ( $title, my $path ) = fileparse($input_file);
4091 my $toc_item_count = 0;
4092 my $in_toc_package = "";
4095 _input_file => $input_file, # name of input file
4096 _title => $title, # title, unescaped
4097 _html_file => $html_file, # name of .html output file
4098 _toc_filename => $toc_filename, # for frames option
4099 _src_filename => $src_filename, # for frames option
4100 _html_file_opened => $html_file_opened, # a flag
4101 _html_fh => $html_fh, # the output stream
4102 _html_pre_fh => $html_pre_fh, # pre section goes here
4103 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
4104 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4105 _rpod_string => \$pod_string, # string holding pod
4106 _pod_cut_count => 0, # how many =cut's?
4107 _html_toc_fh => $html_toc_fh, # fh for table of contents
4108 _rtoc_string => \$toc_string, # string holding toc
4109 _rtoc_item_count => \$toc_item_count, # how many toc items
4110 _rin_toc_package => \$in_toc_package, # package name
4111 _rtoc_name_count => {}, # hash to track unique names
4112 _rpackage_stack => [], # stack to check for package
4114 _rlast_level => \$last_level, # brace indentation level
4120 # Add an item to the html table of contents.
4121 # This is called even if no table of contents is written,
4122 # because we still want to put the anchors in the <pre> text.
4123 # We are given an anchor name and its type; types are:
4124 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
4125 # There must be an 'EOF' call at the end to wrap things up.
4127 my ( $name, $type ) = @_;
4128 my $html_toc_fh = $self->{_html_toc_fh};
4129 my $html_pre_fh = $self->{_html_pre_fh};
4130 my $rtoc_name_count = $self->{_rtoc_name_count};
4131 my $rtoc_item_count = $self->{_rtoc_item_count};
4132 my $rlast_level = $self->{_rlast_level};
4133 my $rin_toc_package = $self->{_rin_toc_package};
4134 my $rpackage_stack = $self->{_rpackage_stack};
4136 # packages contain sublists of subs, so to avoid errors all package
4137 # items are written and finished with the following routines
4138 my $end_package_list = sub {
4139 if ($$rin_toc_package) {
4140 $html_toc_fh->print("</ul>\n</li>\n");
4141 $$rin_toc_package = "";
4145 my $start_package_list = sub {
4146 my ( $unique_name, $package ) = @_;
4147 if ($$rin_toc_package) { $end_package_list->() }
4148 $html_toc_fh->print(<<EOM);
4149 <li><a href=\"#$unique_name\">package $package</a>
4152 $$rin_toc_package = $package;
4155 # start the table of contents on the first item
4156 unless ($$rtoc_item_count) {
4158 # but just quit if we hit EOF without any other entries
4159 # in this case, there will be no toc
4160 return if ( $type eq 'EOF' );
4161 $html_toc_fh->print( <<"TOC_END");
4162 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4166 $$rtoc_item_count++;
4168 # make a unique anchor name for this location:
4169 # - packages get a 'package-' prefix
4170 # - subs use their names
4171 my $unique_name = $name;
4172 if ( $type eq 'package' ) { $unique_name = "package-$name" }
4174 # append '-1', '-2', etc if necessary to make unique; this will
4175 # be unique because subs and packages cannot have a '-'
4176 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4177 $unique_name .= "-$count";
4180 # - all names get terminal '-' if pod2html is used, to avoid
4181 # conflicts with anchor names created by pod2html
4182 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4184 # start/stop lists of subs
4185 if ( $type eq 'sub' ) {
4186 my $package = $rpackage_stack->[$$rlast_level];
4187 unless ($package) { $package = 'main' }
4189 # if we're already in a package/sub list, be sure its the right
4190 # package or else close it
4191 if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4192 $end_package_list->();
4195 # start a package/sub list if necessary
4196 unless ($$rin_toc_package) {
4197 $start_package_list->( $unique_name, $package );
4201 # now write an entry in the toc for this item
4202 if ( $type eq 'package' ) {
4203 $start_package_list->( $unique_name, $name );
4205 elsif ( $type eq 'sub' ) {
4206 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4209 $end_package_list->();
4210 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4213 # write the anchor in the <pre> section
4214 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4216 # end the table of contents, if any, on the end of file
4217 if ( $type eq 'EOF' ) {
4218 $html_toc_fh->print( <<"TOC_END");
4220 <!-- END CODE INDEX -->
4227 # This is the official list of tokens which may be identified by the
4228 # user. Long names are used as getopt keys. Short names are
4229 # convenient short abbreviations for specifying input. Short names
4230 # somewhat resemble token type characters, but are often different
4231 # because they may only be alphanumeric, to allow command line
4232 # input. Also, note that because of case insensitivity of html,
4233 # this table must be in a single case only (I've chosen to use all
4235 # When adding NEW_TOKENS: update this hash table
4236 # short names => long names
4237 %short_to_long_names = (
4247 'pu' => 'punctuation',
4248 'i' => 'identifier',
4250 'h' => 'here-doc-target',
4251 'hh' => 'here-doc-text',
4253 'sc' => 'semicolon',
4254 'm' => 'subroutine',
4258 # Now we have to map actual token types into one of the above short
4259 # names; any token types not mapped will get 'punctuation'
4262 # The values of this hash table correspond to the keys of the
4263 # previous hash table.
4264 # The keys of this hash table are token types and can be seen
4265 # by running with --dump-token-types (-dtt).
4267 # When adding NEW_TOKENS: update this hash table
4268 # $type => $short_name
4269 %token_short_names = (
4294 # These token types will all be called identifiers for now
4295 # FIXME: need to separate user defined modules as separate type
4296 my @identifier = qw" i t U C Y Z G :: ";
4297 @token_short_names{@identifier} = ('i') x scalar(@identifier);
4299 # These token types will be called 'structure'
4300 my @structure = qw" { } ";
4301 @token_short_names{@structure} = ('s') x scalar(@structure);
4303 # OLD NOTES: save for reference
4304 # Any of these could be added later if it would be useful.
4305 # For now, they will by default become punctuation
4306 # my @list = qw" L R [ ] ";
4307 # @token_long_names{@list} = ('non-structure') x scalar(@list);
4310 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4312 # @token_long_names{@list} = ('math') x scalar(@list);
4314 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
4315 # @token_long_names{@list} = ('bit') x scalar(@list);
4317 # my @list = qw" == != < > <= <=> ";
4318 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4320 # my @list = qw" && || ! &&= ||= //= ";
4321 # @token_long_names{@list} = ('logical') x scalar(@list);
4323 # my @list = qw" . .= =~ !~ x x= ";
4324 # @token_long_names{@list} = ('string-operators') x scalar(@list);
4327 # my @list = qw" .. -> <> ... \ ? ";
4328 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
4332 sub make_getopt_long_names {
4334 my ($rgetopt_names) = @_;
4335 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4336 push @$rgetopt_names, "html-color-$name=s";
4337 push @$rgetopt_names, "html-italic-$name!";
4338 push @$rgetopt_names, "html-bold-$name!";
4340 push @$rgetopt_names, "html-color-background=s";
4341 push @$rgetopt_names, "html-linked-style-sheet=s";
4342 push @$rgetopt_names, "nohtml-style-sheets";
4343 push @$rgetopt_names, "html-pre-only";
4344 push @$rgetopt_names, "html-line-numbers";
4345 push @$rgetopt_names, "html-entities!";
4346 push @$rgetopt_names, "stylesheet";
4347 push @$rgetopt_names, "html-table-of-contents!";
4348 push @$rgetopt_names, "pod2html!";
4349 push @$rgetopt_names, "frames!";
4350 push @$rgetopt_names, "html-toc-extension=s";
4351 push @$rgetopt_names, "html-src-extension=s";
4353 # Pod::Html parameters:
4354 push @$rgetopt_names, "backlink=s";
4355 push @$rgetopt_names, "cachedir=s";
4356 push @$rgetopt_names, "htmlroot=s";
4357 push @$rgetopt_names, "libpods=s";
4358 push @$rgetopt_names, "podpath=s";
4359 push @$rgetopt_names, "podroot=s";
4360 push @$rgetopt_names, "title=s";
4362 # Pod::Html parameters with leading 'pod' which will be removed
4363 # before the call to Pod::Html
4364 push @$rgetopt_names, "podquiet!";
4365 push @$rgetopt_names, "podverbose!";
4366 push @$rgetopt_names, "podrecurse!";
4367 push @$rgetopt_names, "podflush";
4368 push @$rgetopt_names, "podheader!";
4369 push @$rgetopt_names, "podindex!";
4372 sub make_abbreviated_names {
4374 # We're appending things like this to the expansion list:
4375 # 'hcc' => [qw(html-color-comment)],
4376 # 'hck' => [qw(html-color-keyword)],
4379 my ($rexpansion) = @_;
4381 # abbreviations for color/bold/italic properties
4382 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4383 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
4384 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
4385 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
4386 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4387 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4390 # abbreviations for all other html options
4391 ${$rexpansion}{"hcbg"} = ["html-color-background"];
4392 ${$rexpansion}{"pre"} = ["html-pre-only"];
4393 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
4394 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
4395 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
4396 ${$rexpansion}{"hent"} = ["html-entities"];
4397 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4398 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
4399 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
4400 ${$rexpansion}{"ss"} = ["stylesheet"];
4401 ${$rexpansion}{"pod"} = ["pod2html"];
4402 ${$rexpansion}{"npod"} = ["nopod2html"];
4403 ${$rexpansion}{"frm"} = ["frames"];
4404 ${$rexpansion}{"nfrm"} = ["noframes"];
4405 ${$rexpansion}{"text"} = ["html-toc-extension"];
4406 ${$rexpansion}{"sext"} = ["html-src-extension"];
4411 # This will be called once after options have been parsed
4415 # X11 color names for default settings that seemed to look ok
4416 # (these color names are only used for programming clarity; the hex
4417 # numbers are actually written)
4418 use constant ForestGreen => "#228B22";
4419 use constant SaddleBrown => "#8B4513";
4420 use constant magenta4 => "#8B008B";
4421 use constant IndianRed3 => "#CD5555";
4422 use constant DeepSkyBlue4 => "#00688B";
4423 use constant MediumOrchid3 => "#B452CD";
4424 use constant black => "#000000";
4425 use constant white => "#FFFFFF";
4426 use constant red => "#FF0000";
4428 # set default color, bold, italic properties
4429 # anything not listed here will be given the default (punctuation) color --
4430 # these types currently not listed and get default: ws pu s sc cm co p
4431 # When adding NEW_TOKENS: add an entry here if you don't want defaults
4433 # set_default_properties( $short_name, default_color, bold?, italic? );
4434 set_default_properties( 'c', ForestGreen, 0, 0 );
4435 set_default_properties( 'pd', ForestGreen, 0, 1 );
4436 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
4437 set_default_properties( 'q', IndianRed3, 0, 0 );
4438 set_default_properties( 'hh', IndianRed3, 0, 1 );
4439 set_default_properties( 'h', IndianRed3, 1, 0 );
4440 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
4441 set_default_properties( 'w', black, 0, 0 );
4442 set_default_properties( 'n', MediumOrchid3, 0, 0 );
4443 set_default_properties( 'v', MediumOrchid3, 0, 0 );
4444 set_default_properties( 'j', IndianRed3, 1, 0 );
4445 set_default_properties( 'm', red, 1, 0 );
4447 set_default_color( 'html-color-background', white );
4448 set_default_color( 'html-color-punctuation', black );
4450 # setup property lookup tables for tokens based on their short names
4451 # every token type has a short name, and will use these tables
4452 # to do the html markup
4453 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4454 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
4455 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
4456 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4459 # write style sheet to STDOUT and die if requested
4460 if ( defined( $rOpts->{'stylesheet'} ) ) {
4461 write_style_sheet_file('-');
4465 # make sure user gives a file name after -css
4466 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4467 $css_linkname = $rOpts->{'html-linked-style-sheet'};
4468 if ( $css_linkname =~ /^-/ ) {
4469 die "You must specify a valid filename after -css\n";
4473 # check for conflict
4474 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4475 $rOpts->{'nohtml-style-sheets'} = 0;
4476 warning("You can't specify both -css and -nss; -nss ignored\n");
4479 # write a style sheet file if necessary
4480 if ($css_linkname) {
4482 # if the selected filename exists, don't write, because user may
4483 # have done some work by hand to create it; use backup name instead
4484 # Also, this will avoid a potential disaster in which the user
4485 # forgets to specify the style sheet, like this:
4486 # perltidy -html -css myfile1.pl myfile2.pl
4487 # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4488 my $css_filename = $css_linkname;
4489 unless ( -e $css_filename ) {
4490 write_style_sheet_file($css_filename);
4493 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4496 sub write_style_sheet_file {
4498 my $css_filename = shift;
4500 unless ( $fh = IO::File->new("> $css_filename") ) {
4501 die "can't open $css_filename: $!\n";
4503 write_style_sheet_data($fh);
4504 eval { $fh->close };
4507 sub write_style_sheet_data {
4509 # write the style sheet data to an open file handle
4512 my $bg_color = $rOpts->{'html-color-background'};
4513 my $text_color = $rOpts->{'html-color-punctuation'};
4515 # pre-bgcolor is new, and may not be defined
4516 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4517 $pre_bg_color = $bg_color unless $pre_bg_color;
4519 $fh->print(<<"EOM");
4520 /* default style sheet generated by perltidy */
4521 body {background: $bg_color; color: $text_color}
4522 pre { color: $text_color;
4523 background: $pre_bg_color;
4524 font-family: courier;
4529 foreach my $short_name ( sort keys %short_to_long_names ) {
4530 my $long_name = $short_to_long_names{$short_name};
4532 my $abbrev = '.' . $short_name;
4533 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
4534 my $color = $html_color{$short_name};
4535 if ( !defined($color) ) { $color = $text_color }
4536 $fh->print("$abbrev \{ color: $color;");
4538 if ( $html_bold{$short_name} ) {
4539 $fh->print(" font-weight:bold;");
4542 if ( $html_italic{$short_name} ) {
4543 $fh->print(" font-style:italic;");
4545 $fh->print("} /* $long_name */\n");
4549 sub set_default_color {
4551 # make sure that options hash $rOpts->{$key} contains a valid color
4552 my ( $key, $color ) = @_;
4553 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4554 $rOpts->{$key} = check_RGB($color);
4559 # if color is a 6 digit hex RGB value, prepend a #, otherwise
4560 # assume that it is a valid ascii color name
4562 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4566 sub set_default_properties {
4567 my ( $short_name, $color, $bold, $italic ) = @_;
4569 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4571 $key = "html-bold-$short_to_long_names{$short_name}";
4572 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4573 $key = "html-italic-$short_to_long_names{$short_name}";
4574 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4579 # Use Pod::Html to process the pod and make the page
4580 # then merge the perltidy code sections into it.
4581 # return 1 if success, 0 otherwise
4583 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4584 my $input_file = $self->{_input_file};
4585 my $title = $self->{_title};
4586 my $success_flag = 0;
4588 # don't try to use pod2html if no pod
4589 unless ($pod_string) {
4590 return $success_flag;
4593 # Pod::Html requires a real temporary filename
4594 # If we are making a frame, we have a name available
4595 # Otherwise, we have to fine one
4597 if ( $rOpts->{'frames'} ) {
4598 $tmpfile = $self->{_toc_filename};
4601 $tmpfile = Perl::Tidy::make_temporary_filename();
4603 my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4605 warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4606 return $success_flag;
4609 #------------------------------------------------------------------
4610 # Warning: a temporary file is open; we have to clean up if
4611 # things go bad. From here on all returns should be by going to
4612 # RETURN so that the temporary file gets unlinked.
4613 #------------------------------------------------------------------
4615 # write the pod text to the temporary file
4616 $fh_tmp->print($pod_string);
4619 # Hand off the pod to pod2html.
4620 # Note that we can use the same temporary filename for input and output
4621 # because of the way pod2html works.
4625 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4628 # Flags with string args:
4629 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4630 # "podpath=s", "podroot=s"
4631 # Note: -css=s is handled by perltidy itself
4632 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4633 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4636 # Toggle switches; these have extra leading 'pod'
4637 # "header!", "index!", "recurse!", "quiet!", "verbose!"
4638 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4639 my $kwd = $kw; # allows us to strip 'pod'
4640 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4641 elsif ( defined( $rOpts->{$kw} ) ) {
4643 push @args, "--no$kwd";
4649 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4651 # Must clean up if pod2html dies (it can);
4652 # Be careful not to overwrite callers __DIE__ routine
4653 local $SIG{__DIE__} = sub {
4655 unlink $tmpfile if -e $tmpfile;
4661 $fh_tmp = IO::File->new( $tmpfile, 'r' );
4664 # this error shouldn't happen ... we just used this filename
4665 warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4669 my $html_fh = $self->{_html_fh};
4674 # This routine will write the html selectively and store the toc
4675 my $html_print = sub {
4677 $html_fh->print($_) unless ($no_print);
4678 if ($in_toc) { push @toc, $_ }
4682 # loop over lines of html output from pod2html and merge in
4683 # the necessary perltidy html sections
4684 my ( $saw_body, $saw_index, $saw_body_end );
4685 while ( my $line = $fh_tmp->getline() ) {
4687 if ( $line =~ /^\s*<html>\s*$/i ) {
4688 my $date = localtime;
4689 $html_print->("<!-- Generated by perltidy on $date -->\n");
4690 $html_print->($line);
4693 # Copy the perltidy css, if any, after <body> tag
4694 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4696 $html_print->($css_string) if $css_string;
4697 $html_print->($line);
4699 # add a top anchor and heading
4700 $html_print->("<a name=\"-top-\"></a>\n");
4701 $title = escape_html($title);
4702 $html_print->("<h1>$title</h1>\n");
4704 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4707 # when frames are used, an extra table of contents in the
4708 # contents panel is confusing, so don't print it
4709 $no_print = $rOpts->{'frames'}
4710 || !$rOpts->{'html-table-of-contents'};
4711 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4712 $html_print->($line);
4715 # Copy the perltidy toc, if any, after the Pod::Html toc
4716 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4718 $html_print->($line);
4720 $html_print->("<hr />\n") if $rOpts->{'frames'};
4721 $html_print->("<h2>Code Index:</h2>\n");
4722 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4723 $html_print->(@toc);
4729 # Copy one perltidy section after each marker
4730 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4732 $html_print->($1) if $1;
4734 # Intermingle code and pod sections if we saw multiple =cut's.
4735 if ( $self->{_pod_cut_count} > 1 ) {
4736 my $rpre_string = shift(@$rpre_string_stack);
4737 if ($$rpre_string) {
4738 $html_print->('<pre>');
4739 $html_print->($$rpre_string);
4740 $html_print->('</pre>');
4744 # shouldn't happen: we stored a string before writing
4747 "Problem merging html stream with pod2html; order may be wrong\n";
4749 $html_print->($line);
4752 # If didn't see multiple =cut lines, we'll put the pod out first
4753 # and then the code, because it's less confusing.
4756 # since we are not intermixing code and pod, we don't need
4757 # or want any <hr> lines which separated pod and code
4758 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4762 # Copy any remaining code section before the </body> tag
4763 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4765 if (@$rpre_string_stack) {
4766 unless ( $self->{_pod_cut_count} > 1 ) {
4767 $html_print->('<hr />');
4769 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4770 $html_print->('<pre>');
4771 $html_print->($$rpre_string);
4772 $html_print->('</pre>');
4775 $html_print->($line);
4778 $html_print->($line);
4783 unless ($saw_body) {
4784 warn "Did not see <body> in pod2html output\n";
4787 unless ($saw_body_end) {
4788 warn "Did not see </body> in pod2html output\n";
4791 unless ($saw_index) {
4792 warn "Did not find INDEX END in pod2html output\n";
4797 eval { $html_fh->close() };
4799 # note that we have to unlink tmpfile before making frames
4800 # because the tmpfile may be one of the names used for frames
4801 unlink $tmpfile if -e $tmpfile;
4802 if ( $success_flag && $rOpts->{'frames'} ) {
4803 $self->make_frame( \@toc );
4805 return $success_flag;
4810 # Make a frame with table of contents in the left panel
4811 # and the text in the right panel.
4813 # $html_filename contains the no-frames html output
4814 # $rtoc is a reference to an array with the table of contents
4817 my $input_file = $self->{_input_file};
4818 my $html_filename = $self->{_html_file};
4819 my $toc_filename = $self->{_toc_filename};
4820 my $src_filename = $self->{_src_filename};
4821 my $title = $self->{_title};
4822 $title = escape_html($title);
4824 # FUTURE input parameter:
4825 my $top_basename = "";
4827 # We need to produce 3 html files:
4828 # 1. - the table of contents
4829 # 2. - the contents (source code) itself
4830 # 3. - the frame which contains them
4832 # get basenames for relative links
4833 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
4834 my ( $src_basename, $src_path ) = fileparse($src_filename);
4836 # 1. Make the table of contents panel, with appropriate changes
4837 # to the anchor names
4838 my $src_frame_name = 'SRC';
4840 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
4843 # 2. The current .html filename is renamed to be the contents panel
4844 rename( $html_filename, $src_filename )
4845 or die "Cannot rename $html_filename to $src_filename:$!\n";
4847 # 3. Then use the original html filename for the frame
4849 $title, $html_filename, $top_basename,
4850 $toc_basename, $src_basename, $src_frame_name
4854 sub write_toc_html {
4856 # write a separate html table of contents file for frames
4857 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
4858 my $fh = IO::File->new( $toc_filename, 'w' )
4859 or die "Cannot open $toc_filename:$!\n";
4863 <title>$title</title>
4866 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
4870 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
4871 $fh->print( join "", @$rtoc );
4880 sub write_frame_html {
4882 # write an html file to be the table of contents frame
4884 $title, $frame_filename, $top_basename,
4885 $toc_basename, $src_basename, $src_frame_name
4888 my $fh = IO::File->new( $frame_filename, 'w' )
4889 or die "Cannot open $toc_basename:$!\n";
4892 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4893 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
4894 <?xml version="1.0" encoding="iso-8859-1" ?>
4895 <html xmlns="http://www.w3.org/1999/xhtml">
4897 <title>$title</title>
4901 # two left panels, one right, if master index file
4902 if ($top_basename) {
4904 <frameset cols="20%,80%">
4905 <frameset rows="30%,70%">
4906 <frame src = "$top_basename" />
4907 <frame src = "$toc_basename" />
4912 # one left panels, one right, if no master index file
4915 <frameset cols="20%,*">
4916 <frame src = "$toc_basename" />
4920 <frame src = "$src_basename" name = "$src_frame_name" />
4923 <p>If you see this message, you are using a non-frame-capable web client.</p>
4924 <p>This document contains:</p>
4926 <li><a href="$toc_basename">A table of contents</a></li>
4927 <li><a href="$src_basename">The source code</a></li>
4936 sub change_anchor_names {
4938 # add a filename and target to anchors
4939 # also return the first anchor
4940 my ( $rlines, $filename, $target ) = @_;
4942 foreach my $line (@$rlines) {
4944 # We're looking for lines like this:
4945 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
4946 # ---- - -------- -----------------
4948 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
4952 my $href = "$filename#$name";
4953 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
4954 unless ($first_anchor) { $first_anchor = $href }
4957 return $first_anchor;
4960 sub close_html_file {
4962 return unless $self->{_html_file_opened};
4964 my $html_fh = $self->{_html_fh};
4965 my $rtoc_string = $self->{_rtoc_string};
4967 # There are 3 basic paths to html output...
4969 # ---------------------------------
4970 # Path 1: finish up if in -pre mode
4971 # ---------------------------------
4972 if ( $rOpts->{'html-pre-only'} ) {
4973 $html_fh->print( <<"PRE_END");
4976 eval { $html_fh->close() };
4981 $self->add_toc_item( 'EOF', 'EOF' );
4983 my $rpre_string_stack = $self->{_rpre_string_stack};
4985 # Patch to darken the <pre> background color in case of pod2html and
4986 # interleaved code/documentation. Otherwise, the distinction
4987 # between code and documentation is blurred.
4988 if ( $rOpts->{pod2html}
4989 && $self->{_pod_cut_count} >= 1
4990 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
4992 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
4995 # put the css or its link into a string, if used
4997 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
4999 # use css linked to another file
5000 if ( $rOpts->{'html-linked-style-sheet'} ) {
5002 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5006 # use css embedded in this file
5007 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5008 $fh_css->print( <<'ENDCSS');
5009 <style type="text/css">
5012 write_style_sheet_data($fh_css);
5013 $fh_css->print( <<"ENDCSS");
5019 # -----------------------------------------------------------
5020 # path 2: use pod2html if requested
5021 # If we fail for some reason, continue on to path 3
5022 # -----------------------------------------------------------
5023 if ( $rOpts->{'pod2html'} ) {
5024 my $rpod_string = $self->{_rpod_string};
5025 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5026 $rpre_string_stack )
5030 # --------------------------------------------------
5031 # path 3: write code in html, with pod only in italics
5032 # --------------------------------------------------
5033 my $input_file = $self->{_input_file};
5034 my $title = escape_html($input_file);
5035 my $date = localtime;
5036 $html_fh->print( <<"HTML_START");
5037 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5038 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5039 <!-- Generated by perltidy on $date -->
5040 <html xmlns="http://www.w3.org/1999/xhtml">
5042 <title>$title</title>
5045 # output the css, if used
5047 $html_fh->print($css_string);
5048 $html_fh->print( <<"ENDCSS");
5055 $html_fh->print( <<"HTML_START");
5057 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5061 $html_fh->print("<a name=\"-top-\"></a>\n");
5062 $html_fh->print( <<"EOM");
5066 # copy the table of contents
5068 && !$rOpts->{'frames'}
5069 && $rOpts->{'html-table-of-contents'} )
5071 $html_fh->print($$rtoc_string);
5074 # copy the pre section(s)
5075 my $fname_comment = $input_file;
5076 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
5077 $html_fh->print( <<"END_PRE");
5079 <!-- contents of filename: $fname_comment -->
5083 foreach my $rpre_string (@$rpre_string_stack) {
5084 $html_fh->print($$rpre_string);
5087 # and finish the html page
5088 $html_fh->print( <<"HTML_END");
5093 eval { $html_fh->close() }; # could be object without close method
5095 if ( $rOpts->{'frames'} ) {
5096 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5097 $self->make_frame( \@toc );
5103 my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5104 my ( @colored_tokens, $j, $string, $type, $token, $level );
5105 my $rlast_level = $self->{_rlast_level};
5106 my $rpackage_stack = $self->{_rpackage_stack};
5108 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5109 $type = $$rtoken_type[$j];
5110 $token = $$rtokens[$j];
5111 $level = $$rlevels[$j];
5112 $level = 0 if ( $level < 0 );
5114 #-------------------------------------------------------
5115 # Update the package stack. The package stack is needed to keep
5116 # the toc correct because some packages may be declared within
5117 # blocks and go out of scope when we leave the block.
5118 #-------------------------------------------------------
5119 if ( $level > $$rlast_level ) {
5120 unless ( $rpackage_stack->[ $level - 1 ] ) {
5121 $rpackage_stack->[ $level - 1 ] = 'main';
5123 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5125 elsif ( $level < $$rlast_level ) {
5126 my $package = $rpackage_stack->[$level];
5127 unless ($package) { $package = 'main' }
5129 # if we change packages due to a nesting change, we
5130 # have to make an entry in the toc
5131 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5132 $self->add_toc_item( $package, 'package' );
5135 $$rlast_level = $level;
5137 #-------------------------------------------------------
5138 # Intercept a sub name here; split it
5139 # into keyword 'sub' and sub name; and add an
5141 #-------------------------------------------------------
5142 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5143 $token = $self->markup_html_element( $1, 'k' );
5144 push @colored_tokens, $token;
5148 # but don't include sub declarations in the toc;
5149 # these wlll have leading token types 'i;'
5150 my $signature = join "", @$rtoken_type;
5151 unless ( $signature =~ /^i;/ ) {
5152 my $subname = $token;
5153 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5154 $self->add_toc_item( $subname, 'sub' );
5158 #-------------------------------------------------------
5159 # Intercept a package name here; split it
5160 # into keyword 'package' and name; add to the toc,
5161 # and update the package stack
5162 #-------------------------------------------------------
5163 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5164 $token = $self->markup_html_element( $1, 'k' );
5165 push @colored_tokens, $token;
5168 $self->add_toc_item( "$token", 'package' );
5169 $rpackage_stack->[$level] = $token;
5172 $token = $self->markup_html_element( $token, $type );
5173 push @colored_tokens, $token;
5175 return ( \@colored_tokens );
5178 sub markup_html_element {
5180 my ( $token, $type ) = @_;
5182 return $token if ( $type eq 'b' ); # skip a blank token
5183 return $token if ( $token =~ /^\s*$/ ); # skip a blank line
5184 $token = escape_html($token);
5186 # get the short abbreviation for this token type
5187 my $short_name = $token_short_names{$type};
5188 if ( !defined($short_name) ) {
5189 $short_name = "pu"; # punctuation is default
5192 # handle style sheets..
5193 if ( !$rOpts->{'nohtml-style-sheets'} ) {
5194 if ( $short_name ne 'pu' ) {
5195 $token = qq(<span class="$short_name">) . $token . "</span>";
5199 # handle no style sheets..
5201 my $color = $html_color{$short_name};
5203 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5204 $token = qq(<font color="$color">) . $token . "</font>";
5206 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5207 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
5215 if ($missing_html_entities) {
5216 $token =~ s/\&/&/g;
5217 $token =~ s/\</</g;
5218 $token =~ s/\>/>/g;
5219 $token =~ s/\"/"/g;
5222 HTML::Entities::encode_entities($token);
5227 sub finish_formatting {
5229 # called after last line
5231 $self->close_html_file();
5238 return unless $self->{_html_file_opened};
5239 my $html_pre_fh = $self->{_html_pre_fh};
5240 my ($line_of_tokens) = @_;
5241 my $line_type = $line_of_tokens->{_line_type};
5242 my $input_line = $line_of_tokens->{_line_text};
5243 my $line_number = $line_of_tokens->{_line_number};
5246 # markup line of code..
5248 if ( $line_type eq 'CODE' ) {
5249 my $rtoken_type = $line_of_tokens->{_rtoken_type};
5250 my $rtokens = $line_of_tokens->{_rtokens};
5251 my $rlevels = $line_of_tokens->{_rlevels};
5253 if ( $input_line =~ /(^\s*)/ ) {
5259 my ($rcolored_tokens) =
5260 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5261 $html_line .= join '', @$rcolored_tokens;
5264 # markup line of non-code..
5267 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
5268 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
5269 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
5270 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5271 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
5272 elsif ( $line_type eq 'END_START' ) {
5273 $line_character = 'k';
5274 $self->add_toc_item( '__END__', '__END__' );
5276 elsif ( $line_type eq 'DATA_START' ) {
5277 $line_character = 'k';
5278 $self->add_toc_item( '__DATA__', '__DATA__' );
5280 elsif ( $line_type =~ /^POD/ ) {
5281 $line_character = 'P';
5282 if ( $rOpts->{'pod2html'} ) {
5283 my $html_pod_fh = $self->{_html_pod_fh};
5284 if ( $line_type eq 'POD_START' ) {
5286 my $rpre_string_stack = $self->{_rpre_string_stack};
5287 my $rpre_string = $rpre_string_stack->[-1];
5289 # if we have written any non-blank lines to the
5290 # current pre section, start writing to a new output
5292 if ( $$rpre_string =~ /\S/ ) {
5295 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5296 $self->{_html_pre_fh} = $html_pre_fh;
5297 push @$rpre_string_stack, \$pre_string;
5299 # leave a marker in the pod stream so we know
5300 # where to put the pre section we just
5302 my $for_html = '=for html'; # don't confuse pod utils
5303 $html_pod_fh->print(<<EOM);
5306 <!-- pERLTIDY sECTION -->
5311 # otherwise, just clear the current string and start
5315 $html_pod_fh->print("\n");
5318 $html_pod_fh->print( $input_line . "\n" );
5319 if ( $line_type eq 'POD_END' ) {
5320 $self->{_pod_cut_count}++;
5321 $html_pod_fh->print("\n");
5326 else { $line_character = 'Q' }
5327 $html_line = $self->markup_html_element( $input_line, $line_character );
5330 # add the line number if requested
5331 if ( $rOpts->{'html-line-numbers'} ) {
5333 ( $line_number < 10 ) ? " "
5334 : ( $line_number < 100 ) ? " "
5335 : ( $line_number < 1000 ) ? " "
5337 $html_line = $extra_space . $line_number . " " . $html_line;
5341 $html_pre_fh->print("$html_line\n");
5344 #####################################################################
5346 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5347 # line breaks to the token stream
5349 # WARNING: This is not a real class for speed reasons. Only one
5350 # Formatter may be used.
5352 #####################################################################
5354 package Perl::Tidy::Formatter;
5358 # Caution: these debug flags produce a lot of output
5359 # They should all be 0 except when debugging small scripts
5360 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
5361 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
5362 use constant FORMATTER_DEBUG_FLAG_CI => 0;
5363 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
5364 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
5365 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
5366 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5367 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
5368 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
5369 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
5370 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
5371 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
5373 my $debug_warning = sub {
5374 print "FORMATTER_DEBUGGING with key $_[0]\n";
5377 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
5378 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
5379 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
5380 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
5381 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
5382 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
5383 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5384 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
5385 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
5386 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
5387 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
5388 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
5395 $max_gnu_stack_index
5396 $gnu_position_predictor
5397 $line_start_index_to_go
5398 $last_indentation_written
5399 $last_unadjusted_indentation
5402 $saw_VERSION_in_this_file
5407 $gnu_sequence_number
5408 $last_output_indentation
5414 @type_sequence_to_go
5415 @container_environment_to_go
5416 @bond_strength_to_go
5417 @forced_breakpoint_to_go
5420 @leading_spaces_to_go
5421 @reduced_spaces_to_go
5422 @matching_token_to_go
5424 @nesting_blocks_to_go
5426 @nesting_depth_to_go
5428 @old_breakpoint_to_go
5432 %saved_opening_indentation
5435 $comma_count_in_batch
5436 $old_line_count_in_batch
5437 $last_nonblank_index_to_go
5438 $last_nonblank_type_to_go
5439 $last_nonblank_token_to_go
5440 $last_last_nonblank_index_to_go
5441 $last_last_nonblank_type_to_go
5442 $last_last_nonblank_token_to_go
5443 @nonblank_lines_at_depth
5447 $in_format_skipping_section
5448 $format_skipping_pattern_begin
5449 $format_skipping_pattern_end
5451 $forced_breakpoint_count
5452 $forced_breakpoint_undo_count
5453 @forced_breakpoint_undo_stack
5454 %postponed_breakpoint
5458 $first_embedded_tab_at
5459 $last_embedded_tab_at
5460 $deleted_semicolon_count
5461 $first_deleted_semicolon_at
5462 $last_deleted_semicolon_at
5463 $added_semicolon_count
5464 $first_added_semicolon_at
5465 $last_added_semicolon_at
5466 $first_tabbing_disagreement
5467 $last_tabbing_disagreement
5468 $in_tabbing_disagreement
5469 $tabbing_disagreement_count
5473 $last_line_leading_type
5474 $last_line_leading_level
5475 $last_last_line_leading_level
5478 %block_opening_line_number
5479 $csc_new_statement_ok
5480 $accumulating_text_for_block
5482 $rleading_block_if_elsif_text
5483 $leading_block_text_level
5484 $leading_block_text_length_exceeded
5485 $leading_block_text_line_length
5486 $leading_block_text_line_number
5487 $closing_side_comment_prefix_pattern
5488 $closing_side_comment_list_pattern
5490 $last_nonblank_token
5492 $last_last_nonblank_token
5493 $last_last_nonblank_type
5494 $last_nonblank_block_type
5497 %is_if_brace_follower
5498 %space_after_keyword
5501 %is_last_next_redo_return
5502 %is_other_brace_follower
5503 %is_else_brace_follower
5504 %is_anon_sub_brace_follower
5505 %is_anon_sub_1_brace_follower
5507 %is_sort_map_grep_eval
5508 %is_sort_map_grep_eval_do
5509 %is_block_without_semicolon
5514 %is_if_unless_and_or_last_next_redo_return
5515 %is_until_while_for_if_elsif_else
5521 $is_static_block_comment
5522 $index_start_one_line_block
5523 $semicolons_before_block_self_destruct
5524 $index_max_forced_break
5527 $vertical_aligner_object
5532 $last_line_had_side_comment
5535 $static_block_comment_pattern
5536 $static_side_comment_pattern
5537 %opening_vertical_tightness
5538 %closing_vertical_tightness
5539 %closing_token_indentation
5541 %opening_token_right
5542 %stack_opening_token
5543 %stack_closing_token
5545 $block_brace_vertical_tightness_pattern
5548 $rOpts_add_whitespace
5549 $rOpts_block_brace_tightness
5550 $rOpts_block_brace_vertical_tightness
5551 $rOpts_brace_left_and_indent
5552 $rOpts_comma_arrow_breakpoints
5553 $rOpts_break_at_old_keyword_breakpoints
5554 $rOpts_break_at_old_comma_breakpoints
5555 $rOpts_break_at_old_logical_breakpoints
5556 $rOpts_break_at_old_ternary_breakpoints
5557 $rOpts_closing_side_comment_else_flag
5558 $rOpts_closing_side_comment_maximum_text
5559 $rOpts_continuation_indentation
5561 $rOpts_delete_old_whitespace
5562 $rOpts_fuzzy_line_length
5563 $rOpts_indent_columns
5564 $rOpts_line_up_parentheses
5565 $rOpts_maximum_fields_per_table
5566 $rOpts_maximum_line_length
5567 $rOpts_short_concatenation_item_length
5568 $rOpts_swallow_optional_blank_lines
5569 $rOpts_ignore_old_breakpoints
5570 $rOpts_format_skipping
5571 $rOpts_space_function_paren
5572 $rOpts_space_keyword_paren
5574 $half_maximum_line_length
5578 %is_keyword_returning_list
5582 %right_bond_strength
5599 # default list of block types for which -bli would apply
5600 $bli_list_string = 'if else elsif unless while for foreach do : sub';
5603 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
5604 <= >= == =~ !~ != ++ -- /= x=
5606 @is_digraph{@_} = (1) x scalar(@_);
5608 @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
5609 @is_trigraph{@_} = (1) x scalar(@_);
5612 = **= += *= &= <<= &&=
5613 -= /= |= >>= ||= //=
5617 @is_assignment{@_} = (1) x scalar(@_);
5627 @is_keyword_returning_list{@_} = (1) x scalar(@_);
5629 @_ = qw(is if unless and or err last next redo return);
5630 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5632 # always break after a closing curly of these block types:
5633 @_ = qw(until while for if elsif else);
5634 @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
5636 @_ = qw(last next redo return);
5637 @is_last_next_redo_return{@_} = (1) x scalar(@_);
5639 @_ = qw(sort map grep);
5640 @is_sort_map_grep{@_} = (1) x scalar(@_);
5642 @_ = qw(sort map grep eval);
5643 @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5645 @_ = qw(sort map grep eval do);
5646 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5649 @is_if_unless{@_} = (1) x scalar(@_);
5651 @_ = qw(and or err);
5652 @is_and_or{@_} = (1) x scalar(@_);
5654 # Identify certain operators which often occur in chains
5655 @_ = qw(&& || and or : ? .);
5656 @is_chain_operator{@_} = (1) x scalar(@_);
5658 # We can remove semicolons after blocks preceded by these keywords
5659 @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
5660 unless while until for foreach);
5661 @is_block_without_semicolon{@_} = (1) x scalar(@_);
5663 # 'L' is token for opening { at hash key
5665 @is_opening_type{@_} = (1) x scalar(@_);
5667 # 'R' is token for closing } at hash key
5669 @is_closing_type{@_} = (1) x scalar(@_);
5672 @is_opening_token{@_} = (1) x scalar(@_);
5675 @is_closing_token{@_} = (1) x scalar(@_);
5679 use constant WS_YES => 1;
5680 use constant WS_OPTIONAL => 0;
5681 use constant WS_NO => -1;
5683 # Token bond strengths.
5684 use constant NO_BREAK => 10000;
5685 use constant VERY_STRONG => 100;
5686 use constant STRONG => 2.1;
5687 use constant NOMINAL => 1.1;
5688 use constant WEAK => 0.8;
5689 use constant VERY_WEAK => 0.55;
5691 # values for testing indexes in output array
5692 use constant UNDEFINED_INDEX => -1;
5694 # Maximum number of little messages; probably need not be changed.
5695 use constant MAX_NAG_MESSAGES => 6;
5697 # increment between sequence numbers for each type
5698 # For example, ?: pairs might have numbers 7,11,15,...
5699 use constant TYPE_SEQUENCE_INCREMENT => 4;
5703 # methods to count instances
5705 sub get_count { $_count; }
5706 sub _increment_count { ++$_count }
5707 sub _decrement_count { --$_count }
5710 # interface to Perl::Tidy::Logger routines
5712 if ($logger_object) {
5713 $logger_object->warning(@_);
5718 if ($logger_object) {
5719 $logger_object->complain(@_);
5723 sub write_logfile_entry {
5724 if ($logger_object) {
5725 $logger_object->write_logfile_entry(@_);
5730 if ($logger_object) {
5731 $logger_object->black_box(@_);
5735 sub report_definite_bug {
5736 if ($logger_object) {
5737 $logger_object->report_definite_bug();
5741 sub get_saw_brace_error {
5742 if ($logger_object) {
5743 $logger_object->get_saw_brace_error();
5747 sub we_are_at_the_last_line {
5748 if ($logger_object) {
5749 $logger_object->we_are_at_the_last_line();
5753 # interface to Perl::Tidy::Diagnostics routine
5754 sub write_diagnostics {
5756 if ($diagnostics_object) {
5757 $diagnostics_object->write_diagnostics(@_);
5761 sub get_added_semicolon_count {
5763 return $added_semicolon_count;
5767 $_[0]->_decrement_count();
5774 # we are given an object with a write_line() method to take lines
5776 sink_object => undef,
5777 diagnostics_object => undef,
5778 logger_object => undef,
5780 my %args = ( %defaults, @_ );
5782 $logger_object = $args{logger_object};
5783 $diagnostics_object = $args{diagnostics_object};
5785 # we create another object with a get_line() and peek_ahead() method
5786 my $sink_object = $args{sink_object};
5787 $file_writer_object =
5788 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
5790 # initialize the leading whitespace stack to negative levels
5791 # so that we can never run off the end of the stack
5792 $gnu_position_predictor = 0; # where the current token is predicted to be
5793 $max_gnu_stack_index = 0;
5794 $max_gnu_item_index = -1;
5795 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
5796 @gnu_item_list = ();
5797 $last_output_indentation = 0;
5798 $last_indentation_written = 0;
5799 $last_unadjusted_indentation = 0;
5800 $last_leading_token = "";
5802 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
5803 $saw_END_or_DATA_ = 0;
5805 @block_type_to_go = ();
5806 @type_sequence_to_go = ();
5807 @container_environment_to_go = ();
5808 @bond_strength_to_go = ();
5809 @forced_breakpoint_to_go = ();
5810 @lengths_to_go = (); # line length to start of ith token
5812 @matching_token_to_go = ();
5813 @mate_index_to_go = ();
5814 @nesting_blocks_to_go = ();
5815 @ci_levels_to_go = ();
5816 @nesting_depth_to_go = (0);
5817 @nobreak_to_go = ();
5818 @old_breakpoint_to_go = ();
5821 @leading_spaces_to_go = ();
5822 @reduced_spaces_to_go = ();
5825 @has_broken_sublist = ();
5826 @want_comma_break = ();
5829 $first_tabbing_disagreement = 0;
5830 $last_tabbing_disagreement = 0;
5831 $tabbing_disagreement_count = 0;
5832 $in_tabbing_disagreement = 0;
5833 $input_line_tabbing = undef;
5835 $last_line_type = "";
5836 $last_last_line_leading_level = 0;
5837 $last_line_leading_level = 0;
5838 $last_line_leading_type = '#';
5840 $last_nonblank_token = ';';
5841 $last_nonblank_type = ';';
5842 $last_last_nonblank_token = ';';
5843 $last_last_nonblank_type = ';';
5844 $last_nonblank_block_type = "";
5845 $last_output_level = 0;
5846 $looking_for_else = 0;
5847 $embedded_tab_count = 0;
5848 $first_embedded_tab_at = 0;
5849 $last_embedded_tab_at = 0;
5850 $deleted_semicolon_count = 0;
5851 $first_deleted_semicolon_at = 0;
5852 $last_deleted_semicolon_at = 0;
5853 $added_semicolon_count = 0;
5854 $first_added_semicolon_at = 0;
5855 $last_added_semicolon_at = 0;
5856 $last_line_had_side_comment = 0;
5857 $is_static_block_comment = 0;
5858 %postponed_breakpoint = ();
5860 # variables for adding side comments
5861 %block_leading_text = ();
5862 %block_opening_line_number = ();
5863 $csc_new_statement_ok = 1;
5865 %saved_opening_indentation = ();
5866 $in_format_skipping_section = 0;
5868 reset_block_text_accumulator();
5870 prepare_for_new_input_lines();
5872 $vertical_aligner_object =
5873 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
5874 $logger_object, $diagnostics_object );
5876 if ( $rOpts->{'entab-leading-whitespace'} ) {
5877 write_logfile_entry(
5878 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
5881 elsif ( $rOpts->{'tabs'} ) {
5882 write_logfile_entry("Indentation will be with a tab character\n");
5885 write_logfile_entry(
5886 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
5889 # This was the start of a formatter referent, but object-oriented
5890 # coding has turned out to be too slow here.
5891 $formatter_self = {};
5893 bless $formatter_self, $class;
5895 # Safety check..this is not a class yet
5896 if ( _increment_count() > 1 ) {
5898 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
5900 return $formatter_self;
5903 sub prepare_for_new_input_lines {
5905 $gnu_sequence_number++; # increment output batch counter
5906 %last_gnu_equals = ();
5907 %gnu_comma_count = ();
5908 %gnu_arrow_count = ();
5909 $line_start_index_to_go = 0;
5910 $max_gnu_item_index = UNDEFINED_INDEX;
5911 $index_max_forced_break = UNDEFINED_INDEX;
5912 $max_index_to_go = UNDEFINED_INDEX;
5913 $last_nonblank_index_to_go = UNDEFINED_INDEX;
5914 $last_nonblank_type_to_go = '';
5915 $last_nonblank_token_to_go = '';
5916 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
5917 $last_last_nonblank_type_to_go = '';
5918 $last_last_nonblank_token_to_go = '';
5919 $forced_breakpoint_count = 0;
5920 $forced_breakpoint_undo_count = 0;
5921 $rbrace_follower = undef;
5922 $lengths_to_go[0] = 0;
5923 $old_line_count_in_batch = 1;
5924 $comma_count_in_batch = 0;
5925 $starting_in_quote = 0;
5927 destroy_one_line_block();
5933 my ($line_of_tokens) = @_;
5935 my $line_type = $line_of_tokens->{_line_type};
5936 my $input_line = $line_of_tokens->{_line_text};
5938 my $want_blank_line_next = 0;
5940 # _line_type codes are:
5941 # SYSTEM - system-specific code before hash-bang line
5942 # CODE - line of perl code (including comments)
5943 # POD_START - line starting pod, such as '=head'
5944 # POD - pod documentation text
5945 # POD_END - last line of pod section, '=cut'
5946 # HERE - text of here-document
5947 # HERE_END - last line of here-doc (target word)
5948 # FORMAT - format section
5949 # FORMAT_END - last line of format section, '.'
5950 # DATA_START - __DATA__ line
5951 # DATA - unidentified text following __DATA__
5952 # END_START - __END__ line
5953 # END - unidentified text following __END__
5954 # ERROR - we are in big trouble, probably not a perl script
5956 # handle line of code..
5957 if ( $line_type eq 'CODE' ) {
5959 # let logger see all non-blank lines of code
5960 if ( $input_line !~ /^\s*$/ ) {
5961 my $output_line_number =
5962 $vertical_aligner_object->get_output_line_number();
5963 black_box( $line_of_tokens, $output_line_number );
5965 print_line_of_tokens($line_of_tokens);
5968 # handle line of non-code..
5974 if ( $line_type =~ /^POD/ ) {
5976 # Pod docs should have a preceding blank line. But be
5977 # very careful in __END__ and __DATA__ sections, because:
5978 # 1. the user may be using this section for any purpose whatsoever
5979 # 2. the blank counters are not active there
5980 # It should be safe to request a blank line between an
5981 # __END__ or __DATA__ and an immediately following '=head'
5982 # type line, (types END_START and DATA_START), but not for
5983 # any other lines of type END or DATA.
5984 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
5985 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
5987 && $line_type eq 'POD_START'
5988 && $last_line_type !~ /^(END|DATA)$/ )
5993 # patch to put a blank line after =cut
5994 # (required by podchecker)
5995 if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
5996 $file_writer_object->reset_consecutive_blank_lines();
5997 $want_blank_line_next = 1;
6001 # leave the blank counters in a predictable state
6002 # after __END__ or __DATA__
6003 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6004 $file_writer_object->reset_consecutive_blank_lines();
6005 $saw_END_or_DATA_ = 1;
6008 # write unindented non-code line
6009 if ( !$skip_line ) {
6010 if ($tee_line) { $file_writer_object->tee_on() }
6011 write_unindented_line($input_line);
6012 if ($tee_line) { $file_writer_object->tee_off() }
6013 if ($want_blank_line_next) { want_blank_line(); }
6016 $last_line_type = $line_type;
6019 sub create_one_line_block {
6020 $index_start_one_line_block = $_[0];
6021 $semicolons_before_block_self_destruct = $_[1];
6024 sub destroy_one_line_block {
6025 $index_start_one_line_block = UNDEFINED_INDEX;
6026 $semicolons_before_block_self_destruct = 0;
6029 sub leading_spaces_to_go {
6031 # return the number of indentation spaces for a token in the output stream;
6032 # these were previously stored by 'set_leading_whitespace'.
6034 return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
6040 # return the number of leading spaces associated with an indentation
6041 # variable $indentation is either a constant number of spaces or an object
6042 # with a get_SPACES method.
6043 my $indentation = shift;
6044 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6047 sub get_RECOVERABLE_SPACES {
6049 # return the number of spaces (+ means shift right, - means shift left)
6050 # that we would like to shift a group of lines with the same indentation
6051 # to get them to line up with their opening parens
6052 my $indentation = shift;
6053 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6056 sub get_AVAILABLE_SPACES_to_go {
6058 my $item = $leading_spaces_to_go[ $_[0] ];
6060 # return the number of available leading spaces associated with an
6061 # indentation variable. $indentation is either a constant number of
6062 # spaces or an object with a get_AVAILABLE_SPACES method.
6063 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6066 sub new_lp_indentation_item {
6068 # this is an interface to the IndentationItem class
6069 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6071 # A negative level implies not to store the item in the item_list
6073 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6075 my $item = Perl::Tidy::IndentationItem->new(
6077 $ci_level, $available_spaces,
6078 $index, $gnu_sequence_number,
6079 $align_paren, $max_gnu_stack_index,
6080 $line_start_index_to_go,
6083 if ( $level >= 0 ) {
6084 $gnu_item_list[$max_gnu_item_index] = $item;
6090 sub set_leading_whitespace {
6092 # This routine defines leading whitespace
6093 # given: the level and continuation_level of a token,
6094 # define: space count of leading string which would apply if it
6095 # were the first token of a new line.
6097 my ( $level, $ci_level, $in_continued_quote ) = @_;
6099 # modify for -bli, which adds one continuation indentation for
6101 if ( $rOpts_brace_left_and_indent
6102 && $max_index_to_go == 0
6103 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6108 # patch to avoid trouble when input file has negative indentation.
6109 # other logic should catch this error.
6110 if ( $level < 0 ) { $level = 0 }
6112 #-------------------------------------------
6113 # handle the standard indentation scheme
6114 #-------------------------------------------
6115 unless ($rOpts_line_up_parentheses) {
6116 my $space_count = $ci_level * $rOpts_continuation_indentation + $level *
6117 $rOpts_indent_columns;
6119 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6121 if ($in_continued_quote) {
6125 $leading_spaces_to_go[$max_index_to_go] = $space_count;
6126 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6130 #-------------------------------------------------------------
6131 # handle case of -lp indentation..
6132 #-------------------------------------------------------------
6134 # The continued_quote flag means that this is the first token of a
6135 # line, and it is the continuation of some kind of multi-line quote
6136 # or pattern. It requires special treatment because it must have no
6137 # added leading whitespace. So we create a special indentation item
6138 # which is not in the stack.
6139 if ($in_continued_quote) {
6140 my $space_count = 0;
6141 my $available_space = 0;
6142 $level = -1; # flag to prevent storing in item_list
6143 $leading_spaces_to_go[$max_index_to_go] =
6144 $reduced_spaces_to_go[$max_index_to_go] =
6145 new_lp_indentation_item( $space_count, $level, $ci_level,
6146 $available_space, 0 );
6150 # get the top state from the stack
6151 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6152 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6153 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6155 my $type = $types_to_go[$max_index_to_go];
6156 my $token = $tokens_to_go[$max_index_to_go];
6157 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6159 if ( $type eq '{' || $type eq '(' ) {
6161 $gnu_comma_count{ $total_depth + 1 } = 0;
6162 $gnu_arrow_count{ $total_depth + 1 } = 0;
6164 # If we come to an opening token after an '=' token of some type,
6165 # see if it would be helpful to 'break' after the '=' to save space
6166 my $last_equals = $last_gnu_equals{$total_depth};
6167 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6169 # find the position if we break at the '='
6170 my $i_test = $last_equals;
6171 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6174 ##my $too_close = ($i_test==$max_index_to_go-1);
6176 my $test_position = total_line_length( $i_test, $max_index_to_go );
6180 # the equals is not just before an open paren (testing)
6183 # if we are beyond the midpoint
6184 $gnu_position_predictor > $half_maximum_line_length
6186 # or we are beyont the 1/4 point and there was an old
6187 # break at the equals
6189 $gnu_position_predictor > $half_maximum_line_length / 2
6191 $old_breakpoint_to_go[$last_equals]
6192 || ( $last_equals > 0
6193 && $old_breakpoint_to_go[ $last_equals - 1 ] )
6194 || ( $last_equals > 1
6195 && $types_to_go[ $last_equals - 1 ] eq 'b'
6196 && $old_breakpoint_to_go[ $last_equals - 2 ] )
6202 # then make the switch -- note that we do not set a real
6203 # breakpoint here because we may not really need one; sub
6204 # scan_list will do that if necessary
6205 $line_start_index_to_go = $i_test + 1;
6206 $gnu_position_predictor = $test_position;
6211 # Check for decreasing depth ..
6212 # Note that one token may have both decreasing and then increasing
6213 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
6214 # in this example we would first go back to (1,0) then up to (2,0)
6216 if ( $level < $current_level || $ci_level < $current_ci_level ) {
6218 # loop to find the first entry at or completely below this level
6219 my ( $lev, $ci_lev );
6221 if ($max_gnu_stack_index) {
6223 # save index of token which closes this level
6224 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6226 # Undo any extra indentation if we saw no commas
6227 my $available_spaces =
6228 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6230 my $comma_count = 0;
6231 my $arrow_count = 0;
6232 if ( $type eq '}' || $type eq ')' ) {
6233 $comma_count = $gnu_comma_count{$total_depth};
6234 $arrow_count = $gnu_arrow_count{$total_depth};
6235 $comma_count = 0 unless $comma_count;
6236 $arrow_count = 0 unless $arrow_count;
6238 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6239 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6241 if ( $available_spaces > 0 ) {
6243 if ( $comma_count <= 0 || $arrow_count > 0 ) {
6245 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6247 $gnu_stack[$max_gnu_stack_index]
6248 ->get_SEQUENCE_NUMBER();
6250 # Be sure this item was created in this batch. This
6251 # should be true because we delete any available
6252 # space from open items at the end of each batch.
6253 if ( $gnu_sequence_number != $seqno
6254 || $i > $max_gnu_item_index )
6257 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6259 report_definite_bug();
6263 if ( $arrow_count == 0 ) {
6265 ->permanently_decrease_AVAILABLE_SPACES(
6270 ->tentatively_decrease_AVAILABLE_SPACES(
6277 $j <= $max_gnu_item_index ;
6282 ->decrease_SPACES($available_spaces);
6289 --$max_gnu_stack_index;
6290 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6291 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6293 # stop when we reach a level at or below the current level
6294 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6296 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6297 $current_level = $lev;
6298 $current_ci_level = $ci_lev;
6303 # reached bottom of stack .. should never happen because
6304 # only negative levels can get here, and $level was forced
6305 # to be positive above.
6308 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6310 report_definite_bug();
6316 # handle increasing depth
6317 if ( $level > $current_level || $ci_level > $current_ci_level ) {
6319 # Compute the standard incremental whitespace. This will be
6320 # the minimum incremental whitespace that will be used. This
6321 # choice results in a smooth transition between the gnu-style
6322 # and the standard style.
6323 my $standard_increment =
6324 ( $level - $current_level ) * $rOpts_indent_columns +
6325 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6327 # Now we have to define how much extra incremental space
6328 # ("$available_space") we want. This extra space will be
6329 # reduced as necessary when long lines are encountered or when
6330 # it becomes clear that we do not have a good list.
6331 my $available_space = 0;
6332 my $align_paren = 0;
6335 # initialization on empty stack..
6336 if ( $max_gnu_stack_index == 0 ) {
6337 $space_count = $level * $rOpts_indent_columns;
6340 # if this is a BLOCK, add the standard increment
6341 elsif ($last_nonblank_block_type) {
6342 $space_count += $standard_increment;
6345 # if last nonblank token was not structural indentation,
6346 # just use standard increment
6347 elsif ( $last_nonblank_type ne '{' ) {
6348 $space_count += $standard_increment;
6351 # otherwise use the space to the first non-blank level change token
6354 $space_count = $gnu_position_predictor;
6356 my $min_gnu_indentation =
6357 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6359 $available_space = $space_count - $min_gnu_indentation;
6360 if ( $available_space >= $standard_increment ) {
6361 $min_gnu_indentation += $standard_increment;
6363 elsif ( $available_space > 1 ) {
6364 $min_gnu_indentation += $available_space + 1;
6366 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6367 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6368 $min_gnu_indentation += 2;
6371 $min_gnu_indentation += 1;
6375 $min_gnu_indentation += $standard_increment;
6377 $available_space = $space_count - $min_gnu_indentation;
6379 if ( $available_space < 0 ) {
6380 $space_count = $min_gnu_indentation;
6381 $available_space = 0;
6386 # update state, but not on a blank token
6387 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6389 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6391 ++$max_gnu_stack_index;
6392 $gnu_stack[$max_gnu_stack_index] =
6393 new_lp_indentation_item( $space_count, $level, $ci_level,
6394 $available_space, $align_paren );
6396 # If the opening paren is beyond the half-line length, then
6397 # we will use the minimum (standard) indentation. This will
6398 # help avoid problems associated with running out of space
6399 # near the end of a line. As a result, in deeply nested
6400 # lists, there will be some indentations which are limited
6401 # to this minimum standard indentation. But the most deeply
6402 # nested container will still probably be able to shift its
6403 # parameters to the right for proper alignment, so in most
6404 # cases this will not be noticable.
6405 if ( $available_space > 0
6406 && $space_count > $half_maximum_line_length )
6408 $gnu_stack[$max_gnu_stack_index]
6409 ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6414 # Count commas and look for non-list characters. Once we see a
6415 # non-list character, we give up and don't look for any more commas.
6416 if ( $type eq '=>' ) {
6417 $gnu_arrow_count{$total_depth}++;
6419 # tentatively treating '=>' like '=' for estimating breaks
6420 # TODO: this could use some experimentation
6421 $last_gnu_equals{$total_depth} = $max_index_to_go;
6424 elsif ( $type eq ',' ) {
6425 $gnu_comma_count{$total_depth}++;
6428 elsif ( $is_assignment{$type} ) {
6429 $last_gnu_equals{$total_depth} = $max_index_to_go;
6432 # this token might start a new line
6433 # if this is a non-blank..
6434 if ( $type ne 'b' ) {
6439 # this is the first nonblank token of the line
6440 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6442 # or previous character was one of these:
6443 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6445 # or previous character was opening and this does not close it
6446 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6447 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6449 # or this token is one of these:
6450 || $type =~ /^([\.]|\|\||\&\&)$/
6452 # or this is a closing structure
6453 || ( $last_nonblank_type_to_go eq '}'
6454 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6456 # or previous token was keyword 'return'
6457 || ( $last_nonblank_type_to_go eq 'k'
6458 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6460 # or starting a new line at certain keywords is fine
6462 && $is_if_unless_and_or_last_next_redo_return{$token} )
6464 # or this is after an assignment after a closing structure
6466 $is_assignment{$last_nonblank_type_to_go}
6468 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6470 # and it is significantly to the right
6471 || $gnu_position_predictor > $half_maximum_line_length
6476 check_for_long_gnu_style_lines();
6477 $line_start_index_to_go = $max_index_to_go;
6479 # back up 1 token if we want to break before that type
6480 # otherwise, we may strand tokens like '?' or ':' on a line
6481 if ( $line_start_index_to_go > 0 ) {
6482 if ( $last_nonblank_type_to_go eq 'k' ) {
6484 if ( $want_break_before{$last_nonblank_token_to_go} ) {
6485 $line_start_index_to_go--;
6488 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6489 $line_start_index_to_go--;
6495 # remember the predicted position of this token on the output line
6496 if ( $max_index_to_go > $line_start_index_to_go ) {
6497 $gnu_position_predictor =
6498 total_line_length( $line_start_index_to_go, $max_index_to_go );
6501 $gnu_position_predictor = $space_count +
6502 token_sequence_length( $max_index_to_go, $max_index_to_go );
6505 # store the indentation object for this token
6506 # this allows us to manipulate the leading whitespace
6507 # (in case we have to reduce indentation to fit a line) without
6508 # having to change any token values
6509 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6510 $reduced_spaces_to_go[$max_index_to_go] =
6511 ( $max_gnu_stack_index > 0 && $ci_level )
6512 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6513 : $gnu_stack[$max_gnu_stack_index];
6517 sub check_for_long_gnu_style_lines {
6519 # look at the current estimated maximum line length, and
6520 # remove some whitespace if it exceeds the desired maximum
6522 # this is only for the '-lp' style
6523 return unless ($rOpts_line_up_parentheses);
6525 # nothing can be done if no stack items defined for this line
6526 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6528 # see if we have exceeded the maximum desired line length
6529 # keep 2 extra free because they are needed in some cases
6530 # (result of trial-and-error testing)
6532 $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6534 return if ( $spaces_needed < 0 );
6536 # We are over the limit, so try to remove a requested number of
6537 # spaces from leading whitespace. We are only allowed to remove
6538 # from whitespace items created on this batch, since others have
6539 # already been used and cannot be undone.
6540 my @candidates = ();
6543 # loop over all whitespace items created for the current batch
6544 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6545 my $item = $gnu_item_list[$i];
6547 # item must still be open to be a candidate (otherwise it
6548 # cannot influence the current token)
6549 next if ( $item->get_CLOSED() >= 0 );
6551 my $available_spaces = $item->get_AVAILABLE_SPACES();
6553 if ( $available_spaces > 0 ) {
6554 push( @candidates, [ $i, $available_spaces ] );
6558 return unless (@candidates);
6560 # sort by available whitespace so that we can remove whitespace
6561 # from the maximum available first
6562 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6564 # keep removing whitespace until we are done or have no more
6566 foreach $candidate (@candidates) {
6567 my ( $i, $available_spaces ) = @{$candidate};
6568 my $deleted_spaces =
6569 ( $available_spaces > $spaces_needed )
6571 : $available_spaces;
6573 # remove the incremental space from this item
6574 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6578 # update the leading whitespace of this item and all items
6579 # that came after it
6580 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6582 my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6583 if ( $old_spaces > $deleted_spaces ) {
6584 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6587 # shouldn't happen except for code bug:
6589 my $level = $gnu_item_list[$i_debug]->get_LEVEL();
6590 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6591 my $old_level = $gnu_item_list[$i]->get_LEVEL();
6592 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6594 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
6596 report_definite_bug();
6599 $gnu_position_predictor -= $deleted_spaces;
6600 $spaces_needed -= $deleted_spaces;
6601 last unless ( $spaces_needed > 0 );
6605 sub finish_lp_batch {
6607 # This routine is called once after each each output stream batch is
6608 # finished to undo indentation for all incomplete -lp
6609 # indentation levels. It is too risky to leave a level open,
6610 # because then we can't backtrack in case of a long line to follow.
6611 # This means that comments and blank lines will disrupt this
6612 # indentation style. But the vertical aligner may be able to
6613 # get the space back if there are side comments.
6615 # this is only for the 'lp' style
6616 return unless ($rOpts_line_up_parentheses);
6618 # nothing can be done if no stack items defined for this line
6619 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6621 # loop over all whitespace items created for the current batch
6623 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6624 my $item = $gnu_item_list[$i];
6626 # only look for open items
6627 next if ( $item->get_CLOSED() >= 0 );
6629 # Tentatively remove all of the available space
6630 # (The vertical aligner will try to get it back later)
6631 my $available_spaces = $item->get_AVAILABLE_SPACES();
6632 if ( $available_spaces > 0 ) {
6634 # delete incremental space for this item
6636 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6638 # Reduce the total indentation space of any nodes that follow
6639 # Note that any such nodes must necessarily be dependents
6641 foreach ( $i + 1 .. $max_gnu_item_index ) {
6642 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6649 sub reduce_lp_indentation {
6651 # reduce the leading whitespace at token $i if possible by $spaces_needed
6652 # (a large value of $spaces_needed will remove all excess space)
6653 # NOTE: to be called from scan_list only for a sequence of tokens
6654 # contained between opening and closing parens/braces/brackets
6656 my ( $i, $spaces_wanted ) = @_;
6657 my $deleted_spaces = 0;
6659 my $item = $leading_spaces_to_go[$i];
6660 my $available_spaces = $item->get_AVAILABLE_SPACES();
6663 $available_spaces > 0
6664 && ( ( $spaces_wanted <= $available_spaces )
6665 || !$item->get_HAVE_CHILD() )
6669 # we'll remove these spaces, but mark them as recoverable
6671 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6674 return $deleted_spaces;
6677 sub token_sequence_length {
6679 # return length of tokens ($ifirst .. $ilast) including first & last
6680 # returns 0 if $ifirst > $ilast
6683 return 0 if ( $ilast < 0 || $ifirst > $ilast );
6684 return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6685 return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6688 sub total_line_length {
6690 # return length of a line of tokens ($ifirst .. $ilast)
6693 if ( $ifirst < 0 ) { $ifirst = 0 }
6695 return leading_spaces_to_go($ifirst) +
6696 token_sequence_length( $ifirst, $ilast );
6699 sub excess_line_length {
6701 # return number of characters by which a line of tokens ($ifirst..$ilast)
6702 # exceeds the allowable line length.
6705 if ( $ifirst < 0 ) { $ifirst = 0 }
6706 return leading_spaces_to_go($ifirst) +
6707 token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6710 sub finish_formatting {
6712 # flush buffer and write any informative messages
6716 $file_writer_object->decrement_output_line_number()
6717 ; # fix up line number since it was incremented
6718 we_are_at_the_last_line();
6719 if ( $added_semicolon_count > 0 ) {
6720 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6722 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6723 write_logfile_entry("$added_semicolon_count $what added:\n");
6724 write_logfile_entry(
6725 " $first at input line $first_added_semicolon_at\n");
6727 if ( $added_semicolon_count > 1 ) {
6728 write_logfile_entry(
6729 " Last at input line $last_added_semicolon_at\n");
6731 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
6732 write_logfile_entry("\n");
6735 if ( $deleted_semicolon_count > 0 ) {
6736 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6738 ( $deleted_semicolon_count > 1 )
6741 write_logfile_entry(
6742 "$deleted_semicolon_count unnecessary $what deleted:\n");
6743 write_logfile_entry(
6744 " $first at input line $first_deleted_semicolon_at\n");
6746 if ( $deleted_semicolon_count > 1 ) {
6747 write_logfile_entry(
6748 " Last at input line $last_deleted_semicolon_at\n");
6750 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
6751 write_logfile_entry("\n");
6754 if ( $embedded_tab_count > 0 ) {
6755 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6757 ( $embedded_tab_count > 1 )
6758 ? "quotes or patterns"
6759 : "quote or pattern";
6760 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6761 write_logfile_entry(
6762 "This means the display of this script could vary with device or software\n"
6764 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
6766 if ( $embedded_tab_count > 1 ) {
6767 write_logfile_entry(
6768 " Last at input line $last_embedded_tab_at\n");
6770 write_logfile_entry("\n");
6773 if ($first_tabbing_disagreement) {
6774 write_logfile_entry(
6775 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
6779 if ($in_tabbing_disagreement) {
6780 write_logfile_entry(
6781 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
6786 if ($last_tabbing_disagreement) {
6788 write_logfile_entry(
6789 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
6793 write_logfile_entry("No indentation disagreement seen\n");
6796 write_logfile_entry("\n");
6798 $vertical_aligner_object->report_anything_unusual();
6800 $file_writer_object->report_line_length_errors();
6805 # This routine is called to check the Opts hash after it is defined
6808 my ( $tabbing_string, $tab_msg );
6810 make_static_block_comment_pattern();
6811 make_static_side_comment_pattern();
6812 make_closing_side_comment_prefix();
6813 make_closing_side_comment_list_pattern();
6814 $format_skipping_pattern_begin =
6815 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
6816 $format_skipping_pattern_end =
6817 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
6819 # If closing side comments ARE selected, then we can safely
6820 # delete old closing side comments unless closing side comment
6821 # warnings are requested. This is a good idea because it will
6822 # eliminate any old csc's which fall below the line count threshold.
6823 # We cannot do this if warnings are turned on, though, because we
6824 # might delete some text which has been added. So that must
6825 # be handled when comments are created.
6826 if ( $rOpts->{'closing-side-comments'} ) {
6827 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
6828 $rOpts->{'delete-closing-side-comments'} = 1;
6832 # If closing side comments ARE NOT selected, but warnings ARE
6833 # selected and we ARE DELETING csc's, then we will pretend to be
6834 # adding with a huge interval. This will force the comments to be
6835 # generated for comparison with the old comments, but not added.
6836 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
6837 if ( $rOpts->{'delete-closing-side-comments'} ) {
6838 $rOpts->{'delete-closing-side-comments'} = 0;
6839 $rOpts->{'closing-side-comments'} = 1;
6840 $rOpts->{'closing-side-comment-interval'} = 100000000;
6845 make_block_brace_vertical_tightness_pattern();
6847 if ( $rOpts->{'line-up-parentheses'} ) {
6849 if ( $rOpts->{'indent-only'}
6850 || !$rOpts->{'add-newlines'}
6851 || !$rOpts->{'delete-old-newlines'} )
6854 -----------------------------------------------------------------------
6855 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
6857 The -lp indentation logic requires that perltidy be able to coordinate
6858 arbitrarily large numbers of line breakpoints. This isn't possible
6859 with these flags. Sometimes an acceptable workaround is to use -wocb=3
6860 -----------------------------------------------------------------------
6862 $rOpts->{'line-up-parentheses'} = 0;
6866 # At present, tabs are not compatable with the line-up-parentheses style
6867 # (it would be possible to entab the total leading whitespace
6868 # just prior to writing the line, if desired).
6869 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
6871 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
6873 $rOpts->{'tabs'} = 0;
6876 # Likewise, tabs are not compatable with outdenting..
6877 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
6879 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
6881 $rOpts->{'tabs'} = 0;
6884 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
6886 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
6888 $rOpts->{'tabs'} = 0;
6891 if ( !$rOpts->{'space-for-semicolon'} ) {
6892 $want_left_space{'f'} = -1;
6895 if ( $rOpts->{'space-terminal-semicolon'} ) {
6896 $want_left_space{';'} = 1;
6899 # implement outdenting preferences for keywords
6900 %outdent_keyword = ();
6903 @_ = qw(next last redo goto return);
6905 # override defaults if requested
6906 if ( $_ = $rOpts->{'outdent-keyword-list'} ) {
6912 # FUTURE: if not a keyword, assume that it is an identifier
6914 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
6915 $outdent_keyword{$_} = 1;
6918 warn "ignoring '$_' in -okwl list; not a perl keyword";
6922 # implement user whitespace preferences
6923 if ( $_ = $rOpts->{'want-left-space'} ) {
6927 @want_left_space{@_} = (1) x scalar(@_);
6930 if ( $_ = $rOpts->{'want-right-space'} ) {
6934 @want_right_space{@_} = (1) x scalar(@_);
6936 if ( $_ = $rOpts->{'nowant-left-space'} ) {
6940 @want_left_space{@_} = (-1) x scalar(@_);
6943 if ( $_ = $rOpts->{'nowant-right-space'} ) {
6947 @want_right_space{@_} = (-1) x scalar(@_);
6949 if ( $rOpts->{'dump-want-left-space'} ) {
6950 dump_want_left_space(*STDOUT);
6954 if ( $rOpts->{'dump-want-right-space'} ) {
6955 dump_want_right_space(*STDOUT);
6959 # default keywords for which space is introduced before an opening paren
6960 # (at present, including them messes up vertical alignment)
6961 @_ = qw(my local our and or err eq ne if else elsif until
6962 unless while for foreach return switch case given when);
6963 @space_after_keyword{@_} = (1) x scalar(@_);
6965 # allow user to modify these defaults
6966 if ( $_ = $rOpts->{'space-after-keyword'} ) {
6970 @space_after_keyword{@_} = (1) x scalar(@_);
6973 if ( $_ = $rOpts->{'nospace-after-keyword'} ) {
6977 @space_after_keyword{@_} = (0) x scalar(@_);
6980 # implement user break preferences
6981 if ( $_ = $rOpts->{'want-break-after'} ) {
6983 foreach my $tok (@_) {
6984 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
6985 my $lbs = $left_bond_strength{$tok};
6986 my $rbs = $right_bond_strength{$tok};
6987 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
6988 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
6994 if ( $_ = $rOpts->{'want-break-before'} ) {
6998 foreach my $tok (@_) {
6999 my $lbs = $left_bond_strength{$tok};
7000 my $rbs = $right_bond_strength{$tok};
7001 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7002 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7008 # make note if breaks are before certain key types
7009 %want_break_before = ();
7012 my $tok ( '.', ',', ':', '?', '&&', '||', 'and', 'or', 'err', 'xor' )
7014 $want_break_before{$tok} =
7015 $left_bond_strength{$tok} < $right_bond_strength{$tok};
7018 # Coordinate ?/: breaks, which must be similar
7019 if ( !$want_break_before{':'} ) {
7020 $want_break_before{'?'} = $want_break_before{':'};
7021 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7022 $left_bond_strength{'?'} = NO_BREAK;
7025 # Define here tokens which may follow the closing brace of a do statement
7026 # on the same line, as in:
7027 # } while ( $something);
7028 @_ = qw(until while unless if ; : );
7030 @is_do_follower{@_} = (1) x scalar(@_);
7032 # These tokens may follow the closing brace of an if or elsif block.
7033 # In other words, for cuddled else we want code to look like:
7034 # } elsif ( $something) {
7036 if ( $rOpts->{'cuddled-else'} ) {
7037 @_ = qw(else elsif);
7038 @is_if_brace_follower{@_} = (1) x scalar(@_);
7041 %is_if_brace_follower = ();
7044 # nothing can follow the closing curly of an else { } block:
7045 %is_else_brace_follower = ();
7047 # what can follow a multi-line anonymous sub definition closing curly:
7048 @_ = qw# ; : => or and && || ~~ ) #;
7050 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7052 # what can follow a one-line anonynomous sub closing curly:
7053 # one-line anonumous subs also have ']' here...
7054 # see tk3.t and PP.pm
7055 @_ = qw# ; : => or and && || ) ] ~~ #;
7057 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7059 # What can follow a closing curly of a block
7060 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7061 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7062 @_ = qw# ; : => or and && || ) #;
7065 # allow cuddled continue if cuddled else is specified
7066 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7068 @is_other_brace_follower{@_} = (1) x scalar(@_);
7070 $right_bond_strength{'{'} = WEAK;
7071 $left_bond_strength{'{'} = VERY_STRONG;
7073 # make -l=0 equal to -l=infinite
7074 if ( !$rOpts->{'maximum-line-length'} ) {
7075 $rOpts->{'maximum-line-length'} = 1000000;
7078 # make -lbl=0 equal to -lbl=infinite
7079 if ( !$rOpts->{'long-block-line-count'} ) {
7080 $rOpts->{'long-block-line-count'} = 1000000;
7083 my $ole = $rOpts->{'output-line-ending'};
7092 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7093 my $str = join " ", keys %endings;
7095 Unrecognized line ending '$ole'; expecting one of: $str
7098 if ( $rOpts->{'preserve-line-endings'} ) {
7099 warn "Ignoring -ple; conflicts with -ole\n";
7100 $rOpts->{'preserve-line-endings'} = undef;
7104 # hashes used to simplify setting whitespace
7106 '{' => $rOpts->{'brace-tightness'},
7107 '}' => $rOpts->{'brace-tightness'},
7108 '(' => $rOpts->{'paren-tightness'},
7109 ')' => $rOpts->{'paren-tightness'},
7110 '[' => $rOpts->{'square-bracket-tightness'},
7111 ']' => $rOpts->{'square-bracket-tightness'},
7120 # frequently used parameters
7121 $rOpts_add_newlines = $rOpts->{'add-newlines'};
7122 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
7123 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7124 $rOpts_block_brace_vertical_tightness =
7125 $rOpts->{'block-brace-vertical-tightness'};
7126 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
7127 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7128 $rOpts_break_at_old_ternary_breakpoints =
7129 $rOpts->{'break-at-old-ternary-breakpoints'};
7130 $rOpts_break_at_old_comma_breakpoints =
7131 $rOpts->{'break-at-old-comma-breakpoints'};
7132 $rOpts_break_at_old_keyword_breakpoints =
7133 $rOpts->{'break-at-old-keyword-breakpoints'};
7134 $rOpts_break_at_old_logical_breakpoints =
7135 $rOpts->{'break-at-old-logical-breakpoints'};
7136 $rOpts_closing_side_comment_else_flag =
7137 $rOpts->{'closing-side-comment-else-flag'};
7138 $rOpts_closing_side_comment_maximum_text =
7139 $rOpts->{'closing-side-comment-maximum-text'};
7140 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7141 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
7142 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
7143 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
7144 $rOpts_indent_columns = $rOpts->{'indent-columns'};
7145 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
7146 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7147 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
7148 $rOpts_short_concatenation_item_length =
7149 $rOpts->{'short-concatenation-item-length'};
7150 $rOpts_swallow_optional_blank_lines =
7151 $rOpts->{'swallow-optional-blank-lines'};
7152 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
7153 $rOpts_format_skipping = $rOpts->{'format-skipping'};
7154 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
7155 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
7156 $half_maximum_line_length = $rOpts_maximum_line_length / 2;
7158 # Note that both opening and closing tokens can access the opening
7159 # and closing flags of their container types.
7160 %opening_vertical_tightness = (
7161 '(' => $rOpts->{'paren-vertical-tightness'},
7162 '{' => $rOpts->{'brace-vertical-tightness'},
7163 '[' => $rOpts->{'square-bracket-vertical-tightness'},
7164 ')' => $rOpts->{'paren-vertical-tightness'},
7165 '}' => $rOpts->{'brace-vertical-tightness'},
7166 ']' => $rOpts->{'square-bracket-vertical-tightness'},
7169 %closing_vertical_tightness = (
7170 '(' => $rOpts->{'paren-vertical-tightness-closing'},
7171 '{' => $rOpts->{'brace-vertical-tightness-closing'},
7172 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7173 ')' => $rOpts->{'paren-vertical-tightness-closing'},
7174 '}' => $rOpts->{'brace-vertical-tightness-closing'},
7175 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7178 # assume flag for '>' same as ')' for closing qw quotes
7179 %closing_token_indentation = (
7180 ')' => $rOpts->{'closing-paren-indentation'},
7181 '}' => $rOpts->{'closing-brace-indentation'},
7182 ']' => $rOpts->{'closing-square-bracket-indentation'},
7183 '>' => $rOpts->{'closing-paren-indentation'},
7186 %opening_token_right = (
7187 '(' => $rOpts->{'opening-paren-right'},
7188 '{' => $rOpts->{'opening-hash-brace-right'},
7189 '[' => $rOpts->{'opening-square-bracket-right'},
7192 %stack_opening_token = (
7193 '(' => $rOpts->{'stack-opening-paren'},
7194 '{' => $rOpts->{'stack-opening-hash-brace'},
7195 '[' => $rOpts->{'stack-opening-square-bracket'},
7198 %stack_closing_token = (
7199 ')' => $rOpts->{'stack-closing-paren'},
7200 '}' => $rOpts->{'stack-closing-hash-brace'},
7201 ']' => $rOpts->{'stack-closing-square-bracket'},
7205 sub make_static_block_comment_pattern {
7207 # create the pattern used to identify static block comments
7208 $static_block_comment_pattern = '^\s*##';
7210 # allow the user to change it
7211 if ( $rOpts->{'static-block-comment-prefix'} ) {
7212 my $prefix = $rOpts->{'static-block-comment-prefix'};
7213 $prefix =~ s/^\s*//;
7214 my $pattern = $prefix;
7216 # user may give leading caret to force matching left comments only
7217 if ( $prefix !~ /^\^#/ ) {
7218 if ( $prefix !~ /^#/ ) {
7220 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7222 $pattern = '^\s*' . $prefix;
7224 eval "'##'=~/$pattern/";
7227 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7229 $static_block_comment_pattern = $pattern;
7233 sub make_format_skipping_pattern {
7234 my ( $opt_name, $default ) = @_;
7235 my $param = $rOpts->{$opt_name};
7236 unless ($param) { $param = $default }
7238 if ( $param !~ /^#/ ) {
7239 die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7241 my $pattern = '^' . $param . '\s';
7242 eval "'#'=~/$pattern/";
7245 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7250 sub make_closing_side_comment_list_pattern {
7252 # turn any input list into a regex for recognizing selected block types
7253 $closing_side_comment_list_pattern = '^\w+';
7254 if ( defined( $rOpts->{'closing-side-comment-list'} )
7255 && $rOpts->{'closing-side-comment-list'} )
7257 $closing_side_comment_list_pattern =
7258 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7262 sub make_bli_pattern {
7264 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7265 && $rOpts->{'brace-left-and-indent-list'} )
7267 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7270 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7273 sub make_block_brace_vertical_tightness_pattern {
7275 # turn any input list into a regex for recognizing selected block types
7276 $block_brace_vertical_tightness_pattern =
7277 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7279 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7280 && $rOpts->{'block-brace-vertical-tightness-list'} )
7282 $block_brace_vertical_tightness_pattern =
7283 make_block_pattern( '-bbvtl',
7284 $rOpts->{'block-brace-vertical-tightness-list'} );
7288 sub make_block_pattern {
7290 # given a string of block-type keywords, return a regex to match them
7291 # The only tricky part is that labels are indicated with a single ':'
7292 # and the 'sub' token text may have additional text after it (name of
7297 # input string: "if else elsif unless while for foreach do : sub";
7298 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7300 my ( $abbrev, $string ) = @_;
7301 $string =~ s/^\s+//;
7302 $string =~ s/\s+$//;
7303 my @list = split /\s+/, $string;
7309 if ( $i eq 'sub' ) {
7311 elsif ( $i eq ':' ) {
7312 push @words, '\w+:';
7314 elsif ( $i =~ /^\w/ ) {
7318 warn "unrecognized block type $i after $abbrev, ignoring\n";
7321 my $pattern = '(' . join( '|', @words ) . ')$';
7322 if ( $seen{'sub'} ) {
7323 $pattern = '(' . $pattern . '|sub)';
7325 $pattern = '^' . $pattern;
7329 sub make_static_side_comment_pattern {
7331 # create the pattern used to identify static side comments
7332 $static_side_comment_pattern = '^##';
7334 # allow the user to change it
7335 if ( $rOpts->{'static-side-comment-prefix'} ) {
7336 my $prefix = $rOpts->{'static-side-comment-prefix'};
7337 $prefix =~ s/^\s*//;
7338 my $pattern = '^' . $prefix;
7339 eval "'##'=~/$pattern/";
7342 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
7344 $static_side_comment_pattern = $pattern;
7348 sub make_closing_side_comment_prefix {
7350 # Be sure we have a valid closing side comment prefix
7351 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
7352 my $csc_prefix_pattern;
7353 if ( !defined($csc_prefix) ) {
7354 $csc_prefix = '## end';
7355 $csc_prefix_pattern = '^##\s+end';
7358 my $test_csc_prefix = $csc_prefix;
7359 if ( $test_csc_prefix !~ /^#/ ) {
7360 $test_csc_prefix = '#' . $test_csc_prefix;
7363 # make a regex to recognize the prefix
7364 my $test_csc_prefix_pattern = $test_csc_prefix;
7366 # escape any special characters
7367 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
7369 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
7371 # allow exact number of intermediate spaces to vary
7372 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
7374 # make sure we have a good pattern
7375 # if we fail this we probably have an error in escaping
7377 eval "'##'=~/$test_csc_prefix_pattern/";
7380 # shouldn't happen..must have screwed up escaping, above
7381 report_definite_bug();
7383 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
7385 # just warn and keep going with defaults
7386 warn "Please consider using a simpler -cscp prefix\n";
7387 warn "Using default -cscp instead; please check output\n";
7390 $csc_prefix = $test_csc_prefix;
7391 $csc_prefix_pattern = $test_csc_prefix_pattern;
7394 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
7395 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
7398 sub dump_want_left_space {
7402 These values are the main control of whitespace to the left of a token type;
7403 They may be altered with the -wls parameter.
7404 For a list of token types, use perltidy --dump-token-types (-dtt)
7405 1 means the token wants a space to its left
7406 -1 means the token does not want a space to its left
7407 ------------------------------------------------------------------------
7409 foreach ( sort keys %want_left_space ) {
7410 print $fh "$_\t$want_left_space{$_}\n";
7414 sub dump_want_right_space {
7418 These values are the main control of whitespace to the right of a token type;
7419 They may be altered with the -wrs parameter.
7420 For a list of token types, use perltidy --dump-token-types (-dtt)
7421 1 means the token wants a space to its right
7422 -1 means the token does not want a space to its right
7423 ------------------------------------------------------------------------
7425 foreach ( sort keys %want_right_space ) {
7426 print $fh "$_\t$want_right_space{$_}\n";
7430 { # begin is_essential_whitespace
7432 my %is_sort_grep_map;
7437 @_ = qw(sort grep map);
7438 @is_sort_grep_map{@_} = (1) x scalar(@_);
7440 @_ = qw(for foreach);
7441 @is_for_foreach{@_} = (1) x scalar(@_);
7445 sub is_essential_whitespace {
7447 # Essential whitespace means whitespace which cannot be safely deleted
7448 # without risking the introduction of a syntax error.
7449 # We are given three tokens and their types:
7450 # ($tokenl, $typel) is the token to the left of the space in question
7451 # ($tokenr, $typer) is the token to the right of the space in question
7452 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7454 # This is a slow routine but is not needed too often except when -mangle
7457 # Note: This routine should almost never need to be changed. It is
7458 # for avoiding syntax problems rather than for formatting.
7459 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7463 # never combine two bare words or numbers
7464 # examples: and ::ok(1)
7466 # for bla::bla:: abc
7467 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7468 # $input eq"quit" to make $inputeq"quit"
7469 # my $size=-s::SINK if $file; <==OK but we won't do it
7470 # don't join something like: for bla::bla:: abc
7471 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7472 ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
7474 # do not combine a number with a concatination dot
7475 # example: pom.caputo:
7476 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7477 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7478 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7480 # do not join a minus with a bare word, because you might form
7481 # a file test operator. Example from Complex.pm:
7482 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7483 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7485 # and something like this could become ambiguous without space
7487 # use constant III=>1;
7491 || ( ( $tokenl eq '-' )
7492 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7494 # '= -' should not become =- or you will get a warning
7496 # || ($tokenr eq '-')
7498 # keep a space between a quote and a bareword to prevent the
7499 # bareword from becomming a quote modifier.
7500 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7502 # keep a space between a token ending in '$' and any word;
7503 # this caused trouble: "die @$ if $@"
7504 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7505 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7507 # perl is very fussy about spaces before <<
7508 || ( $tokenr =~ /^\<\</ )
7510 # avoid combining tokens to create new meanings. Example:
7511 # $a+ +$b must not become $a++$b
7512 || ( $is_digraph{ $tokenl . $tokenr } )
7513 || ( $is_trigraph{ $tokenl . $tokenr } )
7515 # another example: do not combine these two &'s:
7516 # allow_options & &OPT_EXECCGI
7517 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7519 # don't combine $$ or $# with any alphanumeric
7520 # (testfile mangle.t with --mangle)
7521 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7523 # retain any space after possible filehandle
7524 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7525 || ( $typel eq 'Z' )
7527 # Perl is sensitive to whitespace after the + here:
7528 # $b = xvals $a + 0.1 * yvals $a;
7529 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
7531 # keep paren separate in 'use Foo::Bar ()'
7535 && $tokenll eq 'use' )
7537 # keep any space between filehandle and paren:
7538 # file mangle.t with --mangle:
7539 || ( $typel eq 'Y' && $tokenr eq '(' )
7541 # retain any space after here doc operator ( hereerr.t)
7542 || ( $typel eq 'h' )
7544 # FIXME: this needs some further work; extrude.t has test cases
7545 # it is safest to retain any space after start of ? : operator
7546 # because of perl's quirky parser.
7547 # ie, this line will fail if you remove the space after the '?':
7548 # $b=join $comma ? ',' : ':', @_; # ok
7549 # $b=join $comma ?',' : ':', @_; # error!
7551 # $b=join $comma?',' : ':', @_; # not a problem!
7552 ## || ($typel eq '?')
7554 # be careful with a space around ++ and --, to avoid ambiguity as to
7555 # which token it applies
7556 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7557 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7559 # need space after foreach my; for example, this will fail in
7560 # older versions of Perl:
7561 # foreach my$ft(@filetypes)...
7566 && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/
7569 # must have space between grep and left paren; "grep(" will fail
7570 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7572 # don't stick numbers next to left parens, as in:
7573 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7574 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7576 ; # the value of this long logic sequence is the result we want
7581 sub set_white_space_flag {
7583 # This routine examines each pair of nonblank tokens and
7584 # sets values for array @white_space_flag.
7586 # $white_space_flag[$j] is a flag indicating whether a white space
7587 # BEFORE token $j is needed, with the following values:
7589 # -1 do not want a space before token $j
7590 # 0 optional space or $j is a whitespace
7591 # 1 want a space before token $j
7594 # The values for the first token will be defined based
7595 # upon the contents of the "to_go" output array.
7597 # Note: retain debug print statements because they are usually
7598 # required after adding new token types.
7602 # initialize these global hashes, which control the use of
7603 # whitespace around tokens:
7608 # %space_after_keyword
7610 # Many token types are identical to the tokens themselves.
7611 # See the tokenizer for a complete list. Here are some special types:
7613 # f = semicolon in for statement
7616 # Note that :: is excluded since it should be contained in an identifier
7617 # Note that '->' is excluded because it never gets space
7618 # parentheses and brackets are excluded since they are handled specially
7619 # curly braces are included but may be overridden by logic, such as
7622 # NEW_TOKENS: create a whitespace rule here. This can be as
7623 # simple as adding your new letter to @spaces_both_sides, for
7627 @is_opening_type{@_} = (1) x scalar(@_);
7630 @is_closing_type{@_} = (1) x scalar(@_);
7632 my @spaces_both_sides = qw"
7633 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7634 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~
7635 &&= ||= //= <=> A k f w F n C Y U G v
7638 my @spaces_left_side = qw"
7639 t ! ~ m p { \ h pp mm Z j
7641 push( @spaces_left_side, '#' ); # avoids warning message
7643 my @spaces_right_side = qw"
7644 ; } ) ] R J ++ -- **=
7646 push( @spaces_right_side, ',' ); # avoids warning message
7647 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7648 @want_right_space{@spaces_both_sides} =
7649 (1) x scalar(@spaces_both_sides);
7650 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
7651 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7652 @want_left_space{@spaces_right_side} =
7653 (-1) x scalar(@spaces_right_side);
7654 @want_right_space{@spaces_right_side} =
7655 (1) x scalar(@spaces_right_side);
7656 $want_left_space{'L'} = WS_NO;
7657 $want_left_space{'->'} = WS_NO;
7658 $want_right_space{'->'} = WS_NO;
7659 $want_left_space{'**'} = WS_NO;
7660 $want_right_space{'**'} = WS_NO;
7662 # hash type information must stay tightly bound
7664 $binary_ws_rules{'i'}{'L'} = WS_NO;
7665 $binary_ws_rules{'i'}{'{'} = WS_YES;
7666 $binary_ws_rules{'k'}{'{'} = WS_YES;
7667 $binary_ws_rules{'U'}{'{'} = WS_YES;
7668 $binary_ws_rules{'i'}{'['} = WS_NO;
7669 $binary_ws_rules{'R'}{'L'} = WS_NO;
7670 $binary_ws_rules{'R'}{'{'} = WS_NO;
7671 $binary_ws_rules{'t'}{'L'} = WS_NO;
7672 $binary_ws_rules{'t'}{'{'} = WS_NO;
7673 $binary_ws_rules{'}'}{'L'} = WS_NO;
7674 $binary_ws_rules{'}'}{'{'} = WS_NO;
7675 $binary_ws_rules{'$'}{'L'} = WS_NO;
7676 $binary_ws_rules{'$'}{'{'} = WS_NO;
7677 $binary_ws_rules{'@'}{'L'} = WS_NO;
7678 $binary_ws_rules{'@'}{'{'} = WS_NO;
7679 $binary_ws_rules{'='}{'L'} = WS_YES;
7681 # the following includes ') {'
7682 # as in : if ( xxx ) { yyy }
7683 $binary_ws_rules{']'}{'L'} = WS_NO;
7684 $binary_ws_rules{']'}{'{'} = WS_NO;
7685 $binary_ws_rules{')'}{'{'} = WS_YES;
7686 $binary_ws_rules{')'}{'['} = WS_NO;
7687 $binary_ws_rules{']'}{'['} = WS_NO;
7688 $binary_ws_rules{']'}{'{'} = WS_NO;
7689 $binary_ws_rules{'}'}{'['} = WS_NO;
7690 $binary_ws_rules{'R'}{'['} = WS_NO;
7692 $binary_ws_rules{']'}{'++'} = WS_NO;
7693 $binary_ws_rules{']'}{'--'} = WS_NO;
7694 $binary_ws_rules{')'}{'++'} = WS_NO;
7695 $binary_ws_rules{')'}{'--'} = WS_NO;
7697 $binary_ws_rules{'R'}{'++'} = WS_NO;
7698 $binary_ws_rules{'R'}{'--'} = WS_NO;
7700 $binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
7701 $binary_ws_rules{'w'}{':'} = WS_NO;
7702 $binary_ws_rules{'i'}{'Q'} = WS_YES;
7703 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
7705 # FIXME: we need to split 'i' into variables and functions
7706 # and have no space for functions but space for variables. For now,
7707 # I have a special patch in the special rules below
7708 $binary_ws_rules{'i'}{'('} = WS_NO;
7710 $binary_ws_rules{'w'}{'('} = WS_NO;
7711 $binary_ws_rules{'w'}{'{'} = WS_YES;
7713 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7714 my ( $last_token, $last_type, $last_block_type, $token, $type,
7716 my (@white_space_flag);
7717 my $j_tight_closing_paren = -1;
7719 if ( $max_index_to_go >= 0 ) {
7720 $token = $tokens_to_go[$max_index_to_go];
7721 $type = $types_to_go[$max_index_to_go];
7722 $block_type = $block_type_to_go[$max_index_to_go];
7730 # loop over all tokens
7733 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7735 if ( $$rtoken_type[$j] eq 'b' ) {
7736 $white_space_flag[$j] = WS_OPTIONAL;
7740 # set a default value, to be changed as needed
7742 $last_token = $token;
7744 $last_block_type = $block_type;
7745 $token = $$rtokens[$j];
7746 $type = $$rtoken_type[$j];
7747 $block_type = $$rblock_type[$j];
7749 #---------------------------------------------------------------
7751 # handle space on the inside of opening braces
7752 #---------------------------------------------------------------
7755 if ( $is_opening_type{$last_type} ) {
7757 $j_tight_closing_paren = -1;
7759 # let's keep empty matched braces together: () {} []
7761 if ( $token eq $matching_token{$last_token} ) {
7771 # we're considering the right of an opening brace
7772 # tightness = 0 means always pad inside with space
7773 # tightness = 1 means pad inside if "complex"
7774 # tightness = 2 means never pad inside with space
7777 if ( $last_type eq '{'
7778 && $last_token eq '{'
7779 && $last_block_type )
7781 $tightness = $rOpts_block_brace_tightness;
7783 else { $tightness = $tightness{$last_token} }
7785 if ( $tightness <= 0 ) {
7788 elsif ( $tightness > 1 ) {
7793 # Patch to count '-foo' as single token so that
7794 # each of $a{-foo} and $a{foo} and $a{'foo'} do
7795 # not get spaces with default formatting.
7799 && $last_token eq '{'
7800 && $$rtoken_type[ $j + 1 ] eq 'w' );
7802 # $j_next is where a closing token should be if
7803 # the container has a single token
7805 ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
7808 my $tok_next = $$rtokens[$j_next];
7809 my $type_next = $$rtoken_type[$j_next];
7811 # for tightness = 1, if there is just one token
7812 # within the matching pair, we will keep it tight
7814 $tok_next eq $matching_token{$last_token}
7816 # but watch out for this: [ [ ] (misc.t)
7817 && $last_token ne $token
7821 # remember where to put the space for the closing paren
7822 $j_tight_closing_paren = $j_next;
7830 } # done with opening braces and brackets
7832 if FORMATTER_DEBUG_FLAG_WHITE;
7834 #---------------------------------------------------------------
7836 # handle space on inside of closing brace pairs
7837 #---------------------------------------------------------------
7840 if ( $is_closing_type{$type} ) {
7842 if ( $j == $j_tight_closing_paren ) {
7844 $j_tight_closing_paren = -1;
7849 if ( !defined($ws) ) {
7852 if ( $type eq '}' && $token eq '}' && $block_type ) {
7853 $tightness = $rOpts_block_brace_tightness;
7855 else { $tightness = $tightness{$token} }
7857 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7863 if FORMATTER_DEBUG_FLAG_WHITE;
7865 #---------------------------------------------------------------
7867 # use the binary table
7868 #---------------------------------------------------------------
7869 if ( !defined($ws) ) {
7870 $ws = $binary_ws_rules{$last_type}{$type};
7873 if FORMATTER_DEBUG_FLAG_WHITE;
7875 #---------------------------------------------------------------
7877 # some special cases
7878 #---------------------------------------------------------------
7879 if ( $token eq '(' ) {
7881 # This will have to be tweaked as tokenization changes.
7882 # We usually want a space at '} (', for example:
7883 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7886 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
7887 # At present, the above & block is marked as type L/R so this case
7888 # won't go through here.
7889 if ( $last_type eq '}' ) { $ws = WS_YES }
7891 # NOTE: some older versions of Perl had occasional problems if
7892 # spaces are introduced between keywords or functions and opening
7893 # parens. So the default is not to do this except is certain
7894 # cases. The current Perl seems to tolerate spaces.
7896 # Space between keyword and '('
7897 elsif ( $last_type eq 'k' ) {
7899 unless ( $rOpts_space_keyword_paren
7900 || $space_after_keyword{$last_token} );
7903 # Space between function and '('
7904 # -----------------------------------------------------
7905 # 'w' and 'i' checks for something like:
7906 # myfun( &myfun( ->myfun(
7907 # -----------------------------------------------------
7908 elsif (( $last_type =~ /^[wU]$/ )
7909 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7911 $ws = WS_NO unless ($rOpts_space_function_paren);
7914 # space between something like $i and ( in
7915 # for $i ( 0 .. 20 ) {
7916 # FIXME: eventually, type 'i' needs to be split into multiple
7917 # token types so this can be a hardwired rule.
7918 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7922 # allow constant function followed by '()' to retain no space
7923 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
7928 # patch for SWITCH/CASE: make space at ']{' optional
7929 # since the '{' might begin a case or when block
7930 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
7934 # keep space between 'sub' and '{' for anonymous sub definition
7935 if ( $type eq '{' ) {
7936 if ( $last_token eq 'sub' ) {
7940 # this is needed to avoid no space in '){'
7941 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7943 # avoid any space before the brace or bracket in something like
7944 # @opts{'a','b',...}
7945 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7950 elsif ( $type eq 'i' ) {
7952 # never a space before ->
7953 if ( $token =~ /^\-\>/ ) {
7958 # retain any space between '-' and bare word
7959 elsif ( $type eq 'w' || $type eq 'C' ) {
7960 $ws = WS_OPTIONAL if $last_type eq '-';
7962 # never a space before ->
7963 if ( $token =~ /^\-\>/ ) {
7968 # retain any space between '-' and bare word
7969 # example: avoid space between 'USER' and '-' here:
7970 # $myhash{USER-NAME}='steve';
7971 elsif ( $type eq 'm' || $type eq '-' ) {
7972 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
7975 # always space before side comment
7976 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
7978 # always preserver whatever space was used after a possible
7979 # filehandle (except _) or here doc operator
7982 && ( ( $last_type eq 'Z' && $last_token ne '_' )
7983 || $last_type eq 'h' )
7990 if FORMATTER_DEBUG_FLAG_WHITE;
7992 #---------------------------------------------------------------
7994 # default rules not covered above
7995 #---------------------------------------------------------------
7996 # if we fall through to here,
7997 # look at the pre-defined hash tables for the two tokens, and
7998 # if (they are equal) use the common value
7999 # if (either is zero or undef) use the other
8000 # if (either is -1) use it
8014 if ( !defined($ws) ) {
8015 my $wl = $want_left_space{$type};
8016 my $wr = $want_right_space{$last_type};
8017 if ( !defined($wl) ) { $wl = 0 }
8018 if ( !defined($wr) ) { $wr = 0 }
8019 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8022 if ( !defined($ws) ) {
8025 "WS flag is undefined for tokens $last_token $token\n");
8028 # Treat newline as a whitespace. Otherwise, we might combine
8029 # 'Send' and '-recipients' here according to the above rules:
8030 # my $msg = new Fax::Send
8031 # -recipients => $to,
8033 if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8038 && ( $last_type !~ /^[Zh]$/ ) )
8041 # If this happens, we have a non-fatal but undesirable
8042 # hole in the above rules which should be patched.
8044 "WS flag is zero for tokens $last_token $token\n");
8046 $white_space_flag[$j] = $ws;
8048 FORMATTER_DEBUG_FLAG_WHITE && do {
8049 my $str = substr( $last_token, 0, 15 );
8050 $str .= ' ' x ( 16 - length($str) );
8051 if ( !defined($ws_1) ) { $ws_1 = "*" }
8052 if ( !defined($ws_2) ) { $ws_2 = "*" }
8053 if ( !defined($ws_3) ) { $ws_3 = "*" }
8054 if ( !defined($ws_4) ) { $ws_4 = "*" }
8056 "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8059 return \@white_space_flag;
8062 { # begin print_line_of_tokens
8069 my $rcontainer_type;
8070 my $rcontainer_environment;
8073 my $rnesting_tokens;
8075 my $rnesting_blocks;
8078 my $python_indentation_level;
8080 # These local token variables are stored by store_token_to_go:
8083 my $container_environment;
8085 my $in_continued_quote;
8088 my $no_internal_newlines;
8094 # routine to pull the jth token from the line of tokens
8097 $token = $$rtokens[$j];
8098 $type = $$rtoken_type[$j];
8099 $block_type = $$rblock_type[$j];
8100 $container_type = $$rcontainer_type[$j];
8101 $container_environment = $$rcontainer_environment[$j];
8102 $type_sequence = $$rtype_sequence[$j];
8103 $level = $$rlevels[$j];
8104 $slevel = $$rslevels[$j];
8105 $nesting_blocks = $$rnesting_blocks[$j];
8106 $ci_level = $$rci_levels[$j];
8112 sub save_current_token {
8115 $block_type, $ci_level,
8116 $container_environment, $container_type,
8117 $in_continued_quote, $level,
8118 $nesting_blocks, $no_internal_newlines,
8120 $type, $type_sequence,
8124 sub restore_current_token {
8126 $block_type, $ci_level,
8127 $container_environment, $container_type,
8128 $in_continued_quote, $level,
8129 $nesting_blocks, $no_internal_newlines,
8131 $type, $type_sequence,
8136 # Routine to place the current token into the output stream.
8137 # Called once per output token.
8138 sub store_token_to_go {
8140 my $flag = $no_internal_newlines;
8141 if ( $_[0] ) { $flag = 1 }
8143 $tokens_to_go[ ++$max_index_to_go ] = $token;
8144 $types_to_go[$max_index_to_go] = $type;
8145 $nobreak_to_go[$max_index_to_go] = $flag;
8146 $old_breakpoint_to_go[$max_index_to_go] = 0;
8147 $forced_breakpoint_to_go[$max_index_to_go] = 0;
8148 $block_type_to_go[$max_index_to_go] = $block_type;
8149 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
8150 $container_environment_to_go[$max_index_to_go] = $container_environment;
8151 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
8152 $ci_levels_to_go[$max_index_to_go] = $ci_level;
8153 $mate_index_to_go[$max_index_to_go] = -1;
8154 $matching_token_to_go[$max_index_to_go] = '';
8156 # Note: negative levels are currently retained as a diagnostic so that
8157 # the 'final indentation level' is correctly reported for bad scripts.
8158 # But this means that every use of $level as an index must be checked.
8159 # If this becomes too much of a problem, we might give up and just clip
8161 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8162 $levels_to_go[$max_index_to_go] = $level;
8163 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8164 $lengths_to_go[ $max_index_to_go + 1 ] =
8165 $lengths_to_go[$max_index_to_go] + length($token);
8167 # Define the indentation that this token would have if it started
8168 # a new line. We have to do this now because we need to know this
8169 # when considering one-line blocks.
8170 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8172 if ( $type ne 'b' ) {
8173 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8174 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
8175 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8176 $last_nonblank_index_to_go = $max_index_to_go;
8177 $last_nonblank_type_to_go = $type;
8178 $last_nonblank_token_to_go = $token;
8179 if ( $type eq ',' ) {
8180 $comma_count_in_batch++;
8184 FORMATTER_DEBUG_FLAG_STORE && do {
8185 my ( $a, $b, $c ) = caller();
8187 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8191 sub insert_new_token_to_go {
8193 # insert a new token into the output stream. use same level as
8194 # previous token; assumes a character at max_index_to_go.
8195 save_current_token();
8196 ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8198 if ( $max_index_to_go == UNDEFINED_INDEX ) {
8199 warning("code bug: bad call to insert_new_token_to_go\n");
8201 $level = $levels_to_go[$max_index_to_go];
8203 # FIXME: it seems to be necessary to use the next, rather than
8204 # previous, value of this variable when creating a new blank (align.t)
8205 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
8206 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
8207 $ci_level = $ci_levels_to_go[$max_index_to_go];
8208 $container_environment = $container_environment_to_go[$max_index_to_go];
8209 $in_continued_quote = 0;
8211 $type_sequence = "";
8212 store_token_to_go();
8213 restore_current_token();
8217 sub print_line_of_tokens {
8219 my $line_of_tokens = shift;
8221 # This routine is called once per input line to process all of
8222 # the tokens on that line. This is the first stage of
8225 # Full-line comments and blank lines may be processed immediately.
8227 # For normal lines of code, the tokens are stored one-by-one,
8228 # via calls to 'sub store_token_to_go', until a known line break
8229 # point is reached. Then, the batch of collected tokens is
8230 # passed along to 'sub output_line_to_go' for further
8231 # processing. This routine decides if there should be
8232 # whitespace between each pair of non-white tokens, so later
8233 # routines only need to decide on any additional line breaks.
8234 # Any whitespace is initally a single space character. Later,
8235 # the vertical aligner may expand that to be multiple space
8236 # characters if necessary for alignment.
8238 # extract input line number for error messages
8239 $input_line_number = $line_of_tokens->{_line_number};
8241 $rtoken_type = $line_of_tokens->{_rtoken_type};
8242 $rtokens = $line_of_tokens->{_rtokens};
8243 $rlevels = $line_of_tokens->{_rlevels};
8244 $rslevels = $line_of_tokens->{_rslevels};
8245 $rblock_type = $line_of_tokens->{_rblock_type};
8246 $rcontainer_type = $line_of_tokens->{_rcontainer_type};
8247 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8248 $rtype_sequence = $line_of_tokens->{_rtype_sequence};
8249 $input_line = $line_of_tokens->{_line_text};
8250 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
8251 $rci_levels = $line_of_tokens->{_rci_levels};
8252 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
8254 $in_continued_quote = $starting_in_quote =
8255 $line_of_tokens->{_starting_in_quote};
8256 $in_quote = $line_of_tokens->{_ending_in_quote};
8257 $ending_in_quote = $in_quote;
8258 $python_indentation_level =
8259 $line_of_tokens->{_python_indentation_level};
8264 my $next_nonblank_token;
8265 my $next_nonblank_token_type;
8266 my $rwhite_space_flag;
8268 $jmax = @$rtokens - 1;
8270 $container_type = "";
8271 $container_environment = "";
8272 $type_sequence = "";
8273 $no_internal_newlines = 1 - $rOpts_add_newlines;
8274 $is_static_block_comment = 0;
8276 # Handle a continued quote..
8277 if ($in_continued_quote) {
8279 # A line which is entirely a quote or pattern must go out
8280 # verbatim. Note: the \n is contained in $input_line.
8282 if ( ( $input_line =~ "\t" ) ) {
8283 note_embedded_tab();
8285 write_unindented_line("$input_line");
8286 $last_line_had_side_comment = 0;
8290 # prior to version 20010406, perltidy had a bug which placed
8291 # continuation indentation before the last line of some multiline
8292 # quotes and patterns -- exactly the lines passing this way.
8293 # To help find affected lines in scripts run with these
8294 # versions, run with '-chk', and it will warn of any quotes or
8295 # patterns which might have been modified by these early
8297 if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8299 "-chk: please check this line for extra leading whitespace\n"
8304 # Write line verbatim if we are in a formatting skip section
8305 if ($in_format_skipping_section) {
8306 write_unindented_line("$input_line");
8307 $last_line_had_side_comment = 0;
8309 # Note: extra space appended to comment simplifies pattern matching
8311 && $$rtoken_type[0] eq '#'
8312 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8314 $in_format_skipping_section = 0;
8315 write_logfile_entry("Exiting formatting skip section\n");
8320 # See if we are entering a formatting skip section
8321 if ( $rOpts_format_skipping
8323 && $$rtoken_type[0] eq '#'
8324 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8327 $in_format_skipping_section = 1;
8328 write_logfile_entry("Entering formatting skip section\n");
8329 write_unindented_line("$input_line");
8330 $last_line_had_side_comment = 0;
8334 # delete trailing blank tokens
8335 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8337 # Handle a blank line..
8340 # For the 'swallow-optional-blank-lines' option, we delete all
8341 # old blank lines and let the blank line rules generate any
8343 if ( !$rOpts_swallow_optional_blank_lines ) {
8345 $file_writer_object->write_blank_code_line();
8346 $last_line_leading_type = 'b';
8348 $last_line_had_side_comment = 0;
8352 # see if this is a static block comment (starts with ## by default)
8353 my $is_static_block_comment_without_leading_space = 0;
8355 && $$rtoken_type[0] eq '#'
8356 && $rOpts->{'static-block-comments'}
8357 && $input_line =~ /$static_block_comment_pattern/o )
8359 $is_static_block_comment = 1;
8360 $is_static_block_comment_without_leading_space =
8361 substr( $input_line, 0, 1 ) eq '#';
8364 # create a hanging side comment if appropriate
8367 && $$rtoken_type[0] eq '#' # only token is a comment
8368 && $last_line_had_side_comment # last line had side comment
8369 && $input_line =~ /^\s/ # there is some leading space
8370 && !$is_static_block_comment # do not make static comment hanging
8371 && $rOpts->{'hanging-side-comments'} # user is allowing this
8375 # We will insert an empty qw string at the start of the token list
8376 # to force this comment to be a side comment. The vertical aligner
8377 # should then line it up with the previous side comment.
8378 unshift @$rtoken_type, 'q';
8379 unshift @$rtokens, '';
8380 unshift @$rlevels, $$rlevels[0];
8381 unshift @$rslevels, $$rslevels[0];
8382 unshift @$rblock_type, '';
8383 unshift @$rcontainer_type, '';
8384 unshift @$rcontainer_environment, '';
8385 unshift @$rtype_sequence, '';
8386 unshift @$rnesting_tokens, $$rnesting_tokens[0];
8387 unshift @$rci_levels, $$rci_levels[0];
8388 unshift @$rnesting_blocks, $$rnesting_blocks[0];
8392 # remember if this line has a side comment
8393 $last_line_had_side_comment =
8394 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8396 # Handle a block (full-line) comment..
8397 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8399 if ( $rOpts->{'delete-block-comments'} ) { return }
8401 if ( $rOpts->{'tee-block-comments'} ) {
8402 $file_writer_object->tee_on();
8405 destroy_one_line_block();
8406 output_line_to_go();
8408 # output a blank line before block comments
8410 $last_line_leading_type !~ /^[#b]$/
8411 && $rOpts->{'blanks-before-comments'} # only if allowed
8413 $is_static_block_comment # never before static block comments
8416 flush(); # switching to new output stream
8417 $file_writer_object->write_blank_code_line();
8418 $last_line_leading_type = 'b';
8421 # TRIM COMMENTS -- This could be turned off as a option
8422 $$rtokens[0] =~ s/\s*$//; # trim right end
8425 $rOpts->{'indent-block-comments'}
8426 && ( !$rOpts->{'indent-spaced-block-comments'}
8427 || $input_line =~ /^\s+/ )
8428 && !$is_static_block_comment_without_leading_space
8432 store_token_to_go();
8433 output_line_to_go();
8436 flush(); # switching to new output stream
8437 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8438 $last_line_leading_type = '#';
8440 if ( $rOpts->{'tee-block-comments'} ) {
8441 $file_writer_object->tee_off();
8446 # compare input/output indentation except for continuation lines
8447 # (because they have an unknown amount of initial blank space)
8448 # and lines which are quotes (because they may have been outdented)
8449 # Note: this test is placed here because we know the continuation flag
8450 # at this point, which allows us to avoid non-meaningful checks.
8451 my $structural_indentation_level = $$rlevels[0];
8452 compare_indentation_levels( $python_indentation_level,
8453 $structural_indentation_level )
8454 unless ( $python_indentation_level < 0
8455 || ( $$rci_levels[0] > 0 )
8456 || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8459 # Patch needed for MakeMaker. Do not break a statement
8460 # in which $VERSION may be calculated. See MakeMaker.pm;
8461 # this is based on the coding in it.
8462 # The first line of a file that matches this will be eval'd:
8463 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8465 # *VERSION = \'1.01';
8466 # ( $VERSION ) = '$Revision: 1.56 $ ' =~ /\$Revision:\s+([^\s]+)/;
8467 # We will pass such a line straight through without breaking
8468 # it unless -npvl is used
8470 my $is_VERSION_statement = 0;
8473 !$saw_VERSION_in_this_file
8474 && $input_line =~ /VERSION/ # quick check to reject most lines
8475 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8478 $saw_VERSION_in_this_file = 1;
8479 $is_VERSION_statement = 1;
8480 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8481 $no_internal_newlines = 1;
8484 # take care of indentation-only
8485 # NOTE: In previous versions we sent all qw lines out immediately here.
8486 # No longer doing this: also write a line which is entirely a 'qw' list
8487 # to allow stacking of opening and closing tokens. Note that interior
8488 # qw lines will still go out at the end of this routine.
8489 if ( $rOpts->{'indent-only'} ) {
8491 $input_line =~ s/^\s*//; # trim left end
8492 $input_line =~ s/\s*$//; # trim right end
8495 $token = $input_line;
8498 $container_type = "";
8499 $container_environment = "";
8500 $type_sequence = "";
8501 store_token_to_go();
8502 output_line_to_go();
8506 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
8507 push( @$rtoken_type, 'b', 'b' );
8508 ($rwhite_space_flag) =
8509 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8511 # find input tabbing to allow checks for tabbing disagreement
8513 ##$input_line_tabbing = "";
8514 ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8516 # if the buffer hasn't been flushed, add a leading space if
8517 # necessary to keep essential whitespace. This is really only
8518 # necessary if we are squeezing out all ws.
8519 if ( $max_index_to_go >= 0 ) {
8521 $old_line_count_in_batch++;
8524 is_essential_whitespace(
8525 $last_last_nonblank_token,
8526 $last_last_nonblank_type,
8527 $tokens_to_go[$max_index_to_go],
8528 $types_to_go[$max_index_to_go],
8534 my $slevel = $$rslevels[0];
8535 insert_new_token_to_go( ' ', 'b', $slevel,
8536 $no_internal_newlines );
8540 # If we just saw the end of an elsif block, write nag message
8541 # if we do not see another elseif or an else.
8542 if ($looking_for_else) {
8544 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8545 write_logfile_entry("(No else block)\n");
8547 $looking_for_else = 0;
8550 # This is a good place to kill incomplete one-line blocks
8551 if ( ( $semicolons_before_block_self_destruct == 0 )
8552 && ( $max_index_to_go >= 0 )
8553 && ( $types_to_go[$max_index_to_go] eq ';' )
8554 && ( $$rtokens[0] ne '}' ) )
8556 destroy_one_line_block();
8557 output_line_to_go();
8560 # loop to process the tokens one-by-one
8564 foreach $j ( 0 .. $jmax ) {
8566 # pull out the local values for this token
8569 if ( $type eq '#' ) {
8571 # trim trailing whitespace
8572 # (there is no option at present to prevent this)
8576 $rOpts->{'delete-side-comments'}
8578 # delete closing side comments if necessary
8579 || ( $rOpts->{'delete-closing-side-comments'}
8580 && $token =~ /$closing_side_comment_prefix_pattern/o
8581 && $last_nonblank_block_type =~
8582 /$closing_side_comment_list_pattern/o )
8585 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8586 unstore_token_to_go();
8592 # If we are continuing after seeing a right curly brace, flush
8593 # buffer unless we see what we are looking for, as in
8595 if ( $rbrace_follower && $type ne 'b' ) {
8597 unless ( $rbrace_follower->{$token} ) {
8598 output_line_to_go();
8600 $rbrace_follower = undef;
8603 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8604 $next_nonblank_token = $$rtokens[$j_next];
8605 $next_nonblank_token_type = $$rtoken_type[$j_next];
8607 #--------------------------------------------------------
8608 # Start of section to patch token text
8609 #--------------------------------------------------------
8611 # Modify certain tokens here for whitespace
8612 # The following is not yet done, but could be:
8614 if ( $type =~ /^[wit]$/ ) {
8617 # change '$ var' to '$var' etc
8618 # '-> new' to '->new'
8619 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8623 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8626 # change 'LABEL :' to 'LABEL:'
8627 elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8629 # patch to add space to something like "x10"
8630 # This avoids having to split this token in the pre-tokenizer
8631 elsif ( $type eq 'n' ) {
8632 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8635 elsif ( $type eq 'Q' ) {
8636 note_embedded_tab() if ( $token =~ "\t" );
8638 # make note of something like '$var = s/xxx/yyy/;'
8639 # in case it should have been '$var =~ s/xxx/yyy/;'
8641 $token =~ /^(s|tr|y|m|\/)/
8642 && $last_nonblank_token =~ /^(=|==|!=)$/
8644 # precededed by simple scalar
8645 && $last_last_nonblank_type eq 'i'
8646 && $last_last_nonblank_token =~ /^\$/
8648 # followed by some kind of termination
8649 # (but give complaint if we can's see far enough ahead)
8650 && $next_nonblank_token =~ /^[; \)\}]$/
8652 # scalar is not decleared
8654 $types_to_go[0] eq 'k'
8655 && $tokens_to_go[0] =~ /^(my|our|local)$/
8659 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8661 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8666 # trim blanks from right of qw quotes
8667 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8668 elsif ( $type eq 'q' ) {
8670 note_embedded_tab() if ( $token =~ "\t" );
8673 #--------------------------------------------------------
8674 # End of section to patch token text
8675 #--------------------------------------------------------
8677 # insert any needed whitespace
8678 if ( ( $type ne 'b' )
8679 && ( $max_index_to_go >= 0 )
8680 && ( $types_to_go[$max_index_to_go] ne 'b' )
8681 && $rOpts_add_whitespace )
8683 my $ws = $$rwhite_space_flag[$j];
8686 insert_new_token_to_go( ' ', 'b', $slevel,
8687 $no_internal_newlines );
8691 # Do not allow breaks which would promote a side comment to a
8692 # block comment. In order to allow a break before an opening
8693 # or closing BLOCK, followed by a side comment, those sections
8694 # of code will handle this flag separately.
8695 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8696 my $is_opening_BLOCK =
8700 && $block_type ne 't' );
8701 my $is_closing_BLOCK =
8705 && $block_type ne 't' );
8707 if ( $side_comment_follows
8708 && !$is_opening_BLOCK
8709 && !$is_closing_BLOCK )
8711 $no_internal_newlines = 1;
8714 # We're only going to handle breaking for code BLOCKS at this
8715 # (top) level. Other indentation breaks will be handled by
8716 # sub scan_list, which is better suited to dealing with them.
8717 if ($is_opening_BLOCK) {
8719 # Tentatively output this token. This is required before
8720 # calling starting_one_line_block. We may have to unstore
8721 # it, though, if we have to break before it.
8722 store_token_to_go($side_comment_follows);
8724 # Look ahead to see if we might form a one-line block
8726 starting_one_line_block( $j, $jmax, $level, $slevel,
8727 $ci_level, $rtokens, $rtoken_type, $rblock_type );
8728 clear_breakpoint_undo_stack();
8730 # to simplify the logic below, set a flag to indicate if
8731 # this opening brace is far from the keyword which introduces it
8732 my $keyword_on_same_line = 1;
8733 if ( ( $max_index_to_go >= 0 )
8734 && ( $last_nonblank_type eq ')' ) )
8736 if ( $block_type =~ /^(if|else|elsif)$/
8737 && ( $tokens_to_go[0] eq '}' )
8738 && $rOpts_cuddled_else )
8740 $keyword_on_same_line = 1;
8742 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8744 $keyword_on_same_line = 0;
8748 # decide if user requested break before '{'
8751 # use -bl flag if not a sub block of any type
8752 $block_type !~ /^sub/
8753 ? $rOpts->{'opening-brace-on-new-line'}
8755 # use -sbl flag unless this is an anonymous sub block
8756 : $block_type !~ /^sub\W*$/
8757 ? $rOpts->{'opening-sub-brace-on-new-line'}
8759 # do not break for anonymous subs
8762 # Break before an opening '{' ...
8768 # and we were unable to start looking for a block,
8769 && $index_start_one_line_block == UNDEFINED_INDEX
8771 # or if it will not be on same line as its keyword, so that
8772 # it will be outdented (eval.t, overload.t), and the user
8773 # has not insisted on keeping it on the right
8774 || ( !$keyword_on_same_line
8775 && !$rOpts->{'opening-brace-always-on-right'} )
8780 # but only if allowed
8781 unless ($no_internal_newlines) {
8783 # since we already stored this token, we must unstore it
8784 unstore_token_to_go();
8786 # then output the line
8787 output_line_to_go();
8789 # and now store this token at the start of a new line
8790 store_token_to_go($side_comment_follows);
8794 # Now update for side comment
8795 if ($side_comment_follows) { $no_internal_newlines = 1 }
8797 # now output this line
8798 unless ($no_internal_newlines) {
8799 output_line_to_go();
8803 elsif ($is_closing_BLOCK) {
8805 # If there is a pending one-line block ..
8806 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8808 # we have to terminate it if..
8811 # it is too long (final length may be different from
8812 # initial estimate). note: must allow 1 space for this token
8813 excess_line_length( $index_start_one_line_block,
8814 $max_index_to_go ) >= 0
8816 # or if it has too many semicolons
8817 || ( $semicolons_before_block_self_destruct == 0
8818 && $last_nonblank_type ne ';' )
8821 destroy_one_line_block();
8825 # put a break before this closing curly brace if appropriate
8826 unless ( $no_internal_newlines
8827 || $index_start_one_line_block != UNDEFINED_INDEX )
8830 # add missing semicolon if ...
8831 # there are some tokens
8833 ( $max_index_to_go > 0 )
8835 # and we don't have one
8836 && ( $last_nonblank_type ne ';' )
8838 # patch until some block type issues are fixed:
8839 # Do not add semi-colon for block types '{',
8840 # '}', and ';' because we cannot be sure yet
8841 # that this is a block and not an anonomyous
8842 # hash (blktype.t, blktype1.t)
8843 && ( $block_type !~ /^[\{\};]$/ )
8845 # it seems best not to add semicolons in these
8846 # special block types: sort|map|grep
8847 && ( !$is_sort_map_grep{$block_type} )
8849 # and we are allowed to do so.
8850 && $rOpts->{'add-semicolons'}
8854 save_current_token();
8857 $level = $levels_to_go[$max_index_to_go];
8858 $slevel = $nesting_depth_to_go[$max_index_to_go];
8860 $nesting_blocks_to_go[$max_index_to_go];
8861 $ci_level = $ci_levels_to_go[$max_index_to_go];
8863 $container_type = "";
8864 $container_environment = "";
8865 $type_sequence = "";
8867 # Note - we remove any blank AFTER extracting its
8868 # parameters such as level, etc, above
8869 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8870 unstore_token_to_go();
8872 store_token_to_go();
8874 note_added_semicolon();
8875 restore_current_token();
8878 # then write out everything before this closing curly brace
8879 output_line_to_go();
8883 # Now update for side comment
8884 if ($side_comment_follows) { $no_internal_newlines = 1 }
8886 # store the closing curly brace
8887 store_token_to_go();
8889 # ok, we just stored a closing curly brace. Often, but
8890 # not always, we want to end the line immediately.
8891 # So now we have to check for special cases.
8893 # if this '}' successfully ends a one-line block..
8894 my $is_one_line_block = 0;
8896 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8898 # Remember the type of token just before the
8899 # opening brace. It would be more general to use
8900 # a stack, but this will work for one-line blocks.
8901 $is_one_line_block =
8902 $types_to_go[$index_start_one_line_block];
8904 # we have to actually make it by removing tentative
8905 # breaks that were set within it
8906 undo_forced_breakpoint_stack(0);
8907 set_nobreaks( $index_start_one_line_block,
8908 $max_index_to_go - 1 );
8910 # then re-initialize for the next one-line block
8911 destroy_one_line_block();
8913 # then decide if we want to break after the '}' ..
8914 # We will keep going to allow certain brace followers as in:
8915 # do { $ifclosed = 1; last } unless $losing;
8917 # But make a line break if the curly ends a
8918 # significant block:
8919 ##if ( $is_until_while_for_if_elsif_else{$block_type} ) {
8921 $is_block_without_semicolon{$block_type}
8923 # if needless semicolon follows we handle it later
8924 && $next_nonblank_token ne ';'
8927 output_line_to_go() unless ($no_internal_newlines);
8931 # set string indicating what we need to look for brace follower
8933 if ( $block_type eq 'do' ) {
8934 $rbrace_follower = \%is_do_follower;
8936 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
8937 $rbrace_follower = \%is_if_brace_follower;
8939 elsif ( $block_type eq 'else' ) {
8940 $rbrace_follower = \%is_else_brace_follower;
8943 # added eval for borris.t
8944 elsif ($is_sort_map_grep_eval{$block_type}
8945 || $is_one_line_block eq 'G' )
8947 $rbrace_follower = undef;
8952 elsif ( $block_type =~ /^sub\W*$/ ) {
8954 if ($is_one_line_block) {
8955 $rbrace_follower = \%is_anon_sub_1_brace_follower;
8958 $rbrace_follower = \%is_anon_sub_brace_follower;
8962 # None of the above: specify what can follow a closing
8963 # brace of a block which is not an
8964 # if/elsif/else/do/sort/map/grep/eval
8966 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
8968 $rbrace_follower = \%is_other_brace_follower;
8971 # See if an elsif block is followed by another elsif or else;
8973 if ( $block_type eq 'elsif' ) {
8975 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
8976 $looking_for_else = 1; # ok, check on next line
8980 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
8981 write_logfile_entry("No else block :(\n");
8986 # keep going after certain block types (map,sort,grep,eval)
8987 # added eval for borris.t
8993 # if no more tokens, postpone decision until re-entring
8994 elsif ( ( $next_nonblank_token_type eq 'b' )
8995 && $rOpts_add_newlines )
8997 unless ($rbrace_follower) {
8998 output_line_to_go() unless ($no_internal_newlines);
9002 elsif ($rbrace_follower) {
9004 unless ( $rbrace_follower->{$next_nonblank_token} ) {
9005 output_line_to_go() unless ($no_internal_newlines);
9007 $rbrace_follower = undef;
9011 output_line_to_go() unless ($no_internal_newlines);
9014 } # end treatment of closing block token
9017 elsif ( $type eq ';' ) {
9019 # kill one-line blocks with too many semicolons
9020 $semicolons_before_block_self_destruct--;
9022 ( $semicolons_before_block_self_destruct < 0 )
9023 || ( $semicolons_before_block_self_destruct == 0
9024 && $next_nonblank_token_type !~ /^[b\}]$/ )
9027 destroy_one_line_block();
9030 # Remove unnecessary semicolons, but not after bare
9031 # blocks, where it could be unsafe if the brace is
9035 $last_nonblank_token eq '}'
9037 $is_block_without_semicolon{
9038 $last_nonblank_block_type}
9039 || $last_nonblank_block_type =~ /^sub\s+\w/
9040 || $last_nonblank_block_type =~ /^\w+:$/ )
9042 || $last_nonblank_type eq ';'
9047 $rOpts->{'delete-semicolons'}
9049 # don't delete ; before a # because it would promote it
9050 # to a block comment
9051 && ( $next_nonblank_token_type ne '#' )
9054 note_deleted_semicolon();
9056 unless ( $no_internal_newlines
9057 || $index_start_one_line_block != UNDEFINED_INDEX );
9061 write_logfile_entry("Extra ';'\n");
9064 store_token_to_go();
9067 unless ( $no_internal_newlines
9068 || ( $next_nonblank_token eq '}' ) );
9072 # handle here_doc target string
9073 elsif ( $type eq 'h' ) {
9074 $no_internal_newlines =
9075 1; # no newlines after seeing here-target
9076 destroy_one_line_block();
9077 store_token_to_go();
9080 # handle all other token types
9083 # if this is a blank...
9084 if ( $type eq 'b' ) {
9086 # make it just one character
9087 $token = ' ' if $rOpts_add_whitespace;
9089 # delete it if unwanted by whitespace rules
9090 # or we are deleting all whitespace
9091 my $ws = $$rwhite_space_flag[ $j + 1 ];
9092 if ( ( defined($ws) && $ws == -1 )
9093 || $rOpts_delete_old_whitespace )
9096 # unless it might make a syntax error
9098 unless is_essential_whitespace(
9099 $last_last_nonblank_token,
9100 $last_last_nonblank_type,
9101 $tokens_to_go[$max_index_to_go],
9102 $types_to_go[$max_index_to_go],
9103 $$rtokens[ $j + 1 ],
9104 $$rtoken_type[ $j + 1 ]
9108 store_token_to_go();
9111 # remember two previous nonblank OUTPUT tokens
9112 if ( $type ne '#' && $type ne 'b' ) {
9113 $last_last_nonblank_token = $last_nonblank_token;
9114 $last_last_nonblank_type = $last_nonblank_type;
9115 $last_nonblank_token = $token;
9116 $last_nonblank_type = $type;
9117 $last_nonblank_block_type = $block_type;
9120 # unset the continued-quote flag since it only applies to the
9121 # first token, and we want to resume normal formatting if
9122 # there are additional tokens on the line
9123 $in_continued_quote = 0;
9125 } # end of loop over all tokens in this 'line_of_tokens'
9127 # we have to flush ..
9130 # if there is a side comment
9131 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9133 # if this line ends in a quote
9134 # NOTE: This is critically important for insuring that quoted lines
9135 # do not get processed by things like -sot and -sct
9138 # if this is a VERSION statement
9139 || $is_VERSION_statement
9141 # to keep a label on one line if that is how it is now
9142 || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9144 # if we are instructed to keep all old line breaks
9145 || !$rOpts->{'delete-old-newlines'}
9148 destroy_one_line_block();
9149 output_line_to_go();
9152 # mark old line breakpoints in current output stream
9153 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9154 $old_breakpoint_to_go[$max_index_to_go] = 1;
9157 } # end print_line_of_tokens
9159 sub note_added_semicolon {
9160 $last_added_semicolon_at = $input_line_number;
9161 if ( $added_semicolon_count == 0 ) {
9162 $first_added_semicolon_at = $last_added_semicolon_at;
9164 $added_semicolon_count++;
9165 write_logfile_entry("Added ';' here\n");
9168 sub note_deleted_semicolon {
9169 $last_deleted_semicolon_at = $input_line_number;
9170 if ( $deleted_semicolon_count == 0 ) {
9171 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
9173 $deleted_semicolon_count++;
9174 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
9177 sub note_embedded_tab {
9178 $embedded_tab_count++;
9179 $last_embedded_tab_at = $input_line_number;
9180 if ( !$first_embedded_tab_at ) {
9181 $first_embedded_tab_at = $last_embedded_tab_at;
9184 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
9185 write_logfile_entry("Embedded tabs in quote or pattern\n");
9189 sub starting_one_line_block {
9191 # after seeing an opening curly brace, look for the closing brace
9192 # and see if the entire block will fit on a line. This routine is
9193 # not always right because it uses the old whitespace, so a check
9194 # is made later (at the closing brace) to make sure we really
9195 # have a one-line block. We have to do this preliminary check,
9196 # though, because otherwise we would always break at a semicolon
9197 # within a one-line block if the block contains multiple statements.
9199 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9203 # kill any current block - we can only go 1 deep
9204 destroy_one_line_block();
9207 # 1=distance from start of block to opening brace exceeds line length
9212 # shouldn't happen: there must have been a prior call to
9213 # store_token_to_go to put the opening brace in the output stream
9214 if ( $max_index_to_go < 0 ) {
9215 warning("program bug: store_token_to_go called incorrectly\n");
9216 report_definite_bug();
9220 # cannot use one-line blocks with cuddled else else/elsif lines
9221 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9226 my $block_type = $$rblock_type[$j];
9228 # find the starting keyword for this block (such as 'if', 'else', ...)
9230 if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9231 $i_start = $max_index_to_go;
9234 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9236 # For something like "if (xxx) {", the keyword "if" will be
9237 # just after the most recent break. This will be 0 unless
9238 # we have just killed a one-line block and are starting another.
9240 $i_start = $index_max_forced_break + 1;
9241 if ( $types_to_go[$i_start] eq 'b' ) {
9245 unless ( $tokens_to_go[$i_start] eq $block_type ) {
9250 # the previous nonblank token should start these block types
9252 ( $last_last_nonblank_token_to_go eq $block_type )
9253 || ( $block_type =~ /^sub/
9254 && $last_last_nonblank_token_to_go =~ /^sub/ )
9257 $i_start = $last_last_nonblank_index_to_go;
9260 # patch for SWITCH/CASE to retain one-line case/when blocks
9261 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9262 $i_start = $index_max_forced_break + 1;
9263 if ( $types_to_go[$i_start] eq 'b' ) {
9266 unless ( $tokens_to_go[$i_start] eq $block_type ) {
9275 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9279 # see if length is too long to even start
9280 if ( $pos > $rOpts_maximum_line_length ) {
9284 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9286 # old whitespace could be arbitrarily large, so don't use it
9287 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9288 else { $pos += length( $$rtokens[$i] ) }
9290 # Return false result if we exceed the maximum line length,
9291 if ( $pos > $rOpts_maximum_line_length ) {
9295 # or encounter another opening brace before finding the closing brace.
9296 elsif ($$rtokens[$i] eq '{'
9297 && $$rtoken_type[$i] eq '{'
9298 && $$rblock_type[$i] )
9303 # if we find our closing brace..
9304 elsif ($$rtokens[$i] eq '}'
9305 && $$rtoken_type[$i] eq '}'
9306 && $$rblock_type[$i] )
9309 # be sure any trailing comment also fits on the line
9311 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9313 if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9314 $pos += length( $$rtokens[$i_nonblank] );
9316 if ( $i_nonblank > $i + 1 ) {
9317 $pos += length( $$rtokens[ $i + 1 ] );
9320 if ( $pos > $rOpts_maximum_line_length ) {
9325 # ok, it's a one-line block
9326 create_one_line_block( $i_start, 20 );
9330 # just keep going for other characters
9335 # Allow certain types of new one-line blocks to form by joining
9336 # input lines. These can be safely done, but for other block types,
9337 # we keep old one-line blocks but do not form new ones. It is not
9338 # always a good idea to make as many one-line blocks as possible,
9339 # so other types are not done. The user can always use -mangle.
9340 if ( $is_sort_map_grep_eval{$block_type} ) {
9341 create_one_line_block( $i_start, 1 );
9347 sub unstore_token_to_go {
9349 # remove most recent token from output stream
9350 if ( $max_index_to_go > 0 ) {
9354 $max_index_to_go = UNDEFINED_INDEX;
9359 sub want_blank_line {
9361 $file_writer_object->want_blank_line();
9364 sub write_unindented_line {
9366 $file_writer_object->write_line( $_[0] );
9371 # If there is a single, long parameter within parens, like this:
9373 # $self->command( "/msg "
9375 # . " You said $1, but did you know that it's square was "
9376 # . $1 * $1 . " ?" );
9378 # we can remove the continuation indentation of the 2nd and higher lines
9379 # to achieve this effect, which is more pleasing:
9381 # $self->command("/msg "
9383 # . " You said $1, but did you know that it's square was "
9384 # . $1 * $1 . " ?");
9386 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9387 my $max_line = @$ri_first - 1;
9389 # must be multiple lines
9390 return unless $max_line > $line_open;
9392 my $lev_start = $levels_to_go[$i_start];
9393 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9395 # see if all additional lines in this container have continuation
9398 my $line_1 = 1 + $line_open;
9399 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
9400 my $ibeg = $$ri_first[$n];
9401 my $iend = $$ri_last[$n];
9402 if ( $ibeg eq $closing_index ) { $n--; last }
9403 return if ( $lev_start != $levels_to_go[$ibeg] );
9404 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
9405 last if ( $closing_index <= $iend );
9408 # we can reduce the indentation of all continuation lines
9409 my $continuation_line_count = $n - $line_open;
9410 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9411 (0) x ($continuation_line_count);
9412 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9413 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
9416 sub set_logical_padding {
9418 # Look at a batch of lines and see if extra padding can improve the
9419 # alignment when there are certain leading operators. Here is an
9420 # example, in which some extra space is introduced before
9421 # '( $year' to make it line up with the subsequent lines:
9423 # if ( ( $Year < 1601 )
9424 # || ( $Year > 2899 )
9425 # || ( $EndYear < 1601 )
9426 # || ( $EndYear > 2899 ) )
9428 # &Error_OutOfRange;
9431 my ( $ri_first, $ri_last ) = @_;
9432 my $max_line = @$ri_first - 1;
9434 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
9435 $tok_next, $has_leading_op_next, $has_leading_op );
9437 # looking at each line of this batch..
9438 foreach $line ( 0 .. $max_line - 1 ) {
9440 # see if the next line begins with a logical operator
9441 $ibeg = $$ri_first[$line];
9442 $iend = $$ri_last[$line];
9443 $ibeg_next = $$ri_first[ $line + 1 ];
9444 $tok_next = $tokens_to_go[$ibeg_next];
9445 $has_leading_op_next = $is_chain_operator{$tok_next};
9446 next unless ($has_leading_op_next);
9448 # next line must not be at lesser depth
9450 if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
9452 # identify the token in this line to be padded on the left
9455 # handle lines at same depth...
9456 if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
9458 # if this is not first line of the batch ...
9461 # and we have leading operator
9462 next if $has_leading_op;
9465 # 1. the previous line is at lesser depth, or
9466 # 2. the previous line ends in an assignment
9468 # Example 1: previous line at lesser depth
9469 # if ( ( $Year < 1601 ) # <- we are here but
9470 # || ( $Year > 2899 ) # list has not yet
9471 # || ( $EndYear < 1601 ) # collapsed vertically
9472 # || ( $EndYear > 2899 ) )
9475 # Example 2: previous line ending in assignment:
9477 # $year % 4 ? 0 # <- We are here
9483 $is_assignment{ $types_to_go[$iendm] }
9484 || ( $nesting_depth_to_go[$ibegm] <
9485 $nesting_depth_to_go[$ibeg] )
9488 # we will add padding before the first token
9492 # for first line of the batch..
9495 # WARNING: Never indent if first line is starting in a
9496 # continued quote, which would change the quote.
9497 next if $starting_in_quote;
9499 # if this is text after closing '}'
9500 # then look for an interior token to pad
9501 if ( $types_to_go[$ibeg] eq '}' ) {
9505 # otherwise, we might pad if it looks really good
9508 # we might pad token $ibeg, so be sure that it
9509 # is at the same depth as the next line.
9511 if ( $nesting_depth_to_go[$ibeg] !=
9512 $nesting_depth_to_go[$ibeg_next] );
9514 # We can pad on line 1 of a statement if at least 3
9515 # lines will be aligned. Otherwise, it
9516 # can look very confusing.
9517 if ( $max_line > 2 ) {
9518 my $leading_token = $tokens_to_go[$ibeg_next];
9520 # never indent line 1 of a '.' series because
9521 # previous line is most likely at same level.
9522 # TODO: we should also look at the leasing_spaces
9523 # of the last output line and skip if it is same
9525 next if ( $leading_token eq '.' );
9528 foreach my $l ( 2 .. 3 ) {
9529 my $ibeg_next_next = $$ri_first[ $line + $l ];
9531 unless $tokens_to_go[$ibeg_next_next] eq
9535 next unless $count == 3;
9545 # find interior token to pad if necessary
9546 if ( !defined($ipad) ) {
9548 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
9550 # find any unclosed container
9552 unless ( $type_sequence_to_go[$i]
9553 && $mate_index_to_go[$i] > $iend );
9555 # find next nonblank token to pad
9557 if ( $types_to_go[$ipad] eq 'b' ) {
9559 last if ( $ipad > $iend );
9565 # next line must not be at greater depth
9566 my $iend_next = $$ri_last[ $line + 1 ];
9568 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
9569 $nesting_depth_to_go[$ipad] );
9571 # lines must be somewhat similar to be padded..
9572 my $inext_next = $ibeg_next + 1;
9573 if ( $types_to_go[$inext_next] eq 'b' ) {
9576 my $type = $types_to_go[$ipad];
9578 # see if there are multiple continuation lines
9579 my $logical_continuation_lines = 1;
9580 if ( $line + 2 <= $max_line ) {
9581 my $leading_token = $tokens_to_go[$ibeg_next];
9582 my $ibeg_next_next = $$ri_first[ $line + 2 ];
9583 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
9584 && $nesting_depth_to_go[$ibeg_next] eq
9585 $nesting_depth_to_go[$ibeg_next_next] )
9587 $logical_continuation_lines++;
9592 # either we have multiple continuation lines to follow
9593 # and we are not padding the first token
9594 ( $logical_continuation_lines > 1 && $ipad > 0 )
9600 $types_to_go[$inext_next] eq $type
9602 # and keywords must match if keyword
9605 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
9611 #----------------------begin special check---------------
9613 # One more check is needed before we can make the pad.
9614 # If we are in a list with some long items, we want each
9615 # item to stand out. So in the following example, the
9616 # first line begining with '$casefold->' would look good
9617 # padded to align with the next line, but then it
9618 # would be indented more than the last line, so we
9622 # $casefold->{code} eq '0041'
9623 # && $casefold->{status} eq 'C'
9624 # && $casefold->{mapping} eq '0061',
9629 # It would be faster, and almost as good, to use a comma
9630 # count, and not pad if comma_count > 1 and the previous
9631 # line did not end with a comma.
9635 my $ibg = $$ri_first[ $line + 1 ];
9636 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
9638 # just use simplified formula for leading spaces to avoid
9639 # needless sub calls
9640 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
9642 # look at each line beyond the next ..
9644 foreach $l ( $line + 2 .. $max_line ) {
9645 my $ibg = $$ri_first[$l];
9647 # quit looking at the end of this container
9649 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
9650 || ( $nesting_depth_to_go[$ibg] < $depth );
9652 # cannot do the pad if a later line would be
9654 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
9660 # don't pad if we end in a broken list
9661 if ( $l == $max_line ) {
9662 my $i2 = $$ri_last[$l];
9663 if ( $types_to_go[$i2] eq '#' ) {
9664 my $i1 = $$ri_first[$l];
9667 terminal_type( \@types_to_go, \@block_type_to_go, $i1,
9672 next unless $ok_to_pad;
9674 #----------------------end special check---------------
9676 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
9677 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
9678 $pad_spaces = $length_2 - $length_1;
9680 # make sure this won't change if -lp is used
9681 my $indentation_1 = $leading_spaces_to_go[$ibeg];
9682 if ( ref($indentation_1) ) {
9683 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
9684 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
9685 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
9691 # we might be able to handle a pad of -1 by removing a blank
9693 if ( $pad_spaces < 0 ) {
9694 if ( $pad_spaces == -1 ) {
9695 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
9696 $tokens_to_go[ $ipad - 1 ] = '';
9702 # now apply any padding for alignment
9703 if ( $ipad >= 0 && $pad_spaces ) {
9704 my $length_t = total_line_length( $ibeg, $iend );
9705 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
9706 $tokens_to_go[$ipad] =
9707 ' ' x $pad_spaces . $tokens_to_go[$ipad];
9715 $has_leading_op = $has_leading_op_next;
9716 } # end of loop over lines
9720 sub correct_lp_indentation {
9722 # When the -lp option is used, we need to make a last pass through
9723 # each line to correct the indentation positions in case they differ
9724 # from the predictions. This is necessary because perltidy uses a
9725 # predictor/corrector method for aligning with opening parens. The
9726 # predictor is usually good, but sometimes stumbles. The corrector
9727 # tries to patch things up once the actual opening paren locations
9729 my ( $ri_first, $ri_last ) = @_;
9732 # Note on flag '$do_not_pad':
9733 # We want to avoid a situation like this, where the aligner inserts
9734 # whitespace before the '=' to align it with a previous '=', because
9735 # otherwise the parens might become mis-aligned in a situation like
9736 # this, where the '=' has become aligned with the previous line,
9737 # pushing the opening '(' forward beyond where we want it.
9739 # $mkFloor::currentRoom = '';
9740 # $mkFloor::c_entry = $c->Entry(
9742 # -relief => 'sunken',
9746 # We leave it to the aligner to decide how to do this.
9748 # first remove continuation indentation if appropriate
9749 my $max_line = @$ri_first - 1;
9751 # looking at each line of this batch..
9752 my ( $ibeg, $iend );
9754 foreach $line ( 0 .. $max_line ) {
9755 $ibeg = $$ri_first[$line];
9756 $iend = $$ri_last[$line];
9758 # looking at each token in this output line..
9760 foreach $i ( $ibeg .. $iend ) {
9762 # How many space characters to place before this token
9763 # for special alignment. Actual padding is done in the
9766 # looking for next unvisited indentation item
9767 my $indentation = $leading_spaces_to_go[$i];
9768 if ( !$indentation->get_MARKED() ) {
9769 $indentation->set_MARKED(1);
9771 # looking for indentation item for which we are aligning
9772 # with parens, braces, and brackets
9773 next unless ( $indentation->get_ALIGN_PAREN() );
9775 # skip closed container on this line
9778 if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
9779 if ( $type_sequence_to_go[$im]
9780 && $mate_index_to_go[$im] <= $iend )
9786 if ( $line == 1 && $i == $ibeg ) {
9790 # Ok, let's see what the error is and try to fix it
9792 my $predicted_pos = $indentation->get_SPACES();
9795 # token is mid-line - use length to previous token
9796 $actual_pos = total_line_length( $ibeg, $i - 1 );
9798 # for mid-line token, we must check to see if all
9799 # additional lines have continuation indentation,
9800 # and remove it if so. Otherwise, we do not get
9802 my $closing_index = $indentation->get_CLOSED();
9803 if ( $closing_index > $iend ) {
9804 my $ibeg_next = $$ri_first[ $line + 1 ];
9805 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
9806 undo_lp_ci( $line, $i, $closing_index, $ri_first,
9811 elsif ( $line > 0 ) {
9813 # handle case where token starts a new line;
9814 # use length of previous line
9815 my $ibegm = $$ri_first[ $line - 1 ];
9816 my $iendm = $$ri_last[ $line - 1 ];
9817 $actual_pos = total_line_length( $ibegm, $iendm );
9821 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
9825 # token is first character of first line of batch
9826 $actual_pos = $predicted_pos;
9829 my $move_right = $actual_pos - $predicted_pos;
9831 # done if no error to correct (gnu2.t)
9832 if ( $move_right == 0 ) {
9833 $indentation->set_RECOVERABLE_SPACES($move_right);
9837 # if we have not seen closure for this indentation in
9838 # this batch, we can only pass on a request to the
9840 my $closing_index = $indentation->get_CLOSED();
9842 if ( $closing_index < 0 ) {
9843 $indentation->set_RECOVERABLE_SPACES($move_right);
9847 # If necessary, look ahead to see if there is really any
9848 # leading whitespace dependent on this whitespace, and
9849 # also find the longest line using this whitespace.
9850 # Since it is always safe to move left if there are no
9851 # dependents, we only need to do this if we may have
9852 # dependent nodes or need to move right.
9854 my $right_margin = 0;
9855 my $have_child = $indentation->get_HAVE_CHILD();
9857 my %saw_indentation;
9859 $saw_indentation{$indentation} = $indentation;
9861 if ( $have_child || $move_right > 0 ) {
9864 if ( $i == $ibeg ) {
9865 $max_length = total_line_length( $ibeg, $iend );
9868 # look ahead at the rest of the lines of this batch..
9870 foreach $line_t ( $line + 1 .. $max_line ) {
9871 my $ibeg_t = $$ri_first[$line_t];
9872 my $iend_t = $$ri_last[$line_t];
9873 last if ( $closing_index <= $ibeg_t );
9875 # remember all different indentation objects
9876 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
9877 $saw_indentation{$indentation_t} = $indentation_t;
9880 # remember longest line in the group
9881 my $length_t = total_line_length( $ibeg_t, $iend_t );
9882 if ( $length_t > $max_length ) {
9883 $max_length = $length_t;
9886 $right_margin = $rOpts_maximum_line_length - $max_length;
9887 if ( $right_margin < 0 ) { $right_margin = 0 }
9890 my $first_line_comma_count =
9891 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
9892 my $comma_count = $indentation->get_COMMA_COUNT();
9893 my $arrow_count = $indentation->get_ARROW_COUNT();
9895 # This is a simple approximate test for vertical alignment:
9896 # if we broke just after an opening paren, brace, bracket,
9897 # and there are 2 or more commas in the first line,
9898 # and there are no '=>'s,
9899 # then we are probably vertically aligned. We could set
9900 # an exact flag in sub scan_list, but this is good
9902 my $indentation_count = keys %saw_indentation;
9903 my $is_vertically_aligned =
9905 && $first_line_comma_count > 1
9906 && $indentation_count == 1
9907 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
9909 # Make the move if possible ..
9912 # we can always move left
9915 # but we should only move right if we are sure it will
9916 # not spoil vertical alignment
9917 || ( $comma_count == 0 )
9918 || ( $comma_count > 0 && !$is_vertically_aligned )
9922 ( $move_right <= $right_margin )
9926 foreach ( keys %saw_indentation ) {
9927 $saw_indentation{$_}
9928 ->permanently_decrease_AVAILABLE_SPACES( -$move );
9932 # Otherwise, record what we want and the vertical aligner
9933 # will try to recover it.
9935 $indentation->set_RECOVERABLE_SPACES($move_right);
9943 # flush is called to output any tokens in the pipeline, so that
9944 # an alternate source of lines can be written in the correct order
9947 destroy_one_line_block();
9948 output_line_to_go();
9949 Perl::Tidy::VerticalAligner::flush();
9952 # sub output_line_to_go sends one logical line of tokens on down the
9953 # pipeline to the VerticalAligner package, breaking the line into continuation
9954 # lines as necessary. The line of tokens is ready to go in the "to_go"
9956 sub output_line_to_go {
9958 # debug stuff; this routine can be called from many points
9959 FORMATTER_DEBUG_FLAG_OUTPUT && do {
9960 my ( $a, $b, $c ) = caller;
9962 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9964 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9965 write_diagnostics("$output_str\n");
9968 # just set a tentative breakpoint if we might be in a one-line block
9969 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9970 set_forced_breakpoint($max_index_to_go);
9974 my $cscw_block_comment;
9975 $cscw_block_comment = add_closing_side_comment()
9976 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9978 match_opening_and_closing_tokens();
9980 # tell the -lp option we are outputting a batch so it can close
9981 # any unfinished items in its stack
9984 # If this line ends in a code block brace, set breaks at any
9985 # previous closing code block braces to breakup a chain of code
9986 # blocks on one line. This is very rare but can happen for
9987 # user-defined subs. For example we might be looking at this:
9988 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9989 my $saw_good_break = 0; # flag to force breaks even if short line
9992 # looking for opening or closing block brace
9993 $block_type_to_go[$max_index_to_go]
9995 # but not one of these which are never duplicated on a line:
9996 ##&& !$is_until_while_for_if_elsif_else{ $block_type_to_go
9997 ## [$max_index_to_go] }
9998 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
10001 my $lev = $nesting_depth_to_go[$max_index_to_go];
10003 # Walk backwards from the end and
10004 # set break at any closing block braces at the same level.
10005 # But quit if we are not in a chain of blocks.
10006 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
10007 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
10008 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
10010 if ( $block_type_to_go[$i] ) {
10011 if ( $tokens_to_go[$i] eq '}' ) {
10012 set_forced_breakpoint($i);
10013 $saw_good_break = 1;
10017 # quit if we see anything besides words, function, blanks
10019 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
10024 my $imax = $max_index_to_go;
10026 # trim any blank tokens
10027 if ( $max_index_to_go >= 0 ) {
10028 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
10029 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
10032 # anything left to write?
10033 if ( $imin <= $imax ) {
10035 # add a blank line before certain key types
10036 if ( $last_line_leading_type !~ /^[#b]/ ) {
10037 my $want_blank = 0;
10038 my $leading_token = $tokens_to_go[$imin];
10039 my $leading_type = $types_to_go[$imin];
10041 # blank lines before subs except declarations and one-liners
10042 # MCONVERSION LOCATION - for sub tokenization change
10043 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
10044 $want_blank = ( $rOpts->{'blanks-before-subs'} )
10046 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10047 $imax ) !~ /^[\;\}]$/
10051 # break before all package declarations
10052 # MCONVERSION LOCATION - for tokenizaton change
10053 elsif ($leading_token =~ /^(package\s)/
10054 && $leading_type eq 'i' )
10056 $want_blank = ( $rOpts->{'blanks-before-subs'} );
10059 # break before certain key blocks except one-liners
10060 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
10061 $want_blank = ( $rOpts->{'blanks-before-subs'} )
10063 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10068 # Break before certain block types if we haven't had a
10069 # break at this level for a while. This is the
10070 # difficult decision..
10071 elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
10072 && $leading_type eq 'k' )
10074 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
10075 if ( !defined($lc) ) { $lc = 0 }
10077 $want_blank = $rOpts->{'blanks-before-blocks'}
10078 && $lc >= $rOpts->{'long-block-line-count'}
10079 && $file_writer_object->get_consecutive_nonblank_lines() >=
10080 $rOpts->{'long-block-line-count'}
10082 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10089 # future: send blank line down normal path to VerticalAligner
10090 Perl::Tidy::VerticalAligner::flush();
10091 $file_writer_object->write_blank_code_line();
10095 # update blank line variables and count number of consecutive
10096 # non-blank, non-comment lines at this level
10097 $last_last_line_leading_level = $last_line_leading_level;
10098 $last_line_leading_level = $levels_to_go[$imin];
10099 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
10100 $last_line_leading_type = $types_to_go[$imin];
10101 if ( $last_line_leading_level == $last_last_line_leading_level
10102 && $last_line_leading_type ne 'b'
10103 && $last_line_leading_type ne '#'
10104 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
10106 $nonblank_lines_at_depth[$last_line_leading_level]++;
10109 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
10112 FORMATTER_DEBUG_FLAG_FLUSH && do {
10113 my ( $package, $file, $line ) = caller;
10115 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
10118 # add a couple of extra terminal blank tokens
10121 # set all forced breakpoints for good list formatting
10122 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
10125 $max_index_to_go > 0
10128 || $old_line_count_in_batch > 1
10129 || is_unbalanced_batch()
10131 $comma_count_in_batch
10132 && ( $rOpts_maximum_fields_per_table > 0
10133 || $rOpts_comma_arrow_breakpoints == 0 )
10138 $saw_good_break ||= scan_list();
10141 # let $ri_first and $ri_last be references to lists of
10142 # first and last tokens of line fragments to output..
10143 my ( $ri_first, $ri_last );
10145 # write a single line if..
10148 # we aren't allowed to add any newlines
10149 !$rOpts_add_newlines
10151 # or, we don't already have an interior breakpoint
10152 # and we didn't see a good breakpoint
10154 !$forced_breakpoint_count
10155 && !$saw_good_break
10157 # and this line is 'short'
10162 @$ri_first = ($imin);
10163 @$ri_last = ($imax);
10166 # otherwise use multiple lines
10169 ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
10171 # now we do a correction step to clean this up a bit
10172 # (The only time we would not do this is for debugging)
10173 if ( $rOpts->{'recombine'} ) {
10174 ( $ri_first, $ri_last ) =
10175 recombine_breakpoints( $ri_first, $ri_last );
10179 # do corrector step if -lp option is used
10180 my $do_not_pad = 0;
10181 if ($rOpts_line_up_parentheses) {
10182 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
10184 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
10186 prepare_for_new_input_lines();
10188 # output any new -cscw block comment
10189 if ($cscw_block_comment) {
10191 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10195 sub reset_block_text_accumulator {
10197 # save text after 'if' and 'elsif' to append after 'else'
10198 if ($accumulating_text_for_block) {
10200 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
10201 push @{$rleading_block_if_elsif_text}, $leading_block_text;
10204 $accumulating_text_for_block = "";
10205 $leading_block_text = "";
10206 $leading_block_text_level = 0;
10207 $leading_block_text_length_exceeded = 0;
10208 $leading_block_text_line_number = 0;
10209 $leading_block_text_line_length = 0;
10212 sub set_block_text_accumulator {
10214 $accumulating_text_for_block = $tokens_to_go[$i];
10215 if ( $accumulating_text_for_block !~ /^els/ ) {
10216 $rleading_block_if_elsif_text = [];
10218 $leading_block_text = "";
10219 $leading_block_text_level = $levels_to_go[$i];
10220 $leading_block_text_line_number =
10221 $vertical_aligner_object->get_output_line_number();
10222 $leading_block_text_length_exceeded = 0;
10224 # this will contain the column number of the last character
10225 # of the closing side comment
10226 $leading_block_text_line_length =
10227 length($accumulating_text_for_block) +
10228 length( $rOpts->{'closing-side-comment-prefix'} ) +
10229 $leading_block_text_level * $rOpts_indent_columns + 3;
10232 sub accumulate_block_text {
10235 # accumulate leading text for -csc, ignoring any side comments
10236 if ( $accumulating_text_for_block
10237 && !$leading_block_text_length_exceeded
10238 && $types_to_go[$i] ne '#' )
10241 my $added_length = length( $tokens_to_go[$i] );
10242 $added_length += 1 if $i == 0;
10243 my $new_line_length = $leading_block_text_line_length + $added_length;
10245 # we can add this text if we don't exceed some limits..
10248 # we must not have already exceeded the text length limit
10249 length($leading_block_text) <
10250 $rOpts_closing_side_comment_maximum_text
10253 # the new total line length must be below the line length limit
10254 # or the new length must be below the text length limit
10255 # (ie, we may allow one token to exceed the text length limit)
10256 && ( $new_line_length < $rOpts_maximum_line_length
10257 || length($leading_block_text) + $added_length <
10258 $rOpts_closing_side_comment_maximum_text )
10260 # UNLESS: we are adding a closing paren before the brace we seek.
10261 # This is an attempt to avoid situations where the ... to be
10262 # added are longer than the omitted right paren, as in:
10264 # foreach my $item (@a_rather_long_variable_name_here) {
10266 # } ## end foreach my $item (@a_rather_long_variable_name_here...
10269 $tokens_to_go[$i] eq ')'
10272 $i + 1 <= $max_index_to_go
10273 && $block_type_to_go[ $i + 1 ] eq
10274 $accumulating_text_for_block
10276 || ( $i + 2 <= $max_index_to_go
10277 && $block_type_to_go[ $i + 2 ] eq
10278 $accumulating_text_for_block )
10284 # add an extra space at each newline
10285 if ( $i == 0 ) { $leading_block_text .= ' ' }
10287 # add the token text
10288 $leading_block_text .= $tokens_to_go[$i];
10289 $leading_block_text_line_length = $new_line_length;
10292 # show that text was truncated if necessary
10293 elsif ( $types_to_go[$i] ne 'b' ) {
10294 $leading_block_text_length_exceeded = 1;
10295 $leading_block_text .= '...';
10301 my %is_if_elsif_else_unless_while_until_for_foreach;
10305 # These block types may have text between the keyword and opening
10306 # curly. Note: 'else' does not, but must be included to allow trailing
10307 # if/elsif text to be appended.
10308 # patch for SWITCH/CASE: added 'case' and 'when'
10309 @_ = qw(if elsif else unless while until for foreach case when);
10310 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
10313 sub accumulate_csc_text {
10315 # called once per output buffer when -csc is used. Accumulates
10316 # the text placed after certain closing block braces.
10317 # Defines and returns the following for this buffer:
10319 my $block_leading_text = ""; # the leading text of the last '}'
10320 my $rblock_leading_if_elsif_text;
10321 my $i_block_leading_text =
10322 -1; # index of token owning block_leading_text
10323 my $block_line_count = 100; # how many lines the block spans
10324 my $terminal_type = 'b'; # type of last nonblank token
10325 my $i_terminal = 0; # index of last nonblank token
10326 my $terminal_block_type = "";
10328 for my $i ( 0 .. $max_index_to_go ) {
10329 my $type = $types_to_go[$i];
10330 my $block_type = $block_type_to_go[$i];
10331 my $token = $tokens_to_go[$i];
10333 # remember last nonblank token type
10334 if ( $type ne '#' && $type ne 'b' ) {
10335 $terminal_type = $type;
10336 $terminal_block_type = $block_type;
10340 my $type_sequence = $type_sequence_to_go[$i];
10341 if ( $block_type && $type_sequence ) {
10343 if ( $token eq '}' ) {
10345 # restore any leading text saved when we entered this block
10346 if ( defined( $block_leading_text{$type_sequence} ) ) {
10347 ( $block_leading_text, $rblock_leading_if_elsif_text ) =
10348 @{ $block_leading_text{$type_sequence} };
10349 $i_block_leading_text = $i;
10350 delete $block_leading_text{$type_sequence};
10351 $rleading_block_if_elsif_text =
10352 $rblock_leading_if_elsif_text;
10355 # if we run into a '}' then we probably started accumulating
10356 # at something like a trailing 'if' clause..no harm done.
10357 if ( $accumulating_text_for_block
10358 && $levels_to_go[$i] <= $leading_block_text_level )
10360 my $lev = $levels_to_go[$i];
10361 reset_block_text_accumulator();
10364 if ( defined( $block_opening_line_number{$type_sequence} ) )
10366 my $output_line_number =
10367 $vertical_aligner_object->get_output_line_number();
10368 $block_line_count = $output_line_number -
10369 $block_opening_line_number{$type_sequence} + 1;
10370 delete $block_opening_line_number{$type_sequence};
10374 # Error: block opening line undefined for this line..
10375 # This shouldn't be possible, but it is not a
10376 # significant problem.
10380 elsif ( $token eq '{' ) {
10383 $vertical_aligner_object->get_output_line_number();
10384 $block_opening_line_number{$type_sequence} = $line_number;
10386 if ( $accumulating_text_for_block
10387 && $levels_to_go[$i] == $leading_block_text_level )
10390 if ( $accumulating_text_for_block eq $block_type ) {
10392 # save any leading text before we enter this block
10393 $block_leading_text{$type_sequence} = [
10394 $leading_block_text,
10395 $rleading_block_if_elsif_text
10397 $block_opening_line_number{$type_sequence} =
10398 $leading_block_text_line_number;
10399 reset_block_text_accumulator();
10403 # shouldn't happen, but not a serious error.
10404 # We were accumulating -csc text for block type
10405 # $accumulating_text_for_block and unexpectedly
10406 # encountered a '{' for block type $block_type.
10413 && $csc_new_statement_ok
10414 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
10415 && $token =~ /$closing_side_comment_list_pattern/o )
10417 set_block_text_accumulator($i);
10421 # note: ignoring type 'q' because of tricks being played
10422 # with 'q' for hanging side comments
10423 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
10424 $csc_new_statement_ok =
10425 ( $block_type || $type eq 'J' || $type eq ';' );
10428 && $accumulating_text_for_block
10429 && $levels_to_go[$i] == $leading_block_text_level )
10431 reset_block_text_accumulator();
10434 accumulate_block_text($i);
10439 # Treat an 'else' block specially by adding preceding 'if' and
10440 # 'elsif' text. Otherwise, the 'end else' is not helpful,
10441 # especially for cuddled-else formatting.
10442 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
10443 $block_leading_text =
10444 make_else_csc_text( $i_terminal, $terminal_block_type,
10445 $block_leading_text, $rblock_leading_if_elsif_text );
10448 return ( $terminal_type, $i_terminal, $i_block_leading_text,
10449 $block_leading_text, $block_line_count );
10453 sub make_else_csc_text {
10455 # create additional -csc text for an 'else' and optionally 'elsif',
10456 # depending on the value of switch
10457 # $rOpts_closing_side_comment_else_flag:
10459 # = 0 add 'if' text to trailing else
10460 # = 1 same as 0 plus:
10461 # add 'if' to 'elsif's if can fit in line length
10462 # add last 'elsif' to trailing else if can fit in one line
10463 # = 2 same as 1 but do not check if exceed line length
10465 # $rif_elsif_text = a reference to a list of all previous closing
10466 # side comments created for this if block
10468 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
10469 my $csc_text = $block_leading_text;
10471 if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
10476 my $count = @{$rif_elsif_text};
10477 return $csc_text unless ($count);
10479 my $if_text = '[ if' . $rif_elsif_text->[0];
10481 # always show the leading 'if' text on 'else'
10482 if ( $block_type eq 'else' ) {
10483 $csc_text .= $if_text;
10486 # see if that's all
10487 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
10491 my $last_elsif_text = "";
10492 if ( $count > 1 ) {
10493 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
10494 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
10497 # tentatively append one more item
10498 my $saved_text = $csc_text;
10499 if ( $block_type eq 'else' ) {
10500 $csc_text .= $last_elsif_text;
10503 $csc_text .= ' ' . $if_text;
10506 # all done if no length checks requested
10507 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
10511 # undo it if line length exceeded
10513 length($csc_text) + length($block_type) +
10514 length( $rOpts->{'closing-side-comment-prefix'} ) +
10515 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
10516 if ( $length > $rOpts_maximum_line_length ) {
10517 $csc_text = $saved_text;
10522 sub add_closing_side_comment {
10524 # add closing side comments after closing block braces if -csc used
10525 my $cscw_block_comment;
10527 #---------------------------------------------------------------
10528 # Step 1: loop through all tokens of this line to accumulate
10529 # the text needed to create the closing side comments. Also see
10530 # how the line ends.
10531 #---------------------------------------------------------------
10533 my ( $terminal_type, $i_terminal, $i_block_leading_text,
10534 $block_leading_text, $block_line_count )
10535 = accumulate_csc_text();
10537 #---------------------------------------------------------------
10538 # Step 2: make the closing side comment if this ends a block
10539 #---------------------------------------------------------------
10540 my $have_side_comment = $i_terminal != $max_index_to_go;
10542 # if this line might end in a block closure..
10544 $terminal_type eq '}'
10549 # the block is long enough
10550 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
10552 # or there is an existing comment to check
10553 || ( $have_side_comment
10554 && $rOpts->{'closing-side-comment-warnings'} )
10557 # .. and if this is one of the types of interest
10558 && $block_type_to_go[$i_terminal] =~
10559 /$closing_side_comment_list_pattern/o
10561 # .. but not an anonymous sub
10562 # These are not normally of interest, and their closing braces are
10563 # often followed by commas or semicolons anyway. This also avoids
10564 # possible erratic output due to line numbering inconsistencies
10565 # in the cases where their closing braces terminate a line.
10566 && $block_type_to_go[$i_terminal] ne 'sub'
10568 # ..and the corresponding opening brace must is not in this batch
10569 # (because we do not need to tag one-line blocks, although this
10570 # should also be caught with a positive -csci value)
10571 && $mate_index_to_go[$i_terminal] < 0
10576 # this is the last token (line doesnt have a side comment)
10577 !$have_side_comment
10579 # or the old side comment is a closing side comment
10580 || $tokens_to_go[$max_index_to_go] =~
10581 /$closing_side_comment_prefix_pattern/o
10586 # then make the closing side comment text
10588 "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
10590 # append any extra descriptive text collected above
10591 if ( $i_block_leading_text == $i_terminal ) {
10592 $token .= $block_leading_text;
10594 $token =~ s/\s*$//; # trim any trailing whitespace
10596 # handle case of existing closing side comment
10597 if ($have_side_comment) {
10599 # warn if requested and tokens differ significantly
10600 if ( $rOpts->{'closing-side-comment-warnings'} ) {
10601 my $old_csc = $tokens_to_go[$max_index_to_go];
10602 my $new_csc = $token;
10603 $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
10604 my $new_trailing_dots = $1;
10605 $old_csc =~ s/\.\.\.\s*$//;
10606 $new_csc =~ s/\s+//g; # trim all whitespace
10607 $old_csc =~ s/\s+//g;
10609 # Patch to handle multiple closing side comments at
10610 # else and elsif's. These have become too complicated
10611 # to check, so if we see an indication of
10612 # '[ if' or '[ # elsif', then assume they were made
10614 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
10615 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
10617 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
10618 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
10621 # if old comment is contained in new comment,
10622 # only compare the common part.
10623 if ( length($new_csc) > length($old_csc) ) {
10624 $new_csc = substr( $new_csc, 0, length($old_csc) );
10627 # if the new comment is shorter and has been limited,
10628 # only compare the common part.
10629 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
10631 $old_csc = substr( $old_csc, 0, length($new_csc) );
10634 # any remaining difference?
10635 if ( $new_csc ne $old_csc ) {
10637 # just leave the old comment if we are below the threshold
10638 # for creating side comments
10639 if ( $block_line_count <
10640 $rOpts->{'closing-side-comment-interval'} )
10645 # otherwise we'll make a note of it
10649 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
10652 # save the old side comment in a new trailing block comment
10653 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
10656 $cscw_block_comment =
10657 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
10662 # No differences.. we can safely delete old comment if we
10663 # are below the threshold
10664 if ( $block_line_count <
10665 $rOpts->{'closing-side-comment-interval'} )
10668 unstore_token_to_go()
10669 if ( $types_to_go[$max_index_to_go] eq '#' );
10670 unstore_token_to_go()
10671 if ( $types_to_go[$max_index_to_go] eq 'b' );
10676 # switch to the new csc (unless we deleted it!)
10677 $tokens_to_go[$max_index_to_go] = $token if $token;
10680 # handle case of NO existing closing side comment
10683 # insert the new side comment into the output token stream
10685 my $block_type = '';
10686 my $type_sequence = '';
10687 my $container_environment =
10688 $container_environment_to_go[$max_index_to_go];
10689 my $level = $levels_to_go[$max_index_to_go];
10690 my $slevel = $nesting_depth_to_go[$max_index_to_go];
10691 my $no_internal_newlines = 0;
10693 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
10694 my $ci_level = $ci_levels_to_go[$max_index_to_go];
10695 my $in_continued_quote = 0;
10697 # first insert a blank token
10698 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
10700 # then the side comment
10701 insert_new_token_to_go( $token, $type, $slevel,
10702 $no_internal_newlines );
10705 return $cscw_block_comment;
10708 sub previous_nonblank_token {
10713 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
10714 return $tokens_to_go[ $i - 1 ];
10717 return $tokens_to_go[ $i - 2 ];
10724 sub send_lines_to_vertical_aligner {
10726 my ( $ri_first, $ri_last, $do_not_pad ) = @_;
10728 my $rindentation_list = [0]; # ref to indentations for each line
10730 # define the array @matching_token_to_go for the output tokens
10731 # which will be non-blank for each special token (such as =>)
10732 # for which alignment is required.
10733 set_vertical_alignment_markers( $ri_first, $ri_last );
10735 # flush if necessary to avoid unwanted alignment
10736 my $must_flush = 0;
10737 if ( @$ri_first > 1 ) {
10739 # flush before a long if statement
10740 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
10745 Perl::Tidy::VerticalAligner::flush();
10748 set_logical_padding( $ri_first, $ri_last );
10750 # loop to prepare each line for shipment
10751 my $n_last_line = @$ri_first - 1;
10753 for my $n ( 0 .. $n_last_line ) {
10754 my $ibeg = $$ri_first[$n];
10755 my $iend = $$ri_last[$n];
10760 my $i_start = $ibeg;
10764 my @container_name = ("");
10765 my @multiple_comma_arrows = (undef);
10767 my $j = 0; # field index
10770 for $i ( $ibeg .. $iend ) {
10772 # Keep track of containers balanced on this line only.
10773 # These are used below to prevent unwanted cross-line alignments.
10774 # Unbalanced containers already avoid aligning across
10775 # container boundaries.
10776 if ( $tokens_to_go[$i] eq '(' ) {
10777 my $i_mate = $mate_index_to_go[$i];
10778 if ( $i_mate > $i && $i_mate <= $iend ) {
10780 my $seqno = $type_sequence_to_go[$i];
10781 my $count = comma_arrow_count($seqno);
10782 $multiple_comma_arrows[$depth] = $count && $count > 1;
10783 my $name = previous_nonblank_token($i);
10785 $container_name[$depth] = "+" . $name;
10788 elsif ( $tokens_to_go[$i] eq ')' ) {
10789 $depth-- if $depth > 0;
10792 # if we find a new synchronization token, we are done with
10794 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
10796 my $tok = my $raw_tok = $matching_token_to_go[$i];
10798 # make separators in different nesting depths unique
10799 # by appending the nesting depth digit.
10800 if ( $raw_tok ne '#' ) {
10801 $tok .= "$nesting_depth_to_go[$i]";
10804 # do any special decorations for commas to avoid unwanted
10805 # cross-line alignments.
10806 if ( $raw_tok eq ',' ) {
10807 if ( $container_name[$depth] ) {
10808 $tok .= $container_name[$depth];
10812 # decorate '=>' with:
10813 # - Nothing if this container is unbalanced on this line.
10814 # - The previous token if it is balanced and multiple '=>'s
10815 # - The container name if it is bananced and no other '=>'s
10816 elsif ( $raw_tok eq '=>' ) {
10817 if ( $container_name[$depth] ) {
10818 if ( $multiple_comma_arrows[$depth] ) {
10819 $tok .= "+" . previous_nonblank_token($i);
10822 $tok .= $container_name[$depth];
10827 # concatenate the text of the consecutive tokens to form
10830 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
10832 # store the alignment token for this field
10833 push( @tokens, $tok );
10835 # get ready for the next batch
10838 $patterns[$j] = "";
10841 # continue accumulating tokens
10842 # handle non-keywords..
10843 if ( $types_to_go[$i] ne 'k' ) {
10844 my $type = $types_to_go[$i];
10846 # Mark most things before arrows as a quote to
10847 # get them to line up. Testfile: mixed.pl.
10848 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
10849 my $next_type = $types_to_go[ $i + 1 ];
10850 my $i_next_nonblank =
10851 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
10853 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
10858 # minor patch to make numbers and quotes align
10859 if ( $type eq 'n' ) { $type = 'Q' }
10861 $patterns[$j] .= $type;
10864 # for keywords we have to use the actual text
10867 # map certain keywords to the same 'if' class to align
10868 # long if/elsif sequences. my testfile: elsif.pl
10869 my $tok = $tokens_to_go[$i];
10870 if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
10873 $patterns[$j] .= $tok;
10877 # done with this line .. join text of tokens to make the last field
10878 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
10880 my ( $indentation, $lev, $level_end, $terminal_type,
10881 $is_semicolon_terminated, $is_outdented_line )
10882 = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
10883 $ri_first, $ri_last, $rindentation_list );
10885 # we will allow outdenting of long lines..
10886 my $outdent_long_lines = (
10888 # which are long quotes, if allowed
10889 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
10891 # which are long block comments, if allowed
10893 $types_to_go[$ibeg] eq '#'
10894 && $rOpts->{'outdent-long-comments'}
10896 # but not if this is a static block comment
10897 && !$is_static_block_comment
10902 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
10904 my $rvertical_tightness_flags =
10905 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
10906 $ri_first, $ri_last );
10908 # flush an outdented line to avoid any unwanted vertical alignment
10909 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10911 my $is_terminal_ternary = 0;
10912 if ( $tokens_to_go[$ibeg] eq ':'
10913 || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
10915 if ( ( $terminal_type eq ';' && $level_end <= $lev )
10916 || ( $level_end < $lev ) )
10918 $is_terminal_ternary = 1;
10922 # send this new line down the pipe
10923 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
10924 Perl::Tidy::VerticalAligner::append_line(
10931 $forced_breakpoint_to_go[$iend] || $in_comma_list,
10932 $outdent_long_lines,
10933 $is_terminal_ternary,
10934 $is_semicolon_terminated,
10936 $rvertical_tightness_flags,
10940 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
10942 # flush an outdented line to avoid any unwanted vertical alignment
10943 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10947 } # end of loop to output each line
10949 # remember indentation of lines containing opening containers for
10950 # later use by sub set_adjusted_indentation
10951 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
10954 { # begin unmatched_indexes
10956 # closure to keep track of unbalanced containers.
10957 # arrays shared by the routines in this block:
10958 my @unmatched_opening_indexes_in_this_batch;
10959 my @unmatched_closing_indexes_in_this_batch;
10960 my %comma_arrow_count;
10962 sub is_unbalanced_batch {
10963 @unmatched_opening_indexes_in_this_batch +
10964 @unmatched_closing_indexes_in_this_batch;
10967 sub comma_arrow_count {
10969 return $comma_arrow_count{$seqno};
10972 sub match_opening_and_closing_tokens {
10974 # Match up indexes of opening and closing braces, etc, in this batch.
10975 # This has to be done after all tokens are stored because unstoring
10976 # of tokens would otherwise cause trouble.
10978 @unmatched_opening_indexes_in_this_batch = ();
10979 @unmatched_closing_indexes_in_this_batch = ();
10980 %comma_arrow_count = ();
10982 my ( $i, $i_mate, $token );
10983 foreach $i ( 0 .. $max_index_to_go ) {
10984 if ( $type_sequence_to_go[$i] ) {
10985 $token = $tokens_to_go[$i];
10986 if ( $token =~ /^[\(\[\{\?]$/ ) {
10987 push @unmatched_opening_indexes_in_this_batch, $i;
10989 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
10991 $i_mate = pop @unmatched_opening_indexes_in_this_batch;
10992 if ( defined($i_mate) && $i_mate >= 0 ) {
10993 if ( $type_sequence_to_go[$i_mate] ==
10994 $type_sequence_to_go[$i] )
10996 $mate_index_to_go[$i] = $i_mate;
10997 $mate_index_to_go[$i_mate] = $i;
11000 push @unmatched_opening_indexes_in_this_batch,
11002 push @unmatched_closing_indexes_in_this_batch, $i;
11006 push @unmatched_closing_indexes_in_this_batch, $i;
11010 elsif ( $tokens_to_go[$i] eq '=>' ) {
11011 if (@unmatched_opening_indexes_in_this_batch) {
11012 my $j = $unmatched_opening_indexes_in_this_batch[-1];
11013 my $seqno = $type_sequence_to_go[$j];
11014 $comma_arrow_count{$seqno}++;
11020 sub save_opening_indentation {
11022 # This should be called after each batch of tokens is output. It
11023 # saves indentations of lines of all unmatched opening tokens.
11024 # These will be used by sub get_opening_indentation.
11026 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
11028 # we no longer need indentations of any saved indentations which
11029 # are unmatched closing tokens in this batch, because we will
11030 # never encounter them again. So we can delete them to keep
11031 # the hash size down.
11032 foreach (@unmatched_closing_indexes_in_this_batch) {
11033 my $seqno = $type_sequence_to_go[$_];
11034 delete $saved_opening_indentation{$seqno};
11037 # we need to save indentations of any unmatched opening tokens
11038 # in this batch because we may need them in a subsequent batch.
11039 foreach (@unmatched_opening_indexes_in_this_batch) {
11040 my $seqno = $type_sequence_to_go[$_];
11041 $saved_opening_indentation{$seqno} = [
11042 lookup_opening_indentation(
11043 $_, $ri_first, $ri_last, $rindentation_list
11048 } # end unmatched_indexes
11050 sub get_opening_indentation {
11052 # get the indentation of the line which output the opening token
11053 # corresponding to a given closing token in the current output batch.
11056 # $i_closing - index in this line of a closing token ')' '}' or ']'
11058 # $ri_first - reference to list of the first index $i for each output
11059 # line in this batch
11060 # $ri_last - reference to list of the last index $i for each output line
11062 # $rindentation_list - reference to a list containing the indentation
11063 # used for each line.
11066 # -the indentation of the line which contained the opening token
11067 # which matches the token at index $i_opening
11068 # -and its offset (number of columns) from the start of the line
11070 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
11072 # first, see if the opening token is in the current batch
11073 my $i_opening = $mate_index_to_go[$i_closing];
11074 my ( $indent, $offset );
11075 if ( $i_opening >= 0 ) {
11077 # it is..look up the indentation
11078 ( $indent, $offset ) =
11079 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
11080 $rindentation_list );
11083 # if not, it should have been stored in the hash by a previous batch
11085 my $seqno = $type_sequence_to_go[$i_closing];
11087 if ( $saved_opening_indentation{$seqno} ) {
11088 ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
11091 # some kind of serious error
11092 # (example is badfile.t)
11099 # if no sequence number it must be an unbalanced container
11105 return ( $indent, $offset );
11108 sub lookup_opening_indentation {
11110 # get the indentation of the line in the current output batch
11111 # which output a selected opening token
11114 # $i_opening - index of an opening token in the current output batch
11115 # whose line indentation we need
11116 # $ri_first - reference to list of the first index $i for each output
11117 # line in this batch
11118 # $ri_last - reference to list of the last index $i for each output line
11120 # $rindentation_list - reference to a list containing the indentation
11121 # used for each line. (NOTE: the first slot in
11122 # this list is the last returned line number, and this is
11123 # followed by the list of indentations).
11126 # -the indentation of the line which contained token $i_opening
11127 # -and its offset (number of columns) from the start of the line
11129 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
11131 my $nline = $rindentation_list->[0]; # line number of previous lookup
11133 # reset line location if necessary
11134 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
11136 # find the correct line
11137 unless ( $i_opening > $ri_last->[-1] ) {
11138 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
11141 # error - token index is out of bounds - shouldn't happen
11144 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
11146 report_definite_bug();
11147 $nline = $#{$ri_last};
11150 $rindentation_list->[0] =
11151 $nline; # save line number to start looking next call
11152 my $ibeg = $ri_start->[$nline];
11153 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
11154 return ( $rindentation_list->[ $nline + 1 ], $offset );
11158 my %is_if_elsif_else_unless_while_until_for_foreach;
11162 # These block types may have text between the keyword and opening
11163 # curly. Note: 'else' does not, but must be included to allow trailing
11164 # if/elsif text to be appended.
11165 # patch for SWITCH/CASE: added 'case' and 'when'
11166 @_ = qw(if elsif else unless while until for foreach case when);
11167 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11170 sub set_adjusted_indentation {
11172 # This routine has the final say regarding the actual indentation of
11173 # a line. It starts with the basic indentation which has been
11174 # defined for the leading token, and then takes into account any
11175 # options that the user has set regarding special indenting and
11178 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11179 $rindentation_list )
11182 # we need to know the last token of this line
11183 my ( $terminal_type, $i_terminal ) =
11184 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11186 my $is_outdented_line = 0;
11188 my $is_semicolon_terminated = $terminal_type eq ';'
11189 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11191 ##########################################################
11192 # Section 1: set a flag and a default indentation
11194 # Most lines are indented according to the initial token.
11195 # But it is common to outdent to the level just after the
11196 # terminal token in certain cases...
11197 # adjust_indentation flag:
11198 # 0 - do not adjust
11200 # 2 - vertically align with opening token
11202 ##########################################################
11203 my $adjust_indentation = 0;
11204 my $default_adjust_indentation = $adjust_indentation;
11206 my ( $opening_indentation, $opening_offset );
11208 # if we are at a closing token of some type..
11209 if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11211 # get the indentation of the line containing the corresponding
11213 ( $opening_indentation, $opening_offset ) =
11214 get_opening_indentation( $ibeg, $ri_first, $ri_last,
11215 $rindentation_list );
11217 # First set the default behavior:
11218 # default behavior is to outdent closing lines
11219 # of the form: "); }; ]; )->xxx;"
11221 $is_semicolon_terminated
11223 # and 'cuddled parens' of the form: ")->pack("
11225 $terminal_type eq '('
11226 && $types_to_go[$ibeg] eq ')'
11227 && ( $nesting_depth_to_go[$iend] + 1 ==
11228 $nesting_depth_to_go[$ibeg] )
11232 $adjust_indentation = 1;
11235 # TESTING: outdent something like '),'
11237 $terminal_type eq ','
11239 # allow just one character before the comma
11240 && $i_terminal == $ibeg + 1
11242 # requre LIST environment; otherwise, we may outdent too much --
11243 # this can happen in calls without parentheses (overload.t);
11244 && $container_environment_to_go[$i_terminal] eq 'LIST'
11247 $adjust_indentation = 1;
11250 # undo continuation indentation of a terminal closing token if
11251 # it is the last token before a level decrease. This will allow
11252 # a closing token to line up with its opening counterpart, and
11253 # avoids a indentation jump larger than 1 level.
11254 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11255 && $i_terminal == $ibeg )
11257 my $ci = $ci_levels_to_go[$ibeg];
11258 my $lev = $levels_to_go[$ibeg];
11259 my $next_type = $types_to_go[ $ibeg + 1 ];
11260 my $i_next_nonblank =
11261 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11262 if ( $i_next_nonblank <= $max_index_to_go
11263 && $levels_to_go[$i_next_nonblank] < $lev )
11265 $adjust_indentation = 1;
11269 $default_adjust_indentation = $adjust_indentation;
11271 # Now modify default behavior according to user request:
11272 # handle option to indent non-blocks of the form ); }; ];
11273 # But don't do special indentation to something like ')->pack('
11274 if ( !$block_type_to_go[$ibeg] ) {
11275 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11277 if ( $i_terminal <= $ibeg + 1
11278 || $is_semicolon_terminated )
11280 $adjust_indentation = 2;
11283 $adjust_indentation = 0;
11286 elsif ( $cti == 2 ) {
11287 if ($is_semicolon_terminated) {
11288 $adjust_indentation = 3;
11291 $adjust_indentation = 0;
11294 elsif ( $cti == 3 ) {
11295 $adjust_indentation = 3;
11299 # handle option to indent blocks
11302 $rOpts->{'indent-closing-brace'}
11304 $i_terminal == $ibeg # isolated terminal '}'
11305 || $is_semicolon_terminated
11309 $adjust_indentation = 3;
11314 # if at ');', '};', '>;', and '];' of a terminal qw quote
11315 elsif ($$rpatterns[0] =~ /^qb*;$/
11316 && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
11318 if ( $closing_token_indentation{$1} == 0 ) {
11319 $adjust_indentation = 1;
11322 $adjust_indentation = 3;
11326 ##########################################################
11327 # Section 2: set indentation according to flag set above
11329 # Select the indentation object to define leading
11330 # whitespace. If we are outdenting something like '} } );'
11331 # then we want to use one level below the last token
11332 # ($i_terminal) in order to get it to fully outdent through
11334 ##########################################################
11337 my $level_end = $levels_to_go[$iend];
11339 if ( $adjust_indentation == 0 ) {
11340 $indentation = $leading_spaces_to_go[$ibeg];
11341 $lev = $levels_to_go[$ibeg];
11343 elsif ( $adjust_indentation == 1 ) {
11344 $indentation = $reduced_spaces_to_go[$i_terminal];
11345 $lev = $levels_to_go[$i_terminal];
11348 # handle indented closing token which aligns with opening token
11349 elsif ( $adjust_indentation == 2 ) {
11351 # handle option to align closing token with opening token
11352 $lev = $levels_to_go[$ibeg];
11354 # calculate spaces needed to align with opening token
11356 get_SPACES($opening_indentation) + $opening_offset;
11358 # Indent less than the previous line.
11360 # Problem: For -lp we don't exactly know what it was if there
11361 # were recoverable spaces sent to the aligner. A good solution
11362 # would be to force a flush of the vertical alignment buffer, so
11363 # that we would know. For now, this rule is used for -lp:
11365 # When the last line did not start with a closing token we will
11366 # be optimistic that the aligner will recover everything wanted.
11368 # This rule will prevent us from breaking a hierarchy of closing
11369 # tokens, and in a worst case will leave a closing paren too far
11370 # indented, but this is better than frequently leaving it not
11372 my $last_spaces = get_SPACES($last_indentation_written);
11373 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
11375 get_RECOVERABLE_SPACES($last_indentation_written);
11378 # reset the indentation to the new space count if it works
11379 # only options are all or none: nothing in-between looks good
11380 $lev = $levels_to_go[$ibeg];
11381 if ( $space_count < $last_spaces ) {
11382 if ($rOpts_line_up_parentheses) {
11383 my $lev = $levels_to_go[$ibeg];
11385 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11388 $indentation = $space_count;
11392 # revert to default if it doesnt work
11394 $space_count = leading_spaces_to_go($ibeg);
11395 if ( $default_adjust_indentation == 0 ) {
11396 $indentation = $leading_spaces_to_go[$ibeg];
11398 elsif ( $default_adjust_indentation == 1 ) {
11399 $indentation = $reduced_spaces_to_go[$i_terminal];
11400 $lev = $levels_to_go[$i_terminal];
11405 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
11408 # handle -icb (indented closing code block braces)
11409 # Updated method for indented block braces: indent one full level if
11410 # there is no continuation indentation. This will occur for major
11411 # structures such as sub, if, else, but not for things like map
11414 # Note: only code blocks without continuation indentation are
11415 # handled here (if, else, unless, ..). In the following snippet,
11416 # the terminal brace of the sort block will have continuation
11417 # indentation as shown so it will not be handled by the coding
11418 # here. We would have to undo the continuation indentation to do
11419 # this, but it probably looks ok as is. This is a possible future
11420 # update for semicolon terminated lines.
11422 # if ($sortby eq 'date' or $sortby eq 'size') {
11424 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
11429 if ( $block_type_to_go[$ibeg]
11430 && $ci_levels_to_go[$i_terminal] == 0 )
11432 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
11433 $indentation = $spaces + $rOpts_indent_columns;
11435 # NOTE: for -lp we could create a new indentation object, but
11436 # there is probably no need to do it
11439 # handle -icp and any -icb block braces which fall through above
11440 # test such as the 'sort' block mentioned above.
11443 # There are currently two ways to handle -icp...
11444 # One way is to use the indentation of the previous line:
11445 # $indentation = $last_indentation_written;
11447 # The other way is to use the indentation that the previous line
11448 # would have had if it hadn't been adjusted:
11449 $indentation = $last_unadjusted_indentation;
11451 # Current method: use the minimum of the two. This avoids
11452 # inconsistent indentation.
11453 if ( get_SPACES($last_indentation_written) <
11454 get_SPACES($indentation) )
11456 $indentation = $last_indentation_written;
11460 # use previous indentation but use own level
11461 # to cause list to be flushed properly
11462 $lev = $levels_to_go[$ibeg];
11465 # remember indentation except for multi-line quotes, which get
11467 unless ( $ibeg == 0 && $starting_in_quote ) {
11468 $last_indentation_written = $indentation;
11469 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
11470 $last_leading_token = $tokens_to_go[$ibeg];
11473 # be sure lines with leading closing tokens are not outdented more
11474 # than the line which contained the corresponding opening token.
11476 #############################################################
11477 # updated per bug report in alex_bug.pl: we must not
11478 # mess with the indentation of closing logical braces so
11479 # we must treat something like '} else {' as if it were
11480 # an isolated brace my $is_isolated_block_brace = (
11481 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
11482 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
11483 && ( $iend == $ibeg
11484 || $is_if_elsif_else_unless_while_until_for_foreach{
11485 $block_type_to_go[$ibeg] } );
11486 #############################################################
11487 if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
11488 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
11489 $indentation = $opening_indentation;
11493 # remember the indentation of each line of this batch
11494 push @{$rindentation_list}, $indentation;
11496 # outdent lines with certain leading tokens...
11499 # must be first word of this batch
11505 # certain leading keywords if requested
11507 $rOpts->{'outdent-keywords'}
11508 && $types_to_go[$ibeg] eq 'k'
11509 && $outdent_keyword{ $tokens_to_go[$ibeg] }
11512 # or labels if requested
11513 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
11515 # or static block comments if requested
11516 || ( $types_to_go[$ibeg] eq '#'
11517 && $rOpts->{'outdent-static-block-comments'}
11518 && $is_static_block_comment )
11523 my $space_count = leading_spaces_to_go($ibeg);
11524 if ( $space_count > 0 ) {
11525 $space_count -= $rOpts_continuation_indentation;
11526 $is_outdented_line = 1;
11527 if ( $space_count < 0 ) { $space_count = 0 }
11529 # do not promote a spaced static block comment to non-spaced;
11530 # this is not normally necessary but could be for some
11531 # unusual user inputs (such as -ci = -i)
11532 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
11536 if ($rOpts_line_up_parentheses) {
11538 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11541 $indentation = $space_count;
11546 return ( $indentation, $lev, $level_end, $terminal_type,
11547 $is_semicolon_terminated, $is_outdented_line );
11551 sub set_vertical_tightness_flags {
11553 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
11555 # Define vertical tightness controls for the nth line of a batch.
11556 # We create an array of parameters which tell the vertical aligner
11557 # if we should combine this line with the next line to achieve the
11558 # desired vertical tightness. The array of parameters contains:
11560 # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace
11561 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
11562 # if closing: spaces of padding to use
11563 # [2] sequence number of container
11564 # [3] valid flag: do not append if this flag is false. Will be
11565 # true if appropriate -vt flag is set. Otherwise, Will be
11566 # made true only for 2 line container in parens with -lp
11568 # These flags are used by sub set_leading_whitespace in
11569 # the vertical aligner
11571 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
11573 # For non-BLOCK tokens, we will need to examine the next line
11574 # too, so we won't consider the last line.
11575 if ( $n < $n_last_line ) {
11577 # see if last token is an opening token...not a BLOCK...
11578 my $ibeg_next = $$ri_first[ $n + 1 ];
11579 my $token_end = $tokens_to_go[$iend];
11580 my $iend_next = $$ri_last[ $n + 1 ];
11582 $type_sequence_to_go[$iend]
11583 && !$block_type_to_go[$iend]
11584 && $is_opening_token{$token_end}
11586 $opening_vertical_tightness{$token_end} > 0
11588 # allow 2-line method call to be closed up
11589 || ( $rOpts_line_up_parentheses
11590 && $token_end eq '('
11592 && $types_to_go[ $iend - 1 ] ne 'b' )
11597 # avoid multiple jumps in nesting depth in one line if
11599 my $ovt = $opening_vertical_tightness{$token_end};
11600 my $iend_next = $$ri_last[ $n + 1 ];
11603 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
11604 $nesting_depth_to_go[$ibeg_next] )
11608 # If -vt flag has not been set, mark this as invalid
11609 # and aligner will validate it if it sees the closing paren
11611 my $valid_flag = $ovt;
11612 @{$rvertical_tightness_flags} =
11613 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
11617 # see if first token of next line is a closing token...
11618 # ..and be sure this line does not have a side comment
11619 my $token_next = $tokens_to_go[$ibeg_next];
11620 if ( $type_sequence_to_go[$ibeg_next]
11621 && !$block_type_to_go[$ibeg_next]
11622 && $is_closing_token{$token_next}
11623 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
11625 my $ovt = $opening_vertical_tightness{$token_next};
11626 my $cvt = $closing_vertical_tightness{$token_next};
11629 # never append a trailing line like )->pack(
11630 # because it will throw off later alignment
11632 $nesting_depth_to_go[$ibeg_next] ==
11633 $nesting_depth_to_go[ $iend_next + 1 ] + 1
11638 $container_environment_to_go[$ibeg_next] ne 'LIST'
11642 # allow closing up 2-line method calls
11643 || ( $rOpts_line_up_parentheses
11644 && $token_next eq ')' )
11651 # decide which trailing closing tokens to append..
11653 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
11655 my $str = join( '',
11656 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
11658 # append closing token if followed by comment or ';'
11659 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
11663 my $valid_flag = $cvt;
11664 @{$rvertical_tightness_flags} = (
11666 $tightness{$token_next} == 2 ? 0 : 1,
11667 $type_sequence_to_go[$ibeg_next], $valid_flag,
11673 # Opening Token Right
11674 # If requested, move an isolated trailing opening token to the end of
11675 # the previous line which ended in a comma. We could do this
11676 # in sub recombine_breakpoints but that would cause problems
11677 # with -lp formatting. The problem is that indentation will
11678 # quickly move far to the right in nested expressions. By
11679 # doing it after indentation has been set, we avoid changes
11680 # to the indentation. Actual movement of the token takes place
11681 # in sub write_leader_and_string.
11683 $opening_token_right{ $tokens_to_go[$ibeg_next] }
11685 # previous line is not opening
11686 # (use -sot to combine with it)
11687 && !$is_opening_token{$token_end}
11689 # previous line ended in one of these
11690 # (add other cases if necessary; '=>' and '.' are not necessary
11691 ##&& ($is_opening_token{$token_end} || $token_end eq ',')
11692 && !$block_type_to_go[$ibeg_next]
11694 # this is a line with just an opening token
11695 && ( $iend_next == $ibeg_next
11696 || $iend_next == $ibeg_next + 1
11697 && $types_to_go[$iend_next] eq '#' )
11699 # looks bad if we align vertically with the wrong container
11700 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
11703 my $valid_flag = 1;
11704 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11705 @{$rvertical_tightness_flags} =
11706 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
11709 # Stacking of opening and closing tokens
11711 my $token_beg_next = $tokens_to_go[$ibeg_next];
11713 # patch to make something like 'qw(' behave like an opening paren
11715 if ( $types_to_go[$ibeg_next] eq 'q' ) {
11716 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
11717 $token_beg_next = $1;
11721 if ( $is_closing_token{$token_end}
11722 && $is_closing_token{$token_beg_next} )
11724 $stackable = $stack_closing_token{$token_beg_next}
11725 unless ( $block_type_to_go[$ibeg_next] )
11726 ; # shouldn't happen; just checking
11728 elsif ($is_opening_token{$token_end}
11729 && $is_opening_token{$token_beg_next} )
11731 $stackable = $stack_opening_token{$token_beg_next}
11732 unless ( $block_type_to_go[$ibeg_next] )
11733 ; # shouldn't happen; just checking
11738 my $is_semicolon_terminated;
11739 if ( $n + 1 == $n_last_line ) {
11740 my ( $terminal_type, $i_terminal ) = terminal_type(
11741 \@types_to_go, \@block_type_to_go,
11742 $ibeg_next, $iend_next
11744 $is_semicolon_terminated = $terminal_type eq ';'
11745 && $nesting_depth_to_go[$iend_next] <
11746 $nesting_depth_to_go[$ibeg_next];
11749 # this must be a line with just an opening token
11750 # or end in a semicolon
11752 $is_semicolon_terminated
11753 || ( $iend_next == $ibeg_next
11754 || $iend_next == $ibeg_next + 1
11755 && $types_to_go[$iend_next] eq '#' )
11758 my $valid_flag = 1;
11759 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11760 @{$rvertical_tightness_flags} =
11761 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
11767 # Check for a last line with isolated opening BLOCK curly
11768 elsif ($rOpts_block_brace_vertical_tightness
11770 && $types_to_go[$iend] eq '{'
11771 && $block_type_to_go[$iend] =~
11772 /$block_brace_vertical_tightness_pattern/o )
11774 @{$rvertical_tightness_flags} =
11775 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
11778 # pack in the sequence numbers of the ends of this line
11779 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
11780 $rvertical_tightness_flags->[5] = get_seqno($iend);
11781 return $rvertical_tightness_flags;
11786 # get opening and closing sequence numbers of a token for the vertical
11787 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
11788 # to be treated somewhat like opening and closing tokens for stacking
11789 # tokens by the vertical aligner.
11791 my $seqno = $type_sequence_to_go[$ii];
11792 if ( $types_to_go[$ii] eq 'q' ) {
11795 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
11798 if ( !$ending_in_quote ) {
11799 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
11807 my %is_vertical_alignment_type;
11808 my %is_vertical_alignment_keyword;
11813 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
11814 { ? : => =~ && || // ~~
11816 @is_vertical_alignment_type{@_} = (1) x scalar(@_);
11818 @_ = qw(if unless and or err eq ne for foreach while until);
11819 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
11822 sub set_vertical_alignment_markers {
11824 # This routine takes the first step toward vertical alignment of the
11825 # lines of output text. It looks for certain tokens which can serve as
11826 # vertical alignment markers (such as an '=').
11828 # Method: We look at each token $i in this output batch and set
11829 # $matching_token_to_go[$i] equal to those tokens at which we would
11830 # accept vertical alignment.
11832 # nothing to do if we aren't allowed to change whitespace
11833 if ( !$rOpts_add_whitespace ) {
11834 for my $i ( 0 .. $max_index_to_go ) {
11835 $matching_token_to_go[$i] = '';
11840 my ( $ri_first, $ri_last ) = @_;
11842 # remember the index of last nonblank token before any sidecomment
11843 my $i_terminal = $max_index_to_go;
11844 if ( $types_to_go[$i_terminal] eq '#' ) {
11845 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
11846 if ( $i_terminal > 0 ) { --$i_terminal }
11850 # look at each line of this batch..
11851 my $last_vertical_alignment_before_index;
11852 my $vert_last_nonblank_type;
11853 my $vert_last_nonblank_token;
11854 my $vert_last_nonblank_block_type;
11855 my $max_line = @$ri_first - 1;
11856 my ( $i, $type, $token, $block_type, $alignment_type );
11857 my ( $ibeg, $iend, $line );
11859 foreach $line ( 0 .. $max_line ) {
11860 $ibeg = $$ri_first[$line];
11861 $iend = $$ri_last[$line];
11862 $last_vertical_alignment_before_index = -1;
11863 $vert_last_nonblank_type = '';
11864 $vert_last_nonblank_token = '';
11865 $vert_last_nonblank_block_type = '';
11867 # look at each token in this output line..
11868 foreach $i ( $ibeg .. $iend ) {
11869 $alignment_type = '';
11870 $type = $types_to_go[$i];
11871 $block_type = $block_type_to_go[$i];
11872 $token = $tokens_to_go[$i];
11874 # check for flag indicating that we should not align
11876 if ( $matching_token_to_go[$i] ) {
11877 $matching_token_to_go[$i] = '';
11881 #--------------------------------------------------------
11882 # First see if we want to align BEFORE this token
11883 #--------------------------------------------------------
11885 # The first possible token that we can align before
11886 # is index 2 because: 1) it doesn't normally make sense to
11887 # align before the first token and 2) the second
11888 # token must be a blank if we are to align before
11890 if ( $i < $ibeg + 2 ) { }
11892 # must follow a blank token
11893 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
11895 # align a side comment --
11896 elsif ( $type eq '#' ) {
11900 # it is a static side comment
11902 $rOpts->{'static-side-comments'}
11903 && $token =~ /$static_side_comment_pattern/o
11906 # or a closing side comment
11907 || ( $vert_last_nonblank_block_type
11909 /$closing_side_comment_prefix_pattern/o )
11912 $alignment_type = $type;
11913 } ## Example of a static side comment
11916 # otherwise, do not align two in a row to create a
11918 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
11920 # align before one of these keywords
11921 # (within a line, since $i>1)
11922 elsif ( $type eq 'k' ) {
11924 # /^(if|unless|and|or|eq|ne)$/
11925 if ( $is_vertical_alignment_keyword{$token} ) {
11926 $alignment_type = $token;
11930 # align before one of these types..
11931 # Note: add '.' after new vertical aligner is operational
11932 elsif ( $is_vertical_alignment_type{$type} ) {
11933 $alignment_type = $token;
11935 # Do not align a terminal token. Although it might
11936 # occasionally look ok to do this, it has been found to be
11937 # a good general rule. The main problems are:
11938 # (1) that the terminal token (such as an = or :) might get
11939 # moved far to the right where it is hard to see because
11940 # nothing follows it, and
11941 # (2) doing so may prevent other good alignments.
11942 if ( $i == $iend || $i >= $i_terminal ) {
11943 $alignment_type = "";
11946 # Do not align leading ': ('. This would prevent
11947 # alignment in something like the following:
11949 # ( $input_line_number < 10 ) ? " "
11950 # : ( $input_line_number < 100 ) ? " "
11952 if ( $i == $ibeg + 2
11953 && $types_to_go[$ibeg] eq ':'
11954 && $types_to_go[ $i - 1 ] eq 'b' )
11956 $alignment_type = "";
11959 # For a paren after keyword, only align something like this:
11961 # elsif ( $b ) { &b }
11962 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
11963 $alignment_type = ""
11964 unless $vert_last_nonblank_token =~
11965 /^(if|unless|elsif)$/;
11968 # be sure the alignment tokens are unique
11969 # This didn't work well: reason not determined
11970 # if ($token ne $type) {$alignment_type .= $type}
11973 # NOTE: This is deactivated because it causes the previous
11974 # if/elsif alignment to fail
11975 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
11976 #{ $alignment_type = $type; }
11978 if ($alignment_type) {
11979 $last_vertical_alignment_before_index = $i;
11982 #--------------------------------------------------------
11983 # Next see if we want to align AFTER the previous nonblank
11984 #--------------------------------------------------------
11986 # We want to line up ',' and interior ';' tokens, with the added
11987 # space AFTER these tokens. (Note: interior ';' is included
11988 # because it may occur in short blocks).
11991 # we haven't already set it
11994 # and its not the first token of the line
11997 # and it follows a blank
11998 && $types_to_go[ $i - 1 ] eq 'b'
12000 # and previous token IS one of these:
12001 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
12003 # and it's NOT one of these
12004 && ( $type !~ /^[b\#\)\]\}]$/ )
12006 # then go ahead and align
12010 $alignment_type = $vert_last_nonblank_type;
12013 #--------------------------------------------------------
12014 # then store the value
12015 #--------------------------------------------------------
12016 $matching_token_to_go[$i] = $alignment_type;
12017 if ( $type ne 'b' ) {
12018 $vert_last_nonblank_type = $type;
12019 $vert_last_nonblank_token = $token;
12020 $vert_last_nonblank_block_type = $block_type;
12027 sub terminal_type {
12029 # returns type of last token on this line (terminal token), as follows:
12030 # returns # for a full-line comment
12031 # returns ' ' for a blank line
12032 # otherwise returns final token type
12034 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
12036 # check for full-line comment..
12037 if ( $$rtype[$ibeg] eq '#' ) {
12038 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
12042 # start at end and walk bakwards..
12043 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
12045 # skip past any side comment and blanks
12046 next if ( $$rtype[$i] eq 'b' );
12047 next if ( $$rtype[$i] eq '#' );
12049 # found it..make sure it is a BLOCK termination,
12050 # but hide a terminal } after sort/grep/map because it is not
12051 # necessarily the end of the line. (terminal.t)
12052 my $terminal_type = $$rtype[$i];
12054 $terminal_type eq '}'
12055 && ( !$$rblock_type[$i]
12056 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
12059 $terminal_type = 'b';
12061 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
12065 return wantarray ? ( ' ', $ibeg ) : ' ';
12070 my %is_good_keyword_breakpoint;
12071 my %is_lt_gt_le_ge;
12073 sub set_bond_strengths {
12077 @_ = qw(if unless while until for foreach);
12078 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
12080 @_ = qw(lt gt le ge);
12081 @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
12083 ###############################################################
12084 # NOTE: NO_BREAK's set here are HINTS which may not be honored;
12085 # essential NO_BREAKS's must be enforced in section 2, below.
12086 ###############################################################
12088 # adding NEW_TOKENS: add a left and right bond strength by
12089 # mimmicking what is done for an existing token type. You
12090 # can skip this step at first and take the default, then
12091 # tweak later to get desired results.
12093 # The bond strengths should roughly follow precenence order where
12094 # possible. If you make changes, please check the results very
12095 # carefully on a variety of scripts.
12097 # no break around possible filehandle
12098 $left_bond_strength{'Z'} = NO_BREAK;
12099 $right_bond_strength{'Z'} = NO_BREAK;
12101 # never put a bare word on a new line:
12102 # example print (STDERR, "bla"); will fail with break after (
12103 $left_bond_strength{'w'} = NO_BREAK;
12105 # blanks always have infinite strength to force breaks after real tokens
12106 $right_bond_strength{'b'} = NO_BREAK;
12108 # try not to break on exponentation
12109 @_ = qw" ** .. ... <=> ";
12110 @left_bond_strength{@_} = (STRONG) x scalar(@_);
12111 @right_bond_strength{@_} = (STRONG) x scalar(@_);
12113 # The comma-arrow has very low precedence but not a good break point
12114 $left_bond_strength{'=>'} = NO_BREAK;
12115 $right_bond_strength{'=>'} = NOMINAL;
12117 # ok to break after label
12118 $left_bond_strength{'J'} = NO_BREAK;
12119 $right_bond_strength{'J'} = NOMINAL;
12120 $left_bond_strength{'j'} = STRONG;
12121 $right_bond_strength{'j'} = STRONG;
12122 $left_bond_strength{'A'} = STRONG;
12123 $right_bond_strength{'A'} = STRONG;
12125 $left_bond_strength{'->'} = STRONG;
12126 $right_bond_strength{'->'} = VERY_STRONG;
12128 # breaking AFTER these is just ok:
12129 @_ = qw" % + - * / x ";
12130 @left_bond_strength{@_} = (STRONG) x scalar(@_);
12131 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12133 # breaking BEFORE these is just ok:
12135 @right_bond_strength{@_} = (STRONG) x scalar(@_);
12136 @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
12138 # I prefer breaking before the string concatenation operator
12139 # because it can be hard to see at the end of a line
12140 # swap these to break after a '.'
12141 # this could be a future option
12142 $right_bond_strength{'.'} = STRONG;
12143 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
12146 @left_bond_strength{@_} = (STRONG) x scalar(@_);
12147 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12149 # make these a little weaker than nominal so that they get
12150 # favored for end-of-line characters
12151 @_ = qw"!= == =~ !~ ~~";
12152 @left_bond_strength{@_} = (STRONG) x scalar(@_);
12153 @right_bond_strength{@_} =
12154 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
12156 # break AFTER these
12157 @_ = qw" < > | & >= <=";
12158 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
12159 @right_bond_strength{@_} =
12160 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
12162 # breaking either before or after a quote is ok
12163 # but bias for breaking before a quote
12164 $left_bond_strength{'Q'} = NOMINAL;
12165 $right_bond_strength{'Q'} = NOMINAL + 0.02;
12166 $left_bond_strength{'q'} = NOMINAL;
12167 $right_bond_strength{'q'} = NOMINAL;
12169 # starting a line with a keyword is usually ok
12170 $left_bond_strength{'k'} = NOMINAL;
12172 # we usually want to bond a keyword strongly to what immediately
12173 # follows, rather than leaving it stranded at the end of a line
12174 $right_bond_strength{'k'} = STRONG;
12176 $left_bond_strength{'G'} = NOMINAL;
12177 $right_bond_strength{'G'} = STRONG;
12179 # it is good to break AFTER various assignment operators
12181 = **= += *= &= <<= &&=
12182 -= /= |= >>= ||= //=
12186 @left_bond_strength{@_} = (STRONG) x scalar(@_);
12187 @right_bond_strength{@_} =
12188 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12190 # break BEFORE '&&' and '||' and '//'
12191 # set strength of '||' to same as '=' so that chains like
12192 # $a = $b || $c || $d will break before the first '||'
12193 $right_bond_strength{'||'} = NOMINAL;
12194 $left_bond_strength{'||'} = $right_bond_strength{'='};
12196 # same thing for '//'
12197 $right_bond_strength{'//'} = NOMINAL;
12198 $left_bond_strength{'//'} = $right_bond_strength{'='};
12200 # set strength of && a little higher than ||
12201 $right_bond_strength{'&&'} = NOMINAL;
12202 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
12204 $left_bond_strength{';'} = VERY_STRONG;
12205 $right_bond_strength{';'} = VERY_WEAK;
12206 $left_bond_strength{'f'} = VERY_STRONG;
12208 # make right strength of for ';' a little less than '='
12209 # to make for contents break after the ';' to avoid this:
12210 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12211 # $number_of_fields )
12212 # and make it weaker than ',' and 'and' too
12213 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12215 # The strengths of ?/: should be somewhere between
12216 # an '=' and a quote (NOMINAL),
12217 # make strength of ':' slightly less than '?' to help
12218 # break long chains of ? : after the colons
12219 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
12220 $right_bond_strength{':'} = NO_BREAK;
12221 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
12222 $right_bond_strength{'?'} = NO_BREAK;
12224 $left_bond_strength{','} = VERY_STRONG;
12225 $right_bond_strength{','} = VERY_WEAK;
12227 # Set bond strengths of certain keywords
12228 # make 'or', 'err', 'and' slightly weaker than a ','
12229 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
12230 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
12231 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
12232 $left_bond_strength{'xor'} = NOMINAL;
12233 $right_bond_strength{'and'} = NOMINAL;
12234 $right_bond_strength{'or'} = NOMINAL;
12235 $right_bond_strength{'err'} = NOMINAL;
12236 $right_bond_strength{'xor'} = STRONG;
12239 # patch-its always ok to break at end of line
12240 $nobreak_to_go[$max_index_to_go] = 0;
12242 # adding a small 'bias' to strengths is a simple way to make a line
12243 # break at the first of a sequence of identical terms. For example,
12244 # to force long string of conditional operators to break with
12245 # each line ending in a ':', we can add a small number to the bond
12246 # strength of each ':'
12247 my $colon_bias = 0;
12254 my $code_bias = -.01;
12258 my $last_nonblank_type = $type;
12259 my $last_nonblank_token = $token;
12260 my $delta_bias = 0.0001;
12261 my $list_str = $left_bond_strength{'?'};
12263 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
12264 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
12267 # preliminary loop to compute bond strengths
12268 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
12269 $last_type = $type;
12270 if ( $type ne 'b' ) {
12271 $last_nonblank_type = $type;
12272 $last_nonblank_token = $token;
12274 $type = $types_to_go[$i];
12276 # strength on both sides of a blank is the same
12277 if ( $type eq 'b' && $last_type ne 'b' ) {
12278 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
12282 $token = $tokens_to_go[$i];
12283 $block_type = $block_type_to_go[$i];
12285 $next_type = $types_to_go[$i_next];
12286 $next_token = $tokens_to_go[$i_next];
12287 $total_nesting_depth = $nesting_depth_to_go[$i_next];
12288 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12289 $next_nonblank_type = $types_to_go[$i_next_nonblank];
12290 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12292 # Some token chemistry... The decision about where to break a
12293 # line depends upon a "bond strength" between tokens. The LOWER
12294 # the bond strength, the MORE likely a break. The strength
12295 # values are based on trial-and-error, and need to be tweaked
12296 # occasionally to get desired results. Things to keep in mind
12298 # 1. relative strengths are important. small differences
12299 # in strengths can make big formatting differences.
12300 # 2. each indentation level adds one unit of bond strength
12301 # 3. a value of NO_BREAK makes an unbreakable bond
12302 # 4. a value of VERY_WEAK is the strength of a ','
12303 # 5. values below NOMINAL are considered ok break points
12304 # 6. values above NOMINAL are considered poor break points
12305 # We are computing the strength of the bond between the current
12306 # token and the NEXT token.
12307 my $bond_str = VERY_STRONG; # a default, high strength
12309 #---------------------------------------------------------------
12311 # use minimum of left and right bond strengths if defined;
12312 # digraphs and trigraphs like to break on their left
12313 #---------------------------------------------------------------
12314 my $bsr = $right_bond_strength{$type};
12316 if ( !defined($bsr) ) {
12318 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
12322 $bsr = VERY_STRONG;
12326 # define right bond strengths of certain keywords
12327 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
12328 $bsr = $right_bond_strength{$token};
12330 elsif ( $token eq 'ne' or $token eq 'eq' ) {
12333 my $bsl = $left_bond_strength{$next_nonblank_type};
12335 # set terminal bond strength to the nominal value
12336 # this will cause good preceding breaks to be retained
12337 if ( $i_next_nonblank > $max_index_to_go ) {
12341 if ( !defined($bsl) ) {
12343 if ( $is_digraph{$next_nonblank_type}
12344 || $is_trigraph{$next_nonblank_type} )
12349 $bsl = VERY_STRONG;
12353 # define right bond strengths of certain keywords
12354 if ( $next_nonblank_type eq 'k'
12355 && defined( $left_bond_strength{$next_nonblank_token} ) )
12357 $bsl = $left_bond_strength{$next_nonblank_token};
12359 elsif ($next_nonblank_token eq 'ne'
12360 or $next_nonblank_token eq 'eq' )
12364 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
12365 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
12368 # Note: it might seem that we would want to keep a NO_BREAK if
12369 # either token has this value. This didn't work, because in an
12370 # arrow list, it prevents the comma from separating from the
12371 # following bare word (which is probably quoted by its arrow).
12372 # So necessary NO_BREAK's have to be handled as special cases
12373 # in the final section.
12374 $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
12375 my $bond_str_1 = $bond_str;
12377 #---------------------------------------------------------------
12380 #---------------------------------------------------------------
12382 # allow long lines before final { in an if statement, as in:
12387 # Otherwise, the line before the { tends to be too short.
12388 if ( $type eq ')' ) {
12389 if ( $next_nonblank_type eq '{' ) {
12390 $bond_str = VERY_WEAK + 0.03;
12394 elsif ( $type eq '(' ) {
12395 if ( $next_nonblank_type eq '{' ) {
12396 $bond_str = NOMINAL;
12400 # break on something like '} (', but keep this stronger than a ','
12401 # example is in 'howe.pl'
12402 elsif ( $type eq 'R' or $type eq '}' ) {
12403 if ( $next_nonblank_type eq '(' ) {
12404 $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
12408 #-----------------------------------------------------------------
12409 # adjust bond strength bias
12410 #-----------------------------------------------------------------
12412 elsif ( $type eq 'f' ) {
12413 $bond_str += $f_bias;
12414 $f_bias += $delta_bias;
12417 # in long ?: conditionals, bias toward just one set per line (colon.t)
12418 elsif ( $type eq ':' ) {
12419 if ( !$want_break_before{$type} ) {
12420 $bond_str += $colon_bias;
12421 $colon_bias += $delta_bias;
12425 if ( $next_nonblank_type eq ':'
12426 && $want_break_before{$next_nonblank_type} )
12428 $bond_str += $colon_bias;
12429 $colon_bias += $delta_bias;
12432 # if leading '.' is used, align all but 'short' quotes;
12433 # the idea is to not place something like "\n" on a single line.
12434 elsif ( $next_nonblank_type eq '.' ) {
12435 if ( $want_break_before{'.'} ) {
12437 $last_nonblank_type eq '.'
12440 $rOpts_short_concatenation_item_length )
12441 && ( $token !~ /^[\)\]\}]$/ )
12444 $dot_bias += $delta_bias;
12446 $bond_str += $dot_bias;
12449 elsif ($next_nonblank_type eq '&&'
12450 && $want_break_before{$next_nonblank_type} )
12452 $bond_str += $amp_bias;
12453 $amp_bias += $delta_bias;
12455 elsif ($next_nonblank_type eq '||'
12456 && $want_break_before{$next_nonblank_type} )
12458 $bond_str += $bar_bias;
12459 $bar_bias += $delta_bias;
12461 elsif ( $next_nonblank_type eq 'k' ) {
12463 if ( $next_nonblank_token eq 'and'
12464 && $want_break_before{$next_nonblank_token} )
12466 $bond_str += $and_bias;
12467 $and_bias += $delta_bias;
12469 elsif ($next_nonblank_token =~ /^(or|err)$/
12470 && $want_break_before{$next_nonblank_token} )
12472 $bond_str += $or_bias;
12473 $or_bias += $delta_bias;
12476 # FIXME: needs more testing
12477 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
12478 $bond_str = $list_str if ( $bond_str > $list_str );
12480 elsif ( $token eq 'err'
12481 && !$want_break_before{$token} )
12483 $bond_str += $or_bias;
12484 $or_bias += $delta_bias;
12489 && !$want_break_before{$type} )
12491 $bond_str += $colon_bias;
12492 $colon_bias += $delta_bias;
12494 elsif ( $type eq '&&'
12495 && !$want_break_before{$type} )
12497 $bond_str += $amp_bias;
12498 $amp_bias += $delta_bias;
12500 elsif ( $type eq '||'
12501 && !$want_break_before{$type} )
12503 $bond_str += $bar_bias;
12504 $bar_bias += $delta_bias;
12506 elsif ( $type eq 'k' ) {
12508 if ( $token eq 'and'
12509 && !$want_break_before{$token} )
12511 $bond_str += $and_bias;
12512 $and_bias += $delta_bias;
12514 elsif ( $token eq 'or'
12515 && !$want_break_before{$token} )
12517 $bond_str += $or_bias;
12518 $or_bias += $delta_bias;
12522 # keep matrix and hash indices together
12523 # but make them a little below STRONG to allow breaking open
12524 # something like {'some-word'}{'some-very-long-word'} at the }{
12526 if ( ( $type eq ']' or $type eq 'R' )
12527 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
12530 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
12533 if ( $next_nonblank_token =~ /^->/ ) {
12535 # increase strength to the point where a break in the following
12536 # will be after the opening paren rather than at the arrow:
12538 if ( $type eq 'i' ) {
12539 $bond_str = 1.45 * STRONG;
12542 elsif ( $type =~ /^[\)\]\}R]$/ ) {
12543 $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
12546 # otherwise make strength before an '->' a little over a '+'
12548 if ( $bond_str <= NOMINAL ) {
12549 $bond_str = NOMINAL + 0.01;
12554 if ( $token eq ')' && $next_nonblank_token eq '[' ) {
12555 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
12558 # map1.t -- correct for a quirk in perl
12560 && $next_nonblank_type eq 'i'
12561 && $last_nonblank_type eq 'k'
12562 && $is_sort_map_grep{$last_nonblank_token} )
12564 # /^(sort|map|grep)$/ )
12566 $bond_str = NO_BREAK;
12569 # extrude.t: do not break before paren at:
12571 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
12572 $bond_str = NO_BREAK;
12575 # good to break after end of code blocks
12576 if ( $type eq '}' && $block_type ) {
12578 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
12579 $code_bias += $delta_bias;
12582 if ( $type eq 'k' ) {
12584 # allow certain control keywords to stand out
12585 if ( $next_nonblank_type eq 'k'
12586 && $is_last_next_redo_return{$token} )
12588 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
12591 # Don't break after keyword my. This is a quick fix for a
12592 # rare problem with perl. An example is this line from file
12594 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
12596 if ( $token eq 'my' ) {
12597 $bond_str = NO_BREAK;
12602 # good to break before 'if', 'unless', etc
12603 if ( $is_if_brace_follower{$next_nonblank_token} ) {
12604 $bond_str = VERY_WEAK;
12607 if ( $next_nonblank_type eq 'k' ) {
12609 # keywords like 'unless', 'if', etc, within statements
12611 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
12612 $bond_str = VERY_WEAK / 1.05;
12616 # try not to break before a comma-arrow
12617 elsif ( $next_nonblank_type eq '=>' ) {
12618 if ( $bond_str < STRONG ) { $bond_str = STRONG }
12621 #----------------------------------------------------------------------
12622 # only set NO_BREAK's from here on
12623 #----------------------------------------------------------------------
12624 if ( $type eq 'C' or $type eq 'U' ) {
12626 # use strict requires that bare word and => not be separated
12627 if ( $next_nonblank_type eq '=>' ) {
12628 $bond_str = NO_BREAK;
12633 # use strict requires that bare word within braces not start new line
12634 elsif ( $type eq 'L' ) {
12636 if ( $next_nonblank_type eq 'w' ) {
12637 $bond_str = NO_BREAK;
12641 # in older version of perl, use strict can cause problems with
12642 # breaks before bare words following opening parens. For example,
12643 # this will fail under older versions if a break is made between
12646 # open( MAIL, "a long filename or command");
12648 elsif ( $type eq '{' ) {
12650 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
12652 # but it's fine to break if the word is followed by a '=>'
12653 # or if it is obviously a sub call
12654 my $i_next_next_nonblank = $i_next_nonblank + 1;
12655 my $next_next_type = $types_to_go[$i_next_next_nonblank];
12656 if ( $next_next_type eq 'b'
12657 && $i_next_nonblank < $max_index_to_go )
12659 $i_next_next_nonblank++;
12660 $next_next_type = $types_to_go[$i_next_next_nonblank];
12663 ##if ( $next_next_type ne '=>' ) {
12664 # these are ok: '->xxx', '=>', '('
12666 # We'll check for an old breakpoint and keep a leading
12667 # bareword if it was that way in the input file.
12668 # Presumably it was ok that way. For example, the
12669 # following would remain unchanged:
12672 # January, February, March, April,
12673 # May, June, July, August,
12674 # September, October, November, December,
12677 # This should be sufficient:
12678 if ( !$old_breakpoint_to_go[$i]
12679 && ( $next_next_type eq ',' || $next_next_type eq '}' )
12682 $bond_str = NO_BREAK;
12687 elsif ( $type eq 'w' ) {
12689 if ( $next_nonblank_type eq 'R' ) {
12690 $bond_str = NO_BREAK;
12693 # use strict requires that bare word and => not be separated
12694 if ( $next_nonblank_type eq '=>' ) {
12695 $bond_str = NO_BREAK;
12699 # in fact, use strict hates bare words on any new line. For
12700 # example, a break before the underscore here provokes the
12701 # wrath of use strict:
12702 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
12703 elsif ( $type eq 'F' ) {
12704 $bond_str = NO_BREAK;
12707 # use strict does not allow separating type info from trailing { }
12708 # testfile is readmail.pl
12709 elsif ( $type eq 't' or $type eq 'i' ) {
12711 if ( $next_nonblank_type eq 'L' ) {
12712 $bond_str = NO_BREAK;
12716 # Do not break between a possible filehandle and a ? or / and do
12717 # not introduce a break after it if there is no blank
12719 elsif ( $type eq 'Z' ) {
12724 # if there is no blank and we do not want one. Examples:
12725 # print $x++ # do not break after $x
12726 # print HTML"HELLO" # break ok after HTML
12729 && defined( $want_left_space{$next_type} )
12730 && $want_left_space{$next_type} == WS_NO
12733 # or we might be followed by the start of a quote
12734 || $next_nonblank_type =~ /^[\/\?]$/
12737 $bond_str = NO_BREAK;
12741 # Do not break before a possible file handle
12742 if ( $next_nonblank_type eq 'Z' ) {
12743 $bond_str = NO_BREAK;
12746 # As a defensive measure, do not break between a '(' and a
12747 # filehandle. In some cases, this can cause an error. For
12748 # example, the following program works:
12755 # But this program fails:
12763 # This is normally only a problem with the 'extrude' option
12764 if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
12765 $bond_str = NO_BREAK;
12768 # patch to put cuddled elses back together when on multiple
12769 # lines, as in: } \n else \n { \n
12770 if ($rOpts_cuddled_else) {
12772 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
12773 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
12775 $bond_str = NO_BREAK;
12779 # keep '}' together with ';'
12780 if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
12781 $bond_str = NO_BREAK;
12784 # never break between sub name and opening paren
12785 if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
12786 $bond_str = NO_BREAK;
12789 #---------------------------------------------------------------
12791 # now take nesting depth into account
12792 #---------------------------------------------------------------
12793 # final strength incorporates the bond strength and nesting depth
12796 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
12797 if ( $total_nesting_depth > 0 ) {
12798 $strength = $bond_str + $total_nesting_depth;
12801 $strength = $bond_str;
12805 $strength = NO_BREAK;
12808 # always break after side comment
12809 if ( $type eq '#' ) { $strength = 0 }
12811 $bond_strength_to_go[$i] = $strength;
12813 FORMATTER_DEBUG_FLAG_BOND && do {
12814 my $str = substr( $token, 0, 15 );
12815 $str .= ' ' x ( 16 - length($str) );
12817 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
12824 sub pad_array_to_go {
12826 # to simplify coding in scan_list and set_bond_strengths, it helps
12827 # to create some extra blank tokens at the end of the arrays
12828 $tokens_to_go[ $max_index_to_go + 1 ] = '';
12829 $tokens_to_go[ $max_index_to_go + 2 ] = '';
12830 $types_to_go[ $max_index_to_go + 1 ] = 'b';
12831 $types_to_go[ $max_index_to_go + 2 ] = 'b';
12832 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
12833 $nesting_depth_to_go[$max_index_to_go];
12836 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
12837 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
12839 # shouldn't happen:
12840 unless ( get_saw_brace_error() ) {
12842 "Program bug in scan_list: hit nesting error which should have been caught\n"
12844 report_definite_bug();
12848 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
12853 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
12854 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
12858 { # begin scan_list
12861 $block_type, $current_depth,
12863 $i_last_nonblank_token, $last_colon_sequence_number,
12864 $last_nonblank_token, $last_nonblank_type,
12865 $last_old_breakpoint_count, $minimum_depth,
12866 $next_nonblank_block_type, $next_nonblank_token,
12867 $next_nonblank_type, $old_breakpoint_count,
12868 $starting_breakpoint_count, $starting_depth,
12874 @breakpoint_stack, @breakpoint_undo_stack,
12875 @comma_index, @container_type,
12876 @identifier_count_stack, @index_before_arrow,
12877 @interrupted_list, @item_count_stack,
12878 @last_comma_index, @last_dot_index,
12879 @last_nonblank_type, @old_breakpoint_count_stack,
12880 @opening_structure_index_stack, @rfor_semicolon_list,
12881 @has_old_logical_breakpoints, @rand_or_list,
12885 # routine to define essential variables when we go 'up' to
12887 sub check_for_new_minimum_depth {
12889 if ( $depth < $minimum_depth ) {
12891 $minimum_depth = $depth;
12893 # these arrays need not retain values between calls
12894 $breakpoint_stack[$depth] = $starting_breakpoint_count;
12895 $container_type[$depth] = "";
12896 $identifier_count_stack[$depth] = 0;
12897 $index_before_arrow[$depth] = -1;
12898 $interrupted_list[$depth] = 1;
12899 $item_count_stack[$depth] = 0;
12900 $last_nonblank_type[$depth] = "";
12901 $opening_structure_index_stack[$depth] = -1;
12903 $breakpoint_undo_stack[$depth] = undef;
12904 $comma_index[$depth] = undef;
12905 $last_comma_index[$depth] = undef;
12906 $last_dot_index[$depth] = undef;
12907 $old_breakpoint_count_stack[$depth] = undef;
12908 $has_old_logical_breakpoints[$depth] = 0;
12909 $rand_or_list[$depth] = [];
12910 $rfor_semicolon_list[$depth] = [];
12911 $i_equals[$depth] = -1;
12913 # these arrays must retain values between calls
12914 if ( !defined( $has_broken_sublist[$depth] ) ) {
12915 $dont_align[$depth] = 0;
12916 $has_broken_sublist[$depth] = 0;
12917 $want_comma_break[$depth] = 0;
12922 # routine to decide which commas to break at within a container;
12924 # $bp_count = number of comma breakpoints set
12925 # $do_not_break_apart = a flag indicating if container need not
12927 sub set_comma_breakpoints {
12931 my $do_not_break_apart = 0;
12932 if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
12934 my $fbc = $forced_breakpoint_count;
12936 # always open comma lists not preceded by keywords,
12937 # barewords, identifiers (that is, anything that doesn't
12938 # look like a function call)
12939 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
12941 set_comma_breakpoints_do(
12943 $opening_structure_index_stack[$dd],
12945 $item_count_stack[$dd],
12946 $identifier_count_stack[$dd],
12948 $next_nonblank_type,
12949 $container_type[$dd],
12950 $interrupted_list[$dd],
12951 \$do_not_break_apart,
12954 $bp_count = $forced_breakpoint_count - $fbc;
12955 $do_not_break_apart = 0 if $must_break_open;
12957 return ( $bp_count, $do_not_break_apart );
12960 my %is_logical_container;
12963 @_ = qw# if elsif unless while and or err not && | || ? : ! #;
12964 @is_logical_container{@_} = (1) x scalar(@_);
12967 sub set_for_semicolon_breakpoints {
12969 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
12970 set_forced_breakpoint($_);
12974 sub set_logical_breakpoints {
12977 $item_count_stack[$dd] == 0
12978 && $is_logical_container{ $container_type[$dd] }
12981 || $has_old_logical_breakpoints[$dd]
12985 # Look for breaks in this order:
12988 foreach my $i ( 0 .. 3 ) {
12989 if ( $rand_or_list[$dd][$i] ) {
12990 foreach ( @{ $rand_or_list[$dd][$i] } ) {
12991 set_forced_breakpoint($_);
12994 # break at any 'if' and 'unless' too
12995 foreach ( @{ $rand_or_list[$dd][4] } ) {
12996 set_forced_breakpoint($_);
12998 $rand_or_list[$dd] = [];
13005 sub is_unbreakable_container {
13007 # never break a container of one of these types
13008 # because bad things can happen (map1.t)
13010 $is_sort_map_grep{ $container_type[$dd] };
13015 # This routine is responsible for setting line breaks for all lists,
13016 # so that hierarchical structure can be displayed and so that list
13017 # items can be vertically aligned. The output of this routine is
13018 # stored in the array @forced_breakpoint_to_go, which is used to set
13019 # final breakpoints.
13021 $starting_depth = $nesting_depth_to_go[0];
13024 $current_depth = $starting_depth;
13026 $last_colon_sequence_number = -1;
13027 $last_nonblank_token = ';';
13028 $last_nonblank_type = ';';
13029 $last_old_breakpoint_count = 0;
13030 $minimum_depth = $current_depth + 1; # forces update in check below
13031 $old_breakpoint_count = 0;
13032 $starting_breakpoint_count = $forced_breakpoint_count;
13035 $type_sequence = '';
13037 check_for_new_minimum_depth($current_depth);
13039 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13040 my $want_previous_breakpoint = -1;
13042 my $saw_good_breakpoint;
13043 my $i_line_end = -1;
13044 my $i_line_start = -1;
13046 # loop over all tokens in this batch
13047 while ( ++$i <= $max_index_to_go ) {
13048 if ( $type ne 'b' ) {
13049 $i_last_nonblank_token = $i - 1;
13050 $last_nonblank_type = $type;
13051 $last_nonblank_token = $token;
13053 $type = $types_to_go[$i];
13054 $block_type = $block_type_to_go[$i];
13055 $token = $tokens_to_go[$i];
13056 $type_sequence = $type_sequence_to_go[$i];
13057 my $next_type = $types_to_go[ $i + 1 ];
13058 my $next_token = $tokens_to_go[ $i + 1 ];
13059 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13060 $next_nonblank_type = $types_to_go[$i_next_nonblank];
13061 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
13062 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13064 # set break if flag was set
13065 if ( $want_previous_breakpoint >= 0 ) {
13066 set_forced_breakpoint($want_previous_breakpoint);
13067 $want_previous_breakpoint = -1;
13070 $last_old_breakpoint_count = $old_breakpoint_count;
13071 if ( $old_breakpoint_to_go[$i] ) {
13073 $i_line_start = $i_next_nonblank;
13075 $old_breakpoint_count++;
13077 # Break before certain keywords if user broke there and
13078 # this is a 'safe' break point. The idea is to retain
13079 # any preferred breaks for sequential list operations,
13080 # like a schwartzian transform.
13081 if ($rOpts_break_at_old_keyword_breakpoints) {
13083 $next_nonblank_type eq 'k'
13084 && $is_keyword_returning_list{$next_nonblank_token}
13085 && ( $type =~ /^[=\)\]\}Riw]$/
13087 && $is_keyword_returning_list{$token} )
13091 # we actually have to set this break next time through
13092 # the loop because if we are at a closing token (such
13093 # as '}') which forms a one-line block, this break might
13095 $want_previous_breakpoint = $i;
13099 next if ( $type eq 'b' );
13100 $depth = $nesting_depth_to_go[ $i + 1 ];
13102 # safety check - be sure we always break after a comment
13103 # Shouldn't happen .. an error here probably means that the
13104 # nobreak flag did not get turned off correctly during
13106 if ( $type eq '#' ) {
13107 if ( $i != $max_index_to_go ) {
13109 "Non-fatal program bug: backup logic needed to break after a comment\n"
13111 report_definite_bug();
13112 $nobreak_to_go[$i] = 0;
13113 set_forced_breakpoint($i);
13117 # Force breakpoints at certain tokens in long lines.
13118 # Note that such breakpoints will be undone later if these tokens
13119 # are fully contained within parens on a line.
13123 && $token =~ /^(if|unless)$/
13127 # or container is broken (by side-comment, etc)
13128 || ( $next_nonblank_token eq '('
13129 && $mate_index_to_go[$i_next_nonblank] < $i )
13133 set_forced_breakpoint( $i - 1 );
13136 # remember locations of '||' and '&&' for possible breaks if we
13137 # decide this is a long logical expression.
13138 if ( $type eq '||' ) {
13139 push @{ $rand_or_list[$depth][2] }, $i;
13140 ++$has_old_logical_breakpoints[$depth]
13141 if ( ( $i == $i_line_start || $i == $i_line_end )
13142 && $rOpts_break_at_old_logical_breakpoints );
13144 elsif ( $type eq '&&' ) {
13145 push @{ $rand_or_list[$depth][3] }, $i;
13146 ++$has_old_logical_breakpoints[$depth]
13147 if ( ( $i == $i_line_start || $i == $i_line_end )
13148 && $rOpts_break_at_old_logical_breakpoints );
13150 elsif ( $type eq 'f' ) {
13151 push @{ $rfor_semicolon_list[$depth] }, $i;
13153 elsif ( $type eq 'k' ) {
13154 if ( $token eq 'and' ) {
13155 push @{ $rand_or_list[$depth][1] }, $i;
13156 ++$has_old_logical_breakpoints[$depth]
13157 if ( ( $i == $i_line_start || $i == $i_line_end )
13158 && $rOpts_break_at_old_logical_breakpoints );
13161 # break immediately at 'or's which are probably not in a logical
13162 # block -- but we will break in logical breaks below so that
13163 # they do not add to the forced_breakpoint_count
13164 elsif ( $token eq 'or' ) {
13165 push @{ $rand_or_list[$depth][0] }, $i;
13166 ++$has_old_logical_breakpoints[$depth]
13167 if ( ( $i == $i_line_start || $i == $i_line_end )
13168 && $rOpts_break_at_old_logical_breakpoints );
13169 if ( $is_logical_container{ $container_type[$depth] } ) {
13172 if ($is_long_line) { set_forced_breakpoint($i) }
13173 elsif ( ( $i == $i_line_start || $i == $i_line_end )
13174 && $rOpts_break_at_old_logical_breakpoints )
13176 $saw_good_breakpoint = 1;
13180 elsif ( $token eq 'if' || $token eq 'unless' ) {
13181 push @{ $rand_or_list[$depth][4] }, $i;
13182 if ( ( $i == $i_line_start || $i == $i_line_end )
13183 && $rOpts_break_at_old_logical_breakpoints )
13185 set_forced_breakpoint($i);
13189 elsif ( $is_assignment{$type} ) {
13190 $i_equals[$depth] = $i;
13193 if ($type_sequence) {
13195 # handle any postponed closing breakpoints
13196 if ( $token =~ /^[\)\]\}\:]$/ ) {
13197 if ( $type eq ':' ) {
13198 $last_colon_sequence_number = $type_sequence;
13200 # TESTING: retain break at a ':' line break
13201 if ( ( $i == $i_line_start || $i == $i_line_end )
13202 && $rOpts_break_at_old_ternary_breakpoints )
13206 set_forced_breakpoint($i);
13208 # break at previous '='
13209 if ( $i_equals[$depth] > 0 ) {
13210 set_forced_breakpoint( $i_equals[$depth] );
13211 $i_equals[$depth] = -1;
13215 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
13216 my $inc = ( $type eq ':' ) ? 0 : 1;
13217 set_forced_breakpoint( $i - $inc );
13218 delete $postponed_breakpoint{$type_sequence};
13222 # set breaks at ?/: if they will get separated (and are
13223 # not a ?/: chain), or if the '?' is at the end of the
13225 elsif ( $token eq '?' ) {
13226 my $i_colon = $mate_index_to_go[$i];
13228 $i_colon <= 0 # the ':' is not in this batch
13229 || $i == 0 # this '?' is the first token of the line
13231 $max_index_to_go # or this '?' is the last token
13235 # don't break at a '?' if preceded by ':' on
13236 # this line of previous ?/: pair on this line.
13237 # This is an attempt to preserve a chain of ?/:
13238 # expressions (elsif2.t). And don't break if
13239 # this has a side comment.
13240 set_forced_breakpoint($i)
13242 $type_sequence == (
13243 $last_colon_sequence_number +
13244 TYPE_SEQUENCE_INCREMENT
13246 || $tokens_to_go[$max_index_to_go] eq '#'
13248 set_closing_breakpoint($i);
13253 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
13255 #------------------------------------------------------------
13256 # Handle Increasing Depth..
13258 # prepare for a new list when depth increases
13259 # token $i is a '(','{', or '['
13260 #------------------------------------------------------------
13261 if ( $depth > $current_depth ) {
13263 $breakpoint_stack[$depth] = $forced_breakpoint_count;
13264 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
13265 $has_broken_sublist[$depth] = 0;
13266 $identifier_count_stack[$depth] = 0;
13267 $index_before_arrow[$depth] = -1;
13268 $interrupted_list[$depth] = 0;
13269 $item_count_stack[$depth] = 0;
13270 $last_comma_index[$depth] = undef;
13271 $last_dot_index[$depth] = undef;
13272 $last_nonblank_type[$depth] = $last_nonblank_type;
13273 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
13274 $opening_structure_index_stack[$depth] = $i;
13275 $rand_or_list[$depth] = [];
13276 $rfor_semicolon_list[$depth] = [];
13277 $i_equals[$depth] = -1;
13278 $want_comma_break[$depth] = 0;
13279 $container_type[$depth] =
13280 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
13281 ? $last_nonblank_token
13283 $has_old_logical_breakpoints[$depth] = 0;
13285 # if line ends here then signal closing token to break
13286 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
13288 set_closing_breakpoint($i);
13291 # Not all lists of values should be vertically aligned..
13292 $dont_align[$depth] =
13294 # code BLOCKS are handled at a higher level
13295 ( $block_type ne "" )
13297 # certain paren lists
13298 || ( $type eq '(' ) && (
13300 # it does not usually look good to align a list of
13301 # identifiers in a parameter list, as in:
13302 # my($var1, $var2, ...)
13303 # (This test should probably be refined, for now I'm just
13304 # testing for any keyword)
13305 ( $last_nonblank_type eq 'k' )
13307 # a trailing '(' usually indicates a non-list
13308 || ( $next_nonblank_type eq '(' )
13311 # patch to outdent opening brace of long if/for/..
13312 # statements (like this one). See similar coding in
13313 # set_continuation breaks. We have also catch it here for
13314 # short line fragments which otherwise will not go through
13315 # set_continuation_breaks.
13319 # if we have the ')' but not its '(' in this batch..
13320 && ( $last_nonblank_token eq ')' )
13321 && $mate_index_to_go[$i_last_nonblank_token] < 0
13323 # and user wants brace to left
13324 && !$rOpts->{'opening-brace-always-on-right'}
13326 && ( $type eq '{' ) # should be true
13327 && ( $token eq '{' ) # should be true
13330 set_forced_breakpoint( $i - 1 );
13334 #------------------------------------------------------------
13335 # Handle Decreasing Depth..
13337 # finish off any old list when depth decreases
13338 # token $i is a ')','}', or ']'
13339 #------------------------------------------------------------
13340 elsif ( $depth < $current_depth ) {
13342 check_for_new_minimum_depth($depth);
13344 # force all outer logical containers to break after we see on
13346 $has_old_logical_breakpoints[$depth] ||=
13347 $has_old_logical_breakpoints[$current_depth];
13349 # Patch to break between ') {' if the paren list is broken.
13350 # There is similar logic in set_continuation_breaks for
13351 # non-broken lists.
13353 && $next_nonblank_block_type
13354 && $interrupted_list[$current_depth]
13355 && $next_nonblank_type eq '{'
13356 && !$rOpts->{'opening-brace-always-on-right'} )
13358 set_forced_breakpoint($i);
13361 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
13363 # set breaks at commas if necessary
13364 my ( $bp_count, $do_not_break_apart ) =
13365 set_comma_breakpoints($current_depth);
13367 my $i_opening = $opening_structure_index_stack[$current_depth];
13368 my $saw_opening_structure = ( $i_opening >= 0 );
13370 # this term is long if we had to break at interior commas..
13371 my $is_long_term = $bp_count > 0;
13373 # ..or if the length between opening and closing parens exceeds
13374 # allowed line length
13375 if ( !$is_long_term && $saw_opening_structure ) {
13376 my $i_opening_minus = find_token_starting_list($i_opening);
13378 # Note: we have to allow for one extra space after a
13379 # closing token so that we do not strand a comma or
13380 # semicolon, hence the '>=' here (oneline.t)
13382 excess_line_length( $i_opening_minus, $i ) >= 0;
13385 # We've set breaks after all comma-arrows. Now we have to
13386 # undo them if this can be a one-line block
13387 # (the only breakpoints set will be due to comma-arrows)
13390 # user doesn't require breaking after all comma-arrows
13391 ( $rOpts_comma_arrow_breakpoints != 0 )
13393 # and if the opening structure is in this batch
13394 && $saw_opening_structure
13396 # and either on the same old line
13398 $old_breakpoint_count_stack[$current_depth] ==
13399 $last_old_breakpoint_count
13401 # or user wants to form long blocks with arrows
13402 || $rOpts_comma_arrow_breakpoints == 2
13405 # and we made some breakpoints between the opening and closing
13406 && ( $breakpoint_undo_stack[$current_depth] <
13407 $forced_breakpoint_undo_count )
13409 # and this block is short enough to fit on one line
13410 # Note: use < because need 1 more space for possible comma
13415 undo_forced_breakpoint_stack(
13416 $breakpoint_undo_stack[$current_depth] );
13419 # now see if we have any comma breakpoints left
13420 my $has_comma_breakpoints =
13421 ( $breakpoint_stack[$current_depth] !=
13422 $forced_breakpoint_count );
13424 # update broken-sublist flag of the outer container
13425 $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
13426 || $has_broken_sublist[$current_depth]
13428 || $has_comma_breakpoints;
13430 # Having come to the closing ')', '}', or ']', now we have to decide if we
13431 # should 'open up' the structure by placing breaks at the opening and
13432 # closing containers. This is a tricky decision. Here are some of the
13433 # basic considerations:
13435 # -If this is a BLOCK container, then any breakpoints will have already
13436 # been set (and according to user preferences), so we need do nothing here.
13438 # -If we have a comma-separated list for which we can align the list items,
13439 # then we need to do so because otherwise the vertical aligner cannot
13440 # currently do the alignment.
13442 # -If this container does itself contain a container which has been broken
13443 # open, then it should be broken open to properly show the structure.
13445 # -If there is nothing to align, and no other reason to break apart,
13446 # then do not do it.
13448 # We will not break open the parens of a long but 'simple' logical expression.
13451 # This is an example of a simple logical expression and its formatting:
13453 # if ( $bigwasteofspace1 && $bigwasteofspace2
13454 # || $bigwasteofspace3 && $bigwasteofspace4 )
13456 # Most people would prefer this than the 'spacey' version:
13459 # $bigwasteofspace1 && $bigwasteofspace2
13460 # || $bigwasteofspace3 && $bigwasteofspace4
13463 # To illustrate the rules for breaking logical expressions, consider:
13467 # and ( exists $ids_excl_uc{$id_uc}
13468 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
13470 # This is on the verge of being difficult to read. The current default is to
13471 # open it up like this:
13476 # and ( exists $ids_excl_uc{$id_uc}
13477 # or grep $id_uc =~ /$_/, @ids_excl_uc )
13480 # This is a compromise which tries to avoid being too dense and to spacey.
13481 # A more spaced version would be:
13487 # exists $ids_excl_uc{$id_uc}
13488 # or grep $id_uc =~ /$_/, @ids_excl_uc
13492 # Some people might prefer the spacey version -- an option could be added. The
13493 # innermost expression contains a long block '( exists $ids_... ')'.
13495 # Here is how the logic goes: We will force a break at the 'or' that the
13496 # innermost expression contains, but we will not break apart its opening and
13497 # closing containers because (1) it contains no multi-line sub-containers itself,
13498 # and (2) there is no alignment to be gained by breaking it open like this
13501 # exists $ids_excl_uc{$id_uc}
13502 # or grep $id_uc =~ /$_/, @ids_excl_uc
13505 # (although this looks perfectly ok and might be good for long expressions). The
13506 # outer 'if' container, though, contains a broken sub-container, so it will be
13507 # broken open to avoid too much density. Also, since it contains no 'or's, there
13508 # will be a forced break at its 'and'.
13510 # set some flags telling something about this container..
13511 my $is_simple_logical_expression = 0;
13512 if ( $item_count_stack[$current_depth] == 0
13513 && $saw_opening_structure
13514 && $tokens_to_go[$i_opening] eq '('
13515 && $is_logical_container{ $container_type[$current_depth] }
13519 # This seems to be a simple logical expression with
13520 # no existing breakpoints. Set a flag to prevent
13522 if ( !$has_comma_breakpoints ) {
13523 $is_simple_logical_expression = 1;
13526 # This seems to be a simple logical expression with
13527 # breakpoints (broken sublists, for example). Break
13528 # at all 'or's and '||'s.
13530 set_logical_breakpoints($current_depth);
13535 && @{ $rfor_semicolon_list[$current_depth] } )
13537 set_for_semicolon_breakpoints($current_depth);
13539 # open up a long 'for' or 'foreach' container to allow
13540 # leading term alignment unless -lp is used.
13541 $has_comma_breakpoints = 1
13542 unless $rOpts_line_up_parentheses;
13547 # breaks for code BLOCKS are handled at a higher level
13550 # we do not need to break at the top level of an 'if'
13552 && !$is_simple_logical_expression
13554 ## modification to keep ': (' containers vertically tight;
13555 ## but probably better to let user set -vt=1 to avoid
13556 ## inconsistency with other paren types
13557 ## && ($container_type[$current_depth] ne ':')
13559 # otherwise, we require one of these reasons for breaking:
13562 # - this term has forced line breaks
13563 $has_comma_breakpoints
13565 # - the opening container is separated from this batch
13566 # for some reason (comment, blank line, code block)
13567 # - this is a non-paren container spanning multiple lines
13568 || !$saw_opening_structure
13570 # - this is a long block contained in another breakable
13573 && $container_environment_to_go[$i_opening] ne
13579 # For -lp option, we must put a breakpoint before
13580 # the token which has been identified as starting
13581 # this indentation level. This is necessary for
13582 # proper alignment.
13583 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
13585 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
13586 if ( $i_opening + 1 < $max_index_to_go
13587 && $types_to_go[ $i_opening + 1 ] eq 'b' )
13589 $item = $leading_spaces_to_go[ $i_opening + 2 ];
13591 if ( defined($item) ) {
13592 my $i_start_2 = $item->get_STARTING_INDEX();
13594 defined($i_start_2)
13596 # we are breaking after an opening brace, paren,
13597 # so don't break before it too
13598 && $i_start_2 ne $i_opening
13602 # Only break for breakpoints at the same
13603 # indentation level as the opening paren
13604 my $test1 = $nesting_depth_to_go[$i_opening];
13605 my $test2 = $nesting_depth_to_go[$i_start_2];
13606 if ( $test2 == $test1 ) {
13607 set_forced_breakpoint( $i_start_2 - 1 );
13613 # break after opening structure.
13614 # note: break before closing structure will be automatic
13615 if ( $minimum_depth <= $current_depth ) {
13617 set_forced_breakpoint($i_opening)
13618 unless ( $do_not_break_apart
13619 || is_unbreakable_container($current_depth) );
13621 # break at '.' of lower depth level before opening token
13622 if ( $last_dot_index[$depth] ) {
13623 set_forced_breakpoint( $last_dot_index[$depth] );
13626 # break before opening structure if preeced by another
13627 # closing structure and a comma. This is normally
13628 # done by the previous closing brace, but not
13629 # if it was a one-line block.
13630 if ( $i_opening > 2 ) {
13632 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
13636 if ( $types_to_go[$i_prev] eq ','
13637 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
13639 set_forced_breakpoint($i_prev);
13642 # also break before something like ':(' or '?('
13645 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
13647 my $token_prev = $tokens_to_go[$i_prev];
13648 if ( $want_break_before{$token_prev} ) {
13649 set_forced_breakpoint($i_prev);
13655 # break after comma following closing structure
13656 if ( $next_type eq ',' ) {
13657 set_forced_breakpoint( $i + 1 );
13660 # break before an '=' following closing structure
13662 $is_assignment{$next_nonblank_type}
13663 && ( $breakpoint_stack[$current_depth] !=
13664 $forced_breakpoint_count )
13667 set_forced_breakpoint($i);
13670 # break at any comma before the opening structure Added
13671 # for -lp, but seems to be good in general. It isn't
13672 # obvious how far back to look; the '5' below seems to
13673 # work well and will catch the comma in something like
13674 # push @list, myfunc( $param, $param, ..
13676 my $icomma = $last_comma_index[$depth];
13677 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
13678 unless ( $forced_breakpoint_to_go[$icomma] ) {
13679 set_forced_breakpoint($icomma);
13682 } # end logic to open up a container
13684 # Break open a logical container open if it was already open
13685 elsif ($is_simple_logical_expression
13686 && $has_old_logical_breakpoints[$current_depth] )
13688 set_logical_breakpoints($current_depth);
13691 # Handle long container which does not get opened up
13692 elsif ($is_long_term) {
13694 # must set fake breakpoint to alert outer containers that
13696 set_fake_breakpoint();
13700 #------------------------------------------------------------
13701 # Handle this token
13702 #------------------------------------------------------------
13704 $current_depth = $depth;
13706 # handle comma-arrow
13707 if ( $type eq '=>' ) {
13708 next if ( $last_nonblank_type eq '=>' );
13709 next if $rOpts_break_at_old_comma_breakpoints;
13710 next if $rOpts_comma_arrow_breakpoints == 3;
13711 $want_comma_break[$depth] = 1;
13712 $index_before_arrow[$depth] = $i_last_nonblank_token;
13716 elsif ( $type eq '.' ) {
13717 $last_dot_index[$depth] = $i;
13720 # Turn off alignment if we are sure that this is not a list
13721 # environment. To be safe, we will do this if we see certain
13722 # non-list tokens, such as ';', and also the environment is
13723 # not a list. Note that '=' could be in any of the = operators
13724 # (lextest.t). We can't just use the reported environment
13725 # because it can be incorrect in some cases.
13726 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
13727 && $container_environment_to_go[$i] ne 'LIST' )
13729 $dont_align[$depth] = 1;
13730 $want_comma_break[$depth] = 0;
13731 $index_before_arrow[$depth] = -1;
13734 # now just handle any commas
13735 next unless ( $type eq ',' );
13737 $last_dot_index[$depth] = undef;
13738 $last_comma_index[$depth] = $i;
13740 # break here if this comma follows a '=>'
13741 # but not if there is a side comment after the comma
13742 if ( $want_comma_break[$depth] ) {
13744 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
13745 $want_comma_break[$depth] = 0;
13746 $index_before_arrow[$depth] = -1;
13750 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13752 # break before the previous token if it looks safe
13753 # Example of something that we will not try to break before:
13754 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
13755 my $ibreak = $index_before_arrow[$depth] - 1;
13757 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
13759 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
13760 if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) {
13761 set_forced_breakpoint($ibreak);
13765 $want_comma_break[$depth] = 0;
13766 $index_before_arrow[$depth] = -1;
13768 # handle list which mixes '=>'s and ','s:
13769 # treat any list items so far as an interrupted list
13770 $interrupted_list[$depth] = 1;
13774 # skip past these commas if we are not supposed to format them
13775 next if ( $dont_align[$depth] );
13777 # break after all commas above starting depth
13778 if ( $depth < $starting_depth ) {
13779 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13783 # add this comma to the list..
13784 my $item_count = $item_count_stack[$depth];
13785 if ( $item_count == 0 ) {
13787 # but do not form a list with no opening structure
13790 # open INFILE_COPY, ">$input_file_copy"
13791 # or die ("very long message");
13793 if ( ( $opening_structure_index_stack[$depth] < 0 )
13794 && $container_environment_to_go[$i] eq 'BLOCK' )
13796 $dont_align[$depth] = 1;
13801 $comma_index[$depth][$item_count] = $i;
13802 ++$item_count_stack[$depth];
13803 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
13804 $identifier_count_stack[$depth]++;
13808 #-------------------------------------------
13809 # end of loop over all tokens in this batch
13810 #-------------------------------------------
13812 # set breaks for any unfinished lists ..
13813 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
13815 $interrupted_list[$dd] = 1;
13816 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
13817 set_comma_breakpoints($dd);
13818 set_logical_breakpoints($dd)
13819 if ( $has_old_logical_breakpoints[$dd] );
13820 set_for_semicolon_breakpoints($dd);
13822 # break open container...
13823 my $i_opening = $opening_structure_index_stack[$dd];
13824 set_forced_breakpoint($i_opening)
13826 is_unbreakable_container($dd)
13828 # Avoid a break which would place an isolated ' or "
13831 && $i_opening >= $max_index_to_go - 2
13832 && $token =~ /^['"]$/ )
13836 # Return a flag indicating if the input file had some good breakpoints.
13837 # This flag will be used to force a break in a line shorter than the
13838 # allowed line length.
13839 if ( $has_old_logical_breakpoints[$current_depth] ) {
13840 $saw_good_breakpoint = 1;
13842 return $saw_good_breakpoint;
13846 sub find_token_starting_list {
13848 # When testing to see if a block will fit on one line, some
13849 # previous token(s) may also need to be on the line; particularly
13850 # if this is a sub call. So we will look back at least one
13851 # token. NOTE: This isn't perfect, but not critical, because
13852 # if we mis-identify a block, it will be wrapped and therefore
13853 # fixed the next time it is formatted.
13854 my $i_opening_paren = shift;
13855 my $i_opening_minus = $i_opening_paren;
13856 my $im1 = $i_opening_paren - 1;
13857 my $im2 = $i_opening_paren - 2;
13858 my $im3 = $i_opening_paren - 3;
13859 my $typem1 = $types_to_go[$im1];
13860 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
13861 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
13862 $i_opening_minus = $i_opening_paren;
13864 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
13865 $i_opening_minus = $im1 if $im1 >= 0;
13867 # walk back to improve length estimate
13868 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
13869 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
13870 $i_opening_minus = $j;
13872 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
13874 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
13875 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
13876 $i_opening_minus = $im2;
13878 return $i_opening_minus;
13881 { # begin set_comma_breakpoints_do
13883 my %is_keyword_with_special_leading_term;
13887 # These keywords have prototypes which allow a special leading item
13888 # followed by a list
13890 qw(formline grep kill map printf sprintf push chmod join pack unshift);
13891 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
13894 sub set_comma_breakpoints_do {
13896 # Given a list with some commas, set breakpoints at some of the
13897 # commas, if necessary, to make it easy to read. This list is
13900 $depth, $i_opening_paren, $i_closing_paren,
13901 $item_count, $identifier_count, $rcomma_index,
13902 $next_nonblank_type, $list_type, $interrupted,
13903 $rdo_not_break_apart, $must_break_open,
13906 # nothing to do if no commas seen
13907 return if ( $item_count < 1 );
13908 my $i_first_comma = $$rcomma_index[0];
13909 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
13910 my $i_last_comma = $i_true_last_comma;
13911 if ( $i_last_comma >= $max_index_to_go ) {
13912 $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
13913 return if ( $item_count < 1 );
13916 #---------------------------------------------------------------
13917 # find lengths of all items in the list to calculate page layout
13918 #---------------------------------------------------------------
13919 my $comma_count = $item_count;
13925 my @max_length = ( 0, 0 );
13926 my $first_term_length;
13927 my $i = $i_opening_paren;
13930 for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
13931 $is_odd = 1 - $is_odd;
13932 $i_prev_plus = $i + 1;
13933 $i = $$rcomma_index[$j];
13936 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
13938 ( $types_to_go[$i_prev_plus] eq 'b' )
13941 push @i_term_begin, $i_term_begin;
13942 push @i_term_end, $i_term_end;
13943 push @i_term_comma, $i;
13945 # note: currently adding 2 to all lengths (for comma and space)
13947 2 + token_sequence_length( $i_term_begin, $i_term_end );
13948 push @item_lengths, $length;
13951 $first_term_length = $length;
13955 if ( $length > $max_length[$is_odd] ) {
13956 $max_length[$is_odd] = $length;
13961 # now we have to make a distinction between the comma count and item
13962 # count, because the item count will be one greater than the comma
13963 # count if the last item is not terminated with a comma
13965 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
13966 ? $i_last_comma + 1
13969 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
13970 ? $i_closing_paren - 2
13971 : $i_closing_paren - 1;
13972 my $i_effective_last_comma = $i_last_comma;
13974 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
13976 if ( $last_item_length > 0 ) {
13978 # add 2 to length because other lengths include a comma and a blank
13979 $last_item_length += 2;
13980 push @item_lengths, $last_item_length;
13981 push @i_term_begin, $i_b + 1;
13982 push @i_term_end, $i_e;
13983 push @i_term_comma, undef;
13985 my $i_odd = $item_count % 2;
13987 if ( $last_item_length > $max_length[$i_odd] ) {
13988 $max_length[$i_odd] = $last_item_length;
13992 $i_effective_last_comma = $i_e + 1;
13994 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
13995 $identifier_count++;
13999 #---------------------------------------------------------------
14000 # End of length calculations
14001 #---------------------------------------------------------------
14003 #---------------------------------------------------------------
14004 # Compound List Rule 1:
14005 # Break at (almost) every comma for a list containing a broken
14006 # sublist. This has higher priority than the Interrupted List
14008 #---------------------------------------------------------------
14009 if ( $has_broken_sublist[$depth] ) {
14011 # Break at every comma except for a comma between two
14012 # simple, small terms. This prevents long vertical
14013 # columns of, say, just 0's.
14014 my $small_length = 10; # 2 + actual maximum length wanted
14016 # We'll insert a break in long runs of small terms to
14017 # allow alignment in uniform tables.
14018 my $skipped_count = 0;
14019 my $columns = table_columns_available($i_first_comma);
14020 my $fields = int( $columns / $small_length );
14021 if ( $rOpts_maximum_fields_per_table
14022 && $fields > $rOpts_maximum_fields_per_table )
14024 $fields = $rOpts_maximum_fields_per_table;
14026 my $max_skipped_count = $fields - 1;
14028 my $is_simple_last_term = 0;
14029 my $is_simple_next_term = 0;
14030 foreach my $j ( 0 .. $item_count ) {
14031 $is_simple_last_term = $is_simple_next_term;
14032 $is_simple_next_term = 0;
14033 if ( $j < $item_count
14034 && $i_term_end[$j] == $i_term_begin[$j]
14035 && $item_lengths[$j] <= $small_length )
14037 $is_simple_next_term = 1;
14040 if ( $is_simple_last_term
14041 && $is_simple_next_term
14042 && $skipped_count < $max_skipped_count )
14047 $skipped_count = 0;
14048 my $i = $i_term_comma[ $j - 1 ];
14049 last unless defined $i;
14050 set_forced_breakpoint($i);
14054 # always break at the last comma if this list is
14055 # interrupted; we wouldn't want to leave a terminal '{', for
14057 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14061 #my ( $a, $b, $c ) = caller();
14062 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
14063 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
14064 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14066 #---------------------------------------------------------------
14067 # Interrupted List Rule:
14068 # A list is is forced to use old breakpoints if it was interrupted
14069 # by side comments or blank lines, or requested by user.
14070 #---------------------------------------------------------------
14071 if ( $rOpts_break_at_old_comma_breakpoints
14073 || $i_opening_paren < 0 )
14075 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14079 #---------------------------------------------------------------
14080 # Looks like a list of items. We have to look at it and size it up.
14081 #---------------------------------------------------------------
14083 my $opening_token = $tokens_to_go[$i_opening_paren];
14084 my $opening_environment =
14085 $container_environment_to_go[$i_opening_paren];
14087 #-------------------------------------------------------------------
14088 # Return if this will fit on one line
14089 #-------------------------------------------------------------------
14091 my $i_opening_minus = find_token_starting_list($i_opening_paren);
14093 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14095 #-------------------------------------------------------------------
14096 # Now we know that this block spans multiple lines; we have to set
14097 # at least one breakpoint -- real or fake -- as a signal to break
14098 # open any outer containers.
14099 #-------------------------------------------------------------------
14100 set_fake_breakpoint();
14102 # be sure we do not extend beyond the current list length
14103 if ( $i_effective_last_comma >= $max_index_to_go ) {
14104 $i_effective_last_comma = $max_index_to_go - 1;
14107 # Set a flag indicating if we need to break open to keep -lp
14108 # items aligned. This is necessary if any of the list terms
14109 # exceeds the available space after the '('.
14110 my $need_lp_break_open = $must_break_open;
14111 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14112 my $columns_if_unbroken = $rOpts_maximum_line_length -
14113 total_line_length( $i_opening_minus, $i_opening_paren );
14114 $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
14115 || ( $max_length[1] > $columns_if_unbroken )
14116 || ( $first_term_length > $columns_if_unbroken );
14119 # Specify if the list must have an even number of fields or not.
14120 # It is generally safest to assume an even number, because the
14121 # list items might be a hash list. But if we can be sure that
14122 # it is not a hash, then we can allow an odd number for more
14124 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
14126 if ( $identifier_count >= $item_count - 1
14127 || $is_assignment{$next_nonblank_type}
14128 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
14134 # do we have a long first term which should be
14135 # left on a line by itself?
14136 my $use_separate_first_term = (
14137 $odd_or_even == 1 # only if we can use 1 field/line
14138 && $item_count > 3 # need several items
14139 && $first_term_length >
14140 2 * $max_length[0] - 2 # need long first term
14141 && $first_term_length >
14142 2 * $max_length[1] - 2 # need long first term
14145 # or do we know from the type of list that the first term should
14147 if ( !$use_separate_first_term ) {
14148 if ( $is_keyword_with_special_leading_term{$list_type} ) {
14149 $use_separate_first_term = 1;
14151 # should the container be broken open?
14152 if ( $item_count < 3 ) {
14153 if ( $i_first_comma - $i_opening_paren < 4 ) {
14154 $$rdo_not_break_apart = 1;
14157 elsif ($first_term_length < 20
14158 && $i_first_comma - $i_opening_paren < 4 )
14160 my $columns = table_columns_available($i_first_comma);
14161 if ( $first_term_length < $columns ) {
14162 $$rdo_not_break_apart = 1;
14169 if ($use_separate_first_term) {
14171 # ..set a break and update starting values
14172 $use_separate_first_term = 1;
14173 set_forced_breakpoint($i_first_comma);
14174 $i_opening_paren = $i_first_comma;
14175 $i_first_comma = $$rcomma_index[1];
14177 return if $comma_count == 1;
14178 shift @item_lengths;
14179 shift @i_term_begin;
14181 shift @i_term_comma;
14184 # if not, update the metrics to include the first term
14186 if ( $first_term_length > $max_length[0] ) {
14187 $max_length[0] = $first_term_length;
14191 # Field width parameters
14192 my $pair_width = ( $max_length[0] + $max_length[1] );
14194 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
14196 # Number of free columns across the page width for laying out tables
14197 my $columns = table_columns_available($i_first_comma);
14199 # Estimated maximum number of fields which fit this space
14200 # This will be our first guess
14201 my $number_of_fields_max =
14202 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
14204 my $number_of_fields = $number_of_fields_max;
14206 # Find the best-looking number of fields
14207 # and make this our second guess if possible
14208 my ( $number_of_fields_best, $ri_ragged_break_list,
14209 $new_identifier_count )
14210 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
14213 if ( $number_of_fields_best != 0
14214 && $number_of_fields_best < $number_of_fields_max )
14216 $number_of_fields = $number_of_fields_best;
14219 # ----------------------------------------------------------------------
14220 # If we are crowded and the -lp option is being used, try to
14221 # undo some indentation
14222 # ----------------------------------------------------------------------
14224 $rOpts_line_up_parentheses
14226 $number_of_fields == 0
14227 || ( $number_of_fields == 1
14228 && $number_of_fields != $number_of_fields_best )
14232 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
14233 if ( $available_spaces > 0 ) {
14235 my $spaces_wanted = $max_width - $columns; # for 1 field
14237 if ( $number_of_fields_best == 0 ) {
14238 $number_of_fields_best =
14239 get_maximum_fields_wanted( \@item_lengths );
14242 if ( $number_of_fields_best != 1 ) {
14243 my $spaces_wanted_2 =
14244 1 + $pair_width - $columns; # for 2 fields
14245 if ( $available_spaces > $spaces_wanted_2 ) {
14246 $spaces_wanted = $spaces_wanted_2;
14250 if ( $spaces_wanted > 0 ) {
14251 my $deleted_spaces =
14252 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
14255 if ( $deleted_spaces > 0 ) {
14256 $columns = table_columns_available($i_first_comma);
14257 $number_of_fields_max =
14258 maximum_number_of_fields( $columns, $odd_or_even,
14259 $max_width, $pair_width );
14260 $number_of_fields = $number_of_fields_max;
14262 if ( $number_of_fields_best == 1
14263 && $number_of_fields >= 1 )
14265 $number_of_fields = $number_of_fields_best;
14272 # try for one column if two won't work
14273 if ( $number_of_fields <= 0 ) {
14274 $number_of_fields = int( $columns / $max_width );
14277 # The user can place an upper bound on the number of fields,
14278 # which can be useful for doing maintenance on tables
14279 if ( $rOpts_maximum_fields_per_table
14280 && $number_of_fields > $rOpts_maximum_fields_per_table )
14282 $number_of_fields = $rOpts_maximum_fields_per_table;
14285 # How many columns (characters) and lines would this container take
14286 # if no additional whitespace were added?
14287 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
14288 $i_effective_last_comma + 1 );
14289 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
14290 my $packed_lines = 1 + int( $packed_columns / $columns );
14292 # are we an item contained in an outer list?
14293 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
14295 if ( $number_of_fields <= 0 ) {
14297 # #---------------------------------------------------------------
14298 # # We're in trouble. We can't find a single field width that works.
14299 # # There is no simple answer here; we may have a single long list
14301 # #---------------------------------------------------------------
14303 # In many cases, it may be best to not force a break if there is just one
14304 # comma, because the standard continuation break logic will do a better
14307 # In the common case that all but one of the terms can fit
14308 # on a single line, it may look better not to break open the
14309 # containing parens. Consider, for example
14313 # sort { $color_value{$::a} <=> $color_value{$::b}; }
14316 # which will look like this with the container broken:
14320 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
14323 # Here is an example of this rule for a long last term:
14325 # log_message( 0, 256, 128,
14326 # "Number of routes in adj-RIB-in to be considered: $peercount" );
14328 # And here is an example with a long first term:
14331 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
14332 # $r, $pu, $ps, $cu, $cs, $tt
14334 # if $style eq 'all';
14336 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
14337 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
14338 my $long_first_term =
14339 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
14341 # break at every comma ...
14344 # if requested by user or is best looking
14345 $number_of_fields_best == 1
14347 # or if this is a sublist of a larger list
14348 || $in_hierarchical_list
14350 # or if multiple commas and we dont have a long first or last
14352 || ( $comma_count > 1
14353 && !( $long_last_term || $long_first_term ) )
14356 foreach ( 0 .. $comma_count - 1 ) {
14357 set_forced_breakpoint( $$rcomma_index[$_] );
14360 elsif ($long_last_term) {
14362 set_forced_breakpoint($i_last_comma);
14363 $$rdo_not_break_apart = 1 unless $must_break_open;
14365 elsif ($long_first_term) {
14367 set_forced_breakpoint($i_first_comma);
14371 # let breaks be defined by default bond strength logic
14376 # --------------------------------------------------------
14377 # We have a tentative field count that seems to work.
14378 # How many lines will this require?
14379 # --------------------------------------------------------
14380 my $formatted_lines = $item_count / ($number_of_fields);
14381 if ( $formatted_lines != int $formatted_lines ) {
14382 $formatted_lines = 1 + int $formatted_lines;
14385 # So far we've been trying to fill out to the right margin. But
14386 # compact tables are easier to read, so let's see if we can use fewer
14387 # fields without increasing the number of lines.
14388 $number_of_fields =
14389 compactify_table( $item_count, $number_of_fields, $formatted_lines,
14392 # How many spaces across the page will we fill?
14393 my $columns_per_line =
14394 ( int $number_of_fields / 2 ) * $pair_width +
14395 ( $number_of_fields % 2 ) * $max_width;
14397 my $formatted_columns;
14399 if ( $number_of_fields > 1 ) {
14400 $formatted_columns =
14401 ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
14405 $formatted_columns = $max_width * $item_count;
14407 if ( $formatted_columns < $packed_columns ) {
14408 $formatted_columns = $packed_columns;
14411 my $unused_columns = $formatted_columns - $packed_columns;
14413 # set some empirical parameters to help decide if we should try to
14414 # align; high sparsity does not look good, especially with few lines
14415 my $sparsity = ($unused_columns) / ($formatted_columns);
14416 my $max_allowed_sparsity =
14417 ( $item_count < 3 ) ? 0.1
14418 : ( $packed_lines == 1 ) ? 0.15
14419 : ( $packed_lines == 2 ) ? 0.4
14422 # Begin check for shortcut methods, which avoid treating a list
14423 # as a table for relatively small parenthesized lists. These
14424 # are usually easier to read if not formatted as tables.
14426 $packed_lines <= 2 # probably can fit in 2 lines
14427 && $item_count < 9 # doesn't have too many items
14428 && $opening_environment eq 'BLOCK' # not a sub-container
14429 && $opening_token eq '(' # is paren list
14433 # Shortcut method 1: for -lp and just one comma:
14434 # This is a no-brainer, just break at the comma.
14436 $rOpts_line_up_parentheses # -lp
14437 && $item_count == 2 # two items, one comma
14438 && !$must_break_open
14441 my $i_break = $$rcomma_index[0];
14442 set_forced_breakpoint($i_break);
14443 $$rdo_not_break_apart = 1;
14444 set_non_alignment_flags( $comma_count, $rcomma_index );
14449 # method 2 is for most small ragged lists which might look
14450 # best if not displayed as a table.
14452 ( $number_of_fields == 2 && $item_count == 3 )
14454 $new_identifier_count > 0 # isn't all quotes
14455 && $sparsity > 0.15
14456 ) # would be fairly spaced gaps if aligned
14461 set_ragged_breakpoints( \@i_term_comma,
14462 $ri_ragged_break_list );
14463 ++$break_count if ($use_separate_first_term);
14465 # NOTE: we should really use the true break count here,
14466 # which can be greater if there are large terms and
14467 # little space, but usually this will work well enough.
14468 unless ($must_break_open) {
14470 if ( $break_count <= 1 ) {
14471 $$rdo_not_break_apart = 1;
14473 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14475 $$rdo_not_break_apart = 1;
14478 set_non_alignment_flags( $comma_count, $rcomma_index );
14482 } # end shortcut methods
14486 FORMATTER_DEBUG_FLAG_SPARSE && do {
14488 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
14492 #---------------------------------------------------------------
14493 # Compound List Rule 2:
14494 # If this list is too long for one line, and it is an item of a
14495 # larger list, then we must format it, regardless of sparsity
14496 # (ian.t). One reason that we have to do this is to trigger
14497 # Compound List Rule 1, above, which causes breaks at all commas of
14498 # all outer lists. In this way, the structure will be properly
14500 #---------------------------------------------------------------
14502 # Decide if this list is too long for one line unless broken
14503 my $total_columns = table_columns_available($i_opening_paren);
14504 my $too_long = $packed_columns > $total_columns;
14506 # For a paren list, include the length of the token just before the
14507 # '(' because this is likely a sub call, and we would have to
14508 # include the sub name on the same line as the list. This is still
14509 # imprecise, but not too bad. (steve.t)
14510 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
14513 excess_line_length( $i_opening_minus,
14514 $i_effective_last_comma + 1 ) > 0;
14517 # FIXME: For an item after a '=>', try to include the length of the
14518 # thing before the '=>'. This is crude and should be improved by
14519 # actually looking back token by token.
14520 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
14521 my $i_opening_minus = $i_opening_paren - 4;
14522 if ( $i_opening_minus >= 0 ) {
14524 excess_line_length( $i_opening_minus,
14525 $i_effective_last_comma + 1 ) > 0;
14529 # Always break lists contained in '[' and '{' if too long for 1 line,
14530 # and always break lists which are too long and part of a more complex
14532 my $must_break_open_container = $must_break_open
14534 && ( $in_hierarchical_list || $opening_token ne '(' ) );
14536 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
14538 #---------------------------------------------------------------
14539 # The main decision:
14540 # Now decide if we will align the data into aligned columns. Do not
14541 # attempt to align columns if this is a tiny table or it would be
14542 # too spaced. It seems that the more packed lines we have, the
14543 # sparser the list that can be allowed and still look ok.
14544 #---------------------------------------------------------------
14546 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
14547 || ( $formatted_lines < 2 )
14548 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
14552 #---------------------------------------------------------------
14553 # too sparse: would look ugly if aligned in a table;
14554 #---------------------------------------------------------------
14556 # use old breakpoints if this is a 'big' list
14557 # FIXME: goal is to improve set_ragged_breakpoints so that
14558 # this is not necessary.
14559 if ( $packed_lines > 2 && $item_count > 10 ) {
14560 write_logfile_entry("List sparse: using old breakpoints\n");
14561 copy_old_breakpoints( $i_first_comma, $i_last_comma );
14564 # let the continuation logic handle it if 2 lines
14568 set_ragged_breakpoints( \@i_term_comma,
14569 $ri_ragged_break_list );
14570 ++$break_count if ($use_separate_first_term);
14572 unless ($must_break_open_container) {
14573 if ( $break_count <= 1 ) {
14574 $$rdo_not_break_apart = 1;
14576 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14578 $$rdo_not_break_apart = 1;
14581 set_non_alignment_flags( $comma_count, $rcomma_index );
14586 #---------------------------------------------------------------
14587 # go ahead and format as a table
14588 #---------------------------------------------------------------
14589 write_logfile_entry(
14590 "List: auto formatting with $number_of_fields fields/row\n");
14592 my $j_first_break =
14593 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
14596 my $j = $j_first_break ;
14597 $j < $comma_count ;
14598 $j += $number_of_fields
14601 my $i = $$rcomma_index[$j];
14602 set_forced_breakpoint($i);
14608 sub set_non_alignment_flags {
14610 # set flag which indicates that these commas should not be
14612 my ( $comma_count, $rcomma_index ) = @_;
14613 foreach ( 0 .. $comma_count - 1 ) {
14614 $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
14618 sub study_list_complexity {
14620 # Look for complex tables which should be formatted with one term per line.
14621 # Returns the following:
14623 # \@i_ragged_break_list = list of good breakpoints to avoid lines
14624 # which are hard to read
14625 # $number_of_fields_best = suggested number of fields based on
14626 # complexity; = 0 if any number may be used.
14628 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
14629 my $item_count = @{$ri_term_begin};
14630 my $complex_item_count = 0;
14631 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
14632 my $i_max = @{$ritem_lengths} - 1;
14633 ##my @item_complexity;
14635 my $i_last_last_break = -3;
14636 my $i_last_break = -2;
14637 my @i_ragged_break_list;
14639 my $definitely_complex = 30;
14640 my $definitely_simple = 12;
14641 my $quote_count = 0;
14643 for my $i ( 0 .. $i_max ) {
14644 my $ib = $ri_term_begin->[$i];
14645 my $ie = $ri_term_end->[$i];
14647 # define complexity: start with the actual term length
14648 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
14650 ##TBD: join types here and check for variations
14651 ##my $str=join "", @tokens_to_go[$ib..$ie];
14654 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
14658 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
14662 if ( $ib eq $ie ) {
14663 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
14664 $complex_item_count++;
14665 $weighted_length *= 2;
14671 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
14672 $complex_item_count++;
14673 $weighted_length *= 2;
14675 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
14676 $weighted_length += 4;
14680 # add weight for extra tokens.
14681 $weighted_length += 2 * ( $ie - $ib );
14683 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
14684 ## print "# COMPLEXITY:$weighted_length $BUB\n";
14686 ##push @item_complexity, $weighted_length;
14688 # now mark a ragged break after this item it if it is 'long and
14690 if ( $weighted_length >= $definitely_complex ) {
14692 # if we broke after the previous term
14693 # then break before it too
14694 if ( $i_last_break == $i - 1
14696 && $i_last_last_break != $i - 2 )
14699 ## FIXME: don't strand a small term
14700 pop @i_ragged_break_list;
14701 push @i_ragged_break_list, $i - 2;
14702 push @i_ragged_break_list, $i - 1;
14705 push @i_ragged_break_list, $i;
14706 $i_last_last_break = $i_last_break;
14707 $i_last_break = $i;
14710 # don't break before a small last term -- it will
14711 # not look good on a line by itself.
14712 elsif ($i == $i_max
14713 && $i_last_break == $i - 1
14714 && $weighted_length <= $definitely_simple )
14716 pop @i_ragged_break_list;
14720 my $identifier_count = $i_max + 1 - $quote_count;
14722 # Need more tuning here..
14723 if ( $max_width > 12
14724 && $complex_item_count > $item_count / 2
14725 && $number_of_fields_best != 2 )
14727 $number_of_fields_best = 1;
14730 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
14733 sub get_maximum_fields_wanted {
14735 # Not all tables look good with more than one field of items.
14736 # This routine looks at a table and decides if it should be
14737 # formatted with just one field or not.
14738 # This coding is still under development.
14739 my ($ritem_lengths) = @_;
14741 my $number_of_fields_best = 0;
14743 # For just a few items, we tentatively assume just 1 field.
14744 my $item_count = @{$ritem_lengths};
14745 if ( $item_count <= 5 ) {
14746 $number_of_fields_best = 1;
14749 # For larger tables, look at it both ways and see what looks best
14753 my @max_length = ( 0, 0 );
14754 my @last_length_2 = ( undef, undef );
14755 my @first_length_2 = ( undef, undef );
14756 my $last_length = undef;
14757 my $total_variation_1 = 0;
14758 my $total_variation_2 = 0;
14759 my @total_variation_2 = ( 0, 0 );
14760 for ( my $j = 0 ; $j < $item_count ; $j++ ) {
14762 $is_odd = 1 - $is_odd;
14763 my $length = $ritem_lengths->[$j];
14764 if ( $length > $max_length[$is_odd] ) {
14765 $max_length[$is_odd] = $length;
14768 if ( defined($last_length) ) {
14769 my $dl = abs( $length - $last_length );
14770 $total_variation_1 += $dl;
14772 $last_length = $length;
14774 my $ll = $last_length_2[$is_odd];
14775 if ( defined($ll) ) {
14776 my $dl = abs( $length - $ll );
14777 $total_variation_2[$is_odd] += $dl;
14780 $first_length_2[$is_odd] = $length;
14782 $last_length_2[$is_odd] = $length;
14784 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
14786 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
14787 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
14788 $number_of_fields_best = 1;
14791 return ($number_of_fields_best);
14794 sub table_columns_available {
14795 my $i_first_comma = shift;
14797 $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
14799 # Patch: the vertical formatter does not line up lines whose lengths
14800 # exactly equal the available line length because of allowances
14801 # that must be made for side comments. Therefore, the number of
14802 # available columns is reduced by 1 character.
14807 sub maximum_number_of_fields {
14809 # how many fields will fit in the available space?
14810 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
14811 my $max_pairs = int( $columns / $pair_width );
14812 my $number_of_fields = $max_pairs * 2;
14813 if ( $odd_or_even == 1
14814 && $max_pairs * $pair_width + $max_width <= $columns )
14816 $number_of_fields++;
14818 return $number_of_fields;
14821 sub compactify_table {
14823 # given a table with a certain number of fields and a certain number
14824 # of lines, see if reducing the number of fields will make it look
14826 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
14827 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
14831 $min_fields = $number_of_fields ;
14832 $min_fields >= $odd_or_even
14833 && $min_fields * $formatted_lines >= $item_count ;
14834 $min_fields -= $odd_or_even
14837 $number_of_fields = $min_fields;
14840 return $number_of_fields;
14843 sub set_ragged_breakpoints {
14845 # Set breakpoints in a list that cannot be formatted nicely as a
14847 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
14849 my $break_count = 0;
14850 foreach (@$ri_ragged_break_list) {
14851 my $j = $ri_term_comma->[$_];
14853 set_forced_breakpoint($j);
14857 return $break_count;
14860 sub copy_old_breakpoints {
14861 my ( $i_first_comma, $i_last_comma ) = @_;
14862 for my $i ( $i_first_comma .. $i_last_comma ) {
14863 if ( $old_breakpoint_to_go[$i] ) {
14864 set_forced_breakpoint($i);
14870 my ( $i, $j ) = @_;
14871 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
14873 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14874 my ( $a, $b, $c ) = caller();
14876 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
14880 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
14883 # shouldn't happen; non-critical error
14885 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14886 my ( $a, $b, $c ) = caller();
14888 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
14894 sub set_fake_breakpoint {
14896 # Just bump up the breakpoint count as a signal that there are breaks.
14897 # This is useful if we have breaks but may want to postpone deciding where
14899 $forced_breakpoint_count++;
14902 sub set_forced_breakpoint {
14905 return unless defined $i && $i >= 0;
14907 # when called with certain tokens, use bond strengths to decide
14908 # if we break before or after it
14909 my $token = $tokens_to_go[$i];
14911 if ( $token =~ /^([\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
14912 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14915 # breaks are forced before 'if' and 'unless'
14916 elsif ( $is_if_unless{$token} ) { $i-- }
14918 if ( $i >= 0 && $i <= $max_index_to_go ) {
14919 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14921 FORMATTER_DEBUG_FLAG_FORCE && do {
14922 my ( $a, $b, $c ) = caller();
14924 "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
14927 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
14928 $forced_breakpoint_to_go[$i_nonblank] = 1;
14930 if ( $i_nonblank > $index_max_forced_break ) {
14931 $index_max_forced_break = $i_nonblank;
14933 $forced_breakpoint_count++;
14934 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
14937 # if we break at an opening container..break at the closing
14938 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
14939 set_closing_breakpoint($i_nonblank);
14945 sub clear_breakpoint_undo_stack {
14946 $forced_breakpoint_undo_count = 0;
14949 sub undo_forced_breakpoint_stack {
14951 my $i_start = shift;
14952 if ( $i_start < 0 ) {
14954 my ( $a, $b, $c ) = caller();
14956 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
14960 while ( $forced_breakpoint_undo_count > $i_start ) {
14962 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14963 if ( $i >= 0 && $i <= $max_index_to_go ) {
14964 $forced_breakpoint_to_go[$i] = 0;
14965 $forced_breakpoint_count--;
14967 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14968 my ( $a, $b, $c ) = caller();
14970 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
14975 # shouldn't happen, but not a critical error
14977 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14978 my ( $a, $b, $c ) = caller();
14980 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
14987 sub recombine_breakpoints {
14989 # sub set_continuation_breaks is very liberal in setting line breaks
14990 # for long lines, always setting breaks at good breakpoints, even
14991 # when that creates small lines. Occasionally small line fragments
14992 # are produced which would look better if they were combined.
14993 # That's the task of this routine, recombine_breakpoints.
14994 my ( $ri_first, $ri_last ) = @_;
14995 my $more_to_do = 1;
14997 # We keep looping over all of the lines of this batch
14998 # until there are no more possible recombinations
14999 my $nmax_last = @$ri_last;
15000 while ($more_to_do) {
15004 my $nmax = @$ri_last - 1;
15006 # safety check for infinite loop
15007 unless ( $nmax < $nmax_last ) {
15009 # shouldn't happen because splice below decreases nmax on each pass:
15010 # but i get paranoid sometimes
15011 die "Program bug-infinite loop in recombine breakpoints\n";
15013 $nmax_last = $nmax;
15015 my $previous_outdentable_closing_paren;
15016 my $leading_amp_count = 0;
15017 my $this_line_is_semicolon_terminated;
15019 # loop over all remaining lines in this batch
15020 for $n ( 1 .. $nmax ) {
15022 #----------------------------------------------------------
15023 # If we join the current pair of lines,
15024 # line $n-1 will become the left part of the joined line
15025 # line $n will become the right part of the joined line
15027 # Here are Indexes of the endpoint tokens of the two lines:
15029 # ---left---- | ---right---
15030 # $if $imid | $imidr $il
15032 # We want to decide if we should join tokens $imid to $imidr
15034 # We will apply a number of ad-hoc tests to see if joining
15035 # here will look ok. The code will just issue a 'next'
15036 # command if the join doesn't look good. If we get through
15037 # the gauntlet of tests, the lines will be recombined.
15038 #----------------------------------------------------------
15039 my $if = $$ri_first[ $n - 1 ];
15040 my $il = $$ri_last[$n];
15041 my $imid = $$ri_last[ $n - 1 ];
15042 my $imidr = $$ri_first[$n];
15044 #my $depth_increase=( $nesting_depth_to_go[$imidr] -
15045 # $nesting_depth_to_go[$if] );
15047 ##print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
15049 # If line $n is the last line, we set some flags and
15050 # do any special checks for it
15051 if ( $n == $nmax ) {
15053 # a terminal '{' should stay where it is
15054 next if $types_to_go[$imidr] eq '{';
15056 # set flag if statement $n ends in ';'
15057 $this_line_is_semicolon_terminated = $types_to_go[$il] eq ';'
15059 # with possible side comment
15060 || ( $types_to_go[$il] eq '#'
15061 && $il - $imidr >= 2
15062 && $types_to_go[ $il - 2 ] eq ';'
15063 && $types_to_go[ $il - 1 ] eq 'b' );
15066 #----------------------------------------------------------
15067 # Section 1: examine token at $imid (right end of first line
15069 #----------------------------------------------------------
15071 # an isolated '}' may join with a ';' terminated segment
15072 if ( $types_to_go[$imid] eq '}' ) {
15074 # Check for cases where combining a semicolon terminated
15075 # statement with a previous isolated closing paren will
15076 # allow the combined line to be outdented. This is
15077 # generally a good move. For example, we can join up
15078 # the last two lines here:
15080 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
15081 # $size, $atime, $mtime, $ctime, $blksize, $blocks
15087 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
15088 # $size, $atime, $mtime, $ctime, $blksize, $blocks
15091 # which makes the parens line up.
15093 # Another example, from Joe Matarazzo, probably looks best
15094 # with the 'or' clause appended to the trailing paren:
15095 # $self->some_method(
15098 # ) or die "Some_method didn't work";
15100 $previous_outdentable_closing_paren =
15101 $this_line_is_semicolon_terminated # ends in ';'
15102 && $if == $imid # only one token on last line
15103 && $tokens_to_go[$imid] eq ')' # must be structural paren
15105 # only &&, ||, and : if no others seen
15106 # (but note: our count made below could be wrong
15107 # due to intervening comments)
15108 && ( $leading_amp_count == 0
15109 || $types_to_go[$imidr] !~ /^(:|\&\&|\|\|)$/ )
15111 # but leading colons probably line up with with a
15112 # previous colon or question (count could be wrong).
15113 && $types_to_go[$imidr] ne ':'
15115 # only one step in depth allowed. this line must not
15116 # begin with a ')' itself.
15117 && ( $nesting_depth_to_go[$imid] ==
15118 $nesting_depth_to_go[$il] + 1 );
15122 $previous_outdentable_closing_paren
15124 # handle '.' and '?' specially below
15125 || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
15129 # do not recombine lines with ending &&, ||, or :
15130 elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
15131 next unless $want_break_before{ $types_to_go[$imid] };
15134 # for lines ending in a comma...
15135 elsif ( $types_to_go[$imid] eq ',' ) {
15137 # an isolated '},' may join with an identifier + ';'
15138 # this is useful for the class of a 'bless' statement (bless.t)
15139 if ( $types_to_go[$if] eq '}'
15140 && $types_to_go[$imidr] eq 'i' )
15143 unless ( ( $if == ( $imid - 1 ) )
15144 && ( $il == ( $imidr + 1 ) )
15145 && $this_line_is_semicolon_terminated );
15147 # override breakpoint
15148 $forced_breakpoint_to_go[$imid] = 0;
15151 # but otherwise, do not recombine unless this will leave
15154 next unless ( $n + 1 >= $nmax );
15159 elsif ( $types_to_go[$imid] eq '(' ) {
15161 # No longer doing this
15164 elsif ( $types_to_go[$imid] eq ')' ) {
15166 # No longer doing this
15169 # keep a terminal colon
15170 elsif ( $types_to_go[$imid] eq ':' ) {
15174 # keep a terminal for-semicolon
15175 elsif ( $types_to_go[$imid] eq 'f' ) {
15179 # if '=' at end of line ...
15180 elsif ( $is_assignment{ $types_to_go[$imid] } ) {
15182 # otherwise always ok to join isolated '='
15183 unless ( $if == $imid ) {
15186 ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ )
15188 # note no '$' in pattern because -> can
15189 # start long identifier
15190 && !grep { $_ =~ /^(->|=>|[\,])/ }
15191 @types_to_go[ $imidr .. $il ]
15194 # retain the break after the '=' unless ...
15198 # '=' is followed by a number and looks like math
15199 ( $types_to_go[$imidr] eq 'n' && $is_math )
15201 # or followed by a scalar and looks like math
15202 || ( ( $types_to_go[$imidr] eq 'i' )
15203 && ( $tokens_to_go[$imidr] =~ /^\$/ )
15206 # or followed by a single "short" token
15207 # ('12' is arbitrary)
15209 && token_sequence_length( $imidr, $imidr ) < 12 )
15213 unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
15214 $forced_breakpoint_to_go[$imid] = 0;
15219 elsif ( $types_to_go[$imid] eq 'k' ) {
15221 # make major control keywords stand out
15226 #/^(last|next|redo|return)$/
15227 $is_last_next_redo_return{ $tokens_to_go[$imid] }
15230 if ( $is_and_or{ $tokens_to_go[$imid] } ) {
15231 next unless $want_break_before{ $tokens_to_go[$imid] };
15235 #----------------------------------------------------------
15236 # Section 2: Now examine token at $imidr (left end of second
15238 #----------------------------------------------------------
15240 # join lines identified above as capable of
15241 # causing an outdented line with leading closing paren
15242 if ($previous_outdentable_closing_paren) {
15243 $forced_breakpoint_to_go[$imid] = 0;
15246 # do not recombine lines with leading &&, ||, or :
15247 elsif ( $types_to_go[$imidr] =~ /^(:|\&\&|\|\|)$/ ) {
15248 $leading_amp_count++;
15249 next if $want_break_before{ $types_to_go[$imidr] };
15252 # Identify and recombine a broken ?/: chain
15253 elsif ( $types_to_go[$imidr] eq '?' ) {
15255 # indexes of line first tokens --
15256 # mm - line before previous line
15257 # f - previous line
15260 # fff - line after next
15261 my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
15262 my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
15263 my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1;
15264 my $seqno = $type_sequence_to_go[$imidr];
15266 ( $types_to_go[$if] eq ':'
15267 && $type_sequence_to_go[$if] ==
15268 $seqno - TYPE_SEQUENCE_INCREMENT );
15271 && $types_to_go[$imm] eq ':'
15272 && $type_sequence_to_go[$imm] ==
15273 $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
15277 && $types_to_go[$iff] eq ':'
15278 && $type_sequence_to_go[$iff] == $seqno );
15281 && $types_to_go[$ifff] eq ':'
15282 && $type_sequence_to_go[$ifff] ==
15283 $seqno + TYPE_SEQUENCE_INCREMENT );
15285 # we require that this '?' be part of a correct sequence
15286 # of 3 in a row or else no recombination is done.
15288 unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
15289 $forced_breakpoint_to_go[$imid] = 0;
15292 # do not recombine lines with leading '.'
15293 elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
15294 my $i_next_nonblank = $imidr + 1;
15295 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
15296 $i_next_nonblank++;
15302 # ... unless there is just one and we can reduce
15303 # this to two lines if we do. For example, this
15307 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
15309 # looks better than this:
15310 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
15311 # . '$args .= $pat;'
15316 && $types_to_go[$if] ne $types_to_go[$imidr]
15319 # ... or this would strand a short quote , like this
15320 # . "some long qoute"
15323 || ( $types_to_go[$i_next_nonblank] eq 'Q'
15324 && $i_next_nonblank >= $il - 1
15325 && length( $tokens_to_go[$i_next_nonblank] ) <
15326 $rOpts_short_concatenation_item_length )
15330 # handle leading keyword..
15331 elsif ( $types_to_go[$imidr] eq 'k' ) {
15333 # handle leading "and" and "or"
15334 if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
15336 # Decide if we will combine a single terminal 'and' and
15337 # 'or' after an 'if' or 'unless'. We should consider the
15338 # possible vertical alignment, and visual clutter.
15340 # This looks best with the 'and' on the same
15341 # line as the 'if':
15344 # if $seconds and $nu < 2;
15346 # But this looks better as shown:
15349 # if !$this->{Parents}{$_}
15350 # or $this->{Parents}{$_} eq $_;
15352 # Eventually, it would be nice to look for
15353 # similarities (such as 'this' or 'Parents'), but
15354 # for now I'm using a simple rule that says that
15355 # the resulting line length must not be more than
15356 # half the maximum line length (making it 80/2 =
15357 # 40 characters by default).
15360 $this_line_is_semicolon_terminated
15363 # following 'if' or 'unless'
15364 $types_to_go[$if] eq 'k'
15365 && $is_if_unless{ $tokens_to_go[$if] }
15371 # handle leading "if" and "unless"
15372 elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
15374 # FIXME: This is still experimental..may not be too useful
15377 $this_line_is_semicolon_terminated
15379 # previous line begins with 'and' or 'or'
15380 && $types_to_go[$if] eq 'k'
15381 && $is_and_or{ $tokens_to_go[$if] }
15386 # handle all other leading keywords
15389 # keywords look best at start of lines,
15390 # but combine things like "1 while"
15391 unless ( $is_assignment{ $types_to_go[$imid] } ) {
15393 if ( ( $types_to_go[$imid] ne 'k' )
15394 && ( $tokens_to_go[$imidr] ne 'while' ) );
15399 # similar treatment of && and || as above for 'and' and 'or':
15400 # NOTE: This block of code is currently bypassed because
15401 # of a previous block but is retained for possible future use.
15402 elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
15404 # maybe looking at something like:
15405 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
15409 $this_line_is_semicolon_terminated
15411 # previous line begins with an 'if' or 'unless' keyword
15412 && $types_to_go[$if] eq 'k'
15413 && $is_if_unless{ $tokens_to_go[$if] }
15418 #----------------------------------------------------------
15420 # Combine the lines if we arrive here and it is possible
15421 #----------------------------------------------------------
15423 # honor hard breakpoints
15424 next if ( $forced_breakpoint_to_go[$imid] > 0 );
15426 my $bs = $bond_strength_to_go[$imid];
15428 # combined line cannot be too long
15430 if excess_line_length( $if, $il ) > 0;
15432 # do not recombine if we would skip in indentation levels
15433 if ( $n < $nmax ) {
15434 my $if_next = $$ri_first[ $n + 1 ];
15437 $levels_to_go[$if] < $levels_to_go[$imidr]
15438 && $levels_to_go[$imidr] < $levels_to_go[$if_next]
15440 # but an isolated 'if (' is undesirable
15443 && $imid - $if <= 2
15444 && $types_to_go[$if] eq 'k'
15445 && $tokens_to_go[$if] eq 'if'
15446 && $tokens_to_go[$imid] ne '('
15452 next if ( $bs == NO_BREAK );
15454 # remember the pair with the greatest bond strength
15461 if ( $bs > $bs_best ) {
15466 # we have 2 or more candidates, so need another pass
15471 # recombine the pair with the greatest bond strength
15473 splice @$ri_first, $n_best, 1;
15474 splice @$ri_last, $n_best - 1, 1;
15477 return ( $ri_first, $ri_last );
15480 sub set_continuation_breaks {
15482 # Define an array of indexes for inserting newline characters to
15483 # keep the line lengths below the maximum desired length. There is
15484 # an implied break after the last token, so it need not be included.
15485 # We'll break at points where the bond strength is lowest.
15487 my $saw_good_break = shift;
15488 my @i_first = (); # the first index to output
15489 my @i_last = (); # the last index to output
15490 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
15491 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
15493 set_bond_strengths();
15496 my $imax = $max_index_to_go;
15497 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
15498 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
15499 my $i_begin = $imin;
15501 my $leading_spaces = leading_spaces_to_go($imin);
15502 my $line_count = 0;
15503 my $last_break_strength = NO_BREAK;
15504 my $i_last_break = -1;
15505 my $max_bias = 0.001;
15506 my $tiny_bias = 0.0001;
15507 my $leading_alignment_token = "";
15508 my $leading_alignment_type = "";
15510 # see if any ?/:'s are in order
15511 my $colons_in_order = 1;
15513 my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
15514 foreach (@colon_list) {
15515 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
15519 # This is a sufficient but not necessary condition for colon chain
15520 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
15522 while ( $i_begin <= $imax ) {
15523 my $lowest_strength = NO_BREAK;
15524 my $starting_sum = $lengths_to_go[$i_begin];
15527 my $lowest_next_token = '';
15528 my $lowest_next_type = 'b';
15529 my $i_lowest_next_nonblank = -1;
15531 # loop to find next break point
15532 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
15533 my $type = $types_to_go[$i_test];
15534 my $token = $tokens_to_go[$i_test];
15535 my $next_type = $types_to_go[ $i_test + 1 ];
15536 my $next_token = $tokens_to_go[ $i_test + 1 ];
15537 my $i_next_nonblank =
15538 ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
15539 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
15540 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15541 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15542 my $strength = $bond_strength_to_go[$i_test];
15543 my $must_break = 0;
15545 # FIXME: TESTING: Might want to be able to break after these
15546 # force an immediate break at certain operators
15547 # with lower level than the start of the line
15550 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
15551 || ( $next_nonblank_type eq 'k'
15552 && $next_nonblank_token =~ /^(and|or)$/ )
15554 && ( $nesting_depth_to_go[$i_begin] >
15555 $nesting_depth_to_go[$i_next_nonblank] )
15558 set_forced_breakpoint($i_next_nonblank);
15563 # Try to put a break where requested by scan_list
15564 $forced_breakpoint_to_go[$i_test]
15566 # break between ) { in a continued line so that the '{' can
15568 # See similar logic in scan_list which catches instances
15569 # where a line is just something like ') {'
15571 && ( $token eq ')' )
15572 && ( $next_nonblank_type eq '{' )
15573 && ($next_nonblank_block_type)
15574 && !$rOpts->{'opening-brace-always-on-right'} )
15576 # There is an implied forced break at a terminal opening brace
15577 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
15581 # Forced breakpoints must sometimes be overridden, for example
15582 # because of a side comment causing a NO_BREAK. It is easier
15583 # to catch this here than when they are set.
15584 if ( $strength < NO_BREAK ) {
15585 $strength = $lowest_strength - $tiny_bias;
15590 # quit if a break here would put a good terminal token on
15591 # the next line and we already have a possible break
15594 && ( $next_nonblank_type =~ /^[\;\,]$/ )
15597 $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ]
15599 ) > $rOpts_maximum_line_length
15603 last if ( $i_lowest >= 0 );
15606 # Avoid a break which would strand a single punctuation
15607 # token. For example, we do not want to strand a leading
15608 # '.' which is followed by a long quoted string.
15611 && ( $i_test == $i_begin )
15612 && ( $i_test < $imax )
15613 && ( $token eq $type )
15616 $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
15618 ) <= $rOpts_maximum_line_length
15624 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
15630 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
15633 # break at previous best break if it would have produced
15634 # a leading alignment of certain common tokens, and it
15635 # is different from the latest candidate break
15637 if ($leading_alignment_type);
15639 # Force at least one breakpoint if old code had good
15640 # break It is only called if a breakpoint is required or
15641 # desired. This will probably need some adjustments
15642 # over time. A goal is to try to be sure that, if a new
15643 # side comment is introduced into formated text, then
15644 # the same breakpoints will occur. scbreak.t
15647 $i_test == $imax # we are at the end
15648 && !$forced_breakpoint_count #
15649 && $saw_good_break # old line had good break
15650 && $type =~ /^[#;\{]$/ # and this line ends in
15651 # ';' or side comment
15652 && $i_last_break < 0 # and we haven't made a break
15653 && $i_lowest > 0 # and we saw a possible break
15654 && $i_lowest < $imax - 1 # (but not just before this ;)
15655 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
15658 $lowest_strength = $strength;
15659 $i_lowest = $i_test;
15660 $lowest_next_token = $next_nonblank_token;
15661 $lowest_next_type = $next_nonblank_type;
15662 $i_lowest_next_nonblank = $i_next_nonblank;
15663 last if $must_break;
15665 # set flags to remember if a break here will produce a
15666 # leading alignment of certain common tokens
15667 if ( $line_count > 0
15669 && ( $lowest_strength - $last_break_strength <= $max_bias )
15672 my $i_last_end = $i_begin - 1;
15673 if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
15674 my $tok_beg = $tokens_to_go[$i_begin];
15675 my $type_beg = $types_to_go[$i_begin];
15678 # check for leading alignment of certain tokens
15680 $tok_beg eq $next_nonblank_token
15681 && $is_chain_operator{$tok_beg}
15682 && ( $type_beg eq 'k'
15683 || $type_beg eq $tok_beg )
15684 && $nesting_depth_to_go[$i_begin] >=
15685 $nesting_depth_to_go[$i_next_nonblank]
15688 || ( $tokens_to_go[$i_last_end] eq $token
15689 && $is_chain_operator{$token}
15690 && ( $type eq 'k' || $type eq $token )
15691 && $nesting_depth_to_go[$i_last_end] >=
15692 $nesting_depth_to_go[$i_test] )
15695 $leading_alignment_token = $next_nonblank_token;
15696 $leading_alignment_type = $next_nonblank_type;
15702 ( $i_test >= $imax )
15706 $leading_spaces + $lengths_to_go[ $i_test + 2 ] -
15708 ) > $rOpts_maximum_line_length
15711 FORMATTER_DEBUG_FLAG_BREAK
15713 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
15715 # allow one extra terminal token after exceeding line length
15716 # if it would strand this token.
15717 if ( $rOpts_fuzzy_line_length
15719 && ( $i_lowest == $i_test )
15720 && ( length($token) > 1 )
15721 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
15728 ( $i_test == $imax ) # we're done if no more tokens,
15730 ( $i_lowest >= 0 ) # or no more space and we have a break
15736 # it's always ok to break at imax if no other break was found
15737 if ( $i_lowest < 0 ) { $i_lowest = $imax }
15739 # semi-final index calculation
15740 my $i_next_nonblank = (
15741 ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
15745 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
15746 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15748 #-------------------------------------------------------
15749 # ?/: rule 1 : if a break here will separate a '?' on this
15750 # line from its closing ':', then break at the '?' instead.
15751 #-------------------------------------------------------
15753 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
15754 next unless ( $tokens_to_go[$i] eq '?' );
15756 # do not break if probable sequence of ?/: statements
15757 next if ($is_colon_chain);
15759 # do not break if statement is broken by side comment
15762 $tokens_to_go[$max_index_to_go] eq '#'
15763 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
15764 $max_index_to_go ) !~ /^[\;\}]$/
15767 # no break needed if matching : is also on the line
15769 if ( $mate_index_to_go[$i] >= 0
15770 && $mate_index_to_go[$i] <= $i_next_nonblank );
15773 if ( $want_break_before{'?'} ) { $i_lowest-- }
15777 # final index calculation
15778 $i_next_nonblank = (
15779 ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
15783 $next_nonblank_type = $types_to_go[$i_next_nonblank];
15784 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15786 FORMATTER_DEBUG_FLAG_BREAK
15787 && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
15789 #-------------------------------------------------------
15790 # ?/: rule 2 : if we break at a '?', then break at its ':'
15792 # Note: this rule is also in sub scan_list to handle a break
15793 # at the start and end of a line (in case breaks are dictated
15794 # by side comments).
15795 #-------------------------------------------------------
15796 if ( $next_nonblank_type eq '?' ) {
15797 set_closing_breakpoint($i_next_nonblank);
15799 elsif ( $types_to_go[$i_lowest] eq '?' ) {
15800 set_closing_breakpoint($i_lowest);
15803 #-------------------------------------------------------
15804 # ?/: rule 3 : if we break at a ':' then we save
15805 # its location for further work below. We may need to go
15806 # back and break at its '?'.
15807 #-------------------------------------------------------
15808 if ( $next_nonblank_type eq ':' ) {
15809 push @i_colon_breaks, $i_next_nonblank;
15811 elsif ( $types_to_go[$i_lowest] eq ':' ) {
15812 push @i_colon_breaks, $i_lowest;
15815 # here we should set breaks for all '?'/':' pairs which are
15816 # separated by this line
15820 # save this line segment, after trimming blanks at the ends
15822 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
15824 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
15826 # set a forced breakpoint at a container opening, if necessary, to
15827 # signal a break at a closing container. Excepting '(' for now.
15828 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
15829 && !$forced_breakpoint_to_go[$i_lowest] )
15831 set_closing_breakpoint($i_lowest);
15834 # get ready to go again
15835 $i_begin = $i_lowest + 1;
15836 $last_break_strength = $lowest_strength;
15837 $i_last_break = $i_lowest;
15838 $leading_alignment_token = "";
15839 $leading_alignment_type = "";
15840 $lowest_next_token = '';
15841 $lowest_next_type = 'b';
15843 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
15847 # update indentation size
15848 if ( $i_begin <= $imax ) {
15849 $leading_spaces = leading_spaces_to_go($i_begin);
15853 #-------------------------------------------------------
15854 # ?/: rule 4 -- if we broke at a ':', then break at
15855 # corresponding '?' unless this is a chain of ?: expressions
15856 #-------------------------------------------------------
15857 if (@i_colon_breaks) {
15859 # using a simple method for deciding if we are in a ?/: chain --
15860 # this is a chain if it has multiple ?/: pairs all in order;
15862 # Note that if line starts in a ':' we count that above as a break
15863 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
15865 unless ($is_chain) {
15866 my @insert_list = ();
15867 foreach (@i_colon_breaks) {
15868 my $i_question = $mate_index_to_go[$_];
15869 if ( $i_question >= 0 ) {
15870 if ( $want_break_before{'?'} ) {
15872 if ( $i_question > 0
15873 && $types_to_go[$i_question] eq 'b' )
15879 if ( $i_question >= 0 ) {
15880 push @insert_list, $i_question;
15883 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
15887 return \@i_first, \@i_last;
15890 sub insert_additional_breaks {
15892 # this routine will add line breaks at requested locations after
15893 # sub set_continuation_breaks has made preliminary breaks.
15895 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
15898 my $line_number = 0;
15900 foreach $i_break_left ( sort @$ri_break_list ) {
15902 $i_f = $$ri_first[$line_number];
15903 $i_l = $$ri_last[$line_number];
15904 while ( $i_break_left >= $i_l ) {
15907 # shouldn't happen unless caller passes bad indexes
15908 if ( $line_number >= @$ri_last ) {
15910 "Non-fatal program bug: couldn't set break at $i_break_left\n"
15912 report_definite_bug();
15915 $i_f = $$ri_first[$line_number];
15916 $i_l = $$ri_last[$line_number];
15919 my $i_break_right = $i_break_left + 1;
15920 if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
15922 if ( $i_break_left >= $i_f
15923 && $i_break_left < $i_l
15924 && $i_break_right > $i_f
15925 && $i_break_right <= $i_l )
15927 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
15928 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
15933 sub set_closing_breakpoint {
15935 # set a breakpoint at a matching closing token
15936 # at present, this is only used to break at a ':' which matches a '?'
15937 my $i_break = shift;
15939 if ( $mate_index_to_go[$i_break] >= 0 ) {
15941 # CAUTION: infinite recursion possible here:
15942 # set_closing_breakpoint calls set_forced_breakpoint, and
15943 # set_forced_breakpoint call set_closing_breakpoint
15944 # ( test files attrib.t, BasicLyx.pm.html).
15945 # Don't reduce the '2' in the statement below
15946 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
15948 # break before } ] and ), but sub set_forced_breakpoint will decide
15949 # to break before or after a ? and :
15950 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
15951 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
15955 my $type_sequence = $type_sequence_to_go[$i_break];
15956 if ($type_sequence) {
15957 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
15958 $postponed_breakpoint{$type_sequence} = 1;
15963 # check to see if output line tabbing agrees with input line
15964 # this can be very useful for debugging a script which has an extra
15966 sub compare_indentation_levels {
15968 my ( $python_indentation_level, $structural_indentation_level ) = @_;
15969 if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
15970 $last_tabbing_disagreement = $input_line_number;
15972 if ($in_tabbing_disagreement) {
15975 $tabbing_disagreement_count++;
15977 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15978 write_logfile_entry(
15979 "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
15982 $in_tabbing_disagreement = $input_line_number;
15983 $first_tabbing_disagreement = $in_tabbing_disagreement
15984 unless ($first_tabbing_disagreement);
15989 if ($in_tabbing_disagreement) {
15991 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15992 write_logfile_entry(
15993 "End indentation disagreement from input line $in_tabbing_disagreement\n"
15996 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
15997 write_logfile_entry(
15998 "No further tabbing disagreements will be noted\n");
16001 $in_tabbing_disagreement = 0;
16006 #####################################################################
16008 # the Perl::Tidy::IndentationItem class supplies items which contain
16009 # how much whitespace should be used at the start of a line
16011 #####################################################################
16013 package Perl::Tidy::IndentationItem;
16015 # Indexes for indentation items
16016 use constant SPACES => 0; # total leading white spaces
16017 use constant LEVEL => 1; # the indentation 'level'
16018 use constant CI_LEVEL => 2; # the 'continuation level'
16019 use constant AVAILABLE_SPACES => 3; # how many left spaces available
16021 use constant CLOSED => 4; # index where we saw closing '}'
16022 use constant COMMA_COUNT => 5; # how many commas at this level?
16023 use constant SEQUENCE_NUMBER => 6; # output batch number
16024 use constant INDEX => 7; # index in output batch list
16025 use constant HAVE_CHILD => 8; # any dependents?
16026 use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
16027 # we would like to move to get
16028 # alignment (negative if left)
16029 use constant ALIGN_PAREN => 10; # do we want to try to align
16030 # with an opening structure?
16031 use constant MARKED => 11; # if visited by corrector logic
16032 use constant STACK_DEPTH => 12; # indentation nesting depth
16033 use constant STARTING_INDEX => 13; # first token index of this level
16034 use constant ARROW_COUNT => 14; # how many =>'s
16038 # Create an 'indentation_item' which describes one level of leading
16039 # whitespace when the '-lp' indentation is used. We return
16040 # a reference to an anonymous array of associated variables.
16041 # See above constants for storage scheme.
16043 $class, $spaces, $level,
16044 $ci_level, $available_spaces, $index,
16045 $gnu_sequence_number, $align_paren, $stack_depth,
16049 my $arrow_count = 0;
16050 my $comma_count = 0;
16051 my $have_child = 0;
16052 my $want_right_spaces = 0;
16055 $spaces, $level, $ci_level,
16056 $available_spaces, $closed, $comma_count,
16057 $gnu_sequence_number, $index, $have_child,
16058 $want_right_spaces, $align_paren, $marked,
16059 $stack_depth, $starting_index, $arrow_count,
16063 sub permanently_decrease_AVAILABLE_SPACES {
16065 # make a permanent reduction in the available indentation spaces
16066 # at one indentation item. NOTE: if there are child nodes, their
16067 # total SPACES must be reduced by the caller.
16069 my ( $item, $spaces_needed ) = @_;
16070 my $available_spaces = $item->get_AVAILABLE_SPACES();
16071 my $deleted_spaces =
16072 ( $available_spaces > $spaces_needed )
16074 : $available_spaces;
16075 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
16076 $item->decrease_SPACES($deleted_spaces);
16077 $item->set_RECOVERABLE_SPACES(0);
16079 return $deleted_spaces;
16082 sub tentatively_decrease_AVAILABLE_SPACES {
16084 # We are asked to tentatively delete $spaces_needed of indentation
16085 # for a indentation item. We may want to undo this later. NOTE: if
16086 # there are child nodes, their total SPACES must be reduced by the
16088 my ( $item, $spaces_needed ) = @_;
16089 my $available_spaces = $item->get_AVAILABLE_SPACES();
16090 my $deleted_spaces =
16091 ( $available_spaces > $spaces_needed )
16093 : $available_spaces;
16094 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
16095 $item->decrease_SPACES($deleted_spaces);
16096 $item->increase_RECOVERABLE_SPACES($deleted_spaces);
16097 return $deleted_spaces;
16100 sub get_STACK_DEPTH {
16102 return $self->[STACK_DEPTH];
16107 return $self->[SPACES];
16112 return $self->[MARKED];
16116 my ( $self, $value ) = @_;
16117 if ( defined($value) ) {
16118 $self->[MARKED] = $value;
16120 return $self->[MARKED];
16123 sub get_AVAILABLE_SPACES {
16125 return $self->[AVAILABLE_SPACES];
16128 sub decrease_SPACES {
16129 my ( $self, $value ) = @_;
16130 if ( defined($value) ) {
16131 $self->[SPACES] -= $value;
16133 return $self->[SPACES];
16136 sub decrease_AVAILABLE_SPACES {
16137 my ( $self, $value ) = @_;
16138 if ( defined($value) ) {
16139 $self->[AVAILABLE_SPACES] -= $value;
16141 return $self->[AVAILABLE_SPACES];
16144 sub get_ALIGN_PAREN {
16146 return $self->[ALIGN_PAREN];
16149 sub get_RECOVERABLE_SPACES {
16151 return $self->[RECOVERABLE_SPACES];
16154 sub set_RECOVERABLE_SPACES {
16155 my ( $self, $value ) = @_;
16156 if ( defined($value) ) {
16157 $self->[RECOVERABLE_SPACES] = $value;
16159 return $self->[RECOVERABLE_SPACES];
16162 sub increase_RECOVERABLE_SPACES {
16163 my ( $self, $value ) = @_;
16164 if ( defined($value) ) {
16165 $self->[RECOVERABLE_SPACES] += $value;
16167 return $self->[RECOVERABLE_SPACES];
16172 return $self->[CI_LEVEL];
16177 return $self->[LEVEL];
16180 sub get_SEQUENCE_NUMBER {
16182 return $self->[SEQUENCE_NUMBER];
16187 return $self->[INDEX];
16190 sub get_STARTING_INDEX {
16192 return $self->[STARTING_INDEX];
16195 sub set_HAVE_CHILD {
16196 my ( $self, $value ) = @_;
16197 if ( defined($value) ) {
16198 $self->[HAVE_CHILD] = $value;
16200 return $self->[HAVE_CHILD];
16203 sub get_HAVE_CHILD {
16205 return $self->[HAVE_CHILD];
16208 sub set_ARROW_COUNT {
16209 my ( $self, $value ) = @_;
16210 if ( defined($value) ) {
16211 $self->[ARROW_COUNT] = $value;
16213 return $self->[ARROW_COUNT];
16216 sub get_ARROW_COUNT {
16218 return $self->[ARROW_COUNT];
16221 sub set_COMMA_COUNT {
16222 my ( $self, $value ) = @_;
16223 if ( defined($value) ) {
16224 $self->[COMMA_COUNT] = $value;
16226 return $self->[COMMA_COUNT];
16229 sub get_COMMA_COUNT {
16231 return $self->[COMMA_COUNT];
16235 my ( $self, $value ) = @_;
16236 if ( defined($value) ) {
16237 $self->[CLOSED] = $value;
16239 return $self->[CLOSED];
16244 return $self->[CLOSED];
16247 #####################################################################
16249 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
16250 # contain a single output line
16252 #####################################################################
16254 package Perl::Tidy::VerticalAligner::Line;
16261 use constant JMAX => 0;
16262 use constant JMAX_ORIGINAL_LINE => 1;
16263 use constant RTOKENS => 2;
16264 use constant RFIELDS => 3;
16265 use constant RPATTERNS => 4;
16266 use constant INDENTATION => 5;
16267 use constant LEADING_SPACE_COUNT => 6;
16268 use constant OUTDENT_LONG_LINES => 7;
16269 use constant LIST_TYPE => 8;
16270 use constant IS_HANGING_SIDE_COMMENT => 9;
16271 use constant RALIGNMENTS => 10;
16272 use constant MAXIMUM_LINE_LENGTH => 11;
16273 use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
16276 $_index_map{jmax} = JMAX;
16277 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
16278 $_index_map{rtokens} = RTOKENS;
16279 $_index_map{rfields} = RFIELDS;
16280 $_index_map{rpatterns} = RPATTERNS;
16281 $_index_map{indentation} = INDENTATION;
16282 $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
16283 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
16284 $_index_map{list_type} = LIST_TYPE;
16285 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
16286 $_index_map{ralignments} = RALIGNMENTS;
16287 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
16288 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
16290 my @_default_data = ();
16291 $_default_data[JMAX] = undef;
16292 $_default_data[JMAX_ORIGINAL_LINE] = undef;
16293 $_default_data[RTOKENS] = undef;
16294 $_default_data[RFIELDS] = undef;
16295 $_default_data[RPATTERNS] = undef;
16296 $_default_data[INDENTATION] = undef;
16297 $_default_data[LEADING_SPACE_COUNT] = undef;
16298 $_default_data[OUTDENT_LONG_LINES] = undef;
16299 $_default_data[LIST_TYPE] = undef;
16300 $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
16301 $_default_data[RALIGNMENTS] = [];
16302 $_default_data[MAXIMUM_LINE_LENGTH] = undef;
16303 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
16307 # methods to count object population
16309 sub get_count { $_count; }
16310 sub _increment_count { ++$_count }
16311 sub _decrement_count { --$_count }
16314 # Constructor may be called as a class method
16316 my ( $caller, %arg ) = @_;
16317 my $caller_is_obj = ref($caller);
16318 my $class = $caller_is_obj || $caller;
16320 my $self = bless [], $class;
16322 $self->[RALIGNMENTS] = [];
16325 foreach ( keys %_index_map ) {
16326 $index = $_index_map{$_};
16327 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
16328 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
16329 else { $self->[$index] = $_default_data[$index] }
16332 $self->_increment_count();
16337 $_[0]->_decrement_count();
16340 sub get_jmax { $_[0]->[JMAX] }
16341 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
16342 sub get_rtokens { $_[0]->[RTOKENS] }
16343 sub get_rfields { $_[0]->[RFIELDS] }
16344 sub get_rpatterns { $_[0]->[RPATTERNS] }
16345 sub get_indentation { $_[0]->[INDENTATION] }
16346 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
16347 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
16348 sub get_list_type { $_[0]->[LIST_TYPE] }
16349 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
16350 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
16352 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
16353 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
16354 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
16355 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
16357 sub get_starting_column {
16358 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
16361 sub increment_column {
16362 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
16364 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
16366 sub current_field_width {
16370 return $self->get_column($j);
16373 return $self->get_column($j) - $self->get_column( $j - 1 );
16377 sub field_width_growth {
16380 return $self->get_column($j) - $self->get_starting_column($j);
16383 sub starting_field_width {
16387 return $self->get_starting_column($j);
16390 return $self->get_starting_column($j) -
16391 $self->get_starting_column( $j - 1 );
16395 sub increase_field_width {
16398 my ( $j, $pad ) = @_;
16399 my $jmax = $self->get_jmax();
16400 for my $k ( $j .. $jmax ) {
16401 $self->increment_column( $k, $pad );
16405 sub get_available_space_on_right {
16407 my $jmax = $self->get_jmax();
16408 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
16411 sub set_jmax { $_[0]->[JMAX] = $_[1] }
16412 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
16413 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
16414 sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
16415 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
16416 sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
16417 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
16418 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
16419 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
16420 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
16421 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
16425 #####################################################################
16427 # the Perl::Tidy::VerticalAligner::Alignment class holds information
16428 # on a single column being aligned
16430 #####################################################################
16431 package Perl::Tidy::VerticalAligner::Alignment;
16439 # Symbolic array indexes
16440 use constant COLUMN => 0; # the current column number
16441 use constant STARTING_COLUMN => 1; # column number when created
16442 use constant MATCHING_TOKEN => 2; # what token we are matching
16443 use constant STARTING_LINE => 3; # the line index of creation
16444 use constant ENDING_LINE => 4; # the most recent line to use it
16445 use constant SAVED_COLUMN => 5; # the most recent line to use it
16446 use constant SERIAL_NUMBER => 6; # unique number for this alignment
16447 # (just its index in an array)
16449 # Correspondence between variables and array indexes
16451 $_index_map{column} = COLUMN;
16452 $_index_map{starting_column} = STARTING_COLUMN;
16453 $_index_map{matching_token} = MATCHING_TOKEN;
16454 $_index_map{starting_line} = STARTING_LINE;
16455 $_index_map{ending_line} = ENDING_LINE;
16456 $_index_map{saved_column} = SAVED_COLUMN;
16457 $_index_map{serial_number} = SERIAL_NUMBER;
16459 my @_default_data = ();
16460 $_default_data[COLUMN] = undef;
16461 $_default_data[STARTING_COLUMN] = undef;
16462 $_default_data[MATCHING_TOKEN] = undef;
16463 $_default_data[STARTING_LINE] = undef;
16464 $_default_data[ENDING_LINE] = undef;
16465 $_default_data[SAVED_COLUMN] = undef;
16466 $_default_data[SERIAL_NUMBER] = undef;
16468 # class population count
16471 sub get_count { $_count; }
16472 sub _increment_count { ++$_count }
16473 sub _decrement_count { --$_count }
16478 my ( $caller, %arg ) = @_;
16479 my $caller_is_obj = ref($caller);
16480 my $class = $caller_is_obj || $caller;
16482 my $self = bless [], $class;
16484 foreach ( keys %_index_map ) {
16485 my $index = $_index_map{$_};
16486 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
16487 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
16488 else { $self->[$index] = $_default_data[$index] }
16490 $self->_increment_count();
16495 $_[0]->_decrement_count();
16498 sub get_column { return $_[0]->[COLUMN] }
16499 sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
16500 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
16501 sub get_starting_line { return $_[0]->[STARTING_LINE] }
16502 sub get_ending_line { return $_[0]->[ENDING_LINE] }
16503 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
16505 sub set_column { $_[0]->[COLUMN] = $_[1] }
16506 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
16507 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
16508 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
16509 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
16510 sub increment_column { $_[0]->[COLUMN] += $_[1] }
16512 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
16513 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
16517 package Perl::Tidy::VerticalAligner;
16519 # The Perl::Tidy::VerticalAligner package collects output lines and
16520 # attempts to line up certain common tokens, such as => and #, which are
16521 # identified by the calling routine.
16523 # There are two main routines: append_line and flush. Append acts as a
16524 # storage buffer, collecting lines into a group which can be vertically
16525 # aligned. When alignment is no longer possible or desirable, it dumps
16526 # the group to flush.
16528 # append_line -----> flush
16536 # Caution: these debug flags produce a lot of output
16537 # They should all be 0 except when debugging small scripts
16539 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
16540 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
16541 use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
16543 my $debug_warning = sub {
16544 print "VALIGN_DEBUGGING with key $_[0]\n";
16547 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
16548 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
16553 $vertical_aligner_self
16555 $maximum_alignment_index
16559 $previous_minimum_jmax_seen
16560 $previous_maximum_jmax_seen
16561 $maximum_line_index
16566 $last_group_level_written
16567 $last_leading_space_count
16571 $last_comment_column
16572 $last_side_comment_line_number
16573 $last_side_comment_length
16574 $last_side_comment_level
16575 $outdented_line_count
16576 $first_outdented_line_at
16577 $last_outdented_line_at
16578 $diagnostics_object
16580 $file_writer_object
16581 @side_comment_history
16582 $comment_leading_space_count
16589 $cached_line_leading_space_count
16590 $cached_seqno_string
16593 $last_nonblank_seqno_string
16597 $rOpts_maximum_line_length
16598 $rOpts_continuation_indentation
16599 $rOpts_indent_columns
16601 $rOpts_entab_leading_whitespace
16604 $rOpts_minimum_space_to_comment
16612 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
16615 # variables describing the entire space group:
16617 $ralignment_list = [];
16619 $last_group_level_written = -1;
16620 $extra_indent_ok = 0; # can we move all lines to the right?
16621 $last_side_comment_length = 0;
16622 $maximum_jmax_seen = 0;
16623 $minimum_jmax_seen = 0;
16624 $previous_minimum_jmax_seen = 0;
16625 $previous_maximum_jmax_seen = 0;
16627 # variables describing each line of the group
16628 @group_lines = (); # list of all lines in group
16630 $outdented_line_count = 0;
16631 $first_outdented_line_at = 0;
16632 $last_outdented_line_at = 0;
16633 $last_side_comment_line_number = 0;
16634 $last_side_comment_level = -1;
16636 # most recent 3 side comments; [ line number, column ]
16637 $side_comment_history[0] = [ -300, 0 ];
16638 $side_comment_history[1] = [ -200, 0 ];
16639 $side_comment_history[2] = [ -100, 0 ];
16641 # write_leader_and_string cache:
16642 $cached_line_text = "";
16643 $cached_line_type = 0;
16644 $cached_line_flag = 0;
16646 $cached_line_valid = 0;
16647 $cached_line_leading_space_count = 0;
16648 $cached_seqno_string = "";
16650 # string of sequence numbers joined together
16651 $seqno_string = "";
16652 $last_nonblank_seqno_string = "";
16654 # frequently used parameters
16655 $rOpts_indent_columns = $rOpts->{'indent-columns'};
16656 $rOpts_tabs = $rOpts->{'tabs'};
16657 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
16658 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
16659 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
16660 $rOpts_valign = $rOpts->{'valign'};
16662 forget_side_comment();
16664 initialize_for_new_group();
16666 $vertical_aligner_self = {};
16667 bless $vertical_aligner_self, $class;
16668 return $vertical_aligner_self;
16671 sub initialize_for_new_group {
16672 $maximum_line_index = -1; # lines in the current group
16673 $maximum_alignment_index = -1; # alignments in current group
16674 $zero_count = 0; # count consecutive lines without tokens
16675 $current_line = undef; # line being matched for alignment
16676 $group_maximum_gap = 0; # largest gap introduced
16678 $marginal_match = 0;
16679 $comment_leading_space_count = 0;
16680 $last_leading_space_count = 0;
16683 # interface to Perl::Tidy::Diagnostics routines
16684 sub write_diagnostics {
16685 if ($diagnostics_object) {
16686 $diagnostics_object->write_diagnostics(@_);
16690 # interface to Perl::Tidy::Logger routines
16692 if ($logger_object) {
16693 $logger_object->warning(@_);
16697 sub write_logfile_entry {
16698 if ($logger_object) {
16699 $logger_object->write_logfile_entry(@_);
16703 sub report_definite_bug {
16704 if ($logger_object) {
16705 $logger_object->report_definite_bug();
16711 # return the number of leading spaces associated with an indentation
16712 # variable $indentation is either a constant number of spaces or an
16713 # object with a get_SPACES method.
16714 my $indentation = shift;
16715 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
16718 sub get_RECOVERABLE_SPACES {
16720 # return the number of spaces (+ means shift right, - means shift left)
16721 # that we would like to shift a group of lines with the same indentation
16722 # to get them to line up with their opening parens
16723 my $indentation = shift;
16724 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
16727 sub get_STACK_DEPTH {
16729 my $indentation = shift;
16730 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
16733 sub make_alignment {
16734 my ( $col, $token ) = @_;
16736 # make one new alignment at column $col which aligns token $token
16737 ++$maximum_alignment_index;
16738 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
16740 starting_column => $col,
16741 matching_token => $token,
16742 starting_line => $maximum_line_index,
16743 ending_line => $maximum_line_index,
16744 serial_number => $maximum_alignment_index,
16746 $ralignment_list->[$maximum_alignment_index] = $alignment;
16750 sub dump_alignments {
16752 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
16753 for my $i ( 0 .. $maximum_alignment_index ) {
16754 my $column = $ralignment_list->[$i]->get_column();
16755 my $starting_column = $ralignment_list->[$i]->get_starting_column();
16756 my $matching_token = $ralignment_list->[$i]->get_matching_token();
16757 my $starting_line = $ralignment_list->[$i]->get_starting_line();
16758 my $ending_line = $ralignment_list->[$i]->get_ending_line();
16760 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
16764 sub save_alignment_columns {
16765 for my $i ( 0 .. $maximum_alignment_index ) {
16766 $ralignment_list->[$i]->save_column();
16770 sub restore_alignment_columns {
16771 for my $i ( 0 .. $maximum_alignment_index ) {
16772 $ralignment_list->[$i]->restore_column();
16776 sub forget_side_comment {
16777 $last_comment_column = 0;
16782 # sub append is called to place one line in the current vertical group.
16784 # The input parameters are:
16785 # $level = indentation level of this line
16786 # $rfields = reference to array of fields
16787 # $rpatterns = reference to array of patterns, one per field
16788 # $rtokens = reference to array of tokens starting fields 1,2,..
16790 # Here is an example of what this package does. In this example,
16791 # we are trying to line up both the '=>' and the '#'.
16793 # '18' => 'grave', # \`
16794 # '19' => 'acute', # `'
16795 # '20' => 'caron', # \v
16796 # <-tabs-><f1-><--field 2 ---><-f3->
16799 # col1 col2 col3 col4
16801 # The calling routine has already broken the entire line into 3 fields as
16802 # indicated. (So the work of identifying promising common tokens has
16803 # already been done).
16805 # In this example, there will be 2 tokens being matched: '=>' and '#'.
16806 # They are the leading parts of fields 2 and 3, but we do need to know
16807 # what they are so that we can dump a group of lines when these tokens
16810 # The fields contain the actual characters of each field. The patterns
16811 # are like the fields, but they contain mainly token types instead
16812 # of tokens, so they have fewer characters. They are used to be
16813 # sure we are matching fields of similar type.
16815 # In this example, there will be 4 column indexes being adjusted. The
16816 # first one is always at zero. The interior columns are at the start of
16817 # the matching tokens, and the last one tracks the maximum line length.
16819 # Basically, each time a new line comes in, it joins the current vertical
16820 # group if possible. Otherwise it causes the current group to be dumped
16821 # and a new group is started.
16823 # For each new group member, the column locations are increased, as
16824 # necessary, to make room for the new fields. When the group is finally
16825 # output, these column numbers are used to compute the amount of spaces of
16826 # padding needed for each field.
16828 # Programming note: the fields are assumed not to have any tab characters.
16829 # Tabs have been previously removed except for tabs in quoted strings and
16830 # side comments. Tabs in these fields can mess up the column counting.
16831 # The log file warns the user if there are any such tabs.
16834 $level, $level_end,
16835 $indentation, $rfields,
16836 $rtokens, $rpatterns,
16837 $is_forced_break, $outdent_long_lines,
16838 $is_terminal_ternary, $is_terminal_statement,
16839 $do_not_pad, $rvertical_tightness_flags,
16843 # number of fields is $jmax
16844 # number of tokens between fields is $jmax-1
16845 my $jmax = $#{$rfields};
16847 my $leading_space_count = get_SPACES($indentation);
16849 # set outdented flag to be sure we either align within statements or
16850 # across statement boundaries, but not both.
16851 my $is_outdented = $last_leading_space_count > $leading_space_count;
16852 $last_leading_space_count = $leading_space_count;
16854 # Patch: undo for hanging side comment
16855 my $is_hanging_side_comment =
16856 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
16857 $is_outdented = 0 if $is_hanging_side_comment;
16859 VALIGN_DEBUG_FLAG_APPEND0 && do {
16861 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
16864 # Validate cached line if necessary: If we can produce a container
16865 # with just 2 lines total by combining an existing cached opening
16866 # token with the closing token to follow, then we will mark both
16867 # cached flags as valid.
16868 if ($rvertical_tightness_flags) {
16869 if ( $maximum_line_index <= 0
16870 && $cached_line_type
16872 && $rvertical_tightness_flags->[2]
16873 && $rvertical_tightness_flags->[2] == $cached_seqno )
16875 $rvertical_tightness_flags->[3] ||= 1;
16876 $cached_line_valid ||= 1;
16880 # do not join an opening block brace with an unbalanced line
16881 # unless requested with a flag value of 2
16882 if ( $cached_line_type == 3
16883 && $maximum_line_index < 0
16884 && $cached_line_flag < 2
16885 && $level_jump != 0 )
16887 $cached_line_valid = 0;
16890 # patch until new aligner is finished
16891 if ($do_not_pad) { my_flush() }
16893 # shouldn't happen:
16894 if ( $level < 0 ) { $level = 0 }
16896 # do not align code across indentation level changes
16897 # or if vertical alignment is turned off for debugging
16898 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
16900 # we are allowed to shift a group of lines to the right if its
16901 # level is greater than the previous and next group
16903 ( $level < $group_level && $last_group_level_written < $group_level );
16907 # If we know that this line will get flushed out by itself because
16908 # of level changes, we can leave the extra_indent_ok flag set.
16909 # That way, if we get an external flush call, we will still be
16910 # able to do some -lp alignment if necessary.
16911 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
16913 $group_level = $level;
16915 # wait until after the above flush to get the leading space
16916 # count because it may have been changed if the -icp flag is in
16918 $leading_space_count = get_SPACES($indentation);
16922 # --------------------------------------------------------------------
16923 # Patch to collect outdentable block COMMENTS
16924 # --------------------------------------------------------------------
16925 my $is_blank_line = "";
16926 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
16927 if ( $group_type eq 'COMMENT' ) {
16931 && $outdent_long_lines
16932 && $leading_space_count == $comment_leading_space_count
16937 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
16945 # --------------------------------------------------------------------
16946 # add dummy fields for terminal ternary
16947 # --------------------------------------------------------------------
16948 if ( $is_terminal_ternary && $current_line ) {
16949 fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
16950 $jmax = @{$rfields} - 1;
16953 # --------------------------------------------------------------------
16954 # add dummy fields for else statement
16955 # --------------------------------------------------------------------
16956 if ( $rfields->[0] =~ /^else\s*$/
16958 && $level_jump == 0 )
16960 fix_terminal_else( $rfields, $rtokens, $rpatterns );
16961 $jmax = @{$rfields} - 1;
16964 # --------------------------------------------------------------------
16965 # Step 1. Handle simple line of code with no fields to match.
16966 # --------------------------------------------------------------------
16967 if ( $jmax <= 0 ) {
16970 if ( $maximum_line_index >= 0
16971 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
16974 # flush the current group if it has some aligned columns..
16975 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
16977 # flush current group if we are just collecting side comments..
16980 # ...and we haven't seen a comment lately
16981 ( $zero_count > 3 )
16983 # ..or if this new line doesn't fit to the left of the comments
16984 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
16985 $group_lines[0]->get_column(0) )
16992 # patch to start new COMMENT group if this comment may be outdented
16993 if ( $is_block_comment
16994 && $outdent_long_lines
16995 && $maximum_line_index < 0 )
16997 $group_type = 'COMMENT';
16998 $comment_leading_space_count = $leading_space_count;
16999 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
17003 # just write this line directly if no current group, no side comment,
17004 # and no space recovery is needed.
17005 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
17007 write_leader_and_string( $leading_space_count, $$rfields[0], 0,
17008 $outdent_long_lines, $rvertical_tightness_flags );
17016 # programming check: (shouldn't happen)
17017 # an error here implies an incorrect call was made
17018 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
17020 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
17022 report_definite_bug();
17025 # --------------------------------------------------------------------
17026 # create an object to hold this line
17027 # --------------------------------------------------------------------
17028 my $new_line = new Perl::Tidy::VerticalAligner::Line(
17030 jmax_original_line => $jmax,
17031 rtokens => $rtokens,
17032 rfields => $rfields,
17033 rpatterns => $rpatterns,
17034 indentation => $indentation,
17035 leading_space_count => $leading_space_count,
17036 outdent_long_lines => $outdent_long_lines,
17038 is_hanging_side_comment => $is_hanging_side_comment,
17039 maximum_line_length => $rOpts->{'maximum-line-length'},
17040 rvertical_tightness_flags => $rvertical_tightness_flags,
17043 # --------------------------------------------------------------------
17044 # It simplifies things to create a zero length side comment
17046 # --------------------------------------------------------------------
17047 make_side_comment( $new_line, $level_end );
17049 # --------------------------------------------------------------------
17050 # Decide if this is a simple list of items.
17051 # There are 3 list types: none, comma, comma-arrow.
17052 # We use this below to be less restrictive in deciding what to align.
17053 # --------------------------------------------------------------------
17054 if ($is_forced_break) {
17055 decide_if_list($new_line);
17058 if ($current_line) {
17060 # --------------------------------------------------------------------
17061 # Allow hanging side comment to join current group, if any
17062 # This will help keep side comments aligned, because otherwise we
17063 # will have to start a new group, making alignment less likely.
17064 # --------------------------------------------------------------------
17065 join_hanging_comment( $new_line, $current_line )
17066 if $is_hanging_side_comment;
17068 # --------------------------------------------------------------------
17069 # If there is just one previous line, and it has more fields
17070 # than the new line, try to join fields together to get a match with
17071 # the new line. At the present time, only a single leading '=' is
17072 # allowed to be compressed out. This is useful in rare cases where
17073 # a table is forced to use old breakpoints because of side comments,
17074 # and the table starts out something like this:
17075 # my %MonthChars = ('0', 'Jan', # side comment
17078 # Eliminating the '=' field will allow the remaining fields to line up.
17079 # This situation does not occur if there are no side comments
17080 # because scan_list would put a break after the opening '('.
17081 # --------------------------------------------------------------------
17082 eliminate_old_fields( $new_line, $current_line );
17084 # --------------------------------------------------------------------
17085 # If the new line has more fields than the current group,
17086 # see if we can match the first fields and combine the remaining
17087 # fields of the new line.
17088 # --------------------------------------------------------------------
17089 eliminate_new_fields( $new_line, $current_line );
17091 # --------------------------------------------------------------------
17092 # Flush previous group unless all common tokens and patterns match..
17093 # --------------------------------------------------------------------
17094 check_match( $new_line, $current_line );
17096 # --------------------------------------------------------------------
17097 # See if there is space for this line in the current group (if any)
17098 # --------------------------------------------------------------------
17099 if ($current_line) {
17100 check_fit( $new_line, $current_line );
17104 # --------------------------------------------------------------------
17105 # Append this line to the current group (or start new group)
17106 # --------------------------------------------------------------------
17107 accept_line($new_line);
17109 # Future update to allow this to vary:
17110 $current_line = $new_line if ( $maximum_line_index == 0 );
17112 my_flush() if ( $group_type eq "TERMINAL" );
17114 # --------------------------------------------------------------------
17115 # Step 8. Some old debugging stuff
17116 # --------------------------------------------------------------------
17117 VALIGN_DEBUG_FLAG_APPEND && do {
17118 print "APPEND fields:";
17119 dump_array(@$rfields);
17120 print "APPEND tokens:";
17121 dump_array(@$rtokens);
17122 print "APPEND patterns:";
17123 dump_array(@$rpatterns);
17128 sub join_hanging_comment {
17131 my $jmax = $line->get_jmax();
17132 return 0 unless $jmax == 1; # must be 2 fields
17133 my $rtokens = $line->get_rtokens();
17134 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
17135 my $rfields = $line->get_rfields();
17136 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
17137 my $old_line = shift;
17138 my $maximum_field_index = $old_line->get_jmax();
17140 unless $maximum_field_index > $jmax; # the current line has more fields
17141 my $rpatterns = $line->get_rpatterns();
17143 $line->set_is_hanging_side_comment(1);
17144 $jmax = $maximum_field_index;
17145 $line->set_jmax($jmax);
17146 $$rfields[$jmax] = $$rfields[1];
17147 $$rtokens[ $jmax - 1 ] = $$rtokens[0];
17148 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
17149 for ( my $j = 1 ; $j < $jmax ; $j++ ) {
17150 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
17151 $$rtokens[ $j - 1 ] = "";
17152 $$rpatterns[ $j - 1 ] = "";
17157 sub eliminate_old_fields {
17159 my $new_line = shift;
17160 my $jmax = $new_line->get_jmax();
17161 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
17162 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
17164 # there must be one previous line
17165 return unless ( $maximum_line_index == 0 );
17167 my $old_line = shift;
17168 my $maximum_field_index = $old_line->get_jmax();
17170 # this line must have fewer fields
17171 return unless $maximum_field_index > $jmax;
17173 # Identify specific cases where field elimination is allowed:
17174 # case=1: both lines have comma-separated lists, and the first
17175 # line has an equals
17176 # case=2: both lines have leading equals
17178 # case 1 is the default
17181 # See if case 2: both lines have leading '='
17182 # We'll require smiliar leading patterns in this case
17183 my $old_rtokens = $old_line->get_rtokens();
17184 my $rtokens = $new_line->get_rtokens();
17185 my $rpatterns = $new_line->get_rpatterns();
17186 my $old_rpatterns = $old_line->get_rpatterns();
17187 if ( $rtokens->[0] =~ /^=\d*$/
17188 && $old_rtokens->[0] eq $rtokens->[0]
17189 && $old_rpatterns->[0] eq $rpatterns->[0] )
17194 # not too many fewer fields in new line for case 1
17195 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
17197 # case 1 must have side comment
17198 my $old_rfields = $old_line->get_rfields();
17201 && length( $$old_rfields[$maximum_field_index] ) == 0 );
17203 my $rfields = $new_line->get_rfields();
17205 my $hid_equals = 0;
17207 my @new_alignments = ();
17208 my @new_fields = ();
17209 my @new_matching_patterns = ();
17210 my @new_matching_tokens = ();
17214 my $current_field = '';
17215 my $current_pattern = '';
17217 # loop over all old tokens
17219 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
17220 $current_field .= $$old_rfields[$k];
17221 $current_pattern .= $$old_rpatterns[$k];
17222 last if ( $j > $jmax - 1 );
17224 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
17226 $new_fields[$j] = $current_field;
17227 $new_matching_patterns[$j] = $current_pattern;
17228 $current_field = '';
17229 $current_pattern = '';
17230 $new_matching_tokens[$j] = $$old_rtokens[$k];
17231 $new_alignments[$j] = $old_line->get_alignment($k);
17236 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
17237 last if ( $case == 2 ); # avoid problems with stuff
17238 # like: $a=$b=$c=$d;
17242 if ( $in_match && $case == 1 )
17243 ; # disallow gaps in matching field types in case 1
17247 # Modify the current state if we are successful.
17248 # We must exactly reach the ends of both lists for success.
17249 if ( ( $j == $jmax )
17250 && ( $current_field eq '' )
17251 && ( $case != 1 || $hid_equals ) )
17253 $k = $maximum_field_index;
17254 $current_field .= $$old_rfields[$k];
17255 $current_pattern .= $$old_rpatterns[$k];
17256 $new_fields[$j] = $current_field;
17257 $new_matching_patterns[$j] = $current_pattern;
17259 $new_alignments[$j] = $old_line->get_alignment($k);
17260 $maximum_field_index = $j;
17262 $old_line->set_alignments(@new_alignments);
17263 $old_line->set_jmax($jmax);
17264 $old_line->set_rtokens( \@new_matching_tokens );
17265 $old_line->set_rfields( \@new_fields );
17266 $old_line->set_rpatterns( \@$rpatterns );
17270 # create an empty side comment if none exists
17271 sub make_side_comment {
17272 my $new_line = shift;
17273 my $level_end = shift;
17274 my $jmax = $new_line->get_jmax();
17275 my $rtokens = $new_line->get_rtokens();
17277 # if line does not have a side comment...
17278 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
17279 my $rfields = $new_line->get_rfields();
17280 my $rpatterns = $new_line->get_rpatterns();
17281 $$rtokens[$jmax] = '#';
17282 $$rfields[ ++$jmax ] = '';
17283 $$rpatterns[$jmax] = '#';
17284 $new_line->set_jmax($jmax);
17285 $new_line->set_jmax_original_line($jmax);
17288 # line has a side comment..
17291 # don't remember old side comment location for very long
17292 my $line_number = $vertical_aligner_self->get_output_line_number();
17293 my $rfields = $new_line->get_rfields();
17295 $line_number - $last_side_comment_line_number > 12
17297 # and don't remember comment location across block level changes
17298 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
17301 forget_side_comment();
17303 $last_side_comment_line_number = $line_number;
17304 $last_side_comment_level = $level_end;
17308 sub decide_if_list {
17312 # A list will be taken to be a line with a forced break in which all
17313 # of the field separators are commas or comma-arrows (except for the
17316 # List separator tokens are things like ',3' or '=>2',
17317 # where the trailing digit is the nesting depth. Allow braces
17318 # to allow nested list items.
17319 my $rtokens = $line->get_rtokens();
17320 my $test_token = $$rtokens[0];
17321 if ( $test_token =~ /^(\,|=>)/ ) {
17322 my $list_type = $test_token;
17323 my $jmax = $line->get_jmax();
17325 foreach ( 1 .. $jmax - 2 ) {
17326 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
17331 $line->set_list_type($list_type);
17335 sub eliminate_new_fields {
17337 return unless ( $maximum_line_index >= 0 );
17338 my $new_line = shift;
17339 my $old_line = shift;
17340 my $jmax = $new_line->get_jmax();
17342 my $old_rtokens = $old_line->get_rtokens();
17343 my $rtokens = $new_line->get_rtokens();
17344 my $is_assignment =
17345 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] )
17346 || $group_type eq "TERMINAL" );
17348 # must be monotonic variation
17349 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
17351 # must be more fields in the new line
17352 my $maximum_field_index = $old_line->get_jmax();
17353 return unless ( $maximum_field_index < $jmax );
17355 unless ($is_assignment) {
17357 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
17358 ; # only if monotonic
17360 # never combine fields of a comma list
17362 unless ( $maximum_field_index > 1 )
17363 && ( $new_line->get_list_type() !~ /^,/ );
17366 my $rfields = $new_line->get_rfields();
17367 my $rpatterns = $new_line->get_rpatterns();
17368 my $old_rpatterns = $old_line->get_rpatterns();
17370 # loop over all OLD tokens except comment and check match
17373 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
17374 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
17375 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] )
17376 && $group_type ne "TERMINAL" )
17383 # first tokens agree, so combine extra new tokens
17385 for $k ( $maximum_field_index .. $jmax - 1 ) {
17387 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
17388 $$rfields[$k] = "";
17389 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
17390 $$rpatterns[$k] = "";
17393 $$rtokens[ $maximum_field_index - 1 ] = '#';
17394 $$rfields[$maximum_field_index] = $$rfields[$jmax];
17395 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
17396 $jmax = $maximum_field_index;
17398 $new_line->set_jmax($jmax);
17401 sub fix_terminal_ternary {
17403 # Add empty fields as necessary to align a ternary term
17408 # : $year % 100 ? 1
17409 # : $year % 400 ? 0
17412 my ( $rfields, $rtokens, $rpatterns ) = @_;
17414 my $jmax = @{$rfields} - 1;
17415 my $old_line = $group_lines[$maximum_line_index];
17416 my $rfields_old = $old_line->get_rfields();
17418 my $rpatterns_old = $old_line->get_rpatterns();
17419 my $rtokens_old = $old_line->get_rtokens();
17420 my $maximum_field_index = $old_line->get_jmax();
17422 # look for the question mark after the :
17424 my $depth_question;
17426 for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
17427 my $tok = $rtokens_old->[$j];
17428 if ( $tok =~ /^\?(\d+)$/ ) {
17429 $depth_question = $1;
17431 # depth must be correct
17432 next unless ( $depth_question eq $group_level );
17435 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
17436 $pad = " " x length($1);
17439 return; # shouldn't happen
17444 return unless ( defined($jquestion) ); # shouldn't happen
17446 # Now splice the tokens and patterns of the previous line
17447 # into the else line to insure a match. Add empty fields
17449 my $jadd = $jquestion;
17451 # Work on copies of the actual arrays in case we have
17452 # to return due to an error
17453 my @fields = @{$rfields};
17454 my @patterns = @{$rpatterns};
17455 my @tokens = @{$rtokens};
17457 VALIGN_DEBUG_FLAG_TERNARY && do {
17459 print "CURRENT FIELDS=<@{$rfields_old}>\n";
17460 print "CURRENT TOKENS=<@{$rtokens_old}>\n";
17461 print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
17462 print "UNMODIFIED FIELDS=<@{$rfields}>\n";
17463 print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
17464 print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
17467 # handle cases of leading colon on this line
17468 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
17470 my ( $colon, $therest ) = ( $1, $2 );
17472 # Handle sub-case of first field with leading colon plus additional code
17473 # This is the usual situation as at the '1' below:
17475 # : $year % 400 ? 0
17479 # Split the first field after the leading colon and insert padding.
17480 # Note that this padding will remain even if the terminal value goes
17481 # out on a separate line. This does not seem to look to bad, so no
17482 # mechanism has been included to undo it.
17483 my $field1 = shift @fields;
17484 unshift @fields, ( $colon, $pad . $therest );
17486 # change the leading pattern from : to ?
17487 return unless ( $patterns[0] =~ s/^\:/?/ );
17489 # install leading tokens and patterns of existing line
17490 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
17491 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
17493 # insert appropriate number of empty fields
17494 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
17497 # handle sub-case of first field just equal to leading colon.
17498 # This can happen for example in the example below where
17499 # the leading '(' would create a new alignment token
17500 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
17501 # : ( $mname = $name . '->' );
17504 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
17506 # prepend a leading ? onto the second pattern
17507 $patterns[1] = "?b" . $patterns[1];
17509 # pad the second field
17510 $fields[1] = $pad . $fields[1];
17512 # install leading tokens and patterns of existing line, replacing
17513 # leading token and inserting appropriate number of empty fields
17514 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
17515 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
17516 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
17520 # Handle case of no leading colon on this line. This will
17521 # be the case when -wba=':' is used. For example,
17522 # $year % 400 ? 0 :
17526 # install leading tokens and patterns of existing line
17527 $patterns[0] = '?' . 'b' . $patterns[0];
17528 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
17529 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
17531 # insert appropriate number of empty fields
17532 $jadd = $jquestion + 1;
17533 $fields[0] = $pad . $fields[0];
17534 splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
17537 VALIGN_DEBUG_FLAG_TERNARY && do {
17539 print "MODIFIED TOKENS=<@tokens>\n";
17540 print "MODIFIED PATTERNS=<@patterns>\n";
17541 print "MODIFIED FIELDS=<@fields>\n";
17544 # all ok .. update the arrays
17545 @{$rfields} = @fields;
17546 @{$rtokens} = @tokens;
17547 @{$rpatterns} = @patterns;
17549 # force a flush after this line
17550 $group_type = "TERMINAL";
17554 sub fix_terminal_else {
17556 # Add empty fields as necessary to align a balanced terminal
17557 # else block to a previous if/elsif/unless block,
17560 # if ( 1 || $x ) { print "ok 13\n"; }
17561 # else { print "not ok 13\n"; }
17563 my ( $rfields, $rtokens, $rpatterns ) = @_;
17564 my $jmax = @{$rfields} - 1;
17565 return unless ( $jmax > 0 );
17567 # check for balanced else block following if/elsif/unless
17568 my $rfields_old = $current_line->get_rfields();
17570 # TBD: add handling for 'case'
17571 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
17573 # look for the opening brace after the else, and extrace the depth
17574 my $tok_brace = $rtokens->[0];
17576 if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; }
17578 # probably: "else # side_comment"
17581 my $rpatterns_old = $current_line->get_rpatterns();
17582 my $rtokens_old = $current_line->get_rtokens();
17583 my $maximum_field_index = $current_line->get_jmax();
17585 # be sure the previous if/elsif is followed by an opening paren
17587 my $tok_paren = '(' . $depth_brace;
17588 my $tok_test = $rtokens_old->[$jparen];
17589 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
17591 # Now find the opening block brace
17593 for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
17594 my $tok = $rtokens_old->[$j];
17595 if ( $tok eq $tok_brace ) {
17600 return unless ( defined($jbrace) ); # shouldn't happen
17602 # Now splice the tokens and patterns of the previous line
17603 # into the else line to insure a match. Add empty fields
17605 my $jadd = $jbrace - $jparen;
17606 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
17607 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
17608 splice( @{$rfields}, 1, 0, ('') x $jadd );
17610 # force a flush after this line if it does not follow a case
17611 $group_type = "TERMINAL"
17612 unless ( $rfields_old->[0] =~ /^case\s*$/ );
17618 my $new_line = shift;
17619 my $old_line = shift;
17621 my $jmax = $new_line->get_jmax();
17622 my $maximum_field_index = $old_line->get_jmax();
17624 # flush if this line has too many fields
17625 if ( $jmax > $maximum_field_index ) { my_flush(); return }
17627 # flush if adding this line would make a non-monotonic field count
17629 ( $maximum_field_index > $jmax ) # this has too few fields
17631 ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic
17632 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
17640 # otherwise append this line if everything matches
17641 my $jmax_original_line = $new_line->get_jmax_original_line();
17642 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
17643 my $rtokens = $new_line->get_rtokens();
17644 my $rfields = $new_line->get_rfields();
17645 my $rpatterns = $new_line->get_rpatterns();
17646 my $list_type = $new_line->get_list_type();
17648 my $group_list_type = $old_line->get_list_type();
17649 my $old_rpatterns = $old_line->get_rpatterns();
17650 my $old_rtokens = $old_line->get_rtokens();
17652 my $jlimit = $jmax - 1;
17653 if ( $maximum_field_index > $jmax ) {
17654 $jlimit = $jmax_original_line;
17655 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
17658 my $everything_matches = 1;
17660 # common list types always match
17661 unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
17662 || $is_hanging_side_comment )
17665 my $leading_space_count = $new_line->get_leading_space_count();
17666 my $saw_equals = 0;
17667 for my $j ( 0 .. $jlimit ) {
17670 my $old_tok = $$old_rtokens[$j];
17671 my $new_tok = $$rtokens[$j];
17673 # Dumb down the match AFTER an equals and
17674 # also dumb down after seeing a ? ternary operator ...
17675 # Everything after a + is the token which preceded the previous
17676 # opening paren (container name). We won't require them to match.
17677 if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
17679 $old_tok =~ s/\+.*$//;
17682 if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 }
17684 # we never match if the matching tokens differ
17686 && $old_tok ne $new_tok )
17691 # otherwise, if patterns match, we always have a match.
17692 # However, if patterns don't match, we have to be careful...
17693 elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
17695 # We have to be very careful about aligning commas when the
17696 # pattern's don't match, because it can be worse to create an
17697 # alignment where none is needed than to omit one. The current
17698 # rule: if we are within a matching sub call (indicated by '+'
17699 # in the matching token), we'll allow a marginal match, but
17702 # Here's an example where we'd like to align the '='
17703 # my $cfile = File::Spec->catfile( 't', 'callext.c' );
17704 # my $inc = File::Spec->catdir( 'Basic', 'Core' );
17705 # because the function names differ.
17706 # Future alignment logic should make this unnecessary.
17708 # Here's an example where the ','s are not contained in a call.
17709 # The first line below should probably not match the next two:
17710 # ( $a, $b ) = ( $b, $r );
17711 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
17712 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
17713 if ( $new_tok =~ /^,/ ) {
17714 if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
17715 $marginal_match = 1;
17722 # parens don't align well unless patterns match
17723 elsif ( $new_tok =~ /^\(/ ) {
17727 # Handle an '=' alignment with different patterns to
17729 elsif ( $new_tok =~ /^=\d*$/ ) {
17733 # It is best to be a little restrictive when
17734 # aligning '=' tokens. Here is an example of
17735 # two lines that we will not align:
17738 # The problem is that one is a 'my' declaration,
17739 # and the other isn't, so they're not very similar.
17740 # We will filter these out by comparing the first
17741 # letter of the pattern. This is crude, but works
17744 substr( $$old_rpatterns[$j], 0, 1 ) ne
17745 substr( $$rpatterns[$j], 0, 1 ) )
17750 # If we pass that test, we'll call it a marginal match.
17751 # Here is an example of a marginal match:
17753 # $op = compile_bblock($op);
17754 # The left tokens are both identifiers, but
17755 # one accesses a hash and the other doesn't.
17756 # We'll let this be a tentative match and undo
17757 # it later if we don't find more than 2 lines
17759 elsif ( $maximum_line_index == 0 ) {
17760 $marginal_match = 1;
17765 # Don't let line with fewer fields increase column widths
17767 if ( $maximum_field_index > $jmax ) {
17769 length( $$rfields[$j] ) - $old_line->current_field_width($j);
17772 $pad += $leading_space_count;
17775 # TESTING: suspend this rule to allow last lines to join
17776 if ( $pad > 0 ) { $match = 0; }
17780 $everything_matches = 0;
17786 if ( $maximum_field_index > $jmax ) {
17788 if ($everything_matches) {
17790 my $comment = $$rfields[$jmax];
17791 for $jmax ( $jlimit .. $maximum_field_index ) {
17792 $$rtokens[$jmax] = $$old_rtokens[$jmax];
17793 $$rfields[ ++$jmax ] = '';
17794 $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
17796 $$rfields[$jmax] = $comment;
17797 $new_line->set_jmax($jmax);
17801 my_flush() unless ($everything_matches);
17806 return unless ( $maximum_line_index >= 0 );
17807 my $new_line = shift;
17808 my $old_line = shift;
17810 my $jmax = $new_line->get_jmax();
17811 my $leading_space_count = $new_line->get_leading_space_count();
17812 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
17813 my $rtokens = $new_line->get_rtokens();
17814 my $rfields = $new_line->get_rfields();
17815 my $rpatterns = $new_line->get_rpatterns();
17817 my $group_list_type = $group_lines[0]->get_list_type();
17819 my $padding_so_far = 0;
17820 my $padding_available = $old_line->get_available_space_on_right();
17822 # save current columns in case this doesn't work
17823 save_alignment_columns();
17825 my ( $j, $pad, $eight );
17826 my $maximum_field_index = $old_line->get_jmax();
17827 for $j ( 0 .. $jmax ) {
17829 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
17832 $pad += $leading_space_count;
17835 # remember largest gap of the group, excluding gap to side comment
17837 && $group_maximum_gap < -$pad
17839 && $j < $jmax - 1 )
17841 $group_maximum_gap = -$pad;
17846 ## This patch helps sometimes, but it doesn't check to see if
17847 ## the line is too long even without the side comment. It needs
17849 ##don't let a long token with no trailing side comment push
17850 ##side comments out, or end a group. (sidecmt1.t)
17851 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
17853 # This line will need space; lets see if we want to accept it..
17856 # not if this won't fit
17857 ( $pad > $padding_available )
17859 # previously, there were upper bounds placed on padding here
17860 # (maximum_whitespace_columns), but they were not really helpful
17865 # revert to starting state then flush; things didn't work out
17866 restore_alignment_columns();
17871 # patch to avoid excessive gaps in previous lines,
17872 # due to a line of fewer fields.
17873 # return join( ".",
17874 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
17875 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
17876 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
17878 # looks ok, squeeze this field in
17879 $old_line->increase_field_width( $j, $pad );
17880 $padding_available -= $pad;
17882 # remember largest gap of the group, excluding gap to side comment
17883 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
17884 $group_maximum_gap = $pad;
17891 # The current line either starts a new alignment group or is
17892 # accepted into the current alignment group.
17893 my $new_line = shift;
17894 $group_lines[ ++$maximum_line_index ] = $new_line;
17896 # initialize field lengths if starting new group
17897 if ( $maximum_line_index == 0 ) {
17899 my $jmax = $new_line->get_jmax();
17900 my $rfields = $new_line->get_rfields();
17901 my $rtokens = $new_line->get_rtokens();
17903 my $col = $new_line->get_leading_space_count();
17905 for $j ( 0 .. $jmax ) {
17906 $col += length( $$rfields[$j] );
17908 # create initial alignments for the new group
17910 if ( $j < $jmax ) { $token = $$rtokens[$j] }
17911 my $alignment = make_alignment( $col, $token );
17912 $new_line->set_alignment( $j, $alignment );
17915 $maximum_jmax_seen = $jmax;
17916 $minimum_jmax_seen = $jmax;
17919 # use previous alignments otherwise
17921 my @new_alignments =
17922 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
17923 $new_line->set_alignments(@new_alignments);
17926 # remember group jmax extremes for next call to append_line
17927 $previous_minimum_jmax_seen = $minimum_jmax_seen;
17928 $previous_maximum_jmax_seen = $maximum_jmax_seen;
17933 # debug routine to dump array contents
17938 # flush() sends the current Perl::Tidy::VerticalAligner group down the
17939 # pipeline to Perl::Tidy::FileWriter.
17941 # This is the external flush, which also empties the cache
17944 if ( $maximum_line_index < 0 ) {
17945 if ($cached_line_type) {
17946 $seqno_string = $cached_seqno_string;
17947 entab_and_output( $cached_line_text,
17948 $cached_line_leading_space_count,
17949 $last_group_level_written );
17950 $cached_line_type = 0;
17951 $cached_line_text = "";
17952 $cached_seqno_string = "";
17960 # This is the internal flush, which leaves the cache intact
17963 return if ( $maximum_line_index < 0 );
17965 # handle a group of comment lines
17966 if ( $group_type eq 'COMMENT' ) {
17968 VALIGN_DEBUG_FLAG_APPEND0 && do {
17969 my ( $a, $b, $c ) = caller();
17971 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
17974 my $leading_space_count = $comment_leading_space_count;
17975 my $leading_string = get_leading_string($leading_space_count);
17977 # zero leading space count if any lines are too long
17978 my $max_excess = 0;
17979 for my $i ( 0 .. $maximum_line_index ) {
17980 my $str = $group_lines[$i];
17982 length($str) + $leading_space_count - $rOpts_maximum_line_length;
17983 if ( $excess > $max_excess ) {
17984 $max_excess = $excess;
17988 if ( $max_excess > 0 ) {
17989 $leading_space_count -= $max_excess;
17990 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
17991 $last_outdented_line_at =
17992 $file_writer_object->get_output_line_number();
17993 unless ($outdented_line_count) {
17994 $first_outdented_line_at = $last_outdented_line_at;
17996 $outdented_line_count += ( $maximum_line_index + 1 );
17999 # write the group of lines
18000 my $outdent_long_lines = 0;
18001 for my $i ( 0 .. $maximum_line_index ) {
18002 write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
18003 $outdent_long_lines, "" );
18007 # handle a group of code lines
18010 VALIGN_DEBUG_FLAG_APPEND0 && do {
18011 my $group_list_type = $group_lines[0]->get_list_type();
18012 my ( $a, $b, $c ) = caller();
18013 my $maximum_field_index = $group_lines[0]->get_jmax();
18015 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
18019 # some small groups are best left unaligned
18020 my $do_not_align = decide_if_aligned();
18022 # optimize side comment location
18023 $do_not_align = adjust_side_comment($do_not_align);
18025 # recover spaces for -lp option if possible
18026 my $extra_leading_spaces = get_extra_leading_spaces();
18028 # all lines of this group have the same basic leading spacing
18029 my $group_leader_length = $group_lines[0]->get_leading_space_count();
18031 # add extra leading spaces if helpful
18033 improve_continuation_indentation( $do_not_align,
18034 $group_leader_length );
18036 # loop to output all lines
18037 for my $i ( 0 .. $maximum_line_index ) {
18038 my $line = $group_lines[$i];
18039 write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
18040 $group_leader_length, $extra_leading_spaces );
18043 initialize_for_new_group();
18046 sub decide_if_aligned {
18048 # Do not try to align two lines which are not really similar
18049 return unless $maximum_line_index == 1;
18050 return if ( $group_type eq "TERMINAL" );
18052 my $group_list_type = $group_lines[0]->get_list_type();
18054 my $do_not_align = (
18056 # always align lists
18061 # don't align if it was just a marginal match
18064 # don't align two lines with big gap
18065 || $group_maximum_gap > 12
18067 # or lines with differing number of alignment tokens
18068 # TODO: this could be improved. It occasionally rejects
18070 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
18074 # But try to convert them into a simple comment group if the first line
18075 # a has side comment
18076 my $rfields = $group_lines[0]->get_rfields();
18077 my $maximum_field_index = $group_lines[0]->get_jmax();
18079 && ( $maximum_line_index > 0 )
18080 && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
18085 return $do_not_align;
18088 sub adjust_side_comment {
18090 my $do_not_align = shift;
18092 # let's see if we can move the side comment field out a little
18093 # to improve readability (the last field is always a side comment field)
18094 my $have_side_comment = 0;
18095 my $first_side_comment_line = -1;
18096 my $maximum_field_index = $group_lines[0]->get_jmax();
18097 for my $i ( 0 .. $maximum_line_index ) {
18098 my $line = $group_lines[$i];
18100 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
18101 $have_side_comment = 1;
18102 $first_side_comment_line = $i;
18107 my $kmax = $maximum_field_index + 1;
18109 if ($have_side_comment) {
18111 my $line = $group_lines[0];
18113 # the maximum space without exceeding the line length:
18114 my $avail = $line->get_available_space_on_right();
18116 # try to use the previous comment column
18117 my $side_comment_column = $line->get_column( $kmax - 2 );
18118 my $move = $last_comment_column - $side_comment_column;
18120 ## my $sc_line0 = $side_comment_history[0]->[0];
18121 ## my $sc_col0 = $side_comment_history[0]->[1];
18122 ## my $sc_line1 = $side_comment_history[1]->[0];
18123 ## my $sc_col1 = $side_comment_history[1]->[1];
18124 ## my $sc_line2 = $side_comment_history[2]->[0];
18125 ## my $sc_col2 = $side_comment_history[2]->[1];
18127 ## # FUTURE UPDATES:
18128 ## # Be sure to ignore 'do not align' and '} # end comments'
18129 ## # Find first $move > 0 and $move <= $avail as follows:
18130 ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
18131 ## # 2. try sc_col2 if (line-sc_line2) < 12
18132 ## # 3. try min possible space, plus up to 8,
18133 ## # 4. try min possible space
18135 if ( $kmax > 0 && !$do_not_align ) {
18137 # but if this doesn't work, give up and use the minimum space
18138 if ( $move > $avail ) {
18139 $move = $rOpts_minimum_space_to_comment - 1;
18142 # but we want some minimum space to the comment
18143 my $min_move = $rOpts_minimum_space_to_comment - 1;
18145 && $last_side_comment_length > 0
18146 && ( $first_side_comment_line == 0 )
18147 && $group_level == $last_group_level_written )
18152 if ( $move < $min_move ) {
18156 # prevously, an upper bound was placed on $move here,
18157 # (maximum_space_to_comment), but it was not helpful
18159 # don't exceed the available space
18160 if ( $move > $avail ) { $move = $avail }
18162 # we can only increase space, never decrease
18164 $line->increase_field_width( $maximum_field_index - 1, $move );
18167 # remember this column for the next group
18168 $last_comment_column = $line->get_column( $kmax - 2 );
18172 # try to at least line up the existing side comment location
18173 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
18174 $line->increase_field_width( $maximum_field_index - 1, $move );
18178 # reset side comment column if we can't align
18180 forget_side_comment();
18184 return $do_not_align;
18187 sub improve_continuation_indentation {
18188 my ( $do_not_align, $group_leader_length ) = @_;
18190 # See if we can increase the continuation indentation
18191 # to move all continuation lines closer to the next field
18192 # (unless it is a comment).
18194 # '$min_ci_gap'is the extra indentation that we may need to introduce.
18195 # We will only introduce this to fields which already have some ci.
18196 # Without this variable, we would occasionally get something like this
18199 # use overload '+' => \&plus,
18201 # '*' => \&multiply,
18204 # 'atan2' => \&atan2,
18206 # Whereas with this variable, we can shift variables over to get this:
18208 # use overload '+' => \&plus,
18210 # '*' => \&multiply,
18213 # 'atan2' => \&atan2,
18215 ## BUB: Deactivated####################
18216 # The trouble with this patch is that it may, for example,
18217 # move in some 'or's or ':'s, and leave some out, so that the
18218 # left edge alignment suffers.
18220 ###########################################
18222 my $maximum_field_index = $group_lines[0]->get_jmax();
18224 my $min_ci_gap = $rOpts_maximum_line_length;
18225 if ( $maximum_field_index > 1 && !$do_not_align ) {
18227 for my $i ( 0 .. $maximum_line_index ) {
18228 my $line = $group_lines[$i];
18229 my $leading_space_count = $line->get_leading_space_count();
18230 my $rfields = $line->get_rfields();
18232 my $gap = $line->get_column(0) - $leading_space_count -
18233 length( $$rfields[0] );
18235 if ( $leading_space_count > $group_leader_length ) {
18236 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
18240 if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
18247 return $min_ci_gap;
18250 sub write_vertically_aligned_line {
18252 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
18253 $extra_leading_spaces )
18255 my $rfields = $line->get_rfields();
18256 my $leading_space_count = $line->get_leading_space_count();
18257 my $outdent_long_lines = $line->get_outdent_long_lines();
18258 my $maximum_field_index = $line->get_jmax();
18259 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
18261 # add any extra spaces
18262 if ( $leading_space_count > $group_leader_length ) {
18263 $leading_space_count += $min_ci_gap;
18266 my $str = $$rfields[0];
18268 # loop to concatenate all fields of this line and needed padding
18269 my $total_pad_count = 0;
18271 for $j ( 1 .. $maximum_field_index ) {
18273 # skip zero-length side comments
18275 if ( ( $j == $maximum_field_index )
18276 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
18279 # compute spaces of padding before this field
18280 my $col = $line->get_column( $j - 1 );
18281 $pad = $col - ( length($str) + $leading_space_count );
18283 if ($do_not_align) {
18285 ( $j < $maximum_field_index )
18287 : $rOpts_minimum_space_to_comment - 1;
18290 # accumulate the padding
18291 if ( $pad > 0 ) { $total_pad_count += $pad; }
18294 if ( !defined $$rfields[$j] ) {
18295 write_diagnostics("UNDEFined field at j=$j\n");
18298 # only add padding when we have a finite field;
18299 # this avoids extra terminal spaces if we have empty fields
18300 if ( length( $$rfields[$j] ) > 0 ) {
18301 $str .= ' ' x $total_pad_count;
18302 $total_pad_count = 0;
18303 $str .= $$rfields[$j];
18306 $total_pad_count = 0;
18309 # update side comment history buffer
18310 if ( $j == $maximum_field_index ) {
18311 my $lineno = $file_writer_object->get_output_line_number();
18312 shift @side_comment_history;
18313 push @side_comment_history, [ $lineno, $col ];
18317 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
18319 # ship this line off
18320 write_leader_and_string( $leading_space_count + $extra_leading_spaces,
18321 $str, $side_comment_length, $outdent_long_lines,
18322 $rvertical_tightness_flags );
18325 sub get_extra_leading_spaces {
18327 #----------------------------------------------------------
18328 # Define any extra indentation space (for the -lp option).
18330 # If a list has side comments, sub scan_list must dump the
18331 # list before it sees everything. When this happens, it sets
18332 # the indentation to the standard scheme, but notes how
18333 # many spaces it would have liked to use. We may be able
18334 # to recover that space here in the event that that all of the
18335 # lines of a list are back together again.
18336 #----------------------------------------------------------
18338 my $extra_leading_spaces = 0;
18339 if ($extra_indent_ok) {
18340 my $object = $group_lines[0]->get_indentation();
18341 if ( ref($object) ) {
18342 my $extra_indentation_spaces_wanted =
18343 get_RECOVERABLE_SPACES($object);
18345 # all indentation objects must be the same
18347 for $i ( 1 .. $maximum_line_index ) {
18348 if ( $object != $group_lines[$i]->get_indentation() ) {
18349 $extra_indentation_spaces_wanted = 0;
18354 if ($extra_indentation_spaces_wanted) {
18356 # the maximum space without exceeding the line length:
18357 my $avail = $group_lines[0]->get_available_space_on_right();
18358 $extra_leading_spaces =
18359 ( $avail > $extra_indentation_spaces_wanted )
18360 ? $extra_indentation_spaces_wanted
18363 # update the indentation object because with -icp the terminal
18364 # ');' will use the same adjustment.
18365 $object->permanently_decrease_AVAILABLE_SPACES(
18366 -$extra_leading_spaces );
18370 return $extra_leading_spaces;
18373 sub combine_fields {
18375 # combine all fields except for the comment field ( sidecmt.t )
18377 my $maximum_field_index = $group_lines[0]->get_jmax();
18378 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
18379 my $line = $group_lines[$j];
18380 my $rfields = $line->get_rfields();
18381 foreach ( 1 .. $maximum_field_index - 1 ) {
18382 $$rfields[0] .= $$rfields[$_];
18384 $$rfields[1] = $$rfields[$maximum_field_index];
18386 $line->set_jmax(1);
18387 $line->set_column( 0, 0 );
18388 $line->set_column( 1, 0 );
18391 $maximum_field_index = 1;
18393 for $j ( 0 .. $maximum_line_index ) {
18394 my $line = $group_lines[$j];
18395 my $rfields = $line->get_rfields();
18396 for $k ( 0 .. $maximum_field_index ) {
18397 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
18399 $pad += $group_lines[$j]->get_leading_space_count();
18402 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
18408 sub get_output_line_number {
18410 # the output line number reported to a caller is the number of items
18411 # written plus the number of items in the buffer
18413 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
18416 sub write_leader_and_string {
18418 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
18419 $rvertical_tightness_flags )
18422 # handle outdenting of long lines:
18423 if ($outdent_long_lines) {
18425 length($str) - $side_comment_length + $leading_space_count -
18426 $rOpts_maximum_line_length;
18427 if ( $excess > 0 ) {
18428 $leading_space_count = 0;
18429 $last_outdented_line_at =
18430 $file_writer_object->get_output_line_number();
18432 unless ($outdented_line_count) {
18433 $first_outdented_line_at = $last_outdented_line_at;
18435 $outdented_line_count++;
18439 # Make preliminary leading whitespace. It could get changed
18440 # later by entabbing, so we have to keep track of any changes
18441 # to the leading_space_count from here on.
18442 my $leading_string =
18443 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
18445 # Unpack any recombination data; it was packed by
18446 # sub send_lines_to_vertical_aligner. Contents:
18448 # [0] type: 1=opening 2=closing 3=opening block brace
18449 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
18450 # if closing: spaces of padding to use
18451 # [2] sequence number of container
18452 # [3] valid flag: do not append if this flag is false
18454 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
18456 if ($rvertical_tightness_flags) {
18458 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
18460 ) = @{$rvertical_tightness_flags};
18463 $seqno_string = $seqno_end;
18465 # handle any cached line ..
18466 # either append this line to it or write it out
18467 if ( length($cached_line_text) ) {
18469 if ( !$cached_line_valid ) {
18470 entab_and_output( $cached_line_text,
18471 $cached_line_leading_space_count,
18472 $last_group_level_written );
18475 # handle cached line with opening container token
18476 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
18478 my $gap = $leading_space_count - length($cached_line_text);
18480 # handle option of just one tight opening per line:
18481 if ( $cached_line_flag == 1 ) {
18482 if ( defined($open_or_close) && $open_or_close == 1 ) {
18488 $leading_string = $cached_line_text . ' ' x $gap;
18489 $leading_space_count = $cached_line_leading_space_count;
18490 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
18493 entab_and_output( $cached_line_text,
18494 $cached_line_leading_space_count,
18495 $last_group_level_written );
18499 # handle cached line to place before this closing container token
18501 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
18503 if ( length($test_line) <= $rOpts_maximum_line_length ) {
18505 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
18507 # Patch to outdent closing tokens ending # in ');'
18508 # If we are joining a line like ');' to a previous stacked
18509 # set of closing tokens, then decide if we may outdent the
18510 # combined stack to the indentation of the ');'. Since we
18511 # should not normally outdent any of the other tokens more than
18512 # the indentation of the lines that contained them, we will
18513 # only do this if all of the corresponding opening
18514 # tokens were on the same line. This can happen with
18515 # -sot and -sct. For example, it is ok here:
18516 # __PACKAGE__->load_components( qw(
18521 # But, for example, we do not outdent in this example because
18522 # that would put the closing sub brace out farther than the
18523 # opening sub brace:
18525 # perltidy -sot -sct
18527 # '<Control-f>' => sub {
18529 # my $e = $c->XEvent;
18530 # itemsUnderArea $c;
18533 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
18535 # The way to tell this is if the stacked sequence numbers
18536 # of this output line are the reverse of the stacked
18537 # sequence numbers of the previous non-blank line of
18538 # sequence numbers. So we can join if the previous
18539 # nonblank string of tokens is the mirror image. For
18540 # example if stack )}] is 13:8:6 then we are looking for a
18541 # leading stack like [{( which is 6:8:13 We only need to
18542 # check the two ends, because the intermediate tokens must
18543 # fall in order. Note on speed: having to split on colons
18544 # and eliminate multiple colons might appear to be slow,
18545 # but it's not an issue because we almost never come
18546 # through here. In a typical file we don't.
18547 $seqno_string =~ s/^:+//;
18548 $last_nonblank_seqno_string =~ s/^:+//;
18549 $seqno_string =~ s/:+/:/g;
18550 $last_nonblank_seqno_string =~ s/:+/:/g;
18552 # how many spaces can we outdent?
18554 $cached_line_leading_space_count - $leading_space_count;
18556 && length($seqno_string)
18557 && length($last_nonblank_seqno_string) ==
18558 length($seqno_string) )
18561 ( split ':', $last_nonblank_seqno_string );
18562 my @seqno_now = ( split ':', $seqno_string );
18563 if ( $seqno_now[-1] == $seqno_last[0]
18564 && $seqno_now[0] == $seqno_last[-1] )
18568 # for absolute safety, be sure we only remove
18570 my $ws = substr( $test_line, 0, $diff );
18571 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
18573 $test_line = substr( $test_line, $diff );
18574 $cached_line_leading_space_count -= $diff;
18577 # shouldn't happen, but not critical:
18579 ## ERROR transferring indentation here
18586 $leading_string = "";
18587 $leading_space_count = $cached_line_leading_space_count;
18590 entab_and_output( $cached_line_text,
18591 $cached_line_leading_space_count,
18592 $last_group_level_written );
18596 $cached_line_type = 0;
18597 $cached_line_text = "";
18599 # make the line to be written
18600 my $line = $leading_string . $str;
18602 # write or cache this line
18603 if ( !$open_or_close || $side_comment_length > 0 ) {
18604 entab_and_output( $line, $leading_space_count, $group_level );
18607 $cached_line_text = $line;
18608 $cached_line_type = $open_or_close;
18609 $cached_line_flag = $tightness_flag;
18610 $cached_seqno = $seqno;
18611 $cached_line_valid = $valid;
18612 $cached_line_leading_space_count = $leading_space_count;
18613 $cached_seqno_string = $seqno_string;
18616 $last_group_level_written = $group_level;
18617 $last_side_comment_length = $side_comment_length;
18618 $extra_indent_ok = 0;
18621 sub entab_and_output {
18622 my ( $line, $leading_space_count, $level ) = @_;
18624 # The line is currently correct if there is no tabbing (recommended!)
18625 # We may have to lop off some leading spaces and replace with tabs.
18626 if ( $leading_space_count > 0 ) {
18628 # Nothing to do if no tabs
18629 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
18630 || $rOpts_indent_columns <= 0 )
18636 # Handle entab option
18637 elsif ($rOpts_entab_leading_whitespace) {
18639 $leading_space_count % $rOpts_entab_leading_whitespace;
18641 int( $leading_space_count / $rOpts_entab_leading_whitespace );
18642 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
18643 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
18644 substr( $line, 0, $leading_space_count ) = $leading_string;
18648 # REMOVE AFTER TESTING
18649 # shouldn't happen - program error counting whitespace
18650 # we'll skip entabbing
18652 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
18657 # Handle option of one tab per level
18659 my $leading_string = ( "\t" x $level );
18661 $leading_space_count - $level * $rOpts_indent_columns;
18663 # shouldn't happen:
18664 if ( $space_count < 0 ) {
18666 "Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
18668 $leading_string = ( ' ' x $leading_space_count );
18671 $leading_string .= ( ' ' x $space_count );
18673 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
18674 substr( $line, 0, $leading_space_count ) = $leading_string;
18678 # REMOVE AFTER TESTING
18679 # shouldn't happen - program error counting whitespace
18680 # we'll skip entabbing
18682 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
18687 $file_writer_object->write_code_line( $line . "\n" );
18688 if ($seqno_string) {
18689 $last_nonblank_seqno_string = $seqno_string;
18693 { # begin get_leading_string
18695 my @leading_string_cache;
18697 sub get_leading_string {
18699 # define the leading whitespace string for this line..
18700 my $leading_whitespace_count = shift;
18702 # Handle case of zero whitespace, which includes multi-line quotes
18703 # (which may have a finite level; this prevents tab problems)
18704 if ( $leading_whitespace_count <= 0 ) {
18708 # look for previous result
18709 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
18710 return $leading_string_cache[$leading_whitespace_count];
18713 # must compute a string for this number of spaces
18714 my $leading_string;
18716 # Handle simple case of no tabs
18717 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
18718 || $rOpts_indent_columns <= 0 )
18720 $leading_string = ( ' ' x $leading_whitespace_count );
18723 # Handle entab option
18724 elsif ($rOpts_entab_leading_whitespace) {
18726 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
18729 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
18730 $leading_string = "\t" x $tab_count . ' ' x $space_count;
18733 # Handle option of one tab per level
18735 $leading_string = ( "\t" x $group_level );
18737 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
18739 # shouldn't happen:
18740 if ( $space_count < 0 ) {
18742 "Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
18744 $leading_string = ( ' ' x $leading_whitespace_count );
18747 $leading_string .= ( ' ' x $space_count );
18750 $leading_string_cache[$leading_whitespace_count] = $leading_string;
18751 return $leading_string;
18753 } # end get_leading_string
18755 sub report_anything_unusual {
18757 if ( $outdented_line_count > 0 ) {
18758 write_logfile_entry(
18759 "$outdented_line_count long lines were outdented:\n");
18760 write_logfile_entry(
18761 " First at output line $first_outdented_line_at\n");
18763 if ( $outdented_line_count > 1 ) {
18764 write_logfile_entry(
18765 " Last at output line $last_outdented_line_at\n");
18767 write_logfile_entry(
18768 " use -noll to prevent outdenting, -l=n to increase line length\n"
18770 write_logfile_entry("\n");
18774 #####################################################################
18776 # the Perl::Tidy::FileWriter class writes the output file
18778 #####################################################################
18780 package Perl::Tidy::FileWriter;
18782 # Maximum number of little messages; probably need not be changed.
18783 use constant MAX_NAG_MESSAGES => 6;
18785 sub write_logfile_entry {
18787 my $logger_object = $self->{_logger_object};
18788 if ($logger_object) {
18789 $logger_object->write_logfile_entry(@_);
18795 my ( $line_sink_object, $rOpts, $logger_object ) = @_;
18798 _line_sink_object => $line_sink_object,
18799 _logger_object => $logger_object,
18801 _output_line_number => 1,
18802 _consecutive_blank_lines => 0,
18803 _consecutive_nonblank_lines => 0,
18804 _first_line_length_error => 0,
18805 _max_line_length_error => 0,
18806 _last_line_length_error => 0,
18807 _first_line_length_error_at => 0,
18808 _max_line_length_error_at => 0,
18809 _last_line_length_error_at => 0,
18810 _line_length_error_count => 0,
18811 _max_output_line_length => 0,
18812 _max_output_line_length_at => 0,
18818 $self->{_line_sink_object}->tee_on();
18823 $self->{_line_sink_object}->tee_off();
18826 sub get_output_line_number {
18828 return $self->{_output_line_number};
18831 sub decrement_output_line_number {
18833 $self->{_output_line_number}--;
18836 sub get_consecutive_nonblank_lines {
18838 return $self->{_consecutive_nonblank_lines};
18841 sub reset_consecutive_blank_lines {
18843 $self->{_consecutive_blank_lines} = 0;
18846 sub want_blank_line {
18848 unless ( $self->{_consecutive_blank_lines} ) {
18849 $self->write_blank_code_line();
18853 sub write_blank_code_line {
18855 my $rOpts = $self->{_rOpts};
18857 if ( $self->{_consecutive_blank_lines} >=
18858 $rOpts->{'maximum-consecutive-blank-lines'} );
18859 $self->{_consecutive_blank_lines}++;
18860 $self->{_consecutive_nonblank_lines} = 0;
18861 $self->write_line("\n");
18864 sub write_code_line {
18868 if ( $a =~ /^\s*$/ ) {
18869 my $rOpts = $self->{_rOpts};
18871 if ( $self->{_consecutive_blank_lines} >=
18872 $rOpts->{'maximum-consecutive-blank-lines'} );
18873 $self->{_consecutive_blank_lines}++;
18874 $self->{_consecutive_nonblank_lines} = 0;
18877 $self->{_consecutive_blank_lines} = 0;
18878 $self->{_consecutive_nonblank_lines}++;
18880 $self->write_line($a);
18887 # TODO: go through and see if the test is necessary here
18888 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
18890 $self->{_line_sink_object}->write_line($a);
18892 # This calculation of excess line length ignores any internal tabs
18893 my $rOpts = $self->{_rOpts};
18894 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
18895 if ( $a =~ /^\t+/g ) {
18896 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
18899 # Note that we just incremented output line number to future value
18900 # so we must subtract 1 for current line number
18901 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
18902 $self->{_max_output_line_length} = length($a) - 1;
18903 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
18906 if ( $exceed > 0 ) {
18907 my $output_line_number = $self->{_output_line_number};
18908 $self->{_last_line_length_error} = $exceed;
18909 $self->{_last_line_length_error_at} = $output_line_number - 1;
18910 if ( $self->{_line_length_error_count} == 0 ) {
18911 $self->{_first_line_length_error} = $exceed;
18912 $self->{_first_line_length_error_at} = $output_line_number - 1;
18916 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
18918 $self->{_max_line_length_error} = $exceed;
18919 $self->{_max_line_length_error_at} = $output_line_number - 1;
18922 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
18923 $self->write_logfile_entry(
18924 "Line length exceeded by $exceed characters\n");
18926 $self->{_line_length_error_count}++;
18931 sub report_line_length_errors {
18933 my $rOpts = $self->{_rOpts};
18934 my $line_length_error_count = $self->{_line_length_error_count};
18935 if ( $line_length_error_count == 0 ) {
18936 $self->write_logfile_entry(
18937 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
18938 my $max_output_line_length = $self->{_max_output_line_length};
18939 my $max_output_line_length_at = $self->{_max_output_line_length_at};
18940 $self->write_logfile_entry(
18941 " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
18947 my $word = ( $line_length_error_count > 1 ) ? "s" : "";
18948 $self->write_logfile_entry(
18949 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
18952 $word = ( $line_length_error_count > 1 ) ? "First" : "";
18953 my $first_line_length_error = $self->{_first_line_length_error};
18954 my $first_line_length_error_at = $self->{_first_line_length_error_at};
18955 $self->write_logfile_entry(
18956 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
18959 if ( $line_length_error_count > 1 ) {
18960 my $max_line_length_error = $self->{_max_line_length_error};
18961 my $max_line_length_error_at = $self->{_max_line_length_error_at};
18962 my $last_line_length_error = $self->{_last_line_length_error};
18963 my $last_line_length_error_at = $self->{_last_line_length_error_at};
18964 $self->write_logfile_entry(
18965 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
18967 $self->write_logfile_entry(
18968 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
18974 #####################################################################
18976 # The Perl::Tidy::Debugger class shows line tokenization
18978 #####################################################################
18980 package Perl::Tidy::Debugger;
18984 my ( $class, $filename ) = @_;
18987 _debug_file => $filename,
18988 _debug_file_opened => 0,
18993 sub really_open_debug_file {
18996 my $debug_file = $self->{_debug_file};
18998 unless ( $fh = IO::File->new("> $debug_file") ) {
18999 warn("can't open $debug_file: $!\n");
19001 $self->{_debug_file_opened} = 1;
19002 $self->{_fh} = $fh;
19004 "Use -dump-token-types (-dtt) to get a list of token type codes\n";
19007 sub close_debug_file {
19010 my $fh = $self->{_fh};
19011 if ( $self->{_debug_file_opened} ) {
19013 eval { $self->{_fh}->close() };
19017 sub write_debug_entry {
19019 # This is a debug dump routine which may be modified as necessary
19020 # to dump tokens on a line-by-line basis. The output will be written
19021 # to the .DEBUG file when the -D flag is entered.
19023 my $line_of_tokens = shift;
19025 my $input_line = $line_of_tokens->{_line_text};
19026 my $rtoken_type = $line_of_tokens->{_rtoken_type};
19027 my $rtokens = $line_of_tokens->{_rtokens};
19028 my $rlevels = $line_of_tokens->{_rlevels};
19029 my $rslevels = $line_of_tokens->{_rslevels};
19030 my $rblock_type = $line_of_tokens->{_rblock_type};
19031 my $input_line_number = $line_of_tokens->{_line_number};
19032 my $line_type = $line_of_tokens->{_line_type};
19036 my $token_str = "$input_line_number: ";
19037 my $reconstructed_original = "$input_line_number: ";
19038 my $block_str = "$input_line_number: ";
19040 #$token_str .= "$line_type: ";
19041 #$reconstructed_original .= "$line_type: ";
19044 my @next_char = ( '"', '"' );
19046 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
19047 my $fh = $self->{_fh};
19049 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
19052 if ( $$rtoken_type[$j] eq 'k' ) {
19053 $pattern .= $$rtokens[$j];
19056 $pattern .= $$rtoken_type[$j];
19058 $reconstructed_original .= $$rtokens[$j];
19059 $block_str .= "($$rblock_type[$j])";
19060 $num = length( $$rtokens[$j] );
19061 my $type_str = $$rtoken_type[$j];
19063 # be sure there are no blank tokens (shouldn't happen)
19064 # This can only happen if a programming error has been made
19065 # because all valid tokens are non-blank
19066 if ( $type_str eq ' ' ) {
19067 print $fh "BLANK TOKEN on the next line\n";
19068 $type_str = $next_char[$i_next];
19069 $i_next = 1 - $i_next;
19072 if ( length($type_str) == 1 ) {
19073 $type_str = $type_str x $num;
19075 $token_str .= $type_str;
19078 # Write what you want here ...
19079 # print $fh "$input_line\n";
19080 # print $fh "$pattern\n";
19081 print $fh "$reconstructed_original\n";
19082 print $fh "$token_str\n";
19084 #print $fh "$block_str\n";
19087 #####################################################################
19089 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
19090 # method for returning the next line to be parsed, as well as a
19091 # 'peek_ahead()' method
19093 # The input parameter is an object with a 'get_line()' method
19094 # which returns the next line to be parsed
19096 #####################################################################
19098 package Perl::Tidy::LineBuffer;
19103 my $line_source_object = shift;
19106 _line_source_object => $line_source_object,
19107 _rlookahead_buffer => [],
19113 my $buffer_index = shift;
19115 my $line_source_object = $self->{_line_source_object};
19116 my $rlookahead_buffer = $self->{_rlookahead_buffer};
19117 if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
19118 $line = $$rlookahead_buffer[$buffer_index];
19121 $line = $line_source_object->get_line();
19122 push( @$rlookahead_buffer, $line );
19130 my $line_source_object = $self->{_line_source_object};
19131 my $rlookahead_buffer = $self->{_rlookahead_buffer};
19133 if ( scalar(@$rlookahead_buffer) ) {
19134 $line = shift @$rlookahead_buffer;
19137 $line = $line_source_object->get_line();
19142 ########################################################################
19144 # the Perl::Tidy::Tokenizer package is essentially a filter which
19145 # reads lines of perl source code from a source object and provides
19146 # corresponding tokenized lines through its get_line() method. Lines
19147 # flow from the source_object to the caller like this:
19149 # source_object --> LineBuffer_object --> Tokenizer --> calling routine
19150 # get_line() get_line() get_line() line_of_tokens
19152 # The source object can be any object with a get_line() method which
19153 # supplies one line (a character string) perl call.
19154 # The LineBuffer object is created by the Tokenizer.
19155 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
19156 # containing one tokenized line for each call to its get_line() method.
19158 # WARNING: This is not a real class yet. Only one tokenizer my be used.
19160 ########################################################################
19162 package Perl::Tidy::Tokenizer;
19166 # Caution: these debug flags produce a lot of output
19167 # They should all be 0 except when debugging small scripts
19169 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
19170 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
19171 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
19172 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
19173 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
19175 my $debug_warning = sub {
19176 print "TOKENIZER_DEBUGGING with key $_[0]\n";
19179 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
19180 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
19181 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
19182 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
19183 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
19189 # PACKAGE VARIABLES for for processing an entire FILE.
19193 $last_nonblank_token
19194 $last_nonblank_type
19195 $last_nonblank_block_type
19203 %user_function_prototype
19205 %is_block_list_function
19206 %saw_function_definition
19210 $square_bracket_depth
19213 @nesting_sequence_number
19214 @current_sequence_number
19216 @paren_semicolon_count
19217 @paren_structural_type
19219 @brace_structural_type
19220 @brace_statement_type
19223 @square_bracket_type
19224 @square_bracket_structural_type
19226 @starting_line_of_current_depth
19229 # GLOBAL CONSTANTS for routines in this package
19231 %is_indirect_object_taker
19233 %expecting_operator_token
19234 %expecting_operator_types
19235 %expecting_term_types
19236 %expecting_term_token
19238 %is_file_test_operator
19240 %is_valid_token_type
19242 %is_code_block_token
19244 @opening_brace_names
19245 @closing_brace_names
19246 %is_keyword_taking_list
19247 %is_q_qq_qw_qx_qr_s_y_tr_m
19250 # possible values of operator_expected()
19251 use constant TERM => -1;
19252 use constant UNKNOWN => 0;
19253 use constant OPERATOR => 1;
19255 # possible values of context
19256 use constant SCALAR_CONTEXT => -1;
19257 use constant UNKNOWN_CONTEXT => 0;
19258 use constant LIST_CONTEXT => 1;
19260 # Maximum number of little messages; probably need not be changed.
19261 use constant MAX_NAG_MESSAGES => 6;
19265 # methods to count instances
19267 sub get_count { $_count; }
19268 sub _increment_count { ++$_count }
19269 sub _decrement_count { --$_count }
19273 $_[0]->_decrement_count();
19280 # Note: 'tabs' and 'indent_columns' are temporary and should be
19283 source_object => undef,
19284 debugger_object => undef,
19285 diagnostics_object => undef,
19286 logger_object => undef,
19287 starting_level => undef,
19288 indent_columns => 4,
19290 look_for_hash_bang => 0,
19292 look_for_autoloader => 1,
19293 look_for_selfloader => 1,
19294 starting_line_number => 1,
19296 my %args = ( %defaults, @_ );
19298 # we are given an object with a get_line() method to supply source lines
19299 my $source_object = $args{source_object};
19301 # we create another object with a get_line() and peek_ahead() method
19302 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
19304 # Tokenizer state data is as follows:
19305 # _rhere_target_list reference to list of here-doc targets
19306 # _here_doc_target the target string for a here document
19307 # _here_quote_character the type of here-doc quoting (" ' ` or none)
19308 # to determine if interpolation is done
19309 # _quote_target character we seek if chasing a quote
19310 # _line_start_quote line where we started looking for a long quote
19311 # _in_here_doc flag indicating if we are in a here-doc
19312 # _in_pod flag set if we are in pod documentation
19313 # _in_error flag set if we saw severe error (binary in script)
19314 # _in_data flag set if we are in __DATA__ section
19315 # _in_end flag set if we are in __END__ section
19316 # _in_format flag set if we are in a format description
19317 # _in_attribute_list flag telling if we are looking for attributes
19318 # _in_quote flag telling if we are chasing a quote
19319 # _starting_level indentation level of first line
19320 # _input_tabstr string denoting one indentation level of input file
19321 # _know_input_tabstr flag indicating if we know _input_tabstr
19322 # _line_buffer_object object with get_line() method to supply source code
19323 # _diagnostics_object place to write debugging information
19324 # _unexpected_error_count error count used to limit output
19325 # _lower_case_labels_at line numbers where lower case labels seen
19326 $tokenizer_self = {
19327 _rhere_target_list => [],
19329 _here_doc_target => "",
19330 _here_quote_character => "",
19336 _in_attribute_list => 0,
19338 _quote_target => "",
19339 _line_start_quote => -1,
19340 _starting_level => $args{starting_level},
19341 _know_starting_level => defined( $args{starting_level} ),
19342 _tabs => $args{tabs},
19343 _indent_columns => $args{indent_columns},
19344 _look_for_hash_bang => $args{look_for_hash_bang},
19345 _trim_qw => $args{trim_qw},
19346 _input_tabstr => "",
19347 _know_input_tabstr => -1,
19348 _last_line_number => $args{starting_line_number} - 1,
19349 _saw_perl_dash_P => 0,
19350 _saw_perl_dash_w => 0,
19351 _saw_use_strict => 0,
19352 _saw_v_string => 0,
19353 _look_for_autoloader => $args{look_for_autoloader},
19354 _look_for_selfloader => $args{look_for_selfloader},
19355 _saw_autoloader => 0,
19356 _saw_selfloader => 0,
19357 _saw_hash_bang => 0,
19360 _saw_negative_indentation => 0,
19361 _started_tokenizing => 0,
19362 _line_buffer_object => $line_buffer_object,
19363 _debugger_object => $args{debugger_object},
19364 _diagnostics_object => $args{diagnostics_object},
19365 _logger_object => $args{logger_object},
19366 _unexpected_error_count => 0,
19367 _started_looking_for_here_target_at => 0,
19368 _nearly_matched_here_target_at => undef,
19370 _rlower_case_labels_at => undef,
19373 prepare_for_a_new_file();
19374 find_starting_indentation_level();
19376 bless $tokenizer_self, $class;
19378 # This is not a full class yet, so die if an attempt is made to
19379 # create more than one object.
19381 if ( _increment_count() > 1 ) {
19383 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
19386 return $tokenizer_self;
19390 # interface to Perl::Tidy::Logger routines
19392 my $logger_object = $tokenizer_self->{_logger_object};
19393 if ($logger_object) {
19394 $logger_object->warning(@_);
19399 my $logger_object = $tokenizer_self->{_logger_object};
19400 if ($logger_object) {
19401 $logger_object->complain(@_);
19405 sub write_logfile_entry {
19406 my $logger_object = $tokenizer_self->{_logger_object};
19407 if ($logger_object) {
19408 $logger_object->write_logfile_entry(@_);
19412 sub interrupt_logfile {
19413 my $logger_object = $tokenizer_self->{_logger_object};
19414 if ($logger_object) {
19415 $logger_object->interrupt_logfile();
19419 sub resume_logfile {
19420 my $logger_object = $tokenizer_self->{_logger_object};
19421 if ($logger_object) {
19422 $logger_object->resume_logfile();
19426 sub increment_brace_error {
19427 my $logger_object = $tokenizer_self->{_logger_object};
19428 if ($logger_object) {
19429 $logger_object->increment_brace_error();
19433 sub report_definite_bug {
19434 my $logger_object = $tokenizer_self->{_logger_object};
19435 if ($logger_object) {
19436 $logger_object->report_definite_bug();
19440 sub brace_warning {
19441 my $logger_object = $tokenizer_self->{_logger_object};
19442 if ($logger_object) {
19443 $logger_object->brace_warning(@_);
19447 sub get_saw_brace_error {
19448 my $logger_object = $tokenizer_self->{_logger_object};
19449 if ($logger_object) {
19450 $logger_object->get_saw_brace_error();
19457 # interface to Perl::Tidy::Diagnostics routines
19458 sub write_diagnostics {
19459 if ( $tokenizer_self->{_diagnostics_object} ) {
19460 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
19464 sub report_tokenization_errors {
19468 my $level = get_indentation_level();
19469 if ( $level != $tokenizer_self->{_starting_level} ) {
19470 warning("final indentation level: $level\n");
19473 check_final_nesting_depths();
19475 if ( $tokenizer_self->{_look_for_hash_bang}
19476 && !$tokenizer_self->{_saw_hash_bang} )
19479 "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
19482 if ( $tokenizer_self->{_in_format} ) {
19483 warning("hit EOF while in format description\n");
19486 if ( $tokenizer_self->{_in_pod} ) {
19488 # Just write log entry if this is after __END__ or __DATA__
19489 # because this happens to often, and it is not likely to be
19491 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
19492 write_logfile_entry(
19493 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
19499 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
19505 if ( $tokenizer_self->{_in_here_doc} ) {
19506 my $here_doc_target = $tokenizer_self->{_here_doc_target};
19507 my $started_looking_for_here_target_at =
19508 $tokenizer_self->{_started_looking_for_here_target_at};
19509 if ($here_doc_target) {
19511 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
19516 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
19519 my $nearly_matched_here_target_at =
19520 $tokenizer_self->{_nearly_matched_here_target_at};
19521 if ($nearly_matched_here_target_at) {
19523 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
19528 if ( $tokenizer_self->{_in_quote} ) {
19529 my $line_start_quote = $tokenizer_self->{_line_start_quote};
19530 my $quote_target = $tokenizer_self->{_quote_target};
19532 ( $tokenizer_self->{_in_attribute_list} )
19536 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
19540 unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
19541 if ( $] < 5.006 ) {
19542 write_logfile_entry("Suggest including '-w parameter'\n");
19545 write_logfile_entry("Suggest including 'use warnings;'\n");
19549 if ( $tokenizer_self->{_saw_perl_dash_P} ) {
19550 write_logfile_entry("Use of -P parameter for defines is discouraged\n");
19553 unless ( $tokenizer_self->{_saw_use_strict} ) {
19554 write_logfile_entry("Suggest including 'use strict;'\n");
19557 # it is suggested that lables have at least one upper case character
19558 # for legibility and to avoid code breakage as new keywords are introduced
19559 if ( $tokenizer_self->{_rlower_case_labels_at} ) {
19560 my @lower_case_labels_at =
19561 @{ $tokenizer_self->{_rlower_case_labels_at} };
19562 write_logfile_entry(
19563 "Suggest using upper case characters in label(s)\n");
19565 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
19569 sub report_v_string {
19571 # warn if this version can't handle v-strings
19573 unless ( $tokenizer_self->{_saw_v_string} ) {
19574 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
19576 if ( $] < 5.006 ) {
19578 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
19583 sub get_input_line_number {
19584 return $tokenizer_self->{_last_line_number};
19587 # returns the next tokenized line
19592 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
19593 # $square_bracket_depth, $paren_depth
19595 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
19596 $tokenizer_self->{_line_text} = $input_line;
19598 return undef unless ($input_line);
19600 my $input_line_number = ++$tokenizer_self->{_last_line_number};
19602 # Find and remove what characters terminate this line, including any
19604 my $input_line_separator = "";
19605 if ( chomp($input_line) ) { $input_line_separator = $/ }
19607 # TODO: what other characters should be included here?
19608 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
19609 $input_line_separator = $2 . $input_line_separator;
19612 # for backwards compatability we keep the line text terminated with
19613 # a newline character
19614 $input_line .= "\n";
19615 $tokenizer_self->{_line_text} = $input_line; # update
19617 # create a data structure describing this line which will be
19618 # returned to the caller.
19620 # _line_type codes are:
19621 # SYSTEM - system-specific code before hash-bang line
19622 # CODE - line of perl code (including comments)
19623 # POD_START - line starting pod, such as '=head'
19624 # POD - pod documentation text
19625 # POD_END - last line of pod section, '=cut'
19626 # HERE - text of here-document
19627 # HERE_END - last line of here-doc (target word)
19628 # FORMAT - format section
19629 # FORMAT_END - last line of format section, '.'
19630 # DATA_START - __DATA__ line
19631 # DATA - unidentified text following __DATA__
19632 # END_START - __END__ line
19633 # END - unidentified text following __END__
19634 # ERROR - we are in big trouble, probably not a perl script
19637 # _curly_brace_depth - depth of curly braces at start of line
19638 # _square_bracket_depth - depth of square brackets at start of line
19639 # _paren_depth - depth of parens at start of line
19640 # _starting_in_quote - this line continues a multi-line quote
19641 # (so don't trim leading blanks!)
19642 # _ending_in_quote - this line ends in a multi-line quote
19643 # (so don't trim trailing blanks!)
19644 my $line_of_tokens = {
19645 _line_type => 'EOF',
19646 _line_text => $input_line,
19647 _line_number => $input_line_number,
19648 _rtoken_type => undef,
19651 _rslevels => undef,
19652 _rblock_type => undef,
19653 _rcontainer_type => undef,
19654 _rcontainer_environment => undef,
19655 _rtype_sequence => undef,
19656 _rnesting_tokens => undef,
19657 _rci_levels => undef,
19658 _rnesting_blocks => undef,
19659 _python_indentation_level => -1, ## 0,
19660 _starting_in_quote => 0, # to be set by subroutine
19661 _ending_in_quote => 0,
19662 _curly_brace_depth => $brace_depth,
19663 _square_bracket_depth => $square_bracket_depth,
19664 _paren_depth => $paren_depth,
19665 _quote_character => '',
19668 # must print line unchanged if we are in a here document
19669 if ( $tokenizer_self->{_in_here_doc} ) {
19671 $line_of_tokens->{_line_type} = 'HERE';
19672 my $here_doc_target = $tokenizer_self->{_here_doc_target};
19673 my $here_quote_character = $tokenizer_self->{_here_quote_character};
19674 my $candidate_target = $input_line;
19675 chomp $candidate_target;
19676 if ( $candidate_target eq $here_doc_target ) {
19677 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
19678 $line_of_tokens->{_line_type} = 'HERE_END';
19679 write_logfile_entry("Exiting HERE document $here_doc_target\n");
19681 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
19682 if (@$rhere_target_list) { # there can be multiple here targets
19683 ( $here_doc_target, $here_quote_character ) =
19684 @{ shift @$rhere_target_list };
19685 $tokenizer_self->{_here_doc_target} = $here_doc_target;
19686 $tokenizer_self->{_here_quote_character} =
19687 $here_quote_character;
19688 write_logfile_entry(
19689 "Entering HERE document $here_doc_target\n");
19690 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
19691 $tokenizer_self->{_started_looking_for_here_target_at} =
19692 $input_line_number;
19695 $tokenizer_self->{_in_here_doc} = 0;
19696 $tokenizer_self->{_here_doc_target} = "";
19697 $tokenizer_self->{_here_quote_character} = "";
19701 # check for error of extra whitespace
19702 # note for PERL6: leading whitespace is allowed
19704 $candidate_target =~ s/\s*$//;
19705 $candidate_target =~ s/^\s*//;
19706 if ( $candidate_target eq $here_doc_target ) {
19707 $tokenizer_self->{_nearly_matched_here_target_at} =
19708 $input_line_number;
19711 return $line_of_tokens;
19714 # must print line unchanged if we are in a format section
19715 elsif ( $tokenizer_self->{_in_format} ) {
19717 if ( $input_line =~ /^\.[\s#]*$/ ) {
19718 write_logfile_entry("Exiting format section\n");
19719 $tokenizer_self->{_in_format} = 0;
19720 $line_of_tokens->{_line_type} = 'FORMAT_END';
19723 $line_of_tokens->{_line_type} = 'FORMAT';
19725 return $line_of_tokens;
19728 # must print line unchanged if we are in pod documentation
19729 elsif ( $tokenizer_self->{_in_pod} ) {
19731 $line_of_tokens->{_line_type} = 'POD';
19732 if ( $input_line =~ /^=cut/ ) {
19733 $line_of_tokens->{_line_type} = 'POD_END';
19734 write_logfile_entry("Exiting POD section\n");
19735 $tokenizer_self->{_in_pod} = 0;
19737 if ( $input_line =~ /^\#\!.*perl\b/ ) {
19739 "Hash-bang in pod can cause older versions of perl to fail! \n"
19743 return $line_of_tokens;
19746 # must print line unchanged if we have seen a severe error (i.e., we
19747 # are seeing illegal tokens and connot continue. Syntax errors do
19748 # not pass this route). Calling routine can decide what to do, but
19749 # the default can be to just pass all lines as if they were after __END__
19750 elsif ( $tokenizer_self->{_in_error} ) {
19751 $line_of_tokens->{_line_type} = 'ERROR';
19752 return $line_of_tokens;
19755 # print line unchanged if we are __DATA__ section
19756 elsif ( $tokenizer_self->{_in_data} ) {
19758 # ...but look for POD
19759 # Note that the _in_data and _in_end flags remain set
19760 # so that we return to that state after seeing the
19761 # end of a pod section
19762 if ( $input_line =~ /^=(?!cut)/ ) {
19763 $line_of_tokens->{_line_type} = 'POD_START';
19764 write_logfile_entry("Entering POD section\n");
19765 $tokenizer_self->{_in_pod} = 1;
19766 return $line_of_tokens;
19769 $line_of_tokens->{_line_type} = 'DATA';
19770 return $line_of_tokens;
19774 # print line unchanged if we are in __END__ section
19775 elsif ( $tokenizer_self->{_in_end} ) {
19777 # ...but look for POD
19778 # Note that the _in_data and _in_end flags remain set
19779 # so that we return to that state after seeing the
19780 # end of a pod section
19781 if ( $input_line =~ /^=(?!cut)/ ) {
19782 $line_of_tokens->{_line_type} = 'POD_START';
19783 write_logfile_entry("Entering POD section\n");
19784 $tokenizer_self->{_in_pod} = 1;
19785 return $line_of_tokens;
19788 $line_of_tokens->{_line_type} = 'END';
19789 return $line_of_tokens;
19793 # check for a hash-bang line if we haven't seen one
19794 if ( !$tokenizer_self->{_saw_hash_bang} ) {
19795 if ( $input_line =~ /^\#\!.*perl\b/ ) {
19796 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
19798 # check for -w and -P flags
19799 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
19800 $tokenizer_self->{_saw_perl_dash_P} = 1;
19803 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
19804 $tokenizer_self->{_saw_perl_dash_w} = 1;
19807 if ( ( $input_line_number > 1 )
19808 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
19811 # this is helpful for VMS systems; we may have accidentally
19812 # tokenized some DCL commands
19813 if ( $tokenizer_self->{_started_tokenizing} ) {
19815 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
19819 complain("Useless hash-bang after line 1\n");
19823 # Report the leading hash-bang as a system line
19824 # This will prevent -dac from deleting it
19826 $line_of_tokens->{_line_type} = 'SYSTEM';
19827 return $line_of_tokens;
19832 # wait for a hash-bang before parsing if the user invoked us with -x
19833 if ( $tokenizer_self->{_look_for_hash_bang}
19834 && !$tokenizer_self->{_saw_hash_bang} )
19836 $line_of_tokens->{_line_type} = 'SYSTEM';
19837 return $line_of_tokens;
19840 # a first line of the form ': #' will be marked as SYSTEM
19841 # since lines of this form may be used by tcsh
19842 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
19843 $line_of_tokens->{_line_type} = 'SYSTEM';
19844 return $line_of_tokens;
19847 # now we know that it is ok to tokenize the line...
19848 # the line tokenizer will modify any of these private variables:
19849 # _rhere_target_list
19856 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
19857 tokenize_this_line($line_of_tokens);
19859 # Now finish defining the return structure and return it
19860 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
19862 # handle severe error (binary data in script)
19863 if ( $tokenizer_self->{_in_error} ) {
19864 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
19865 warning("Giving up after error\n");
19866 $line_of_tokens->{_line_type} = 'ERROR';
19867 reset_indentation_level(0); # avoid error messages
19868 return $line_of_tokens;
19871 # handle start of pod documentation
19872 if ( $tokenizer_self->{_in_pod} ) {
19874 # This gets tricky..above a __DATA__ or __END__ section, perl
19875 # accepts '=cut' as the start of pod section. But afterwards,
19876 # only pod utilities see it and they may ignore an =cut without
19877 # leading =head. In any case, this isn't good.
19878 if ( $input_line =~ /^=cut\b/ ) {
19879 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
19880 complain("=cut while not in pod ignored\n");
19881 $tokenizer_self->{_in_pod} = 0;
19882 $line_of_tokens->{_line_type} = 'POD_STOP';
19885 $line_of_tokens->{_line_type} = 'POD_END';
19887 "=cut starts a pod section .. this can fool pod utilities.\n"
19889 write_logfile_entry("Entering POD section\n");
19894 $line_of_tokens->{_line_type} = 'POD_START';
19895 write_logfile_entry("Entering POD section\n");
19898 return $line_of_tokens;
19901 # update indentation levels for log messages
19902 if ( $input_line !~ /^\s*$/ ) {
19903 my $rlevels = $line_of_tokens->{_rlevels};
19904 my $structural_indentation_level = $$rlevels[0];
19905 my ( $python_indentation_level, $msg ) =
19906 find_indentation_level( $input_line, $structural_indentation_level );
19907 if ($msg) { write_logfile_entry("$msg") }
19908 if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
19909 $line_of_tokens->{_python_indentation_level} =
19910 $python_indentation_level;
19914 # see if this line contains here doc targets
19915 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
19916 if (@$rhere_target_list) {
19918 my ( $here_doc_target, $here_quote_character ) =
19919 @{ shift @$rhere_target_list };
19920 $tokenizer_self->{_in_here_doc} = 1;
19921 $tokenizer_self->{_here_doc_target} = $here_doc_target;
19922 $tokenizer_self->{_here_quote_character} = $here_quote_character;
19923 write_logfile_entry("Entering HERE document $here_doc_target\n");
19924 $tokenizer_self->{_started_looking_for_here_target_at} =
19925 $input_line_number;
19928 # NOTE: __END__ and __DATA__ statements are written unformatted
19929 # because they can theoretically contain additional characters
19930 # which are not tokenized (and cannot be read with <DATA> either!).
19931 if ( $tokenizer_self->{_in_data} ) {
19932 $line_of_tokens->{_line_type} = 'DATA_START';
19933 write_logfile_entry("Starting __DATA__ section\n");
19934 $tokenizer_self->{_saw_data} = 1;
19936 # keep parsing after __DATA__ if use SelfLoader was seen
19937 if ( $tokenizer_self->{_saw_selfloader} ) {
19938 $tokenizer_self->{_in_data} = 0;
19939 write_logfile_entry(
19940 "SelfLoader seen, continuing; -nlsl deactivates\n");
19943 return $line_of_tokens;
19946 elsif ( $tokenizer_self->{_in_end} ) {
19947 $line_of_tokens->{_line_type} = 'END_START';
19948 write_logfile_entry("Starting __END__ section\n");
19949 $tokenizer_self->{_saw_end} = 1;
19951 # keep parsing after __END__ if use AutoLoader was seen
19952 if ( $tokenizer_self->{_saw_autoloader} ) {
19953 $tokenizer_self->{_in_end} = 0;
19954 write_logfile_entry(
19955 "AutoLoader seen, continuing; -nlal deactivates\n");
19957 return $line_of_tokens;
19960 # now, finally, we know that this line is type 'CODE'
19961 $line_of_tokens->{_line_type} = 'CODE';
19963 # remember if we have seen any real code
19964 if ( !$tokenizer_self->{_started_tokenizing}
19965 && $input_line !~ /^\s*$/
19966 && $input_line !~ /^\s*#/ )
19968 $tokenizer_self->{_started_tokenizing} = 1;
19971 if ( $tokenizer_self->{_debugger_object} ) {
19972 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
19975 # Note: if keyword 'format' occurs in this line code, it is still CODE
19976 # (keyword 'format' need not start a line)
19977 if ( $tokenizer_self->{_in_format} ) {
19978 write_logfile_entry("Entering format section\n");
19981 if ( $tokenizer_self->{_in_quote}
19982 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
19985 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
19987 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
19989 $tokenizer_self->{_line_start_quote} = $input_line_number;
19990 write_logfile_entry(
19991 "Start multi-line quote or pattern ending in $quote_target\n");
19994 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
19995 and !$tokenizer_self->{_in_quote} )
19997 $tokenizer_self->{_line_start_quote} = -1;
19998 write_logfile_entry("End of multi-line quote or pattern\n");
20001 # we are returning a line of CODE
20002 return $line_of_tokens;
20005 sub find_starting_indentation_level {
20007 # USES GLOBAL VARIABLES: $tokenizer_self
20008 my $starting_level = 0;
20009 my $know_input_tabstr = -1; # flag for find_indentation_level
20011 # use value if given as parameter
20012 if ( $tokenizer_self->{_know_starting_level} ) {
20013 $starting_level = $tokenizer_self->{_starting_level};
20016 # if we know there is a hash_bang line, the level must be zero
20017 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
20018 $tokenizer_self->{_know_starting_level} = 1;
20021 # otherwise figure it out from the input file
20025 my $structural_indentation_level = -1; # flag for find_indentation_level
20029 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
20032 # if first line is #! then assume starting level is zero
20033 if ( $i == 1 && $line =~ /^\#\!/ ) {
20034 $starting_level = 0;
20037 next if ( $line =~ /^\s*#/ ); # must not be comment
20038 next if ( $line =~ /^\s*$/ ); # must not be blank
20039 ( $starting_level, $msg ) =
20040 find_indentation_level( $line, $structural_indentation_level );
20041 if ($msg) { write_logfile_entry("$msg") }
20044 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
20046 if ( $starting_level > 0 ) {
20048 my $input_tabstr = $tokenizer_self->{_input_tabstr};
20049 if ( $input_tabstr eq "\t" ) {
20050 $msg .= "by guessing input tabbing uses 1 tab per level\n";
20053 my $cols = length($input_tabstr);
20055 "by guessing input tabbing uses $cols blanks per level\n";
20058 write_logfile_entry("$msg");
20060 $tokenizer_self->{_starting_level} = $starting_level;
20061 reset_indentation_level($starting_level);
20064 # Find indentation level given a input line. At the same time, try to
20065 # figure out the input tabbing scheme.
20067 # There are two types of calls:
20069 # Type 1: $structural_indentation_level < 0
20070 # In this case we have to guess $input_tabstr to figure out the level.
20072 # Type 2: $structural_indentation_level >= 0
20073 # In this case the level of this line is known, and this routine can
20074 # update the tabbing string, if still unknown, to make the level correct.
20076 sub find_indentation_level {
20077 my ( $line, $structural_indentation_level ) = @_;
20079 # USES GLOBAL VARIABLES: $tokenizer_self
20083 my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
20084 my $input_tabstr = $tokenizer_self->{_input_tabstr};
20086 # find leading whitespace
20087 my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
20089 # make first guess at input tabbing scheme if necessary
20090 if ( $know_input_tabstr < 0 ) {
20092 $know_input_tabstr = 0;
20094 if ( $tokenizer_self->{_tabs} ) {
20095 $input_tabstr = "\t";
20096 if ( length($leading_whitespace) > 0 ) {
20097 if ( $leading_whitespace !~ /\t/ ) {
20099 my $cols = $tokenizer_self->{_indent_columns};
20101 if ( length($leading_whitespace) < $cols ) {
20102 $cols = length($leading_whitespace);
20104 $input_tabstr = " " x $cols;
20109 $input_tabstr = " " x $tokenizer_self->{_indent_columns};
20111 if ( length($leading_whitespace) > 0 ) {
20112 if ( $leading_whitespace =~ /^\t/ ) {
20113 $input_tabstr = "\t";
20117 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
20118 $tokenizer_self->{_input_tabstr} = $input_tabstr;
20121 # determine the input tabbing scheme if possible
20122 if ( ( $know_input_tabstr == 0 )
20123 && ( length($leading_whitespace) > 0 )
20124 && ( $structural_indentation_level > 0 ) )
20126 my $saved_input_tabstr = $input_tabstr;
20128 # check for common case of one tab per indentation level
20129 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
20130 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
20131 $input_tabstr = "\t";
20132 $msg = "Guessing old indentation was tab character\n";
20138 # detab any tabs based on 8 blanks per tab
20140 if ( $leading_whitespace =~ s/^\t+/ /g ) {
20141 $entabbed = "entabbed";
20144 # now compute tabbing from number of spaces
20146 length($leading_whitespace) / $structural_indentation_level;
20147 if ( $columns == int $columns ) {
20149 "Guessing old indentation was $columns $entabbed spaces\n";
20152 $columns = int $columns;
20154 "old indentation is unclear, using $columns $entabbed spaces\n";
20156 $input_tabstr = " " x $columns;
20158 $know_input_tabstr = 1;
20159 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
20160 $tokenizer_self->{_input_tabstr} = $input_tabstr;
20162 # see if mistakes were made
20163 if ( ( $tokenizer_self->{_starting_level} > 0 )
20164 && !$tokenizer_self->{_know_starting_level} )
20167 if ( $input_tabstr ne $saved_input_tabstr ) {
20169 "I made a bad starting level guess; rerun with a value for -sil \n"
20175 # use current guess at input tabbing to get input indentation level
20177 # Patch to handle a common case of entabbed leading whitespace
20178 # If the leading whitespace equals 4 spaces and we also have
20179 # tabs, detab the input whitespace assuming 8 spaces per tab.
20180 if ( length($input_tabstr) == 4 ) {
20181 $leading_whitespace =~ s/^\t+/ /g;
20184 if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
20187 while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
20193 return ( $level, $msg );
20196 # This is a currently unused debug routine
20197 sub dump_functions {
20201 foreach $pkg ( keys %is_user_function ) {
20202 print $fh "\nnon-constant subs in package $pkg\n";
20204 foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
20206 if ( $is_block_list_function{$pkg}{$sub} ) {
20207 $msg = 'block_list';
20210 if ( $is_block_function{$pkg}{$sub} ) {
20213 print $fh "$sub $msg\n";
20217 foreach $pkg ( keys %is_constant ) {
20218 print $fh "\nconstants and constant subs in package $pkg\n";
20220 foreach $sub ( keys %{ $is_constant{$pkg} } ) {
20221 print $fh "$sub\n";
20226 sub prepare_for_a_new_file {
20228 # previous tokens needed to determine what to expect next
20229 $last_nonblank_token = ';'; # the only possible starting state which
20230 $last_nonblank_type = ';'; # will make a leading brace a code block
20231 $last_nonblank_block_type = '';
20233 # scalars for remembering statement types across multiple lines
20234 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
20235 $in_attribute_list = 0;
20237 # scalars for remembering where we are in the file
20238 $current_package = "main";
20239 $context = UNKNOWN_CONTEXT;
20241 # hashes used to remember function information
20242 %is_constant = (); # user-defined constants
20243 %is_user_function = (); # user-defined functions
20244 %user_function_prototype = (); # their prototypes
20245 %is_block_function = ();
20246 %is_block_list_function = ();
20247 %saw_function_definition = ();
20249 # variables used to track depths of various containers
20250 # and report nesting errors
20253 $square_bracket_depth = 0;
20254 @current_depth[ 0 .. $#closing_brace_names ] =
20255 (0) x scalar @closing_brace_names;
20256 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
20257 ( 0 .. $#closing_brace_names );
20258 @current_sequence_number = ();
20259 $paren_type[$paren_depth] = '';
20260 $paren_semicolon_count[$paren_depth] = 0;
20261 $paren_structural_type[$brace_depth] = '';
20262 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
20263 $brace_structural_type[$brace_depth] = '';
20264 $brace_statement_type[$brace_depth] = "";
20265 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
20266 $brace_package[$paren_depth] = $current_package;
20267 $square_bracket_type[$square_bracket_depth] = '';
20268 $square_bracket_structural_type[$square_bracket_depth] = '';
20270 initialize_tokenizer_state();
20273 { # begin tokenize_this_line
20275 use constant BRACE => 0;
20276 use constant SQUARE_BRACKET => 1;
20277 use constant PAREN => 2;
20278 use constant QUESTION_COLON => 3;
20280 # TV1: scalars for processing one LINE.
20281 # Re-initialized on each entry to sub tokenize_this_line.
20283 $block_type, $container_type, $expecting,
20284 $i, $i_tok, $input_line,
20285 $input_line_number, $last_nonblank_i, $max_token_index,
20286 $next_tok, $next_type, $peeked_ahead,
20287 $prototype, $rhere_target_list, $rtoken_map,
20288 $rtoken_type, $rtokens, $tok,
20289 $type, $type_sequence,
20292 # TV2: refs to ARRAYS for processing one LINE
20293 # Re-initialized on each call.
20294 my $routput_token_list = []; # stack of output token indexes
20295 my $routput_token_type = []; # token types
20296 my $routput_block_type = []; # types of code block
20297 my $routput_container_type = []; # paren types, such as if, elsif, ..
20298 my $routput_type_sequence = []; # nesting sequential number
20300 # TV3: SCALARS for quote variables. These are initialized with a
20301 # subroutine call and continually updated as lines are processed.
20302 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
20303 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
20305 # TV4: SCALARS for multi-line identifiers and
20306 # statements. These are initialized with a subroutine call
20307 # and continually updated as lines are processed.
20308 my ( $id_scan_state, $identifier, $want_paren, );
20310 # TV5: SCALARS for tracking indentation level.
20311 # Initialized once and continually updated as lines are
20314 $nesting_token_string, $nesting_type_string,
20315 $nesting_block_string, $nesting_block_flag,
20316 $nesting_list_string, $nesting_list_flag,
20317 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
20318 $in_statement_continuation, $level_in_tokenizer,
20319 $slevel_in_tokenizer, $rslevel_stack,
20322 # TV6: SCALARS for remembering several previous
20323 # tokens. Initialized once and continually updated as
20324 # lines are processed.
20326 $last_nonblank_container_type, $last_nonblank_type_sequence,
20327 $last_last_nonblank_token, $last_last_nonblank_type,
20328 $last_last_nonblank_block_type, $last_last_nonblank_container_type,
20329 $last_last_nonblank_type_sequence, $last_nonblank_prototype,
20332 # ----------------------------------------------------------------
20333 # beginning of tokenizer variable access and manipulation routines
20334 # ----------------------------------------------------------------
20336 sub initialize_tokenizer_state {
20338 # TV1: initialized on each call
20339 # TV2: initialized on each call
20343 $quote_character = "";
20346 $quoted_string_1 = "";
20347 $quoted_string_2 = "";
20348 $allowed_quote_modifiers = "";
20351 $id_scan_state = '';
20356 $nesting_token_string = "";
20357 $nesting_type_string = "";
20358 $nesting_block_string = '1'; # initially in a block
20359 $nesting_block_flag = 1;
20360 $nesting_list_string = '0'; # initially not in a list
20361 $nesting_list_flag = 0; # initially not in a list
20362 $ci_string_in_tokenizer = "";
20363 $continuation_string_in_tokenizer = "0";
20364 $in_statement_continuation = 0;
20365 $level_in_tokenizer = 0;
20366 $slevel_in_tokenizer = 0;
20367 $rslevel_stack = [];
20370 $last_nonblank_container_type = '';
20371 $last_nonblank_type_sequence = '';
20372 $last_last_nonblank_token = ';';
20373 $last_last_nonblank_type = ';';
20374 $last_last_nonblank_block_type = '';
20375 $last_last_nonblank_container_type = '';
20376 $last_last_nonblank_type_sequence = '';
20377 $last_nonblank_prototype = "";
20380 sub save_tokenizer_state {
20383 $block_type, $container_type, $expecting,
20384 $i, $i_tok, $input_line,
20385 $input_line_number, $last_nonblank_i, $max_token_index,
20386 $next_tok, $next_type, $peeked_ahead,
20387 $prototype, $rhere_target_list, $rtoken_map,
20388 $rtoken_type, $rtokens, $tok,
20389 $type, $type_sequence,
20393 $routput_token_list, $routput_token_type,
20394 $routput_block_type, $routput_container_type,
20395 $routput_type_sequence,
20399 $in_quote, $quote_type,
20400 $quote_character, $quote_pos,
20401 $quote_depth, $quoted_string_1,
20402 $quoted_string_2, $allowed_quote_modifiers,
20405 my $rTV4 = [ $id_scan_state, $identifier, $want_paren, ];
20408 $nesting_token_string, $nesting_type_string,
20409 $nesting_block_string, $nesting_block_flag,
20410 $nesting_list_string, $nesting_list_flag,
20411 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
20412 $in_statement_continuation, $level_in_tokenizer,
20413 $slevel_in_tokenizer, $rslevel_stack,
20417 $last_nonblank_container_type,
20418 $last_nonblank_type_sequence,
20419 $last_last_nonblank_token,
20420 $last_last_nonblank_type,
20421 $last_last_nonblank_block_type,
20422 $last_last_nonblank_container_type,
20423 $last_last_nonblank_type_sequence,
20424 $last_nonblank_prototype,
20426 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
20429 sub restore_tokenizer_state {
20431 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
20433 $block_type, $container_type, $expecting,
20434 $i, $i_tok, $input_line,
20435 $input_line_number, $last_nonblank_i, $max_token_index,
20436 $next_tok, $next_type, $peeked_ahead,
20437 $prototype, $rhere_target_list, $rtoken_map,
20438 $rtoken_type, $rtokens, $tok,
20439 $type, $type_sequence,
20443 $routput_token_list, $routput_token_type,
20444 $routput_block_type, $routput_container_type,
20445 $routput_type_sequence,
20449 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
20450 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
20453 ( $id_scan_state, $identifier, $want_paren, ) = @{$rTV4};
20456 $nesting_token_string, $nesting_type_string,
20457 $nesting_block_string, $nesting_block_flag,
20458 $nesting_list_string, $nesting_list_flag,
20459 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
20460 $in_statement_continuation, $level_in_tokenizer,
20461 $slevel_in_tokenizer, $rslevel_stack,
20465 $last_nonblank_container_type,
20466 $last_nonblank_type_sequence,
20467 $last_last_nonblank_token,
20468 $last_last_nonblank_type,
20469 $last_last_nonblank_block_type,
20470 $last_last_nonblank_container_type,
20471 $last_last_nonblank_type_sequence,
20472 $last_nonblank_prototype,
20476 sub get_indentation_level {
20477 return $level_in_tokenizer;
20480 sub reset_indentation_level {
20481 $level_in_tokenizer = $_[0];
20482 $slevel_in_tokenizer = $_[0];
20483 push @{$rslevel_stack}, $slevel_in_tokenizer;
20487 $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
20490 # ------------------------------------------------------------
20491 # end of tokenizer variable access and manipulation routines
20492 # ------------------------------------------------------------
20494 # ------------------------------------------------------------
20495 # beginning of various scanner interface routines
20496 # ------------------------------------------------------------
20497 sub scan_replacement_text {
20499 # check for here-docs in replacement text invoked by
20500 # a substitution operator with executable modifier 'e'.
20503 # $replacement_text
20505 # $rht = reference to any here-doc targets
20506 my ($replacement_text) = @_;
20509 return undef unless ( $replacement_text =~ /<</ );
20511 write_logfile_entry("scanning replacement text for here-doc targets\n");
20513 # save the logger object for error messages
20514 my $logger_object = $tokenizer_self->{_logger_object};
20516 # localize all package variables
20518 $tokenizer_self, $last_nonblank_token,
20519 $last_nonblank_type, $last_nonblank_block_type,
20520 $statement_type, $in_attribute_list,
20521 $current_package, $context,
20522 %is_constant, %is_user_function,
20523 %user_function_prototype, %is_block_function,
20524 %is_block_list_function, %saw_function_definition,
20525 $brace_depth, $paren_depth,
20526 $square_bracket_depth, @current_depth,
20527 @nesting_sequence_number, @current_sequence_number,
20528 @paren_type, @paren_semicolon_count,
20529 @paren_structural_type, @brace_type,
20530 @brace_structural_type, @brace_statement_type,
20531 @brace_context, @brace_package,
20532 @square_bracket_type, @square_bracket_structural_type,
20533 @depth_array, @starting_line_of_current_depth,
20536 # save all lexical variables
20537 my $rstate = save_tokenizer_state();
20538 _decrement_count(); # avoid error check for multiple tokenizers
20540 # make a new tokenizer
20542 my $rpending_logfile_message;
20543 my $source_object =
20544 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
20545 $rpending_logfile_message );
20546 my $tokenizer = Perl::Tidy::Tokenizer->new(
20547 source_object => $source_object,
20548 logger_object => $logger_object,
20549 starting_line_number => $input_line_number,
20552 # scan the replacement text
20553 1 while ( $tokenizer->get_line() );
20555 # remove any here doc targets
20557 if ( $tokenizer_self->{_in_here_doc} ) {
20561 $tokenizer_self->{_here_doc_target},
20562 $tokenizer_self->{_here_quote_character}
20564 if ( $tokenizer_self->{_rhere_target_list} ) {
20565 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
20566 $tokenizer_self->{_rhere_target_list} = undef;
20568 $tokenizer_self->{_in_here_doc} = undef;
20571 # now its safe to report errors
20572 $tokenizer->report_tokenization_errors();
20574 # restore all tokenizer lexical variables
20575 restore_tokenizer_state($rstate);
20577 # return the here doc targets
20581 sub scan_bare_identifier {
20582 ( $i, $tok, $type, $prototype ) =
20583 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
20584 $rtoken_map, $max_token_index );
20587 sub scan_identifier {
20588 ( $i, $tok, $type, $id_scan_state, $identifier ) =
20589 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
20590 $max_token_index );
20594 ( $i, $tok, $type, $id_scan_state ) =
20595 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
20596 $id_scan_state, $max_token_index );
20601 ( $i, $type, $number ) =
20602 scan_number_do( $input_line, $i, $rtoken_map, $type,
20603 $max_token_index );
20607 # a sub to warn if token found where term expected
20608 sub error_if_expecting_TERM {
20609 if ( $expecting == TERM ) {
20610 if ( $really_want_term{$last_nonblank_type} ) {
20611 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
20612 $rtoken_type, $input_line );
20618 # a sub to warn if token found where operator expected
20619 sub error_if_expecting_OPERATOR {
20620 if ( $expecting == OPERATOR ) {
20621 my $thing = defined $_[0] ? $_[0] : $tok;
20622 unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
20623 $rtoken_map, $rtoken_type, $input_line );
20624 if ( $i_tok == 0 ) {
20625 interrupt_logfile();
20626 warning("Missing ';' above?\n");
20633 # ------------------------------------------------------------
20634 # end scanner interfaces
20635 # ------------------------------------------------------------
20637 my %is_for_foreach;
20638 @_ = qw(for foreach);
20639 @is_for_foreach{@_} = (1) x scalar(@_);
20643 @is_my_our{@_} = (1) x scalar(@_);
20645 # These keywords may introduce blocks after parenthesized expressions,
20647 # keyword ( .... ) { BLOCK }
20648 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
20649 my %is_blocktype_with_paren;
20650 @_ = qw(if elsif unless while until for foreach switch case given when);
20651 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
20653 # ------------------------------------------------------------
20654 # begin hash of code for handling most token types
20655 # ------------------------------------------------------------
20656 my $tokenization_code = {
20658 # no special code for these types yet, but syntax checks
20692 error_if_expecting_TERM()
20693 if ( $expecting == TERM );
20696 error_if_expecting_TERM()
20697 if ( $expecting == TERM );
20701 # start looking for a scalar
20702 error_if_expecting_OPERATOR("Scalar")
20703 if ( $expecting == OPERATOR );
20706 if ( $identifier eq '$^W' ) {
20707 $tokenizer_self->{_saw_perl_dash_w} = 1;
20710 # Check for indentifier in indirect object slot
20711 # (vorboard.pl, sort.t). Something like:
20712 # /^(print|printf|sort|exec|system)$/
20714 $is_indirect_object_taker{$last_nonblank_token}
20716 || ( ( $last_nonblank_token eq '(' )
20717 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
20718 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
20727 $paren_semicolon_count[$paren_depth] = 0;
20729 $container_type = $want_paren;
20733 $container_type = $last_nonblank_token;
20735 # We can check for a syntax error here of unexpected '(',
20736 # but this is going to get messy...
20738 $expecting == OPERATOR
20740 # be sure this is not a method call of the form
20741 # &method(...), $method->(..), &{method}(...),
20742 # $ref[2](list) is ok & short for $ref[2]->(list)
20743 # NOTE: at present, braces in something like &{ xxx }
20744 # are not marked as a block, we might have a method call
20745 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
20750 # ref: camel 3 p 703.
20751 if ( $last_last_nonblank_token eq 'do' ) {
20753 "do SUBROUTINE is deprecated; consider & or -> notation\n"
20758 # if this is an empty list, (), then it is not an
20759 # error; for example, we might have a constant pi and
20760 # invoke it with pi() or just pi;
20761 my ( $next_nonblank_token, $i_next ) =
20762 find_next_nonblank_token( $i, $rtokens,
20763 $max_token_index );
20764 if ( $next_nonblank_token ne ')' ) {
20766 error_if_expecting_OPERATOR('(');
20768 if ( $last_nonblank_type eq 'C' ) {
20770 "$last_nonblank_token has a void prototype\n";
20772 elsif ( $last_nonblank_type eq 'i' ) {
20774 && $last_nonblank_token =~ /^\$/ )
20777 "Do you mean '$last_nonblank_token->(' ?\n";
20781 interrupt_logfile();
20785 } ## end if ( $next_nonblank_token...
20786 } ## end else [ if ( $last_last_nonblank_token...
20787 } ## end if ( $expecting == OPERATOR...
20789 $paren_type[$paren_depth] = $container_type;
20791 increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
20793 # propagate types down through nested parens
20794 # for example: the second paren in 'if ((' would be structural
20795 # since the first is.
20797 if ( $last_nonblank_token eq '(' ) {
20798 $type = $last_nonblank_type;
20801 # We exclude parens as structural after a ',' because it
20802 # causes subtle problems with continuation indentation for
20803 # something like this, where the first 'or' will not get
20808 # ( not defined $check )
20810 # or $check eq "new"
20811 # or $check eq "old",
20814 # Likewise, we exclude parens where a statement can start
20815 # because of problems with continuation indentation, like
20818 # ($firstline =~ /^#\!.*perl/)
20819 # and (print $File::Find::name, "\n")
20822 # (ref($usage_fref) =~ /CODE/)
20824 # : (&blast_usage, &blast_params, &blast_general_params);
20830 if ( $last_nonblank_type eq ')' ) {
20832 "Syntax error? found token '$last_nonblank_type' then '('\n"
20835 $paren_structural_type[$paren_depth] = $type;
20840 decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
20842 if ( $paren_structural_type[$paren_depth] eq '{' ) {
20846 $container_type = $paren_type[$paren_depth];
20848 # /^(for|foreach)$/
20849 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
20850 my $num_sc = $paren_semicolon_count[$paren_depth];
20851 if ( $num_sc > 0 && $num_sc != 2 ) {
20852 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
20856 if ( $paren_depth > 0 ) { $paren_depth-- }
20859 if ( $last_nonblank_type eq ',' ) {
20860 complain("Repeated ','s \n");
20863 # patch for operator_expected: note if we are in the list (use.t)
20864 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
20865 ## FIXME: need to move this elsewhere, perhaps check after a '('
20866 ## elsif ($last_nonblank_token eq '(') {
20867 ## warning("Leading ','s illegal in some versions of perl\n");
20871 $context = UNKNOWN_CONTEXT;
20872 $statement_type = '';
20874 # /^(for|foreach)$/
20875 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
20876 { # mark ; in for loop
20878 # Be careful: we do not want a semicolon such as the
20879 # following to be included:
20881 # for (sort {strcoll($a,$b);} keys %investments) {
20883 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
20884 && $square_bracket_depth ==
20885 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
20889 $paren_semicolon_count[$paren_depth]++;
20895 error_if_expecting_OPERATOR("String")
20896 if ( $expecting == OPERATOR );
20899 $allowed_quote_modifiers = "";
20902 error_if_expecting_OPERATOR("String")
20903 if ( $expecting == OPERATOR );
20906 $allowed_quote_modifiers = "";
20909 error_if_expecting_OPERATOR("String")
20910 if ( $expecting == OPERATOR );
20913 $allowed_quote_modifiers = "";
20918 if ( $expecting == UNKNOWN ) { # indeterminte, must guess..
20920 ( $is_pattern, $msg ) =
20921 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
20922 $max_token_index );
20925 write_diagnostics("DIVIDE:$msg\n");
20926 write_logfile_entry($msg);
20929 else { $is_pattern = ( $expecting == TERM ) }
20934 $allowed_quote_modifiers = '[cgimosx]';
20936 else { # not a pattern; check for a /= token
20938 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
20944 #DEBUG - collecting info on what tokens follow a divide
20945 # for development of guessing algorithm
20946 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
20947 # #write_diagnostics( "DIVIDE? $input_line\n" );
20953 # if we just saw a ')', we will label this block with
20954 # its type. We need to do this to allow sub
20955 # code_block_type to determine if this brace starts a
20956 # code block or anonymous hash. (The type of a paren
20957 # pair is the preceding token, such as 'if', 'else',
20959 $container_type = "";
20961 # ATTRS: for a '{' following an attribute list, reset
20962 # things to look like we just saw the sub name
20963 if ( $statement_type =~ /^sub/ ) {
20964 $last_nonblank_token = $statement_type;
20965 $last_nonblank_type = 'i';
20966 $statement_type = "";
20969 # patch for SWITCH/CASE: hide these keywords from an immediately
20970 # following opening brace
20971 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
20972 && $statement_type eq $last_nonblank_token )
20974 $last_nonblank_token = ";";
20977 elsif ( $last_nonblank_token eq ')' ) {
20978 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
20980 # defensive move in case of a nesting error (pbug.t)
20981 # in which this ')' had no previous '('
20982 # this nesting error will have been caught
20983 if ( !defined($last_nonblank_token) ) {
20984 $last_nonblank_token = 'if';
20987 # check for syntax error here;
20988 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
20989 my $list = join( ' ', sort keys %is_blocktype_with_paren );
20991 "syntax error at ') {', didn't see one of: $list\n");
20995 # patch for paren-less for/foreach glitch, part 2.
20996 # see note below under 'qw'
20997 elsif ($last_nonblank_token eq 'qw'
20998 && $is_for_foreach{$want_paren} )
21000 $last_nonblank_token = $want_paren;
21001 if ( $last_last_nonblank_token eq $want_paren ) {
21003 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
21010 # now identify which of the three possible types of
21011 # curly braces we have: hash index container, anonymous
21012 # hash reference, or code block.
21014 # non-structural (hash index) curly brace pair
21015 # get marked 'L' and 'R'
21016 if ( is_non_structural_brace() ) {
21019 # patch for SWITCH/CASE:
21020 # allow paren-less identifier after 'when'
21021 # if the brace is preceded by a space
21022 if ( $statement_type eq 'when'
21023 && $last_nonblank_type eq 'i'
21024 && $last_last_nonblank_type eq 'k'
21025 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
21028 $block_type = $statement_type;
21032 # code and anonymous hash have the same type, '{', but are
21033 # distinguished by 'block_type',
21034 # which will be blank for an anonymous hash
21038 code_block_type( $i_tok, $rtokens, $rtoken_type,
21039 $max_token_index );
21041 # patch to promote bareword type to function taking block
21043 && $last_nonblank_type eq 'w'
21044 && $last_nonblank_i >= 0 )
21046 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
21047 $routput_token_type->[$last_nonblank_i] = 'G';
21051 # patch for SWITCH/CASE: if we find a stray opening block brace
21052 # where we might accept a 'case' or 'when' block, then take it
21053 if ( $statement_type eq 'case'
21054 || $statement_type eq 'when' )
21056 if ( !$block_type || $block_type eq '}' ) {
21057 $block_type = $statement_type;
21061 $brace_type[ ++$brace_depth ] = $block_type;
21062 $brace_package[$brace_depth] = $current_package;
21064 increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
21065 $brace_structural_type[$brace_depth] = $type;
21066 $brace_context[$brace_depth] = $context;
21067 $brace_statement_type[$brace_depth] = $statement_type;
21070 $block_type = $brace_type[$brace_depth];
21071 if ($block_type) { $statement_type = '' }
21072 if ( defined( $brace_package[$brace_depth] ) ) {
21073 $current_package = $brace_package[$brace_depth];
21076 # can happen on brace error (caught elsewhere)
21080 decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
21082 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
21086 # propagate type information for 'do' and 'eval' blocks.
21087 # This is necessary to enable us to know if an operator
21088 # or term is expected next
21089 if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
21090 $tok = $brace_type[$brace_depth];
21093 $context = $brace_context[$brace_depth];
21094 $statement_type = $brace_statement_type[$brace_depth];
21095 if ( $brace_depth > 0 ) { $brace_depth--; }
21097 '&' => sub { # maybe sub call? start looking
21099 # We have to check for sub call unless we are sure we
21100 # are expecting an operator. This example from s2p
21101 # got mistaken as a q operator in an early version:
21102 # print BODY &q(<<'EOT');
21103 if ( $expecting != OPERATOR ) {
21109 '<' => sub { # angle operator or less than?
21111 if ( $expecting != OPERATOR ) {
21113 find_angle_operator_termination( $input_line, $i, $rtoken_map,
21114 $expecting, $max_token_index );
21120 '?' => sub { # ?: conditional or starting pattern?
21124 if ( $expecting == UNKNOWN ) {
21127 ( $is_pattern, $msg ) =
21128 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
21129 $max_token_index );
21131 if ($msg) { write_logfile_entry($msg) }
21133 else { $is_pattern = ( $expecting == TERM ) }
21138 $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this
21142 increase_nesting_depth( QUESTION_COLON,
21143 $$rtoken_map[$i_tok] );
21146 '*' => sub { # typeglob, or multiply?
21148 if ( $expecting == TERM ) {
21153 if ( $$rtokens[ $i + 1 ] eq '=' ) {
21158 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
21162 if ( $$rtokens[ $i + 1 ] eq '=' ) {
21170 '.' => sub { # what kind of . ?
21172 if ( $expecting != OPERATOR ) {
21174 if ( $type eq '.' ) {
21175 error_if_expecting_TERM()
21176 if ( $expecting == TERM );
21184 # if this is the first nonblank character, call it a label
21185 # since perl seems to just swallow it
21186 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
21190 # ATTRS: check for a ':' which introduces an attribute list
21191 # (this might eventually get its own token type)
21192 elsif ( $statement_type =~ /^sub/ ) {
21194 $in_attribute_list = 1;
21197 # check for scalar attribute, such as
21198 # my $foo : shared = 1;
21199 elsif ($is_my_our{$statement_type}
21200 && $current_depth[QUESTION_COLON] == 0 )
21203 $in_attribute_list = 1;
21206 # otherwise, it should be part of a ?/: operator
21209 decrease_nesting_depth( QUESTION_COLON,
21210 $$rtoken_map[$i_tok] );
21211 if ( $last_nonblank_token eq '?' ) {
21212 warning("Syntax error near ? :\n");
21216 '+' => sub { # what kind of plus?
21218 if ( $expecting == TERM ) {
21219 my $number = scan_number();
21221 # unary plus is safest assumption if not a number
21222 if ( !defined($number) ) { $type = 'p'; }
21224 elsif ( $expecting == OPERATOR ) {
21227 if ( $next_type eq 'w' ) { $type = 'p' }
21232 error_if_expecting_OPERATOR("Array")
21233 if ( $expecting == OPERATOR );
21236 '%' => sub { # hash or modulo?
21238 # first guess is hash if no following blank
21239 if ( $expecting == UNKNOWN ) {
21240 if ( $next_type ne 'b' ) { $expecting = TERM }
21242 if ( $expecting == TERM ) {
21247 $square_bracket_type[ ++$square_bracket_depth ] =
21248 $last_nonblank_token;
21250 increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
21252 # It may seem odd, but structural square brackets have
21253 # type '{' and '}'. This simplifies the indentation logic.
21254 if ( !is_non_structural_brace() ) {
21257 $square_bracket_structural_type[$square_bracket_depth] = $type;
21261 decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
21263 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
21267 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
21269 '-' => sub { # what kind of minus?
21271 if ( ( $expecting != OPERATOR )
21272 && $is_file_test_operator{$next_tok} )
21278 elsif ( $expecting == TERM ) {
21279 my $number = scan_number();
21281 # maybe part of bareword token? unary is safest
21282 if ( !defined($number) ) { $type = 'm'; }
21285 elsif ( $expecting == OPERATOR ) {
21289 if ( $next_type eq 'w' ) {
21297 # check for special variables like ${^WARNING_BITS}
21298 if ( $expecting == TERM ) {
21300 # FIXME: this should work but will not catch errors
21301 # because we also have to be sure that previous token is
21302 # a type character ($,@,%).
21303 if ( $last_nonblank_token eq '{'
21304 && ( $next_tok =~ /^[A-Za-z_]/ ) )
21307 if ( $next_tok eq 'W' ) {
21308 $tokenizer_self->{_saw_perl_dash_w} = 1;
21310 $tok = $tok . $next_tok;
21316 unless ( error_if_expecting_TERM() ) {
21318 # Something like this is valid but strange:
21320 complain("The '^' seems unusual here\n");
21326 '::' => sub { # probably a sub call
21327 scan_bare_identifier();
21329 '<<' => sub { # maybe a here-doc?
21331 unless ( $i < $max_token_index )
21332 ; # here-doc not possible if end of line
21334 if ( $expecting != OPERATOR ) {
21335 my ( $found_target, $here_doc_target, $here_quote_character,
21338 $found_target, $here_doc_target, $here_quote_character, $i,
21341 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
21342 $max_token_index );
21344 if ($found_target) {
21345 push @{$rhere_target_list},
21346 [ $here_doc_target, $here_quote_character ];
21348 if ( length($here_doc_target) > 80 ) {
21349 my $truncated = substr( $here_doc_target, 0, 80 );
21350 complain("Long here-target: '$truncated' ...\n");
21352 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
21354 "Unconventional here-target: '$here_doc_target'\n"
21358 elsif ( $expecting == TERM ) {
21359 unless ($saw_error) {
21361 # shouldn't happen..
21362 warning("Program bug; didn't find here doc target\n");
21363 report_definite_bug();
21372 # if -> points to a bare word, we must scan for an identifier,
21373 # otherwise something like ->y would look like the y operator
21377 # type = 'pp' for pre-increment, '++' for post-increment
21379 if ( $expecting == TERM ) { $type = 'pp' }
21380 elsif ( $expecting == UNKNOWN ) {
21381 my ( $next_nonblank_token, $i_next ) =
21382 find_next_nonblank_token( $i, $rtokens, $max_token_index );
21383 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
21388 if ( $last_nonblank_type eq $tok ) {
21389 complain("Repeated '=>'s \n");
21392 # patch for operator_expected: note if we are in the list (use.t)
21393 # TODO: make version numbers a new token type
21394 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
21397 # type = 'mm' for pre-decrement, '--' for post-decrement
21400 if ( $expecting == TERM ) { $type = 'mm' }
21401 elsif ( $expecting == UNKNOWN ) {
21402 my ( $next_nonblank_token, $i_next ) =
21403 find_next_nonblank_token( $i, $rtokens, $max_token_index );
21404 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
21409 error_if_expecting_TERM()
21410 if ( $expecting == TERM );
21414 error_if_expecting_TERM()
21415 if ( $expecting == TERM );
21419 error_if_expecting_TERM()
21420 if ( $expecting == TERM );
21424 # ------------------------------------------------------------
21425 # end hash of code for handling individual token types
21426 # ------------------------------------------------------------
21428 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
21430 # These block types terminate statements and do not need a trailing
21432 # patched for SWITCH/CASE:
21433 my %is_zero_continuation_block_type;
21434 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
21435 if elsif else unless while until for foreach switch case given when);
21436 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
21438 my %is_not_zero_continuation_block_type;
21439 @_ = qw(sort grep map do eval);
21440 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
21442 my %is_logical_container;
21443 @_ = qw(if elsif unless while and or err not && ! || for foreach);
21444 @is_logical_container{@_} = (1) x scalar(@_);
21446 my %is_binary_type;
21448 @is_binary_type{@_} = (1) x scalar(@_);
21450 my %is_binary_keyword;
21451 @_ = qw(and or err eq ne cmp);
21452 @is_binary_keyword{@_} = (1) x scalar(@_);
21454 # 'L' is token for opening { at hash key
21455 my %is_opening_type;
21456 @_ = qw" L { ( [ ";
21457 @is_opening_type{@_} = (1) x scalar(@_);
21459 # 'R' is token for closing } at hash key
21460 my %is_closing_type;
21461 @_ = qw" R } ) ] ";
21462 @is_closing_type{@_} = (1) x scalar(@_);
21464 my %is_redo_last_next_goto;
21465 @_ = qw(redo last next goto);
21466 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
21468 my %is_use_require;
21469 @_ = qw(use require);
21470 @is_use_require{@_} = (1) x scalar(@_);
21472 my %is_sub_package;
21473 @_ = qw(sub package);
21474 @is_sub_package{@_} = (1) x scalar(@_);
21476 # This hash holds the hash key in $tokenizer_self for these keywords:
21477 my %is_format_END_DATA = (
21478 'format' => '_in_format',
21479 '__END__' => '_in_end',
21480 '__DATA__' => '_in_data',
21483 # ref: camel 3 p 147,
21484 # but perl may accept undocumented flags
21485 my %quote_modifiers = (
21486 's' => '[cegimosx]',
21489 'm' => '[cgimosx]',
21497 # table showing how many quoted things to look for after quote operator..
21498 # s, y, tr have 2 (pattern and replacement)
21499 # others have 1 (pattern only)
21500 my %quote_items = (
21512 sub tokenize_this_line {
21514 # This routine breaks a line of perl code into tokens which are of use in
21515 # indentation and reformatting. One of my goals has been to define tokens
21516 # such that a newline may be inserted between any pair of tokens without
21517 # changing or invalidating the program. This version comes close to this,
21518 # although there are necessarily a few exceptions which must be caught by
21519 # the formatter. Many of these involve the treatment of bare words.
21521 # The tokens and their types are returned in arrays. See previous
21522 # routine for their names.
21524 # See also the array "valid_token_types" in the BEGIN section for an
21527 # To simplify things, token types are either a single character, or they
21528 # are identical to the tokens themselves.
21530 # As a debugging aid, the -D flag creates a file containing a side-by-side
21531 # comparison of the input string and its tokenization for each line of a file.
21532 # This is an invaluable debugging aid.
21534 # In addition to tokens, and some associated quantities, the tokenizer
21535 # also returns flags indication any special line types. These include
21536 # quotes, here_docs, formats.
21538 # -----------------------------------------------------------------------
21540 # How to add NEW_TOKENS:
21542 # New token types will undoubtedly be needed in the future both to keep up
21543 # with changes in perl and to help adapt the tokenizer to other applications.
21545 # Here are some notes on the minimal steps. I wrote these notes while
21546 # adding the 'v' token type for v-strings, which are things like version
21547 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
21548 # can use your editor to search for the string "NEW_TOKENS" to find the
21549 # appropriate sections to change):
21551 # *. Try to talk somebody else into doing it! If not, ..
21553 # *. Make a backup of your current version in case things don't work out!
21555 # *. Think of a new, unused character for the token type, and add to
21556 # the array @valid_token_types in the BEGIN section of this package.
21557 # For example, I used 'v' for v-strings.
21559 # *. Implement coding to recognize the $type of the token in this routine.
21560 # This is the hardest part, and is best done by immitating or modifying
21561 # some of the existing coding. For example, to recognize v-strings, I
21562 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
21563 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
21565 # *. Update sub operator_expected. This update is critically important but
21566 # the coding is trivial. Look at the comments in that routine for help.
21567 # For v-strings, which should behave like numbers, I just added 'v' to the
21568 # regex used to handle numbers and strings (types 'n' and 'Q').
21570 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
21571 # Perl::Tidy::Formatter for breaking lines around this token type. You can
21572 # skip this step and take the default at first, then adjust later to get
21573 # desired results. For adding type 'v', I looked at sub bond_strength and
21574 # saw that number type 'n' was using default strengths, so I didn't do
21575 # anything. I may tune it up someday if I don't like the way line
21576 # breaks with v-strings look.
21578 # *. Implement a 'whitespace' rule in sub set_white_space_flag in
21579 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
21580 # and saw that type 'n' used spaces on both sides, so I just added 'v'
21581 # to the array @spaces_both_sides.
21583 # *. Update HtmlWriter package so that users can colorize the token as
21584 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
21585 # that package. For v-strings, I initially chose to use a default color
21586 # equal to the default for numbers, but it might be nice to change that
21589 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
21591 # *. Run lots and lots of debug tests. Start with special files designed
21592 # to test the new token type. Run with the -D flag to create a .DEBUG
21593 # file which shows the tokenization. When these work ok, test as many old
21594 # scripts as possible. Start with all of the '.t' files in the 'test'
21595 # directory of the distribution file. Compare .tdy output with previous
21596 # version and updated version to see the differences. Then include as
21597 # many more files as possible. My own technique has been to collect a huge
21598 # number of perl scripts (thousands!) into one directory and run perltidy
21599 # *, then run diff between the output of the previous version and the
21602 # *. For another example, search for the smartmatch operator '~~'
21603 # with your editor to see where updates were made for it.
21605 # -----------------------------------------------------------------------
21607 my $line_of_tokens = shift;
21608 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
21610 # patch while coding change is underway
21611 # make callers private data to allow access
21612 # $tokenizer_self = $caller_tokenizer_self;
21614 # extract line number for use in error messages
21615 $input_line_number = $line_of_tokens->{_line_number};
21617 # reinitialize for multi-line quote
21618 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
21620 # check for pod documentation
21621 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
21623 # must not be in multi-line quote
21624 # and must not be in an eqn
21625 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
21627 $tokenizer_self->{_in_pod} = 1;
21632 $input_line = $untrimmed_input_line;
21636 # trim start of this line unless we are continuing a quoted line
21637 # do not trim end because we might end in a quote (test: deken4.pl)
21638 # Perl::Tidy::Formatter will delete needless trailing blanks
21639 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
21640 $input_line =~ s/^\s*//; # trim left end
21643 # update the copy of the line for use in error messages
21644 # This must be exactly what we give the pre_tokenizer
21645 $tokenizer_self->{_line_text} = $input_line;
21647 # re-initialize for the main loop
21648 $routput_token_list = []; # stack of output token indexes
21649 $routput_token_type = []; # token types
21650 $routput_block_type = []; # types of code block
21651 $routput_container_type = []; # paren types, such as if, elsif, ..
21652 $routput_type_sequence = []; # nesting sequential number
21654 $rhere_target_list = [];
21656 $tok = $last_nonblank_token;
21657 $type = $last_nonblank_type;
21658 $prototype = $last_nonblank_prototype;
21659 $last_nonblank_i = -1;
21660 $block_type = $last_nonblank_block_type;
21661 $container_type = $last_nonblank_container_type;
21662 $type_sequence = $last_nonblank_type_sequence;
21665 # tokenization is done in two stages..
21666 # stage 1 is a very simple pre-tokenization
21667 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
21669 # a little optimization for a full-line comment
21670 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
21671 $max_tokens_wanted = 1 # no use tokenizing a comment
21674 # start by breaking the line into pre-tokens
21675 ( $rtokens, $rtoken_map, $rtoken_type ) =
21676 pre_tokenize( $input_line, $max_tokens_wanted );
21678 $max_token_index = scalar(@$rtokens) - 1;
21679 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
21680 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
21681 push( @$rtoken_type, 'b', 'b', 'b' );
21683 # initialize for main loop
21684 for $i ( 0 .. $max_token_index + 3 ) {
21685 $routput_token_type->[$i] = "";
21686 $routput_block_type->[$i] = "";
21687 $routput_container_type->[$i] = "";
21688 $routput_type_sequence->[$i] = "";
21693 # ------------------------------------------------------------
21694 # begin main tokenization loop
21695 # ------------------------------------------------------------
21697 # we are looking at each pre-token of one line and combining them
21699 while ( ++$i <= $max_token_index ) {
21701 if ($in_quote) { # continue looking for end of a quote
21702 $type = $quote_type;
21704 unless ( @{$routput_token_list} )
21705 { # initialize if continuation line
21706 push( @{$routput_token_list}, $i );
21707 $routput_token_type->[$i] = $type;
21710 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
21712 # scan for the end of the quote or pattern
21714 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
21715 $quoted_string_1, $quoted_string_2
21718 $i, $in_quote, $quote_character,
21719 $quote_pos, $quote_depth, $quoted_string_1,
21720 $quoted_string_2, $rtokens, $rtoken_map,
21724 # all done if we didn't find it
21725 last if ($in_quote);
21727 # save pattern and replacement text for rescanning
21728 my $qs1 = $quoted_string_1;
21729 my $qs2 = $quoted_string_2;
21731 # re-initialize for next search
21732 $quote_character = '';
21735 $quoted_string_1 = "";
21736 $quoted_string_2 = "";
21737 last if ( ++$i > $max_token_index );
21739 # look for any modifiers
21740 if ($allowed_quote_modifiers) {
21742 # check for exact quote modifiers
21743 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
21744 my $str = $$rtokens[$i];
21745 my $saw_modifier_e;
21746 while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
21747 my $pos = pos($str);
21748 my $char = substr( $str, $pos - 1, 1 );
21749 $saw_modifier_e ||= ( $char eq 'e' );
21752 # For an 'e' quote modifier we must scan the replacement
21753 # text for here-doc targets.
21754 if ($saw_modifier_e) {
21756 my $rht = scan_replacement_text($qs1);
21758 # Change type from 'Q' to 'h' for quotes with
21759 # here-doc targets so that the formatter (see sub
21760 # print_line_of_tokens) will not make any line
21761 # breaks after this point.
21763 push @{$rhere_target_list}, @{$rht};
21765 if ( $i_tok < 0 ) {
21766 my $ilast = $routput_token_list->[-1];
21767 $routput_token_type->[$ilast] = $type;
21772 if ( defined( pos($str) ) ) {
21775 if ( pos($str) == length($str) ) {
21776 last if ( ++$i > $max_token_index );
21779 # Looks like a joined quote modifier
21780 # and keyword, maybe something like
21781 # s/xxx/yyy/gefor @k=...
21782 # Example is "galgen.pl". Would have to split
21783 # the word and insert a new token in the
21784 # pre-token list. This is so rare that I haven't
21785 # done it. Will just issue a warning citation.
21787 # This error might also be triggered if my quote
21788 # modifier characters are incomplete
21792 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
21793 Please put a space between quote modifiers and trailing keywords.
21796 # print "token $$rtokens[$i]\n";
21797 # my $num = length($str) - pos($str);
21798 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
21799 # print "continuing with new token $$rtokens[$i]\n";
21801 # skipping past this token does least damage
21802 last if ( ++$i > $max_token_index );
21807 # example file: rokicki4.pl
21808 # This error might also be triggered if my quote
21809 # modifier characters are incomplete
21810 write_logfile_entry(
21811 "Note: found word $str at quote modifier location\n"
21817 $allowed_quote_modifiers = "";
21821 unless ( $tok =~ /^\s*$/ ) {
21823 # try to catch some common errors
21824 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
21826 if ( $last_nonblank_token eq 'eq' ) {
21827 complain("Should 'eq' be '==' here ?\n");
21829 elsif ( $last_nonblank_token eq 'ne' ) {
21830 complain("Should 'ne' be '!=' here ?\n");
21834 $last_last_nonblank_token = $last_nonblank_token;
21835 $last_last_nonblank_type = $last_nonblank_type;
21836 $last_last_nonblank_block_type = $last_nonblank_block_type;
21837 $last_last_nonblank_container_type =
21838 $last_nonblank_container_type;
21839 $last_last_nonblank_type_sequence =
21840 $last_nonblank_type_sequence;
21841 $last_nonblank_token = $tok;
21842 $last_nonblank_type = $type;
21843 $last_nonblank_prototype = $prototype;
21844 $last_nonblank_block_type = $block_type;
21845 $last_nonblank_container_type = $container_type;
21846 $last_nonblank_type_sequence = $type_sequence;
21847 $last_nonblank_i = $i_tok;
21850 # store previous token type
21851 if ( $i_tok >= 0 ) {
21852 $routput_token_type->[$i_tok] = $type;
21853 $routput_block_type->[$i_tok] = $block_type;
21854 $routput_container_type->[$i_tok] = $container_type;
21855 $routput_type_sequence->[$i_tok] = $type_sequence;
21857 my $pre_tok = $$rtokens[$i]; # get the next pre-token
21858 my $pre_type = $$rtoken_type[$i]; # and type
21860 $type = $pre_type; # to be modified as necessary
21861 $block_type = ""; # blank for all tokens except code block braces
21862 $container_type = ""; # blank for all tokens except some parens
21863 $type_sequence = ""; # blank for all tokens except ?/:
21864 $prototype = ""; # blank for all tokens except user defined subs
21867 # this pre-token will start an output token
21868 push( @{$routput_token_list}, $i_tok );
21870 # continue gathering identifier if necessary
21871 # but do not start on blanks and comments
21872 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
21874 if ( $id_scan_state =~ /^(sub|package)/ ) {
21881 last if ($id_scan_state);
21882 next if ( ( $i > 0 ) || $type );
21884 # didn't find any token; start over
21889 # handle whitespace tokens..
21890 next if ( $type eq 'b' );
21891 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
21892 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
21894 # Build larger tokens where possible, since we are not in a quote.
21896 # First try to assemble digraphs. The following tokens are
21897 # excluded and handled specially:
21898 # '/=' is excluded because the / might start a pattern.
21899 # 'x=' is excluded since it might be $x=, with $ on previous line
21900 # '**' and *= might be typeglobs of punctuation variables
21901 # I have allowed tokens starting with <, such as <=,
21902 # because I don't think these could be valid angle operators.
21903 # test file: storrs4.pl
21904 my $test_tok = $tok . $$rtokens[ $i + 1 ];
21905 my $combine_ok = $is_digraph{$test_tok};
21907 # check for special cases which cannot be combined
21910 # '//' must be defined_or operator if an operator is expected.
21911 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
21912 # could be migrated here for clarity
21913 if ( $test_tok eq '//' ) {
21914 my $next_type = $$rtokens[ $i + 1 ];
21916 operator_expected( $prev_type, $tok, $next_type );
21917 $combine_ok = 0 unless ( $expecting == OPERATOR );
21923 && ( $test_tok ne '/=' ) # might be pattern
21924 && ( $test_tok ne 'x=' ) # might be $x
21925 && ( $test_tok ne '**' ) # typeglob?
21926 && ( $test_tok ne '*=' ) # typeglob?
21932 # Now try to assemble trigraphs. Note that all possible
21933 # perl trigraphs can be constructed by appending a character
21935 $test_tok = $tok . $$rtokens[ $i + 1 ];
21937 if ( $is_trigraph{$test_tok} ) {
21944 $next_tok = $$rtokens[ $i + 1 ];
21945 $next_type = $$rtoken_type[ $i + 1 ];
21947 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
21950 $last_nonblank_token, $tok,
21951 $next_tok, $brace_depth,
21952 $brace_type[$brace_depth], $paren_depth,
21953 $paren_type[$paren_depth]
21955 print "TOKENIZE:(@debug_list)\n";
21958 # turn off attribute list on first non-blank, non-bareword
21959 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
21961 ###############################################################
21962 # We have the next token, $tok.
21963 # Now we have to examine this token and decide what it is
21964 # and define its $type
21966 # section 1: bare words
21967 ###############################################################
21969 if ( $pre_type eq 'w' ) {
21970 $expecting = operator_expected( $prev_type, $tok, $next_type );
21971 my ( $next_nonblank_token, $i_next ) =
21972 find_next_nonblank_token( $i, $rtokens, $max_token_index );
21974 # ATTRS: handle sub and variable attributes
21975 if ($in_attribute_list) {
21977 # treat bare word followed by open paren like qw(
21978 if ( $next_nonblank_token eq '(' ) {
21979 $in_quote = $quote_items{q};
21980 $allowed_quote_modifiers = $quote_modifiers{q};
21986 # handle bareword not followed by open paren
21993 # quote a word followed by => operator
21994 if ( $next_nonblank_token eq '=' ) {
21996 if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
21997 if ( $is_constant{$current_package}{$tok} ) {
22000 elsif ( $is_user_function{$current_package}{$tok} ) {
22003 $user_function_prototype{$current_package}{$tok};
22005 elsif ( $tok =~ /^v\d+$/ ) {
22007 report_v_string($tok);
22009 else { $type = 'w' }
22015 # quote a bare word within braces..like xxx->{s}; note that we
22016 # must be sure this is not a structural brace, to avoid
22017 # mistaking {s} in the following for a quoted bare word:
22018 # for(@[){s}bla}BLA}
22019 if ( ( $last_nonblank_type eq 'L' )
22020 && ( $next_nonblank_token eq '}' ) )
22026 # a bare word immediately followed by :: is not a keyword;
22027 # use $tok_kw when testing for keywords to avoid a mistake
22029 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
22034 # handle operator x (now we know it isn't $x=)
22035 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
22036 if ( $tok eq 'x' ) {
22038 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
22048 # FIXME: Patch: mark something like x4 as an integer for now
22049 # It gets fixed downstream. This is easier than
22050 # splitting the pretoken.
22056 elsif ( ( $tok eq 'strict' )
22057 and ( $last_nonblank_token eq 'use' ) )
22059 $tokenizer_self->{_saw_use_strict} = 1;
22060 scan_bare_identifier();
22063 elsif ( ( $tok eq 'warnings' )
22064 and ( $last_nonblank_token eq 'use' ) )
22066 $tokenizer_self->{_saw_perl_dash_w} = 1;
22068 # scan as identifier, so that we pick up something like:
22069 # use warnings::register
22070 scan_bare_identifier();
22074 $tok eq 'AutoLoader'
22075 && $tokenizer_self->{_look_for_autoloader}
22077 $last_nonblank_token eq 'use'
22079 # these regexes are from AutoSplit.pm, which we want
22081 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
22082 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
22086 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
22087 $tokenizer_self->{_saw_autoloader} = 1;
22088 $tokenizer_self->{_look_for_autoloader} = 0;
22089 scan_bare_identifier();
22093 $tok eq 'SelfLoader'
22094 && $tokenizer_self->{_look_for_selfloader}
22095 && ( $last_nonblank_token eq 'use'
22096 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
22097 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
22100 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
22101 $tokenizer_self->{_saw_selfloader} = 1;
22102 $tokenizer_self->{_look_for_selfloader} = 0;
22103 scan_bare_identifier();
22106 elsif ( ( $tok eq 'constant' )
22107 and ( $last_nonblank_token eq 'use' ) )
22109 scan_bare_identifier();
22110 my ( $next_nonblank_token, $i_next ) =
22111 find_next_nonblank_token( $i, $rtokens,
22112 $max_token_index );
22114 if ($next_nonblank_token) {
22116 if ( $is_keyword{$next_nonblank_token} ) {
22118 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
22122 # FIXME: could check for error in which next token is
22123 # not a word (number, punctuation, ..)
22125 $is_constant{$current_package}
22126 {$next_nonblank_token} = 1;
22131 # various quote operators
22132 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
22133 if ( $expecting == OPERATOR ) {
22135 # patch for paren-less for/foreach glitch, part 1
22136 # perl will accept this construct as valid:
22138 # foreach my $key qw\Uno Due Tres Quadro\ {
22139 # print "Set $key\n";
22141 unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
22143 error_if_expecting_OPERATOR();
22146 $in_quote = $quote_items{$tok};
22147 $allowed_quote_modifiers = $quote_modifiers{$tok};
22149 # All quote types are 'Q' except possibly qw quotes.
22150 # qw quotes are special in that they may generally be trimmed
22151 # of leading and trailing whitespace. So they are given a
22152 # separate type, 'q', unless requested otherwise.
22154 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
22157 $quote_type = $type;
22160 # check for a statement label
22162 ( $next_nonblank_token eq ':' )
22163 && ( $$rtokens[ $i_next + 1 ] ne ':' )
22164 && ( $i_next <= $max_token_index ) # colon on same line
22168 if ( $tok !~ /A-Z/ ) {
22169 push @{ $tokenizer_self->{_rlower_case_labels_at} },
22170 $input_line_number;
22178 # 'sub' || 'package'
22179 elsif ( $is_sub_package{$tok_kw} ) {
22180 error_if_expecting_OPERATOR()
22181 if ( $expecting == OPERATOR );
22185 # Note on token types for format, __DATA__, __END__:
22186 # It simplifies things to give these type ';', so that when we
22187 # start rescanning we will be expecting a token of type TERM.
22188 # We will switch to type 'k' before outputting the tokens.
22189 elsif ( $is_format_END_DATA{$tok_kw} ) {
22190 $type = ';'; # make tokenizer look for TERM next
22191 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
22195 elsif ( $is_keyword{$tok_kw} ) {
22198 # Since for and foreach may not be followed immediately
22199 # by an opening paren, we have to remember which keyword
22200 # is associated with the next '('
22201 if ( $is_for_foreach{$tok} ) {
22202 if ( new_statement_ok() ) {
22203 $want_paren = $tok;
22207 # recognize 'use' statements, which are special
22208 elsif ( $is_use_require{$tok} ) {
22209 $statement_type = $tok;
22210 error_if_expecting_OPERATOR()
22211 if ( $expecting == OPERATOR );
22214 # remember my and our to check for trailing ": shared"
22215 elsif ( $is_my_our{$tok} ) {
22216 $statement_type = $tok;
22219 # Check for misplaced 'elsif' and 'else', but allow isolated
22220 # else or elsif blocks to be formatted. This is indicated
22221 # by a last noblank token of ';'
22222 elsif ( $tok eq 'elsif' ) {
22223 if ( $last_nonblank_token ne ';'
22224 && $last_nonblank_block_type !~
22225 /^(if|elsif|unless)$/ )
22228 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
22232 elsif ( $tok eq 'else' ) {
22234 # patched for SWITCH/CASE
22235 if ( $last_nonblank_token ne ';'
22236 && $last_nonblank_block_type !~
22237 /^(if|elsif|unless|case|when)$/ )
22240 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
22244 elsif ( $tok eq 'continue' ) {
22245 if ( $last_nonblank_token ne ';'
22246 && $last_nonblank_block_type !~
22247 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
22250 # note: ';' '{' and '}' in list above
22251 # because continues can follow bare blocks;
22252 # ':' is labeled block
22253 warning("'$tok' should follow a block\n");
22257 # patch for SWITCH/CASE if 'case' and 'when are
22258 # treated as keywords.
22259 elsif ( $tok eq 'when' || $tok eq 'case' ) {
22260 $statement_type = $tok; # next '{' is block
22264 # check for inline label following
22265 # /^(redo|last|next|goto)$/
22266 elsif (( $last_nonblank_type eq 'k' )
22267 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
22273 # something else --
22276 scan_bare_identifier();
22277 if ( $type eq 'w' ) {
22279 if ( $expecting == OPERATOR ) {
22281 # don't complain about possible indirect object
22285 # sub new($) { ... }
22286 # $b = new A::; # calls A::new
22287 # $c = new A; # same thing but suspicious
22288 # This will call A::new but we have a 'new' in
22289 # main:: which looks like a constant.
22291 if ( $last_nonblank_type eq 'C' ) {
22292 if ( $tok !~ /::$/ ) {
22294 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
22295 Maybe indirectet object notation?
22300 error_if_expecting_OPERATOR("bareword");
22304 # mark bare words immediately followed by a paren as
22306 $next_tok = $$rtokens[ $i + 1 ];
22307 if ( $next_tok eq '(' ) {
22311 # underscore after file test operator is file handle
22312 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
22316 # patch for SWITCH/CASE if 'case' and 'when are
22317 # not treated as keywords:
22321 && $brace_type[$brace_depth] eq 'switch'
22323 || ( $tok eq 'when'
22324 && $brace_type[$brace_depth] eq 'given' )
22327 $statement_type = $tok; # next '{' is block
22328 $type = 'k'; # for keyword syntax coloring
22331 # patch for SWITCH/CASE if switch and given not keywords
22332 # Switch is not a perl 5 keyword, but we will gamble
22333 # and mark switch followed by paren as a keyword. This
22334 # is only necessary to get html syntax coloring nice,
22335 # and does not commit this as being a switch/case.
22336 if ( $next_nonblank_token eq '('
22337 && ( $tok eq 'switch' || $tok eq 'given' ) )
22339 $type = 'k'; # for keyword syntax coloring
22345 ###############################################################
22346 # section 2: strings of digits
22347 ###############################################################
22348 elsif ( $pre_type eq 'd' ) {
22349 $expecting = operator_expected( $prev_type, $tok, $next_type );
22350 error_if_expecting_OPERATOR("Number")
22351 if ( $expecting == OPERATOR );
22352 my $number = scan_number();
22353 if ( !defined($number) ) {
22355 # shouldn't happen - we should always get a number
22356 warning("non-number beginning with digit--program bug\n");
22357 report_definite_bug();
22361 ###############################################################
22362 # section 3: all other tokens
22363 ###############################################################
22366 last if ( $tok eq '#' );
22367 my $code = $tokenization_code->{$tok};
22370 operator_expected( $prev_type, $tok, $next_type );
22377 # -----------------------------
22378 # end of main tokenization loop
22379 # -----------------------------
22381 if ( $i_tok >= 0 ) {
22382 $routput_token_type->[$i_tok] = $type;
22383 $routput_block_type->[$i_tok] = $block_type;
22384 $routput_container_type->[$i_tok] = $container_type;
22385 $routput_type_sequence->[$i_tok] = $type_sequence;
22388 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
22389 $last_last_nonblank_token = $last_nonblank_token;
22390 $last_last_nonblank_type = $last_nonblank_type;
22391 $last_last_nonblank_block_type = $last_nonblank_block_type;
22392 $last_last_nonblank_container_type = $last_nonblank_container_type;
22393 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
22394 $last_nonblank_token = $tok;
22395 $last_nonblank_type = $type;
22396 $last_nonblank_block_type = $block_type;
22397 $last_nonblank_container_type = $container_type;
22398 $last_nonblank_type_sequence = $type_sequence;
22399 $last_nonblank_prototype = $prototype;
22402 # reset indentation level if necessary at a sub or package
22403 # in an attempt to recover from a nesting error
22404 if ( $level_in_tokenizer < 0 ) {
22405 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
22406 reset_indentation_level(0);
22407 brace_warning("resetting level to 0 at $1 $2\n");
22411 # all done tokenizing this line ...
22412 # now prepare the final list of tokens and types
22414 my @token_type = (); # stack of output token types
22415 my @block_type = (); # stack of output code block types
22416 my @container_type = (); # stack of output code container types
22417 my @type_sequence = (); # stack of output type sequence numbers
22418 my @tokens = (); # output tokens
22419 my @levels = (); # structural brace levels of output tokens
22420 my @slevels = (); # secondary nesting levels of output tokens
22421 my @nesting_tokens = (); # string of tokens leading to this depth
22422 my @nesting_types = (); # string of token types leading to this depth
22423 my @nesting_blocks = (); # string of block types leading to this depth
22424 my @nesting_lists = (); # string of list types leading to this depth
22425 my @ci_string = (); # string needed to compute continuation indentation
22426 my @container_environment = (); # BLOCK or LIST
22427 my $container_environment = '';
22428 my $im = -1; # previous $i value
22430 my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
22432 # =head1 Computing Token Indentation
22434 # The final section of the tokenizer forms tokens and also computes
22435 # parameters needed to find indentation. It is much easier to do it
22436 # in the tokenizer than elsewhere. Here is a brief description of how
22437 # indentation is computed. Perl::Tidy computes indentation as the sum
22440 # (1) structural indentation, such as if/else/elsif blocks
22441 # (2) continuation indentation, such as long parameter call lists.
22443 # These are occasionally called primary and secondary indentation.
22445 # Structural indentation is introduced by tokens of type '{', although
22446 # the actual tokens might be '{', '(', or '['. Structural indentation
22447 # is of two types: BLOCK and non-BLOCK. Default structural indentation
22448 # is 4 characters if the standard indentation scheme is used.
22450 # Continuation indentation is introduced whenever a line at BLOCK level
22451 # is broken before its termination. Default continuation indentation
22452 # is 2 characters in the standard indentation scheme.
22454 # Both types of indentation may be nested arbitrarily deep and
22455 # interlaced. The distinction between the two is somewhat arbitrary.
22457 # For each token, we will define two variables which would apply if
22458 # the current statement were broken just before that token, so that
22459 # that token started a new line:
22461 # $level = the structural indentation level,
22462 # $ci_level = the continuation indentation level
22464 # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
22465 # assuming defaults. However, in some special cases it is customary
22466 # to modify $ci_level from this strict value.
22468 # The total structural indentation is easy to compute by adding and
22469 # subtracting 1 from a saved value as types '{' and '}' are seen. The
22470 # running value of this variable is $level_in_tokenizer.
22472 # The total continuation is much more difficult to compute, and requires
22473 # several variables. These veriables are:
22475 # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
22476 # each indentation level, if there are intervening open secondary
22477 # structures just prior to that level.
22478 # $continuation_string_in_tokenizer = a string of 1's and 0's indicating
22479 # if the last token at that level is "continued", meaning that it
22480 # is not the first token of an expression.
22481 # $nesting_block_string = a string of 1's and 0's indicating, for each
22482 # indentation level, if the level is of type BLOCK or not.
22483 # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
22484 # $nesting_list_string = a string of 1's and 0's indicating, for each
22485 # indentation level, if it is is appropriate for list formatting.
22486 # If so, continuation indentation is used to indent long list items.
22487 # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
22488 # @{$rslevel_stack} = a stack of total nesting depths at each
22489 # structural indentation level, where "total nesting depth" means
22490 # the nesting depth that would occur if every nesting token -- '{', '[',
22491 # and '(' -- , regardless of context, is used to compute a nesting
22494 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
22495 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
22497 my ( $ci_string_i, $level_i, $nesting_block_string_i,
22498 $nesting_list_string_i, $nesting_token_string_i,
22499 $nesting_type_string_i, );
22501 foreach $i ( @{$routput_token_list} )
22502 { # scan the list of pre-tokens indexes
22504 # self-checking for valid token types
22505 my $type = $routput_token_type->[$i];
22506 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
22507 $level_i = $level_in_tokenizer;
22509 # This can happen by running perltidy on non-scripts
22510 # although it could also be bug introduced by programming change.
22511 # Perl silently accepts a 032 (^Z) and takes it as the end
22512 if ( !$is_valid_token_type{$type} ) {
22513 my $val = ord($type);
22515 "unexpected character decimal $val ($type) in script\n");
22516 $tokenizer_self->{_in_error} = 1;
22519 # ----------------------------------------------------------------
22520 # TOKEN TYPE PATCHES
22521 # output __END__, __DATA__, and format as type 'k' instead of ';'
22522 # to make html colors correct, etc.
22523 my $fix_type = $type;
22524 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
22526 # output anonymous 'sub' as keyword
22527 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
22529 # -----------------------------------------------------------------
22531 $nesting_token_string_i = $nesting_token_string;
22532 $nesting_type_string_i = $nesting_type_string;
22533 $nesting_block_string_i = $nesting_block_string;
22534 $nesting_list_string_i = $nesting_list_string;
22536 # set primary indentation levels based on structural braces
22537 # Note: these are set so that the leading braces have a HIGHER
22538 # level than their CONTENTS, which is convenient for indentation
22539 # Also, define continuation indentation for each token.
22540 if ( $type eq '{' || $type eq 'L' ) {
22542 # use environment before updating
22543 $container_environment =
22544 $nesting_block_flag ? 'BLOCK'
22545 : $nesting_list_flag ? 'LIST'
22548 # if the difference between total nesting levels is not 1,
22549 # there are intervening non-structural nesting types between
22550 # this '{' and the previous unclosed '{'
22551 my $intervening_secondary_structure = 0;
22552 if ( @{$rslevel_stack} ) {
22553 $intervening_secondary_structure =
22554 $slevel_in_tokenizer - $rslevel_stack->[-1];
22557 # =head1 Continuation Indentation
22559 # Having tried setting continuation indentation both in the formatter and
22560 # in the tokenizer, I can say that setting it in the tokenizer is much,
22561 # much easier. The formatter already has too much to do, and can't
22562 # make decisions on line breaks without knowing what 'ci' will be at
22563 # arbitrary locations.
22565 # But a problem with setting the continuation indentation (ci) here
22566 # in the tokenizer is that we do not know where line breaks will actually
22567 # be. As a result, we don't know if we should propagate continuation
22568 # indentation to higher levels of structure.
22570 # For nesting of only structural indentation, we never need to do this.
22571 # For example, in a long if statement, like this
22573 # if ( !$output_block_type[$i]
22574 # && ($in_statement_continuation) )
22579 # the second line has ci but we do normally give the lines within the BLOCK
22580 # any ci. This would be true if we had blocks nested arbitrarily deeply.
22582 # But consider something like this, where we have created a break after
22583 # an opening paren on line 1, and the paren is not (currently) a
22584 # structural indentation token:
22586 # my $file = $menubar->Menubutton(
22587 # qw/-text File -underline 0 -menuitems/ => [
22589 # Cascade => '~View',
22593 # The second line has ci, so it would seem reasonable to propagate it
22594 # down, giving the third line 1 ci + 1 indentation. This suggests the
22595 # following rule, which is currently used to propagating ci down: if there
22596 # are any non-structural opening parens (or brackets, or braces), before
22597 # an opening structural brace, then ci is propagated down, and otherwise
22598 # not. The variable $intervening_secondary_structure contains this
22599 # information for the current token, and the string
22600 # "$ci_string_in_tokenizer" is a stack of previous values of this
22603 # save the current states
22604 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
22605 $level_in_tokenizer++;
22607 if ( $routput_block_type->[$i] ) {
22608 $nesting_block_flag = 1;
22609 $nesting_block_string .= '1';
22612 $nesting_block_flag = 0;
22613 $nesting_block_string .= '0';
22616 # we will use continuation indentation within containers
22617 # which are not blocks and not logical expressions
22619 if ( !$routput_block_type->[$i] ) {
22621 # propagate flag down at nested open parens
22622 if ( $routput_container_type->[$i] eq '(' ) {
22623 $bit = 1 if $nesting_list_flag;
22626 # use list continuation if not a logical grouping
22627 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
22631 $is_logical_container{ $routput_container_type->[$i]
22635 $nesting_list_string .= $bit;
22636 $nesting_list_flag = $bit;
22638 $ci_string_in_tokenizer .=
22639 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
22640 $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
22641 $continuation_string_in_tokenizer .=
22642 ( $in_statement_continuation > 0 ) ? '1' : '0';
22644 # Sometimes we want to give an opening brace continuation indentation,
22645 # and sometimes not. For code blocks, we don't do it, so that the leading
22646 # '{' gets outdented, like this:
22648 # if ( !$output_block_type[$i]
22649 # && ($in_statement_continuation) )
22652 # For other types, we will give them continuation indentation. For example,
22653 # here is how a list looks with the opening paren indented:
22656 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
22657 # [ "homer", "marge", "bart" ], );
22659 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
22661 my $total_ci = $ci_string_sum;
22663 !$routput_block_type->[$i] # patch: skip for BLOCK
22664 && ($in_statement_continuation)
22667 $total_ci += $in_statement_continuation
22668 unless ( $ci_string_in_tokenizer =~ /1$/ );
22671 $ci_string_i = $total_ci;
22672 $in_statement_continuation = 0;
22675 elsif ( $type eq '}' || $type eq 'R' ) {
22677 # only a nesting error in the script would prevent popping here
22678 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
22680 $level_i = --$level_in_tokenizer;
22682 # restore previous level values
22683 if ( length($nesting_block_string) > 1 )
22684 { # true for valid script
22685 chop $nesting_block_string;
22686 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
22687 chop $nesting_list_string;
22688 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
22690 chop $ci_string_in_tokenizer;
22692 ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
22694 $in_statement_continuation =
22695 chop $continuation_string_in_tokenizer;
22697 # zero continuation flag at terminal BLOCK '}' which
22698 # ends a statement.
22699 if ( $routput_block_type->[$i] ) {
22701 # ...These include non-anonymous subs
22702 # note: could be sub ::abc { or sub 'abc
22703 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
22705 # note: older versions of perl require the /gc modifier
22706 # here or else the \G does not work.
22707 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
22709 $in_statement_continuation = 0;
22713 # ...and include all block types except user subs with
22714 # block prototypes and these: (sort|grep|map|do|eval)
22715 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
22717 $is_zero_continuation_block_type{
22718 $routput_block_type->[$i] } )
22720 $in_statement_continuation = 0;
22723 # ..but these are not terminal types:
22724 # /^(sort|grep|map|do|eval)$/ )
22726 $is_not_zero_continuation_block_type{
22727 $routput_block_type->[$i] } )
22731 # ..and a block introduced by a label
22732 # /^\w+\s*:$/gc ) {
22733 elsif ( $routput_block_type->[$i] =~ /:$/ ) {
22734 $in_statement_continuation = 0;
22737 # user function with block prototype
22739 $in_statement_continuation = 0;
22743 # If we are in a list, then
22744 # we must set continuatoin indentation at the closing
22745 # paren of something like this (paren after $check):
22748 # ( not defined $check )
22750 # or $check eq "new"
22751 # or $check eq "old",
22753 elsif ( $tok eq ')' ) {
22754 $in_statement_continuation = 1
22755 if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
22759 # use environment after updating
22760 $container_environment =
22761 $nesting_block_flag ? 'BLOCK'
22762 : $nesting_list_flag ? 'LIST'
22764 $ci_string_i = $ci_string_sum + $in_statement_continuation;
22765 $nesting_block_string_i = $nesting_block_string;
22766 $nesting_list_string_i = $nesting_list_string;
22769 # not a structural indentation type..
22772 $container_environment =
22773 $nesting_block_flag ? 'BLOCK'
22774 : $nesting_list_flag ? 'LIST'
22777 # zero the continuation indentation at certain tokens so
22778 # that they will be at the same level as its container. For
22779 # commas, this simplifies the -lp indentation logic, which
22780 # counts commas. For ?: it makes them stand out.
22781 if ($nesting_list_flag) {
22782 if ( $type =~ /^[,\?\:]$/ ) {
22783 $in_statement_continuation = 0;
22787 # be sure binary operators get continuation indentation
22789 $container_environment
22790 && ( $type eq 'k' && $is_binary_keyword{$tok}
22791 || $is_binary_type{$type} )
22794 $in_statement_continuation = 1;
22797 # continuation indentation is sum of any open ci from previous
22798 # levels plus the current level
22799 $ci_string_i = $ci_string_sum + $in_statement_continuation;
22801 # update continuation flag ...
22802 # if this isn't a blank or comment..
22803 if ( $type ne 'b' && $type ne '#' ) {
22805 # and we are in a BLOCK
22806 if ($nesting_block_flag) {
22808 # the next token after a ';' and label starts a new stmt
22809 if ( $type eq ';' || $type eq 'J' ) {
22810 $in_statement_continuation = 0;
22813 # otherwise, we are continuing the current statement
22815 $in_statement_continuation = 1;
22819 # if we are not in a BLOCK..
22822 # do not use continuation indentation if not list
22823 # environment (could be within if/elsif clause)
22824 if ( !$nesting_list_flag ) {
22825 $in_statement_continuation = 0;
22828 # otherwise, the next token after a ',' starts a new term
22829 elsif ( $type eq ',' ) {
22830 $in_statement_continuation = 0;
22833 # otherwise, we are continuing the current term
22835 $in_statement_continuation = 1;
22841 if ( $level_in_tokenizer < 0 ) {
22842 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
22843 $tokenizer_self->{_saw_negative_indentation} = 1;
22844 warning("Starting negative indentation\n");
22848 # set secondary nesting levels based on all continment token types
22849 # Note: these are set so that the nesting depth is the depth
22850 # of the PREVIOUS TOKEN, which is convenient for setting
22851 # the stength of token bonds
22852 my $slevel_i = $slevel_in_tokenizer;
22855 if ( $is_opening_type{$type} ) {
22856 $slevel_in_tokenizer++;
22857 $nesting_token_string .= $tok;
22858 $nesting_type_string .= $type;
22862 elsif ( $is_closing_type{$type} ) {
22863 $slevel_in_tokenizer--;
22864 my $char = chop $nesting_token_string;
22866 if ( $char ne $matching_start_token{$tok} ) {
22867 $nesting_token_string .= $char . $tok;
22868 $nesting_type_string .= $type;
22871 chop $nesting_type_string;
22875 push( @block_type, $routput_block_type->[$i] );
22876 push( @ci_string, $ci_string_i );
22877 push( @container_environment, $container_environment );
22878 push( @container_type, $routput_container_type->[$i] );
22879 push( @levels, $level_i );
22880 push( @nesting_tokens, $nesting_token_string_i );
22881 push( @nesting_types, $nesting_type_string_i );
22882 push( @slevels, $slevel_i );
22883 push( @token_type, $fix_type );
22884 push( @type_sequence, $routput_type_sequence->[$i] );
22885 push( @nesting_blocks, $nesting_block_string );
22886 push( @nesting_lists, $nesting_list_string );
22888 # now form the previous token
22891 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
22895 substr( $input_line, $$rtoken_map[$im], $num ) );
22901 $num = length($input_line) - $$rtoken_map[$im]; # make the last token
22903 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
22906 $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
22907 $tokenizer_self->{_in_quote} = $in_quote;
22908 $tokenizer_self->{_quote_target} =
22909 $in_quote ? matching_end_token($quote_character) : "";
22910 $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
22912 $line_of_tokens->{_rtoken_type} = \@token_type;
22913 $line_of_tokens->{_rtokens} = \@tokens;
22914 $line_of_tokens->{_rblock_type} = \@block_type;
22915 $line_of_tokens->{_rcontainer_type} = \@container_type;
22916 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
22917 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
22918 $line_of_tokens->{_rlevels} = \@levels;
22919 $line_of_tokens->{_rslevels} = \@slevels;
22920 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
22921 $line_of_tokens->{_rci_levels} = \@ci_string;
22922 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
22926 } # end tokenize_this_line
22928 #########i#############################################################
22929 # Tokenizer routines which assist in identifying token types
22930 #######################################################################
22932 sub operator_expected {
22934 # Many perl symbols have two or more meanings. For example, '<<'
22935 # can be a shift operator or a here-doc operator. The
22936 # interpretation of these symbols depends on the current state of
22937 # the tokenizer, which may either be expecting a term or an
22938 # operator. For this example, a << would be a shift if an operator
22939 # is expected, and a here-doc if a term is expected. This routine
22940 # is called to make this decision for any current token. It returns
22941 # one of three possible values:
22943 # OPERATOR - operator expected (or at least, not a term)
22944 # UNKNOWN - can't tell
22945 # TERM - a term is expected (or at least, not an operator)
22947 # The decision is based on what has been seen so far. This
22948 # information is stored in the "$last_nonblank_type" and
22949 # "$last_nonblank_token" variables. For example, if the
22950 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
22951 # if $last_nonblank_type is 'n' (numeric), we are expecting an
22954 # If a UNKNOWN is returned, the calling routine must guess. A major
22955 # goal of this tokenizer is to minimize the possiblity of returning
22956 # UNKNOWN, because a wrong guess can spoil the formatting of a
22959 # adding NEW_TOKENS: it is critically important that this routine be
22960 # updated to allow it to determine if an operator or term is to be
22961 # expected after the new token. Doing this simply involves adding
22962 # the new token character to one of the regexes in this routine or
22963 # to one of the hash lists
22964 # that it uses, which are initialized in the BEGIN section.
22965 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
22968 my ( $prev_type, $tok, $next_type ) = @_;
22970 my $op_expected = UNKNOWN;
22972 #print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
22974 # Note: function prototype is available for token type 'U' for future
22975 # program development. It contains the leading and trailing parens,
22976 # and no blanks. It might be used to eliminate token type 'C', for
22977 # example (prototype = '()'). Thus:
22978 # if ($last_nonblank_type eq 'U') {
22979 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
22982 # A possible filehandle (or object) requires some care...
22983 if ( $last_nonblank_type eq 'Z' ) {
22986 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
22987 $op_expected = UNKNOWN;
22990 # For possible file handle like "$a", Perl uses weird parsing rules.
22992 # print $a/2,"/hi"; - division
22993 # print $a / 2,"/hi"; - division
22994 # print $a/ 2,"/hi"; - division
22995 # print $a /2,"/hi"; - pattern (and error)!
22996 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
22997 $op_expected = TERM;
23000 # Note when an operation is being done where a
23001 # filehandle might be expected, since a change in whitespace
23002 # could change the interpretation of the statement.
23004 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
23005 complain("operator in print statement not recommended\n");
23006 $op_expected = OPERATOR;
23011 # handle something after 'do' and 'eval'
23012 elsif ( $is_block_operator{$last_nonblank_token} ) {
23014 # something like $a = eval "expression";
23016 if ( $last_nonblank_type eq 'k' ) {
23017 $op_expected = TERM; # expression or list mode following keyword
23020 # something like $a = do { BLOCK } / 2;
23023 $op_expected = OPERATOR; # block mode following }
23027 # handle bare word..
23028 elsif ( $last_nonblank_type eq 'w' ) {
23030 # unfortunately, we can't tell what type of token to expect next
23031 # after most bare words
23032 $op_expected = UNKNOWN;
23035 # operator, but not term possible after these types
23036 # Note: moved ')' from type to token because parens in list context
23037 # get marked as '{' '}' now. This is a minor glitch in the following:
23038 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
23040 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
23041 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
23043 $op_expected = OPERATOR;
23045 # in a 'use' statement, numbers and v-strings are not true
23046 # numbers, so to avoid incorrect error messages, we will
23047 # mark them as unknown for now (use.t)
23048 # TODO: it would be much nicer to create a new token V for VERSION
23049 # number in a use statement. Then this could be a check on type V
23050 # and related patches which change $statement_type for '=>'
23051 # and ',' could be removed. Further, it would clean things up to
23052 # scan the 'use' statement with a separate subroutine.
23053 if ( ( $statement_type eq 'use' )
23054 && ( $last_nonblank_type =~ /^[nv]$/ ) )
23056 $op_expected = UNKNOWN;
23060 # no operator after many keywords, such as "die", "warn", etc
23061 elsif ( $expecting_term_token{$last_nonblank_token} ) {
23063 # patch for dor.t (defined or).
23064 # perl functions which may be unary operators
23065 # TODO: This list is incomplete, and these should be put
23068 && $next_type eq '/'
23069 && $last_nonblank_type eq 'k'
23070 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
23072 $op_expected = OPERATOR;
23075 $op_expected = TERM;
23079 # no operator after things like + - ** (i.e., other operators)
23080 elsif ( $expecting_term_types{$last_nonblank_type} ) {
23081 $op_expected = TERM;
23084 # a few operators, like "time", have an empty prototype () and so
23085 # take no parameters but produce a value to operate on
23086 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
23087 $op_expected = OPERATOR;
23090 # post-increment and decrement produce values to be operated on
23091 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
23092 $op_expected = OPERATOR;
23095 # no value to operate on after sub block
23096 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
23098 # a right brace here indicates the end of a simple block.
23099 # all non-structural right braces have type 'R'
23100 # all braces associated with block operator keywords have been given those
23101 # keywords as "last_nonblank_token" and caught above.
23102 # (This statement is order dependent, and must come after checking
23103 # $last_nonblank_token).
23104 elsif ( $last_nonblank_type eq '}' ) {
23106 # patch for dor.t (defined or).
23108 && $next_type eq '/'
23109 && $last_nonblank_token eq ']' )
23111 $op_expected = OPERATOR;
23114 $op_expected = TERM;
23118 # something else..what did I forget?
23121 # collecting diagnostics on unknown operator types..see what was missed
23122 $op_expected = UNKNOWN;
23124 "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
23128 TOKENIZER_DEBUG_FLAG_EXPECT && do {
23130 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
23132 return $op_expected;
23135 sub new_statement_ok {
23137 # return true if the current token can start a new statement
23138 # USES GLOBAL VARIABLES: $last_nonblank_type
23140 return label_ok() # a label would be ok here
23142 || $last_nonblank_type eq 'J'; # or we follow a label
23148 # Decide if a bare word followed by a colon here is a label
23149 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
23150 # $brace_depth, @brace_type
23152 # if it follows an opening or closing code block curly brace..
23153 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
23154 && $last_nonblank_type eq $last_nonblank_token )
23157 # it is a label if and only if the curly encloses a code block
23158 return $brace_type[$brace_depth];
23161 # otherwise, it is a label if and only if it follows a ';'
23164 return ( $last_nonblank_type eq ';' );
23168 sub code_block_type {
23170 # Decide if this is a block of code, and its type.
23171 # Must be called only when $type = $token = '{'
23172 # The problem is to distinguish between the start of a block of code
23173 # and the start of an anonymous hash reference
23174 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
23175 # to indicate the type of code block. (For example, 'last_nonblank_token'
23176 # might be 'if' for an if block, 'else' for an else block, etc).
23177 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
23178 # $last_nonblank_block_type, $brace_depth, @brace_type
23180 # handle case of multiple '{'s
23182 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
23184 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
23185 if ( $last_nonblank_token eq '{'
23186 && $last_nonblank_type eq $last_nonblank_token )
23189 # opening brace where a statement may appear is probably
23190 # a code block but might be and anonymous hash reference
23191 if ( $brace_type[$brace_depth] ) {
23192 return decide_if_code_block( $i, $rtokens, $rtoken_type,
23193 $max_token_index );
23196 # cannot start a code block within an anonymous hash
23202 elsif ( $last_nonblank_token eq ';' ) {
23204 # an opening brace where a statement may appear is probably
23205 # a code block but might be and anonymous hash reference
23206 return decide_if_code_block( $i, $rtokens, $rtoken_type,
23207 $max_token_index );
23210 # handle case of '}{'
23211 elsif ($last_nonblank_token eq '}'
23212 && $last_nonblank_type eq $last_nonblank_token )
23215 # a } { situation ...
23216 # could be hash reference after code block..(blktype1.t)
23217 if ($last_nonblank_block_type) {
23218 return decide_if_code_block( $i, $rtokens, $rtoken_type,
23219 $max_token_index );
23222 # must be a block if it follows a closing hash reference
23224 return $last_nonblank_token;
23228 # NOTE: braces after type characters start code blocks, but for
23229 # simplicity these are not identified as such. See also
23230 # sub is_non_structural_brace.
23231 # elsif ( $last_nonblank_type eq 't' ) {
23232 # return $last_nonblank_token;
23235 # brace after label:
23236 elsif ( $last_nonblank_type eq 'J' ) {
23237 return $last_nonblank_token;
23240 # otherwise, look at previous token. This must be a code block if
23241 # it follows any of these:
23242 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
23243 elsif ( $is_code_block_token{$last_nonblank_token} ) {
23244 return $last_nonblank_token;
23247 # or a sub definition
23248 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
23249 && $last_nonblank_token =~ /^sub\b/ )
23251 return $last_nonblank_token;
23254 # user-defined subs with block parameters (like grep/map/eval)
23255 elsif ( $last_nonblank_type eq 'G' ) {
23256 return $last_nonblank_token;
23260 elsif ( $last_nonblank_type eq 'w' ) {
23261 return decide_if_code_block( $i, $rtokens, $rtoken_type,
23262 $max_token_index );
23265 # anything else must be anonymous hash reference
23271 sub decide_if_code_block {
23273 # USES GLOBAL VARIABLES: $last_nonblank_token
23274 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
23275 my ( $next_nonblank_token, $i_next ) =
23276 find_next_nonblank_token( $i, $rtokens, $max_token_index );
23278 # we are at a '{' where a statement may appear.
23279 # We must decide if this brace starts an anonymous hash or a code
23281 # return "" if anonymous hash, and $last_nonblank_token otherwise
23283 # initialize to be code BLOCK
23284 my $code_block_type = $last_nonblank_token;
23286 # Check for the common case of an empty anonymous hash reference:
23287 # Maybe something like sub { { } }
23288 if ( $next_nonblank_token eq '}' ) {
23289 $code_block_type = "";
23294 # To guess if this '{' is an anonymous hash reference, look ahead
23295 # and test as follows:
23297 # it is a hash reference if next come:
23298 # - a string or digit followed by a comma or =>
23299 # - bareword followed by =>
23300 # otherwise it is a code block
23302 # Examples of anonymous hash ref:
23306 # Examples of code blocks:
23307 # {1; print "hello\n", 1;}
23310 # We are only going to look ahead one more (nonblank/comment) line.
23311 # Strange formatting could cause a bad guess, but that's unlikely.
23312 my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
23313 my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
23314 my ( $rpre_tokens, $rpre_types ) =
23315 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
23316 # generous, and prevents
23318 # time in mangled files
23319 if ( defined($rpre_types) && @$rpre_types ) {
23320 push @pre_types, @$rpre_types;
23321 push @pre_tokens, @$rpre_tokens;
23324 # put a sentinal token to simplify stopping the search
23325 push @pre_types, '}';
23328 $jbeg = 1 if $pre_types[0] eq 'b';
23330 # first look for one of these
23332 # - bareword with leading -
23336 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
23338 # find the closing quote; don't worry about escapes
23339 my $quote_mark = $pre_types[$j];
23340 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
23341 if ( $pre_types[$k] eq $quote_mark ) {
23343 my $next = $pre_types[$j];
23348 elsif ( $pre_types[$j] eq 'd' ) {
23351 elsif ( $pre_types[$j] eq 'w' ) {
23352 unless ( $is_keyword{ $pre_tokens[$j] } ) {
23356 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
23359 if ( $j > $jbeg ) {
23361 $j++ if $pre_types[$j] eq 'b';
23363 # it's a hash ref if a comma or => follow next
23364 if ( $pre_types[$j] eq ','
23365 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
23367 $code_block_type = "";
23372 return $code_block_type;
23377 # report unexpected token type and show where it is
23378 # USES GLOBAL VARIABLES: $tokenizer_self
23379 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
23380 $rpretoken_type, $input_line )
23383 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
23384 my $msg = "found $found where $expecting expected";
23385 my $pos = $$rpretoken_map[$i_tok];
23386 interrupt_logfile();
23387 my $input_line_number = $tokenizer_self->{_last_line_number};
23388 my ( $offset, $numbered_line, $underline ) =
23389 make_numbered_line( $input_line_number, $input_line, $pos );
23390 $underline = write_on_underline( $underline, $pos - $offset, '^' );
23393 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
23394 my $pos_prev = $$rpretoken_map[$last_nonblank_i];
23396 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
23397 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
23400 $num = $pos - $pos_prev;
23402 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
23405 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
23406 $trailer = " (previous token underlined)";
23408 warning( $numbered_line . "\n" );
23409 warning( $underline . "\n" );
23410 warning( $msg . $trailer . "\n" );
23415 sub is_non_structural_brace {
23417 # Decide if a brace or bracket is structural or non-structural
23418 # by looking at the previous token and type
23419 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
23421 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
23422 # Tentatively deactivated because it caused the wrong operator expectation
23424 # $user = @vars[1] / 100;
23425 # Must update sub operator_expected before re-implementing.
23426 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
23430 # NOTE: braces after type characters start code blocks, but for
23431 # simplicity these are not identified as such. See also
23432 # sub code_block_type
23433 # if ($last_nonblank_type eq 't') {return 0}
23435 # otherwise, it is non-structural if it is decorated
23436 # by type information.
23437 # For example, the '{' here is non-structural: ${xxx}
23439 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
23441 # or if we follow a hash or array closing curly brace or bracket
23442 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
23443 # because the first '}' would have been given type 'R'
23444 || $last_nonblank_type =~ /^([R\]])$/
23448 #########i#############################################################
23449 # Tokenizer routines for tracking container nesting depths
23450 #######################################################################
23452 # The following routines keep track of nesting depths of the nesting
23453 # types, ( [ { and ?. This is necessary for determining the indentation
23454 # level, and also for debugging programs. Not only do they keep track of
23455 # nesting depths of the individual brace types, but they check that each
23456 # of the other brace types is balanced within matching pairs. For
23457 # example, if the program sees this sequence:
23461 # then it can determine that there is an extra left paren somewhere
23462 # between the { and the }. And so on with every other possible
23463 # combination of outer and inner brace types. For another
23468 # which has an extra ] within the parens.
23470 # The brace types have indexes 0 .. 3 which are indexes into
23473 # The pair ? : are treated as just another nesting type, with ? acting
23474 # as the opening brace and : acting as the closing brace.
23478 # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
23480 # saves the nesting depth of brace type $b (where $b is either of the other
23481 # nesting types) when brace type $a enters a new depth. When this depth
23482 # decreases, a check is made that the current depth of brace types $b is
23483 # unchanged, or otherwise there must have been an error. This can
23484 # be very useful for localizing errors, particularly when perl runs to
23485 # the end of a large file (such as this one) and announces that there
23486 # is a problem somewhere.
23488 # A numerical sequence number is maintained for every nesting type,
23489 # so that each matching pair can be uniquely identified in a simple
23492 sub increase_nesting_depth {
23493 my ( $a, $pos ) = @_;
23495 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
23496 # @current_sequence_number, @depth_array, @starting_line_of_current_depth
23498 $current_depth[$a]++;
23499 my $input_line_number = $tokenizer_self->{_last_line_number};
23500 my $input_line = $tokenizer_self->{_line_text};
23502 # Sequence numbers increment by number of items. This keeps
23503 # a unique set of numbers but still allows the relative location
23504 # of any type to be determined.
23505 $nesting_sequence_number[$a] += scalar(@closing_brace_names);
23506 my $seqno = $nesting_sequence_number[$a];
23507 $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
23509 $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
23510 [ $input_line_number, $input_line, $pos ];
23512 for $b ( 0 .. $#closing_brace_names ) {
23513 next if ( $b == $a );
23514 $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
23519 sub decrease_nesting_depth {
23521 my ( $a, $pos ) = @_;
23523 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
23524 # @current_sequence_number, @depth_array, @starting_line_of_current_depth
23527 my $input_line_number = $tokenizer_self->{_last_line_number};
23528 my $input_line = $tokenizer_self->{_line_text};
23530 if ( $current_depth[$a] > 0 ) {
23532 $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
23534 # check that any brace types $b contained within are balanced
23535 for $b ( 0 .. $#closing_brace_names ) {
23536 next if ( $b == $a );
23538 unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
23539 $current_depth[$b] )
23541 my $diff = $current_depth[$b] -
23542 $depth_array[$a][$b][ $current_depth[$a] ];
23544 # don't whine too many times
23545 my $saw_brace_error = get_saw_brace_error();
23547 $saw_brace_error <= MAX_NAG_MESSAGES
23549 # if too many closing types have occured, we probably
23550 # already caught this error
23551 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
23554 interrupt_logfile();
23556 $starting_line_of_current_depth[$a][ $current_depth[$a] ];
23558 my $rel = [ $input_line_number, $input_line, $pos ];
23562 if ( $diff == 1 || $diff == -1 ) {
23570 ? $opening_brace_names[$b]
23571 : $closing_brace_names[$b];
23572 write_error_indicator_pair( @$rsl, '^' );
23574 Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
23579 $starting_line_of_current_depth[$b]
23580 [ $current_depth[$b] ];
23583 " The most recent un-matched $bname is on line $ml\n";
23584 write_error_indicator_pair( @$rml, '^' );
23586 write_error_indicator_pair( @$rel, '^' );
23590 increment_brace_error();
23593 $current_depth[$a]--;
23597 my $saw_brace_error = get_saw_brace_error();
23598 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
23600 There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
23602 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
23604 increment_brace_error();
23609 sub check_final_nesting_depths {
23612 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
23614 for $a ( 0 .. $#closing_brace_names ) {
23616 if ( $current_depth[$a] ) {
23617 my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
23620 Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
23621 The most recent un-matched $opening_brace_names[$a] is on line $sl
23623 indicate_error( $msg, @$rsl, '^' );
23624 increment_brace_error();
23629 #########i#############################################################
23630 # Tokenizer routines for looking ahead in input stream
23631 #######################################################################
23633 sub peek_ahead_for_n_nonblank_pre_tokens {
23635 # returns next n pretokens if they exist
23636 # returns undef's if hits eof without seeing any pretokens
23637 # USES GLOBAL VARIABLES: $tokenizer_self
23638 my $max_pretokens = shift;
23641 my ( $rpre_tokens, $rmap, $rpre_types );
23643 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23645 $line =~ s/^\s*//; # trim leading blanks
23646 next if ( length($line) <= 0 ); # skip blank
23647 next if ( $line =~ /^#/ ); # skip comment
23648 ( $rpre_tokens, $rmap, $rpre_types ) =
23649 pre_tokenize( $line, $max_pretokens );
23652 return ( $rpre_tokens, $rpre_types );
23655 # look ahead for next non-blank, non-comment line of code
23656 sub peek_ahead_for_nonblank_token {
23658 # USES GLOBAL VARIABLES: $tokenizer_self
23659 my ( $rtokens, $max_token_index ) = @_;
23663 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23665 $line =~ s/^\s*//; # trim leading blanks
23666 next if ( length($line) <= 0 ); # skip blank
23667 next if ( $line =~ /^#/ ); # skip comment
23668 my ( $rtok, $rmap, $rtype ) =
23669 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
23670 my $j = $max_token_index + 1;
23673 foreach $tok (@$rtok) {
23674 last if ( $tok =~ "\n" );
23675 $$rtokens[ ++$j ] = $tok;
23682 #########i#############################################################
23683 # Tokenizer guessing routines for ambiguous situations
23684 #######################################################################
23686 sub guess_if_pattern_or_conditional {
23688 # this routine is called when we have encountered a ? following an
23689 # unknown bareword, and we must decide if it starts a pattern or not
23690 # input parameters:
23691 # $i - token index of the ? starting possible pattern
23692 # output parameters:
23693 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
23694 # msg = a warning or diagnostic message
23695 # USES GLOBAL VARIABLES: $last_nonblank_token
23696 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
23697 my $is_pattern = 0;
23698 my $msg = "guessing that ? after $last_nonblank_token starts a ";
23700 if ( $i >= $max_token_index ) {
23701 $msg .= "conditional (no end to pattern found on the line)\n";
23706 my $next_token = $$rtokens[$i]; # first token after ?
23708 # look for a possible ending ? on this line..
23710 my $quote_depth = 0;
23711 my $quote_character = '';
23715 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23718 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
23719 $quote_pos, $quote_depth, $max_token_index );
23723 # we didn't find an ending ? on this line,
23724 # so we bias towards conditional
23726 $msg .= "conditional (no ending ? on this line)\n";
23728 # we found an ending ?, so we bias towards a pattern
23732 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
23734 $msg .= "pattern (found ending ? and pattern expected)\n";
23737 $msg .= "pattern (uncertain, but found ending ?)\n";
23741 return ( $is_pattern, $msg );
23744 sub guess_if_pattern_or_division {
23746 # this routine is called when we have encountered a / following an
23747 # unknown bareword, and we must decide if it starts a pattern or is a
23749 # input parameters:
23750 # $i - token index of the / starting possible pattern
23751 # output parameters:
23752 # $is_pattern = 0 if probably division, =1 if probably a pattern
23753 # msg = a warning or diagnostic message
23754 # USES GLOBAL VARIABLES: $last_nonblank_token
23755 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
23756 my $is_pattern = 0;
23757 my $msg = "guessing that / after $last_nonblank_token starts a ";
23759 if ( $i >= $max_token_index ) {
23760 "division (no end to pattern found on the line)\n";
23764 my $divide_expected =
23765 numerator_expected( $i, $rtokens, $max_token_index );
23767 my $next_token = $$rtokens[$i]; # first token after slash
23769 # look for a possible ending / on this line..
23771 my $quote_depth = 0;
23772 my $quote_character = '';
23776 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23779 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
23780 $quote_pos, $quote_depth, $max_token_index );
23784 # we didn't find an ending / on this line,
23785 # so we bias towards division
23786 if ( $divide_expected >= 0 ) {
23788 $msg .= "division (no ending / on this line)\n";
23791 $msg = "multi-line pattern (division not possible)\n";
23797 # we found an ending /, so we bias towards a pattern
23800 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
23802 if ( $divide_expected >= 0 ) {
23804 if ( $i - $ibeg > 60 ) {
23805 $msg .= "division (matching / too distant)\n";
23809 $msg .= "pattern (but division possible too)\n";
23815 $msg .= "pattern (division not possible)\n";
23820 if ( $divide_expected >= 0 ) {
23822 $msg .= "division (pattern not possible)\n";
23827 "pattern (uncertain, but division would not work here)\n";
23832 return ( $is_pattern, $msg );
23835 # try to resolve here-doc vs. shift by looking ahead for
23836 # non-code or the end token (currently only looks for end token)
23837 # returns 1 if it is probably a here doc, 0 if not
23838 sub guess_if_here_doc {
23840 # This is how many lines we will search for a target as part of the
23841 # guessing strategy. It is a constant because there is probably
23842 # little reason to change it.
23843 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
23845 use constant HERE_DOC_WINDOW => 40;
23847 my $next_token = shift;
23848 my $here_doc_expected = 0;
23851 my $msg = "checking <<";
23853 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
23857 if ( $line =~ /^$next_token$/ ) {
23858 $msg .= " -- found target $next_token ahead $k lines\n";
23859 $here_doc_expected = 1; # got it
23862 last if ( $k >= HERE_DOC_WINDOW );
23865 unless ($here_doc_expected) {
23867 if ( !defined($line) ) {
23868 $here_doc_expected = -1; # hit eof without seeing target
23869 $msg .= " -- must be shift; target $next_token not in file\n";
23872 else { # still unsure..taking a wild guess
23874 if ( !$is_constant{$current_package}{$next_token} ) {
23875 $here_doc_expected = 1;
23877 " -- guessing it's a here-doc ($next_token not a constant)\n";
23881 " -- guessing it's a shift ($next_token is a constant)\n";
23885 write_logfile_entry($msg);
23886 return $here_doc_expected;
23889 #########i#############################################################
23890 # Tokenizer Routines for scanning identifiers and related items
23891 #######################################################################
23893 sub scan_bare_identifier_do {
23895 # this routine is called to scan a token starting with an alphanumeric
23896 # variable or package separator, :: or '.
23897 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
23898 # $last_nonblank_type,@paren_type, $paren_depth
23900 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
23904 my $package = undef;
23908 # we have to back up one pretoken at a :: since each : is one pretoken
23909 if ( $tok eq '::' ) { $i_beg-- }
23910 if ( $tok eq '->' ) { $i_beg-- }
23911 my $pos_beg = $$rtoken_map[$i_beg];
23912 pos($input_line) = $pos_beg;
23919 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
23921 my $pos = pos($input_line);
23922 my $numc = $pos - $pos_beg;
23923 $tok = substr( $input_line, $pos_beg, $numc );
23925 # type 'w' includes anything without leading type info
23926 # ($,%,@,*) including something like abc::def::ghi
23930 if ( defined($2) ) { $sub_name = $2; }
23931 if ( defined($1) ) {
23934 # patch: don't allow isolated package name which just ends
23935 # in the old style package separator (single quote). Example:
23937 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
23941 $package =~ s/\'/::/g;
23942 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
23943 $package =~ s/::$//;
23946 $package = $current_package;
23948 if ( $is_keyword{$tok} ) {
23953 # if it is a bareword..
23954 if ( $type eq 'w' ) {
23956 # check for v-string with leading 'v' type character
23957 # (This seems to have presidence over filehandle, type 'Y')
23958 if ( $tok =~ /^v\d[_\d]*$/ ) {
23960 # we only have the first part - something like 'v101' -
23962 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
23963 $pos = pos($input_line);
23964 $numc = $pos - $pos_beg;
23965 $tok = substr( $input_line, $pos_beg, $numc );
23969 # warn if this version can't handle v-strings
23970 report_v_string($tok);
23973 elsif ( $is_constant{$package}{$sub_name} ) {
23977 # bareword after sort has implied empty prototype; for example:
23978 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
23979 # This has priority over whatever the user has specified.
23980 elsif ($last_nonblank_token eq 'sort'
23981 && $last_nonblank_type eq 'k' )
23986 # Note: strangely, perl does not seem to really let you create
23987 # functions which act like eval and do, in the sense that eval
23988 # and do may have operators following the final }, but any operators
23989 # that you create with prototype (&) apparently do not allow
23990 # trailing operators, only terms. This seems strange.
23991 # If this ever changes, here is the update
23992 # to make perltidy behave accordingly:
23994 # elsif ( $is_block_function{$package}{$tok} ) {
23995 # $tok='eval'; # patch to do braces like eval - doesn't work
23998 # FIXME: This could become a separate type to allow for different
24000 elsif ( $is_block_function{$package}{$sub_name} ) {
24004 elsif ( $is_block_list_function{$package}{$sub_name} ) {
24007 elsif ( $is_user_function{$package}{$sub_name} ) {
24009 $prototype = $user_function_prototype{$package}{$sub_name};
24012 # check for indirect object
24015 # added 2001-03-27: must not be followed immediately by '('
24017 ( $input_line !~ m/\G\(/gc )
24022 # preceded by keyword like 'print', 'printf' and friends
24023 $is_indirect_object_taker{$last_nonblank_token}
24025 # or preceded by something like 'print(' or 'printf('
24027 ( $last_nonblank_token eq '(' )
24028 && $is_indirect_object_taker{ $paren_type[$paren_depth]
24036 # may not be indirect object unless followed by a space
24037 if ( $input_line =~ m/\G\s+/gc ) {
24041 # Perl's indirect object notation is a very bad
24042 # thing and can cause subtle bugs, especially for
24043 # beginning programmers. And I haven't even been
24044 # able to figure out a sane warning scheme which
24045 # doesn't get in the way of good scripts.
24047 # Complain if a filehandle has any lower case
24048 # letters. This is suggested good practice, but the
24049 # main reason for this warning is that prior to
24050 # release 20010328, perltidy incorrectly parsed a
24051 # function call after a print/printf, with the
24052 # result that a space got added before the opening
24053 # paren, thereby converting the function name to a
24054 # filehandle according to perl's weird rules. This
24055 # will not usually generate a syntax error, so this
24056 # is a potentially serious bug. By warning
24057 # of filehandles with any lower case letters,
24058 # followed by opening parens, we will help the user
24059 # find almost all of these older errors.
24060 # use 'sub_name' because something like
24061 # main::MYHANDLE is ok for filehandle
24062 if ( $sub_name =~ /[a-z]/ ) {
24064 # could be bug caused by older perltidy if
24066 if ( $input_line =~ m/\G\s*\(/gc ) {
24068 "Caution: unknown word '$tok' in indirect object slot\n"
24074 # bareword not followed by a space -- may not be filehandle
24075 # (may be function call defined in a 'use' statement)
24082 # Now we must convert back from character position
24083 # to pre_token index.
24084 # I don't think an error flag can occur here ..but who knows
24087 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
24089 warning("scan_bare_identifier: Possibly invalid tokenization\n");
24093 # no match but line not blank - could be syntax error
24094 # perl will take '::' alone without complaint
24098 # change this warning to log message if it becomes annoying
24099 warning("didn't find identifier after leading ::\n");
24101 return ( $i, $tok, $type, $prototype );
24106 # This is the new scanner and will eventually replace scan_identifier.
24107 # Only type 'sub' and 'package' are implemented.
24108 # Token types $ * % @ & -> are not yet implemented.
24110 # Scan identifier following a type token.
24111 # The type of call depends on $id_scan_state: $id_scan_state = ''
24112 # for starting call, in which case $tok must be the token defining
24115 # If the type token is the last nonblank token on the line, a value
24116 # of $id_scan_state = $tok is returned, indicating that further
24117 # calls must be made to get the identifier. If the type token is
24118 # not the last nonblank token on the line, the identifier is
24119 # scanned and handled and a value of '' is returned.
24120 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
24121 # $statement_type, $tokenizer_self
24123 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
24127 my ( $i_beg, $pos_beg );
24129 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
24130 #my ($a,$b,$c) = caller;
24131 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
24133 # on re-entry, start scanning at first token on the line
24134 if ($id_scan_state) {
24139 # on initial entry, start scanning just after type token
24142 $id_scan_state = $tok;
24146 # find $i_beg = index of next nonblank token,
24147 # and handle empty lines
24148 my $blank_line = 0;
24149 my $next_nonblank_token = $$rtokens[$i_beg];
24150 if ( $i_beg > $max_token_index ) {
24155 # only a '#' immediately after a '$' is not a comment
24156 if ( $next_nonblank_token eq '#' ) {
24157 unless ( $tok eq '$' ) {
24162 if ( $next_nonblank_token =~ /^\s/ ) {
24163 ( $next_nonblank_token, $i_beg ) =
24164 find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
24165 $max_token_index );
24166 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
24172 # handle non-blank line; identifier, if any, must follow
24173 unless ($blank_line) {
24175 if ( $id_scan_state eq 'sub' ) {
24176 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
24177 $input_line, $i, $i_beg,
24178 $tok, $type, $rtokens,
24179 $rtoken_map, $id_scan_state, $max_token_index
24183 elsif ( $id_scan_state eq 'package' ) {
24184 ( $i, $tok, $type ) =
24185 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
24186 $rtoken_map, $max_token_index );
24187 $id_scan_state = '';
24191 warning("invalid token in scan_id: $tok\n");
24192 $id_scan_state = '';
24196 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
24198 # shouldn't happen:
24200 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
24202 report_definite_bug();
24205 TOKENIZER_DEBUG_FLAG_NSCAN && do {
24207 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
24209 return ( $i, $tok, $type, $id_scan_state );
24212 sub check_prototype {
24213 my ( $proto, $package, $subname ) = @_;
24214 return unless ( defined($package) && defined($subname) );
24215 if ( defined($proto) ) {
24216 $proto =~ s/^\s*\(\s*//;
24217 $proto =~ s/\s*\)$//;
24219 $is_user_function{$package}{$subname} = 1;
24220 $user_function_prototype{$package}{$subname} = "($proto)";
24222 # prototypes containing '&' must be treated specially..
24223 if ( $proto =~ /\&/ ) {
24225 # right curly braces of prototypes ending in
24226 # '&' may be followed by an operator
24227 if ( $proto =~ /\&$/ ) {
24228 $is_block_function{$package}{$subname} = 1;
24231 # right curly braces of prototypes NOT ending in
24232 # '&' may NOT be followed by an operator
24233 elsif ( $proto !~ /\&$/ ) {
24234 $is_block_list_function{$package}{$subname} = 1;
24239 $is_constant{$package}{$subname} = 1;
24243 $is_user_function{$package}{$subname} = 1;
24247 sub do_scan_package {
24249 # do_scan_package parses a package name
24250 # it is called with $i_beg equal to the index of the first nonblank
24251 # token following a 'package' token.
24252 # USES GLOBAL VARIABLES: $current_package,
24254 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
24257 my $package = undef;
24258 my $pos_beg = $$rtoken_map[$i_beg];
24259 pos($input_line) = $pos_beg;
24261 # handle non-blank line; package name, if any, must follow
24262 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
24264 $package = ( defined($1) && $1 ) ? $1 : 'main';
24265 $package =~ s/\'/::/g;
24266 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
24267 $package =~ s/::$//;
24268 my $pos = pos($input_line);
24269 my $numc = $pos - $pos_beg;
24270 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
24273 # Now we must convert back from character position
24274 # to pre_token index.
24275 # I don't think an error flag can occur here ..but ?
24278 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
24279 if ($error) { warning("Possibly invalid package\n") }
24280 $current_package = $package;
24283 my ( $next_nonblank_token, $i_next ) =
24284 find_next_nonblank_token( $i, $rtokens, $max_token_index );
24285 if ( $next_nonblank_token !~ /^[;\}]$/ ) {
24287 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
24292 # no match but line not blank --
24293 # could be a label with name package, like package: , for example.
24298 return ( $i, $tok, $type );
24301 sub scan_identifier_do {
24303 # This routine assembles tokens into identifiers. It maintains a
24304 # scan state, id_scan_state. It updates id_scan_state based upon
24305 # current id_scan_state and token, and returns an updated
24306 # id_scan_state and the next index after the identifier.
24307 # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
24308 # $last_nonblank_type
24310 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_;
24313 my $tok_begin = $$rtokens[$i_begin];
24314 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
24315 my $id_scan_state_begin = $id_scan_state;
24316 my $identifier_begin = $identifier;
24317 my $tok = $tok_begin;
24320 # these flags will be used to help figure out the type:
24321 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
24324 # allow old package separator (') except in 'use' statement
24325 my $allow_tick = ( $last_nonblank_token ne 'use' );
24327 # get started by defining a type and a state if necessary
24328 unless ($id_scan_state) {
24329 $context = UNKNOWN_CONTEXT;
24331 # fixup for digraph
24332 if ( $tok eq '>' ) {
24336 $identifier = $tok;
24338 if ( $tok eq '$' || $tok eq '*' ) {
24339 $id_scan_state = '$';
24340 $context = SCALAR_CONTEXT;
24342 elsif ( $tok eq '%' || $tok eq '@' ) {
24343 $id_scan_state = '$';
24344 $context = LIST_CONTEXT;
24346 elsif ( $tok eq '&' ) {
24347 $id_scan_state = '&';
24349 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
24350 $saw_alpha = 0; # 'sub' is considered type info here
24351 $id_scan_state = '$';
24352 $identifier .= ' '; # need a space to separate sub from sub name
24354 elsif ( $tok eq '::' ) {
24355 $id_scan_state = 'A';
24357 elsif ( $tok =~ /^[A-Za-z_]/ ) {
24358 $id_scan_state = ':';
24360 elsif ( $tok eq '->' ) {
24361 $id_scan_state = '$';
24366 my ( $a, $b, $c ) = caller;
24367 warning("Program Bug: scan_identifier given bad token = $tok \n");
24368 warning(" called from sub $a line: $c\n");
24369 report_definite_bug();
24371 $saw_type = !$saw_alpha;
24375 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
24378 # now loop to gather the identifier
24381 while ( $i < $max_token_index ) {
24382 $i_save = $i unless ( $tok =~ /^\s*$/ );
24383 $tok = $$rtokens[ ++$i ];
24385 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
24390 if ( $id_scan_state eq '$' ) { # starting variable name
24392 if ( $tok eq '$' ) {
24394 $identifier .= $tok;
24396 # we've got a punctuation variable if end of line (punct.t)
24397 if ( $i == $max_token_index ) {
24399 $id_scan_state = '';
24403 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
24405 $id_scan_state = ':'; # now need ::
24406 $identifier .= $tok;
24408 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
24410 $id_scan_state = ':'; # now need ::
24411 $identifier .= $tok;
24413 # Perl will accept leading digits in identifiers,
24414 # although they may not always produce useful results.
24415 # Something like $main::0 is ok. But this also works:
24417 # sub howdy::123::bubba{ print "bubba $54321!\n" }
24418 # howdy::123::bubba();
24421 elsif ( $tok =~ /^[0-9]/ ) { # numeric
24423 $id_scan_state = ':'; # now need ::
24424 $identifier .= $tok;
24426 elsif ( $tok eq '::' ) {
24427 $id_scan_state = 'A';
24428 $identifier .= $tok;
24430 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
24431 $identifier .= $tok; # keep same state, a $ could follow
24433 elsif ( $tok eq '{' ) {
24435 # check for something like ${#} or ${©}
24436 if ( $identifier eq '$'
24437 && $i + 2 <= $max_token_index
24438 && $$rtokens[ $i + 2 ] eq '}'
24439 && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
24441 my $next2 = $$rtokens[ $i + 2 ];
24442 my $next1 = $$rtokens[ $i + 1 ];
24443 $identifier .= $tok . $next1 . $next2;
24445 $id_scan_state = '';
24449 # skip something like ${xxx} or ->{
24450 $id_scan_state = '';
24452 # if this is the first token of a line, any tokens for this
24453 # identifier have already been accumulated
24454 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
24459 # space ok after leading $ % * & @
24460 elsif ( $tok =~ /^\s*$/ ) {
24462 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
24464 if ( length($identifier) > 1 ) {
24465 $id_scan_state = '';
24467 $type = 'i'; # probably punctuation variable
24472 # spaces after $'s are common, and space after @
24473 # is harmless, so only complain about space
24474 # after other type characters. Space after $ and
24475 # @ will be removed in formatting. Report space
24476 # after % and * because they might indicate a
24477 # parsing error. In other words '% ' might be a
24478 # modulo operator. Delete this warning if it
24480 if ( $identifier !~ /^[\@\$]$/ ) {
24482 "Space in identifier, following $identifier\n";
24488 # space after '->' is ok
24490 elsif ( $tok eq '^' ) {
24492 # check for some special variables like $^W
24493 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
24494 $identifier .= $tok;
24495 $id_scan_state = 'A';
24497 # Perl accepts '$^]' or '@^]', but
24498 # there must not be a space before the ']'.
24499 my $next1 = $$rtokens[ $i + 1 ];
24500 if ( $next1 eq ']' ) {
24502 $identifier .= $next1;
24503 $id_scan_state = "";
24508 $id_scan_state = '';
24511 else { # something else
24513 # check for various punctuation variables
24514 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
24515 $identifier .= $tok;
24518 elsif ( $identifier eq '$#' ) {
24520 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
24522 # perl seems to allow just these: $#: $#- $#+
24523 elsif ( $tok =~ /^[\:\-\+]$/ ) {
24525 $identifier .= $tok;
24529 write_logfile_entry( 'Use of $# is deprecated' . "\n" );
24532 elsif ( $identifier eq '$$' ) {
24534 # perl does not allow references to punctuation
24535 # variables without braces. For example, this
24539 # You would have to use
24543 if ( $tok eq '{' ) { $type = 't' }
24544 else { $type = 'i' }
24546 elsif ( $identifier eq '->' ) {
24551 if ( length($identifier) == 1 ) { $identifier = ''; }
24553 $id_scan_state = '';
24557 elsif ( $id_scan_state eq '&' ) { # starting sub call?
24559 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
24560 $id_scan_state = ':'; # now need ::
24562 $identifier .= $tok;
24564 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
24565 $id_scan_state = ':'; # now need ::
24567 $identifier .= $tok;
24569 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
24570 $id_scan_state = ':'; # now need ::
24572 $identifier .= $tok;
24574 elsif ( $tok =~ /^\s*$/ ) { # allow space
24576 elsif ( $tok eq '::' ) { # leading ::
24577 $id_scan_state = 'A'; # accept alpha next
24578 $identifier .= $tok;
24580 elsif ( $tok eq '{' ) {
24581 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
24583 $id_scan_state = '';
24588 # punctuation variable?
24589 # testfile: cunningham4.pl
24590 if ( $identifier eq '&' ) {
24591 $identifier .= $tok;
24598 $id_scan_state = '';
24602 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
24604 if ( $tok =~ /^[A-Za-z_]/ ) { # found it
24605 $identifier .= $tok;
24606 $id_scan_state = ':'; # now need ::
24609 elsif ( $tok eq "'" && $allow_tick ) {
24610 $identifier .= $tok;
24611 $id_scan_state = ':'; # now need ::
24614 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
24615 $identifier .= $tok;
24616 $id_scan_state = ':'; # now need ::
24619 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
24620 $id_scan_state = '(';
24621 $identifier .= $tok;
24623 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
24624 $id_scan_state = ')';
24625 $identifier .= $tok;
24628 $id_scan_state = '';
24633 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
24635 if ( $tok eq '::' ) { # got it
24636 $identifier .= $tok;
24637 $id_scan_state = 'A'; # now require alpha
24639 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
24640 $identifier .= $tok;
24641 $id_scan_state = ':'; # now need ::
24644 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
24645 $identifier .= $tok;
24646 $id_scan_state = ':'; # now need ::
24649 elsif ( $tok eq "'" && $allow_tick ) { # tick
24651 if ( $is_keyword{$identifier} ) {
24652 $id_scan_state = ''; # that's all
24656 $identifier .= $tok;
24659 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
24660 $id_scan_state = '(';
24661 $identifier .= $tok;
24663 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
24664 $id_scan_state = ')';
24665 $identifier .= $tok;
24668 $id_scan_state = ''; # that's all
24673 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
24675 if ( $tok eq '(' ) { # got it
24676 $identifier .= $tok;
24677 $id_scan_state = ')'; # now find the end of it
24679 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
24680 $identifier .= $tok;
24683 $id_scan_state = ''; # that's all - no prototype
24688 elsif ( $id_scan_state eq ')' ) { # looking for ) to end
24690 if ( $tok eq ')' ) { # got it
24691 $identifier .= $tok;
24692 $id_scan_state = ''; # all done
24695 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
24696 $identifier .= $tok;
24698 else { # probable error in script, but keep going
24699 warning("Unexpected '$tok' while seeking end of prototype\n");
24700 $identifier .= $tok;
24703 else { # can get here due to error in initialization
24704 $id_scan_state = '';
24710 if ( $id_scan_state eq ')' ) {
24711 warning("Hit end of line while seeking ) to end prototype\n");
24714 # once we enter the actual identifier, it may not extend beyond
24715 # the end of the current line
24716 if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
24717 $id_scan_state = '';
24719 if ( $i < 0 ) { $i = 0 }
24726 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
24729 else { $type = 'i' }
24731 elsif ( $identifier eq '->' ) {
24735 ( length($identifier) > 1 )
24737 # In something like '@$=' we have an identifier '@$'
24738 # In something like '$${' we have type '$$' (and only
24739 # part of an identifier)
24740 && !( $identifier =~ /\$$/ && $tok eq '{' )
24741 && ( $identifier !~ /^(sub |package )$/ )
24746 else { $type = 't' }
24748 elsif ($saw_alpha) {
24750 # type 'w' includes anything without leading type info
24751 # ($,%,@,*) including something like abc::def::ghi
24756 } # this can happen on a restart
24760 $tok = $identifier;
24761 if ($message) { write_logfile_entry($message) }
24768 TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
24769 my ( $a, $b, $c ) = caller;
24771 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
24773 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
24775 return ( $i, $tok, $type, $id_scan_state, $identifier );
24780 # saved package and subnames in case prototype is on separate line
24781 my ( $package_saved, $subname_saved );
24785 # do_scan_sub parses a sub name and prototype
24786 # it is called with $i_beg equal to the index of the first nonblank
24787 # token following a 'sub' token.
24789 # TODO: add future error checks to be sure we have a valid
24790 # sub name. For example, 'sub &doit' is wrong. Also, be sure
24791 # a name is given if and only if a non-anonymous sub is
24793 # USES GLOBAL VARS: $current_package, $last_nonblank_token,
24794 # $in_attribute_list, %saw_function_definition,
24798 $input_line, $i, $i_beg,
24799 $tok, $type, $rtokens,
24800 $rtoken_map, $id_scan_state, $max_token_index
24802 $id_scan_state = ""; # normally we get everything in one call
24803 my $subname = undef;
24804 my $package = undef;
24809 my $pos_beg = $$rtoken_map[$i_beg];
24810 pos($input_line) = $pos_beg;
24812 # sub NAME PROTO ATTRS
24814 $input_line =~ m/\G\s*
24815 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
24816 (\w+) # NAME - required
24817 (\s*\([^){]*\))? # PROTO - something in parens
24818 (\s*:)? # ATTRS - leading : of attribute list
24827 $package = ( defined($1) && $1 ) ? $1 : $current_package;
24828 $package =~ s/\'/::/g;
24829 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
24830 $package =~ s/::$//;
24831 my $pos = pos($input_line);
24832 my $numc = $pos - $pos_beg;
24833 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
24837 # Look for prototype/attributes not preceded on this line by subname;
24838 # This might be an anonymous sub with attributes,
24839 # or a prototype on a separate line from its sub name
24841 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
24842 (\s*:)? # ATTRS leading ':'
24851 # Handle prototype on separate line from subname
24852 if ($subname_saved) {
24853 $package = $package_saved;
24854 $subname = $subname_saved;
24855 $tok = $last_nonblank_token;
24862 # ATTRS: if there are attributes, back up and let the ':' be
24863 # found later by the scanner.
24864 my $pos = pos($input_line);
24866 $pos -= length($attrs);
24869 my $next_nonblank_token = $tok;
24871 # catch case of line with leading ATTR ':' after anonymous sub
24872 if ( $pos == $pos_beg && $tok eq ':' ) {
24874 $in_attribute_list = 1;
24877 # We must convert back from character position
24878 # to pre_token index.
24881 # I don't think an error flag can occur here ..but ?
24884 inverse_pretoken_map( $i, $pos, $rtoken_map,
24885 $max_token_index );
24886 if ($error) { warning("Possibly invalid sub\n") }
24888 # check for multiple definitions of a sub
24889 ( $next_nonblank_token, my $i_next ) =
24890 find_next_nonblank_token_on_this_line( $i, $rtokens,
24891 $max_token_index );
24894 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
24895 { # skip blank or side comment
24896 my ( $rpre_tokens, $rpre_types ) =
24897 peek_ahead_for_n_nonblank_pre_tokens(1);
24898 if ( defined($rpre_tokens) && @$rpre_tokens ) {
24899 $next_nonblank_token = $rpre_tokens->[0];
24902 $next_nonblank_token = '}';
24905 $package_saved = "";
24906 $subname_saved = "";
24907 if ( $next_nonblank_token eq '{' ) {
24909 if ( $saw_function_definition{$package}{$subname} ) {
24910 my $lno = $saw_function_definition{$package}{$subname};
24912 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
24915 $saw_function_definition{$package}{$subname} =
24916 $tokenizer_self->{_last_line_number};
24919 elsif ( $next_nonblank_token eq ';' ) {
24921 elsif ( $next_nonblank_token eq '}' ) {
24924 # ATTRS - if an attribute list follows, remember the name
24925 # of the sub so the next opening brace can be labeled.
24926 # Setting 'statement_type' causes any ':'s to introduce
24928 elsif ( $next_nonblank_token eq ':' ) {
24929 $statement_type = $tok;
24932 # see if PROTO follows on another line:
24933 elsif ( $next_nonblank_token eq '(' ) {
24934 if ( $attrs || $proto ) {
24936 "unexpected '(' after definition or declaration of sub '$subname'\n"
24940 $id_scan_state = 'sub'; # we must come back to get proto
24941 $statement_type = $tok;
24942 $package_saved = $package;
24943 $subname_saved = $subname;
24946 elsif ($next_nonblank_token) { # EOF technically ok
24948 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
24951 check_prototype( $proto, $package, $subname );
24954 # no match but line not blank
24957 return ( $i, $tok, $type, $id_scan_state );
24961 #########i###############################################################
24962 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
24963 #########################################################################
24965 sub find_next_nonblank_token {
24966 my ( $i, $rtokens, $max_token_index ) = @_;
24968 if ( $i >= $max_token_index ) {
24969 if ( !peeked_ahead() ) {
24972 peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
24975 my $next_nonblank_token = $$rtokens[ ++$i ];
24977 if ( $next_nonblank_token =~ /^\s*$/ ) {
24978 $next_nonblank_token = $$rtokens[ ++$i ];
24980 return ( $next_nonblank_token, $i );
24983 sub numerator_expected {
24985 # this is a filter for a possible numerator, in support of guessing
24986 # for the / pattern delimiter token.
24991 # Note: I am using the convention that variables ending in
24992 # _expected have these 3 possible values.
24993 my ( $i, $rtokens, $max_token_index ) = @_;
24994 my $next_token = $$rtokens[ $i + 1 ];
24995 if ( $next_token eq '=' ) { $i++; } # handle /=
24996 my ( $next_nonblank_token, $i_next ) =
24997 find_next_nonblank_token( $i, $rtokens, $max_token_index );
24999 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
25004 if ( $next_nonblank_token =~ /^\s*$/ ) {
25013 sub pattern_expected {
25015 # This is the start of a filter for a possible pattern.
25016 # It looks at the token after a possbible pattern and tries to
25017 # determine if that token could end a pattern.
25022 my ( $i, $rtokens, $max_token_index ) = @_;
25023 my $next_token = $$rtokens[ $i + 1 ];
25024 if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier
25025 my ( $next_nonblank_token, $i_next ) =
25026 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25028 # list of tokens which may follow a pattern
25029 # (can probably be expanded)
25030 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
25036 if ( $next_nonblank_token =~ /^\s*$/ ) {
25045 sub find_next_nonblank_token_on_this_line {
25046 my ( $i, $rtokens, $max_token_index ) = @_;
25047 my $next_nonblank_token;
25049 if ( $i < $max_token_index ) {
25050 $next_nonblank_token = $$rtokens[ ++$i ];
25052 if ( $next_nonblank_token =~ /^\s*$/ ) {
25054 if ( $i < $max_token_index ) {
25055 $next_nonblank_token = $$rtokens[ ++$i ];
25060 $next_nonblank_token = "";
25062 return ( $next_nonblank_token, $i );
25065 sub find_angle_operator_termination {
25067 # We are looking at a '<' and want to know if it is an angle operator.
25068 # We are to return:
25069 # $i = pretoken index of ending '>' if found, current $i otherwise
25070 # $type = 'Q' if found, '>' otherwise
25071 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
25074 pos($input_line) = 1 + $$rtoken_map[$i];
25078 # we just have to find the next '>' if a term is expected
25079 if ( $expecting == TERM ) { $filter = '[\>]' }
25081 # we have to guess if we don't know what is expected
25082 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
25084 # shouldn't happen - we shouldn't be here if operator is expected
25085 else { warning("Program Bug in find_angle_operator_termination\n") }
25087 # To illustrate what we might be looking at, in case we are
25088 # guessing, here are some examples of valid angle operators
25095 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
25096 # <${PREFIX}*img*.$IMAGE_TYPE>
25097 # <img*.$IMAGE_TYPE>
25098 # <Timg*.$IMAGE_TYPE>
25099 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
25101 # Here are some examples of lines which do not have angle operators:
25102 # return undef unless $self->[2]++ < $#{$self->[1]};
25105 # the following line from dlister.pl caused trouble:
25106 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
25108 # If the '<' starts an angle operator, it must end on this line and
25109 # it must not have certain characters like ';' and '=' in it. I use
25110 # this to limit the testing. This filter should be improved if
25113 if ( $input_line =~ /($filter)/g ) {
25117 # We MAY have found an angle operator termination if we get
25118 # here, but we need to do more to be sure we haven't been
25120 my $pos = pos($input_line);
25122 my $pos_beg = $$rtoken_map[$i];
25123 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
25125 # Reject if the closing '>' follows a '-' as in:
25126 # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
25127 if ( $expecting eq UNKNOWN ) {
25128 my $check = substr( $input_line, $pos - 2, 1 );
25129 if ( $check eq '-' ) {
25130 return ( $i, $type );
25134 ######################################debug#####
25135 #write_diagnostics( "ANGLE? :$str\n");
25136 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
25137 ######################################debug#####
25141 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
25143 # It may be possible that a quote ends midway in a pretoken.
25144 # If this happens, it may be necessary to split the pretoken.
25147 "Possible tokinization error..please check this line\n");
25148 report_possible_bug();
25151 # Now let's see where we stand....
25152 # OK if math op not possible
25153 if ( $expecting == TERM ) {
25156 # OK if there are no more than 2 pre-tokens inside
25157 # (not possible to write 2 token math between < and >)
25158 # This catches most common cases
25159 elsif ( $i <= $i_beg + 3 ) {
25160 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
25166 # Let's try a Brace Test: any braces inside must balance
25168 while ( $str =~ /\{/g ) { $br++ }
25169 while ( $str =~ /\}/g ) { $br-- }
25171 while ( $str =~ /\[/g ) { $sb++ }
25172 while ( $str =~ /\]/g ) { $sb-- }
25174 while ( $str =~ /\(/g ) { $pr++ }
25175 while ( $str =~ /\)/g ) { $pr-- }
25177 # if braces do not balance - not angle operator
25178 if ( $br || $sb || $pr ) {
25182 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
25185 # we should keep doing more checks here...to be continued
25186 # Tentatively accepting this as a valid angle operator.
25187 # There are lots more things that can be checked.
25190 "ANGLE-Guessing yes: $str expecting=$expecting\n");
25191 write_logfile_entry("Guessing angle operator here: $str\n");
25196 # didn't find ending >
25198 if ( $expecting == TERM ) {
25199 warning("No ending > for angle operator\n");
25203 return ( $i, $type );
25206 sub scan_number_do {
25208 # scan a number in any of the formats that Perl accepts
25209 # Underbars (_) are allowed in decimal numbers.
25210 # input parameters -
25211 # $input_line - the string to scan
25212 # $i - pre_token index to start scanning
25213 # $rtoken_map - reference to the pre_token map giving starting
25214 # character position in $input_line of token $i
25215 # output parameters -
25216 # $i - last pre_token index of the number just scanned
25217 # number - the number (characters); or undef if not a number
25219 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
25220 my $pos_beg = $$rtoken_map[$i];
25223 my $number = undef;
25224 my $type = $input_type;
25226 my $first_char = substr( $input_line, $pos_beg, 1 );
25228 # Look for bad starting characters; Shouldn't happen..
25229 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
25230 warning("Program bug - scan_number given character $first_char\n");
25231 report_definite_bug();
25232 return ( $i, $type, $number );
25235 # handle v-string without leading 'v' character ('Two Dot' rule)
25237 # TODO: v-strings may contain underscores
25238 pos($input_line) = $pos_beg;
25239 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
25240 $pos = pos($input_line);
25241 my $numc = $pos - $pos_beg;
25242 $number = substr( $input_line, $pos_beg, $numc );
25244 report_v_string($number);
25247 # handle octal, hex, binary
25248 if ( !defined($number) ) {
25249 pos($input_line) = $pos_beg;
25250 if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
25252 $pos = pos($input_line);
25253 my $numc = $pos - $pos_beg;
25254 $number = substr( $input_line, $pos_beg, $numc );
25260 if ( !defined($number) ) {
25261 pos($input_line) = $pos_beg;
25263 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
25264 $pos = pos($input_line);
25266 # watch out for things like 0..40 which would give 0. by this;
25267 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
25268 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
25272 my $numc = $pos - $pos_beg;
25273 $number = substr( $input_line, $pos_beg, $numc );
25278 # filter out non-numbers like e + - . e2 .e3 +e6
25279 # the rule: at least one digit, and any 'e' must be preceded by a digit
25281 $number !~ /\d/ # no digits
25282 || ( $number =~ /^(.*)[eE]/
25283 && $1 !~ /\d/ ) # or no digits before the 'e'
25287 $type = $input_type;
25288 return ( $i, $type, $number );
25291 # Found a number; now we must convert back from character position
25292 # to pre_token index. An error here implies user syntax error.
25293 # An example would be an invalid octal number like '009'.
25296 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
25297 if ($error) { warning("Possibly invalid number\n") }
25299 return ( $i, $type, $number );
25302 sub inverse_pretoken_map {
25304 # Starting with the current pre_token index $i, scan forward until
25305 # finding the index of the next pre_token whose position is $pos.
25306 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
25309 while ( ++$i <= $max_token_index ) {
25311 if ( $pos <= $$rtoken_map[$i] ) {
25313 # Let the calling routine handle errors in which we do not
25314 # land on a pre-token boundary. It can happen by running
25315 # perltidy on some non-perl scripts, for example.
25316 if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
25321 return ( $i, $error );
25324 sub find_here_doc {
25326 # find the target of a here document, if any
25327 # input parameters:
25328 # $i - token index of the second < of <<
25329 # ($i must be less than the last token index if this is called)
25330 # output parameters:
25331 # $found_target = 0 didn't find target; =1 found target
25332 # HERE_TARGET - the target string (may be empty string)
25333 # $i - unchanged if not here doc,
25334 # or index of the last token of the here target
25335 # $saw_error - flag noting unbalanced quote on here target
25336 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
25338 my $found_target = 0;
25339 my $here_doc_target = '';
25340 my $here_quote_character = '';
25342 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
25343 $next_token = $$rtokens[ $i + 1 ];
25345 # perl allows a backslash before the target string (heredoc.t)
25347 if ( $next_token eq '\\' ) {
25349 $next_token = $$rtokens[ $i + 2 ];
25352 ( $next_nonblank_token, $i_next_nonblank ) =
25353 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
25355 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
25358 my $quote_depth = 0;
25363 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
25366 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
25367 $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
25369 if ($in_quote) { # didn't find end of quote, so no target found
25371 if ( $expecting == TERM ) {
25373 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
25378 else { # found ending quote
25383 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
25384 $tokj = $$rtokens[$j];
25386 # we have to remove any backslash before the quote character
25387 # so that the here-doc-target exactly matches this string
25391 && $$rtokens[ $j + 1 ] eq $here_quote_character );
25392 $here_doc_target .= $tokj;
25397 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
25399 write_logfile_entry(
25400 "found blank here-target after <<; suggest using \"\"\n");
25403 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
25405 my $here_doc_expected;
25406 if ( $expecting == UNKNOWN ) {
25407 $here_doc_expected = guess_if_here_doc($next_token);
25410 $here_doc_expected = 1;
25413 if ($here_doc_expected) {
25415 $here_doc_target = $next_token;
25422 if ( $expecting == TERM ) {
25424 write_logfile_entry("Note: bare here-doc operator <<\n");
25431 # patch to neglect any prepended backslash
25432 if ( $found_target && $backslash ) { $i++ }
25434 return ( $found_target, $here_doc_target, $here_quote_character, $i,
25440 # follow (or continue following) quoted string(s)
25441 # $in_quote return code:
25442 # 0 - ok, found end
25443 # 1 - still must find end of quote whose target is $quote_character
25444 # 2 - still looking for end of first of two quotes
25446 # Returns updated strings:
25447 # $quoted_string_1 = quoted string seen while in_quote=1
25448 # $quoted_string_2 = quoted string seen while in_quote=2
25450 $i, $in_quote, $quote_character,
25451 $quote_pos, $quote_depth, $quoted_string_1,
25452 $quoted_string_2, $rtokens, $rtoken_map,
25456 my $in_quote_starting = $in_quote;
25459 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
25462 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25465 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
25466 $quote_pos, $quote_depth, $max_token_index );
25467 $quoted_string_2 .= $quoted_string;
25468 if ( $in_quote == 1 ) {
25469 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
25470 $quote_character = '';
25473 $quoted_string_2 .= "\n";
25477 if ( $in_quote == 1 ) { # one (more) quote to follow
25480 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25483 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
25484 $quote_pos, $quote_depth, $max_token_index );
25485 $quoted_string_1 .= $quoted_string;
25486 if ( $in_quote == 1 ) {
25487 $quoted_string_1 .= "\n";
25490 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25491 $quoted_string_1, $quoted_string_2 );
25494 sub follow_quoted_string {
25496 # scan for a specific token, skipping escaped characters
25497 # if the quote character is blank, use the first non-blank character
25498 # input parameters:
25499 # $rtokens = reference to the array of tokens
25500 # $i = the token index of the first character to search
25501 # $in_quote = number of quoted strings being followed
25502 # $beginning_tok = the starting quote character
25503 # $quote_pos = index to check next for alphanumeric delimiter
25504 # output parameters:
25505 # $i = the token index of the ending quote character
25506 # $in_quote = decremented if found end, unchanged if not
25507 # $beginning_tok = the starting quote character
25508 # $quote_pos = index to check next for alphanumeric delimiter
25509 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
25510 # $quoted_string = the text of the quote (without quotation tokens)
25511 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
25514 my ( $tok, $end_tok );
25515 my $i = $i_beg - 1;
25516 my $quoted_string = "";
25518 TOKENIZER_DEBUG_FLAG_QUOTE && do {
25520 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
25523 # get the corresponding end token
25524 if ( $beginning_tok !~ /^\s*$/ ) {
25525 $end_tok = matching_end_token($beginning_tok);
25528 # a blank token means we must find and use the first non-blank one
25530 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
25532 while ( $i < $max_token_index ) {
25533 $tok = $$rtokens[ ++$i ];
25535 if ( $tok !~ /^\s*$/ ) {
25537 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
25538 $i = $max_token_index;
25542 if ( length($tok) > 1 ) {
25543 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
25544 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
25547 $beginning_tok = $tok;
25550 $end_tok = matching_end_token($beginning_tok);
25556 $allow_quote_comments = 1;
25561 # There are two different loops which search for the ending quote
25562 # character. In the rare case of an alphanumeric quote delimiter, we
25563 # have to look through alphanumeric tokens character-by-character, since
25564 # the pre-tokenization process combines multiple alphanumeric
25565 # characters, whereas for a non-alphanumeric delimiter, only tokens of
25566 # length 1 can match.
25568 ###################################################################
25569 # Case 1 (rare): loop for case of alphanumeric quote delimiter..
25570 # "quote_pos" is the position the current word to begin searching
25571 ###################################################################
25572 if ( $beginning_tok =~ /\w/ ) {
25574 # Note this because it is not recommended practice except
25575 # for obfuscated perl contests
25576 if ( $in_quote == 1 ) {
25577 write_logfile_entry(
25578 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
25581 while ( $i < $max_token_index ) {
25583 if ( $quote_pos == 0 || ( $i < 0 ) ) {
25584 $tok = $$rtokens[ ++$i ];
25586 if ( $tok eq '\\' ) {
25588 # retain backslash unless it hides the end token
25589 $quoted_string .= $tok
25590 unless $$rtokens[ $i + 1 ] eq $end_tok;
25592 last if ( $i >= $max_token_index );
25593 $tok = $$rtokens[ ++$i ];
25596 my $old_pos = $quote_pos;
25598 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
25602 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
25604 if ( $quote_pos > 0 ) {
25607 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
25611 if ( $quote_depth == 0 ) {
25617 $quoted_string .= substr( $tok, $old_pos );
25622 ########################################################################
25623 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
25624 ########################################################################
25627 while ( $i < $max_token_index ) {
25628 $tok = $$rtokens[ ++$i ];
25630 if ( $tok eq $end_tok ) {
25633 if ( $quote_depth == 0 ) {
25638 elsif ( $tok eq $beginning_tok ) {
25641 elsif ( $tok eq '\\' ) {
25643 # retain backslash unless it hides the beginning or end token
25644 $tok = $$rtokens[ ++$i ];
25645 $quoted_string .= '\\'
25646 unless ( $tok eq $end_tok || $tok eq $beginning_tok );
25648 $quoted_string .= $tok;
25651 if ( $i > $max_token_index ) { $i = $max_token_index }
25652 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
25656 sub indicate_error {
25657 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
25658 interrupt_logfile();
25660 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
25664 sub write_error_indicator_pair {
25665 my ( $line_number, $input_line, $pos, $carrat ) = @_;
25666 my ( $offset, $numbered_line, $underline ) =
25667 make_numbered_line( $line_number, $input_line, $pos );
25668 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
25669 warning( $numbered_line . "\n" );
25670 $underline =~ s/\s*$//;
25671 warning( $underline . "\n" );
25674 sub make_numbered_line {
25676 # Given an input line, its line number, and a character position of
25677 # interest, create a string not longer than 80 characters of the form
25678 # $lineno: sub_string
25679 # such that the sub_string of $str contains the position of interest
25681 # Here is an example of what we want, in this case we add trailing
25682 # '...' because the line is long.
25684 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
25686 # Here is another example, this time in which we used leading '...'
25687 # because of excessive length:
25689 # 2: ... er of the World Wide Web Consortium's
25691 # input parameters are:
25692 # $lineno = line number
25693 # $str = the text of the line
25694 # $pos = position of interest (the error) : 0 = first character
25697 # - $offset = an offset which corrects the position in case we only
25698 # display part of a line, such that $pos-$offset is the effective
25699 # position from the start of the displayed line.
25700 # - $numbered_line = the numbered line as above,
25701 # - $underline = a blank 'underline' which is all spaces with the same
25702 # number of characters as the numbered line.
25704 my ( $lineno, $str, $pos ) = @_;
25705 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
25706 my $excess = length($str) - $offset - 68;
25707 my $numc = ( $excess > 0 ) ? 68 : undef;
25709 if ( defined($numc) ) {
25710 if ( $offset == 0 ) {
25711 $str = substr( $str, $offset, $numc - 4 ) . " ...";
25714 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
25719 if ( $offset == 0 ) {
25722 $str = "... " . substr( $str, $offset + 4 );
25726 my $numbered_line = sprintf( "%d: ", $lineno );
25727 $offset -= length($numbered_line);
25728 $numbered_line .= $str;
25729 my $underline = " " x length($numbered_line);
25730 return ( $offset, $numbered_line, $underline );
25733 sub write_on_underline {
25735 # The "underline" is a string that shows where an error is; it starts
25736 # out as a string of blanks with the same length as the numbered line of
25737 # code above it, and we have to add marking to show where an error is.
25738 # In the example below, we want to write the string '--^' just below
25739 # the line of bad code:
25741 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
25743 # We are given the current underline string, plus a position and a
25744 # string to write on it.
25746 # In the above example, there will be 2 calls to do this:
25747 # First call: $pos=19, pos_chr=^
25748 # Second call: $pos=16, pos_chr=---
25750 # This is a trivial thing to do with substr, but there is some
25753 my ( $underline, $pos, $pos_chr ) = @_;
25755 # check for error..shouldn't happen
25756 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
25759 my $excess = length($pos_chr) + $pos - length($underline);
25760 if ( $excess > 0 ) {
25761 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
25763 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
25764 return ($underline);
25769 # Break a string, $str, into a sequence of preliminary tokens. We
25770 # are interested in these types of tokens:
25771 # words (type='w'), example: 'max_tokens_wanted'
25772 # digits (type = 'd'), example: '0755'
25773 # whitespace (type = 'b'), example: ' '
25774 # any other single character (i.e. punct; type = the character itself).
25775 # We cannot do better than this yet because we might be in a quoted
25776 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
25778 my ( $str, $max_tokens_wanted ) = @_;
25780 # we return references to these 3 arrays:
25781 my @tokens = (); # array of the tokens themselves
25782 my @token_map = (0); # string position of start of each token
25783 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
25788 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
25791 # note that this must come before words!
25792 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
25795 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
25797 # single-character punctuation
25798 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
25802 return ( \@tokens, \@token_map, \@type );
25806 push @token_map, pos($str);
25808 } while ( --$max_tokens_wanted != 0 );
25810 return ( \@tokens, \@token_map, \@type );
25815 # this is an old debug routine
25816 my ( $rtokens, $rtoken_map ) = @_;
25817 my $num = scalar(@$rtokens);
25820 for ( $i = 0 ; $i < $num ; $i++ ) {
25821 my $len = length( $$rtokens[$i] );
25822 print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
25826 sub matching_end_token {
25828 # find closing character for a pattern
25829 my $beginning_token = shift;
25831 if ( $beginning_token eq '{' ) {
25834 elsif ( $beginning_token eq '[' ) {
25837 elsif ( $beginning_token eq '<' ) {
25840 elsif ( $beginning_token eq '(' ) {
25848 sub dump_token_types {
25852 # This should be the latest list of token types in use
25853 # adding NEW_TOKENS: add a comment here
25854 print $fh <<'END_OF_LIST';
25856 Here is a list of the token types currently used for lines of type 'CODE'.
25857 For the following tokens, the "type" of a token is just the token itself.
25859 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
25860 ( ) <= >= == =~ !~ != ++ -- /= x=
25861 ... **= <<= >>= &&= ||= //= <=>
25862 , + - / * | % ! x ~ = \ ? : . < > ^ &
25864 The following additional token types are defined:
25867 b blank (white space)
25868 { indent: opening structural curly brace or square bracket or paren
25869 (code block, anonymous hash reference, or anonymous array reference)
25870 } outdent: right structural curly brace or square bracket or paren
25871 [ left non-structural square bracket (enclosing an array index)
25872 ] right non-structural square bracket
25873 ( left non-structural paren (all but a list right of an =)
25874 ) right non-structural parena
25875 L left non-structural curly brace (enclosing a key)
25876 R right non-structural curly brace
25877 ; terminal semicolon
25878 f indicates a semicolon in a "for" statement
25879 h here_doc operator <<
25881 Q indicates a quote or pattern
25882 q indicates a qw quote block
25884 C user-defined constant or constant function (with void prototype = ())
25885 U user-defined function taking parameters
25886 G user-defined function taking block parameter (like grep/map/eval)
25887 M (unused, but reserved for subroutine definition name)
25888 P (unused, but -html uses it to label pod text)
25889 t type indicater such as %,$,@,*,&,sub
25890 w bare word (perhaps a subroutine call)
25891 i identifier of some type (with leading %, $, @, *, &, sub, -> )
25894 F a file test operator (like -e)
25896 Z identifier in indirect object slot: may be file handle, object
25897 J LABEL: code block label
25898 j LABEL after next, last, redo, goto
25901 pp pre-increment operator ++
25902 mm pre-decrement operator --
25903 A : used as attribute separator
25905 Here are the '_line_type' codes used internally:
25906 SYSTEM - system-specific code before hash-bang line
25907 CODE - line of perl code (including comments)
25908 POD_START - line starting pod, such as '=head'
25909 POD - pod documentation text
25910 POD_END - last line of pod section, '=cut'
25911 HERE - text of here-document
25912 HERE_END - last line of here-doc (target word)
25913 FORMAT - format section
25914 FORMAT_END - last line of format section, '.'
25915 DATA_START - __DATA__ line
25916 DATA - unidentified text following __DATA__
25917 END_START - __END__ line
25918 END - unidentified text following __END__
25919 ERROR - we are in big trouble, probably not a perl script
25925 # These names are used in error messages
25926 @opening_brace_names = qw# '{' '[' '(' '?' #;
25927 @closing_brace_names = qw# '}' ']' ')' ':' #;
25929 ## TESTING: added ~~
25931 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
25932 <= >= == =~ !~ != ++ -- /= x= ~~
25934 @is_digraph{@digraphs} = (1) x scalar(@digraphs);
25936 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> );
25937 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
25939 # make a hash of all valid token types for self-checking the tokenizer
25940 # (adding NEW_TOKENS : select a new character and add to this list)
25941 my @valid_token_types = qw#
25942 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
25943 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
25945 push( @valid_token_types, @digraphs );
25946 push( @valid_token_types, @trigraphs );
25947 push( @valid_token_types, '#' );
25948 push( @valid_token_types, ',' );
25949 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
25951 # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
25952 my @file_test_operators =
25953 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
25954 @is_file_test_operator{@file_test_operators} =
25955 (1) x scalar(@file_test_operators);
25957 # these functions have prototypes of the form (&), so when they are
25958 # followed by a block, that block MAY BE followed by an operator.
25959 @_ = qw( do eval );
25960 @is_block_operator{@_} = (1) x scalar(@_);
25962 # these functions allow an identifier in the indirect object slot
25963 @_ = qw( print printf sort exec system say);
25964 @is_indirect_object_taker{@_} = (1) x scalar(@_);
25966 # These tokens may precede a code block
25967 # patched for SWITCH/CASE
25968 @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
25969 unless do while until eval for foreach map grep sort
25970 switch case given when);
25971 @is_code_block_token{@_} = (1) x scalar(@_);
25973 # I'll build the list of keywords incrementally
25976 # keywords and tokens after which a value or pattern is expected,
25977 # but not an operator. In other words, these should consume terms
25978 # to their right, or at least they are not expected to be followed
25979 # immediately by operators.
25980 my @value_requestor = qw(
26199 # patched above for SWITCH/CASE given/when err say
26200 # 'err' is a fairly safe addition.
26201 # TODO: 'default' still needed if appropriate
26202 # 'use feature' seen, but perltidy works ok without it.
26203 # Concerned that 'default' could break code.
26204 push( @Keywords, @value_requestor );
26206 # These are treated the same but are not keywords:
26211 push( @value_requestor, @extra_vr );
26213 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
26215 # this list contains keywords which do not look for arguments,
26216 # so that they might be followed by an operator, or at least
26218 my @operator_requestor = qw(
26242 push( @Keywords, @operator_requestor );
26244 # These are treated the same but are not considered keywords:
26251 push( @operator_requestor, @extra_or );
26253 @expecting_operator_token{@operator_requestor} =
26254 (1) x scalar(@operator_requestor);
26256 # these token TYPES expect trailing operator but not a term
26257 # note: ++ and -- are post-increment and decrement, 'C' = constant
26258 my @operator_requestor_types = qw( ++ -- C <> q );
26259 @expecting_operator_types{@operator_requestor_types} =
26260 (1) x scalar(@operator_requestor_types);
26262 # these token TYPES consume values (terms)
26263 # note: pp and mm are pre-increment and decrement
26264 # f=semicolon in for, F=file test operator
26265 my @value_requestor_type = qw#
26266 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
26267 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
26268 <= >= == != => \ > < % * / ? & | ** <=> ~~
26269 f F pp mm Y p m U J G j >> << ^ t
26271 push( @value_requestor_type, ',' )
26272 ; # (perl doesn't like a ',' in a qw block)
26273 @expecting_term_types{@value_requestor_type} =
26274 (1) x scalar(@value_requestor_type);
26276 # Note: the following valid token types are not assigned here to
26277 # hashes requesting to be followed by values or terms, but are
26278 # instead currently hard-coded into sub operator_expected:
26279 # ) -> :: Q R Z ] b h i k n v w } #
26281 # For simple syntax checking, it is nice to have a list of operators which
26282 # will really be unhappy if not followed by a term. This includes most
26284 %really_want_term = %expecting_term_types;
26286 # with these exceptions...
26287 delete $really_want_term{'U'}; # user sub, depends on prototype
26288 delete $really_want_term{'F'}; # file test works on $_ if no following term
26289 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
26292 @_ = qw(q qq qw qx qr s y tr m);
26293 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
26295 # These keywords are handled specially in the tokenizer code:
26296 my @special_keywords = qw(
26312 push( @Keywords, @special_keywords );
26314 # Keywords after which list formatting may be used
26315 # WARNING: do not include |map|grep|eval or perl may die on
26316 # syntax errors (map1.t).
26317 my @keyword_taking_list = qw(
26389 @is_keyword_taking_list{@keyword_taking_list} =
26390 (1) x scalar(@keyword_taking_list);
26392 # These are not used in any way yet
26393 # my @unused_keywords = qw(
26400 # The list of keywords was extracted from function 'keyword' in
26401 # perl file toke.c version 5.005.03, using this utility, plus a
26402 # little editing: (file getkwd.pl):
26403 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
26404 # Add 'get' prefix where necessary, then split into the above lists.
26405 # This list should be updated as necessary.
26406 # The list should not contain these special variables:
26407 # ARGV DATA ENV SIG STDERR STDIN STDOUT
26410 @is_keyword{@Keywords} = (1) x scalar(@Keywords);
26417 Perl::Tidy - Parses and beautifies perl source
26423 Perl::Tidy::perltidy(
26425 destination => $destination,
26428 perltidyrc => $perltidyrc,
26429 logfile => $logfile,
26430 errorfile => $errorfile,
26431 formatter => $formatter, # callback object (see below)
26432 dump_options => $dump_options,
26433 dump_options_type => $dump_options_type,
26438 This module makes the functionality of the perltidy utility available to perl
26439 scripts. Any or all of the input parameters may be omitted, in which case the
26440 @ARGV array will be used to provide input parameters as described
26441 in the perltidy(1) man page.
26443 For example, the perltidy script is basically just this:
26446 Perl::Tidy::perltidy();
26448 The module accepts input and output streams by a variety of methods.
26449 The following list of parameters may be any of a the following: a
26450 filename, an ARRAY reference, a SCALAR reference, or an object with
26451 either a B<getline> or B<print> method, as appropriate.
26453 source - the source of the script to be formatted
26454 destination - the destination of the formatted output
26455 stderr - standard error output
26456 perltidyrc - the .perltidyrc file
26457 logfile - the .LOG file stream, if any
26458 errorfile - the .ERR file stream, if any
26459 dump_options - ref to a hash to receive parameters (see below),
26460 dump_options_type - controls contents of dump_options
26461 dump_getopt_flags - ref to a hash to receive Getopt flags
26462 dump_options_category - ref to a hash giving category of options
26463 dump_abbreviations - ref to a hash giving all abbreviations
26465 The following chart illustrates the logic used to decide how to
26468 ref($param) $param is assumed to be:
26469 ----------- ---------------------
26471 SCALAR ref to string
26473 (other) object with getline (if source) or print method
26475 If the parameter is an object, and the object has a B<close> method, that
26476 close method will be called at the end of the stream.
26482 If the B<source> parameter is given, it defines the source of the
26487 If the B<destination> parameter is given, it will be used to define the
26488 file or memory location to receive output of perltidy.
26492 The B<stderr> parameter allows the calling program to capture the output
26493 to what would otherwise go to the standard error output device.
26497 If the B<perltidyrc> file is given, it will be used instead of any
26498 F<.perltidyrc> configuration file that would otherwise be used.
26502 If the B<argv> parameter is given, it will be used instead of the
26503 B<@ARGV> array. The B<argv> parameter may be a string, a reference to a
26504 string, or a reference to an array. If it is a string or reference to a
26505 string, it will be parsed into an array of items just as if it were a
26506 command line string.
26510 If the B<dump_options> parameter is given, it must be the reference to a hash.
26511 In this case, the parameters contained in any perltidyrc configuration file
26512 will be placed in this hash and perltidy will return immediately. This is
26513 equivalent to running perltidy with --dump-options, except that the perameters
26514 are returned in a hash rather than dumped to standard output. Also, by default
26515 only the parameters in the perltidyrc file are returned, but this can be
26516 changed (see the next parameter). This parameter provides a convenient method
26517 for external programs to read a perltidyrc file. An example program using
26518 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
26520 Any combination of the B<dump_> parameters may be used together.
26522 =item dump_options_type
26524 This parameter is a string which can be used to control the parameters placed
26525 in the hash reference supplied by B<dump_options>. The possible values are
26526 'perltidyrc' (default) and 'full'. The 'full' parameter causes both the
26527 default options plus any options found in a perltidyrc file to be returned.
26529 =item dump_getopt_flags
26531 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
26532 hash. This hash will receive all of the parameters that perltidy understands
26533 and flags that are passed to Getopt::Long. This parameter may be
26534 used alone or with the B<dump_options> flag. Perltidy will
26535 exit immediately after filling this hash. See the demo program
26536 F<perltidyrc_dump.pl> for example usage.
26538 =item dump_options_category
26540 If the B<dump_options_category> parameter is given, it must be the reference to a
26541 hash. This hash will receive a hash with keys equal to all long parameter names
26542 and values equal to the title of the corresponding section of the perltidy manual.
26543 See the demo program F<perltidyrc_dump.pl> for example usage.
26545 =item dump_abbreviations
26547 If the B<dump_abbreviations> parameter is given, it must be the reference to a
26548 hash. This hash will receive all abbreviations used by Perl::Tidy. See the
26549 demo program F<perltidyrc_dump.pl> for example usage.
26555 The following example passes perltidy a snippet as a reference
26556 to a string and receives the result back in a reference to
26561 # some messy source code to format
26562 my $source = <<'EOM';
26564 my @editors=('Emacs', 'Vi '); my $rand = rand();
26565 print "A poll of 10 random programmers gave these results:\n";
26567 my $i=int ($rand+rand());
26568 print " $editors[$i] users are from Venus" . ", " .
26569 "$editors[1-$i] users are from Mars" .
26574 # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
26576 perltidy( source => \$source, destination => \@dest );
26577 foreach (@dest) {print}
26579 =head1 Using the B<formatter> Callback Object
26581 The B<formatter> parameter is an optional callback object which allows
26582 the calling program to receive tokenized lines directly from perltidy for
26583 further specialized processing. When this parameter is used, the two
26584 formatting options which are built into perltidy (beautification or
26585 html) are ignored. The following diagram illustrates the logical flow:
26587 |-- (normal route) -> code beautification
26588 caller->perltidy->|-- (-html flag ) -> create html
26589 |-- (formatter given)-> callback to write_line
26591 This can be useful for processing perl scripts in some way. The
26592 parameter C<$formatter> in the perltidy call,
26594 formatter => $formatter,
26596 is an object created by the caller with a C<write_line> method which
26597 will accept and process tokenized lines, one line per call. Here is
26598 a simple example of a C<write_line> which merely prints the line number,
26599 the line type (as determined by perltidy), and the text of the line:
26603 # This is called from perltidy line-by-line
26605 my $line_of_tokens = shift;
26606 my $line_type = $line_of_tokens->{_line_type};
26607 my $input_line_number = $line_of_tokens->{_line_number};
26608 my $input_line = $line_of_tokens->{_line_text};
26609 print "$input_line_number:$line_type:$input_line";
26612 The complete program, B<perllinetype>, is contained in the examples section of
26613 the source distribution. As this example shows, the callback method
26614 receives a parameter B<$line_of_tokens>, which is a reference to a hash
26615 of other useful information. This example uses these hash entries:
26617 $line_of_tokens->{_line_number} - the line number (1,2,...)
26618 $line_of_tokens->{_line_text} - the text of the line
26619 $line_of_tokens->{_line_type} - the type of the line, one of:
26621 SYSTEM - system-specific code before hash-bang line
26622 CODE - line of perl code (including comments)
26623 POD_START - line starting pod, such as '=head'
26624 POD - pod documentation text
26625 POD_END - last line of pod section, '=cut'
26626 HERE - text of here-document
26627 HERE_END - last line of here-doc (target word)
26628 FORMAT - format section
26629 FORMAT_END - last line of format section, '.'
26630 DATA_START - __DATA__ line
26631 DATA - unidentified text following __DATA__
26632 END_START - __END__ line
26633 END - unidentified text following __END__
26634 ERROR - we are in big trouble, probably not a perl script
26636 Most applications will be only interested in lines of type B<CODE>. For
26637 another example, let's write a program which checks for one of the
26638 so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
26639 can slow down processing. Here is a B<write_line>, from the example
26640 program B<find_naughty.pl>, which does that:
26644 # This is called back from perltidy line-by-line
26645 # We're looking for $`, $&, and $'
26646 my ( $self, $line_of_tokens ) = @_;
26648 # pull out some stuff we might need
26649 my $line_type = $line_of_tokens->{_line_type};
26650 my $input_line_number = $line_of_tokens->{_line_number};
26651 my $input_line = $line_of_tokens->{_line_text};
26652 my $rtoken_type = $line_of_tokens->{_rtoken_type};
26653 my $rtokens = $line_of_tokens->{_rtokens};
26656 # skip comments, pod, etc
26657 return if ( $line_type ne 'CODE' );
26659 # loop over tokens looking for $`, $&, and $'
26660 for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
26662 # we only want to examine token types 'i' (identifier)
26663 next unless $$rtoken_type[$j] eq 'i';
26665 # pull out the actual token text
26666 my $token = $$rtokens[$j];
26669 if ( $token =~ /^\$[\`\&\']$/ ) {
26671 "$input_line_number: $token\n";
26676 This example pulls out these tokenization variables from the $line_of_tokens
26679 $rtoken_type = $line_of_tokens->{_rtoken_type};
26680 $rtokens = $line_of_tokens->{_rtokens};
26682 The variable C<$rtoken_type> is a reference to an array of token type codes,
26683 and C<$rtokens> is a reference to a corresponding array of token text.
26684 These are obviously only defined for lines of type B<CODE>.
26685 Perltidy classifies tokens into types, and has a brief code for each type.
26686 You can get a complete list at any time by running perltidy from the
26689 perltidy --dump-token-types
26691 In the present example, we are only looking for tokens of type B<i>
26692 (identifiers), so the for loop skips past all other types. When an
26693 identifier is found, its actual text is checked to see if it is one
26694 being sought. If so, the above write_line prints the token and its
26697 The B<formatter> feature is relatively new in perltidy, and further
26698 documentation needs to be written to complete its description. However,
26699 several example programs have been written and can be found in the
26700 B<examples> section of the source distribution. Probably the best way
26701 to get started is to find one of the examples which most closely matches
26702 your application and start modifying it.
26704 For help with perltidy's pecular way of breaking lines into tokens, you
26705 might run, from the command line,
26707 perltidy -D filename
26709 where F<filename> is a short script of interest. This will produce
26710 F<filename.DEBUG> with interleaved lines of text and their token types.
26711 The -D flag has been in perltidy from the beginning for this purpose.
26712 If you want to see the code which creates this file, it is
26713 C<write_debug_entry> in Tidy.pm.
26721 Thanks to Hugh Myers who developed the initial modular interface
26726 This man page documents Perl::Tidy version 20060719.
26731 perltidy at users.sourceforge.net
26735 The perltidy(1) man page describes all of the features of perltidy. It
26736 can be found at http://perltidy.sourceforge.net.