1 ############################################################
3 # perltidy - a perl script indenter and formatter
5 # Copyright (c) 2000-2006 by Steve Hancock
6 # Distributed under the GPL license agreement; see file COPYING
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 # For brief instructions instructions, try 'perltidy -h'.
23 # For more complete documentation, try 'man perltidy'
24 # or visit http://perltidy.sourceforge.net
26 # This script is an example of the default style. It was formatted with:
31 # Michael Cartmell supplied code for adaptation to VMS and helped with
33 # Hugh S. Myers supplied sub streamhandle and the supporting code to
34 # create a Perl::Tidy module which can operate on strings, arrays, etc.
35 # Yves Orton supplied coding to help detect Windows versions.
36 # Axel Rose supplied a patch for MacPerl.
37 # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
38 # Many others have supplied key ideas, suggestions, and bug reports;
39 # see the CHANGES file.
41 ############################################################
44 use 5.004; # need IO::File from 5.004 or later
45 BEGIN { $^W = 1; } # turn on warnings
59 @ISA = qw( Exporter );
60 @EXPORT = qw( &perltidy );
66 ( $VERSION = q($Id: Tidy.pm,v 1.49 2006/06/14 01:56:24 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
71 # given filename and mode (r or w), create an object which:
72 # has a 'getline' method if mode='r', and
73 # has a 'print' method if mode='w'.
74 # The objects also need a 'close' method.
76 # How the object is made:
78 # if $filename is: Make object using:
79 # ---------------- -----------------
80 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
82 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
83 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
85 # (check for 'print' method for 'w' mode)
86 # (check for 'getline' method for 'r' mode)
87 my $ref = ref( my $filename = shift );
94 if ( $ref eq 'ARRAY' ) {
95 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
97 elsif ( $ref eq 'SCALAR' ) {
98 $New = sub { Perl::Tidy::IOScalar->new(@_) };
102 # Accept an object with a getline method for reading. Note:
103 # IO::File is built-in and does not respond to the defined
104 # operator. If this causes trouble, the check can be
105 # skipped and we can just let it crash if there is no
107 if ( $mode =~ /[rR]/ ) {
108 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
109 $New = sub { $filename };
112 $New = sub { undef };
114 ------------------------------------------------------------------------
115 No 'getline' method is defined for object of class $ref
116 Please check your call to Perl::Tidy::perltidy. Trace follows.
117 ------------------------------------------------------------------------
122 # Accept an object with a print method for writing.
123 # See note above about IO::File
124 if ( $mode =~ /[wW]/ ) {
125 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
126 $New = sub { $filename };
129 $New = sub { undef };
131 ------------------------------------------------------------------------
132 No 'print' method is defined for object of class $ref
133 Please check your call to Perl::Tidy::perltidy. Trace follows.
134 ------------------------------------------------------------------------
143 if ( $filename eq '-' ) {
144 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
147 $New = sub { IO::File->new(@_) };
150 $fh = $New->( $filename, $mode )
151 or warn "Couldn't open file:$filename in mode:$mode : $!\n";
152 return $fh, ( $ref or $filename );
155 sub find_input_line_ending {
157 # Peek at a file and return first line ending character.
158 # Quietly return undef in case of any trouble.
159 my ($input_file) = @_;
162 # silently ignore input from object or stdin
163 if ( ref($input_file) || $input_file eq '-' ) {
166 open( INFILE, $input_file ) || return $ending;
170 read( INFILE, $buf, 1024 );
172 if ( $buf && $buf =~ /([\012\015]+)/ ) {
176 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
179 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
182 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
196 # concatenate a path and file basename
197 # returns undef in case of error
199 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
201 # use File::Spec if we can
202 unless ($missing_file_spec) {
203 return File::Spec->catfile(@_);
206 # Perl 5.004 systems may not have File::Spec so we'll make
207 # a simple try. We assume File::Basename is available.
208 # return undef if not successful.
210 my $path = join '/', @_;
211 my $test_file = $path . $name;
212 my ( $test_name, $test_path ) = fileparse($test_file);
213 return $test_file if ( $test_name eq $name );
214 return undef if ( $^O eq 'VMS' );
216 # this should work at least for Windows and Unix:
217 $test_file = $path . '/' . $name;
218 ( $test_name, $test_path ) = fileparse($test_file);
219 return $test_file if ( $test_name eq $name );
223 sub make_temporary_filename {
225 # Make a temporary filename.
227 # The POSIX tmpnam() function tends to be unreliable for non-unix
228 # systems (at least for the win32 systems that I've tested), so use
229 # a pre-defined name. A slight disadvantage of this is that two
230 # perltidy runs in the same working directory may conflict.
231 # However, the chance of that is small and managable by the user.
232 # An alternative would be to check for the file's existance and use,
233 # say .TMP0, .TMP1, etc, but that scheme has its own problems. So,
235 my $name = "perltidy.TMP";
236 if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
239 eval "use POSIX qw(tmpnam)";
240 if ($@) { return $name }
243 # just make a couple of tries before giving up and using the default
245 my $tmpname = tmpnam();
246 my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
256 # Here is a map of the flow of data from the input source to the output
259 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
260 # input groups output
261 # lines tokens lines of lines lines
264 # The names correspond to the package names responsible for the unit processes.
266 # The overall process is controlled by the "main" package.
268 # LineSource is the stream of input lines
270 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
271 # if necessary. A token is any section of the input line which should be
272 # manipulated as a single entity during formatting. For example, a single
273 # ',' character is a token, and so is an entire side comment. It handles
274 # the complexities of Perl syntax, such as distinguishing between '<<' as
275 # a shift operator and as a here-document, or distinguishing between '/'
276 # as a divide symbol and as a pattern delimiter.
278 # Formatter inserts and deletes whitespace between tokens, and breaks
279 # sequences of tokens at appropriate points as output lines. It bases its
280 # decisions on the default rules as modified by any command-line options.
282 # VerticalAligner collects groups of lines together and tries to line up
283 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
285 # FileWriter simply writes lines to the output stream.
287 # The Logger package, not shown, records significant events and warning
288 # messages. It writes a .LOG file, which may be saved with a
289 # '-log' or a '-g' flag.
293 # variables needed by interrupt handler:
297 # this routine may be called to give a status report if interrupted. If a
298 # parameter is given, it will call exit with that parameter. This is no
299 # longer used because it works under Unix but not under Windows.
300 sub interrupt_handler {
302 my $exit_flag = shift;
303 print STDERR "perltidy interrupted";
305 my $input_line_number =
306 Perl::Tidy::Tokenizer::get_input_line_number();
307 print STDERR " at line $input_line_number";
311 if ( ref $input_file ) { print STDERR " of reference to:" }
312 else { print STDERR " of file:" }
313 print STDERR " $input_file";
316 exit $exit_flag if defined($exit_flag);
323 destination => undef,
330 dump_options => undef,
331 dump_options_type => undef,
332 dump_getopt_flags => undef,
333 dump_options_category => undef,
334 dump_options_range => undef,
335 dump_abbreviations => undef,
338 # don't overwrite callers ARGV
343 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
345 my @good_keys = sort keys %defaults;
346 @bad_keys = sort @bad_keys;
348 ------------------------------------------------------------------------
349 Unknown perltidy parameter : (@bad_keys)
350 perltidy only understands : (@good_keys)
351 ------------------------------------------------------------------------
356 my $get_hash_ref = sub {
358 my $hash_ref = $input_hash{$key};
359 if ( defined($hash_ref) ) {
360 unless ( ref($hash_ref) eq 'HASH' ) {
361 my $what = ref($hash_ref);
363 $what ? "but is ref to $what" : "but is not a reference";
365 ------------------------------------------------------------------------
366 error in call to perltidy:
367 -$key must be reference to HASH $but_is
368 ------------------------------------------------------------------------
375 %input_hash = ( %defaults, %input_hash );
376 my $argv = $input_hash{'argv'};
377 my $destination_stream = $input_hash{'destination'};
378 my $errorfile_stream = $input_hash{'errorfile'};
379 my $logfile_stream = $input_hash{'logfile'};
380 my $perltidyrc_stream = $input_hash{'perltidyrc'};
381 my $source_stream = $input_hash{'source'};
382 my $stderr_stream = $input_hash{'stderr'};
383 my $user_formatter = $input_hash{'formatter'};
385 # various dump parameters
386 my $dump_options_type = $input_hash{'dump_options_type'};
387 my $dump_options = $get_hash_ref->('dump_options');
388 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
389 my $dump_options_category = $get_hash_ref->('dump_options_category');
390 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
391 my $dump_options_range = $get_hash_ref->('dump_options_range');
393 # validate dump_options_type
394 if ( defined($dump_options) ) {
395 unless ( defined($dump_options_type) ) {
396 $dump_options_type = 'perltidyrc';
398 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
400 ------------------------------------------------------------------------
401 Please check value of -dump_options_type in call to perltidy;
402 saw: '$dump_options_type'
403 expecting: 'perltidyrc' or 'full'
404 ------------------------------------------------------------------------
410 $dump_options_type = "";
413 if ($user_formatter) {
415 # if the user defines a formatter, there is no output stream,
416 # but we need a null stream to keep coding simple
417 $destination_stream = Perl::Tidy::DevNull->new();
420 # see if ARGV is overridden
421 if ( defined($argv) ) {
423 my $rargv = ref $argv;
424 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
428 if ( $rargv eq 'ARRAY' ) {
433 ------------------------------------------------------------------------
434 Please check value of -argv in call to perltidy;
435 it must be a string or ref to ARRAY but is: $rargv
436 ------------------------------------------------------------------------
443 my ( $rargv, $msg ) = parse_args($argv);
446 Error parsing this string passed to to perltidy with 'argv':
454 # redirect STDERR if requested
455 if ($stderr_stream) {
456 my ( $fh_stderr, $stderr_file ) =
457 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
458 if ($fh_stderr) { *STDERR = $fh_stderr }
461 ------------------------------------------------------------------------
462 Unable to redirect STDERR to $stderr_stream
463 Please check value of -stderr in call to perltidy
464 ------------------------------------------------------------------------
469 my $rpending_complaint;
470 $$rpending_complaint = "";
471 my $rpending_logfile_message;
472 $$rpending_logfile_message = "";
474 my ( $is_Windows, $Windows_type ) =
475 look_for_Windows($rpending_complaint);
477 # VMS file names are restricted to a 40.40 format, so we append _tdy
478 # instead of .tdy, etc. (but see also sub check_vms_filename)
481 if ( $^O eq 'VMS' ) {
487 $dot_pattern = '\.'; # must escape for use in regex
490 # handle command line options
491 my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
492 $rexpansion, $roption_category, $roption_range )
493 = process_command_line(
494 $perltidyrc_stream, $is_Windows, $Windows_type,
495 $rpending_complaint, $dump_options_type,
498 # return or exit immediately after all dumps
501 # Getopt parameters and their flags
502 if ( defined($dump_getopt_flags) ) {
504 foreach my $op ( @{$roption_string} ) {
507 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
511 $dump_getopt_flags->{$opt} = $flag;
515 if ( defined($dump_options_category) ) {
517 %{$dump_options_category} = %{$roption_category};
520 if ( defined($dump_options_range) ) {
522 %{$dump_options_range} = %{$roption_range};
525 if ( defined($dump_abbreviations) ) {
527 %{$dump_abbreviations} = %{$rexpansion};
530 if ( defined($dump_options) ) {
532 %{$dump_options} = %{$rOpts};
535 return if ($quit_now);
537 # dump from command line
538 if ( $rOpts->{'dump-options'} ) {
539 dump_options( $rOpts, $roption_string );
543 check_options( $rOpts, $is_Windows, $Windows_type,
544 $rpending_complaint );
546 if ($user_formatter) {
547 $rOpts->{'format'} = 'user';
550 # there must be one entry here for every possible format
551 my %default_file_extension = (
557 # be sure we have a valid output format
558 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
559 my $formats = join ' ',
560 sort map { "'" . $_ . "'" } keys %default_file_extension;
561 my $fmt = $rOpts->{'format'};
562 die "-format='$fmt' but must be one of: $formats\n";
565 my $output_extension =
566 make_extension( $rOpts->{'output-file-extension'},
567 $default_file_extension{ $rOpts->{'format'} }, $dot );
569 my $backup_extension =
570 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
572 my $html_toc_extension =
573 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
575 my $html_src_extension =
576 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
578 # check for -b option;
579 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
580 && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
581 && @ARGV > 0; # silently ignore if standard input;
582 # this allows -b to be in a .perltidyrc file
583 # without error messages when running from an editor
585 # turn off -b with warnings in case of conflicts with other options
586 if ($in_place_modify) {
587 if ( $rOpts->{'standard-output'} ) {
588 warn "Ignoring -b; you may not use -b and -st together\n";
589 $in_place_modify = 0;
591 if ($destination_stream) {
593 "Ignoring -b; you may not specify a destination array and -b together\n";
594 $in_place_modify = 0;
596 if ($source_stream) {
598 "Ignoring -b; you may not specify a source array and -b together\n";
599 $in_place_modify = 0;
601 if ( $rOpts->{'outfile'} ) {
602 warn "Ignoring -b; you may not use -b and -o together\n";
603 $in_place_modify = 0;
605 if ( defined( $rOpts->{'output-path'} ) ) {
606 warn "Ignoring -b; you may not use -b and -opath together\n";
607 $in_place_modify = 0;
611 Perl::Tidy::Formatter::check_options($rOpts);
612 if ( $rOpts->{'format'} eq 'html' ) {
613 Perl::Tidy::HtmlWriter->check_options($rOpts);
616 # make the pattern of file extensions that we shouldn't touch
617 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
618 if ($output_extension) {
619 $_ = quotemeta($output_extension);
620 $forbidden_file_extensions .= "|$_";
622 if ( $in_place_modify && $backup_extension ) {
623 $_ = quotemeta($backup_extension);
624 $forbidden_file_extensions .= "|$_";
626 $forbidden_file_extensions .= ')$';
628 # Create a diagnostics object if requested;
629 # This is only useful for code development
630 my $diagnostics_object = undef;
631 if ( $rOpts->{'DIAGNOSTICS'} ) {
632 $diagnostics_object = Perl::Tidy::Diagnostics->new();
635 # no filenames should be given if input is from an array
636 if ($source_stream) {
639 "You may not specify any filenames when a source array is given\n";
642 # we'll stuff the source array into ARGV
643 unshift( @ARGV, $source_stream );
645 # No special treatment for source stream which is a filename.
646 # This will enable checks for binary files and other bad stuff.
647 $source_stream = undef unless ref($source_stream);
650 # use stdin by default if no source array and no args
652 unshift( @ARGV, '-' ) unless @ARGV;
655 # loop to process all files in argument list
656 my $number_of_files = @ARGV;
657 my $formatter = undef;
659 while ( $input_file = shift @ARGV ) {
661 my $input_file_permissions;
663 #---------------------------------------------------------------
664 # determine the input file name
665 #---------------------------------------------------------------
666 if ($source_stream) {
667 $fileroot = "perltidy";
669 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
670 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
671 $in_place_modify = 0;
674 $fileroot = $input_file;
675 unless ( -e $input_file ) {
677 # file doesn't exist - check for a file glob
678 if ( $input_file =~ /([\?\*\[\{])/ ) {
680 # Windows shell may not remove quotes, so do it
681 my $input_file = $input_file;
682 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
683 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
684 my $pattern = fileglob_to_re($input_file);
686 if ( !$@ && opendir( DIR, './' ) ) {
688 grep { /$pattern/ && !-d $_ } readdir(DIR);
691 unshift @ARGV, @files;
696 print "skipping file: '$input_file': no matches found\n";
700 unless ( -f $input_file ) {
701 print "skipping file: $input_file: not a regular file\n";
705 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
707 "skipping file: $input_file: Non-text (override with -f)\n";
711 # we should have a valid filename now
712 $fileroot = $input_file;
713 $input_file_permissions = ( stat $input_file )[2] & 07777;
715 if ( $^O eq 'VMS' ) {
716 ( $fileroot, $dot ) = check_vms_filename($fileroot);
719 # add option to change path here
720 if ( defined( $rOpts->{'output-path'} ) ) {
722 my ( $base, $old_path ) = fileparse($fileroot);
723 my $new_path = $rOpts->{'output-path'};
724 unless ( -d $new_path ) {
725 unless ( mkdir $new_path, 0777 ) {
726 die "unable to create directory $new_path: $!\n";
729 my $path = $new_path;
730 $fileroot = catfile( $path, $base );
733 ------------------------------------------------------------------------
734 Problem combining $new_path and $base to make a filename; check -opath
735 ------------------------------------------------------------------------
741 # Skip files with same extension as the output files because
742 # this can lead to a messy situation with files like
743 # script.tdy.tdy.tdy ... or worse problems ... when you
744 # rerun perltidy over and over with wildcard input.
747 && ( $input_file =~ /$forbidden_file_extensions/o
748 || $input_file eq 'DIAGNOSTICS' )
751 print "skipping file: $input_file: wrong extension\n";
755 # the 'source_object' supplies a method to read the input file
757 Perl::Tidy::LineSource->new( $input_file, $rOpts,
758 $rpending_logfile_message );
759 next unless ($source_object);
761 # register this file name with the Diagnostics package
762 $diagnostics_object->set_input_file($input_file)
763 if $diagnostics_object;
765 #---------------------------------------------------------------
766 # determine the output file name
767 #---------------------------------------------------------------
768 my $output_file = undef;
769 my $actual_output_extension;
771 if ( $rOpts->{'outfile'} ) {
773 if ( $number_of_files <= 1 ) {
775 if ( $rOpts->{'standard-output'} ) {
776 die "You may not use -o and -st together\n";
778 elsif ($destination_stream) {
780 "You may not specify a destination array and -o together\n";
782 elsif ( defined( $rOpts->{'output-path'} ) ) {
783 die "You may not specify -o and -opath together\n";
785 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
786 die "You may not specify -o and -oext together\n";
788 $output_file = $rOpts->{outfile};
790 # make sure user gives a file name after -o
791 if ( $output_file =~ /^-/ ) {
792 die "You must specify a valid filename after -o\n";
795 # do not overwrite input file with -o
796 if ( defined($input_file_permissions)
797 && ( $output_file eq $input_file ) )
800 "Use 'perltidy -b $input_file' to modify in-place\n";
804 die "You may not use -o with more than one input file\n";
807 elsif ( $rOpts->{'standard-output'} ) {
808 if ($destination_stream) {
810 "You may not specify a destination array and -st together\n";
814 if ( $number_of_files <= 1 ) {
817 die "You may not use -st with more than one input file\n";
820 elsif ($destination_stream) {
821 $output_file = $destination_stream;
823 elsif ($source_stream) { # source but no destination goes to stdout
826 elsif ( $input_file eq '-' ) {
830 if ($in_place_modify) {
831 $output_file = IO::File->new_tmpfile()
832 or die "cannot open temp file for -b option: $!\n";
835 $actual_output_extension = $output_extension;
836 $output_file = $fileroot . $output_extension;
840 # the 'sink_object' knows how to write the output file
841 my $tee_file = $fileroot . $dot . "TEE";
843 my $line_separator = $rOpts->{'output-line-ending'};
844 if ( $rOpts->{'preserve-line-endings'} ) {
845 $line_separator = find_input_line_ending($input_file);
847 $line_separator = "\n" unless defined($line_separator);
850 Perl::Tidy::LineSink->new( $output_file, $tee_file,
851 $line_separator, $rOpts, $rpending_logfile_message );
853 #---------------------------------------------------------------
854 # initialize the error logger
855 #---------------------------------------------------------------
856 my $warning_file = $fileroot . $dot . "ERR";
857 if ($errorfile_stream) { $warning_file = $errorfile_stream }
858 my $log_file = $fileroot . $dot . "LOG";
859 if ($logfile_stream) { $log_file = $logfile_stream }
862 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
864 write_logfile_header(
865 $rOpts, $logger_object, $config_file,
866 $rraw_options, $Windows_type
868 if ($$rpending_logfile_message) {
869 $logger_object->write_logfile_entry($$rpending_logfile_message);
871 if ($$rpending_complaint) {
872 $logger_object->complain($$rpending_complaint);
875 #---------------------------------------------------------------
876 # initialize the debug object, if any
877 #---------------------------------------------------------------
878 my $debugger_object = undef;
879 if ( $rOpts->{DEBUG} ) {
881 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
884 #---------------------------------------------------------------
885 # create a formatter for this file : html writer or pretty printer
886 #---------------------------------------------------------------
888 # we have to delete any old formatter because, for safety,
889 # the formatter will check to see that there is only one.
892 if ($user_formatter) {
893 $formatter = $user_formatter;
895 elsif ( $rOpts->{'format'} eq 'html' ) {
897 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
898 $actual_output_extension, $html_toc_extension,
899 $html_src_extension );
901 elsif ( $rOpts->{'format'} eq 'tidy' ) {
902 $formatter = Perl::Tidy::Formatter->new(
903 logger_object => $logger_object,
904 diagnostics_object => $diagnostics_object,
905 sink_object => $sink_object,
909 die "I don't know how to do -format=$rOpts->{'format'}\n";
912 unless ($formatter) {
913 die "Unable to continue with $rOpts->{'format'} formatting\n";
916 #---------------------------------------------------------------
917 # create the tokenizer for this file
918 #---------------------------------------------------------------
919 $tokenizer = undef; # must destroy old tokenizer
920 $tokenizer = Perl::Tidy::Tokenizer->new(
921 source_object => $source_object,
922 logger_object => $logger_object,
923 debugger_object => $debugger_object,
924 diagnostics_object => $diagnostics_object,
925 starting_level => $rOpts->{'starting-indentation-level'},
926 tabs => $rOpts->{'tabs'},
927 indent_columns => $rOpts->{'indent-columns'},
928 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
929 look_for_autoloader => $rOpts->{'look-for-autoloader'},
930 look_for_selfloader => $rOpts->{'look-for-selfloader'},
931 trim_qw => $rOpts->{'trim-qw'},
934 #---------------------------------------------------------------
936 #---------------------------------------------------------------
937 process_this_file( $tokenizer, $formatter );
939 #---------------------------------------------------------------
940 # close the input source and report errors
941 #---------------------------------------------------------------
942 $source_object->close_input_file();
944 # get file names to use for syntax check
945 my $ifname = $source_object->get_input_file_copy_name();
946 my $ofname = $sink_object->get_output_file_copy();
948 #---------------------------------------------------------------
949 # handle the -b option (backup and modify in-place)
950 #---------------------------------------------------------------
951 if ($in_place_modify) {
952 unless ( -f $input_file ) {
954 # oh, oh, no real file to backup ..
955 # shouldn't happen because of numerous preliminary checks
957 "problem with -b backing up input file '$input_file': not a file\n";
959 my $backup_name = $input_file . $backup_extension;
960 if ( -f $backup_name ) {
963 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
965 rename( $input_file, $backup_name )
967 "problem renaming $input_file to $backup_name for -b option: $!\n";
968 $ifname = $backup_name;
970 seek( $output_file, 0, 0 )
971 or die "unable to rewind tmp file for -b option: $!\n";
973 my $fout = IO::File->new("> $input_file")
975 "problem opening $input_file for write for -b option; check directory permissions: $!\n";
977 while ( $line = $output_file->getline() ) {
981 $output_file = $input_file;
982 $ofname = $input_file;
985 #---------------------------------------------------------------
986 # clean up and report errors
987 #---------------------------------------------------------------
988 $sink_object->close_output_file() if $sink_object;
989 $debugger_object->close_debug_file() if $debugger_object;
991 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
994 if ($input_file_permissions) {
996 # give output script same permissions as input script, but
997 # make it user-writable or else we can't run perltidy again.
998 # Thus we retain whatever executable flags were set.
999 if ( $rOpts->{'format'} eq 'tidy' ) {
1000 chmod( $input_file_permissions | 0600, $output_file );
1003 # else use default permissions for html and any other format
1006 if ( $logger_object && $rOpts->{'check-syntax'} ) {
1008 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1012 $logger_object->finish( $infile_syntax_ok, $formatter )
1014 } # end of loop to process all files
1015 } # end of main program
1018 sub fileglob_to_re {
1020 # modified (corrected) from version in find2perl
1022 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
1023 $x =~ s#\*#.*#g; # '*' -> '.*'
1024 $x =~ s#\?#.#g; # '?' -> '.'
1025 "^$x\\z"; # match whole word
1028 sub make_extension {
1030 # Make a file extension, including any leading '.' if necessary
1031 # The '.' may actually be an '_' under VMS
1032 my ( $extension, $default, $dot ) = @_;
1034 # Use the default if none specified
1035 $extension = $default unless ($extension);
1037 # Only extensions with these leading characters get a '.'
1038 # This rule gives the user some freedom
1039 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1040 $extension = $dot . $extension;
1045 sub write_logfile_header {
1046 my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
1048 $logger_object->write_logfile_entry(
1049 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1051 if ($Windows_type) {
1052 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1054 my $options_string = join( ' ', @$rraw_options );
1057 $logger_object->write_logfile_entry(
1058 "Found Configuration File >>> $config_file \n");
1060 $logger_object->write_logfile_entry(
1061 "Configuration and command line parameters for this run:\n");
1062 $logger_object->write_logfile_entry("$options_string\n");
1064 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1065 $rOpts->{'logfile'} = 1; # force logfile to be saved
1066 $logger_object->write_logfile_entry(
1067 "Final parameter set for this run\n");
1068 $logger_object->write_logfile_entry(
1069 "------------------------------------\n");
1071 foreach ( keys %{$rOpts} ) {
1072 $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
1074 $logger_object->write_logfile_entry(
1075 "------------------------------------\n");
1077 $logger_object->write_logfile_entry(
1078 "To find error messages search for 'WARNING' with your editor\n");
1081 sub generate_options {
1083 ######################################################################
1084 # Generate and return references to:
1085 # @option_string - the list of options to be passed to Getopt::Long
1086 # @defaults - the list of default options
1087 # %expansion - a hash showing how all abbreviations are expanded
1088 # %category - a hash giving the general category of each option
1089 # %option_range - a hash giving the valid ranges of certain options
1091 # Note: a few options are not documented in the man page and usage
1092 # message. This is because these are experimental or debug options and
1093 # may or may not be retained in future versions.
1095 # Here are the undocumented flags as far as I know. Any of them
1096 # may disappear at any time. They are mainly for fine-tuning
1099 # fll --> fuzzy-line-length # a trivial parameter which gets
1100 # turned off for the extrude option
1101 # which is mainly for debugging
1102 # chk --> check-multiline-quotes # check for old bug; to be deleted
1103 # scl --> short-concatenation-item-length # helps break at '.'
1104 # recombine # for debugging line breaks
1105 # I --> DIAGNOSTICS # for debugging
1106 ######################################################################
1108 # here is a summary of the Getopt codes:
1109 # <none> does not take an argument
1110 # =s takes a mandatory string
1111 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1112 # =i takes a mandatory integer
1113 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1114 # ! does not take an argument and may be negated
1115 # i.e., -foo and -nofoo are allowed
1116 # a double dash signals the end of the options list
1118 #---------------------------------------------------------------
1119 # Define the option string passed to GetOptions.
1120 #---------------------------------------------------------------
1122 my @option_string = ();
1124 my %option_category = ();
1125 my %option_range = ();
1126 my $rexpansion = \%expansion;
1128 # names of categories in manual
1129 # leading integers will allow sorting
1130 my @category_name = (
1132 '1. Basic formatting options',
1133 '2. Code indentation control',
1134 '3. Whitespace control',
1135 '4. Comment controls',
1136 '5. Linebreak controls',
1137 '6. Controlling list formatting',
1138 '7. Retaining or ignoring existing line breaks',
1139 '8. Blank line control',
1140 '9. Other controls',
1142 '11. pod2html options',
1143 '12. Controlling HTML properties',
1147 # These options are parsed directly by perltidy:
1150 # However, they are included in the option set so that they will
1151 # be seen in the options dump.
1153 # These long option names have no abbreviations or are treated specially
1154 @option_string = qw(
1162 my $category = 13; # Debugging
1163 foreach (@option_string) {
1164 my $opt = $_; # must avoid changing the actual flag
1166 $option_category{$opt} = $category_name[$category];
1169 $category = 11; # HTML
1170 $option_category{html} = $category_name[$category];
1172 # routine to install and check options
1173 my $add_option = sub {
1174 my ( $long_name, $short_name, $flag ) = @_;
1175 push @option_string, $long_name . $flag;
1176 $option_category{$long_name} = $category_name[$category];
1178 if ( $expansion{$short_name} ) {
1179 my $existing_name = $expansion{$short_name}[0];
1181 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1183 $expansion{$short_name} = [$long_name];
1184 if ( $flag eq '!' ) {
1185 my $nshort_name = 'n' . $short_name;
1186 my $nolong_name = 'no' . $long_name;
1187 if ( $expansion{$nshort_name} ) {
1188 my $existing_name = $expansion{$nshort_name}[0];
1190 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1192 $expansion{$nshort_name} = [$nolong_name];
1197 # Install long option names which have a simple abbreviation.
1198 # Options with code '!' get standard negation ('no' for long names,
1199 # 'n' for abbreviations). Categories follow the manual.
1201 ###########################
1202 $category = 0; # I/O_Control
1203 ###########################
1204 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1205 $add_option->( 'backup-file-extension', 'bext', '=s' );
1206 $add_option->( 'force-read-binary', 'f', '!' );
1207 $add_option->( 'format', 'fmt', '=s' );
1208 $add_option->( 'logfile', 'log', '!' );
1209 $add_option->( 'logfile-gap', 'g', ':i' );
1210 $add_option->( 'outfile', 'o', '=s' );
1211 $add_option->( 'output-file-extension', 'oext', '=s' );
1212 $add_option->( 'output-path', 'opath', '=s' );
1213 $add_option->( 'profile', 'pro', '=s' );
1214 $add_option->( 'quiet', 'q', '!' );
1215 $add_option->( 'standard-error-output', 'se', '!' );
1216 $add_option->( 'standard-output', 'st', '!' );
1217 $add_option->( 'warning-output', 'w', '!' );
1219 ########################################
1220 $category = 1; # Basic formatting options
1221 ########################################
1222 $add_option->( 'check-syntax', 'syn', '!' );
1223 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1224 $add_option->( 'indent-columns', 'i', '=i' );
1225 $add_option->( 'maximum-line-length', 'l', '=i' );
1226 $add_option->( 'output-line-ending', 'ole', '=s' );
1227 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1228 $add_option->( 'preserve-line-endings', 'ple', '!' );
1229 $add_option->( 'tabs', 't', '!' );
1231 ########################################
1232 $category = 2; # Code indentation control
1233 ########################################
1234 $add_option->( 'continuation-indentation', 'ci', '=i' );
1235 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1236 $add_option->( 'line-up-parentheses', 'lp', '!' );
1237 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1238 $add_option->( 'outdent-keywords', 'okw', '!' );
1239 $add_option->( 'outdent-labels', 'ola', '!' );
1240 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1241 $add_option->( 'indent-closing-brace', 'icb', '!' );
1242 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1243 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1244 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1245 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1246 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1247 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1249 ########################################
1250 $category = 3; # Whitespace control
1251 ########################################
1252 $add_option->( 'add-semicolons', 'asc', '!' );
1253 $add_option->( 'add-whitespace', 'aws', '!' );
1254 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1255 $add_option->( 'brace-tightness', 'bt', '=i' );
1256 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1257 $add_option->( 'delete-semicolons', 'dsm', '!' );
1258 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1259 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1260 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1261 $add_option->( 'paren-tightness', 'pt', '=i' );
1262 $add_option->( 'space-after-keyword', 'sak', '=s' );
1263 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1264 $add_option->( 'space-function-paren', 'sfp', '!' );
1265 $add_option->( 'space-keyword-paren', 'skp', '!' );
1266 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1267 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1268 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1269 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1270 $add_option->( 'trim-qw', 'tqw', '!' );
1271 $add_option->( 'want-left-space', 'wls', '=s' );
1272 $add_option->( 'want-right-space', 'wrs', '=s' );
1274 ########################################
1275 $category = 4; # Comment controls
1276 ########################################
1277 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1278 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1279 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1280 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1281 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1282 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1283 $add_option->( 'closing-side-comments', 'csc', '!' );
1284 $add_option->( 'format-skipping', 'fs', '!' );
1285 $add_option->( 'format-skipping-begin', 'fsb', '=s' );
1286 $add_option->( 'format-skipping-end', 'fse', '=s' );
1287 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1288 $add_option->( 'indent-block-comments', 'ibc', '!' );
1289 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1290 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1291 $add_option->( 'outdent-long-comments', 'olc', '!' );
1292 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1293 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1294 $add_option->( 'static-block-comments', 'sbc', '!' );
1295 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1296 $add_option->( 'static-side-comments', 'ssc', '!' );
1298 ########################################
1299 $category = 5; # Linebreak controls
1300 ########################################
1301 $add_option->( 'add-newlines', 'anl', '!' );
1302 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1303 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1304 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1305 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1306 $add_option->( 'cuddled-else', 'ce', '!' );
1307 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1308 $add_option->( 'opening-brace-always-on-right', 'bar', '' );
1309 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1310 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1311 $add_option->( 'opening-paren-right', 'opr', '!' );
1312 $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1313 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1314 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1315 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1316 $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1317 $add_option->( 'stack-closing-paren', 'scp', '!' );
1318 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1319 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1320 $add_option->( 'stack-opening-paren', 'sop', '!' );
1321 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1322 $add_option->( 'vertical-tightness', 'vt', '=i' );
1323 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1324 $add_option->( 'want-break-after', 'wba', '=s' );
1325 $add_option->( 'want-break-before', 'wbb', '=s' );
1327 ########################################
1328 $category = 6; # Controlling list formatting
1329 ########################################
1330 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1331 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1332 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1334 ########################################
1335 $category = 7; # Retaining or ignoring existing line breaks
1336 ########################################
1337 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1338 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1339 $add_option->( 'break-at-old-trinary-breakpoints', 'bot', '!' );
1340 $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1342 ########################################
1343 $category = 8; # Blank line control
1344 ########################################
1345 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1346 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1347 $add_option->( 'blanks-before-subs', 'bbs', '!' );
1348 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1349 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1350 $add_option->( 'swallow-optional-blank-lines', 'sob', '!' );
1352 ########################################
1353 $category = 9; # Other controls
1354 ########################################
1355 $add_option->( 'delete-block-comments', 'dbc', '!' );
1356 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1357 $add_option->( 'delete-pod', 'dp', '!' );
1358 $add_option->( 'delete-side-comments', 'dsc', '!' );
1359 $add_option->( 'tee-block-comments', 'tbc', '!' );
1360 $add_option->( 'tee-pod', 'tp', '!' );
1361 $add_option->( 'tee-side-comments', 'tsc', '!' );
1362 $add_option->( 'look-for-autoloader', 'lal', '!' );
1363 $add_option->( 'look-for-hash-bang', 'x', '!' );
1364 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1365 $add_option->( 'pass-version-line', 'pvl', '!' );
1367 ########################################
1368 $category = 13; # Debugging
1369 ########################################
1370 $add_option->( 'DEBUG', 'D', '!' );
1371 $add_option->( 'DIAGNOSTICS', 'I', '!' );
1372 $add_option->( 'check-multiline-quotes', 'chk', '!' );
1373 $add_option->( 'dump-defaults', 'ddf', '!' );
1374 $add_option->( 'dump-long-names', 'dln', '!' );
1375 $add_option->( 'dump-options', 'dop', '!' );
1376 $add_option->( 'dump-profile', 'dpro', '!' );
1377 $add_option->( 'dump-short-names', 'dsn', '!' );
1378 $add_option->( 'dump-token-types', 'dtt', '!' );
1379 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1380 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1381 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1382 $add_option->( 'help', 'h', '' );
1383 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1384 $add_option->( 'show-options', 'opt', '!' );
1385 $add_option->( 'version', 'v', '' );
1387 #---------------------------------------------------------------------
1389 # The Perl::Tidy::HtmlWriter will add its own options to the string
1390 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1392 ########################################
1393 # Set categories 10, 11, 12
1394 ########################################
1395 # Based on their known order
1396 $category = 12; # HTML properties
1397 foreach my $opt (@option_string) {
1398 my $long_name = $opt;
1399 $long_name =~ s/(!|=.*|:.*)$//;
1400 unless ( defined( $option_category{$long_name} ) ) {
1401 if ( $long_name =~ /^html-linked/ ) {
1402 $category = 10; # HTML options
1404 elsif ( $long_name =~ /^pod2html/ ) {
1405 $category = 11; # Pod2html
1407 $option_category{$long_name} = $category_name[$category];
1411 #---------------------------------------------------------------
1412 # Assign valid ranges to certain options
1413 #---------------------------------------------------------------
1414 # In the future, these may be used to make preliminary checks
1415 # hash keys are long names
1416 # If key or value is undefined:
1417 # strings may have any value
1418 # integer ranges are >=0
1419 # If value is defined:
1420 # value is [qw(any valid words)] for strings
1421 # value is [min, max] for integers
1422 # if min is undefined, there is no lower limit
1423 # if max is undefined, there is no upper limit
1424 # Parameters not listed here have defaults
1425 $option_range{'format'} = [qw(tidy html user)];
1426 $option_range{'output-line-ending'} = [qw(dos win mac unix)];
1428 $option_range{'block-brace-tightness'} = [ 0, 2 ];
1429 $option_range{'brace-tightness'} = [ 0, 2 ];
1430 $option_range{'paren-tightness'} = [ 0, 2 ];
1431 $option_range{'square-bracket-tightness'} = [ 0, 2 ];
1433 $option_range{'block-brace-vertical-tightness'} = [ 0, 2 ];
1434 $option_range{'brace-vertical-tightness'} = [ 0, 2 ];
1435 $option_range{'brace-vertical-tightness-closing'} = [ 0, 2 ];
1436 $option_range{'paren-vertical-tightness'} = [ 0, 2 ];
1437 $option_range{'paren-vertical-tightness-closing'} = [ 0, 2 ];
1438 $option_range{'square-bracket-vertical-tightness'} = [ 0, 2 ];
1439 $option_range{'square-bracket-vertical-tightness-closing'} = [ 0, 2 ];
1440 $option_range{'vertical-tightness'} = [ 0, 2 ];
1441 $option_range{'vertical-tightness-closing'} = [ 0, 2 ];
1443 $option_range{'closing-brace-indentation'} = [ 0, 3 ];
1444 $option_range{'closing-paren-indentation'} = [ 0, 3 ];
1445 $option_range{'closing-square-bracket-indentation'} = [ 0, 3 ];
1446 $option_range{'closing-token-indentation'} = [ 0, 3 ];
1448 $option_range{'closing-side-comment-else-flag'} = [ 0, 2 ];
1449 $option_range{'comma-arrow-breakpoints'} = [ 0, 3 ];
1451 # Note: we could actually allow negative ci if someone really wants it:
1452 # $option_range{'continuation-indentation'} = [ undef, undef ];
1454 #---------------------------------------------------------------
1455 # Assign default values to the above options here, except
1456 # for 'outfile' and 'help'.
1457 # These settings should approximate the perlstyle(1) suggestions.
1458 #---------------------------------------------------------------
1463 blanks-before-blocks
1464 blanks-before-comments
1466 block-brace-tightness=0
1467 block-brace-vertical-tightness=0
1469 brace-vertical-tightness-closing=0
1470 brace-vertical-tightness=0
1471 break-at-old-logical-breakpoints
1472 break-at-old-trinary-breakpoints
1473 break-at-old-keyword-breakpoints
1474 comma-arrow-breakpoints=1
1476 closing-side-comment-interval=6
1477 closing-side-comment-maximum-text=20
1478 closing-side-comment-else-flag=0
1479 closing-paren-indentation=0
1480 closing-brace-indentation=0
1481 closing-square-bracket-indentation=0
1482 continuation-indentation=2
1486 hanging-side-comments
1487 indent-block-comments
1489 long-block-line-count=8
1492 maximum-consecutive-blank-lines=1
1493 maximum-fields-per-table=0
1494 maximum-line-length=80
1495 minimum-space-to-comment=4
1496 nobrace-left-and-indent
1498 nodelete-old-whitespace
1503 nostatic-side-comments
1504 noswallow-optional-blank-lines
1509 outdent-long-comments
1511 paren-vertical-tightness-closing=0
1512 paren-vertical-tightness=0
1515 short-concatenation-item-length=8
1517 square-bracket-tightness=1
1518 square-bracket-vertical-tightness-closing=0
1519 square-bracket-vertical-tightness=0
1520 static-block-comments
1523 backup-file-extension=bak
1527 html-table-of-contents
1531 push @defaults, "perl-syntax-check-flags=-c -T";
1533 #---------------------------------------------------------------
1534 # Define abbreviations which will be expanded into the above primitives.
1535 # These may be defined recursively.
1536 #---------------------------------------------------------------
1539 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1540 'fnl' => [qw(freeze-newlines)],
1541 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1542 'fws' => [qw(freeze-whitespace)],
1543 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1544 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1545 'nooutdent-long-lines' =>
1546 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1547 'noll' => [qw(nooutdent-long-lines)],
1548 'io' => [qw(indent-only)],
1549 'delete-all-comments' =>
1550 [qw(delete-block-comments delete-side-comments delete-pod)],
1551 'nodelete-all-comments' =>
1552 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1553 'dac' => [qw(delete-all-comments)],
1554 'ndac' => [qw(nodelete-all-comments)],
1555 'gnu' => [qw(gnu-style)],
1556 'tee-all-comments' =>
1557 [qw(tee-block-comments tee-side-comments tee-pod)],
1558 'notee-all-comments' =>
1559 [qw(notee-block-comments notee-side-comments notee-pod)],
1560 'tac' => [qw(tee-all-comments)],
1561 'ntac' => [qw(notee-all-comments)],
1562 'html' => [qw(format=html)],
1563 'nhtml' => [qw(format=tidy)],
1564 'tidy' => [qw(format=tidy)],
1566 'break-after-comma-arrows' => [qw(cab=0)],
1567 'nobreak-after-comma-arrows' => [qw(cab=1)],
1568 'baa' => [qw(cab=0)],
1569 'nbaa' => [qw(cab=1)],
1571 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1572 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1573 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1574 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1575 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1577 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1578 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1579 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1580 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1581 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1583 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1584 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1585 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1587 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1588 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1589 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1591 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1592 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1593 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1595 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1596 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1597 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1599 'otr' => [qw(opr ohbr osbr)],
1600 'opening-token-right' => [qw(opr ohbr osbr)],
1601 'notr' => [qw(nopr nohbr nosbr)],
1602 'noopening-token-right' => [qw(nopr nohbr nosbr)],
1604 'sot' => [qw(sop sohb sosb)],
1605 'nsot' => [qw(nsop nsohb nsosb)],
1606 'stack-opening-tokens' => [qw(sop sohb sosb)],
1607 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1609 'sct' => [qw(scp schb scsb)],
1610 'stack-closing-tokens' => => [qw(scp schb scsb)],
1611 'nsct' => [qw(nscp nschb nscsb)],
1612 'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1614 # 'mangle' originally deleted pod and comments, but to keep it
1615 # reversible, it no longer does. But if you really want to
1616 # delete them, just use:
1619 # An interesting use for 'mangle' is to do this:
1620 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1621 # which will form as many one-line blocks as possible
1627 delete-old-whitespace
1630 maximum-consecutive-blank-lines=0
1631 maximum-line-length=100000
1635 noblanks-before-blocks
1636 noblanks-before-subs
1641 # 'extrude' originally deleted pod and comments, but to keep it
1642 # reversible, it no longer does. But if you really want to
1643 # delete them, just use
1646 # An interesting use for 'extrude' is to do this:
1647 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1648 # which will break up all one-line blocks.
1655 delete-old-whitespace
1658 maximum-consecutive-blank-lines=0
1659 maximum-line-length=1
1662 noblanks-before-blocks
1663 noblanks-before-subs
1669 # this style tries to follow the GNU Coding Standards (which do
1670 # not really apply to perl but which are followed by some perl
1674 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1678 # Additional styles can be added here
1681 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1683 # Uncomment next line to dump all expansions for debugging:
1684 # dump_short_names(\%expansion);
1686 \@option_string, \@defaults, \%expansion,
1687 \%option_category, \%option_range
1690 } # end of generate_options
1692 sub process_command_line {
1695 $perltidyrc_stream, $is_Windows, $Windows_type,
1696 $rpending_complaint, $dump_options_type
1702 $roption_string, $rdefaults, $rexpansion,
1703 $roption_category, $roption_range
1704 ) = generate_options();
1706 #---------------------------------------------------------------
1707 # set the defaults by passing the above list through GetOptions
1708 #---------------------------------------------------------------
1714 # do not load the defaults if we are just dumping perltidyrc
1715 unless ( $dump_options_type eq 'perltidyrc' ) {
1716 for $i (@$rdefaults) { push @ARGV, "--" . $i }
1719 # Patch to save users Getopt::Long configuration
1720 # and set to Getopt::Long defaults. Use eval to avoid
1721 # breaking old versions of Perl without these routines.
1723 eval { $glc = Getopt::Long::Configure() };
1725 eval { Getopt::Long::ConfigDefaults() };
1727 else { $glc = undef }
1729 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1730 die "Programming Bug: error in setting default options";
1733 # Patch to put the previous Getopt::Long configuration back
1734 eval { Getopt::Long::Configure($glc) } if defined $glc;
1738 my @raw_options = ();
1739 my $config_file = "";
1740 my $saw_ignore_profile = 0;
1741 my $saw_extrude = 0;
1742 my $saw_dump_profile = 0;
1745 #---------------------------------------------------------------
1746 # Take a first look at the command-line parameters. Do as many
1747 # immediate dumps as possible, which can avoid confusion if the
1748 # perltidyrc file has an error.
1749 #---------------------------------------------------------------
1750 foreach $i (@ARGV) {
1753 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1754 $saw_ignore_profile = 1;
1757 # note: this must come before -pro and -profile, below:
1758 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1759 $saw_dump_profile = 1;
1761 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1764 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1767 unless ( -e $config_file ) {
1768 warn "cannot find file given with -pro=$config_file: $!\n";
1772 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1773 die "usage: -pro=filename or --profile=filename, no spaces\n";
1775 elsif ( $i =~ /^-extrude$/ ) {
1778 elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1782 elsif ( $i =~ /^-(version|v)$/ ) {
1786 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1787 dump_defaults(@$rdefaults);
1790 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1791 dump_long_names(@$roption_string);
1794 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1795 dump_short_names($rexpansion);
1798 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1799 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1804 if ( $saw_dump_profile && $saw_ignore_profile ) {
1805 warn "No profile to dump because of -npro\n";
1809 #---------------------------------------------------------------
1810 # read any .perltidyrc configuration file
1811 #---------------------------------------------------------------
1812 unless ($saw_ignore_profile) {
1814 # resolve possible conflict between $perltidyrc_stream passed
1815 # as call parameter to perltidy and -pro=filename on command
1817 if ($perltidyrc_stream) {
1820 Conflict: a perltidyrc configuration file was specified both as this
1821 perltidy call parameter: $perltidyrc_stream
1822 and with this -profile=$config_file.
1823 Using -profile=$config_file.
1827 $config_file = $perltidyrc_stream;
1831 # look for a config file if we don't have one yet
1832 my $rconfig_file_chatter;
1833 $$rconfig_file_chatter = "";
1835 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
1836 $rpending_complaint )
1837 unless $config_file;
1839 # open any config file
1842 ( $fh_config, $config_file ) =
1843 Perl::Tidy::streamhandle( $config_file, 'r' );
1844 unless ($fh_config) {
1845 $$rconfig_file_chatter .=
1846 "# $config_file exists but cannot be opened\n";
1850 if ($saw_dump_profile) {
1851 if ($saw_dump_profile) {
1852 dump_config_file( $fh_config, $config_file,
1853 $rconfig_file_chatter );
1860 my ( $rconfig_list, $death_message ) =
1861 read_config_file( $fh_config, $config_file, $rexpansion );
1862 die $death_message if ($death_message);
1864 # process any .perltidyrc parameters right now so we can
1866 if (@$rconfig_list) {
1867 local @ARGV = @$rconfig_list;
1869 expand_command_abbreviations( $rexpansion, \@raw_options,
1872 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1874 "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
1877 # Anything left in this local @ARGV is an error and must be
1878 # invalid bare words from the configuration file. We cannot
1879 # check this earlier because bare words may have been valid
1880 # values for parameters. We had to wait for GetOptions to have
1884 my $str = "\'" . pop(@ARGV) . "\'";
1885 while ( my $param = pop(@ARGV) ) {
1886 if ( length($str) < 70 ) {
1887 $str .= ", '$param'";
1895 There are $count unrecognized values in the configuration file '$config_file':
1897 Use leading dashes for parameters. Use -npro to ignore this file.
1901 # Undo any options which cause premature exit. They are not
1902 # appropriate for a config file, and it could be hard to
1903 # diagnose the cause of the premature exit.
1912 dump-want-left-space
1913 dump-want-right-space
1920 if ( defined( $Opts{$_} ) ) {
1922 warn "ignoring --$_ in config file: $config_file\n";
1929 #---------------------------------------------------------------
1930 # now process the command line parameters
1931 #---------------------------------------------------------------
1932 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
1934 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1935 die "Error on command line; for help try 'perltidy -h'\n";
1938 return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
1939 $rexpansion, $roption_category, $roption_range );
1940 } # end of process_command_line
1944 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
1946 #---------------------------------------------------------------
1947 # check and handle any interactions among the basic options..
1948 #---------------------------------------------------------------
1950 # Since -vt, -vtc, and -cti are abbreviations, but under
1951 # msdos, an unquoted input parameter like vtc=1 will be
1952 # seen as 2 parameters, vtc and 1, so the abbreviations
1953 # won't be seen. Therefore, we will catch them here if
1956 if ( defined $rOpts->{'vertical-tightness'} ) {
1957 my $vt = $rOpts->{'vertical-tightness'};
1958 $rOpts->{'paren-vertical-tightness'} = $vt;
1959 $rOpts->{'square-bracket-vertical-tightness'} = $vt;
1960 $rOpts->{'brace-vertical-tightness'} = $vt;
1963 if ( defined $rOpts->{'vertical-tightness-closing'} ) {
1964 my $vtc = $rOpts->{'vertical-tightness-closing'};
1965 $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
1966 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
1967 $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
1970 if ( defined $rOpts->{'closing-token-indentation'} ) {
1971 my $cti = $rOpts->{'closing-token-indentation'};
1972 $rOpts->{'closing-square-bracket-indentation'} = $cti;
1973 $rOpts->{'closing-brace-indentation'} = $cti;
1974 $rOpts->{'closing-paren-indentation'} = $cti;
1977 # In quiet mode, there is no log file and hence no way to report
1978 # results of syntax check, so don't do it.
1979 if ( $rOpts->{'quiet'} ) {
1980 $rOpts->{'check-syntax'} = 0;
1983 # can't check syntax if no output
1984 if ( $rOpts->{'format'} ne 'tidy' ) {
1985 $rOpts->{'check-syntax'} = 0;
1988 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
1989 # wide variety of nasty problems on these systems, because they cannot
1990 # reliably run backticks. Don't even think about changing this!
1991 if ( $rOpts->{'check-syntax'}
1993 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
1995 $rOpts->{'check-syntax'} = 0;
1998 # It's really a bad idea to check syntax as root unless you wrote
1999 # the script yourself. FIXME: not sure if this works with VMS
2000 unless ($is_Windows) {
2002 if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2003 $rOpts->{'check-syntax'} = 0;
2004 $$rpending_complaint .=
2005 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2009 # see if user set a non-negative logfile-gap
2010 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2012 # a zero gap will be taken as a 1
2013 if ( $rOpts->{'logfile-gap'} == 0 ) {
2014 $rOpts->{'logfile-gap'} = 1;
2017 # setting a non-negative logfile gap causes logfile to be saved
2018 $rOpts->{'logfile'} = 1;
2021 # not setting logfile gap, or setting it negative, causes default of 50
2023 $rOpts->{'logfile-gap'} = 50;
2026 # set short-cut flag when only indentation is to be done.
2027 # Note that the user may or may not have already set the
2029 if ( !$rOpts->{'add-whitespace'}
2030 && !$rOpts->{'delete-old-whitespace'}
2031 && !$rOpts->{'add-newlines'}
2032 && !$rOpts->{'delete-old-newlines'} )
2034 $rOpts->{'indent-only'} = 1;
2037 # -isbc implies -ibc
2038 if ( $rOpts->{'indent-spaced-block-comments'} ) {
2039 $rOpts->{'indent-block-comments'} = 1;
2042 # -bli flag implies -bl
2043 if ( $rOpts->{'brace-left-and-indent'} ) {
2044 $rOpts->{'opening-brace-on-new-line'} = 1;
2047 if ( $rOpts->{'opening-brace-always-on-right'}
2048 && $rOpts->{'opening-brace-on-new-line'} )
2051 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2052 'opening-brace-on-new-line' (-bl). Ignoring -bl.
2054 $rOpts->{'opening-brace-on-new-line'} = 0;
2057 # it simplifies things if -bl is 0 rather than undefined
2058 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2059 $rOpts->{'opening-brace-on-new-line'} = 0;
2062 # -sbl defaults to -bl if not defined
2063 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2064 $rOpts->{'opening-sub-brace-on-new-line'} =
2065 $rOpts->{'opening-brace-on-new-line'};
2068 # set shortcut flag if no blanks to be written
2069 unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
2070 $rOpts->{'swallow-optional-blank-lines'} = 1;
2073 if ( $rOpts->{'entab-leading-whitespace'} ) {
2074 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2075 warn "-et=n must use a positive integer; ignoring -et\n";
2076 $rOpts->{'entab-leading-whitespace'} = undef;
2079 # entab leading whitespace has priority over the older 'tabs' option
2080 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2083 if ( $rOpts->{'output-line-ending'} ) {
2084 unless ( is_unix() ) {
2085 warn "ignoring -ole; only works under unix\n";
2086 $rOpts->{'output-line-ending'} = undef;
2089 if ( $rOpts->{'preserve-line-endings'} ) {
2090 unless ( is_unix() ) {
2091 warn "ignoring -ple; only works under unix\n";
2092 $rOpts->{'preserve-line-endings'} = undef;
2098 sub expand_command_abbreviations {
2100 # go through @ARGV and expand any abbreviations
2102 my ( $rexpansion, $rraw_options, $config_file ) = @_;
2105 # set a pass limit to prevent an infinite loop;
2106 # 10 should be plenty, but it may be increased to allow deeply
2107 # nested expansions.
2108 my $max_passes = 10;
2111 # keep looping until all expansions have been converted into actual
2113 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2115 my $abbrev_count = 0;
2117 # loop over each item in @ARGV..
2118 foreach $word (@ARGV) {
2120 # convert any leading 'no-' to just 'no'
2121 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2123 # if it is a dash flag (instead of a file name)..
2124 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2129 # save the raw input for debug output in case of circular refs
2130 if ( $pass_count == 0 ) {
2131 push( @$rraw_options, $word );
2134 # recombine abbreviation and flag, if necessary,
2135 # to allow abbreviations with arguments such as '-vt=1'
2136 if ( $rexpansion->{ $abr . $flags } ) {
2137 $abr = $abr . $flags;
2141 # if we see this dash item in the expansion hash..
2142 if ( $rexpansion->{$abr} ) {
2145 # stuff all of the words that it expands to into the
2146 # new arg list for the next pass
2147 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2148 next unless $abbrev; # for safety; shouldn't happen
2149 push( @new_argv, '--' . $abbrev . $flags );
2153 # not in expansion hash, must be actual long name
2155 push( @new_argv, $word );
2159 # not a dash item, so just save it for the next pass
2161 push( @new_argv, $word );
2163 } # end of this pass
2165 # update parameter list @ARGV to the new one
2167 last unless ( $abbrev_count > 0 );
2169 # make sure we are not in an infinite loop
2170 if ( $pass_count == $max_passes ) {
2172 "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2173 print STDERR "Here are the raw options\n";
2175 print STDERR "(@$rraw_options)\n";
2176 my $num = @new_argv;
2179 print STDERR "After $max_passes passes here is ARGV\n";
2180 print STDERR "(@new_argv)\n";
2183 print STDERR "After $max_passes passes ARGV has $num entries\n";
2188 Please check your configuration file $config_file for circular-references.
2189 To deactivate it, use -npro.
2194 Program bug - circular-references in the %expansion hash, probably due to
2195 a recent program change.
2198 } # end of check for circular references
2199 } # end of loop over all passes
2202 # Debug routine -- this will dump the expansion hash
2203 sub dump_short_names {
2204 my $rexpansion = shift;
2206 List of short names. This list shows how all abbreviations are
2207 translated into other abbreviations and, eventually, into long names.
2208 New abbreviations may be defined in a .perltidyrc file.
2209 For a list of all long names, use perltidy --dump-long-names (-dln).
2210 --------------------------------------------------------------------------
2212 foreach my $abbrev ( sort keys %$rexpansion ) {
2213 my @list = @{ $$rexpansion{$abbrev} };
2214 print STDOUT "$abbrev --> @list\n";
2218 sub check_vms_filename {
2220 # given a valid filename (the perltidy input file)
2221 # create a modified filename and separator character
2224 # Contributed by Michael Cartmell
2226 my ( $base, $path ) = fileparse( $_[0] );
2228 # remove explicit ; version
2229 $base =~ s/;-?\d*$//
2231 # remove explicit . version ie two dots in filename NB ^ escapes a dot
2232 or $base =~ s/( # begin capture $1
2233 (?:^|[^^])\. # match a dot not preceded by a caret
2234 (?: # followed by nothing
2236 .*[^^] # anything ending in a non caret
2239 \.-?\d*$ # match . version number
2242 # normalise filename, if there are no unescaped dots then append one
2243 $base .= '.' unless $base =~ /(?:^|[^^])\./;
2245 # if we don't already have an extension then we just append the extention
2246 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2247 return ( $path . $base, $separator );
2252 # TODO: are these more standard names?
2253 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2255 # Returns a string that determines what MS OS we are on.
2256 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2257 # Returns blank string if not an MS system.
2258 # Original code contributed by: Yves Orton
2259 # We need to know this to decide where to look for config files
2261 my $rpending_complaint = shift;
2263 return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2265 # Systems built from Perl source may not have Win32.pm
2266 # But probably have Win32::GetOSVersion() anyway so the
2267 # following line is not 'required':
2268 # return $os unless eval('require Win32');
2270 # Use the standard API call to determine the version
2271 my ( $undef, $major, $minor, $build, $id );
2272 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2275 # NAME ID MAJOR MINOR
2276 # Windows NT 4 2 4 0
2277 # Windows 2000 2 5 0
2279 # Windows Server 2003 2 5 2
2281 return "win32s" unless $id; # If id==0 then its a win32s box.
2282 $os = { # Magic numbers from MSDN
2283 # documentation of GetOSVersion
2290 0 => "2000", # or NT 4, see below
2297 # If $os is undefined, the above code is out of date. Suggested updates
2299 unless ( defined $os ) {
2301 $$rpending_complaint .= <<EOS;
2302 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2303 We won't be able to look for a system-wide config file.
2307 # Unfortunately the logic used for the various versions isnt so clever..
2308 # so we have to handle an outside case.
2309 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2313 return ( $^O !~ /win32|dos/i )
2316 && ( $^O ne 'MacOS' );
2319 sub look_for_Windows {
2321 # determine Windows sub-type and location of
2322 # system-wide configuration files
2323 my $rpending_complaint = shift;
2324 my $is_Windows = ( $^O =~ /win32|dos/i );
2325 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
2326 return ( $is_Windows, $Windows_type );
2329 sub find_config_file {
2331 # look for a .perltidyrc configuration file
2332 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2333 $rpending_complaint ) = @_;
2335 $$rconfig_file_chatter .= "# Config file search...system reported as:";
2337 $$rconfig_file_chatter .= "Windows $Windows_type\n";
2340 $$rconfig_file_chatter .= " $^O\n";
2343 # sub to check file existance and record all tests
2344 my $exists_config_file = sub {
2345 my $config_file = shift;
2346 return 0 unless $config_file;
2347 $$rconfig_file_chatter .= "# Testing: $config_file\n";
2348 return -f $config_file;
2353 # look in current directory first
2354 $config_file = ".perltidyrc";
2355 return $config_file if $exists_config_file->($config_file);
2357 # Default environment vars.
2358 my @envs = qw(PERLTIDY HOME);
2360 # Check the NT/2k/XP locations, first a local machine def, then a
2362 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2364 # Now go through the enviornment ...
2365 foreach my $var (@envs) {
2366 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2367 if ( defined( $ENV{$var} ) ) {
2368 $$rconfig_file_chatter .= " = $ENV{$var}\n";
2370 # test ENV{ PERLTIDY } as file:
2371 if ( $var eq 'PERLTIDY' ) {
2372 $config_file = "$ENV{$var}";
2373 return $config_file if $exists_config_file->($config_file);
2376 # test ENV as directory:
2377 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2378 return $config_file if $exists_config_file->($config_file);
2381 $$rconfig_file_chatter .= "\n";
2385 # then look for a system-wide definition
2386 # where to look varies with OS
2389 if ($Windows_type) {
2390 my ( $os, $system, $allusers ) =
2391 Win_Config_Locs( $rpending_complaint, $Windows_type );
2393 # Check All Users directory, if there is one.
2395 $config_file = catfile( $allusers, ".perltidyrc" );
2396 return $config_file if $exists_config_file->($config_file);
2399 # Check system directory.
2400 $config_file = catfile( $system, ".perltidyrc" );
2401 return $config_file if $exists_config_file->($config_file);
2405 # Place to add customization code for other systems
2406 elsif ( $^O eq 'OS2' ) {
2408 elsif ( $^O eq 'MacOS' ) {
2410 elsif ( $^O eq 'VMS' ) {
2413 # Assume some kind of Unix
2416 $config_file = "/usr/local/etc/perltidyrc";
2417 return $config_file if $exists_config_file->($config_file);
2419 $config_file = "/etc/perltidyrc";
2420 return $config_file if $exists_config_file->($config_file);
2423 # Couldn't find a config file
2427 sub Win_Config_Locs {
2429 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2430 # or undef if its not a win32 OS. In list context returns OS, System
2431 # Directory, and All Users Directory. All Users will be empty on a
2432 # 9x/Me box. Contributed by: Yves Orton.
2434 my $rpending_complaint = shift;
2435 my $os = (@_) ? shift: Win_OS_Type();
2441 if ( $os =~ /9[58]|Me/ ) {
2442 $system = "C:/Windows";
2444 elsif ( $os =~ /NT|XP|200?/ ) {
2445 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2448 ? "C:/WinNT/profiles/All Users/"
2449 : "C:/Documents and Settings/All Users/";
2453 # This currently would only happen on a win32s computer. I dont have
2454 # one to test, so I am unsure how to proceed. Suggestions welcome!
2455 $$rpending_complaint .=
2456 "I dont know a sensible place to look for config files on an $os system.\n";
2459 return wantarray ? ( $os, $system, $allusers ) : $os;
2462 sub dump_config_file {
2464 my $config_file = shift;
2465 my $rconfig_file_chatter = shift;
2466 print STDOUT "$$rconfig_file_chatter";
2468 print STDOUT "# Dump of file: '$config_file'\n";
2469 while ( $_ = $fh->getline() ) { print STDOUT }
2470 eval { $fh->close() };
2473 print STDOUT "# ...no config file found\n";
2477 sub read_config_file {
2479 my ( $fh, $config_file, $rexpansion ) = @_;
2480 my @config_list = ();
2482 # file is bad if non-empty $death_message is returned
2483 my $death_message = "";
2487 while ( $_ = $fh->getline() ) {
2490 next if /^\s*#/; # skip full-line comment
2491 ( $_, $death_message ) = strip_comment( $_, $config_file, $line_no );
2492 last if ($death_message);
2493 s/^\s*(.*?)\s*$/$1/; # trim both ends
2496 # look for something of the general form
2501 if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2502 my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2504 # handle a new alias definition
2508 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
2513 if ( ${$rexpansion}{$name} ) {
2515 my @names = sort keys %$rexpansion;
2517 "Here is a list of all installed aliases\n(@names)\n"
2518 . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2521 ${$rexpansion}{$name} = [];
2527 my ( $rbody_parts, $msg ) = parse_args($body);
2529 $death_message = <<EOM;
2530 Error reading file '$config_file' at line number $line_no.
2532 Please fix this line or use -npro to avoid reading this file
2539 # remove leading dashes if this is an alias
2540 foreach (@$rbody_parts) { s/^\-+//; }
2541 push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2544 push( @config_list, @$rbody_parts );
2551 "Unexpected '}' seen in config file $config_file line $.\n";
2558 eval { $fh->close() };
2559 return ( \@config_list, $death_message );
2564 my ( $instr, $config_file, $line_no ) = @_;
2567 # nothing to do if no comments
2568 if ( $instr !~ /#/ ) {
2569 return ( $instr, $msg );
2572 # use simple method of no quotes
2573 elsif ( $instr !~ /['"]/ ) {
2574 $instr =~ s/\s*\#.*$//; # simple trim
2575 return ( $instr, $msg );
2578 # handle comments and quotes
2580 my $quote_char = "";
2583 # looking for ending quote character
2585 if ( $instr =~ /\G($quote_char)/gc ) {
2589 elsif ( $instr =~ /\G(.)/gc ) {
2593 # error..we reached the end without seeing the ending quote char
2596 Error reading file $config_file at line number $line_no.
2597 Did not see ending quote character <$quote_char> in this text:
2599 Please fix this line or use -npro to avoid reading this file
2605 # accumulating characters and looking for start of a quoted string
2607 if ( $instr =~ /\G([\"\'])/gc ) {
2611 elsif ( $instr =~ /\G#/gc ) {
2614 elsif ( $instr =~ /\G(.)/gc ) {
2622 return ( $outstr, $msg );
2627 # Parse a command string containing multiple string with possible
2628 # quotes, into individual commands. It might look like this, for example:
2630 # -wba=" + - " -some-thing -wbb='. && ||'
2632 # There is no need, at present, to handle escaped quote characters.
2633 # (They are not perltidy tokens, so needn't be in strings).
2636 my @body_parts = ();
2637 my $quote_char = "";
2642 # looking for ending quote character
2644 if ( $body =~ /\G($quote_char)/gc ) {
2647 elsif ( $body =~ /\G(.)/gc ) {
2651 # error..we reached the end without seeing the ending quote char
2653 if ( length($part) ) { push @body_parts, $part; }
2655 Did not see ending quote character <$quote_char> in this text:
2662 # accumulating characters and looking for start of a quoted string
2664 if ( $body =~ /\G([\"\'])/gc ) {
2667 elsif ( $body =~ /\G(\s+)/gc ) {
2668 if ( length($part) ) { push @body_parts, $part; }
2671 elsif ( $body =~ /\G(.)/gc ) {
2675 if ( length($part) ) { push @body_parts, $part; }
2680 return ( \@body_parts, $msg );
2683 sub dump_long_names {
2685 my @names = sort @_;
2687 # Command line long names (passed to GetOptions)
2688 #---------------------------------------------------------------
2689 # here is a summary of the Getopt codes:
2690 # <none> does not take an argument
2691 # =s takes a mandatory string
2692 # :s takes an optional string
2693 # =i takes a mandatory integer
2694 # :i takes an optional integer
2695 # ! does not take an argument and may be negated
2696 # i.e., -foo and -nofoo are allowed
2697 # a double dash signals the end of the options list
2699 #---------------------------------------------------------------
2702 foreach (@names) { print STDOUT "$_\n" }
2706 my @defaults = sort @_;
2707 print STDOUT "Default command line options:\n";
2708 foreach (@_) { print STDOUT "$_\n" }
2713 # write the options back out as a valid .perltidyrc file
2714 my ( $rOpts, $roption_string ) = @_;
2716 my $rGetopt_flags = \%Getopt_flags;
2717 foreach my $opt ( @{$roption_string} ) {
2719 if ( $opt =~ /(.*)(!|=.*)$/ ) {
2723 if ( defined( $rOpts->{$opt} ) ) {
2724 $rGetopt_flags->{$opt} = $flag;
2727 print STDOUT "# Final parameter set for this run:\n";
2728 foreach my $key ( sort keys %{$rOpts} ) {
2729 my $flag = $rGetopt_flags->{$key};
2730 my $value = $rOpts->{$key};
2734 if ( $flag =~ /^=/ ) {
2735 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
2736 $suffix = "=" . $value;
2738 elsif ( $flag =~ /^!/ ) {
2739 $prefix .= "no" unless ($value);
2745 "# ERROR in dump_options: unrecognized flag $flag for $key\n";
2748 print STDOUT $prefix . $key . $suffix . "\n";
2754 This is perltidy, v$VERSION
2756 Copyright 2000-2006, Steve Hancock
2758 Perltidy is free software and may be copied under the terms of the GNU
2759 General Public License, which is included in the distribution files.
2761 Complete documentation for perltidy can be found using 'man perltidy'
2762 or on the internet at http://perltidy.sourceforge.net.
2769 This is perltidy version $VERSION, a perl script indenter. Usage:
2771 perltidy [ options ] file1 file2 file3 ...
2772 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2773 perltidy [ options ] file1 -o outfile
2774 perltidy [ options ] file1 -st >outfile
2775 perltidy [ options ] <infile >outfile
2777 Options have short and long forms. Short forms are shown; see
2778 man pages for long forms. Note: '=s' indicates a required string,
2779 and '=n' indicates a required integer.
2783 -o=file name of the output file (only if single input file)
2784 -oext=s change output extension from 'tdy' to s
2785 -opath=path change path to be 'path' for output files
2786 -b backup original to .bak and modify file in-place
2787 -bext=s change default backup extension from 'bak' to s
2788 -q deactivate error messages (for running under editor)
2789 -w include non-critical warning messages in the .ERR error output
2790 -syn run perl -c to check syntax (default under unix systems)
2791 -log save .LOG file, which has useful diagnostics
2792 -f force perltidy to read a binary file
2793 -g like -log but writes more detailed .LOG file, for debugging scripts
2794 -opt write the set of options actually used to a .LOG file
2795 -npro ignore .perltidyrc configuration command file
2796 -pro=file read configuration commands from file instead of .perltidyrc
2797 -st send output to standard output, STDOUT
2798 -se send error output to standard error output, STDERR
2799 -v display version number to standard output and quit
2802 -i=n use n columns per indentation level (default n=4)
2803 -t tabs: use one tab character per indentation level, not recommeded
2804 -nt no tabs: use n spaces per indentation level (default)
2805 -et=n entab leading whitespace n spaces per tab; not recommended
2806 -io "indent only": just do indentation, no other formatting.
2807 -sil=n set starting indentation level to n; use if auto detection fails
2808 -ole=s specify output line ending (s=dos or win, mac, unix)
2809 -ple keep output line endings same as input (input must be filename)
2812 -fws freeze whitespace; this disables all whitespace changes
2813 and disables the following switches:
2814 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
2815 -bbt same as -bt but for code block braces; same as -bt if not given
2816 -bbvt block braces vertically tight; use with -bl or -bli
2817 -bbvtl=s make -bbvt to apply to selected list of block types
2818 -pt=n paren tightness (n=0, 1 or 2)
2819 -sbt=n square bracket tightness (n=0, 1, or 2)
2820 -bvt=n brace vertical tightness,
2821 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
2822 -pvt=n paren vertical tightness (see -bvt for n)
2823 -sbvt=n square bracket vertical tightness (see -bvt for n)
2824 -bvtc=n closing brace vertical tightness:
2825 n=(0=open, 1=sometimes close, 2=always close)
2826 -pvtc=n closing paren vertical tightness, see -bvtc for n.
2827 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
2828 -ci=n sets continuation indentation=n, default is n=2 spaces
2829 -lp line up parentheses, brackets, and non-BLOCK braces
2830 -sfs add space before semicolon in for( ; ; )
2831 -aws allow perltidy to add whitespace (default)
2832 -dws delete all old non-essential whitespace
2833 -icb indent closing brace of a code block
2834 -cti=n closing indentation of paren, square bracket, or non-block brace:
2835 n=0 none, =1 align with opening, =2 one full indentation level
2836 -icp equivalent to -cti=2
2837 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
2838 -wrs=s want space right of tokens in string;
2839 -sts put space before terminal semicolon of a statement
2840 -sak=s put space between keywords given in s and '(';
2841 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
2844 -fnl freeze newlines; this disables all line break changes
2845 and disables the following switches:
2846 -anl add newlines; ok to introduce new line breaks
2847 -bbs add blank line before subs and packages
2848 -bbc add blank line before block comments
2849 -bbb add blank line between major blocks
2850 -sob swallow optional blank lines
2851 -ce cuddled else; use this style: '} else {'
2852 -dnl delete old newlines (default)
2853 -mbl=n maximum consecutive blank lines (default=1)
2854 -l=n maximum line length; default n=80
2855 -bl opening brace on new line
2856 -sbl opening sub brace on new line. value of -bl is used if not given.
2857 -bli opening brace on new line and indented
2858 -bar opening brace always on right, even for long clauses
2859 -vt=n vertical tightness (requires -lp); n controls break after opening
2860 token: 0=never 1=no break if next line balanced 2=no break
2861 -vtc=n vertical tightness of closing container; n controls if closing
2862 token starts new line: 0=always 1=not unless list 1=never
2863 -wba=s want break after tokens in string; i.e. wba=': .'
2864 -wbb=s want break before tokens in string
2866 Following Old Breakpoints
2867 -boc break at old comma breaks: turns off all automatic list formatting
2868 -bol break at old logical breakpoints: or, and, ||, && (default)
2869 -bok break at old list keyword breakpoints such as map, sort (default)
2870 -bot break at old conditional (trinary ?:) operator breakpoints (default)
2871 -cab=n break at commas after a comma-arrow (=>):
2872 n=0 break at all commas after =>
2873 n=1 stable: break unless this breaks an existing one-line container
2874 n=2 break only if a one-line container cannot be formed
2875 n=3 do not treat commas after => specially at all
2878 -ibc indent block comments (default)
2879 -isbc indent spaced block comments; may indent unless no leading space
2880 -msc=n minimum desired spaces to side comment, default 4
2881 -csc add or update closing side comments after closing BLOCK brace
2882 -dcsc delete closing side comments created by a -csc command
2883 -cscp=s change closing side comment prefix to be other than '## end'
2884 -cscl=s change closing side comment to apply to selected list of blocks
2885 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
2886 -csct=n maximum number of columns of appended text, default n=20
2887 -cscw causes warning if old side comment is overwritten with -csc
2889 -sbc use 'static block comments' identified by leading '##' (default)
2890 -sbcp=s change static block comment identifier to be other than '##'
2891 -osbc outdent static block comments
2893 -ssc use 'static side comments' identified by leading '##' (default)
2894 -sscp=s change static side comment identifier to be other than '##'
2896 Delete selected text
2897 -dac delete all comments AND pod
2898 -dbc delete block comments
2899 -dsc delete side comments
2902 Send selected text to a '.TEE' file
2903 -tac tee all comments AND pod
2904 -tbc tee block comments
2905 -tsc tee side comments
2909 -olq outdent long quoted strings (default)
2910 -olc outdent a long block comment line
2911 -ola outdent statement labels
2912 -okw outdent control keywords (redo, next, last, goto, return)
2913 -okwl=s specify alternative keywords for -okw command
2916 -mft=n maximum fields per table; default n=40
2917 -x do not format lines before hash-bang line (i.e., for VMS)
2918 -asc allows perltidy to add a ';' when missing (default)
2919 -dsm allows perltidy to delete an unnecessary ';' (default)
2921 Combinations of other parameters
2922 -gnu attempt to follow GNU Coding Standards as applied to perl
2923 -mangle remove as many newlines as possible (but keep comments and pods)
2924 -extrude insert as many newlines as possible
2926 Dump and die, debugging
2927 -dop dump options used in this run to standard output and quit
2928 -ddf dump default options to standard output and quit
2929 -dsn dump all option short names to standard output and quit
2930 -dln dump option long names to standard output and quit
2931 -dpro dump whatever configuration file is in effect to standard output
2932 -dtt dump all token types to standard output and quit
2935 -html write an html file (see 'man perl2web' for many options)
2936 Note: when -html is used, no indentation or formatting are done.
2937 Hint: try perltidy -html -css=mystyle.css filename.pl
2938 and edit mystyle.css to change the appearance of filename.html.
2939 -nnn gives line numbers
2940 -pre only writes out <pre>..</pre> code section
2941 -toc places a table of contents to subs at the top (default)
2942 -pod passes pod text through pod2html (default)
2943 -frm write html as a frame (3 files)
2944 -text=s extra extension for table of contents if -frm, default='toc'
2945 -sext=s extra extension for file content if -frm, default='src'
2947 A prefix of "n" negates short form toggle switches, and a prefix of "no"
2948 negates the long forms. For example, -nasc means don't add missing
2951 If you are unable to see this entire text, try "perltidy -h | more"
2952 For more detailed information, and additional options, try "man perltidy",
2953 or go to the perltidy home page at http://perltidy.sourceforge.net
2958 sub process_this_file {
2960 my ( $truth, $beauty ) = @_;
2962 # loop to process each line of this file
2963 while ( my $line_of_tokens = $truth->get_line() ) {
2964 $beauty->write_line($line_of_tokens);
2968 eval { $beauty->finish_formatting() };
2969 $truth->report_tokenization_errors();
2974 # Use 'perl -c' to make sure that we did not create bad syntax
2975 # This is a very good independent check for programming errors
2977 # Given names of the input and output files, ($ifname, $ofname),
2978 # we do the following:
2979 # - check syntax of the input file
2980 # - if bad, all done (could be an incomplete code snippet)
2981 # - if infile syntax ok, then check syntax of the output file;
2982 # - if outfile syntax bad, issue warning; this implies a code bug!
2983 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
2985 my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
2986 my $infile_syntax_ok = 0;
2987 my $line_of_dashes = '-' x 42 . "\n";
2989 my $flags = $rOpts->{'perl-syntax-check-flags'};
2991 # be sure we invoke perl with -c
2992 # note: perl will accept repeated flags like '-c -c'. It is safest
2993 # to append another -c than try to find an interior bundled c, as
2994 # in -Tc, because such a 'c' might be in a quoted string, for example.
2995 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
2997 # be sure we invoke perl with -x if requested
2998 # same comments about repeated parameters applies
2999 if ( $rOpts->{'look-for-hash-bang'} ) {
3000 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3003 # this shouldn't happen unless a termporary file couldn't be made
3004 if ( $ifname eq '-' ) {
3005 $logger_object->write_logfile_entry(
3006 "Cannot run perl -c on STDIN and STDOUT\n");
3007 return $infile_syntax_ok;
3010 $logger_object->write_logfile_entry(
3011 "checking input file syntax with perl $flags\n");
3012 $logger_object->write_logfile_entry($line_of_dashes);
3014 # Not all operating systems/shells support redirection of the standard
3016 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3018 my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
3019 $logger_object->write_logfile_entry("$perl_output\n");
3021 if ( $perl_output =~ /syntax\s*OK/ ) {
3022 $infile_syntax_ok = 1;
3023 $logger_object->write_logfile_entry($line_of_dashes);
3024 $logger_object->write_logfile_entry(
3025 "checking output file syntax with perl $flags ...\n");
3026 $logger_object->write_logfile_entry($line_of_dashes);
3029 do_syntax_check( $ofname, $flags, $error_redirection );
3030 $logger_object->write_logfile_entry("$perl_output\n");
3032 unless ( $perl_output =~ /syntax\s*OK/ ) {
3033 $logger_object->write_logfile_entry($line_of_dashes);
3034 $logger_object->warning(
3035 "The output file has a syntax error when tested with perl $flags $ofname !\n"
3037 $logger_object->warning(
3038 "This implies an error in perltidy; the file $ofname is bad\n");
3039 $logger_object->report_definite_bug();
3041 # the perl version number will be helpful for diagnosing the problem
3042 $logger_object->write_logfile_entry(
3043 qx/perl -v $error_redirection/ . "\n" );
3048 # Only warn of perl -c syntax errors. Other messages,
3049 # such as missing modules, are too common. They can be
3050 # seen by running with perltidy -w
3051 $logger_object->complain("A syntax check using perl $flags gives: \n");
3052 $logger_object->complain($line_of_dashes);
3053 $logger_object->complain("$perl_output\n");
3054 $logger_object->complain($line_of_dashes);
3055 $infile_syntax_ok = -1;
3056 $logger_object->write_logfile_entry($line_of_dashes);
3057 $logger_object->write_logfile_entry(
3058 "The output file will not be checked because of input file problems\n"
3061 return $infile_syntax_ok;
3064 sub do_syntax_check {
3065 my ( $fname, $flags, $error_redirection ) = @_;
3067 # We have to quote the filename in case it has unusual characters
3068 # or spaces. Example: this filename #CM11.pm# gives trouble.
3069 $fname = '"' . $fname . '"';
3071 # Under VMS something like -T will become -t (and an error) so we
3072 # will put quotes around the flags. Double quotes seem to work on
3073 # Unix/Windows/VMS, but this may not work on all systems. (Single
3074 # quotes do not work under Windows). It could become necessary to
3075 # put double quotes around each flag, such as: -"c" -"T"
3076 # We may eventually need some system-dependent coding here.
3077 $flags = '"' . $flags . '"';
3079 # now wish for luck...
3080 return qx/perl $flags $fname $error_redirection/;
3083 #####################################################################
3085 # This is a stripped down version of IO::Scalar
3086 # Given a reference to a scalar, it supplies either:
3087 # a getline method which reads lines (mode='r'), or
3088 # a print method which reads lines (mode='w')
3090 #####################################################################
3091 package Perl::Tidy::IOScalar;
3095 my ( $package, $rscalar, $mode ) = @_;
3096 my $ref = ref $rscalar;
3097 if ( $ref ne 'SCALAR' ) {
3099 ------------------------------------------------------------------------
3100 expecting ref to SCALAR but got ref to ($ref); trace follows:
3101 ------------------------------------------------------------------------
3105 if ( $mode eq 'w' ) {
3107 return bless [ $rscalar, $mode ], $package;
3109 elsif ( $mode eq 'r' ) {
3111 # Convert a scalar to an array.
3112 # This avoids looking for "\n" on each call to getline
3113 my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
3115 return bless [ \@array, $mode, $i_next ], $package;
3119 ------------------------------------------------------------------------
3120 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3121 ------------------------------------------------------------------------
3128 my $mode = $self->[1];
3129 if ( $mode ne 'r' ) {
3131 ------------------------------------------------------------------------
3132 getline call requires mode = 'r' but mode = ($mode); trace follows:
3133 ------------------------------------------------------------------------
3136 my $i = $self->[2]++;
3137 ##my $line = $self->[0]->[$i];
3138 return $self->[0]->[$i];
3143 my $mode = $self->[1];
3144 if ( $mode ne 'w' ) {
3146 ------------------------------------------------------------------------
3147 print call requires mode = 'w' but mode = ($mode); trace follows:
3148 ------------------------------------------------------------------------
3151 ${ $self->[0] } .= $_[0];
3153 sub close { return }
3155 #####################################################################
3157 # This is a stripped down version of IO::ScalarArray
3158 # Given a reference to an array, it supplies either:
3159 # a getline method which reads lines (mode='r'), or
3160 # a print method which reads lines (mode='w')
3162 # NOTE: this routine assumes that that there aren't any embedded
3163 # newlines within any of the array elements. There are no checks
3166 #####################################################################
3167 package Perl::Tidy::IOScalarArray;
3171 my ( $package, $rarray, $mode ) = @_;
3172 my $ref = ref $rarray;
3173 if ( $ref ne 'ARRAY' ) {
3175 ------------------------------------------------------------------------
3176 expecting ref to ARRAY but got ref to ($ref); trace follows:
3177 ------------------------------------------------------------------------
3181 if ( $mode eq 'w' ) {
3183 return bless [ $rarray, $mode ], $package;
3185 elsif ( $mode eq 'r' ) {
3187 return bless [ $rarray, $mode, $i_next ], $package;
3191 ------------------------------------------------------------------------
3192 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3193 ------------------------------------------------------------------------
3200 my $mode = $self->[1];
3201 if ( $mode ne 'r' ) {
3203 ------------------------------------------------------------------------
3204 getline requires mode = 'r' but mode = ($mode); trace follows:
3205 ------------------------------------------------------------------------
3208 my $i = $self->[2]++;
3209 ##my $line = $self->[0]->[$i];
3210 return $self->[0]->[$i];
3215 my $mode = $self->[1];
3216 if ( $mode ne 'w' ) {
3218 ------------------------------------------------------------------------
3219 print requires mode = 'w' but mode = ($mode); trace follows:
3220 ------------------------------------------------------------------------
3223 push @{ $self->[0] }, $_[0];
3225 sub close { return }
3227 #####################################################################
3229 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3230 # which returns the next line to be parsed
3232 #####################################################################
3234 package Perl::Tidy::LineSource;
3238 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3239 my $input_file_copy = undef;
3242 my $input_line_ending;
3243 if ( $rOpts->{'preserve-line-endings'} ) {
3244 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3247 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3248 return undef unless $fh;
3250 # in order to check output syntax when standard output is used,
3251 # or when it is an object, we have to make a copy of the file
3252 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3255 # Turning off syntax check when input output is used.
3256 # The reason is that temporary files cause problems on
3258 $rOpts->{'check-syntax'} = 0;
3259 $input_file_copy = '-';
3261 $$rpending_logfile_message .= <<EOM;
3262 Note: --syntax check will be skipped because standard input is used
3269 _fh_copy => $fh_copy,
3270 _filename => $input_file,
3271 _input_file_copy => $input_file_copy,
3272 _input_line_ending => $input_line_ending,
3273 _rinput_buffer => [],
3278 sub get_input_file_copy_name {
3280 my $ifname = $self->{_input_file_copy};
3282 $ifname = $self->{_filename};
3287 sub close_input_file {
3289 eval { $self->{_fh}->close() };
3290 eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
3296 my $fh = $self->{_fh};
3297 my $fh_copy = $self->{_fh_copy};
3298 my $rinput_buffer = $self->{_rinput_buffer};
3300 if ( scalar(@$rinput_buffer) ) {
3301 $line = shift @$rinput_buffer;
3304 $line = $fh->getline();
3306 # patch to read raw mac files under unix, dos
3307 # see if the first line has embedded \r's
3308 if ( $line && !$self->{_started} ) {
3309 if ( $line =~ /[\015][^\015\012]/ ) {
3311 # found one -- break the line up and store in a buffer
3312 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3313 my $count = @$rinput_buffer;
3314 $line = shift @$rinput_buffer;
3316 $self->{_started}++;
3319 if ( $line && $fh_copy ) { $fh_copy->print($line); }
3326 my $fh = $self->{_fh};
3327 my $fh_copy = $self->{_fh_copy};
3328 $line = $fh->getline();
3329 if ( $line && $fh_copy ) { $fh_copy->print($line); }
3333 #####################################################################
3335 # the Perl::Tidy::LineSink class supplies a write_line method for
3336 # actual file writing
3338 #####################################################################
3340 package Perl::Tidy::LineSink;
3344 my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3345 $rpending_logfile_message )
3348 my $fh_copy = undef;
3350 my $output_file_copy = "";
3351 my $output_file_open = 0;
3353 if ( $rOpts->{'format'} eq 'tidy' ) {
3354 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3355 unless ($fh) { die "Cannot write to output stream\n"; }
3356 $output_file_open = 1;
3359 # in order to check output syntax when standard output is used,
3360 # or when it is an object, we have to make a copy of the file
3361 if ( $output_file eq '-' || ref $output_file ) {
3362 if ( $rOpts->{'check-syntax'} ) {
3364 # Turning off syntax check when standard output is used.
3365 # The reason is that temporary files cause problems on
3367 $rOpts->{'check-syntax'} = 0;
3368 $output_file_copy = '-';
3369 $$rpending_logfile_message .= <<EOM;
3370 Note: --syntax check will be skipped because standard output is used
3378 _fh_copy => $fh_copy,
3380 _output_file => $output_file,
3381 _output_file_open => $output_file_open,
3382 _output_file_copy => $output_file_copy,
3384 _tee_file => $tee_file,
3385 _tee_file_opened => 0,
3386 _line_separator => $line_separator,
3393 my $fh = $self->{_fh};
3394 my $fh_copy = $self->{_fh_copy};
3396 my $output_file_open = $self->{_output_file_open};
3398 $_[0] .= $self->{_line_separator};
3400 $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3401 print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3403 if ( $self->{_tee_flag} ) {
3404 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3405 my $fh_tee = $self->{_fh_tee};
3406 print $fh_tee $_[0];
3410 sub get_output_file_copy {
3412 my $ofname = $self->{_output_file_copy};
3414 $ofname = $self->{_output_file};
3421 $self->{_tee_flag} = 1;
3426 $self->{_tee_flag} = 0;
3429 sub really_open_tee_file {
3431 my $tee_file = $self->{_tee_file};
3433 $fh_tee = IO::File->new(">$tee_file")
3434 or die("couldn't open TEE file $tee_file: $!\n");
3435 $self->{_tee_file_opened} = 1;
3436 $self->{_fh_tee} = $fh_tee;
3439 sub close_output_file {
3441 eval { $self->{_fh}->close() } if $self->{_output_file_open};
3442 eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3443 $self->close_tee_file();
3446 sub close_tee_file {
3449 if ( $self->{_tee_file_opened} ) {
3450 eval { $self->{_fh_tee}->close() };
3451 $self->{_tee_file_opened} = 0;
3455 #####################################################################
3457 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3458 # useful for program development.
3460 # Only one such file is created regardless of the number of input
3461 # files processed. This allows the results of processing many files
3462 # to be summarized in a single file.
3464 #####################################################################
3466 package Perl::Tidy::Diagnostics;
3472 _write_diagnostics_count => 0,
3473 _last_diagnostic_file => "",
3479 sub set_input_file {
3481 $self->{_input_file} = $_[0];
3484 # This is a diagnostic routine which is useful for program development.
3485 # Output from debug messages go to a file named DIAGNOSTICS, where
3486 # they are labeled by file and line. This allows many files to be
3487 # scanned at once for some particular condition of interest.
3488 sub write_diagnostics {
3491 unless ( $self->{_write_diagnostics_count} ) {
3492 open DIAGNOSTICS, ">DIAGNOSTICS"
3493 or death("couldn't open DIAGNOSTICS: $!\n");
3496 my $last_diagnostic_file = $self->{_last_diagnostic_file};
3497 my $input_file = $self->{_input_file};
3498 if ( $last_diagnostic_file ne $input_file ) {
3499 print DIAGNOSTICS "\nFILE:$input_file\n";
3501 $self->{_last_diagnostic_file} = $input_file;
3502 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3503 print DIAGNOSTICS "$input_line_number:\t@_";
3504 $self->{_write_diagnostics_count}++;
3507 #####################################################################
3509 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3511 #####################################################################
3513 package Perl::Tidy::Logger;
3518 my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3520 # remove any old error output file
3521 unless ( ref($warning_file) ) {
3522 if ( -e $warning_file ) { unlink($warning_file) }
3526 _log_file => $log_file,
3527 _fh_warnings => undef,
3529 _fh_warnings => undef,
3530 _last_input_line_written => 0,
3531 _at_end_of_file => 0,
3533 _block_log_output => 0,
3534 _line_of_tokens => undef,
3535 _output_line_number => undef,
3536 _wrote_line_information_string => 0,
3537 _wrote_column_headings => 0,
3538 _warning_file => $warning_file,
3539 _warning_count => 0,
3540 _complaint_count => 0,
3541 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
3542 _saw_brace_error => 0,
3543 _saw_extrude => $saw_extrude,
3544 _output_array => [],
3548 sub close_log_file {
3551 if ( $self->{_fh_warnings} ) {
3552 eval { $self->{_fh_warnings}->close() };
3553 $self->{_fh_warnings} = undef;
3557 sub get_warning_count {
3559 return $self->{_warning_count};
3562 sub get_use_prefix {
3564 return $self->{_use_prefix};
3567 sub block_log_output {
3569 $self->{_block_log_output} = 1;
3572 sub unblock_log_output {
3574 $self->{_block_log_output} = 0;
3577 sub interrupt_logfile {
3579 $self->{_use_prefix} = 0;
3580 $self->warning("\n");
3581 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
3584 sub resume_logfile {
3586 $self->write_logfile_entry( '#' x 60 . "\n" );
3587 $self->{_use_prefix} = 1;
3590 sub we_are_at_the_last_line {
3592 unless ( $self->{_wrote_line_information_string} ) {
3593 $self->write_logfile_entry("Last line\n\n");
3595 $self->{_at_end_of_file} = 1;
3598 # record some stuff in case we go down in flames
3601 my ( $line_of_tokens, $output_line_number ) = @_;
3602 my $input_line = $line_of_tokens->{_line_text};
3603 my $input_line_number = $line_of_tokens->{_line_number};
3605 # save line information in case we have to write a logfile message
3606 $self->{_line_of_tokens} = $line_of_tokens;
3607 $self->{_output_line_number} = $output_line_number;
3608 $self->{_wrote_line_information_string} = 0;
3610 my $last_input_line_written = $self->{_last_input_line_written};
3611 my $rOpts = $self->{_rOpts};
3614 ( $input_line_number - $last_input_line_written ) >=
3615 $rOpts->{'logfile-gap'}
3617 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3620 my $rlevels = $line_of_tokens->{_rlevels};
3621 my $structural_indentation_level = $$rlevels[0];
3622 $self->{_last_input_line_written} = $input_line_number;
3623 ( my $out_str = $input_line ) =~ s/^\s*//;
3626 $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3628 if ( length($out_str) > 35 ) {
3629 $out_str = substr( $out_str, 0, 35 ) . " ....";
3631 $self->logfile_output( "", "$out_str\n" );
3635 sub write_logfile_entry {
3638 # add leading >>> to avoid confusing error mesages and code
3639 $self->logfile_output( ">>>", "@_" );
3642 sub write_column_headings {
3645 $self->{_wrote_column_headings} = 1;
3646 my $routput_array = $self->{_output_array};
3647 push @{$routput_array}, <<EOM;
3648 The nesting depths in the table below are at the start of the lines.
3649 The indicated output line numbers are not always exact.
3650 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3652 in:out indent c b nesting code + messages; (messages begin with >>>)
3653 lines levels i k (code begins with one '.' per indent level)
3654 ------ ----- - - -------- -------------------------------------------
3658 sub make_line_information_string {
3660 # make columns of information when a logfile message needs to go out
3662 my $line_of_tokens = $self->{_line_of_tokens};
3663 my $input_line_number = $line_of_tokens->{_line_number};
3664 my $line_information_string = "";
3665 if ($input_line_number) {
3667 my $output_line_number = $self->{_output_line_number};
3668 my $brace_depth = $line_of_tokens->{_curly_brace_depth};
3669 my $paren_depth = $line_of_tokens->{_paren_depth};
3670 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3671 my $python_indentation_level =
3672 $line_of_tokens->{_python_indentation_level};
3673 my $rlevels = $line_of_tokens->{_rlevels};
3674 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3675 my $rci_levels = $line_of_tokens->{_rci_levels};
3676 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3678 my $structural_indentation_level = $$rlevels[0];
3680 $self->write_column_headings() unless $self->{_wrote_column_headings};
3682 # keep logfile columns aligned for scripts up to 999 lines;
3683 # for longer scripts it doesn't really matter
3684 my $extra_space = "";
3686 ( $input_line_number < 10 ) ? " "
3687 : ( $input_line_number < 100 ) ? " "
3690 ( $output_line_number < 10 ) ? " "
3691 : ( $output_line_number < 100 ) ? " "
3694 # there are 2 possible nesting strings:
3695 # the original which looks like this: (0 [1 {2
3696 # the new one, which looks like this: {{[
3697 # the new one is easier to read, and shows the order, but
3698 # could be arbitrarily long, so we use it unless it is too long
3699 my $nesting_string =
3700 "($paren_depth [$square_bracket_depth {$brace_depth";
3701 my $nesting_string_new = $$rnesting_tokens[0];
3703 my $ci_level = $$rci_levels[0];
3704 if ( $ci_level > 9 ) { $ci_level = '*' }
3705 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3707 if ( length($nesting_string_new) <= 8 ) {
3709 $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3711 if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3712 $line_information_string =
3713 "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3715 return $line_information_string;
3718 sub logfile_output {
3720 my ( $prompt, $msg ) = @_;
3721 return if ( $self->{_block_log_output} );
3723 my $routput_array = $self->{_output_array};
3724 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3725 push @{$routput_array}, "$msg";
3728 my $line_information_string = $self->make_line_information_string();
3729 $self->{_wrote_line_information_string} = 1;
3731 if ($line_information_string) {
3732 push @{$routput_array}, "$line_information_string $prompt$msg";
3735 push @{$routput_array}, "$msg";
3740 sub get_saw_brace_error {
3742 return $self->{_saw_brace_error};
3745 sub increment_brace_error {
3747 $self->{_saw_brace_error}++;
3752 use constant BRACE_WARNING_LIMIT => 10;
3753 my $saw_brace_error = $self->{_saw_brace_error};
3755 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3759 $self->{_saw_brace_error} = $saw_brace_error;
3761 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3762 $self->warning("No further warnings of this type will be given\n");
3768 # handle non-critical warning messages based on input flag
3770 my $rOpts = $self->{_rOpts};
3772 # these appear in .ERR output only if -w flag is used
3773 if ( $rOpts->{'warning-output'} ) {
3777 # otherwise, they go to the .LOG file
3779 $self->{_complaint_count}++;
3780 $self->write_logfile_entry(@_);
3786 # report errors to .ERR file (or stdout)
3788 use constant WARNING_LIMIT => 50;
3790 my $rOpts = $self->{_rOpts};
3791 unless ( $rOpts->{'quiet'} ) {
3793 my $warning_count = $self->{_warning_count};
3794 unless ($warning_count) {
3795 my $warning_file = $self->{_warning_file};
3797 if ( $rOpts->{'standard-error-output'} ) {
3798 $fh_warnings = *STDERR;
3801 ( $fh_warnings, my $filename ) =
3802 Perl::Tidy::streamhandle( $warning_file, 'w' );
3803 $fh_warnings or die("couldn't open $filename $!\n");
3804 warn "## Please see file $filename\n";
3806 $self->{_fh_warnings} = $fh_warnings;
3809 my $fh_warnings = $self->{_fh_warnings};
3810 if ( $warning_count < WARNING_LIMIT ) {
3811 if ( $self->get_use_prefix() > 0 ) {
3812 my $input_line_number =
3813 Perl::Tidy::Tokenizer::get_input_line_number();
3814 print $fh_warnings "$input_line_number:\t@_";
3815 $self->write_logfile_entry("WARNING: @_");
3818 print $fh_warnings @_;
3819 $self->write_logfile_entry(@_);
3823 $self->{_warning_count} = $warning_count;
3825 if ( $warning_count == WARNING_LIMIT ) {
3826 print $fh_warnings "No further warnings will be given";
3831 # programming bug codes:
3833 # 0 = maybe, not sure.
3835 sub report_possible_bug {
3837 my $saw_code_bug = $self->{_saw_code_bug};
3838 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
3841 sub report_definite_bug {
3843 $self->{_saw_code_bug} = 1;
3846 sub ask_user_for_bug_report {
3849 my ( $infile_syntax_ok, $formatter ) = @_;
3850 my $saw_code_bug = $self->{_saw_code_bug};
3851 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
3852 $self->warning(<<EOM);
3854 You may have encountered a code bug in perltidy. If you think so, and
3855 the problem is not listed in the BUGS file at
3856 http://perltidy.sourceforge.net, please report it so that it can be
3857 corrected. Include the smallest possible script which has the problem,
3858 along with the .LOG file. See the manual pages for contact information.
3863 elsif ( $saw_code_bug == 1 ) {
3864 if ( $self->{_saw_extrude} ) {
3865 $self->warning(<<EOM);
3866 You may have encountered a bug in perltidy. However, since you are
3867 using the -extrude option, the problem may be with perl itself, which
3868 has occasional parsing problems with this type of file. If you believe
3869 that the problem is with perltidy, and the problem is not listed in the
3870 BUGS file at http://perltidy.sourceforge.net, please report it so that
3871 it can be corrected. Include the smallest possible script which has the
3872 problem, along with the .LOG file. See the manual pages for contact
3878 $self->warning(<<EOM);
3880 Oops, you seem to have encountered a bug in perltidy. Please check the
3881 BUGS file at http://perltidy.sourceforge.net. If the problem is not
3882 listed there, please report it so that it can be corrected. Include the
3883 smallest possible script which produces this message, along with the
3884 .LOG file if appropriate. See the manual pages for contact information.
3885 Your efforts are appreciated.
3888 my $added_semicolon_count = 0;
3890 $added_semicolon_count =
3891 $formatter->get_added_semicolon_count();
3893 if ( $added_semicolon_count > 0 ) {
3894 $self->warning(<<EOM);
3896 The log file shows that perltidy added $added_semicolon_count semicolons.
3897 Please rerun with -nasc to see if that is the cause of the syntax error. Even
3898 if that is the problem, please report it so that it can be fixed.
3908 # called after all formatting to summarize errors
3910 my ( $infile_syntax_ok, $formatter ) = @_;
3912 my $rOpts = $self->{_rOpts};
3913 my $warning_count = $self->{_warning_count};
3914 my $saw_code_bug = $self->{_saw_code_bug};
3916 my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
3917 || $saw_code_bug == 1
3918 || $rOpts->{'logfile'};
3919 my $log_file = $self->{_log_file};
3920 if ($warning_count) {
3921 if ($save_logfile) {
3922 $self->block_log_output(); # avoid echoing this to the logfile
3924 "The logfile $log_file may contain useful information\n");
3925 $self->unblock_log_output();
3928 if ( $self->{_complaint_count} > 0 ) {
3930 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
3934 if ( $self->{_saw_brace_error}
3935 && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
3937 $self->warning("To save a full .LOG file rerun with -g\n");
3940 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
3942 if ($save_logfile) {
3943 my $log_file = $self->{_log_file};
3944 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
3946 my $routput_array = $self->{_output_array};
3947 foreach ( @{$routput_array} ) { $fh->print($_) }
3948 eval { $fh->close() };
3953 #####################################################################
3955 # The Perl::Tidy::DevNull class supplies a dummy print method
3957 #####################################################################
3959 package Perl::Tidy::DevNull;
3960 sub new { return bless {}, $_[0] }
3961 sub print { return }
3962 sub close { return }
3964 #####################################################################
3966 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
3968 #####################################################################
3970 package Perl::Tidy::HtmlWriter;
3980 %short_to_long_names
3984 $missing_html_entities
3987 # replace unsafe characters with HTML entity representation if HTML::Entities
3989 { eval "use HTML::Entities"; $missing_html_entities = $@; }
3993 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
3994 $html_src_extension )
3997 my $html_file_opened = 0;
3999 ( $html_fh, my $html_filename ) =
4000 Perl::Tidy::streamhandle( $html_file, 'w' );
4002 warn("can't open $html_file: $!\n");
4005 $html_file_opened = 1;
4007 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4008 $input_file = "NONAME";
4011 # write the table of contents to a string
4013 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4016 my @pre_string_stack;
4017 if ( $rOpts->{'html-pre-only'} ) {
4019 # pre section goes directly to the output stream
4020 $html_pre_fh = $html_fh;
4021 $html_pre_fh->print( <<"PRE_END");
4027 # pre section go out to a temporary string
4029 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4030 push @pre_string_stack, \$pre_string;
4033 # pod text gets diverted if the 'pod2html' is used
4036 if ( $rOpts->{'pod2html'} ) {
4037 if ( $rOpts->{'html-pre-only'} ) {
4038 undef $rOpts->{'pod2html'};
4041 eval "use Pod::Html";
4044 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4045 undef $rOpts->{'pod2html'};
4048 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4055 if ( $rOpts->{'frames'} ) {
4056 unless ($extension) {
4058 "cannot use frames without a specified output extension; ignoring -frm\n";
4059 undef $rOpts->{'frames'};
4062 $toc_filename = $input_file . $html_toc_extension . $extension;
4063 $src_filename = $input_file . $html_src_extension . $extension;
4067 # ----------------------------------------------------------
4068 # Output is now directed as follows:
4069 # html_toc_fh <-- table of contents items
4070 # html_pre_fh <-- the <pre> section of formatted code, except:
4071 # html_pod_fh <-- pod goes here with the pod2html option
4072 # ----------------------------------------------------------
4074 my $title = $rOpts->{'title'};
4076 ( $title, my $path ) = fileparse($input_file);
4078 my $toc_item_count = 0;
4079 my $in_toc_package = "";
4082 _input_file => $input_file, # name of input file
4083 _title => $title, # title, unescaped
4084 _html_file => $html_file, # name of .html output file
4085 _toc_filename => $toc_filename, # for frames option
4086 _src_filename => $src_filename, # for frames option
4087 _html_file_opened => $html_file_opened, # a flag
4088 _html_fh => $html_fh, # the output stream
4089 _html_pre_fh => $html_pre_fh, # pre section goes here
4090 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
4091 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4092 _rpod_string => \$pod_string, # string holding pod
4093 _pod_cut_count => 0, # how many =cut's?
4094 _html_toc_fh => $html_toc_fh, # fh for table of contents
4095 _rtoc_string => \$toc_string, # string holding toc
4096 _rtoc_item_count => \$toc_item_count, # how many toc items
4097 _rin_toc_package => \$in_toc_package, # package name
4098 _rtoc_name_count => {}, # hash to track unique names
4099 _rpackage_stack => [], # stack to check for package
4101 _rlast_level => \$last_level, # brace indentation level
4107 # Add an item to the html table of contents.
4108 # This is called even if no table of contents is written,
4109 # because we still want to put the anchors in the <pre> text.
4110 # We are given an anchor name and its type; types are:
4111 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
4112 # There must be an 'EOF' call at the end to wrap things up.
4114 my ( $name, $type ) = @_;
4115 my $html_toc_fh = $self->{_html_toc_fh};
4116 my $html_pre_fh = $self->{_html_pre_fh};
4117 my $rtoc_name_count = $self->{_rtoc_name_count};
4118 my $rtoc_item_count = $self->{_rtoc_item_count};
4119 my $rlast_level = $self->{_rlast_level};
4120 my $rin_toc_package = $self->{_rin_toc_package};
4121 my $rpackage_stack = $self->{_rpackage_stack};
4123 # packages contain sublists of subs, so to avoid errors all package
4124 # items are written and finished with the following routines
4125 my $end_package_list = sub {
4126 if ($$rin_toc_package) {
4127 $html_toc_fh->print("</ul>\n</li>\n");
4128 $$rin_toc_package = "";
4132 my $start_package_list = sub {
4133 my ( $unique_name, $package ) = @_;
4134 if ($$rin_toc_package) { $end_package_list->() }
4135 $html_toc_fh->print(<<EOM);
4136 <li><a href=\"#$unique_name\">package $package</a>
4139 $$rin_toc_package = $package;
4142 # start the table of contents on the first item
4143 unless ($$rtoc_item_count) {
4145 # but just quit if we hit EOF without any other entries
4146 # in this case, there will be no toc
4147 return if ( $type eq 'EOF' );
4148 $html_toc_fh->print( <<"TOC_END");
4149 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4153 $$rtoc_item_count++;
4155 # make a unique anchor name for this location:
4156 # - packages get a 'package-' prefix
4157 # - subs use their names
4158 my $unique_name = $name;
4159 if ( $type eq 'package' ) { $unique_name = "package-$name" }
4161 # append '-1', '-2', etc if necessary to make unique; this will
4162 # be unique because subs and packages cannot have a '-'
4163 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4164 $unique_name .= "-$count";
4167 # - all names get terminal '-' if pod2html is used, to avoid
4168 # conflicts with anchor names created by pod2html
4169 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4171 # start/stop lists of subs
4172 if ( $type eq 'sub' ) {
4173 my $package = $rpackage_stack->[$$rlast_level];
4174 unless ($package) { $package = 'main' }
4176 # if we're already in a package/sub list, be sure its the right
4177 # package or else close it
4178 if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4179 $end_package_list->();
4182 # start a package/sub list if necessary
4183 unless ($$rin_toc_package) {
4184 $start_package_list->( $unique_name, $package );
4188 # now write an entry in the toc for this item
4189 if ( $type eq 'package' ) {
4190 $start_package_list->( $unique_name, $name );
4192 elsif ( $type eq 'sub' ) {
4193 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4196 $end_package_list->();
4197 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4200 # write the anchor in the <pre> section
4201 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4203 # end the table of contents, if any, on the end of file
4204 if ( $type eq 'EOF' ) {
4205 $html_toc_fh->print( <<"TOC_END");
4207 <!-- END CODE INDEX -->
4214 # This is the official list of tokens which may be identified by the
4215 # user. Long names are used as getopt keys. Short names are
4216 # convenient short abbreviations for specifying input. Short names
4217 # somewhat resemble token type characters, but are often different
4218 # because they may only be alphanumeric, to allow command line
4219 # input. Also, note that because of case insensitivity of html,
4220 # this table must be in a single case only (I've chosen to use all
4222 # When adding NEW_TOKENS: update this hash table
4223 # short names => long names
4224 %short_to_long_names = (
4234 'pu' => 'punctuation',
4235 'i' => 'identifier',
4237 'h' => 'here-doc-target',
4238 'hh' => 'here-doc-text',
4240 'sc' => 'semicolon',
4241 'm' => 'subroutine',
4245 # Now we have to map actual token types into one of the above short
4246 # names; any token types not mapped will get 'punctuation'
4249 # The values of this hash table correspond to the keys of the
4250 # previous hash table.
4251 # The keys of this hash table are token types and can be seen
4252 # by running with --dump-token-types (-dtt).
4254 # When adding NEW_TOKENS: update this hash table
4255 # $type => $short_name
4256 %token_short_names = (
4281 # These token types will all be called identifiers for now
4282 # FIXME: need to separate user defined modules as separate type
4283 my @identifier = qw" i t U C Y Z G :: ";
4284 @token_short_names{@identifier} = ('i') x scalar(@identifier);
4286 # These token types will be called 'structure'
4287 my @structure = qw" { } ";
4288 @token_short_names{@structure} = ('s') x scalar(@structure);
4290 # OLD NOTES: save for reference
4291 # Any of these could be added later if it would be useful.
4292 # For now, they will by default become punctuation
4293 # my @list = qw" L R [ ] ";
4294 # @token_long_names{@list} = ('non-structure') x scalar(@list);
4297 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4299 # @token_long_names{@list} = ('math') x scalar(@list);
4301 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
4302 # @token_long_names{@list} = ('bit') x scalar(@list);
4304 # my @list = qw" == != < > <= <=> ";
4305 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4307 # my @list = qw" && || ! &&= ||= //= ";
4308 # @token_long_names{@list} = ('logical') x scalar(@list);
4310 # my @list = qw" . .= =~ !~ x x= ";
4311 # @token_long_names{@list} = ('string-operators') x scalar(@list);
4314 # my @list = qw" .. -> <> ... \ ? ";
4315 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
4319 sub make_getopt_long_names {
4321 my ($rgetopt_names) = @_;
4322 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4323 push @$rgetopt_names, "html-color-$name=s";
4324 push @$rgetopt_names, "html-italic-$name!";
4325 push @$rgetopt_names, "html-bold-$name!";
4327 push @$rgetopt_names, "html-color-background=s";
4328 push @$rgetopt_names, "html-linked-style-sheet=s";
4329 push @$rgetopt_names, "nohtml-style-sheets";
4330 push @$rgetopt_names, "html-pre-only";
4331 push @$rgetopt_names, "html-line-numbers";
4332 push @$rgetopt_names, "html-entities!";
4333 push @$rgetopt_names, "stylesheet";
4334 push @$rgetopt_names, "html-table-of-contents!";
4335 push @$rgetopt_names, "pod2html!";
4336 push @$rgetopt_names, "frames!";
4337 push @$rgetopt_names, "html-toc-extension=s";
4338 push @$rgetopt_names, "html-src-extension=s";
4340 # Pod::Html parameters:
4341 push @$rgetopt_names, "backlink=s";
4342 push @$rgetopt_names, "cachedir=s";
4343 push @$rgetopt_names, "htmlroot=s";
4344 push @$rgetopt_names, "libpods=s";
4345 push @$rgetopt_names, "podpath=s";
4346 push @$rgetopt_names, "podroot=s";
4347 push @$rgetopt_names, "title=s";
4349 # Pod::Html parameters with leading 'pod' which will be removed
4350 # before the call to Pod::Html
4351 push @$rgetopt_names, "podquiet!";
4352 push @$rgetopt_names, "podverbose!";
4353 push @$rgetopt_names, "podrecurse!";
4354 push @$rgetopt_names, "podflush";
4355 push @$rgetopt_names, "podheader!";
4356 push @$rgetopt_names, "podindex!";
4359 sub make_abbreviated_names {
4361 # We're appending things like this to the expansion list:
4362 # 'hcc' => [qw(html-color-comment)],
4363 # 'hck' => [qw(html-color-keyword)],
4366 my ($rexpansion) = @_;
4368 # abbreviations for color/bold/italic properties
4369 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4370 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
4371 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
4372 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
4373 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4374 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4377 # abbreviations for all other html options
4378 ${$rexpansion}{"hcbg"} = ["html-color-background"];
4379 ${$rexpansion}{"pre"} = ["html-pre-only"];
4380 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
4381 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
4382 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
4383 ${$rexpansion}{"hent"} = ["html-entities"];
4384 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4385 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
4386 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
4387 ${$rexpansion}{"ss"} = ["stylesheet"];
4388 ${$rexpansion}{"pod"} = ["pod2html"];
4389 ${$rexpansion}{"npod"} = ["nopod2html"];
4390 ${$rexpansion}{"frm"} = ["frames"];
4391 ${$rexpansion}{"nfrm"} = ["noframes"];
4392 ${$rexpansion}{"text"} = ["html-toc-extension"];
4393 ${$rexpansion}{"sext"} = ["html-src-extension"];
4398 # This will be called once after options have been parsed
4402 # X11 color names for default settings that seemed to look ok
4403 # (these color names are only used for programming clarity; the hex
4404 # numbers are actually written)
4405 use constant ForestGreen => "#228B22";
4406 use constant SaddleBrown => "#8B4513";
4407 use constant magenta4 => "#8B008B";
4408 use constant IndianRed3 => "#CD5555";
4409 use constant DeepSkyBlue4 => "#00688B";
4410 use constant MediumOrchid3 => "#B452CD";
4411 use constant black => "#000000";
4412 use constant white => "#FFFFFF";
4413 use constant red => "#FF0000";
4415 # set default color, bold, italic properties
4416 # anything not listed here will be given the default (punctuation) color --
4417 # these types currently not listed and get default: ws pu s sc cm co p
4418 # When adding NEW_TOKENS: add an entry here if you don't want defaults
4420 # set_default_properties( $short_name, default_color, bold?, italic? );
4421 set_default_properties( 'c', ForestGreen, 0, 0 );
4422 set_default_properties( 'pd', ForestGreen, 0, 1 );
4423 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
4424 set_default_properties( 'q', IndianRed3, 0, 0 );
4425 set_default_properties( 'hh', IndianRed3, 0, 1 );
4426 set_default_properties( 'h', IndianRed3, 1, 0 );
4427 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
4428 set_default_properties( 'w', black, 0, 0 );
4429 set_default_properties( 'n', MediumOrchid3, 0, 0 );
4430 set_default_properties( 'v', MediumOrchid3, 0, 0 );
4431 set_default_properties( 'j', IndianRed3, 1, 0 );
4432 set_default_properties( 'm', red, 1, 0 );
4434 set_default_color( 'html-color-background', white );
4435 set_default_color( 'html-color-punctuation', black );
4437 # setup property lookup tables for tokens based on their short names
4438 # every token type has a short name, and will use these tables
4439 # to do the html markup
4440 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4441 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
4442 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
4443 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4446 # write style sheet to STDOUT and die if requested
4447 if ( defined( $rOpts->{'stylesheet'} ) ) {
4448 write_style_sheet_file('-');
4452 # make sure user gives a file name after -css
4453 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4454 $css_linkname = $rOpts->{'html-linked-style-sheet'};
4455 if ( $css_linkname =~ /^-/ ) {
4456 die "You must specify a valid filename after -css\n";
4460 # check for conflict
4461 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4462 $rOpts->{'nohtml-style-sheets'} = 0;
4463 warning("You can't specify both -css and -nss; -nss ignored\n");
4466 # write a style sheet file if necessary
4467 if ($css_linkname) {
4469 # if the selected filename exists, don't write, because user may
4470 # have done some work by hand to create it; use backup name instead
4471 # Also, this will avoid a potential disaster in which the user
4472 # forgets to specify the style sheet, like this:
4473 # perltidy -html -css myfile1.pl myfile2.pl
4474 # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4475 my $css_filename = $css_linkname;
4476 unless ( -e $css_filename ) {
4477 write_style_sheet_file($css_filename);
4480 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4483 sub write_style_sheet_file {
4485 my $css_filename = shift;
4487 unless ( $fh = IO::File->new("> $css_filename") ) {
4488 die "can't open $css_filename: $!\n";
4490 write_style_sheet_data($fh);
4491 eval { $fh->close };
4494 sub write_style_sheet_data {
4496 # write the style sheet data to an open file handle
4499 my $bg_color = $rOpts->{'html-color-background'};
4500 my $text_color = $rOpts->{'html-color-punctuation'};
4502 # pre-bgcolor is new, and may not be defined
4503 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4504 $pre_bg_color = $bg_color unless $pre_bg_color;
4506 $fh->print(<<"EOM");
4507 /* default style sheet generated by perltidy */
4508 body {background: $bg_color; color: $text_color}
4509 pre { color: $text_color;
4510 background: $pre_bg_color;
4511 font-family: courier;
4516 foreach my $short_name ( sort keys %short_to_long_names ) {
4517 my $long_name = $short_to_long_names{$short_name};
4519 my $abbrev = '.' . $short_name;
4520 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
4521 my $color = $html_color{$short_name};
4522 if ( !defined($color) ) { $color = $text_color }
4523 $fh->print("$abbrev \{ color: $color;");
4525 if ( $html_bold{$short_name} ) {
4526 $fh->print(" font-weight:bold;");
4529 if ( $html_italic{$short_name} ) {
4530 $fh->print(" font-style:italic;");
4532 $fh->print("} /* $long_name */\n");
4536 sub set_default_color {
4538 # make sure that options hash $rOpts->{$key} contains a valid color
4539 my ( $key, $color ) = @_;
4540 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4541 $rOpts->{$key} = check_RGB($color);
4546 # if color is a 6 digit hex RGB value, prepend a #, otherwise
4547 # assume that it is a valid ascii color name
4549 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4553 sub set_default_properties {
4554 my ( $short_name, $color, $bold, $italic ) = @_;
4556 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4558 $key = "html-bold-$short_to_long_names{$short_name}";
4559 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4560 $key = "html-italic-$short_to_long_names{$short_name}";
4561 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4566 # Use Pod::Html to process the pod and make the page
4567 # then merge the perltidy code sections into it.
4568 # return 1 if success, 0 otherwise
4570 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4571 my $input_file = $self->{_input_file};
4572 my $title = $self->{_title};
4573 my $success_flag = 0;
4575 # don't try to use pod2html if no pod
4576 unless ($pod_string) {
4577 return $success_flag;
4580 # Pod::Html requires a real temporary filename
4581 # If we are making a frame, we have a name available
4582 # Otherwise, we have to fine one
4584 if ( $rOpts->{'frames'} ) {
4585 $tmpfile = $self->{_toc_filename};
4588 $tmpfile = Perl::Tidy::make_temporary_filename();
4590 my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4592 warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4593 return $success_flag;
4596 #------------------------------------------------------------------
4597 # Warning: a temporary file is open; we have to clean up if
4598 # things go bad. From here on all returns should be by going to
4599 # RETURN so that the temporary file gets unlinked.
4600 #------------------------------------------------------------------
4602 # write the pod text to the temporary file
4603 $fh_tmp->print($pod_string);
4606 # Hand off the pod to pod2html.
4607 # Note that we can use the same temporary filename for input and output
4608 # because of the way pod2html works.
4612 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4615 # Flags with string args:
4616 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4617 # "podpath=s", "podroot=s"
4618 # Note: -css=s is handled by perltidy itself
4619 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4620 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4623 # Toggle switches; these have extra leading 'pod'
4624 # "header!", "index!", "recurse!", "quiet!", "verbose!"
4625 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4626 my $kwd = $kw; # allows us to strip 'pod'
4627 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4628 elsif ( defined( $rOpts->{$kw} ) ) {
4630 push @args, "--no$kwd";
4636 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4638 # Must clean up if pod2html dies (it can);
4639 # Be careful not to overwrite callers __DIE__ routine
4640 local $SIG{__DIE__} = sub {
4642 unlink $tmpfile if -e $tmpfile;
4648 $fh_tmp = IO::File->new( $tmpfile, 'r' );
4651 # this error shouldn't happen ... we just used this filename
4652 warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4656 my $html_fh = $self->{_html_fh};
4661 # This routine will write the html selectively and store the toc
4662 my $html_print = sub {
4664 $html_fh->print($_) unless ($no_print);
4665 if ($in_toc) { push @toc, $_ }
4669 # loop over lines of html output from pod2html and merge in
4670 # the necessary perltidy html sections
4671 my ( $saw_body, $saw_index, $saw_body_end );
4672 while ( my $line = $fh_tmp->getline() ) {
4674 if ( $line =~ /^\s*<html>\s*$/i ) {
4675 my $date = localtime;
4676 $html_print->("<!-- Generated by perltidy on $date -->\n");
4677 $html_print->($line);
4680 # Copy the perltidy css, if any, after <body> tag
4681 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4683 $html_print->($css_string) if $css_string;
4684 $html_print->($line);
4686 # add a top anchor and heading
4687 $html_print->("<a name=\"-top-\"></a>\n");
4688 $title = escape_html($title);
4689 $html_print->("<h1>$title</h1>\n");
4691 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4694 # when frames are used, an extra table of contents in the
4695 # contents panel is confusing, so don't print it
4696 $no_print = $rOpts->{'frames'}
4697 || !$rOpts->{'html-table-of-contents'};
4698 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4699 $html_print->($line);
4702 # Copy the perltidy toc, if any, after the Pod::Html toc
4703 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4705 $html_print->($line);
4707 $html_print->("<hr />\n") if $rOpts->{'frames'};
4708 $html_print->("<h2>Code Index:</h2>\n");
4709 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4710 $html_print->(@toc);
4716 # Copy one perltidy section after each marker
4717 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4719 $html_print->($1) if $1;
4721 # Intermingle code and pod sections if we saw multiple =cut's.
4722 if ( $self->{_pod_cut_count} > 1 ) {
4723 my $rpre_string = shift(@$rpre_string_stack);
4724 if ($$rpre_string) {
4725 $html_print->('<pre>');
4726 $html_print->($$rpre_string);
4727 $html_print->('</pre>');
4731 # shouldn't happen: we stored a string before writing
4734 "Problem merging html stream with pod2html; order may be wrong\n";
4736 $html_print->($line);
4739 # If didn't see multiple =cut lines, we'll put the pod out first
4740 # and then the code, because it's less confusing.
4743 # since we are not intermixing code and pod, we don't need
4744 # or want any <hr> lines which separated pod and code
4745 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4749 # Copy any remaining code section before the </body> tag
4750 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4752 if (@$rpre_string_stack) {
4753 unless ( $self->{_pod_cut_count} > 1 ) {
4754 $html_print->('<hr />');
4756 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4757 $html_print->('<pre>');
4758 $html_print->($$rpre_string);
4759 $html_print->('</pre>');
4762 $html_print->($line);
4765 $html_print->($line);
4770 unless ($saw_body) {
4771 warn "Did not see <body> in pod2html output\n";
4774 unless ($saw_body_end) {
4775 warn "Did not see </body> in pod2html output\n";
4778 unless ($saw_index) {
4779 warn "Did not find INDEX END in pod2html output\n";
4784 eval { $html_fh->close() };
4786 # note that we have to unlink tmpfile before making frames
4787 # because the tmpfile may be one of the names used for frames
4788 unlink $tmpfile if -e $tmpfile;
4789 if ( $success_flag && $rOpts->{'frames'} ) {
4790 $self->make_frame( \@toc );
4792 return $success_flag;
4797 # Make a frame with table of contents in the left panel
4798 # and the text in the right panel.
4800 # $html_filename contains the no-frames html output
4801 # $rtoc is a reference to an array with the table of contents
4804 my $input_file = $self->{_input_file};
4805 my $html_filename = $self->{_html_file};
4806 my $toc_filename = $self->{_toc_filename};
4807 my $src_filename = $self->{_src_filename};
4808 my $title = $self->{_title};
4809 $title = escape_html($title);
4811 # FUTURE input parameter:
4812 my $top_basename = "";
4814 # We need to produce 3 html files:
4815 # 1. - the table of contents
4816 # 2. - the contents (source code) itself
4817 # 3. - the frame which contains them
4819 # get basenames for relative links
4820 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
4821 my ( $src_basename, $src_path ) = fileparse($src_filename);
4823 # 1. Make the table of contents panel, with appropriate changes
4824 # to the anchor names
4825 my $src_frame_name = 'SRC';
4827 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
4830 # 2. The current .html filename is renamed to be the contents panel
4831 rename( $html_filename, $src_filename )
4832 or die "Cannot rename $html_filename to $src_filename:$!\n";
4834 # 3. Then use the original html filename for the frame
4836 $title, $html_filename, $top_basename,
4837 $toc_basename, $src_basename, $src_frame_name
4841 sub write_toc_html {
4843 # write a separate html table of contents file for frames
4844 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
4845 my $fh = IO::File->new( $toc_filename, 'w' )
4846 or die "Cannot open $toc_filename:$!\n";
4850 <title>$title</title>
4853 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
4857 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
4858 $fh->print( join "", @$rtoc );
4867 sub write_frame_html {
4869 # write an html file to be the table of contents frame
4871 $title, $frame_filename, $top_basename,
4872 $toc_basename, $src_basename, $src_frame_name
4875 my $fh = IO::File->new( $frame_filename, 'w' )
4876 or die "Cannot open $toc_basename:$!\n";
4879 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4880 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
4881 <?xml version="1.0" encoding="iso-8859-1" ?>
4882 <html xmlns="http://www.w3.org/1999/xhtml">
4884 <title>$title</title>
4888 # two left panels, one right, if master index file
4889 if ($top_basename) {
4891 <frameset cols="20%,80%">
4892 <frameset rows="30%,70%">
4893 <frame src = "$top_basename" />
4894 <frame src = "$toc_basename" />
4899 # one left panels, one right, if no master index file
4902 <frameset cols="20%,*">
4903 <frame src = "$toc_basename" />
4907 <frame src = "$src_basename" name = "$src_frame_name" />
4910 <p>If you see this message, you are using a non-frame-capable web client.</p>
4911 <p>This document contains:</p>
4913 <li><a href="$toc_basename">A table of contents</a></li>
4914 <li><a href="$src_basename">The source code</a></li>
4923 sub change_anchor_names {
4925 # add a filename and target to anchors
4926 # also return the first anchor
4927 my ( $rlines, $filename, $target ) = @_;
4929 foreach my $line (@$rlines) {
4931 # We're looking for lines like this:
4932 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
4933 # ---- - -------- -----------------
4935 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
4939 my $href = "$filename#$name";
4940 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
4941 unless ($first_anchor) { $first_anchor = $href }
4944 return $first_anchor;
4947 sub close_html_file {
4949 return unless $self->{_html_file_opened};
4951 my $html_fh = $self->{_html_fh};
4952 my $rtoc_string = $self->{_rtoc_string};
4954 # There are 3 basic paths to html output...
4956 # ---------------------------------
4957 # Path 1: finish up if in -pre mode
4958 # ---------------------------------
4959 if ( $rOpts->{'html-pre-only'} ) {
4960 $html_fh->print( <<"PRE_END");
4963 eval { $html_fh->close() };
4968 $self->add_toc_item( 'EOF', 'EOF' );
4970 my $rpre_string_stack = $self->{_rpre_string_stack};
4972 # Patch to darken the <pre> background color in case of pod2html and
4973 # interleaved code/documentation. Otherwise, the distinction
4974 # between code and documentation is blurred.
4975 if ( $rOpts->{pod2html}
4976 && $self->{_pod_cut_count} >= 1
4977 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
4979 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
4982 # put the css or its link into a string, if used
4984 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
4986 # use css linked to another file
4987 if ( $rOpts->{'html-linked-style-sheet'} ) {
4989 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
4993 # use css embedded in this file
4994 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
4995 $fh_css->print( <<'ENDCSS');
4996 <style type="text/css">
4999 write_style_sheet_data($fh_css);
5000 $fh_css->print( <<"ENDCSS");
5006 # -----------------------------------------------------------
5007 # path 2: use pod2html if requested
5008 # If we fail for some reason, continue on to path 3
5009 # -----------------------------------------------------------
5010 if ( $rOpts->{'pod2html'} ) {
5011 my $rpod_string = $self->{_rpod_string};
5012 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5013 $rpre_string_stack )
5017 # --------------------------------------------------
5018 # path 3: write code in html, with pod only in italics
5019 # --------------------------------------------------
5020 my $input_file = $self->{_input_file};
5021 my $title = escape_html($input_file);
5022 my $date = localtime;
5023 $html_fh->print( <<"HTML_START");
5024 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5025 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5026 <!-- Generated by perltidy on $date -->
5027 <html xmlns="http://www.w3.org/1999/xhtml">
5029 <title>$title</title>
5032 # output the css, if used
5034 $html_fh->print($css_string);
5035 $html_fh->print( <<"ENDCSS");
5042 $html_fh->print( <<"HTML_START");
5044 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5048 $html_fh->print("<a name=\"-top-\"></a>\n");
5049 $html_fh->print( <<"EOM");
5053 # copy the table of contents
5055 && !$rOpts->{'frames'}
5056 && $rOpts->{'html-table-of-contents'} )
5058 $html_fh->print($$rtoc_string);
5061 # copy the pre section(s)
5062 my $fname_comment = $input_file;
5063 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
5064 $html_fh->print( <<"END_PRE");
5066 <!-- contents of filename: $fname_comment -->
5070 foreach my $rpre_string (@$rpre_string_stack) {
5071 $html_fh->print($$rpre_string);
5074 # and finish the html page
5075 $html_fh->print( <<"HTML_END");
5080 eval { $html_fh->close() }; # could be object without close method
5082 if ( $rOpts->{'frames'} ) {
5083 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5084 $self->make_frame( \@toc );
5090 my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5091 my ( @colored_tokens, $j, $string, $type, $token, $level );
5092 my $rlast_level = $self->{_rlast_level};
5093 my $rpackage_stack = $self->{_rpackage_stack};
5095 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5096 $type = $$rtoken_type[$j];
5097 $token = $$rtokens[$j];
5098 $level = $$rlevels[$j];
5099 $level = 0 if ( $level < 0 );
5101 #-------------------------------------------------------
5102 # Update the package stack. The package stack is needed to keep
5103 # the toc correct because some packages may be declared within
5104 # blocks and go out of scope when we leave the block.
5105 #-------------------------------------------------------
5106 if ( $level > $$rlast_level ) {
5107 unless ( $rpackage_stack->[ $level - 1 ] ) {
5108 $rpackage_stack->[ $level - 1 ] = 'main';
5110 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5112 elsif ( $level < $$rlast_level ) {
5113 my $package = $rpackage_stack->[$level];
5114 unless ($package) { $package = 'main' }
5116 # if we change packages due to a nesting change, we
5117 # have to make an entry in the toc
5118 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5119 $self->add_toc_item( $package, 'package' );
5122 $$rlast_level = $level;
5124 #-------------------------------------------------------
5125 # Intercept a sub name here; split it
5126 # into keyword 'sub' and sub name; and add an
5128 #-------------------------------------------------------
5129 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5130 $token = $self->markup_html_element( $1, 'k' );
5131 push @colored_tokens, $token;
5135 # but don't include sub declarations in the toc;
5136 # these wlll have leading token types 'i;'
5137 my $signature = join "", @$rtoken_type;
5138 unless ( $signature =~ /^i;/ ) {
5139 my $subname = $token;
5140 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5141 $self->add_toc_item( $subname, 'sub' );
5145 #-------------------------------------------------------
5146 # Intercept a package name here; split it
5147 # into keyword 'package' and name; add to the toc,
5148 # and update the package stack
5149 #-------------------------------------------------------
5150 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5151 $token = $self->markup_html_element( $1, 'k' );
5152 push @colored_tokens, $token;
5155 $self->add_toc_item( "$token", 'package' );
5156 $rpackage_stack->[$level] = $token;
5159 $token = $self->markup_html_element( $token, $type );
5160 push @colored_tokens, $token;
5162 return ( \@colored_tokens );
5165 sub markup_html_element {
5167 my ( $token, $type ) = @_;
5169 return $token if ( $type eq 'b' ); # skip a blank token
5170 return $token if ( $token =~ /^\s*$/ ); # skip a blank line
5171 $token = escape_html($token);
5173 # get the short abbreviation for this token type
5174 my $short_name = $token_short_names{$type};
5175 if ( !defined($short_name) ) {
5176 $short_name = "pu"; # punctuation is default
5179 # handle style sheets..
5180 if ( !$rOpts->{'nohtml-style-sheets'} ) {
5181 if ( $short_name ne 'pu' ) {
5182 $token = qq(<span class="$short_name">) . $token . "</span>";
5186 # handle no style sheets..
5188 my $color = $html_color{$short_name};
5190 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5191 $token = qq(<font color="$color">) . $token . "</font>";
5193 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5194 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
5202 if ($missing_html_entities) {
5203 $token =~ s/\&/&/g;
5204 $token =~ s/\</</g;
5205 $token =~ s/\>/>/g;
5206 $token =~ s/\"/"/g;
5209 HTML::Entities::encode_entities($token);
5214 sub finish_formatting {
5216 # called after last line
5218 $self->close_html_file();
5225 return unless $self->{_html_file_opened};
5226 my $html_pre_fh = $self->{_html_pre_fh};
5227 my ($line_of_tokens) = @_;
5228 my $line_type = $line_of_tokens->{_line_type};
5229 my $input_line = $line_of_tokens->{_line_text};
5230 my $line_number = $line_of_tokens->{_line_number};
5233 # markup line of code..
5235 if ( $line_type eq 'CODE' ) {
5236 my $rtoken_type = $line_of_tokens->{_rtoken_type};
5237 my $rtokens = $line_of_tokens->{_rtokens};
5238 my $rlevels = $line_of_tokens->{_rlevels};
5240 if ( $input_line =~ /(^\s*)/ ) {
5246 my ($rcolored_tokens) =
5247 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5248 $html_line .= join '', @$rcolored_tokens;
5251 # markup line of non-code..
5254 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
5255 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
5256 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
5257 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5258 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
5259 elsif ( $line_type eq 'END_START' ) {
5260 $line_character = 'k';
5261 $self->add_toc_item( '__END__', '__END__' );
5263 elsif ( $line_type eq 'DATA_START' ) {
5264 $line_character = 'k';
5265 $self->add_toc_item( '__DATA__', '__DATA__' );
5267 elsif ( $line_type =~ /^POD/ ) {
5268 $line_character = 'P';
5269 if ( $rOpts->{'pod2html'} ) {
5270 my $html_pod_fh = $self->{_html_pod_fh};
5271 if ( $line_type eq 'POD_START' ) {
5273 my $rpre_string_stack = $self->{_rpre_string_stack};
5274 my $rpre_string = $rpre_string_stack->[-1];
5276 # if we have written any non-blank lines to the
5277 # current pre section, start writing to a new output
5279 if ( $$rpre_string =~ /\S/ ) {
5282 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5283 $self->{_html_pre_fh} = $html_pre_fh;
5284 push @$rpre_string_stack, \$pre_string;
5286 # leave a marker in the pod stream so we know
5287 # where to put the pre section we just
5289 my $for_html = '=for html'; # don't confuse pod utils
5290 $html_pod_fh->print(<<EOM);
5293 <!-- pERLTIDY sECTION -->
5298 # otherwise, just clear the current string and start
5302 $html_pod_fh->print("\n");
5305 $html_pod_fh->print( $input_line . "\n" );
5306 if ( $line_type eq 'POD_END' ) {
5307 $self->{_pod_cut_count}++;
5308 $html_pod_fh->print("\n");
5313 else { $line_character = 'Q' }
5314 $html_line = $self->markup_html_element( $input_line, $line_character );
5317 # add the line number if requested
5318 if ( $rOpts->{'html-line-numbers'} ) {
5320 ( $line_number < 10 ) ? " "
5321 : ( $line_number < 100 ) ? " "
5322 : ( $line_number < 1000 ) ? " "
5324 $html_line = $extra_space . $line_number . " " . $html_line;
5328 $html_pre_fh->print("$html_line\n");
5331 #####################################################################
5333 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5334 # line breaks to the token stream
5336 # WARNING: This is not a real class for speed reasons. Only one
5337 # Formatter may be used.
5339 #####################################################################
5341 package Perl::Tidy::Formatter;
5345 # Caution: these debug flags produce a lot of output
5346 # They should all be 0 except when debugging small scripts
5347 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
5348 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
5349 use constant FORMATTER_DEBUG_FLAG_CI => 0;
5350 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
5351 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
5352 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
5353 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5354 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
5355 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
5356 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
5357 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
5358 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
5360 my $debug_warning = sub {
5361 print "FORMATTER_DEBUGGING with key $_[0]\n";
5364 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
5365 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
5366 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
5367 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
5368 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
5369 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
5370 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5371 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
5372 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
5373 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
5374 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
5375 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
5382 $max_gnu_stack_index
5383 $gnu_position_predictor
5384 $line_start_index_to_go
5385 $last_indentation_written
5386 $last_unadjusted_indentation
5389 $saw_VERSION_in_this_file
5394 $gnu_sequence_number
5395 $last_output_indentation
5401 @type_sequence_to_go
5402 @container_environment_to_go
5403 @bond_strength_to_go
5404 @forced_breakpoint_to_go
5407 @leading_spaces_to_go
5408 @reduced_spaces_to_go
5409 @matching_token_to_go
5411 @nesting_blocks_to_go
5413 @nesting_depth_to_go
5415 @old_breakpoint_to_go
5419 %saved_opening_indentation
5422 $comma_count_in_batch
5423 $old_line_count_in_batch
5424 $last_nonblank_index_to_go
5425 $last_nonblank_type_to_go
5426 $last_nonblank_token_to_go
5427 $last_last_nonblank_index_to_go
5428 $last_last_nonblank_type_to_go
5429 $last_last_nonblank_token_to_go
5430 @nonblank_lines_at_depth
5433 $in_format_skipping_section
5434 $format_skipping_pattern_begin
5435 $format_skipping_pattern_end
5437 $forced_breakpoint_count
5438 $forced_breakpoint_undo_count
5439 @forced_breakpoint_undo_stack
5440 %postponed_breakpoint
5444 $first_embedded_tab_at
5445 $last_embedded_tab_at
5446 $deleted_semicolon_count
5447 $first_deleted_semicolon_at
5448 $last_deleted_semicolon_at
5449 $added_semicolon_count
5450 $first_added_semicolon_at
5451 $last_added_semicolon_at
5452 $saw_negative_indentation
5453 $first_tabbing_disagreement
5454 $last_tabbing_disagreement
5455 $in_tabbing_disagreement
5456 $tabbing_disagreement_count
5460 $last_line_leading_type
5461 $last_line_leading_level
5462 $last_last_line_leading_level
5465 %block_opening_line_number
5466 $csc_new_statement_ok
5467 $accumulating_text_for_block
5469 $rleading_block_if_elsif_text
5470 $leading_block_text_level
5471 $leading_block_text_length_exceeded
5472 $leading_block_text_line_length
5473 $leading_block_text_line_number
5474 $closing_side_comment_prefix_pattern
5475 $closing_side_comment_list_pattern
5477 $last_nonblank_token
5479 $last_last_nonblank_token
5480 $last_last_nonblank_type
5481 $last_nonblank_block_type
5484 %is_if_brace_follower
5485 %space_after_keyword
5488 %is_last_next_redo_return
5489 %is_other_brace_follower
5490 %is_else_brace_follower
5491 %is_anon_sub_brace_follower
5492 %is_anon_sub_1_brace_follower
5494 %is_sort_map_grep_eval
5495 %is_sort_map_grep_eval_do
5496 %is_block_without_semicolon
5501 %is_if_unless_and_or_last_next_redo_return
5507 $is_static_block_comment
5508 $index_start_one_line_block
5509 $semicolons_before_block_self_destruct
5510 $index_max_forced_break
5513 $vertical_aligner_object
5518 $last_line_had_side_comment
5521 $static_block_comment_pattern
5522 $static_side_comment_pattern
5523 %opening_vertical_tightness
5524 %closing_vertical_tightness
5525 %closing_token_indentation
5527 %opening_token_right
5528 %stack_opening_token
5529 %stack_closing_token
5531 $block_brace_vertical_tightness_pattern
5534 $rOpts_add_whitespace
5535 $rOpts_block_brace_tightness
5536 $rOpts_block_brace_vertical_tightness
5537 $rOpts_brace_left_and_indent
5538 $rOpts_comma_arrow_breakpoints
5539 $rOpts_break_at_old_keyword_breakpoints
5540 $rOpts_break_at_old_comma_breakpoints
5541 $rOpts_break_at_old_logical_breakpoints
5542 $rOpts_break_at_old_trinary_breakpoints
5543 $rOpts_closing_side_comment_else_flag
5544 $rOpts_closing_side_comment_maximum_text
5545 $rOpts_continuation_indentation
5547 $rOpts_delete_old_whitespace
5548 $rOpts_fuzzy_line_length
5549 $rOpts_indent_columns
5550 $rOpts_line_up_parentheses
5551 $rOpts_maximum_fields_per_table
5552 $rOpts_maximum_line_length
5553 $rOpts_short_concatenation_item_length
5554 $rOpts_swallow_optional_blank_lines
5555 $rOpts_ignore_old_breakpoints
5556 $rOpts_format_skipping
5557 $rOpts_space_function_paren
5558 $rOpts_space_keyword_paren
5560 $half_maximum_line_length
5564 %is_keyword_returning_list
5568 %right_bond_strength
5585 # default list of block types for which -bli would apply
5586 $bli_list_string = 'if else elsif unless while for foreach do : sub';
5589 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
5590 <= >= == =~ !~ != ++ -- /= x=
5592 @is_digraph{@_} = (1) x scalar(@_);
5594 @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
5595 @is_trigraph{@_} = (1) x scalar(@_);
5598 = **= += *= &= <<= &&=
5599 -= /= |= >>= ||= //=
5603 @is_assignment{@_} = (1) x scalar(@_);
5613 @is_keyword_returning_list{@_} = (1) x scalar(@_);
5615 @_ = qw(is if unless and or err last next redo return);
5616 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5618 @_ = qw(last next redo return);
5619 @is_last_next_redo_return{@_} = (1) x scalar(@_);
5621 @_ = qw(sort map grep);
5622 @is_sort_map_grep{@_} = (1) x scalar(@_);
5624 @_ = qw(sort map grep eval);
5625 @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5627 @_ = qw(sort map grep eval do);
5628 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5631 @is_if_unless{@_} = (1) x scalar(@_);
5633 @_ = qw(and or err);
5634 @is_and_or{@_} = (1) x scalar(@_);
5636 # Identify certain operators which often occur in chains
5637 @_ = qw(&& || and or : ? .);
5638 @is_chain_operator{@_} = (1) x scalar(@_);
5640 # We can remove semicolons after blocks preceded by these keywords
5641 @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
5642 unless while until for foreach);
5643 @is_block_without_semicolon{@_} = (1) x scalar(@_);
5645 # 'L' is token for opening { at hash key
5647 @is_opening_type{@_} = (1) x scalar(@_);
5649 # 'R' is token for closing } at hash key
5651 @is_closing_type{@_} = (1) x scalar(@_);
5654 @is_opening_token{@_} = (1) x scalar(@_);
5657 @is_closing_token{@_} = (1) x scalar(@_);
5661 use constant WS_YES => 1;
5662 use constant WS_OPTIONAL => 0;
5663 use constant WS_NO => -1;
5665 # Token bond strengths.
5666 use constant NO_BREAK => 10000;
5667 use constant VERY_STRONG => 100;
5668 use constant STRONG => 2.1;
5669 use constant NOMINAL => 1.1;
5670 use constant WEAK => 0.8;
5671 use constant VERY_WEAK => 0.55;
5673 # values for testing indexes in output array
5674 use constant UNDEFINED_INDEX => -1;
5676 # Maximum number of little messages; probably need not be changed.
5677 use constant MAX_NAG_MESSAGES => 6;
5679 # increment between sequence numbers for each type
5680 # For example, ?: pairs might have numbers 7,11,15,...
5681 use constant TYPE_SEQUENCE_INCREMENT => 4;
5685 # methods to count instances
5687 sub get_count { $_count; }
5688 sub _increment_count { ++$_count }
5689 sub _decrement_count { --$_count }
5692 # interface to Perl::Tidy::Logger routines
5694 if ($logger_object) {
5695 $logger_object->warning(@_);
5700 if ($logger_object) {
5701 $logger_object->complain(@_);
5705 sub write_logfile_entry {
5706 if ($logger_object) {
5707 $logger_object->write_logfile_entry(@_);
5712 if ($logger_object) {
5713 $logger_object->black_box(@_);
5717 sub report_definite_bug {
5718 if ($logger_object) {
5719 $logger_object->report_definite_bug();
5723 sub get_saw_brace_error {
5724 if ($logger_object) {
5725 $logger_object->get_saw_brace_error();
5729 sub we_are_at_the_last_line {
5730 if ($logger_object) {
5731 $logger_object->we_are_at_the_last_line();
5735 # interface to Perl::Tidy::Diagnostics routine
5736 sub write_diagnostics {
5738 if ($diagnostics_object) {
5739 $diagnostics_object->write_diagnostics(@_);
5743 sub get_added_semicolon_count {
5745 return $added_semicolon_count;
5749 $_[0]->_decrement_count();
5756 # we are given an object with a write_line() method to take lines
5758 sink_object => undef,
5759 diagnostics_object => undef,
5760 logger_object => undef,
5762 my %args = ( %defaults, @_ );
5764 $logger_object = $args{logger_object};
5765 $diagnostics_object = $args{diagnostics_object};
5767 # we create another object with a get_line() and peek_ahead() method
5768 my $sink_object = $args{sink_object};
5769 $file_writer_object =
5770 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
5772 # initialize the leading whitespace stack to negative levels
5773 # so that we can never run off the end of the stack
5774 $gnu_position_predictor = 0; # where the current token is predicted to be
5775 $max_gnu_stack_index = 0;
5776 $max_gnu_item_index = -1;
5777 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
5778 @gnu_item_list = ();
5779 $last_output_indentation = 0;
5780 $last_indentation_written = 0;
5781 $last_unadjusted_indentation = 0;
5782 $last_leading_token = "";
5784 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
5785 $saw_END_or_DATA_ = 0;
5787 @block_type_to_go = ();
5788 @type_sequence_to_go = ();
5789 @container_environment_to_go = ();
5790 @bond_strength_to_go = ();
5791 @forced_breakpoint_to_go = ();
5792 @lengths_to_go = (); # line length to start of ith token
5794 @matching_token_to_go = ();
5795 @mate_index_to_go = ();
5796 @nesting_blocks_to_go = ();
5797 @ci_levels_to_go = ();
5798 @nesting_depth_to_go = (0);
5799 @nobreak_to_go = ();
5800 @old_breakpoint_to_go = ();
5803 @leading_spaces_to_go = ();
5804 @reduced_spaces_to_go = ();
5807 @has_broken_sublist = ();
5808 @want_comma_break = ();
5811 $saw_negative_indentation = 0;
5812 $first_tabbing_disagreement = 0;
5813 $last_tabbing_disagreement = 0;
5814 $tabbing_disagreement_count = 0;
5815 $in_tabbing_disagreement = 0;
5816 $input_line_tabbing = undef;
5818 $last_line_type = "";
5819 $last_last_line_leading_level = 0;
5820 $last_line_leading_level = 0;
5821 $last_line_leading_type = '#';
5823 $last_nonblank_token = ';';
5824 $last_nonblank_type = ';';
5825 $last_last_nonblank_token = ';';
5826 $last_last_nonblank_type = ';';
5827 $last_nonblank_block_type = "";
5828 $last_output_level = 0;
5829 $looking_for_else = 0;
5830 $embedded_tab_count = 0;
5831 $first_embedded_tab_at = 0;
5832 $last_embedded_tab_at = 0;
5833 $deleted_semicolon_count = 0;
5834 $first_deleted_semicolon_at = 0;
5835 $last_deleted_semicolon_at = 0;
5836 $added_semicolon_count = 0;
5837 $first_added_semicolon_at = 0;
5838 $last_added_semicolon_at = 0;
5839 $last_line_had_side_comment = 0;
5840 $is_static_block_comment = 0;
5841 %postponed_breakpoint = ();
5843 # variables for adding side comments
5844 %block_leading_text = ();
5845 %block_opening_line_number = ();
5846 $csc_new_statement_ok = 1;
5848 %saved_opening_indentation = ();
5849 $in_format_skipping_section = 0;
5851 reset_block_text_accumulator();
5853 prepare_for_new_input_lines();
5855 $vertical_aligner_object =
5856 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
5857 $logger_object, $diagnostics_object );
5859 if ( $rOpts->{'entab-leading-whitespace'} ) {
5860 write_logfile_entry(
5861 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
5864 elsif ( $rOpts->{'tabs'} ) {
5865 write_logfile_entry("Indentation will be with a tab character\n");
5868 write_logfile_entry(
5869 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
5872 # This was the start of a formatter referent, but object-oriented
5873 # coding has turned out to be too slow here.
5874 $formatter_self = {};
5876 bless $formatter_self, $class;
5878 # Safety check..this is not a class yet
5879 if ( _increment_count() > 1 ) {
5881 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
5883 return $formatter_self;
5886 sub prepare_for_new_input_lines {
5888 $gnu_sequence_number++; # increment output batch counter
5889 %last_gnu_equals = ();
5890 %gnu_comma_count = ();
5891 %gnu_arrow_count = ();
5892 $line_start_index_to_go = 0;
5893 $max_gnu_item_index = UNDEFINED_INDEX;
5894 $index_max_forced_break = UNDEFINED_INDEX;
5895 $max_index_to_go = UNDEFINED_INDEX;
5896 $last_nonblank_index_to_go = UNDEFINED_INDEX;
5897 $last_nonblank_type_to_go = '';
5898 $last_nonblank_token_to_go = '';
5899 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
5900 $last_last_nonblank_type_to_go = '';
5901 $last_last_nonblank_token_to_go = '';
5902 $forced_breakpoint_count = 0;
5903 $forced_breakpoint_undo_count = 0;
5904 $rbrace_follower = undef;
5905 $lengths_to_go[0] = 0;
5906 $old_line_count_in_batch = 1;
5907 $comma_count_in_batch = 0;
5908 $starting_in_quote = 0;
5910 destroy_one_line_block();
5916 my ($line_of_tokens) = @_;
5918 my $line_type = $line_of_tokens->{_line_type};
5919 my $input_line = $line_of_tokens->{_line_text};
5921 my $want_blank_line_next = 0;
5923 # _line_type codes are:
5924 # SYSTEM - system-specific code before hash-bang line
5925 # CODE - line of perl code (including comments)
5926 # POD_START - line starting pod, such as '=head'
5927 # POD - pod documentation text
5928 # POD_END - last line of pod section, '=cut'
5929 # HERE - text of here-document
5930 # HERE_END - last line of here-doc (target word)
5931 # FORMAT - format section
5932 # FORMAT_END - last line of format section, '.'
5933 # DATA_START - __DATA__ line
5934 # DATA - unidentified text following __DATA__
5935 # END_START - __END__ line
5936 # END - unidentified text following __END__
5937 # ERROR - we are in big trouble, probably not a perl script
5939 # handle line of code..
5940 if ( $line_type eq 'CODE' ) {
5942 # let logger see all non-blank lines of code
5943 if ( $input_line !~ /^\s*$/ ) {
5944 my $output_line_number =
5945 $vertical_aligner_object->get_output_line_number();
5946 black_box( $line_of_tokens, $output_line_number );
5948 print_line_of_tokens($line_of_tokens);
5951 # handle line of non-code..
5957 if ( $line_type =~ /^POD/ ) {
5959 # Pod docs should have a preceding blank line. But be
5960 # very careful in __END__ and __DATA__ sections, because:
5961 # 1. the user may be using this section for any purpose whatsoever
5962 # 2. the blank counters are not active there
5963 # It should be safe to request a blank line between an
5964 # __END__ or __DATA__ and an immediately following '=head'
5965 # type line, (types END_START and DATA_START), but not for
5966 # any other lines of type END or DATA.
5967 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
5968 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
5970 && $line_type eq 'POD_START'
5971 && $last_line_type !~ /^(END|DATA)$/ )
5976 # patch to put a blank line after =cut
5977 # (required by podchecker)
5978 if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
5979 $file_writer_object->reset_consecutive_blank_lines();
5980 $want_blank_line_next = 1;
5984 # leave the blank counters in a predictable state
5985 # after __END__ or __DATA__
5986 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
5987 $file_writer_object->reset_consecutive_blank_lines();
5988 $saw_END_or_DATA_ = 1;
5991 # write unindented non-code line
5992 if ( !$skip_line ) {
5993 if ($tee_line) { $file_writer_object->tee_on() }
5994 write_unindented_line($input_line);
5995 if ($tee_line) { $file_writer_object->tee_off() }
5996 if ($want_blank_line_next) { want_blank_line(); }
5999 $last_line_type = $line_type;
6002 sub create_one_line_block {
6003 $index_start_one_line_block = $_[0];
6004 $semicolons_before_block_self_destruct = $_[1];
6007 sub destroy_one_line_block {
6008 $index_start_one_line_block = UNDEFINED_INDEX;
6009 $semicolons_before_block_self_destruct = 0;
6012 sub leading_spaces_to_go {
6014 # return the number of indentation spaces for a token in the output stream;
6015 # these were previously stored by 'set_leading_whitespace'.
6017 return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
6023 # return the number of leading spaces associated with an indentation
6024 # variable $indentation is either a constant number of spaces or an object
6025 # with a get_SPACES method.
6026 my $indentation = shift;
6027 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6030 sub get_RECOVERABLE_SPACES {
6032 # return the number of spaces (+ means shift right, - means shift left)
6033 # that we would like to shift a group of lines with the same indentation
6034 # to get them to line up with their opening parens
6035 my $indentation = shift;
6036 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6039 sub get_AVAILABLE_SPACES_to_go {
6041 my $item = $leading_spaces_to_go[ $_[0] ];
6043 # return the number of available leading spaces associated with an
6044 # indentation variable. $indentation is either a constant number of
6045 # spaces or an object with a get_AVAILABLE_SPACES method.
6046 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6049 sub new_lp_indentation_item {
6051 # this is an interface to the IndentationItem class
6052 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6054 # A negative level implies not to store the item in the item_list
6056 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6058 my $item = Perl::Tidy::IndentationItem->new(
6060 $ci_level, $available_spaces,
6061 $index, $gnu_sequence_number,
6062 $align_paren, $max_gnu_stack_index,
6063 $line_start_index_to_go,
6066 if ( $level >= 0 ) {
6067 $gnu_item_list[$max_gnu_item_index] = $item;
6073 sub set_leading_whitespace {
6075 # This routine defines leading whitespace
6076 # given: the level and continuation_level of a token,
6077 # define: space count of leading string which would apply if it
6078 # were the first token of a new line.
6080 my ( $level, $ci_level, $in_continued_quote ) = @_;
6082 # modify for -bli, which adds one continuation indentation for
6084 if ( $rOpts_brace_left_and_indent
6085 && $max_index_to_go == 0
6086 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6091 # patch to avoid trouble when input file has negative indentation.
6092 # other logic should catch this error.
6093 if ( $level < 0 ) { $level = 0 }
6095 #-------------------------------------------
6096 # handle the standard indentation scheme
6097 #-------------------------------------------
6098 unless ($rOpts_line_up_parentheses) {
6099 my $space_count = $ci_level * $rOpts_continuation_indentation + $level *
6100 $rOpts_indent_columns;
6102 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6104 if ($in_continued_quote) {
6108 $leading_spaces_to_go[$max_index_to_go] = $space_count;
6109 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6113 #-------------------------------------------------------------
6114 # handle case of -lp indentation..
6115 #-------------------------------------------------------------
6117 # The continued_quote flag means that this is the first token of a
6118 # line, and it is the continuation of some kind of multi-line quote
6119 # or pattern. It requires special treatment because it must have no
6120 # added leading whitespace. So we create a special indentation item
6121 # which is not in the stack.
6122 if ($in_continued_quote) {
6123 my $space_count = 0;
6124 my $available_space = 0;
6125 $level = -1; # flag to prevent storing in item_list
6126 $leading_spaces_to_go[$max_index_to_go] =
6127 $reduced_spaces_to_go[$max_index_to_go] =
6128 new_lp_indentation_item( $space_count, $level, $ci_level,
6129 $available_space, 0 );
6133 # get the top state from the stack
6134 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6135 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6136 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6138 my $type = $types_to_go[$max_index_to_go];
6139 my $token = $tokens_to_go[$max_index_to_go];
6140 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6142 if ( $type eq '{' || $type eq '(' ) {
6144 $gnu_comma_count{ $total_depth + 1 } = 0;
6145 $gnu_arrow_count{ $total_depth + 1 } = 0;
6147 # If we come to an opening token after an '=' token of some type,
6148 # see if it would be helpful to 'break' after the '=' to save space
6149 my $last_equals = $last_gnu_equals{$total_depth};
6150 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6152 # find the position if we break at the '='
6153 my $i_test = $last_equals;
6154 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6155 my $test_position = total_line_length( $i_test, $max_index_to_go );
6159 # if we are beyond the midpoint
6160 $gnu_position_predictor > $half_maximum_line_length
6162 # or if we can save some space by breaking at the '='
6163 # without obscuring the second line by the first
6164 || ( $test_position > 1 +
6165 total_line_length( $line_start_index_to_go, $last_equals ) )
6169 # then make the switch -- note that we do not set a real
6170 # breakpoint here because we may not really need one; sub
6171 # scan_list will do that if necessary
6172 $line_start_index_to_go = $i_test + 1;
6173 $gnu_position_predictor = $test_position;
6178 # Check for decreasing depth ..
6179 # Note that one token may have both decreasing and then increasing
6180 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
6181 # in this example we would first go back to (1,0) then up to (2,0)
6183 if ( $level < $current_level || $ci_level < $current_ci_level ) {
6185 # loop to find the first entry at or completely below this level
6186 my ( $lev, $ci_lev );
6188 if ($max_gnu_stack_index) {
6190 # save index of token which closes this level
6191 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6193 # Undo any extra indentation if we saw no commas
6194 my $available_spaces =
6195 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6197 my $comma_count = 0;
6198 my $arrow_count = 0;
6199 if ( $type eq '}' || $type eq ')' ) {
6200 $comma_count = $gnu_comma_count{$total_depth};
6201 $arrow_count = $gnu_arrow_count{$total_depth};
6202 $comma_count = 0 unless $comma_count;
6203 $arrow_count = 0 unless $arrow_count;
6205 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6206 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6208 if ( $available_spaces > 0 ) {
6210 if ( $comma_count <= 0 || $arrow_count > 0 ) {
6212 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6214 $gnu_stack[$max_gnu_stack_index]
6215 ->get_SEQUENCE_NUMBER();
6217 # Be sure this item was created in this batch. This
6218 # should be true because we delete any available
6219 # space from open items at the end of each batch.
6220 if ( $gnu_sequence_number != $seqno
6221 || $i > $max_gnu_item_index )
6224 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6226 report_definite_bug();
6230 if ( $arrow_count == 0 ) {
6232 ->permanently_decrease_AVAILABLE_SPACES(
6237 ->tentatively_decrease_AVAILABLE_SPACES(
6244 $j <= $max_gnu_item_index ;
6249 ->decrease_SPACES($available_spaces);
6256 --$max_gnu_stack_index;
6257 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6258 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6260 # stop when we reach a level at or below the current level
6261 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6263 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6264 $current_level = $lev;
6265 $current_ci_level = $ci_lev;
6270 # reached bottom of stack .. should never happen because
6271 # only negative levels can get here, and $level was forced
6272 # to be positive above.
6275 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6277 report_definite_bug();
6283 # handle increasing depth
6284 if ( $level > $current_level || $ci_level > $current_ci_level ) {
6286 # Compute the standard incremental whitespace. This will be
6287 # the minimum incremental whitespace that will be used. This
6288 # choice results in a smooth transition between the gnu-style
6289 # and the standard style.
6290 my $standard_increment =
6291 ( $level - $current_level ) * $rOpts_indent_columns +
6292 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6294 # Now we have to define how much extra incremental space
6295 # ("$available_space") we want. This extra space will be
6296 # reduced as necessary when long lines are encountered or when
6297 # it becomes clear that we do not have a good list.
6298 my $available_space = 0;
6299 my $align_paren = 0;
6302 # initialization on empty stack..
6303 if ( $max_gnu_stack_index == 0 ) {
6304 $space_count = $level * $rOpts_indent_columns;
6307 # if this is a BLOCK, add the standard increment
6308 elsif ($last_nonblank_block_type) {
6309 $space_count += $standard_increment;
6312 # if last nonblank token was not structural indentation,
6313 # just use standard increment
6314 elsif ( $last_nonblank_type ne '{' ) {
6315 $space_count += $standard_increment;
6318 # otherwise use the space to the first non-blank level change token
6321 $space_count = $gnu_position_predictor;
6323 my $min_gnu_indentation =
6324 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6326 $available_space = $space_count - $min_gnu_indentation;
6327 if ( $available_space >= $standard_increment ) {
6328 $min_gnu_indentation += $standard_increment;
6330 elsif ( $available_space > 1 ) {
6331 $min_gnu_indentation += $available_space + 1;
6333 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6334 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6335 $min_gnu_indentation += 2;
6338 $min_gnu_indentation += 1;
6342 $min_gnu_indentation += $standard_increment;
6344 $available_space = $space_count - $min_gnu_indentation;
6346 if ( $available_space < 0 ) {
6347 $space_count = $min_gnu_indentation;
6348 $available_space = 0;
6353 # update state, but not on a blank token
6354 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6356 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6358 ++$max_gnu_stack_index;
6359 $gnu_stack[$max_gnu_stack_index] =
6360 new_lp_indentation_item( $space_count, $level, $ci_level,
6361 $available_space, $align_paren );
6363 # If the opening paren is beyond the half-line length, then
6364 # we will use the minimum (standard) indentation. This will
6365 # help avoid problems associated with running out of space
6366 # near the end of a line. As a result, in deeply nested
6367 # lists, there will be some indentations which are limited
6368 # to this minimum standard indentation. But the most deeply
6369 # nested container will still probably be able to shift its
6370 # parameters to the right for proper alignment, so in most
6371 # cases this will not be noticable.
6372 if ( $available_space > 0
6373 && $space_count > $half_maximum_line_length )
6375 $gnu_stack[$max_gnu_stack_index]
6376 ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6381 # Count commas and look for non-list characters. Once we see a
6382 # non-list character, we give up and don't look for any more commas.
6383 if ( $type eq '=>' ) {
6384 $gnu_arrow_count{$total_depth}++;
6386 # tentatively treating '=>' like '=' for estimating breaks
6387 # TODO: this could use some experimentation
6388 $last_gnu_equals{$total_depth} = $max_index_to_go;
6391 elsif ( $type eq ',' ) {
6392 $gnu_comma_count{$total_depth}++;
6395 elsif ( $is_assignment{$type} ) {
6396 $last_gnu_equals{$total_depth} = $max_index_to_go;
6399 # this token might start a new line
6400 # if this is a non-blank..
6401 if ( $type ne 'b' ) {
6406 # this is the first nonblank token of the line
6407 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6409 # or previous character was one of these:
6410 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6412 # or previous character was opening and this does not close it
6413 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6414 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6416 # or this token is one of these:
6417 || $type =~ /^([\.]|\|\||\&\&)$/
6419 # or this is a closing structure
6420 || ( $last_nonblank_type_to_go eq '}'
6421 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6423 # or previous token was keyword 'return'
6424 || ( $last_nonblank_type_to_go eq 'k'
6425 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6427 # or starting a new line at certain keywords is fine
6429 && $is_if_unless_and_or_last_next_redo_return{$token} )
6431 # or this is after an assignment after a closing structure
6433 $is_assignment{$last_nonblank_type_to_go}
6435 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6437 # and it is significantly to the right
6438 || $gnu_position_predictor > $half_maximum_line_length
6443 check_for_long_gnu_style_lines();
6444 $line_start_index_to_go = $max_index_to_go;
6446 # back up 1 token if we want to break before that type
6447 # otherwise, we may strand tokens like '?' or ':' on a line
6448 if ( $line_start_index_to_go > 0 ) {
6449 if ( $last_nonblank_type_to_go eq 'k' ) {
6451 if ( $want_break_before{$last_nonblank_token_to_go} ) {
6452 $line_start_index_to_go--;
6455 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6456 $line_start_index_to_go--;
6462 # remember the predicted position of this token on the output line
6463 if ( $max_index_to_go > $line_start_index_to_go ) {
6464 $gnu_position_predictor =
6465 total_line_length( $line_start_index_to_go, $max_index_to_go );
6468 $gnu_position_predictor = $space_count +
6469 token_sequence_length( $max_index_to_go, $max_index_to_go );
6472 # store the indentation object for this token
6473 # this allows us to manipulate the leading whitespace
6474 # (in case we have to reduce indentation to fit a line) without
6475 # having to change any token values
6476 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6477 $reduced_spaces_to_go[$max_index_to_go] =
6478 ( $max_gnu_stack_index > 0 && $ci_level )
6479 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6480 : $gnu_stack[$max_gnu_stack_index];
6484 sub check_for_long_gnu_style_lines {
6486 # look at the current estimated maximum line length, and
6487 # remove some whitespace if it exceeds the desired maximum
6489 # this is only for the '-lp' style
6490 return unless ($rOpts_line_up_parentheses);
6492 # nothing can be done if no stack items defined for this line
6493 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6495 # see if we have exceeded the maximum desired line length
6496 # keep 2 extra free because they are needed in some cases
6497 # (result of trial-and-error testing)
6499 $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6501 return if ( $spaces_needed < 0 );
6503 # We are over the limit, so try to remove a requested number of
6504 # spaces from leading whitespace. We are only allowed to remove
6505 # from whitespace items created on this batch, since others have
6506 # already been used and cannot be undone.
6507 my @candidates = ();
6510 # loop over all whitespace items created for the current batch
6511 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6512 my $item = $gnu_item_list[$i];
6514 # item must still be open to be a candidate (otherwise it
6515 # cannot influence the current token)
6516 next if ( $item->get_CLOSED() >= 0 );
6518 my $available_spaces = $item->get_AVAILABLE_SPACES();
6520 if ( $available_spaces > 0 ) {
6521 push( @candidates, [ $i, $available_spaces ] );
6525 return unless (@candidates);
6527 # sort by available whitespace so that we can remove whitespace
6528 # from the maximum available first
6529 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6531 # keep removing whitespace until we are done or have no more
6533 foreach $candidate (@candidates) {
6534 my ( $i, $available_spaces ) = @{$candidate};
6535 my $deleted_spaces =
6536 ( $available_spaces > $spaces_needed )
6538 : $available_spaces;
6540 # remove the incremental space from this item
6541 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6545 # update the leading whitespace of this item and all items
6546 # that came after it
6547 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6549 my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6550 if ( $old_spaces > $deleted_spaces ) {
6551 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6554 # shouldn't happen except for code bug:
6556 my $level = $gnu_item_list[$i_debug]->get_LEVEL();
6557 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6558 my $old_level = $gnu_item_list[$i]->get_LEVEL();
6559 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6561 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
6563 report_definite_bug();
6566 $gnu_position_predictor -= $deleted_spaces;
6567 $spaces_needed -= $deleted_spaces;
6568 last unless ( $spaces_needed > 0 );
6572 sub finish_lp_batch {
6574 # This routine is called once after each each output stream batch is
6575 # finished to undo indentation for all incomplete -lp
6576 # indentation levels. It is too risky to leave a level open,
6577 # because then we can't backtrack in case of a long line to follow.
6578 # This means that comments and blank lines will disrupt this
6579 # indentation style. But the vertical aligner may be able to
6580 # get the space back if there are side comments.
6582 # this is only for the 'lp' style
6583 return unless ($rOpts_line_up_parentheses);
6585 # nothing can be done if no stack items defined for this line
6586 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6588 # loop over all whitespace items created for the current batch
6590 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6591 my $item = $gnu_item_list[$i];
6593 # only look for open items
6594 next if ( $item->get_CLOSED() >= 0 );
6596 # Tentatively remove all of the available space
6597 # (The vertical aligner will try to get it back later)
6598 my $available_spaces = $item->get_AVAILABLE_SPACES();
6599 if ( $available_spaces > 0 ) {
6601 # delete incremental space for this item
6603 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6605 # Reduce the total indentation space of any nodes that follow
6606 # Note that any such nodes must necessarily be dependents
6608 foreach ( $i + 1 .. $max_gnu_item_index ) {
6609 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6616 sub reduce_lp_indentation {
6618 # reduce the leading whitespace at token $i if possible by $spaces_needed
6619 # (a large value of $spaces_needed will remove all excess space)
6620 # NOTE: to be called from scan_list only for a sequence of tokens
6621 # contained between opening and closing parens/braces/brackets
6623 my ( $i, $spaces_wanted ) = @_;
6624 my $deleted_spaces = 0;
6626 my $item = $leading_spaces_to_go[$i];
6627 my $available_spaces = $item->get_AVAILABLE_SPACES();
6630 $available_spaces > 0
6631 && ( ( $spaces_wanted <= $available_spaces )
6632 || !$item->get_HAVE_CHILD() )
6636 # we'll remove these spaces, but mark them as recoverable
6638 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6641 return $deleted_spaces;
6644 sub token_sequence_length {
6646 # return length of tokens ($ifirst .. $ilast) including first & last
6647 # returns 0 if $ifirst > $ilast
6650 return 0 if ( $ilast < 0 || $ifirst > $ilast );
6651 return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6652 return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6655 sub total_line_length {
6657 # return length of a line of tokens ($ifirst .. $ilast)
6660 if ( $ifirst < 0 ) { $ifirst = 0 }
6662 return leading_spaces_to_go($ifirst) +
6663 token_sequence_length( $ifirst, $ilast );
6666 sub excess_line_length {
6668 # return number of characters by which a line of tokens ($ifirst..$ilast)
6669 # exceeds the allowable line length.
6672 if ( $ifirst < 0 ) { $ifirst = 0 }
6673 return leading_spaces_to_go($ifirst) +
6674 token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6677 sub finish_formatting {
6679 # flush buffer and write any informative messages
6683 $file_writer_object->decrement_output_line_number()
6684 ; # fix up line number since it was incremented
6685 we_are_at_the_last_line();
6686 if ( $added_semicolon_count > 0 ) {
6687 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6689 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6690 write_logfile_entry("$added_semicolon_count $what added:\n");
6691 write_logfile_entry(
6692 " $first at input line $first_added_semicolon_at\n");
6694 if ( $added_semicolon_count > 1 ) {
6695 write_logfile_entry(
6696 " Last at input line $last_added_semicolon_at\n");
6698 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
6699 write_logfile_entry("\n");
6702 if ( $deleted_semicolon_count > 0 ) {
6703 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6705 ( $deleted_semicolon_count > 1 )
6708 write_logfile_entry(
6709 "$deleted_semicolon_count unnecessary $what deleted:\n");
6710 write_logfile_entry(
6711 " $first at input line $first_deleted_semicolon_at\n");
6713 if ( $deleted_semicolon_count > 1 ) {
6714 write_logfile_entry(
6715 " Last at input line $last_deleted_semicolon_at\n");
6717 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
6718 write_logfile_entry("\n");
6721 if ( $embedded_tab_count > 0 ) {
6722 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6724 ( $embedded_tab_count > 1 )
6725 ? "quotes or patterns"
6726 : "quote or pattern";
6727 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6728 write_logfile_entry(
6729 "This means the display of this script could vary with device or software\n"
6731 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
6733 if ( $embedded_tab_count > 1 ) {
6734 write_logfile_entry(
6735 " Last at input line $last_embedded_tab_at\n");
6737 write_logfile_entry("\n");
6740 if ($first_tabbing_disagreement) {
6741 write_logfile_entry(
6742 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
6746 if ($in_tabbing_disagreement) {
6747 write_logfile_entry(
6748 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
6753 if ($last_tabbing_disagreement) {
6755 write_logfile_entry(
6756 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
6760 write_logfile_entry("No indentation disagreement seen\n");
6763 write_logfile_entry("\n");
6765 $vertical_aligner_object->report_anything_unusual();
6767 $file_writer_object->report_line_length_errors();
6772 # This routine is called to check the Opts hash after it is defined
6775 my ( $tabbing_string, $tab_msg );
6777 make_static_block_comment_pattern();
6778 make_static_side_comment_pattern();
6779 make_closing_side_comment_prefix();
6780 make_closing_side_comment_list_pattern();
6781 $format_skipping_pattern_begin =
6782 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
6783 $format_skipping_pattern_end =
6784 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
6786 # If closing side comments ARE selected, then we can safely
6787 # delete old closing side comments unless closing side comment
6788 # warnings are requested. This is a good idea because it will
6789 # eliminate any old csc's which fall below the line count threshold.
6790 # We cannot do this if warnings are turned on, though, because we
6791 # might delete some text which has been added. So that must
6792 # be handled when comments are created.
6793 if ( $rOpts->{'closing-side-comments'} ) {
6794 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
6795 $rOpts->{'delete-closing-side-comments'} = 1;
6799 # If closing side comments ARE NOT selected, but warnings ARE
6800 # selected and we ARE DELETING csc's, then we will pretend to be
6801 # adding with a huge interval. This will force the comments to be
6802 # generated for comparison with the old comments, but not added.
6803 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
6804 if ( $rOpts->{'delete-closing-side-comments'} ) {
6805 $rOpts->{'delete-closing-side-comments'} = 0;
6806 $rOpts->{'closing-side-comments'} = 1;
6807 $rOpts->{'closing-side-comment-interval'} = 100000000;
6812 make_block_brace_vertical_tightness_pattern();
6814 if ( $rOpts->{'line-up-parentheses'} ) {
6816 if ( $rOpts->{'indent-only'}
6817 || !$rOpts->{'add-newlines'}
6818 || !$rOpts->{'delete-old-newlines'} )
6821 -----------------------------------------------------------------------
6822 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
6824 The -lp indentation logic requires that perltidy be able to coordinate
6825 arbitrarily large numbers of line breakpoints. This isn't possible
6826 with these flags. Sometimes an acceptable workaround is to use -wocb=3
6827 -----------------------------------------------------------------------
6829 $rOpts->{'line-up-parentheses'} = 0;
6833 # At present, tabs are not compatable with the line-up-parentheses style
6834 # (it would be possible to entab the total leading whitespace
6835 # just prior to writing the line, if desired).
6836 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
6838 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
6840 $rOpts->{'tabs'} = 0;
6843 # Likewise, tabs are not compatable with outdenting..
6844 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
6846 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
6848 $rOpts->{'tabs'} = 0;
6851 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
6853 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
6855 $rOpts->{'tabs'} = 0;
6858 if ( !$rOpts->{'space-for-semicolon'} ) {
6859 $want_left_space{'f'} = -1;
6862 if ( $rOpts->{'space-terminal-semicolon'} ) {
6863 $want_left_space{';'} = 1;
6866 # implement outdenting preferences for keywords
6867 %outdent_keyword = ();
6870 @_ = qw(next last redo goto return);
6872 # override defaults if requested
6873 if ( $_ = $rOpts->{'outdent-keyword-list'} ) {
6879 # FUTURE: if not a keyword, assume that it is an identifier
6881 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
6882 $outdent_keyword{$_} = 1;
6885 warn "ignoring '$_' in -okwl list; not a perl keyword";
6889 # implement user whitespace preferences
6890 if ( $_ = $rOpts->{'want-left-space'} ) {
6894 @want_left_space{@_} = (1) x scalar(@_);
6897 if ( $_ = $rOpts->{'want-right-space'} ) {
6901 @want_right_space{@_} = (1) x scalar(@_);
6903 if ( $_ = $rOpts->{'nowant-left-space'} ) {
6907 @want_left_space{@_} = (-1) x scalar(@_);
6910 if ( $_ = $rOpts->{'nowant-right-space'} ) {
6914 @want_right_space{@_} = (-1) x scalar(@_);
6916 if ( $rOpts->{'dump-want-left-space'} ) {
6917 dump_want_left_space(*STDOUT);
6921 if ( $rOpts->{'dump-want-right-space'} ) {
6922 dump_want_right_space(*STDOUT);
6926 # default keywords for which space is introduced before an opening paren
6927 # (at present, including them messes up vertical alignment)
6928 @_ = qw(my local our and or err eq ne if else elsif until
6929 unless while for foreach return switch case given when);
6930 @space_after_keyword{@_} = (1) x scalar(@_);
6932 # allow user to modify these defaults
6933 if ( $_ = $rOpts->{'space-after-keyword'} ) {
6937 @space_after_keyword{@_} = (1) x scalar(@_);
6940 if ( $_ = $rOpts->{'nospace-after-keyword'} ) {
6944 @space_after_keyword{@_} = (0) x scalar(@_);
6947 # implement user break preferences
6948 if ( $_ = $rOpts->{'want-break-after'} ) {
6950 foreach my $tok (@_) {
6951 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
6952 my $lbs = $left_bond_strength{$tok};
6953 my $rbs = $right_bond_strength{$tok};
6954 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
6955 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
6961 if ( $_ = $rOpts->{'want-break-before'} ) {
6965 foreach my $tok (@_) {
6966 my $lbs = $left_bond_strength{$tok};
6967 my $rbs = $right_bond_strength{$tok};
6968 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
6969 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
6975 # make note if breaks are before certain key types
6976 %want_break_before = ();
6979 my $tok ( '.', ',', ':', '?', '&&', '||', 'and', 'or', 'err', 'xor' )
6981 $want_break_before{$tok} =
6982 $left_bond_strength{$tok} < $right_bond_strength{$tok};
6985 # Coordinate ?/: breaks, which must be similar
6986 if ( !$want_break_before{':'} ) {
6987 $want_break_before{'?'} = $want_break_before{':'};
6988 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
6989 $left_bond_strength{'?'} = NO_BREAK;
6992 # Define here tokens which may follow the closing brace of a do statement
6993 # on the same line, as in:
6994 # } while ( $something);
6995 @_ = qw(until while unless if ; );
6997 @is_do_follower{@_} = (1) x scalar(@_);
6999 # These tokens may follow the closing brace of an if or elsif block.
7000 # In other words, for cuddled else we want code to look like:
7001 # } elsif ( $something) {
7003 if ( $rOpts->{'cuddled-else'} ) {
7004 @_ = qw(else elsif);
7005 @is_if_brace_follower{@_} = (1) x scalar(@_);
7008 %is_if_brace_follower = ();
7011 # nothing can follow the closing curly of an else { } block:
7012 %is_else_brace_follower = ();
7014 # what can follow a multi-line anonymous sub definition closing curly:
7015 @_ = qw# ; : => or and && || ) #;
7017 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7019 # what can follow a one-line anonynomous sub closing curly:
7020 # one-line anonumous subs also have ']' here...
7021 # see tk3.t and PP.pm
7022 @_ = qw# ; : => or and && || ) ] #;
7024 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7026 # What can follow a closing curly of a block
7027 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7028 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7029 @_ = qw# ; : => or and && || ) #;
7032 # allow cuddled continue if cuddled else is specified
7033 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7035 @is_other_brace_follower{@_} = (1) x scalar(@_);
7037 $right_bond_strength{'{'} = WEAK;
7038 $left_bond_strength{'{'} = VERY_STRONG;
7040 # make -l=0 equal to -l=infinite
7041 if ( !$rOpts->{'maximum-line-length'} ) {
7042 $rOpts->{'maximum-line-length'} = 1000000;
7045 # make -lbl=0 equal to -lbl=infinite
7046 if ( !$rOpts->{'long-block-line-count'} ) {
7047 $rOpts->{'long-block-line-count'} = 1000000;
7050 my $ole = $rOpts->{'output-line-ending'};
7059 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7060 my $str = join " ", keys %endings;
7062 Unrecognized line ending '$ole'; expecting one of: $str
7065 if ( $rOpts->{'preserve-line-endings'} ) {
7066 warn "Ignoring -ple; conflicts with -ole\n";
7067 $rOpts->{'preserve-line-endings'} = undef;
7071 # hashes used to simplify setting whitespace
7073 '{' => $rOpts->{'brace-tightness'},
7074 '}' => $rOpts->{'brace-tightness'},
7075 '(' => $rOpts->{'paren-tightness'},
7076 ')' => $rOpts->{'paren-tightness'},
7077 '[' => $rOpts->{'square-bracket-tightness'},
7078 ']' => $rOpts->{'square-bracket-tightness'},
7087 # frequently used parameters
7088 $rOpts_add_newlines = $rOpts->{'add-newlines'};
7089 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
7090 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7091 $rOpts_block_brace_vertical_tightness =
7092 $rOpts->{'block-brace-vertical-tightness'};
7093 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
7094 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7095 $rOpts_break_at_old_trinary_breakpoints =
7096 $rOpts->{'break-at-old-trinary-breakpoints'};
7097 $rOpts_break_at_old_comma_breakpoints =
7098 $rOpts->{'break-at-old-comma-breakpoints'};
7099 $rOpts_break_at_old_keyword_breakpoints =
7100 $rOpts->{'break-at-old-keyword-breakpoints'};
7101 $rOpts_break_at_old_logical_breakpoints =
7102 $rOpts->{'break-at-old-logical-breakpoints'};
7103 $rOpts_closing_side_comment_else_flag =
7104 $rOpts->{'closing-side-comment-else-flag'};
7105 $rOpts_closing_side_comment_maximum_text =
7106 $rOpts->{'closing-side-comment-maximum-text'};
7107 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7108 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
7109 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
7110 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
7111 $rOpts_indent_columns = $rOpts->{'indent-columns'};
7112 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
7113 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7114 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
7115 $rOpts_short_concatenation_item_length =
7116 $rOpts->{'short-concatenation-item-length'};
7117 $rOpts_swallow_optional_blank_lines =
7118 $rOpts->{'swallow-optional-blank-lines'};
7119 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
7120 $rOpts_format_skipping = $rOpts->{'format-skipping'};
7121 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
7122 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
7123 $half_maximum_line_length = $rOpts_maximum_line_length / 2;
7125 # Note that both opening and closing tokens can access the opening
7126 # and closing flags of their container types.
7127 %opening_vertical_tightness = (
7128 '(' => $rOpts->{'paren-vertical-tightness'},
7129 '{' => $rOpts->{'brace-vertical-tightness'},
7130 '[' => $rOpts->{'square-bracket-vertical-tightness'},
7131 ')' => $rOpts->{'paren-vertical-tightness'},
7132 '}' => $rOpts->{'brace-vertical-tightness'},
7133 ']' => $rOpts->{'square-bracket-vertical-tightness'},
7136 %closing_vertical_tightness = (
7137 '(' => $rOpts->{'paren-vertical-tightness-closing'},
7138 '{' => $rOpts->{'brace-vertical-tightness-closing'},
7139 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7140 ')' => $rOpts->{'paren-vertical-tightness-closing'},
7141 '}' => $rOpts->{'brace-vertical-tightness-closing'},
7142 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7145 # assume flag for '>' same as ')' for closing qw quotes
7146 %closing_token_indentation = (
7147 ')' => $rOpts->{'closing-paren-indentation'},
7148 '}' => $rOpts->{'closing-brace-indentation'},
7149 ']' => $rOpts->{'closing-square-bracket-indentation'},
7150 '>' => $rOpts->{'closing-paren-indentation'},
7153 %opening_token_right = (
7154 '(' => $rOpts->{'opening-paren-right'},
7155 '{' => $rOpts->{'opening-hash-brace-right'},
7156 '[' => $rOpts->{'opening-square-bracket-right'},
7159 %stack_opening_token = (
7160 '(' => $rOpts->{'stack-opening-paren'},
7161 '{' => $rOpts->{'stack-opening-hash-brace'},
7162 '[' => $rOpts->{'stack-opening-square-bracket'},
7165 %stack_closing_token = (
7166 ')' => $rOpts->{'stack-closing-paren'},
7167 '}' => $rOpts->{'stack-closing-hash-brace'},
7168 ']' => $rOpts->{'stack-closing-square-bracket'},
7172 sub make_static_block_comment_pattern {
7174 # create the pattern used to identify static block comments
7175 $static_block_comment_pattern = '^\s*##';
7177 # allow the user to change it
7178 if ( $rOpts->{'static-block-comment-prefix'} ) {
7179 my $prefix = $rOpts->{'static-block-comment-prefix'};
7180 $prefix =~ s/^\s*//;
7181 my $pattern = $prefix;
7183 # user may give leading caret to force matching left comments only
7184 if ( $prefix !~ /^\^#/ ) {
7185 if ( $prefix !~ /^#/ ) {
7187 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7189 $pattern = '^\s*' . $prefix;
7191 eval "'##'=~/$pattern/";
7194 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7196 $static_block_comment_pattern = $pattern;
7200 sub make_format_skipping_pattern {
7201 my ( $opt_name, $default ) = @_;
7202 my $param = $rOpts->{$opt_name};
7203 unless ($param) { $param = $default }
7205 if ( $param !~ /^#/ ) {
7206 die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7208 my $pattern = '^' . $param . '\s';
7209 eval "'#'=~/$pattern/";
7212 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7217 sub make_closing_side_comment_list_pattern {
7219 # turn any input list into a regex for recognizing selected block types
7220 $closing_side_comment_list_pattern = '^\w+';
7221 if ( defined( $rOpts->{'closing-side-comment-list'} )
7222 && $rOpts->{'closing-side-comment-list'} )
7224 $closing_side_comment_list_pattern =
7225 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7229 sub make_bli_pattern {
7231 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7232 && $rOpts->{'brace-left-and-indent-list'} )
7234 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7237 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7240 sub make_block_brace_vertical_tightness_pattern {
7242 # turn any input list into a regex for recognizing selected block types
7243 $block_brace_vertical_tightness_pattern =
7244 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7246 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7247 && $rOpts->{'block-brace-vertical-tightness-list'} )
7249 $block_brace_vertical_tightness_pattern =
7250 make_block_pattern( '-bbvtl',
7251 $rOpts->{'block-brace-vertical-tightness-list'} );
7255 sub make_block_pattern {
7257 # given a string of block-type keywords, return a regex to match them
7258 # The only tricky part is that labels are indicated with a single ':'
7259 # and the 'sub' token text may have additional text after it (name of
7264 # input string: "if else elsif unless while for foreach do : sub";
7265 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7267 my ( $abbrev, $string ) = @_;
7268 $string =~ s/^\s+//;
7269 $string =~ s/\s+$//;
7270 my @list = split /\s+/, $string;
7276 if ( $i eq 'sub' ) {
7278 elsif ( $i eq ':' ) {
7279 push @words, '\w+:';
7281 elsif ( $i =~ /^\w/ ) {
7285 warn "unrecognized block type $i after $abbrev, ignoring\n";
7288 my $pattern = '(' . join( '|', @words ) . ')$';
7289 if ( $seen{'sub'} ) {
7290 $pattern = '(' . $pattern . '|sub)';
7292 $pattern = '^' . $pattern;
7296 sub make_static_side_comment_pattern {
7298 # create the pattern used to identify static side comments
7299 $static_side_comment_pattern = '^##';
7301 # allow the user to change it
7302 if ( $rOpts->{'static-side-comment-prefix'} ) {
7303 my $prefix = $rOpts->{'static-side-comment-prefix'};
7304 $prefix =~ s/^\s*//;
7305 my $pattern = '^' . $prefix;
7306 eval "'##'=~/$pattern/";
7309 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
7311 $static_side_comment_pattern = $pattern;
7315 sub make_closing_side_comment_prefix {
7317 # Be sure we have a valid closing side comment prefix
7318 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
7319 my $csc_prefix_pattern;
7320 if ( !defined($csc_prefix) ) {
7321 $csc_prefix = '## end';
7322 $csc_prefix_pattern = '^##\s+end';
7325 my $test_csc_prefix = $csc_prefix;
7326 if ( $test_csc_prefix !~ /^#/ ) {
7327 $test_csc_prefix = '#' . $test_csc_prefix;
7330 # make a regex to recognize the prefix
7331 my $test_csc_prefix_pattern = $test_csc_prefix;
7333 # escape any special characters
7334 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
7336 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
7338 # allow exact number of intermediate spaces to vary
7339 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
7341 # make sure we have a good pattern
7342 # if we fail this we probably have an error in escaping
7344 eval "'##'=~/$test_csc_prefix_pattern/";
7347 # shouldn't happen..must have screwed up escaping, above
7348 report_definite_bug();
7350 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
7352 # just warn and keep going with defaults
7353 warn "Please consider using a simpler -cscp prefix\n";
7354 warn "Using default -cscp instead; please check output\n";
7357 $csc_prefix = $test_csc_prefix;
7358 $csc_prefix_pattern = $test_csc_prefix_pattern;
7361 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
7362 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
7365 sub dump_want_left_space {
7369 These values are the main control of whitespace to the left of a token type;
7370 They may be altered with the -wls parameter.
7371 For a list of token types, use perltidy --dump-token-types (-dtt)
7372 1 means the token wants a space to its left
7373 -1 means the token does not want a space to its left
7374 ------------------------------------------------------------------------
7376 foreach ( sort keys %want_left_space ) {
7377 print $fh "$_\t$want_left_space{$_}\n";
7381 sub dump_want_right_space {
7385 These values are the main control of whitespace to the right of a token type;
7386 They may be altered with the -wrs parameter.
7387 For a list of token types, use perltidy --dump-token-types (-dtt)
7388 1 means the token wants a space to its right
7389 -1 means the token does not want a space to its right
7390 ------------------------------------------------------------------------
7392 foreach ( sort keys %want_right_space ) {
7393 print $fh "$_\t$want_right_space{$_}\n";
7397 { # begin is_essential_whitespace
7399 my %is_sort_grep_map;
7404 @_ = qw(sort grep map);
7405 @is_sort_grep_map{@_} = (1) x scalar(@_);
7407 @_ = qw(for foreach);
7408 @is_for_foreach{@_} = (1) x scalar(@_);
7412 sub is_essential_whitespace {
7414 # Essential whitespace means whitespace which cannot be safely deleted
7415 # without risking the introduction of a syntax error.
7416 # We are given three tokens and their types:
7417 # ($tokenl, $typel) is the token to the left of the space in question
7418 # ($tokenr, $typer) is the token to the right of the space in question
7419 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7421 # This is a slow routine but is not needed too often except when -mangle
7424 # Note: This routine should almost never need to be changed. It is
7425 # for avoiding syntax problems rather than for formatting.
7426 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7428 # never combine two bare words or numbers
7429 my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) )
7431 # do not combine a number with a concatination dot
7432 # example: pom.caputo:
7433 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7434 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7435 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7437 # do not join a minus with a bare word, because you might form
7438 # a file test operator. Example from Complex.pm:
7439 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7440 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7442 # and something like this could become ambiguous without space
7444 # use constant III=>1;
7448 || ( ( $tokenl eq '-' )
7449 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7451 # '= -' should not become =- or you will get a warning
7453 # || ($tokenr eq '-')
7455 # keep a space between a quote and a bareword to prevent the
7456 # bareword from becomming a quote modifier.
7457 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7459 # keep a space between a token ending in '$' and any word;
7460 # this caused trouble: "die @$ if $@"
7461 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7462 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7464 # perl is very fussy about spaces before <<
7465 || ( $tokenr =~ /^\<\</ )
7467 # avoid combining tokens to create new meanings. Example:
7468 # $a+ +$b must not become $a++$b
7469 || ( $is_digraph{ $tokenl . $tokenr } )
7470 || ( $is_trigraph{ $tokenl . $tokenr } )
7472 # another example: do not combine these two &'s:
7473 # allow_options & &OPT_EXECCGI
7474 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7476 # don't combine $$ or $# with any alphanumeric
7477 # (testfile mangle.t with --mangle)
7478 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7480 # retain any space after possible filehandle
7481 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7482 || ( $typel eq 'Z' || $typell eq 'Z' )
7484 # keep paren separate in 'use Foo::Bar ()'
7488 && $tokenll eq 'use' )
7490 # keep any space between filehandle and paren:
7491 # file mangle.t with --mangle:
7492 || ( $typel eq 'Y' && $tokenr eq '(' )
7494 # retain any space after here doc operator ( hereerr.t)
7495 || ( $typel eq 'h' )
7497 # FIXME: this needs some further work; extrude.t has test cases
7498 # it is safest to retain any space after start of ? : operator
7499 # because of perl's quirky parser.
7500 # ie, this line will fail if you remove the space after the '?':
7501 # $b=join $comma ? ',' : ':', @_; # ok
7502 # $b=join $comma ?',' : ':', @_; # error!
7504 # $b=join $comma?',' : ':', @_; # not a problem!
7505 ## || ($typel eq '?')
7507 # be careful with a space around ++ and --, to avoid ambiguity as to
7508 # which token it applies
7509 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7510 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7512 # need space after foreach my; for example, this will fail in
7513 # older versions of Perl:
7514 # foreach my$ft(@filetypes)...
7519 && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/
7522 # must have space between grep and left paren; "grep(" will fail
7523 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7525 # don't stick numbers next to left parens, as in:
7526 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7527 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7529 # don't join something like: for bla::bla:: abc
7530 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7531 || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) )
7532 ; # the value of this long logic sequence is the result we want
7537 sub set_white_space_flag {
7539 # This routine examines each pair of nonblank tokens and
7540 # sets values for array @white_space_flag.
7542 # $white_space_flag[$j] is a flag indicating whether a white space
7543 # BEFORE token $j is needed, with the following values:
7545 # -1 do not want a space before token $j
7546 # 0 optional space or $j is a whitespace
7547 # 1 want a space before token $j
7550 # The values for the first token will be defined based
7551 # upon the contents of the "to_go" output array.
7553 # Note: retain debug print statements because they are usually
7554 # required after adding new token types.
7558 # initialize these global hashes, which control the use of
7559 # whitespace around tokens:
7564 # %space_after_keyword
7566 # Many token types are identical to the tokens themselves.
7567 # See the tokenizer for a complete list. Here are some special types:
7569 # f = semicolon in for statement
7572 # Note that :: is excluded since it should be contained in an identifier
7573 # Note that '->' is excluded because it never gets space
7574 # parentheses and brackets are excluded since they are handled specially
7575 # curly braces are included but may be overridden by logic, such as
7578 # NEW_TOKENS: create a whitespace rule here. This can be as
7579 # simple as adding your new letter to @spaces_both_sides, for
7583 @is_opening_type{@_} = (1) x scalar(@_);
7586 @is_closing_type{@_} = (1) x scalar(@_);
7588 my @spaces_both_sides = qw"
7589 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7590 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>=
7591 &&= ||= //= <=> A k f w F n C Y U G v
7594 my @spaces_left_side = qw"
7595 t ! ~ m p { \ h pp mm Z j
7597 push( @spaces_left_side, '#' ); # avoids warning message
7599 my @spaces_right_side = qw"
7600 ; } ) ] R J ++ -- **=
7602 push( @spaces_right_side, ',' ); # avoids warning message
7603 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7604 @want_right_space{@spaces_both_sides} =
7605 (1) x scalar(@spaces_both_sides);
7606 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
7607 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7608 @want_left_space{@spaces_right_side} =
7609 (-1) x scalar(@spaces_right_side);
7610 @want_right_space{@spaces_right_side} =
7611 (1) x scalar(@spaces_right_side);
7612 $want_left_space{'L'} = WS_NO;
7613 $want_left_space{'->'} = WS_NO;
7614 $want_right_space{'->'} = WS_NO;
7615 $want_left_space{'**'} = WS_NO;
7616 $want_right_space{'**'} = WS_NO;
7618 # hash type information must stay tightly bound
7620 $binary_ws_rules{'i'}{'L'} = WS_NO;
7621 $binary_ws_rules{'i'}{'{'} = WS_YES;
7622 $binary_ws_rules{'k'}{'{'} = WS_YES;
7623 $binary_ws_rules{'U'}{'{'} = WS_YES;
7624 $binary_ws_rules{'i'}{'['} = WS_NO;
7625 $binary_ws_rules{'R'}{'L'} = WS_NO;
7626 $binary_ws_rules{'R'}{'{'} = WS_NO;
7627 $binary_ws_rules{'t'}{'L'} = WS_NO;
7628 $binary_ws_rules{'t'}{'{'} = WS_NO;
7629 $binary_ws_rules{'}'}{'L'} = WS_NO;
7630 $binary_ws_rules{'}'}{'{'} = WS_NO;
7631 $binary_ws_rules{'$'}{'L'} = WS_NO;
7632 $binary_ws_rules{'$'}{'{'} = WS_NO;
7633 $binary_ws_rules{'@'}{'L'} = WS_NO;
7634 $binary_ws_rules{'@'}{'{'} = WS_NO;
7635 $binary_ws_rules{'='}{'L'} = WS_YES;
7637 # the following includes ') {'
7638 # as in : if ( xxx ) { yyy }
7639 $binary_ws_rules{']'}{'L'} = WS_NO;
7640 $binary_ws_rules{']'}{'{'} = WS_NO;
7641 $binary_ws_rules{')'}{'{'} = WS_YES;
7642 $binary_ws_rules{')'}{'['} = WS_NO;
7643 $binary_ws_rules{']'}{'['} = WS_NO;
7644 $binary_ws_rules{']'}{'{'} = WS_NO;
7645 $binary_ws_rules{'}'}{'['} = WS_NO;
7646 $binary_ws_rules{'R'}{'['} = WS_NO;
7648 $binary_ws_rules{']'}{'++'} = WS_NO;
7649 $binary_ws_rules{']'}{'--'} = WS_NO;
7650 $binary_ws_rules{')'}{'++'} = WS_NO;
7651 $binary_ws_rules{')'}{'--'} = WS_NO;
7653 $binary_ws_rules{'R'}{'++'} = WS_NO;
7654 $binary_ws_rules{'R'}{'--'} = WS_NO;
7656 $binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
7657 $binary_ws_rules{'w'}{':'} = WS_NO;
7658 $binary_ws_rules{'i'}{'Q'} = WS_YES;
7659 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
7661 # FIXME: we need to split 'i' into variables and functions
7662 # and have no space for functions but space for variables. For now,
7663 # I have a special patch in the special rules below
7664 $binary_ws_rules{'i'}{'('} = WS_NO;
7666 $binary_ws_rules{'w'}{'('} = WS_NO;
7667 $binary_ws_rules{'w'}{'{'} = WS_YES;
7669 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7670 my ( $last_token, $last_type, $last_block_type, $token, $type,
7672 my (@white_space_flag);
7673 my $j_tight_closing_paren = -1;
7675 if ( $max_index_to_go >= 0 ) {
7676 $token = $tokens_to_go[$max_index_to_go];
7677 $type = $types_to_go[$max_index_to_go];
7678 $block_type = $block_type_to_go[$max_index_to_go];
7686 # loop over all tokens
7689 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7691 if ( $$rtoken_type[$j] eq 'b' ) {
7692 $white_space_flag[$j] = WS_OPTIONAL;
7696 # set a default value, to be changed as needed
7698 $last_token = $token;
7700 $last_block_type = $block_type;
7701 $token = $$rtokens[$j];
7702 $type = $$rtoken_type[$j];
7703 $block_type = $$rblock_type[$j];
7705 #---------------------------------------------------------------
7707 # handle space on the inside of opening braces
7708 #---------------------------------------------------------------
7711 if ( $is_opening_type{$last_type} ) {
7713 $j_tight_closing_paren = -1;
7715 # let's keep empty matched braces together: () {} []
7717 if ( $token eq $matching_token{$last_token} ) {
7727 # we're considering the right of an opening brace
7728 # tightness = 0 means always pad inside with space
7729 # tightness = 1 means pad inside if "complex"
7730 # tightness = 2 means never pad inside with space
7733 if ( $last_type eq '{'
7734 && $last_token eq '{'
7735 && $last_block_type )
7737 $tightness = $rOpts_block_brace_tightness;
7739 else { $tightness = $tightness{$last_token} }
7741 if ( $tightness <= 0 ) {
7744 elsif ( $tightness > 1 ) {
7749 # Patch to count '-foo' as single token so that
7750 # each of $a{-foo} and $a{foo} and $a{'foo'} do
7751 # not get spaces with default formatting.
7755 && $last_token eq '{'
7756 && $$rtoken_type[ $j + 1 ] eq 'w' );
7758 # $j_next is where a closing token should be if
7759 # the container has a single token
7761 ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
7764 my $tok_next = $$rtokens[$j_next];
7765 my $type_next = $$rtoken_type[$j_next];
7767 # for tightness = 1, if there is just one token
7768 # within the matching pair, we will keep it tight
7770 $tok_next eq $matching_token{$last_token}
7772 # but watch out for this: [ [ ] (misc.t)
7773 && $last_token ne $token
7777 # remember where to put the space for the closing paren
7778 $j_tight_closing_paren = $j_next;
7786 } # done with opening braces and brackets
7788 if FORMATTER_DEBUG_FLAG_WHITE;
7790 #---------------------------------------------------------------
7792 # handle space on inside of closing brace pairs
7793 #---------------------------------------------------------------
7796 if ( $is_closing_type{$type} ) {
7798 if ( $j == $j_tight_closing_paren ) {
7800 $j_tight_closing_paren = -1;
7805 if ( !defined($ws) ) {
7808 if ( $type eq '}' && $token eq '}' && $block_type ) {
7809 $tightness = $rOpts_block_brace_tightness;
7811 else { $tightness = $tightness{$token} }
7813 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7819 if FORMATTER_DEBUG_FLAG_WHITE;
7821 #---------------------------------------------------------------
7823 # use the binary table
7824 #---------------------------------------------------------------
7825 if ( !defined($ws) ) {
7826 $ws = $binary_ws_rules{$last_type}{$type};
7829 if FORMATTER_DEBUG_FLAG_WHITE;
7831 #---------------------------------------------------------------
7833 # some special cases
7834 #---------------------------------------------------------------
7835 if ( $token eq '(' ) {
7837 # This will have to be tweaked as tokenization changes.
7838 # We usually want a space at '} (', for example:
7839 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7842 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
7843 # At present, the above & block is marked as type L/R so this case
7844 # won't go through here.
7845 if ( $last_type eq '}' ) { $ws = WS_YES }
7847 # NOTE: some older versions of Perl had occasional problems if
7848 # spaces are introduced between keywords or functions and opening
7849 # parens. So the default is not to do this except is certain
7850 # cases. The current Perl seems to tolerate spaces.
7852 # Space between keyword and '('
7853 elsif ( $last_type eq 'k' ) {
7855 unless ( $rOpts_space_keyword_paren
7856 || $space_after_keyword{$last_token} );
7859 # Space between function and '('
7860 # -----------------------------------------------------
7861 # 'w' and 'i' checks for something like:
7862 # myfun( &myfun( ->myfun(
7863 # -----------------------------------------------------
7864 elsif (( $last_type =~ /^[wU]$/ )
7865 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7867 $ws = WS_NO unless ($rOpts_space_function_paren);
7870 # space between something like $i and ( in
7871 # for $i ( 0 .. 20 ) {
7872 # FIXME: eventually, type 'i' needs to be split into multiple
7873 # token types so this can be a hardwired rule.
7874 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7878 # allow constant function followed by '()' to retain no space
7879 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
7884 # patch for SWITCH/CASE: make space at ']{' optional
7885 # since the '{' might begin a case or when block
7886 elsif ( $token eq '{' && $last_token eq ']' ) {
7890 # keep space between 'sub' and '{' for anonymous sub definition
7891 if ( $type eq '{' ) {
7892 if ( $last_token eq 'sub' ) {
7896 # this is needed to avoid no space in '){'
7897 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7899 # avoid any space before the brace or bracket in something like
7900 # @opts{'a','b',...}
7901 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7906 elsif ( $type eq 'i' ) {
7908 # never a space before ->
7909 if ( $token =~ /^\-\>/ ) {
7914 # retain any space between '-' and bare word
7915 elsif ( $type eq 'w' || $type eq 'C' ) {
7916 $ws = WS_OPTIONAL if $last_type eq '-';
7918 # never a space before ->
7919 if ( $token =~ /^\-\>/ ) {
7924 # retain any space between '-' and bare word
7925 # example: avoid space between 'USER' and '-' here:
7926 # $myhash{USER-NAME}='steve';
7927 elsif ( $type eq 'm' || $type eq '-' ) {
7928 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
7931 # always space before side comment
7932 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
7934 # always preserver whatever space was used after a possible
7935 # filehandle or here doc operator
7936 if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) {
7941 if FORMATTER_DEBUG_FLAG_WHITE;
7943 #---------------------------------------------------------------
7945 # default rules not covered above
7946 #---------------------------------------------------------------
7947 # if we fall through to here,
7948 # look at the pre-defined hash tables for the two tokens, and
7949 # if (they are equal) use the common value
7950 # if (either is zero or undef) use the other
7951 # if (either is -1) use it
7965 if ( !defined($ws) ) {
7966 my $wl = $want_left_space{$type};
7967 my $wr = $want_right_space{$last_type};
7968 if ( !defined($wl) ) { $wl = 0 }
7969 if ( !defined($wr) ) { $wr = 0 }
7970 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
7973 if ( !defined($ws) ) {
7976 "WS flag is undefined for tokens $last_token $token\n");
7979 # Treat newline as a whitespace. Otherwise, we might combine
7980 # 'Send' and '-recipients' here according to the above rules:
7981 # my $msg = new Fax::Send
7982 # -recipients => $to,
7984 if ( $ws == 0 && $j == 0 ) { $ws = 1 }
7989 && ( $last_type !~ /^[Zh]$/ ) )
7992 # If this happens, we have a non-fatal but undesirable
7993 # hole in the above rules which should be patched.
7995 "WS flag is zero for tokens $last_token $token\n");
7997 $white_space_flag[$j] = $ws;
7999 FORMATTER_DEBUG_FLAG_WHITE && do {
8000 my $str = substr( $last_token, 0, 15 );
8001 $str .= ' ' x ( 16 - length($str) );
8002 if ( !defined($ws_1) ) { $ws_1 = "*" }
8003 if ( !defined($ws_2) ) { $ws_2 = "*" }
8004 if ( !defined($ws_3) ) { $ws_3 = "*" }
8005 if ( !defined($ws_4) ) { $ws_4 = "*" }
8007 "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8010 return \@white_space_flag;
8013 { # begin print_line_of_tokens
8020 my $rcontainer_type;
8021 my $rcontainer_environment;
8024 my $rnesting_tokens;
8026 my $rnesting_blocks;
8029 my $python_indentation_level;
8031 # These local token variables are stored by store_token_to_go:
8034 my $container_environment;
8036 my $in_continued_quote;
8039 my $no_internal_newlines;
8045 # routine to pull the jth token from the line of tokens
8048 $token = $$rtokens[$j];
8049 $type = $$rtoken_type[$j];
8050 $block_type = $$rblock_type[$j];
8051 $container_type = $$rcontainer_type[$j];
8052 $container_environment = $$rcontainer_environment[$j];
8053 $type_sequence = $$rtype_sequence[$j];
8054 $level = $$rlevels[$j];
8055 $slevel = $$rslevels[$j];
8056 $nesting_blocks = $$rnesting_blocks[$j];
8057 $ci_level = $$rci_levels[$j];
8063 sub save_current_token {
8066 $block_type, $ci_level,
8067 $container_environment, $container_type,
8068 $in_continued_quote, $level,
8069 $nesting_blocks, $no_internal_newlines,
8071 $type, $type_sequence,
8075 sub restore_current_token {
8077 $block_type, $ci_level,
8078 $container_environment, $container_type,
8079 $in_continued_quote, $level,
8080 $nesting_blocks, $no_internal_newlines,
8082 $type, $type_sequence,
8087 # Routine to place the current token into the output stream.
8088 # Called once per output token.
8089 sub store_token_to_go {
8091 my $flag = $no_internal_newlines;
8092 if ( $_[0] ) { $flag = 1 }
8094 $tokens_to_go[ ++$max_index_to_go ] = $token;
8095 $types_to_go[$max_index_to_go] = $type;
8096 $nobreak_to_go[$max_index_to_go] = $flag;
8097 $old_breakpoint_to_go[$max_index_to_go] = 0;
8098 $forced_breakpoint_to_go[$max_index_to_go] = 0;
8099 $block_type_to_go[$max_index_to_go] = $block_type;
8100 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
8101 $container_environment_to_go[$max_index_to_go] = $container_environment;
8102 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
8103 $ci_levels_to_go[$max_index_to_go] = $ci_level;
8104 $mate_index_to_go[$max_index_to_go] = -1;
8105 $matching_token_to_go[$max_index_to_go] = '';
8107 # Note: negative levels are currently retained as a diagnostic so that
8108 # the 'final indentation level' is correctly reported for bad scripts.
8109 # But this means that every use of $level as an index must be checked.
8110 # If this becomes too much of a problem, we might give up and just clip
8112 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8113 $levels_to_go[$max_index_to_go] = $level;
8114 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8115 $lengths_to_go[ $max_index_to_go + 1 ] =
8116 $lengths_to_go[$max_index_to_go] + length($token);
8118 # Define the indentation that this token would have if it started
8119 # a new line. We have to do this now because we need to know this
8120 # when considering one-line blocks.
8121 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8123 if ( $type ne 'b' ) {
8124 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8125 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
8126 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8127 $last_nonblank_index_to_go = $max_index_to_go;
8128 $last_nonblank_type_to_go = $type;
8129 $last_nonblank_token_to_go = $token;
8130 if ( $type eq ',' ) {
8131 $comma_count_in_batch++;
8135 FORMATTER_DEBUG_FLAG_STORE && do {
8136 my ( $a, $b, $c ) = caller();
8138 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8142 sub insert_new_token_to_go {
8144 # insert a new token into the output stream. use same level as
8145 # previous token; assumes a character at max_index_to_go.
8146 save_current_token();
8147 ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8149 if ( $max_index_to_go == UNDEFINED_INDEX ) {
8150 warning("code bug: bad call to insert_new_token_to_go\n");
8152 $level = $levels_to_go[$max_index_to_go];
8154 # FIXME: it seems to be necessary to use the next, rather than
8155 # previous, value of this variable when creating a new blank (align.t)
8156 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
8157 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
8158 $ci_level = $ci_levels_to_go[$max_index_to_go];
8159 $container_environment = $container_environment_to_go[$max_index_to_go];
8160 $in_continued_quote = 0;
8162 $type_sequence = "";
8163 store_token_to_go();
8164 restore_current_token();
8168 my %is_until_while_for_if_elsif_else;
8172 # always break after a closing curly of these block types:
8173 @_ = qw(until while for if elsif else);
8174 @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
8178 sub print_line_of_tokens {
8180 my $line_of_tokens = shift;
8182 # This routine is called once per input line to process all of
8183 # the tokens on that line. This is the first stage of
8186 # Full-line comments and blank lines may be processed immediately.
8188 # For normal lines of code, the tokens are stored one-by-one,
8189 # via calls to 'sub store_token_to_go', until a known line break
8190 # point is reached. Then, the batch of collected tokens is
8191 # passed along to 'sub output_line_to_go' for further
8192 # processing. This routine decides if there should be
8193 # whitespace between each pair of non-white tokens, so later
8194 # routines only need to decide on any additional line breaks.
8195 # Any whitespace is initally a single space character. Later,
8196 # the vertical aligner may expand that to be multiple space
8197 # characters if necessary for alignment.
8199 # extract input line number for error messages
8200 $input_line_number = $line_of_tokens->{_line_number};
8202 $rtoken_type = $line_of_tokens->{_rtoken_type};
8203 $rtokens = $line_of_tokens->{_rtokens};
8204 $rlevels = $line_of_tokens->{_rlevels};
8205 $rslevels = $line_of_tokens->{_rslevels};
8206 $rblock_type = $line_of_tokens->{_rblock_type};
8207 $rcontainer_type = $line_of_tokens->{_rcontainer_type};
8208 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8209 $rtype_sequence = $line_of_tokens->{_rtype_sequence};
8210 $input_line = $line_of_tokens->{_line_text};
8211 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
8212 $rci_levels = $line_of_tokens->{_rci_levels};
8213 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
8215 $in_continued_quote = $starting_in_quote =
8216 $line_of_tokens->{_starting_in_quote};
8217 $in_quote = $line_of_tokens->{_ending_in_quote};
8218 $python_indentation_level =
8219 $line_of_tokens->{_python_indentation_level};
8224 my $next_nonblank_token;
8225 my $next_nonblank_token_type;
8226 my $rwhite_space_flag;
8228 $jmax = @$rtokens - 1;
8230 $container_type = "";
8231 $container_environment = "";
8232 $type_sequence = "";
8233 $no_internal_newlines = 1 - $rOpts_add_newlines;
8234 $is_static_block_comment = 0;
8236 # Handle a continued quote..
8237 if ($in_continued_quote) {
8239 # A line which is entirely a quote or pattern must go out
8240 # verbatim. Note: the \n is contained in $input_line.
8242 if ( ( $input_line =~ "\t" ) ) {
8243 note_embedded_tab();
8245 write_unindented_line("$input_line");
8246 $last_line_had_side_comment = 0;
8250 # prior to version 20010406, perltidy had a bug which placed
8251 # continuation indentation before the last line of some multiline
8252 # quotes and patterns -- exactly the lines passing this way.
8253 # To help find affected lines in scripts run with these
8254 # versions, run with '-chk', and it will warn of any quotes or
8255 # patterns which might have been modified by these early
8257 if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8259 "-chk: please check this line for extra leading whitespace\n"
8264 # Write line verbatim if we are in a formatting skip section
8265 if ($in_format_skipping_section) {
8266 write_unindented_line("$input_line");
8267 $last_line_had_side_comment = 0;
8269 # Note: extra space appended to comment simplifies pattern matching
8271 && $$rtoken_type[0] eq '#'
8272 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8274 $in_format_skipping_section = 0;
8275 write_logfile_entry("Exiting formatting skip section\n");
8280 # See if we are entering a formatting skip section
8281 if ( $rOpts_format_skipping
8283 && $$rtoken_type[0] eq '#'
8284 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8287 $in_format_skipping_section = 1;
8288 write_logfile_entry("Entering formatting skip section\n");
8289 write_unindented_line("$input_line");
8290 $last_line_had_side_comment = 0;
8294 # delete trailing blank tokens
8295 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8297 # Handle a blank line..
8300 # For the 'swallow-optional-blank-lines' option, we delete all
8301 # old blank lines and let the blank line rules generate any
8303 if ( !$rOpts_swallow_optional_blank_lines ) {
8305 $file_writer_object->write_blank_code_line();
8306 $last_line_leading_type = 'b';
8308 $last_line_had_side_comment = 0;
8312 # see if this is a static block comment (starts with ## by default)
8313 my $is_static_block_comment_without_leading_space = 0;
8315 && $$rtoken_type[0] eq '#'
8316 && $rOpts->{'static-block-comments'}
8317 && $input_line =~ /$static_block_comment_pattern/o )
8319 $is_static_block_comment = 1;
8320 $is_static_block_comment_without_leading_space =
8321 substr( $input_line, 0, 1 ) eq '#';
8324 # create a hanging side comment if appropriate
8327 && $$rtoken_type[0] eq '#' # only token is a comment
8328 && $last_line_had_side_comment # last line had side comment
8329 && $input_line =~ /^\s/ # there is some leading space
8330 && !$is_static_block_comment # do not make static comment hanging
8331 && $rOpts->{'hanging-side-comments'} # user is allowing this
8335 # We will insert an empty qw string at the start of the token list
8336 # to force this comment to be a side comment. The vertical aligner
8337 # should then line it up with the previous side comment.
8338 unshift @$rtoken_type, 'q';
8339 unshift @$rtokens, '';
8340 unshift @$rlevels, $$rlevels[0];
8341 unshift @$rslevels, $$rslevels[0];
8342 unshift @$rblock_type, '';
8343 unshift @$rcontainer_type, '';
8344 unshift @$rcontainer_environment, '';
8345 unshift @$rtype_sequence, '';
8346 unshift @$rnesting_tokens, $$rnesting_tokens[0];
8347 unshift @$rci_levels, $$rci_levels[0];
8348 unshift @$rnesting_blocks, $$rnesting_blocks[0];
8352 # remember if this line has a side comment
8353 $last_line_had_side_comment =
8354 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8356 # Handle a block (full-line) comment..
8357 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8359 if ( $rOpts->{'delete-block-comments'} ) { return }
8361 if ( $rOpts->{'tee-block-comments'} ) {
8362 $file_writer_object->tee_on();
8365 destroy_one_line_block();
8366 output_line_to_go();
8368 # output a blank line before block comments
8370 $last_line_leading_type !~ /^[#b]$/
8371 && $rOpts->{'blanks-before-comments'} # only if allowed
8373 $is_static_block_comment # never before static block comments
8376 flush(); # switching to new output stream
8377 $file_writer_object->write_blank_code_line();
8378 $last_line_leading_type = 'b';
8381 # TRIM COMMENTS -- This could be turned off as a option
8382 $$rtokens[0] =~ s/\s*$//; # trim right end
8385 $rOpts->{'indent-block-comments'}
8386 && ( !$rOpts->{'indent-spaced-block-comments'}
8387 || $input_line =~ /^\s+/ )
8388 && !$is_static_block_comment_without_leading_space
8392 store_token_to_go();
8393 output_line_to_go();
8396 flush(); # switching to new output stream
8397 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8398 $last_line_leading_type = '#';
8400 if ( $rOpts->{'tee-block-comments'} ) {
8401 $file_writer_object->tee_off();
8406 # compare input/output indentation except for continuation lines
8407 # (because they have an unknown amount of initial blank space)
8408 # and lines which are quotes (because they may have been outdented)
8409 # Note: this test is placed here because we know the continuation flag
8410 # at this point, which allows us to avoid non-meaningful checks.
8411 my $structural_indentation_level = $$rlevels[0];
8412 compare_indentation_levels( $python_indentation_level,
8413 $structural_indentation_level )
8414 unless ( $python_indentation_level < 0
8415 || ( $$rci_levels[0] > 0 )
8416 || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8419 # Patch needed for MakeMaker. Do not break a statement
8420 # in which $VERSION may be calculated. See MakeMaker.pm;
8421 # this is based on the coding in it.
8422 # The first line of a file that matches this will be eval'd:
8423 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8425 # *VERSION = \'1.01';
8426 # ( $VERSION ) = '$Revision: 1.49 $ ' =~ /\$Revision:\s+([^\s]+)/;
8427 # We will pass such a line straight through without breaking
8428 # it unless -npvl is used
8430 my $is_VERSION_statement = 0;
8433 !$saw_VERSION_in_this_file
8434 && $input_line =~ /VERSION/ # quick check to reject most lines
8435 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8438 $saw_VERSION_in_this_file = 1;
8439 $is_VERSION_statement = 1;
8440 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8441 $no_internal_newlines = 1;
8444 # take care of indentation-only
8445 # also write a line which is entirely a 'qw' list
8446 if ( $rOpts->{'indent-only'}
8447 || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) )
8450 $input_line =~ s/^\s*//; # trim left end
8451 $input_line =~ s/\s*$//; # trim right end
8454 $token = $input_line;
8457 $container_type = "";
8458 $container_environment = "";
8459 $type_sequence = "";
8460 store_token_to_go();
8461 output_line_to_go();
8465 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
8466 push( @$rtoken_type, 'b', 'b' );
8467 ($rwhite_space_flag) =
8468 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8470 # find input tabbing to allow checks for tabbing disagreement
8472 ##$input_line_tabbing = "";
8473 ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8475 # if the buffer hasn't been flushed, add a leading space if
8476 # necessary to keep essential whitespace. This is really only
8477 # necessary if we are squeezing out all ws.
8478 if ( $max_index_to_go >= 0 ) {
8480 $old_line_count_in_batch++;
8483 is_essential_whitespace(
8484 $last_last_nonblank_token,
8485 $last_last_nonblank_type,
8486 $tokens_to_go[$max_index_to_go],
8487 $types_to_go[$max_index_to_go],
8493 my $slevel = $$rslevels[0];
8494 insert_new_token_to_go( ' ', 'b', $slevel,
8495 $no_internal_newlines );
8499 # If we just saw the end of an elsif block, write nag message
8500 # if we do not see another elseif or an else.
8501 if ($looking_for_else) {
8503 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8504 write_logfile_entry("(No else block)\n");
8506 $looking_for_else = 0;
8509 # This is a good place to kill incomplete one-line blocks
8510 if ( ( $semicolons_before_block_self_destruct == 0 )
8511 && ( $max_index_to_go >= 0 )
8512 && ( $types_to_go[$max_index_to_go] eq ';' )
8513 && ( $$rtokens[0] ne '}' ) )
8515 destroy_one_line_block();
8516 output_line_to_go();
8519 # loop to process the tokens one-by-one
8523 foreach $j ( 0 .. $jmax ) {
8525 # pull out the local values for this token
8528 if ( $type eq '#' ) {
8530 # trim trailing whitespace
8531 # (there is no option at present to prevent this)
8535 $rOpts->{'delete-side-comments'}
8537 # delete closing side comments if necessary
8538 || ( $rOpts->{'delete-closing-side-comments'}
8539 && $token =~ /$closing_side_comment_prefix_pattern/o
8540 && $last_nonblank_block_type =~
8541 /$closing_side_comment_list_pattern/o )
8544 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8545 unstore_token_to_go();
8551 # If we are continuing after seeing a right curly brace, flush
8552 # buffer unless we see what we are looking for, as in
8554 if ( $rbrace_follower && $type ne 'b' ) {
8556 unless ( $rbrace_follower->{$token} ) {
8557 output_line_to_go();
8559 $rbrace_follower = undef;
8562 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8563 $next_nonblank_token = $$rtokens[$j_next];
8564 $next_nonblank_token_type = $$rtoken_type[$j_next];
8566 #--------------------------------------------------------
8567 # Start of section to patch token text
8568 #--------------------------------------------------------
8570 # Modify certain tokens here for whitespace
8571 # The following is not yet done, but could be:
8573 if ( $type =~ /^[wit]$/ ) {
8576 # change '$ var' to '$var' etc
8577 # '-> new' to '->new'
8578 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8582 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8585 # change 'LABEL :' to 'LABEL:'
8586 elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8588 # patch to add space to something like "x10"
8589 # This avoids having to split this token in the pre-tokenizer
8590 elsif ( $type eq 'n' ) {
8591 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8594 elsif ( $type eq 'Q' ) {
8595 note_embedded_tab() if ( $token =~ "\t" );
8597 # make note of something like '$var = s/xxx/yyy/;'
8598 # in case it should have been '$var =~ s/xxx/yyy/;'
8600 $token =~ /^(s|tr|y|m|\/)/
8601 && $last_nonblank_token =~ /^(=|==|!=)$/
8603 # precededed by simple scalar
8604 && $last_last_nonblank_type eq 'i'
8605 && $last_last_nonblank_token =~ /^\$/
8607 # followed by some kind of termination
8608 # (but give complaint if we can's see far enough ahead)
8609 && $next_nonblank_token =~ /^[; \)\}]$/
8611 # scalar is not decleared
8613 $types_to_go[0] eq 'k'
8614 && $tokens_to_go[0] =~ /^(my|our|local)$/
8618 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8620 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8625 # trim blanks from right of qw quotes
8626 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8627 elsif ( $type eq 'q' ) {
8629 note_embedded_tab() if ( $token =~ "\t" );
8632 #--------------------------------------------------------
8633 # End of section to patch token text
8634 #--------------------------------------------------------
8636 # insert any needed whitespace
8637 if ( ( $type ne 'b' )
8638 && ( $max_index_to_go >= 0 )
8639 && ( $types_to_go[$max_index_to_go] ne 'b' )
8640 && $rOpts_add_whitespace )
8642 my $ws = $$rwhite_space_flag[$j];
8645 insert_new_token_to_go( ' ', 'b', $slevel,
8646 $no_internal_newlines );
8650 # Do not allow breaks which would promote a side comment to a
8651 # block comment. In order to allow a break before an opening
8652 # or closing BLOCK, followed by a side comment, those sections
8653 # of code will handle this flag separately.
8654 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8655 my $is_opening_BLOCK =
8659 && $block_type ne 't' );
8660 my $is_closing_BLOCK =
8664 && $block_type ne 't' );
8666 if ( $side_comment_follows
8667 && !$is_opening_BLOCK
8668 && !$is_closing_BLOCK )
8670 $no_internal_newlines = 1;
8673 # We're only going to handle breaking for code BLOCKS at this
8674 # (top) level. Other indentation breaks will be handled by
8675 # sub scan_list, which is better suited to dealing with them.
8676 if ($is_opening_BLOCK) {
8678 # Tentatively output this token. This is required before
8679 # calling starting_one_line_block. We may have to unstore
8680 # it, though, if we have to break before it.
8681 store_token_to_go($side_comment_follows);
8683 # Look ahead to see if we might form a one-line block
8685 starting_one_line_block( $j, $jmax, $level, $slevel,
8686 $ci_level, $rtokens, $rtoken_type, $rblock_type );
8687 clear_breakpoint_undo_stack();
8689 # to simplify the logic below, set a flag to indicate if
8690 # this opening brace is far from the keyword which introduces it
8691 my $keyword_on_same_line = 1;
8692 if ( ( $max_index_to_go >= 0 )
8693 && ( $last_nonblank_type eq ')' ) )
8695 if ( $block_type =~ /^(if|else|elsif)$/
8696 && ( $tokens_to_go[0] eq '}' )
8697 && $rOpts_cuddled_else )
8699 $keyword_on_same_line = 1;
8701 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8703 $keyword_on_same_line = 0;
8707 # decide if user requested break before '{'
8710 # use -bl flag if not a sub block of any type
8711 $block_type !~ /^sub/
8712 ? $rOpts->{'opening-brace-on-new-line'}
8714 # use -sbl flag unless this is an anonymous sub block
8715 : $block_type !~ /^sub\W*$/
8716 ? $rOpts->{'opening-sub-brace-on-new-line'}
8718 # do not break for anonymous subs
8721 # Break before an opening '{' ...
8727 # and we were unable to start looking for a block,
8728 && $index_start_one_line_block == UNDEFINED_INDEX
8730 # or if it will not be on same line as its keyword, so that
8731 # it will be outdented (eval.t, overload.t), and the user
8732 # has not insisted on keeping it on the right
8733 || ( !$keyword_on_same_line
8734 && !$rOpts->{'opening-brace-always-on-right'} )
8739 # but only if allowed
8740 unless ($no_internal_newlines) {
8742 # since we already stored this token, we must unstore it
8743 unstore_token_to_go();
8745 # then output the line
8746 output_line_to_go();
8748 # and now store this token at the start of a new line
8749 store_token_to_go($side_comment_follows);
8753 # Now update for side comment
8754 if ($side_comment_follows) { $no_internal_newlines = 1 }
8756 # now output this line
8757 unless ($no_internal_newlines) {
8758 output_line_to_go();
8762 elsif ($is_closing_BLOCK) {
8764 # If there is a pending one-line block ..
8765 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8767 # we have to terminate it if..
8770 # it is too long (final length may be different from
8771 # initial estimate). note: must allow 1 space for this token
8772 excess_line_length( $index_start_one_line_block,
8773 $max_index_to_go ) >= 0
8775 # or if it has too many semicolons
8776 || ( $semicolons_before_block_self_destruct == 0
8777 && $last_nonblank_type ne ';' )
8780 destroy_one_line_block();
8784 # put a break before this closing curly brace if appropriate
8785 unless ( $no_internal_newlines
8786 || $index_start_one_line_block != UNDEFINED_INDEX )
8789 # add missing semicolon if ...
8790 # there are some tokens
8792 ( $max_index_to_go > 0 )
8794 # and we don't have one
8795 && ( $last_nonblank_type ne ';' )
8797 # patch until some block type issues are fixed:
8798 # Do not add semi-colon for block types '{',
8799 # '}', and ';' because we cannot be sure yet
8800 # that this is a block and not an anonomyous
8801 # hash (blktype.t, blktype1.t)
8802 && ( $block_type !~ /^[\{\};]$/ )
8804 # it seems best not to add semicolons in these
8805 # special block types: sort|map|grep
8806 && ( !$is_sort_map_grep{$block_type} )
8808 # and we are allowed to do so.
8809 && $rOpts->{'add-semicolons'}
8813 save_current_token();
8816 $level = $levels_to_go[$max_index_to_go];
8817 $slevel = $nesting_depth_to_go[$max_index_to_go];
8819 $nesting_blocks_to_go[$max_index_to_go];
8820 $ci_level = $ci_levels_to_go[$max_index_to_go];
8822 $container_type = "";
8823 $container_environment = "";
8824 $type_sequence = "";
8826 # Note - we remove any blank AFTER extracting its
8827 # parameters such as level, etc, above
8828 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8829 unstore_token_to_go();
8831 store_token_to_go();
8833 note_added_semicolon();
8834 restore_current_token();
8837 # then write out everything before this closing curly brace
8838 output_line_to_go();
8842 # Now update for side comment
8843 if ($side_comment_follows) { $no_internal_newlines = 1 }
8845 # store the closing curly brace
8846 store_token_to_go();
8848 # ok, we just stored a closing curly brace. Often, but
8849 # not always, we want to end the line immediately.
8850 # So now we have to check for special cases.
8852 # if this '}' successfully ends a one-line block..
8853 my $is_one_line_block = 0;
8855 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8857 # Remember the type of token just before the
8858 # opening brace. It would be more general to use
8859 # a stack, but this will work for one-line blocks.
8860 $is_one_line_block =
8861 $types_to_go[$index_start_one_line_block];
8863 # we have to actually make it by removing tentative
8864 # breaks that were set within it
8865 undo_forced_breakpoint_stack(0);
8866 set_nobreaks( $index_start_one_line_block,
8867 $max_index_to_go - 1 );
8869 # then re-initialize for the next one-line block
8870 destroy_one_line_block();
8872 # then decide if we want to break after the '}' ..
8873 # We will keep going to allow certain brace followers as in:
8874 # do { $ifclosed = 1; last } unless $losing;
8876 # But make a line break if the curly ends a
8877 # significant block:
8878 if ( $is_until_while_for_if_elsif_else{$block_type} ) {
8879 output_line_to_go() unless ($no_internal_newlines);
8883 # set string indicating what we need to look for brace follower
8885 if ( $block_type eq 'do' ) {
8886 $rbrace_follower = \%is_do_follower;
8888 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
8889 $rbrace_follower = \%is_if_brace_follower;
8891 elsif ( $block_type eq 'else' ) {
8892 $rbrace_follower = \%is_else_brace_follower;
8895 # added eval for borris.t
8896 elsif ($is_sort_map_grep_eval{$block_type}
8897 || $is_one_line_block eq 'G' )
8899 $rbrace_follower = undef;
8904 elsif ( $block_type =~ /^sub\W*$/ ) {
8906 if ($is_one_line_block) {
8907 $rbrace_follower = \%is_anon_sub_1_brace_follower;
8910 $rbrace_follower = \%is_anon_sub_brace_follower;
8914 # TESTING ONLY for SWITCH/CASE - this is where to start
8915 # recoding to retain else's on the same line as a case,
8916 # but there is a lot more that would need to be done.
8917 ##elsif ($block_type eq 'case') {$rbrace_follower = {else=>1};}
8919 # None of the above: specify what can follow a closing
8920 # brace of a block which is not an
8921 # if/elsif/else/do/sort/map/grep/eval
8923 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
8925 $rbrace_follower = \%is_other_brace_follower;
8928 # See if an elsif block is followed by another elsif or else;
8930 if ( $block_type eq 'elsif' ) {
8932 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
8933 $looking_for_else = 1; # ok, check on next line
8937 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
8938 write_logfile_entry("No else block :(\n");
8943 # keep going after certain block types (map,sort,grep,eval)
8944 # added eval for borris.t
8950 # if no more tokens, postpone decision until re-entring
8951 elsif ( ( $next_nonblank_token_type eq 'b' )
8952 && $rOpts_add_newlines )
8954 unless ($rbrace_follower) {
8955 output_line_to_go() unless ($no_internal_newlines);
8959 elsif ($rbrace_follower) {
8961 unless ( $rbrace_follower->{$next_nonblank_token} ) {
8962 output_line_to_go() unless ($no_internal_newlines);
8964 $rbrace_follower = undef;
8968 output_line_to_go() unless ($no_internal_newlines);
8971 } # end treatment of closing block token
8974 elsif ( $type eq ';' ) {
8976 # kill one-line blocks with too many semicolons
8977 $semicolons_before_block_self_destruct--;
8979 ( $semicolons_before_block_self_destruct < 0 )
8980 || ( $semicolons_before_block_self_destruct == 0
8981 && $next_nonblank_token_type !~ /^[b\}]$/ )
8984 destroy_one_line_block();
8987 # Remove unnecessary semicolons, but not after bare
8988 # blocks, where it could be unsafe if the brace is
8992 $last_nonblank_token eq '}'
8994 $is_block_without_semicolon{
8995 $last_nonblank_block_type}
8996 || $last_nonblank_block_type =~ /^sub\s+\w/
8997 || $last_nonblank_block_type =~ /^\w+:$/ )
8999 || $last_nonblank_type eq ';'
9004 $rOpts->{'delete-semicolons'}
9006 # don't delete ; before a # because it would promote it
9007 # to a block comment
9008 && ( $next_nonblank_token_type ne '#' )
9011 note_deleted_semicolon();
9013 unless ( $no_internal_newlines
9014 || $index_start_one_line_block != UNDEFINED_INDEX );
9018 write_logfile_entry("Extra ';'\n");
9021 store_token_to_go();
9024 unless ( $no_internal_newlines
9025 || ( $next_nonblank_token eq '}' ) );
9029 # handle here_doc target string
9030 elsif ( $type eq 'h' ) {
9031 $no_internal_newlines =
9032 1; # no newlines after seeing here-target
9033 destroy_one_line_block();
9034 store_token_to_go();
9037 # handle all other token types
9040 # if this is a blank...
9041 if ( $type eq 'b' ) {
9043 # make it just one character
9044 $token = ' ' if $rOpts_add_whitespace;
9046 # delete it if unwanted by whitespace rules
9047 # or we are deleting all whitespace
9048 my $ws = $$rwhite_space_flag[ $j + 1 ];
9049 if ( ( defined($ws) && $ws == -1 )
9050 || $rOpts_delete_old_whitespace )
9053 # unless it might make a syntax error
9055 unless is_essential_whitespace(
9056 $last_last_nonblank_token,
9057 $last_last_nonblank_type,
9058 $tokens_to_go[$max_index_to_go],
9059 $types_to_go[$max_index_to_go],
9060 $$rtokens[ $j + 1 ],
9061 $$rtoken_type[ $j + 1 ]
9065 store_token_to_go();
9068 # remember two previous nonblank OUTPUT tokens
9069 if ( $type ne '#' && $type ne 'b' ) {
9070 $last_last_nonblank_token = $last_nonblank_token;
9071 $last_last_nonblank_type = $last_nonblank_type;
9072 $last_nonblank_token = $token;
9073 $last_nonblank_type = $type;
9074 $last_nonblank_block_type = $block_type;
9077 # unset the continued-quote flag since it only applies to the
9078 # first token, and we want to resume normal formatting if
9079 # there are additional tokens on the line
9080 $in_continued_quote = 0;
9082 } # end of loop over all tokens in this 'line_of_tokens'
9084 # we have to flush ..
9087 # if there is a side comment
9088 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9090 # if this line which ends in a quote
9093 # if this is a VERSION statement
9094 || $is_VERSION_statement
9096 # to keep a label on one line if that is how it is now
9097 || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9099 # if we are instructed to keep all old line breaks
9100 || !$rOpts->{'delete-old-newlines'}
9103 destroy_one_line_block();
9104 output_line_to_go();
9107 # mark old line breakpoints in current output stream
9108 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9109 $old_breakpoint_to_go[$max_index_to_go] = 1;
9112 } # end print_line_of_tokens
9114 sub note_added_semicolon {
9115 $last_added_semicolon_at = $input_line_number;
9116 if ( $added_semicolon_count == 0 ) {
9117 $first_added_semicolon_at = $last_added_semicolon_at;
9119 $added_semicolon_count++;
9120 write_logfile_entry("Added ';' here\n");
9123 sub note_deleted_semicolon {
9124 $last_deleted_semicolon_at = $input_line_number;
9125 if ( $deleted_semicolon_count == 0 ) {
9126 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
9128 $deleted_semicolon_count++;
9129 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
9132 sub note_embedded_tab {
9133 $embedded_tab_count++;
9134 $last_embedded_tab_at = $input_line_number;
9135 if ( !$first_embedded_tab_at ) {
9136 $first_embedded_tab_at = $last_embedded_tab_at;
9139 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
9140 write_logfile_entry("Embedded tabs in quote or pattern\n");
9144 sub starting_one_line_block {
9146 # after seeing an opening curly brace, look for the closing brace
9147 # and see if the entire block will fit on a line. This routine is
9148 # not always right because it uses the old whitespace, so a check
9149 # is made later (at the closing brace) to make sure we really
9150 # have a one-line block. We have to do this preliminary check,
9151 # though, because otherwise we would always break at a semicolon
9152 # within a one-line block if the block contains multiple statements.
9154 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9158 # kill any current block - we can only go 1 deep
9159 destroy_one_line_block();
9162 # 1=distance from start of block to opening brace exceeds line length
9167 # shouldn't happen: there must have been a prior call to
9168 # store_token_to_go to put the opening brace in the output stream
9169 if ( $max_index_to_go < 0 ) {
9170 warning("program bug: store_token_to_go called incorrectly\n");
9171 report_definite_bug();
9175 # cannot use one-line blocks with cuddled else else/elsif lines
9176 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9181 my $block_type = $$rblock_type[$j];
9183 # find the starting keyword for this block (such as 'if', 'else', ...)
9185 if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9186 $i_start = $max_index_to_go;
9189 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9191 # For something like "if (xxx) {", the keyword "if" will be
9192 # just after the most recent break. This will be 0 unless
9193 # we have just killed a one-line block and are starting another.
9195 $i_start = $index_max_forced_break + 1;
9196 if ( $types_to_go[$i_start] eq 'b' ) {
9200 unless ( $tokens_to_go[$i_start] eq $block_type ) {
9205 # the previous nonblank token should start these block types
9207 ( $last_last_nonblank_token_to_go eq $block_type )
9208 || ( $block_type =~ /^sub/
9209 && $last_last_nonblank_token_to_go =~ /^sub/ )
9212 $i_start = $last_last_nonblank_index_to_go;
9215 # patch for SWITCH/CASE to retain one-line case/when blocks
9216 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9217 $i_start = $index_max_forced_break + 1;
9218 if ( $types_to_go[$i_start] eq 'b' ) {
9221 unless ( $tokens_to_go[$i_start] eq $block_type ) {
9230 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9234 # see if length is too long to even start
9235 if ( $pos > $rOpts_maximum_line_length ) {
9239 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9241 # old whitespace could be arbitrarily large, so don't use it
9242 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9243 else { $pos += length( $$rtokens[$i] ) }
9245 # Return false result if we exceed the maximum line length,
9246 if ( $pos > $rOpts_maximum_line_length ) {
9250 # or encounter another opening brace before finding the closing brace.
9251 elsif ($$rtokens[$i] eq '{'
9252 && $$rtoken_type[$i] eq '{'
9253 && $$rblock_type[$i] )
9258 # if we find our closing brace..
9259 elsif ($$rtokens[$i] eq '}'
9260 && $$rtoken_type[$i] eq '}'
9261 && $$rblock_type[$i] )
9264 # be sure any trailing comment also fits on the line
9266 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9268 if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9269 $pos += length( $$rtokens[$i_nonblank] );
9271 if ( $i_nonblank > $i + 1 ) {
9272 $pos += length( $$rtokens[ $i + 1 ] );
9275 if ( $pos > $rOpts_maximum_line_length ) {
9280 # ok, it's a one-line block
9281 create_one_line_block( $i_start, 20 );
9285 # just keep going for other characters
9290 # Allow certain types of new one-line blocks to form by joining
9291 # input lines. These can be safely done, but for other block types,
9292 # we keep old one-line blocks but do not form new ones. It is not
9293 # always a good idea to make as many one-line blocks as possible,
9294 # so other types are not done. The user can always use -mangle.
9295 if ( $is_sort_map_grep_eval{$block_type} ) {
9296 create_one_line_block( $i_start, 1 );
9302 sub unstore_token_to_go {
9304 # remove most recent token from output stream
9305 if ( $max_index_to_go > 0 ) {
9309 $max_index_to_go = UNDEFINED_INDEX;
9314 sub want_blank_line {
9316 $file_writer_object->want_blank_line();
9319 sub write_unindented_line {
9321 $file_writer_object->write_line( $_[0] );
9326 # If there is a single, long parameter within parens, like this:
9328 # $self->command( "/msg "
9330 # . " You said $1, but did you know that it's square was "
9331 # . $1 * $1 . " ?" );
9333 # we can remove the continuation indentation of the 2nd and higher lines
9334 # to achieve this effect, which is more pleasing:
9336 # $self->command("/msg "
9338 # . " You said $1, but did you know that it's square was "
9339 # . $1 * $1 . " ?");
9341 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9342 my $max_line = @$ri_first - 1;
9344 # must be multiple lines
9345 return unless $max_line > $line_open;
9347 my $lev_start = $levels_to_go[$i_start];
9348 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9350 # see if all additional lines in this container have continuation
9353 my $line_1 = 1 + $line_open;
9354 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
9355 my $ibeg = $$ri_first[$n];
9356 my $iend = $$ri_last[$n];
9357 if ( $ibeg eq $closing_index ) { $n--; last }
9358 return if ( $lev_start != $levels_to_go[$ibeg] );
9359 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
9360 last if ( $closing_index <= $iend );
9363 # we can reduce the indentation of all continuation lines
9364 my $continuation_line_count = $n - $line_open;
9365 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9366 (0) x ($continuation_line_count);
9367 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9368 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
9371 sub set_logical_padding {
9373 # Look at a batch of lines and see if extra padding can improve the
9374 # alignment when there are certain leading operators. Here is an
9375 # example, in which some extra space is introduced before
9376 # '( $year' to make it line up with the subsequent lines:
9378 # if ( ( $Year < 1601 )
9379 # || ( $Year > 2899 )
9380 # || ( $EndYear < 1601 )
9381 # || ( $EndYear > 2899 ) )
9383 # &Error_OutOfRange;
9386 my ( $ri_first, $ri_last ) = @_;
9387 my $max_line = @$ri_first - 1;
9389 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
9390 $tok_next, $has_leading_op_next, $has_leading_op );
9392 # looking at each line of this batch..
9393 foreach $line ( 0 .. $max_line - 1 ) {
9395 # see if the next line begins with a logical operator
9396 $ibeg = $$ri_first[$line];
9397 $iend = $$ri_last[$line];
9398 $ibeg_next = $$ri_first[ $line + 1 ];
9399 $tok_next = $tokens_to_go[$ibeg_next];
9400 $has_leading_op_next = $is_chain_operator{$tok_next};
9401 next unless ($has_leading_op_next);
9403 # next line must not be at lesser depth
9405 if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
9407 # identify the token in this line to be padded on the left
9410 # handle lines at same depth...
9411 if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
9413 # if this is not first line of the batch ...
9416 # and we have leading operator
9417 next if $has_leading_op;
9420 # 1. the previous line is at lesser depth, or
9421 # 2. the previous line ends in an assignment
9423 # Example 1: previous line at lesser depth
9424 # if ( ( $Year < 1601 ) # <- we are here but
9425 # || ( $Year > 2899 ) # list has not yet
9426 # || ( $EndYear < 1601 ) # collapsed vertically
9427 # || ( $EndYear > 2899 ) )
9430 # Example 2: previous line ending in assignment:
9432 # $year % 4 ? 0 # <- We are here
9438 $is_assignment{ $types_to_go[$iendm] }
9439 || ( $nesting_depth_to_go[$ibegm] <
9440 $nesting_depth_to_go[$ibeg] )
9443 # we will add padding before the first token
9447 # for first line of the batch..
9450 # WARNING: Never indent if first line is starting in a
9451 # continued quote, which would change the quote.
9452 next if $starting_in_quote;
9454 # if this is text after closing '}'
9455 # then look for an interior token to pad
9456 if ( $types_to_go[$ibeg] eq '}' ) {
9460 # otherwise, we might pad if it looks really good
9463 # we might pad token $ibeg, so be sure that it
9464 # is at the same depth as the next line.
9466 if ( $nesting_depth_to_go[ $ibeg + 1 ] !=
9467 $nesting_depth_to_go[$ibeg_next] );
9469 # We can pad on line 1 of a statement if at least 3
9470 # lines will be aligned. Otherwise, it
9471 # can look very confusing.
9472 if ( $max_line > 2 ) {
9473 my $leading_token = $tokens_to_go[$ibeg_next];
9475 # never indent line 1 of a '.' series because
9476 # previous line is most likely at same level.
9477 # TODO: we should also look at the leasing_spaces
9478 # of the last output line and skip if it is same
9480 next if ( $leading_token eq '.' );
9483 foreach my $l ( 2 .. 3 ) {
9484 my $ibeg_next_next = $$ri_first[ $line + $l ];
9486 unless $tokens_to_go[$ibeg_next_next] eq
9490 next unless $count == 3;
9500 # find interior token to pad if necessary
9501 if ( !defined($ipad) ) {
9503 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
9505 # find any unclosed container
9507 unless ( $type_sequence_to_go[$i]
9508 && $mate_index_to_go[$i] > $iend );
9510 # find next nonblank token to pad
9512 if ( $types_to_go[$ipad] eq 'b' ) {
9514 last if ( $ipad > $iend );
9520 # next line must not be at greater depth
9521 my $iend_next = $$ri_last[ $line + 1 ];
9523 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
9524 $nesting_depth_to_go[$ipad] );
9526 # lines must be somewhat similar to be padded..
9527 my $inext_next = $ibeg_next + 1;
9528 if ( $types_to_go[$inext_next] eq 'b' ) {
9531 my $type = $types_to_go[$ipad];
9533 # see if there are multiple continuation lines
9534 my $logical_continuation_lines = 1;
9535 if ( $line + 2 <= $max_line ) {
9536 my $leading_token = $tokens_to_go[$ibeg_next];
9537 my $ibeg_next_next = $$ri_first[ $line + 2 ];
9538 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
9539 && $nesting_depth_to_go[$ibeg_next] eq
9540 $nesting_depth_to_go[$ibeg_next_next] )
9542 $logical_continuation_lines++;
9547 # either we have multiple continuation lines to follow
9548 # and we are not padding the first token
9549 ( $logical_continuation_lines > 1 && $ipad > 0 )
9555 $types_to_go[$inext_next] eq $type
9557 # and keywords must match if keyword
9560 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
9566 #----------------------begin special check---------------
9568 # One more check is needed before we can make the pad.
9569 # If we are in a list with some long items, we want each
9570 # item to stand out. So in the following example, the
9571 # first line begining with '$casefold->' would look good
9572 # padded to align with the next line, but then it
9573 # would be indented more than the last line, so we
9577 # $casefold->{code} eq '0041'
9578 # && $casefold->{status} eq 'C'
9579 # && $casefold->{mapping} eq '0061',
9584 # It would be faster, and almost as good, to use a comma
9585 # count, and not pad if comma_count > 1 and the previous
9586 # line did not end with a comma.
9590 my $ibg = $$ri_first[ $line + 1 ];
9591 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
9593 # just use simplified formula for leading spaces to avoid
9594 # needless sub calls
9595 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
9597 # look at each line beyond the next ..
9599 foreach $l ( $line + 2 .. $max_line ) {
9600 my $ibg = $$ri_first[$l];
9602 # quit looking at the end of this container
9604 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
9605 || ( $nesting_depth_to_go[$ibg] < $depth );
9607 # cannot do the pad if a later line would be
9609 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
9615 # don't pad if we end in a broken list
9616 if ( $l == $max_line ) {
9617 my $i2 = $$ri_last[$l];
9618 if ( $types_to_go[$i2] eq '#' ) {
9619 my $i1 = $$ri_first[$l];
9622 terminal_type( \@types_to_go, \@block_type_to_go, $i1,
9627 next unless $ok_to_pad;
9629 #----------------------end special check---------------
9631 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
9632 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
9633 $pad_spaces = $length_2 - $length_1;
9635 # make sure this won't change if -lp is used
9636 my $indentation_1 = $leading_spaces_to_go[$ibeg];
9637 if ( ref($indentation_1) ) {
9638 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
9639 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
9640 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
9646 # we might be able to handle a pad of -1 by removing a blank
9648 if ( $pad_spaces < 0 ) {
9649 if ( $pad_spaces == -1 ) {
9650 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
9651 $tokens_to_go[ $ipad - 1 ] = '';
9657 # now apply any padding for alignment
9658 if ( $ipad >= 0 && $pad_spaces ) {
9659 my $length_t = total_line_length( $ibeg, $iend );
9660 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
9661 $tokens_to_go[$ipad] =
9662 ' ' x $pad_spaces . $tokens_to_go[$ipad];
9670 $has_leading_op = $has_leading_op_next;
9671 } # end of loop over lines
9675 sub correct_lp_indentation {
9677 # When the -lp option is used, we need to make a last pass through
9678 # each line to correct the indentation positions in case they differ
9679 # from the predictions. This is necessary because perltidy uses a
9680 # predictor/corrector method for aligning with opening parens. The
9681 # predictor is usually good, but sometimes stumbles. The corrector
9682 # tries to patch things up once the actual opening paren locations
9684 my ( $ri_first, $ri_last ) = @_;
9687 # Note on flag '$do_not_pad':
9688 # We want to avoid a situation like this, where the aligner inserts
9689 # whitespace before the '=' to align it with a previous '=', because
9690 # otherwise the parens might become mis-aligned in a situation like
9691 # this, where the '=' has become aligned with the previous line,
9692 # pushing the opening '(' forward beyond where we want it.
9694 # $mkFloor::currentRoom = '';
9695 # $mkFloor::c_entry = $c->Entry(
9697 # -relief => 'sunken',
9701 # We leave it to the aligner to decide how to do this.
9703 # first remove continuation indentation if appropriate
9704 my $max_line = @$ri_first - 1;
9706 # looking at each line of this batch..
9707 my ( $ibeg, $iend );
9709 foreach $line ( 0 .. $max_line ) {
9710 $ibeg = $$ri_first[$line];
9711 $iend = $$ri_last[$line];
9713 # looking at each token in this output line..
9715 foreach $i ( $ibeg .. $iend ) {
9717 # How many space characters to place before this token
9718 # for special alignment. Actual padding is done in the
9721 # looking for next unvisited indentation item
9722 my $indentation = $leading_spaces_to_go[$i];
9723 if ( !$indentation->get_MARKED() ) {
9724 $indentation->set_MARKED(1);
9726 # looking for indentation item for which we are aligning
9727 # with parens, braces, and brackets
9728 next unless ( $indentation->get_ALIGN_PAREN() );
9730 # skip closed container on this line
9733 if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
9734 if ( $type_sequence_to_go[$im]
9735 && $mate_index_to_go[$im] <= $iend )
9741 if ( $line == 1 && $i == $ibeg ) {
9745 # Ok, let's see what the error is and try to fix it
9747 my $predicted_pos = $indentation->get_SPACES();
9750 # token is mid-line - use length to previous token
9751 $actual_pos = total_line_length( $ibeg, $i - 1 );
9753 # for mid-line token, we must check to see if all
9754 # additional lines have continuation indentation,
9755 # and remove it if so. Otherwise, we do not get
9757 my $closing_index = $indentation->get_CLOSED();
9758 if ( $closing_index > $iend ) {
9759 my $ibeg_next = $$ri_first[ $line + 1 ];
9760 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
9761 undo_lp_ci( $line, $i, $closing_index, $ri_first,
9766 elsif ( $line > 0 ) {
9768 # handle case where token starts a new line;
9769 # use length of previous line
9770 my $ibegm = $$ri_first[ $line - 1 ];
9771 my $iendm = $$ri_last[ $line - 1 ];
9772 $actual_pos = total_line_length( $ibegm, $iendm );
9776 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
9780 # token is first character of first line of batch
9781 $actual_pos = $predicted_pos;
9784 my $move_right = $actual_pos - $predicted_pos;
9786 # done if no error to correct (gnu2.t)
9787 if ( $move_right == 0 ) {
9788 $indentation->set_RECOVERABLE_SPACES($move_right);
9792 # if we have not seen closure for this indentation in
9793 # this batch, we can only pass on a request to the
9795 my $closing_index = $indentation->get_CLOSED();
9797 if ( $closing_index < 0 ) {
9798 $indentation->set_RECOVERABLE_SPACES($move_right);
9802 # If necessary, look ahead to see if there is really any
9803 # leading whitespace dependent on this whitespace, and
9804 # also find the longest line using this whitespace.
9805 # Since it is always safe to move left if there are no
9806 # dependents, we only need to do this if we may have
9807 # dependent nodes or need to move right.
9809 my $right_margin = 0;
9810 my $have_child = $indentation->get_HAVE_CHILD();
9812 my %saw_indentation;
9814 $saw_indentation{$indentation} = $indentation;
9816 if ( $have_child || $move_right > 0 ) {
9819 if ( $i == $ibeg ) {
9820 $max_length = total_line_length( $ibeg, $iend );
9823 # look ahead at the rest of the lines of this batch..
9825 foreach $line_t ( $line + 1 .. $max_line ) {
9826 my $ibeg_t = $$ri_first[$line_t];
9827 my $iend_t = $$ri_last[$line_t];
9828 last if ( $closing_index <= $ibeg_t );
9830 # remember all different indentation objects
9831 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
9832 $saw_indentation{$indentation_t} = $indentation_t;
9835 # remember longest line in the group
9836 my $length_t = total_line_length( $ibeg_t, $iend_t );
9837 if ( $length_t > $max_length ) {
9838 $max_length = $length_t;
9841 $right_margin = $rOpts_maximum_line_length - $max_length;
9842 if ( $right_margin < 0 ) { $right_margin = 0 }
9845 my $first_line_comma_count =
9846 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
9847 my $comma_count = $indentation->get_COMMA_COUNT();
9848 my $arrow_count = $indentation->get_ARROW_COUNT();
9850 # This is a simple approximate test for vertical alignment:
9851 # if we broke just after an opening paren, brace, bracket,
9852 # and there are 2 or more commas in the first line,
9853 # and there are no '=>'s,
9854 # then we are probably vertically aligned. We could set
9855 # an exact flag in sub scan_list, but this is good
9857 my $indentation_count = keys %saw_indentation;
9858 my $is_vertically_aligned =
9860 && $first_line_comma_count > 1
9861 && $indentation_count == 1
9862 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
9864 # Make the move if possible ..
9867 # we can always move left
9870 # but we should only move right if we are sure it will
9871 # not spoil vertical alignment
9872 || ( $comma_count == 0 )
9873 || ( $comma_count > 0 && !$is_vertically_aligned )
9877 ( $move_right <= $right_margin )
9881 foreach ( keys %saw_indentation ) {
9882 $saw_indentation{$_}
9883 ->permanently_decrease_AVAILABLE_SPACES( -$move );
9887 # Otherwise, record what we want and the vertical aligner
9888 # will try to recover it.
9890 $indentation->set_RECOVERABLE_SPACES($move_right);
9898 # flush is called to output any tokens in the pipeline, so that
9899 # an alternate source of lines can be written in the correct order
9902 destroy_one_line_block();
9903 output_line_to_go();
9904 Perl::Tidy::VerticalAligner::flush();
9907 # output_line_to_go sends one logical line of tokens on down the
9908 # pipeline to the VerticalAligner package, breaking the line into continuation
9909 # lines as necessary. The line of tokens is ready to go in the "to_go"
9912 sub output_line_to_go {
9914 # debug stuff; this routine can be called from many points
9915 FORMATTER_DEBUG_FLAG_OUTPUT && do {
9916 my ( $a, $b, $c ) = caller;
9918 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9920 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9921 write_diagnostics("$output_str\n");
9924 # just set a tentative breakpoint if we might be in a one-line block
9925 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9926 set_forced_breakpoint($max_index_to_go);
9930 my $cscw_block_comment;
9931 $cscw_block_comment = add_closing_side_comment()
9932 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9934 match_opening_and_closing_tokens();
9936 # tell the -lp option we are outputting a batch so it can close
9937 # any unfinished items in its stack
9941 my $imax = $max_index_to_go;
9943 # trim any blank tokens
9944 if ( $max_index_to_go >= 0 ) {
9945 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9946 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9949 # anything left to write?
9950 if ( $imin <= $imax ) {
9952 # add a blank line before certain key types
9953 if ( $last_line_leading_type !~ /^[#b]/ ) {
9955 my $leading_token = $tokens_to_go[$imin];
9956 my $leading_type = $types_to_go[$imin];
9958 # blank lines before subs except declarations and one-liners
9959 # MCONVERSION LOCATION - for sub tokenization change
9960 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9961 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9963 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9964 $imax ) !~ /^[\;\}]$/
9968 # break before all package declarations
9969 # MCONVERSION LOCATION - for tokenizaton change
9970 elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) {
9971 $want_blank = ( $rOpts->{'blanks-before-subs'} );
9974 # break before certain key blocks except one-liners
9975 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9976 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9978 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9983 # Break before certain block types if we haven't had a break at this
9984 # level for a while. This is the difficult decision..
9985 elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9986 && $leading_type eq 'k' )
9988 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9989 if ( !defined($lc) ) { $lc = 0 }
9991 $want_blank = $rOpts->{'blanks-before-blocks'}
9992 && $lc >= $rOpts->{'long-block-line-count'}
9993 && $file_writer_object->get_consecutive_nonblank_lines() >=
9994 $rOpts->{'long-block-line-count'}
9996 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10003 # future: send blank line down normal path to VerticalAligner
10004 Perl::Tidy::VerticalAligner::flush();
10005 $file_writer_object->write_blank_code_line();
10009 # update blank line variables and count number of consecutive
10010 # non-blank, non-comment lines at this level
10011 $last_last_line_leading_level = $last_line_leading_level;
10012 $last_line_leading_level = $levels_to_go[$imin];
10013 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
10014 $last_line_leading_type = $types_to_go[$imin];
10015 if ( $last_line_leading_level == $last_last_line_leading_level
10016 && $last_line_leading_type ne 'b'
10017 && $last_line_leading_type ne '#'
10018 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
10020 $nonblank_lines_at_depth[$last_line_leading_level]++;
10023 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
10026 FORMATTER_DEBUG_FLAG_FLUSH && do {
10027 my ( $package, $file, $line ) = caller;
10029 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
10032 # add a couple of extra terminal blank tokens
10035 # set all forced breakpoints for good list formatting
10036 my $saw_good_break = 0;
10037 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
10040 $max_index_to_go > 0
10043 || $old_line_count_in_batch > 1
10044 || is_unbalanced_batch()
10046 $comma_count_in_batch
10047 && ( $rOpts_maximum_fields_per_table > 0
10048 || $rOpts_comma_arrow_breakpoints == 0 )
10053 $saw_good_break = scan_list();
10056 # let $ri_first and $ri_last be references to lists of
10057 # first and last tokens of line fragments to output..
10058 my ( $ri_first, $ri_last );
10060 # write a single line if..
10063 # we aren't allowed to add any newlines
10064 !$rOpts_add_newlines
10066 # or, we don't already have an interior breakpoint
10067 # and we didn't see a good breakpoint
10069 !$forced_breakpoint_count
10070 && !$saw_good_break
10072 # and this line is 'short'
10077 @$ri_first = ($imin);
10078 @$ri_last = ($imax);
10081 # otherwise use multiple lines
10084 ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
10086 # now we do a correction step to clean this up a bit
10087 # (The only time we would not do this is for debugging)
10088 if ( $rOpts->{'recombine'} ) {
10089 ( $ri_first, $ri_last ) =
10090 recombine_breakpoints( $ri_first, $ri_last );
10094 # do corrector step if -lp option is used
10095 my $do_not_pad = 0;
10096 if ($rOpts_line_up_parentheses) {
10097 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
10099 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
10101 prepare_for_new_input_lines();
10103 # output any new -cscw block comment
10104 if ($cscw_block_comment) {
10106 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10110 sub reset_block_text_accumulator {
10112 # save text after 'if' and 'elsif' to append after 'else'
10113 if ($accumulating_text_for_block) {
10115 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
10116 push @{$rleading_block_if_elsif_text}, $leading_block_text;
10119 $accumulating_text_for_block = "";
10120 $leading_block_text = "";
10121 $leading_block_text_level = 0;
10122 $leading_block_text_length_exceeded = 0;
10123 $leading_block_text_line_number = 0;
10124 $leading_block_text_line_length = 0;
10127 sub set_block_text_accumulator {
10129 $accumulating_text_for_block = $tokens_to_go[$i];
10130 if ( $accumulating_text_for_block !~ /^els/ ) {
10131 $rleading_block_if_elsif_text = [];
10133 $leading_block_text = "";
10134 $leading_block_text_level = $levels_to_go[$i];
10135 $leading_block_text_line_number =
10136 $vertical_aligner_object->get_output_line_number();
10137 $leading_block_text_length_exceeded = 0;
10139 # this will contain the column number of the last character
10140 # of the closing side comment
10141 $leading_block_text_line_length =
10142 length($accumulating_text_for_block) +
10143 length( $rOpts->{'closing-side-comment-prefix'} ) +
10144 $leading_block_text_level * $rOpts_indent_columns + 3;
10147 sub accumulate_block_text {
10150 # accumulate leading text for -csc, ignoring any side comments
10151 if ( $accumulating_text_for_block
10152 && !$leading_block_text_length_exceeded
10153 && $types_to_go[$i] ne '#' )
10156 my $added_length = length( $tokens_to_go[$i] );
10157 $added_length += 1 if $i == 0;
10158 my $new_line_length = $leading_block_text_line_length + $added_length;
10160 # we can add this text if we don't exceed some limits..
10163 # we must not have already exceeded the text length limit
10164 length($leading_block_text) <
10165 $rOpts_closing_side_comment_maximum_text
10168 # the new total line length must be below the line length limit
10169 # or the new length must be below the text length limit
10170 # (ie, we may allow one token to exceed the text length limit)
10171 && ( $new_line_length < $rOpts_maximum_line_length
10172 || length($leading_block_text) + $added_length <
10173 $rOpts_closing_side_comment_maximum_text )
10175 # UNLESS: we are adding a closing paren before the brace we seek.
10176 # This is an attempt to avoid situations where the ... to be
10177 # added are longer than the omitted right paren, as in:
10179 # foreach my $item (@a_rather_long_variable_name_here) {
10181 # } ## end foreach my $item (@a_rather_long_variable_name_here...
10184 $tokens_to_go[$i] eq ')'
10187 $i + 1 <= $max_index_to_go
10188 && $block_type_to_go[ $i + 1 ] eq
10189 $accumulating_text_for_block
10191 || ( $i + 2 <= $max_index_to_go
10192 && $block_type_to_go[ $i + 2 ] eq
10193 $accumulating_text_for_block )
10199 # add an extra space at each newline
10200 if ( $i == 0 ) { $leading_block_text .= ' ' }
10202 # add the token text
10203 $leading_block_text .= $tokens_to_go[$i];
10204 $leading_block_text_line_length = $new_line_length;
10207 # show that text was truncated if necessary
10208 elsif ( $types_to_go[$i] ne 'b' ) {
10209 $leading_block_text_length_exceeded = 1;
10210 $leading_block_text .= '...';
10216 my %is_if_elsif_else_unless_while_until_for_foreach;
10220 # These block types may have text between the keyword and opening
10221 # curly. Note: 'else' does not, but must be included to allow trailing
10222 # if/elsif text to be appended.
10223 # patch for SWITCH/CASE: added 'case' and 'when'
10224 @_ = qw(if elsif else unless while until for foreach case when);
10225 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
10228 sub accumulate_csc_text {
10230 # called once per output buffer when -csc is used. Accumulates
10231 # the text placed after certain closing block braces.
10232 # Defines and returns the following for this buffer:
10234 my $block_leading_text = ""; # the leading text of the last '}'
10235 my $rblock_leading_if_elsif_text;
10236 my $i_block_leading_text =
10237 -1; # index of token owning block_leading_text
10238 my $block_line_count = 100; # how many lines the block spans
10239 my $terminal_type = 'b'; # type of last nonblank token
10240 my $i_terminal = 0; # index of last nonblank token
10241 my $terminal_block_type = "";
10243 for my $i ( 0 .. $max_index_to_go ) {
10244 my $type = $types_to_go[$i];
10245 my $block_type = $block_type_to_go[$i];
10246 my $token = $tokens_to_go[$i];
10248 # remember last nonblank token type
10249 if ( $type ne '#' && $type ne 'b' ) {
10250 $terminal_type = $type;
10251 $terminal_block_type = $block_type;
10255 my $type_sequence = $type_sequence_to_go[$i];
10256 if ( $block_type && $type_sequence ) {
10258 if ( $token eq '}' ) {
10260 # restore any leading text saved when we entered this block
10261 if ( defined( $block_leading_text{$type_sequence} ) ) {
10262 ( $block_leading_text, $rblock_leading_if_elsif_text ) =
10263 @{ $block_leading_text{$type_sequence} };
10264 $i_block_leading_text = $i;
10265 delete $block_leading_text{$type_sequence};
10266 $rleading_block_if_elsif_text =
10267 $rblock_leading_if_elsif_text;
10270 # if we run into a '}' then we probably started accumulating
10271 # at something like a trailing 'if' clause..no harm done.
10272 if ( $accumulating_text_for_block
10273 && $levels_to_go[$i] <= $leading_block_text_level )
10275 my $lev = $levels_to_go[$i];
10276 reset_block_text_accumulator();
10279 if ( defined( $block_opening_line_number{$type_sequence} ) )
10281 my $output_line_number =
10282 $vertical_aligner_object->get_output_line_number();
10283 $block_line_count = $output_line_number -
10284 $block_opening_line_number{$type_sequence} + 1;
10285 delete $block_opening_line_number{$type_sequence};
10289 # Error: block opening line undefined for this line..
10290 # This shouldn't be possible, but it is not a
10291 # significant problem.
10295 elsif ( $token eq '{' ) {
10298 $vertical_aligner_object->get_output_line_number();
10299 $block_opening_line_number{$type_sequence} = $line_number;
10301 if ( $accumulating_text_for_block
10302 && $levels_to_go[$i] == $leading_block_text_level )
10305 if ( $accumulating_text_for_block eq $block_type ) {
10307 # save any leading text before we enter this block
10308 $block_leading_text{$type_sequence} = [
10309 $leading_block_text,
10310 $rleading_block_if_elsif_text
10312 $block_opening_line_number{$type_sequence} =
10313 $leading_block_text_line_number;
10314 reset_block_text_accumulator();
10318 # shouldn't happen, but not a serious error.
10319 # We were accumulating -csc text for block type
10320 # $accumulating_text_for_block and unexpectedly
10321 # encountered a '{' for block type $block_type.
10328 && $csc_new_statement_ok
10329 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
10330 && $token =~ /$closing_side_comment_list_pattern/o )
10332 set_block_text_accumulator($i);
10336 # note: ignoring type 'q' because of tricks being played
10337 # with 'q' for hanging side comments
10338 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
10339 $csc_new_statement_ok =
10340 ( $block_type || $type eq 'J' || $type eq ';' );
10343 && $accumulating_text_for_block
10344 && $levels_to_go[$i] == $leading_block_text_level )
10346 reset_block_text_accumulator();
10349 accumulate_block_text($i);
10354 # Treat an 'else' block specially by adding preceding 'if' and
10355 # 'elsif' text. Otherwise, the 'end else' is not helpful,
10356 # especially for cuddled-else formatting.
10357 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
10358 $block_leading_text =
10359 make_else_csc_text( $i_terminal, $terminal_block_type,
10360 $block_leading_text, $rblock_leading_if_elsif_text );
10363 return ( $terminal_type, $i_terminal, $i_block_leading_text,
10364 $block_leading_text, $block_line_count );
10368 sub make_else_csc_text {
10370 # create additional -csc text for an 'else' and optionally 'elsif',
10371 # depending on the value of switch
10372 # $rOpts_closing_side_comment_else_flag:
10374 # = 0 add 'if' text to trailing else
10375 # = 1 same as 0 plus:
10376 # add 'if' to 'elsif's if can fit in line length
10377 # add last 'elsif' to trailing else if can fit in one line
10378 # = 2 same as 1 but do not check if exceed line length
10380 # $rif_elsif_text = a reference to a list of all previous closing
10381 # side comments created for this if block
10383 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
10384 my $csc_text = $block_leading_text;
10386 if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
10391 my $count = @{$rif_elsif_text};
10392 return $csc_text unless ($count);
10394 my $if_text = '[ if' . $rif_elsif_text->[0];
10396 # always show the leading 'if' text on 'else'
10397 if ( $block_type eq 'else' ) {
10398 $csc_text .= $if_text;
10401 # see if that's all
10402 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
10406 my $last_elsif_text = "";
10407 if ( $count > 1 ) {
10408 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
10409 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
10412 # tentatively append one more item
10413 my $saved_text = $csc_text;
10414 if ( $block_type eq 'else' ) {
10415 $csc_text .= $last_elsif_text;
10418 $csc_text .= ' ' . $if_text;
10421 # all done if no length checks requested
10422 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
10426 # undo it if line length exceeded
10428 length($csc_text) + length($block_type) +
10429 length( $rOpts->{'closing-side-comment-prefix'} ) +
10430 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
10431 if ( $length > $rOpts_maximum_line_length ) {
10432 $csc_text = $saved_text;
10437 sub add_closing_side_comment {
10439 # add closing side comments after closing block braces if -csc used
10440 my $cscw_block_comment;
10442 #---------------------------------------------------------------
10443 # Step 1: loop through all tokens of this line to accumulate
10444 # the text needed to create the closing side comments. Also see
10445 # how the line ends.
10446 #---------------------------------------------------------------
10448 my ( $terminal_type, $i_terminal, $i_block_leading_text,
10449 $block_leading_text, $block_line_count )
10450 = accumulate_csc_text();
10452 #---------------------------------------------------------------
10453 # Step 2: make the closing side comment if this ends a block
10454 #---------------------------------------------------------------
10455 my $have_side_comment = $i_terminal != $max_index_to_go;
10457 # if this line might end in a block closure..
10459 $terminal_type eq '}'
10464 # the block is long enough
10465 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
10467 # or there is an existing comment to check
10468 || ( $have_side_comment
10469 && $rOpts->{'closing-side-comment-warnings'} )
10472 # .. and if this is one of the types of interest
10473 && $block_type_to_go[$i_terminal] =~
10474 /$closing_side_comment_list_pattern/o
10476 # ..and the corresponding opening brace must is not in this batch
10477 # (because we do not need to tag one-line blocks, although this
10478 # should also be caught with a positive -csci value)
10479 && $mate_index_to_go[$i_terminal] < 0
10484 # this is the last token (line doesnt have a side comment)
10485 !$have_side_comment
10487 # or the old side comment is a closing side comment
10488 || $tokens_to_go[$max_index_to_go] =~
10489 /$closing_side_comment_prefix_pattern/o
10494 # then make the closing side comment text
10496 "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
10498 # append any extra descriptive text collected above
10499 if ( $i_block_leading_text == $i_terminal ) {
10500 $token .= $block_leading_text;
10502 $token =~ s/\s*$//; # trim any trailing whitespace
10504 # handle case of existing closing side comment
10505 if ($have_side_comment) {
10507 # warn if requested and tokens differ significantly
10508 if ( $rOpts->{'closing-side-comment-warnings'} ) {
10509 my $old_csc = $tokens_to_go[$max_index_to_go];
10510 my $new_csc = $token;
10511 $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
10512 my $new_trailing_dots = $1;
10513 $old_csc =~ s/\.\.\.\s*$//;
10514 $new_csc =~ s/\s+//g; # trim all whitespace
10515 $old_csc =~ s/\s+//g;
10517 # Patch to handle multiple closing side comments at
10518 # else and elsif's. These have become too complicated
10519 # to check, so if we see an indication of
10520 # '[ if' or '[ # elsif', then assume they were made
10522 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
10523 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
10525 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
10526 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
10529 # if old comment is contained in new comment,
10530 # only compare the common part.
10531 if ( length($new_csc) > length($old_csc) ) {
10532 $new_csc = substr( $new_csc, 0, length($old_csc) );
10535 # if the new comment is shorter and has been limited,
10536 # only compare the common part.
10537 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
10539 $old_csc = substr( $old_csc, 0, length($new_csc) );
10542 # any remaining difference?
10543 if ( $new_csc ne $old_csc ) {
10545 # just leave the old comment if we are below the threshold
10546 # for creating side comments
10547 if ( $block_line_count <
10548 $rOpts->{'closing-side-comment-interval'} )
10553 # otherwise we'll make a note of it
10557 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
10560 # save the old side comment in a new trailing block comment
10561 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
10564 $cscw_block_comment =
10565 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
10570 # No differences.. we can safely delete old comment if we
10571 # are below the threshold
10572 if ( $block_line_count <
10573 $rOpts->{'closing-side-comment-interval'} )
10576 unstore_token_to_go()
10577 if ( $types_to_go[$max_index_to_go] eq '#' );
10578 unstore_token_to_go()
10579 if ( $types_to_go[$max_index_to_go] eq 'b' );
10584 # switch to the new csc (unless we deleted it!)
10585 $tokens_to_go[$max_index_to_go] = $token if $token;
10588 # handle case of NO existing closing side comment
10591 # insert the new side comment into the output token stream
10593 my $block_type = '';
10594 my $type_sequence = '';
10595 my $container_environment =
10596 $container_environment_to_go[$max_index_to_go];
10597 my $level = $levels_to_go[$max_index_to_go];
10598 my $slevel = $nesting_depth_to_go[$max_index_to_go];
10599 my $no_internal_newlines = 0;
10601 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
10602 my $ci_level = $ci_levels_to_go[$max_index_to_go];
10603 my $in_continued_quote = 0;
10605 # first insert a blank token
10606 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
10608 # then the side comment
10609 insert_new_token_to_go( $token, $type, $slevel,
10610 $no_internal_newlines );
10613 return $cscw_block_comment;
10616 sub previous_nonblank_token {
10621 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
10622 return $tokens_to_go[ $i - 1 ];
10625 return $tokens_to_go[ $i - 2 ];
10632 sub send_lines_to_vertical_aligner {
10634 my ( $ri_first, $ri_last, $do_not_pad ) = @_;
10636 my $rindentation_list = [0]; # ref to indentations for each line
10638 set_vertical_alignment_markers( $ri_first, $ri_last );
10640 # flush if necessary to avoid unwanted alignment
10641 my $must_flush = 0;
10642 if ( @$ri_first > 1 ) {
10644 # flush before a long if statement
10645 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
10650 Perl::Tidy::VerticalAligner::flush();
10653 set_logical_padding( $ri_first, $ri_last );
10655 # loop to prepare each line for shipment
10656 my $n_last_line = @$ri_first - 1;
10658 for my $n ( 0 .. $n_last_line ) {
10659 my $ibeg = $$ri_first[$n];
10660 my $iend = $$ri_last[$n];
10665 my $i_start = $ibeg;
10669 my @container_name = ("");
10670 my @multiple_comma_arrows = (undef);
10672 my $j = 0; # field index
10675 for $i ( $ibeg .. $iend ) {
10677 # Keep track of containers balanced on this line only.
10678 # These are used below to prevent unwanted cross-line alignments.
10679 # Unbalanced containers already avoid aligning across
10680 # container boundaries.
10681 if ( $tokens_to_go[$i] eq '(' ) {
10682 my $i_mate = $mate_index_to_go[$i];
10683 if ( $i_mate > $i && $i_mate <= $iend ) {
10685 my $seqno = $type_sequence_to_go[$i];
10686 my $count = comma_arrow_count($seqno);
10687 $multiple_comma_arrows[$depth] = $count && $count > 1;
10688 my $name = previous_nonblank_token($i);
10690 $container_name[$depth] = "+" . $name;
10693 elsif ( $tokens_to_go[$i] eq ')' ) {
10694 $depth-- if $depth > 0;
10697 # if we find a new synchronization token, we are done with
10699 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
10701 my $tok = my $raw_tok = $matching_token_to_go[$i];
10703 # make separators in different nesting depths unique
10704 # by appending the nesting depth digit.
10705 if ( $raw_tok ne '#' ) {
10706 $tok .= "$nesting_depth_to_go[$i]";
10709 # do any special decorations for commas to avoid unwanted
10710 # cross-line alignments.
10711 if ( $raw_tok eq ',' ) {
10712 if ( $container_name[$depth] ) {
10713 $tok .= $container_name[$depth];
10717 # decorate '=>' with:
10718 # - Nothing if this container is unbalanced on this line.
10719 # - The previous token if it is balanced and multiple '=>'s
10720 # - The container name if it is bananced and no other '=>'s
10721 elsif ( $raw_tok eq '=>' ) {
10722 if ( $container_name[$depth] ) {
10723 if ( $multiple_comma_arrows[$depth] ) {
10724 $tok .= "+" . previous_nonblank_token($i);
10727 $tok .= $container_name[$depth];
10732 # concatenate the text of the consecutive tokens to form
10735 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
10737 # store the alignment token for this field
10738 push( @tokens, $tok );
10740 # get ready for the next batch
10743 $patterns[$j] = "";
10746 # continue accumulating tokens
10747 # handle non-keywords..
10748 if ( $types_to_go[$i] ne 'k' ) {
10749 my $type = $types_to_go[$i];
10751 # Mark most things before arrows as a quote to
10752 # get them to line up. Testfile: mixed.pl.
10753 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
10754 my $next_type = $types_to_go[ $i + 1 ];
10755 my $i_next_nonblank =
10756 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
10758 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
10763 # minor patch to make numbers and quotes align
10764 if ( $type eq 'n' ) { $type = 'Q' }
10766 $patterns[$j] .= $type;
10769 # for keywords we have to use the actual text
10772 # map certain keywords to the same 'if' class to align
10773 # long if/elsif sequences. my testfile: elsif.pl
10774 my $tok = $tokens_to_go[$i];
10775 if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
10778 $patterns[$j] .= $tok;
10782 # done with this line .. join text of tokens to make the last field
10783 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
10785 my ( $indentation, $lev, $level_end, $is_semicolon_terminated,
10786 $is_outdented_line )
10787 = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
10788 $ri_first, $ri_last, $rindentation_list );
10790 # we will allow outdenting of long lines..
10791 my $outdent_long_lines = (
10793 # which are long quotes, if allowed
10794 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
10796 # which are long block comments, if allowed
10798 $types_to_go[$ibeg] eq '#'
10799 && $rOpts->{'outdent-long-comments'}
10801 # but not if this is a static block comment
10802 && !$is_static_block_comment
10807 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
10809 my $rvertical_tightness_flags =
10810 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
10811 $ri_first, $ri_last );
10813 # flush an outdented line to avoid any unwanted vertical alignment
10814 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10816 # send this new line down the pipe
10817 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
10818 Perl::Tidy::VerticalAligner::append_line(
10825 $forced_breakpoint_to_go[$iend] || $in_comma_list,
10826 $outdent_long_lines,
10827 $is_semicolon_terminated,
10829 $rvertical_tightness_flags,
10833 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
10835 # flush an outdented line to avoid any unwanted vertical alignment
10836 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10840 } # end of loop to output each line
10842 # remember indentation of lines containing opening containers for
10843 # later use by sub set_adjusted_indentation
10844 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
10847 { # begin unmatched_indexes
10849 # closure to keep track of unbalanced containers.
10850 # arrays shared by the routines in this block:
10851 my @unmatched_opening_indexes_in_this_batch;
10852 my @unmatched_closing_indexes_in_this_batch;
10853 my %comma_arrow_count;
10855 sub is_unbalanced_batch {
10856 @unmatched_opening_indexes_in_this_batch +
10857 @unmatched_closing_indexes_in_this_batch;
10860 sub comma_arrow_count {
10862 return $comma_arrow_count{$seqno};
10865 sub match_opening_and_closing_tokens {
10867 # Match up indexes of opening and closing braces, etc, in this batch.
10868 # This has to be done after all tokens are stored because unstoring
10869 # of tokens would otherwise cause trouble.
10871 @unmatched_opening_indexes_in_this_batch = ();
10872 @unmatched_closing_indexes_in_this_batch = ();
10873 %comma_arrow_count = ();
10875 my ( $i, $i_mate, $token );
10876 foreach $i ( 0 .. $max_index_to_go ) {
10877 if ( $type_sequence_to_go[$i] ) {
10878 $token = $tokens_to_go[$i];
10879 if ( $token =~ /^[\(\[\{\?]$/ ) {
10880 push @unmatched_opening_indexes_in_this_batch, $i;
10882 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
10884 $i_mate = pop @unmatched_opening_indexes_in_this_batch;
10885 if ( defined($i_mate) && $i_mate >= 0 ) {
10886 if ( $type_sequence_to_go[$i_mate] ==
10887 $type_sequence_to_go[$i] )
10889 $mate_index_to_go[$i] = $i_mate;
10890 $mate_index_to_go[$i_mate] = $i;
10893 push @unmatched_opening_indexes_in_this_batch,
10895 push @unmatched_closing_indexes_in_this_batch, $i;
10899 push @unmatched_closing_indexes_in_this_batch, $i;
10903 elsif ( $tokens_to_go[$i] eq '=>' ) {
10904 if (@unmatched_opening_indexes_in_this_batch) {
10905 my $j = $unmatched_opening_indexes_in_this_batch[-1];
10906 my $seqno = $type_sequence_to_go[$j];
10907 $comma_arrow_count{$seqno}++;
10913 sub save_opening_indentation {
10915 # This should be called after each batch of tokens is output. It
10916 # saves indentations of lines of all unmatched opening tokens.
10917 # These will be used by sub get_opening_indentation.
10919 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
10921 # we no longer need indentations of any saved indentations which
10922 # are unmatched closing tokens in this batch, because we will
10923 # never encounter them again. So we can delete them to keep
10924 # the hash size down.
10925 foreach (@unmatched_closing_indexes_in_this_batch) {
10926 my $seqno = $type_sequence_to_go[$_];
10927 delete $saved_opening_indentation{$seqno};
10930 # we need to save indentations of any unmatched opening tokens
10931 # in this batch because we may need them in a subsequent batch.
10932 foreach (@unmatched_opening_indexes_in_this_batch) {
10933 my $seqno = $type_sequence_to_go[$_];
10934 $saved_opening_indentation{$seqno} = [
10935 lookup_opening_indentation(
10936 $_, $ri_first, $ri_last, $rindentation_list
10941 } # end unmatched_indexes
10943 sub get_opening_indentation {
10945 # get the indentation of the line which output the opening token
10946 # corresponding to a given closing token in the current output batch.
10949 # $i_closing - index in this line of a closing token ')' '}' or ']'
10951 # $ri_first - reference to list of the first index $i for each output
10952 # line in this batch
10953 # $ri_last - reference to list of the last index $i for each output line
10955 # $rindentation_list - reference to a list containing the indentation
10956 # used for each line.
10959 # -the indentation of the line which contained the opening token
10960 # which matches the token at index $i_opening
10961 # -and its offset (number of columns) from the start of the line
10963 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
10965 # first, see if the opening token is in the current batch
10966 my $i_opening = $mate_index_to_go[$i_closing];
10967 my ( $indent, $offset );
10968 if ( $i_opening >= 0 ) {
10970 # it is..look up the indentation
10971 ( $indent, $offset ) =
10972 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
10973 $rindentation_list );
10976 # if not, it should have been stored in the hash by a previous batch
10978 my $seqno = $type_sequence_to_go[$i_closing];
10980 if ( $saved_opening_indentation{$seqno} ) {
10981 ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
10985 # if no sequence number it must be an unbalanced container
10991 return ( $indent, $offset );
10994 sub lookup_opening_indentation {
10996 # get the indentation of the line in the current output batch
10997 # which output a selected opening token
11000 # $i_opening - index of an opening token in the current output batch
11001 # whose line indentation we need
11002 # $ri_first - reference to list of the first index $i for each output
11003 # line in this batch
11004 # $ri_last - reference to list of the last index $i for each output line
11006 # $rindentation_list - reference to a list containing the indentation
11007 # used for each line. (NOTE: the first slot in
11008 # this list is the last returned line number, and this is
11009 # followed by the list of indentations).
11012 # -the indentation of the line which contained token $i_opening
11013 # -and its offset (number of columns) from the start of the line
11015 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
11017 my $nline = $rindentation_list->[0]; # line number of previous lookup
11019 # reset line location if necessary
11020 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
11022 # find the correct line
11023 unless ( $i_opening > $ri_last->[-1] ) {
11024 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
11027 # error - token index is out of bounds - shouldn't happen
11030 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
11032 report_definite_bug();
11033 $nline = $#{$ri_last};
11036 $rindentation_list->[0] =
11037 $nline; # save line number to start looking next call
11038 my $ibeg = $ri_start->[$nline];
11039 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
11040 return ( $rindentation_list->[ $nline + 1 ], $offset );
11044 my %is_if_elsif_else_unless_while_until_for_foreach;
11048 # These block types may have text between the keyword and opening
11049 # curly. Note: 'else' does not, but must be included to allow trailing
11050 # if/elsif text to be appended.
11051 # patch for SWITCH/CASE: added 'case' and 'when'
11052 @_ = qw(if elsif else unless while until for foreach case when);
11053 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11056 sub set_adjusted_indentation {
11058 # This routine has the final say regarding the actual indentation of
11059 # a line. It starts with the basic indentation which has been
11060 # defined for the leading token, and then takes into account any
11061 # options that the user has set regarding special indenting and
11064 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11065 $rindentation_list )
11068 # we need to know the last token of this line
11069 my ( $terminal_type, $i_terminal ) =
11070 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11072 my $is_outdented_line = 0;
11074 my $is_semicolon_terminated = $terminal_type eq ';'
11075 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11077 ##########################################################
11078 # Section 1: set a flag and a default indentation
11080 # Most lines are indented according to the initial token.
11081 # But it is common to outdent to the level just after the
11082 # terminal token in certain cases...
11083 # adjust_indentation flag:
11084 # 0 - do not adjust
11086 # 2 - vertically align with opening token
11088 ##########################################################
11089 my $adjust_indentation = 0;
11090 my $default_adjust_indentation = $adjust_indentation;
11092 my ( $opening_indentation, $opening_offset );
11094 # if we are at a closing token of some type..
11095 if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11097 # get the indentation of the line containing the corresponding
11099 ( $opening_indentation, $opening_offset ) =
11100 get_opening_indentation( $ibeg, $ri_first, $ri_last,
11101 $rindentation_list );
11103 # First set the default behavior:
11104 # default behavior is to outdent closing lines
11105 # of the form: "); }; ]; )->xxx;"
11107 $is_semicolon_terminated
11109 # and 'cuddled parens' of the form: ")->pack("
11111 $terminal_type eq '('
11112 && $types_to_go[$ibeg] eq ')'
11113 && ( $nesting_depth_to_go[$iend] + 1 ==
11114 $nesting_depth_to_go[$ibeg] )
11118 $adjust_indentation = 1;
11121 # TESTING: outdent something like '),'
11123 $terminal_type eq ','
11125 # allow just one character before the comma
11126 && $i_terminal == $ibeg + 1
11128 # requre LIST environment; otherwise, we may outdent too much --
11129 # this can happen in calls without parentheses (overload.t);
11130 && $container_environment_to_go[$i_terminal] eq 'LIST'
11133 $adjust_indentation = 1;
11136 # undo continuation indentation of a terminal closing token if
11137 # it is the last token before a level decrease. This will allow
11138 # a closing token to line up with its opening counterpart, and
11139 # avoids a indentation jump larger than 1 level.
11140 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11141 && $i_terminal == $ibeg )
11143 my $ci = $ci_levels_to_go[$ibeg];
11144 my $lev = $levels_to_go[$ibeg];
11145 my $next_type = $types_to_go[ $ibeg + 1 ];
11146 my $i_next_nonblank =
11147 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11148 if ( $i_next_nonblank <= $max_index_to_go
11149 && $levels_to_go[$i_next_nonblank] < $lev )
11151 $adjust_indentation = 1;
11155 $default_adjust_indentation = $adjust_indentation;
11157 # Now modify default behavior according to user request:
11158 # handle option to indent non-blocks of the form ); }; ];
11159 # But don't do special indentation to something like ')->pack('
11160 if ( !$block_type_to_go[$ibeg] ) {
11161 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11163 if ( $i_terminal <= $ibeg + 1
11164 || $is_semicolon_terminated )
11166 $adjust_indentation = 2;
11169 $adjust_indentation = 0;
11172 elsif ( $cti == 2 ) {
11173 if ($is_semicolon_terminated) {
11174 $adjust_indentation = 3;
11177 $adjust_indentation = 0;
11180 elsif ( $cti == 3 ) {
11181 $adjust_indentation = 3;
11185 # handle option to indent blocks
11188 $rOpts->{'indent-closing-brace'}
11190 $i_terminal == $ibeg # isolated terminal '}'
11191 || $is_semicolon_terminated
11195 $adjust_indentation = 3;
11200 # if at ');', '};', '>;', and '];' of a terminal qw quote
11201 elsif ($$rpatterns[0] =~ /^qb*;$/
11202 && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
11204 if ( $closing_token_indentation{$1} == 0 ) {
11205 $adjust_indentation = 1;
11208 $adjust_indentation = 3;
11212 ##########################################################
11213 # Section 2: set indentation according to flag set above
11215 # Select the indentation object to define leading
11216 # whitespace. If we are outdenting something like '} } );'
11217 # then we want to use one level below the last token
11218 # ($i_terminal) in order to get it to fully outdent through
11220 ##########################################################
11223 my $level_end = $levels_to_go[$iend];
11225 if ( $adjust_indentation == 0 ) {
11226 $indentation = $leading_spaces_to_go[$ibeg];
11227 $lev = $levels_to_go[$ibeg];
11229 elsif ( $adjust_indentation == 1 ) {
11230 $indentation = $reduced_spaces_to_go[$i_terminal];
11231 $lev = $levels_to_go[$i_terminal];
11234 # handle indented closing token which aligns with opening token
11235 elsif ( $adjust_indentation == 2 ) {
11237 # handle option to align closing token with opening token
11238 $lev = $levels_to_go[$ibeg];
11240 # calculate spaces needed to align with opening token
11242 get_SPACES($opening_indentation) + $opening_offset;
11244 # Indent less than the previous line.
11246 # Problem: For -lp we don't exactly know what it was if there
11247 # were recoverable spaces sent to the aligner. A good solution
11248 # would be to force a flush of the vertical alignment buffer, so
11249 # that we would know. For now, this rule is used for -lp:
11251 # When the last line did not start with a closing token we will
11252 # be optimistic that the aligner will recover everything wanted.
11254 # This rule will prevent us from breaking a hierarchy of closing
11255 # tokens, and in a worst case will leave a closing paren too far
11256 # indented, but this is better than frequently leaving it not
11258 my $last_spaces = get_SPACES($last_indentation_written);
11259 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
11261 get_RECOVERABLE_SPACES($last_indentation_written);
11264 # reset the indentation to the new space count if it works
11265 # only options are all or none: nothing in-between looks good
11266 $lev = $levels_to_go[$ibeg];
11267 if ( $space_count < $last_spaces ) {
11268 if ($rOpts_line_up_parentheses) {
11269 my $lev = $levels_to_go[$ibeg];
11271 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11274 $indentation = $space_count;
11278 # revert to default if it doesnt work
11280 $space_count = leading_spaces_to_go($ibeg);
11281 if ( $default_adjust_indentation == 0 ) {
11282 $indentation = $leading_spaces_to_go[$ibeg];
11284 elsif ( $default_adjust_indentation == 1 ) {
11285 $indentation = $reduced_spaces_to_go[$i_terminal];
11286 $lev = $levels_to_go[$i_terminal];
11291 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
11294 # handle -icb (indented closing code block braces)
11295 # Updated method for indented block braces: indent one full level if
11296 # there is no continuation indentation. This will occur for major
11297 # structures such as sub, if, else, but not for things like map
11300 # Note: only code blocks without continuation indentation are
11301 # handled here (if, else, unless, ..). In the following snippet,
11302 # the terminal brace of the sort block will have continuation
11303 # indentation as shown so it will not be handled by the coding
11304 # here. We would have to undo the continuation indentation to do
11305 # this, but it probably looks ok as is. This is a possible future
11306 # update for semicolon terminated lines.
11308 # if ($sortby eq 'date' or $sortby eq 'size') {
11310 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
11315 if ( $block_type_to_go[$ibeg]
11316 && $ci_levels_to_go[$i_terminal] == 0 )
11318 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
11319 $indentation = $spaces + $rOpts_indent_columns;
11321 # NOTE: for -lp we could create a new indentation object, but
11322 # there is probably no need to do it
11325 # handle -icp and any -icb block braces which fall through above
11326 # test such as the 'sort' block mentioned above.
11329 # There are currently two ways to handle -icp...
11330 # One way is to use the indentation of the previous line:
11331 # $indentation = $last_indentation_written;
11333 # The other way is to use the indentation that the previous line
11334 # would have had if it hadn't been adjusted:
11335 $indentation = $last_unadjusted_indentation;
11337 # Current method: use the minimum of the two. This avoids
11338 # inconsistent indentation.
11339 if ( get_SPACES($last_indentation_written) <
11340 get_SPACES($indentation) )
11342 $indentation = $last_indentation_written;
11346 # use previous indentation but use own level
11347 # to cause list to be flushed properly
11348 $lev = $levels_to_go[$ibeg];
11351 # remember indentation except for multi-line quotes, which get
11353 unless ( $ibeg == 0 && $starting_in_quote ) {
11354 $last_indentation_written = $indentation;
11355 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
11356 $last_leading_token = $tokens_to_go[$ibeg];
11359 # be sure lines with leading closing tokens are not outdented more
11360 # than the line which contained the corresponding opening token.
11362 #############################################################
11363 # updated per bug report in alex_bug.pl: we must not
11364 # mess with the indentation of closing logical braces so
11365 # we must treat something like '} else {' as if it were
11366 # an isolated brace my $is_isolated_block_brace = (
11367 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
11368 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
11369 && ( $iend == $ibeg
11370 || $is_if_elsif_else_unless_while_until_for_foreach{
11371 $block_type_to_go[$ibeg] } );
11372 #############################################################
11373 if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
11374 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
11375 $indentation = $opening_indentation;
11379 # remember the indentation of each line of this batch
11380 push @{$rindentation_list}, $indentation;
11382 # outdent lines with certain leading tokens...
11385 # must be first word of this batch
11391 # certain leading keywords if requested
11393 $rOpts->{'outdent-keywords'}
11394 && $types_to_go[$ibeg] eq 'k'
11395 && $outdent_keyword{ $tokens_to_go[$ibeg] }
11398 # or labels if requested
11399 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
11401 # or static block comments if requested
11402 || ( $types_to_go[$ibeg] eq '#'
11403 && $rOpts->{'outdent-static-block-comments'}
11404 && $is_static_block_comment )
11409 my $space_count = leading_spaces_to_go($ibeg);
11410 if ( $space_count > 0 ) {
11411 $space_count -= $rOpts_continuation_indentation;
11412 $is_outdented_line = 1;
11413 if ( $space_count < 0 ) { $space_count = 0 }
11415 # do not promote a spaced static block comment to non-spaced;
11416 # this is not normally necessary but could be for some
11417 # unusual user inputs (such as -ci = -i)
11418 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
11422 if ($rOpts_line_up_parentheses) {
11424 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11427 $indentation = $space_count;
11432 return ( $indentation, $lev, $level_end, $is_semicolon_terminated,
11433 $is_outdented_line );
11437 sub set_vertical_tightness_flags {
11439 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
11441 # Define vertical tightness controls for the nth line of a batch.
11442 # We create an array of parameters which tell the vertical aligner
11443 # if we should combine this line with the next line to achieve the
11444 # desired vertical tightness. The array of parameters contains:
11446 # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace
11447 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
11448 # if closing: spaces of padding to use
11449 # [2] sequence number of container
11450 # [3] valid flag: do not append if this flag is false. Will be
11451 # true if appropriate -vt flag is set. Otherwise, Will be
11452 # made true only for 2 line container in parens with -lp
11454 # These flags are used by sub set_leading_whitespace in
11455 # the vertical aligner
11457 my $rvertical_tightness_flags;
11459 # For non-BLOCK tokens, we will need to examine the next line
11460 # too, so we won't consider the last line.
11461 if ( $n < $n_last_line ) {
11463 # see if last token is an opening token...not a BLOCK...
11464 my $ibeg_next = $$ri_first[ $n + 1 ];
11465 my $token_end = $tokens_to_go[$iend];
11466 my $iend_next = $$ri_last[ $n + 1 ];
11468 $type_sequence_to_go[$iend]
11469 && !$block_type_to_go[$iend]
11470 && $is_opening_token{$token_end}
11472 $opening_vertical_tightness{$token_end} > 0
11474 # allow 2-line method call to be closed up
11475 || ( $rOpts_line_up_parentheses
11476 && $token_end eq '('
11478 && $types_to_go[ $iend - 1 ] ne 'b' )
11483 # avoid multiple jumps in nesting depth in one line if
11485 my $ovt = $opening_vertical_tightness{$token_end};
11486 my $iend_next = $$ri_last[ $n + 1 ];
11489 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
11490 $nesting_depth_to_go[$ibeg_next] )
11494 # If -vt flag has not been set, mark this as invalid
11495 # and aligner will validate it if it sees the closing paren
11497 my $valid_flag = $ovt;
11498 @{$rvertical_tightness_flags} =
11499 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
11503 # see if first token of next line is a closing token...
11504 # ..and be sure this line does not have a side comment
11505 my $token_next = $tokens_to_go[$ibeg_next];
11506 if ( $type_sequence_to_go[$ibeg_next]
11507 && !$block_type_to_go[$ibeg_next]
11508 && $is_closing_token{$token_next}
11509 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
11511 my $ovt = $opening_vertical_tightness{$token_next};
11512 my $cvt = $closing_vertical_tightness{$token_next};
11515 # never append a trailing line like )->pack(
11516 # because it will throw off later alignment
11518 $nesting_depth_to_go[$ibeg_next] ==
11519 $nesting_depth_to_go[ $iend_next + 1 ] + 1
11524 $container_environment_to_go[$ibeg_next] ne 'LIST'
11528 # allow closing up 2-line method calls
11529 || ( $rOpts_line_up_parentheses
11530 && $token_next eq ')' )
11537 # decide which trailing closing tokens to append..
11539 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
11541 my $str = join( '',
11542 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
11544 # append closing token if followed by comment or ';'
11545 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
11549 my $valid_flag = $cvt;
11550 @{$rvertical_tightness_flags} = (
11552 $tightness{$token_next} == 2 ? 0 : 1,
11553 $type_sequence_to_go[$ibeg_next], $valid_flag,
11559 # Opening Token Right
11560 # If requested, move an isolated trailing opening token to the end of
11561 # the previous line which ended in a comma. We could do this
11562 # in sub recombine_breakpoints but that would cause problems
11563 # with -lp formatting. The problem is that indentation will
11564 # quickly move far to the right in nested expressions. By
11565 # doing it after indentation has been set, we avoid changes
11566 # to the indentation. Actual movement of the token takes place
11567 # in sub write_leader_and_string.
11569 $opening_token_right{ $tokens_to_go[$ibeg_next] }
11571 # previous line is not opening
11572 # (use -sot to combine with it)
11573 && !$is_opening_token{$token_end}
11575 # previous line ended in one of these
11576 # (add other cases if necessary; '=>' and '.' are not necessary
11577 ##&& ($is_opening_token{$token_end} || $token_end eq ',')
11578 && !$block_type_to_go[$ibeg_next]
11580 # this is a line with just an opening token
11581 && ( $iend_next == $ibeg_next
11582 || $iend_next == $ibeg_next + 1
11583 && $types_to_go[$iend_next] eq '#' )
11585 # looks bad if we align vertically with the wrong container
11586 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
11589 my $valid_flag = 1;
11590 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11591 @{$rvertical_tightness_flags} =
11592 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
11595 # Stacking of opening and closing tokens
11597 my $token_beg_next = $tokens_to_go[$ibeg_next];
11599 # patch to make something like 'qw(' behave like an opening paren
11601 if ( $types_to_go[$ibeg_next] eq 'q' ) {
11602 if ( $token_beg_next =~ /^q.([\[\(\{])$/ ) {
11603 $token_beg_next = $1;
11607 if ( $is_closing_token{$token_end}
11608 && $is_closing_token{$token_beg_next} )
11610 $stackable = $stack_closing_token{$token_beg_next}
11611 unless ( $block_type_to_go[$ibeg_next] )
11612 ; # shouldn't happen; just checking
11614 elsif ($is_opening_token{$token_end}
11615 && $is_opening_token{$token_beg_next} )
11617 $stackable = $stack_opening_token{$token_beg_next}
11618 unless ( $block_type_to_go[$ibeg_next] )
11619 ; # shouldn't happen; just checking
11624 my $is_semicolon_terminated;
11625 if ( $n + 1 == $n_last_line ) {
11626 my ( $terminal_type, $i_terminal ) = terminal_type(
11627 \@types_to_go, \@block_type_to_go,
11628 $ibeg_next, $iend_next
11630 $is_semicolon_terminated = $terminal_type eq ';'
11631 && $nesting_depth_to_go[$iend_next] <
11632 $nesting_depth_to_go[$ibeg_next];
11635 # this must be a line with just an opening token
11636 # or end in a semicolon
11638 $is_semicolon_terminated
11639 || ( $iend_next == $ibeg_next
11640 || $iend_next == $ibeg_next + 1
11641 && $types_to_go[$iend_next] eq '#' )
11644 my $valid_flag = 1;
11645 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11646 @{$rvertical_tightness_flags} =
11647 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
11653 # Check for a last line with isolated opening BLOCK curly
11654 elsif ($rOpts_block_brace_vertical_tightness
11656 && $types_to_go[$iend] eq '{'
11657 && $block_type_to_go[$iend] =~
11658 /$block_brace_vertical_tightness_pattern/o )
11660 @{$rvertical_tightness_flags} =
11661 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
11664 return $rvertical_tightness_flags;
11668 my %is_vertical_alignment_type;
11669 my %is_vertical_alignment_keyword;
11674 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
11675 { ? : => =~ && || //
11677 @is_vertical_alignment_type{@_} = (1) x scalar(@_);
11679 @_ = qw(if unless and or err eq ne for foreach while until);
11680 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
11683 sub set_vertical_alignment_markers {
11685 # Look at the tokens in this output batch and define the array
11686 # 'matching_token_to_go' which marks tokens at which we would
11687 # accept vertical alignment.
11689 # nothing to do if we aren't allowed to change whitespace
11690 if ( !$rOpts_add_whitespace ) {
11691 for my $i ( 0 .. $max_index_to_go ) {
11692 $matching_token_to_go[$i] = '';
11697 my ( $ri_first, $ri_last ) = @_;
11699 # look at each line of this batch..
11700 my $last_vertical_alignment_before_index;
11701 my $vert_last_nonblank_type;
11702 my $vert_last_nonblank_token;
11703 my $vert_last_nonblank_block_type;
11704 my $max_line = @$ri_first - 1;
11705 my ( $i, $type, $token, $block_type, $alignment_type );
11706 my ( $ibeg, $iend, $line );
11707 foreach $line ( 0 .. $max_line ) {
11708 $ibeg = $$ri_first[$line];
11709 $iend = $$ri_last[$line];
11710 $last_vertical_alignment_before_index = -1;
11711 $vert_last_nonblank_type = '';
11712 $vert_last_nonblank_token = '';
11713 $vert_last_nonblank_block_type = '';
11715 # look at each token in this output line..
11716 foreach $i ( $ibeg .. $iend ) {
11717 $alignment_type = '';
11718 $type = $types_to_go[$i];
11719 $block_type = $block_type_to_go[$i];
11720 $token = $tokens_to_go[$i];
11722 # check for flag indicating that we should not align
11724 if ( $matching_token_to_go[$i] ) {
11725 $matching_token_to_go[$i] = '';
11729 #--------------------------------------------------------
11730 # First see if we want to align BEFORE this token
11731 #--------------------------------------------------------
11733 # The first possible token that we can align before
11734 # is index 2 because: 1) it doesn't normally make sense to
11735 # align before the first token and 2) the second
11736 # token must be a blank if we are to align before
11738 if ( $i < $ibeg + 2 ) {
11741 # must follow a blank token
11742 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
11745 # align a side comment --
11746 elsif ( $type eq '#' ) {
11750 # it is a static side comment
11752 $rOpts->{'static-side-comments'}
11753 && $token =~ /$static_side_comment_pattern/o
11756 # or a closing side comment
11757 || ( $vert_last_nonblank_block_type
11759 /$closing_side_comment_prefix_pattern/o )
11762 $alignment_type = $type;
11763 } ## Example of a static side comment
11766 # otherwise, do not align two in a row to create a
11768 elsif ( $last_vertical_alignment_before_index == $i - 2 ) {
11771 # align before one of these keywords
11772 # (within a line, since $i>1)
11773 elsif ( $type eq 'k' ) {
11775 # /^(if|unless|and|or|eq|ne)$/
11776 if ( $is_vertical_alignment_keyword{$token} ) {
11777 $alignment_type = $token;
11781 # align before one of these types..
11782 # Note: add '.' after new vertical aligner is operational
11783 elsif ( $is_vertical_alignment_type{$type} ) {
11784 $alignment_type = $token;
11786 # For a paren after keyword, only align something like this:
11788 # elsif ( $b ) { &b }
11789 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
11790 $alignment_type = ""
11791 unless $vert_last_nonblank_token =~
11792 /^(if|unless|elsif)$/;
11795 # be sure the alignment tokens are unique
11796 # This didn't work well: reason not determined
11797 # if ($token ne $type) {$alignment_type .= $type}
11800 # NOTE: This is deactivated until the new vertical aligner
11801 # is finished because it causes the previous if/elsif alignment
11803 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) {
11804 # $alignment_type = $type;
11807 if ($alignment_type) {
11808 $last_vertical_alignment_before_index = $i;
11811 #--------------------------------------------------------
11812 # Next see if we want to align AFTER the previous nonblank
11813 #--------------------------------------------------------
11815 # We want to line up ',' and interior ';' tokens, with the added
11816 # space AFTER these tokens. (Note: interior ';' is included
11817 # because it may occur in short blocks).
11820 # we haven't already set it
11823 # and its not the first token of the line
11826 # and it follows a blank
11827 && $types_to_go[ $i - 1 ] eq 'b'
11829 # and previous token IS one of these:
11830 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
11832 # and it's NOT one of these
11833 && ( $type !~ /^[b\#\)\]\}]$/ )
11835 # then go ahead and align
11839 $alignment_type = $vert_last_nonblank_type;
11842 #--------------------------------------------------------
11843 # then store the value
11844 #--------------------------------------------------------
11845 $matching_token_to_go[$i] = $alignment_type;
11846 if ( $type ne 'b' ) {
11847 $vert_last_nonblank_type = $type;
11848 $vert_last_nonblank_token = $token;
11849 $vert_last_nonblank_block_type = $block_type;
11856 sub terminal_type {
11858 # returns type of last token on this line (terminal token), as follows:
11859 # returns # for a full-line comment
11860 # returns ' ' for a blank line
11861 # otherwise returns final token type
11863 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
11865 # check for full-line comment..
11866 if ( $$rtype[$ibeg] eq '#' ) {
11867 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
11871 # start at end and walk bakwards..
11872 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
11874 # skip past any side comment and blanks
11875 next if ( $$rtype[$i] eq 'b' );
11876 next if ( $$rtype[$i] eq '#' );
11878 # found it..make sure it is a BLOCK termination,
11879 # but hide a terminal } after sort/grep/map because it is not
11880 # necessarily the end of the line. (terminal.t)
11881 my $terminal_type = $$rtype[$i];
11883 $terminal_type eq '}'
11884 && ( !$$rblock_type[$i]
11885 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
11888 $terminal_type = 'b';
11890 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
11894 return wantarray ? ( ' ', $ibeg ) : ' ';
11899 my %is_good_keyword_breakpoint;
11900 my %is_lt_gt_le_ge;
11902 sub set_bond_strengths {
11906 @_ = qw(if unless while until for foreach);
11907 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
11909 @_ = qw(lt gt le ge);
11910 @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
11912 ###############################################################
11913 # NOTE: NO_BREAK's set here are HINTS which may not be honored;
11914 # essential NO_BREAKS's must be enforced in section 2, below.
11915 ###############################################################
11917 # adding NEW_TOKENS: add a left and right bond strength by
11918 # mimmicking what is done for an existing token type. You
11919 # can skip this step at first and take the default, then
11920 # tweak later to get desired results.
11922 # The bond strengths should roughly follow precenence order where
11923 # possible. If you make changes, please check the results very
11924 # carefully on a variety of scripts.
11926 # no break around possible filehandle
11927 $left_bond_strength{'Z'} = NO_BREAK;
11928 $right_bond_strength{'Z'} = NO_BREAK;
11930 # never put a bare word on a new line:
11931 # example print (STDERR, "bla"); will fail with break after (
11932 $left_bond_strength{'w'} = NO_BREAK;
11934 # blanks always have infinite strength to force breaks after real tokens
11935 $right_bond_strength{'b'} = NO_BREAK;
11937 # try not to break on exponentation
11938 @_ = qw" ** .. ... <=> ";
11939 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11940 @right_bond_strength{@_} = (STRONG) x scalar(@_);
11942 # The comma-arrow has very low precedence but not a good break point
11943 $left_bond_strength{'=>'} = NO_BREAK;
11944 $right_bond_strength{'=>'} = NOMINAL;
11946 # ok to break after label
11947 $left_bond_strength{'J'} = NO_BREAK;
11948 $right_bond_strength{'J'} = NOMINAL;
11949 $left_bond_strength{'j'} = STRONG;
11950 $right_bond_strength{'j'} = STRONG;
11951 $left_bond_strength{'A'} = STRONG;
11952 $right_bond_strength{'A'} = STRONG;
11954 $left_bond_strength{'->'} = STRONG;
11955 $right_bond_strength{'->'} = VERY_STRONG;
11957 # breaking AFTER these is just ok:
11958 @_ = qw" % + - * / x ";
11959 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11960 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
11962 # breaking BEFORE these is just ok:
11964 @right_bond_strength{@_} = (STRONG) x scalar(@_);
11965 @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
11967 # I prefer breaking before the string concatenation operator
11968 # because it can be hard to see at the end of a line
11969 # swap these to break after a '.'
11970 # this could be a future option
11971 $right_bond_strength{'.'} = STRONG;
11972 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
11975 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11976 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
11978 # make these a little weaker than nominal so that they get
11979 # favored for end-of-line characters
11980 @_ = qw"!= == =~ !~";
11981 @left_bond_strength{@_} = (STRONG) x scalar(@_);
11982 @right_bond_strength{@_} =
11983 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
11985 # break AFTER these
11986 @_ = qw" < > | & >= <=";
11987 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
11988 @right_bond_strength{@_} =
11989 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
11991 # breaking either before or after a quote is ok
11992 # but bias for breaking before a quote
11993 $left_bond_strength{'Q'} = NOMINAL;
11994 $right_bond_strength{'Q'} = NOMINAL + 0.02;
11995 $left_bond_strength{'q'} = NOMINAL;
11996 $right_bond_strength{'q'} = NOMINAL;
11998 # starting a line with a keyword is usually ok
11999 $left_bond_strength{'k'} = NOMINAL;
12001 # we usually want to bond a keyword strongly to what immediately
12002 # follows, rather than leaving it stranded at the end of a line
12003 $right_bond_strength{'k'} = STRONG;
12005 $left_bond_strength{'G'} = NOMINAL;
12006 $right_bond_strength{'G'} = STRONG;
12008 # it is very good to break AFTER various assignment operators
12010 = **= += *= &= <<= &&=
12011 -= /= |= >>= ||= //=
12015 @left_bond_strength{@_} = (STRONG) x scalar(@_);
12016 @right_bond_strength{@_} =
12017 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12019 # break BEFORE '&&' and '||' and '//'
12020 # set strength of '||' to same as '=' so that chains like
12021 # $a = $b || $c || $d will break before the first '||'
12022 $right_bond_strength{'||'} = NOMINAL;
12023 $left_bond_strength{'||'} = $right_bond_strength{'='};
12025 # same thing for '//'
12026 $right_bond_strength{'//'} = NOMINAL;
12027 $left_bond_strength{'//'} = $right_bond_strength{'='};
12029 # set strength of && a little higher than ||
12030 $right_bond_strength{'&&'} = NOMINAL;
12031 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
12033 $left_bond_strength{';'} = VERY_STRONG;
12034 $right_bond_strength{';'} = VERY_WEAK;
12035 $left_bond_strength{'f'} = VERY_STRONG;
12037 # make right strength of for ';' a little less than '='
12038 # to make for contents break after the ';' to avoid this:
12039 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12040 # $number_of_fields )
12041 # and make it weaker than ',' and 'and' too
12042 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12044 # The strengths of ?/: should be somewhere between
12045 # an '=' and a quote (NOMINAL),
12046 # make strength of ':' slightly less than '?' to help
12047 # break long chains of ? : after the colons
12048 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
12049 $right_bond_strength{':'} = NO_BREAK;
12050 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
12051 $right_bond_strength{'?'} = NO_BREAK;
12053 $left_bond_strength{','} = VERY_STRONG;
12054 $right_bond_strength{','} = VERY_WEAK;
12056 # Set bond strengths of certain keywords
12057 # make 'or', 'err', 'and' slightly weaker than a ','
12058 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
12059 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
12060 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
12061 $left_bond_strength{'xor'} = NOMINAL;
12062 $right_bond_strength{'and'} = NOMINAL;
12063 $right_bond_strength{'or'} = NOMINAL;
12064 $right_bond_strength{'err'} = NOMINAL;
12065 $right_bond_strength{'xor'} = STRONG;
12068 # patch-its always ok to break at end of line
12069 $nobreak_to_go[$max_index_to_go] = 0;
12071 # adding a small 'bias' to strengths is a simple way to make a line
12072 # break at the first of a sequence of identical terms. For example,
12073 # to force long string of conditional operators to break with
12074 # each line ending in a ':', we can add a small number to the bond
12075 # strength of each ':'
12076 my $colon_bias = 0;
12083 my $code_bias = -.01;
12087 my $last_nonblank_type = $type;
12088 my $last_nonblank_token = $token;
12089 my $delta_bias = 0.0001;
12090 my $list_str = $left_bond_strength{'?'};
12092 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
12093 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
12096 # preliminary loop to compute bond strengths
12097 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
12098 $last_type = $type;
12099 if ( $type ne 'b' ) {
12100 $last_nonblank_type = $type;
12101 $last_nonblank_token = $token;
12103 $type = $types_to_go[$i];
12105 # strength on both sides of a blank is the same
12106 if ( $type eq 'b' && $last_type ne 'b' ) {
12107 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
12111 $token = $tokens_to_go[$i];
12112 $block_type = $block_type_to_go[$i];
12114 $next_type = $types_to_go[$i_next];
12115 $next_token = $tokens_to_go[$i_next];
12116 $total_nesting_depth = $nesting_depth_to_go[$i_next];
12117 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12118 $next_nonblank_type = $types_to_go[$i_next_nonblank];
12119 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12121 # Some token chemistry... The decision about where to break a
12122 # line depends upon a "bond strength" between tokens. The LOWER
12123 # the bond strength, the MORE likely a break. The strength
12124 # values are based on trial-and-error, and need to be tweaked
12125 # occasionally to get desired results. Things to keep in mind
12127 # 1. relative strengths are important. small differences
12128 # in strengths can make big formatting differences.
12129 # 2. each indentation level adds one unit of bond strength
12130 # 3. a value of NO_BREAK makes an unbreakable bond
12131 # 4. a value of VERY_WEAK is the strength of a ','
12132 # 5. values below NOMINAL are considered ok break points
12133 # 6. values above NOMINAL are considered poor break points
12134 # We are computing the strength of the bond between the current
12135 # token and the NEXT token.
12136 my $bond_str = VERY_STRONG; # a default, high strength
12138 #---------------------------------------------------------------
12140 # use minimum of left and right bond strengths if defined;
12141 # digraphs and trigraphs like to break on their left
12142 #---------------------------------------------------------------
12143 my $bsr = $right_bond_strength{$type};
12145 if ( !defined($bsr) ) {
12147 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
12151 $bsr = VERY_STRONG;
12155 # define right bond strengths of certain keywords
12156 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
12157 $bsr = $right_bond_strength{$token};
12159 elsif ( $token eq 'ne' or $token eq 'eq' ) {
12162 my $bsl = $left_bond_strength{$next_nonblank_type};
12164 # set terminal bond strength to the nominal value
12165 # this will cause good preceding breaks to be retained
12166 if ( $i_next_nonblank > $max_index_to_go ) {
12170 if ( !defined($bsl) ) {
12172 if ( $is_digraph{$next_nonblank_type}
12173 || $is_trigraph{$next_nonblank_type} )
12178 $bsl = VERY_STRONG;
12182 # define right bond strengths of certain keywords
12183 if ( $next_nonblank_type eq 'k'
12184 && defined( $left_bond_strength{$next_nonblank_token} ) )
12186 $bsl = $left_bond_strength{$next_nonblank_token};
12188 elsif ($next_nonblank_token eq 'ne'
12189 or $next_nonblank_token eq 'eq' )
12193 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
12194 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
12197 # Note: it might seem that we would want to keep a NO_BREAK if
12198 # either token has this value. This didn't work, because in an
12199 # arrow list, it prevents the comma from separating from the
12200 # following bare word (which is probably quoted by its arrow).
12201 # So necessary NO_BREAK's have to be handled as special cases
12202 # in the final section.
12203 $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
12204 my $bond_str_1 = $bond_str;
12206 #---------------------------------------------------------------
12209 #---------------------------------------------------------------
12211 # allow long lines before final { in an if statement, as in:
12216 # Otherwise, the line before the { tends to be too short.
12217 if ( $type eq ')' ) {
12218 if ( $next_nonblank_type eq '{' ) {
12219 $bond_str = VERY_WEAK + 0.03;
12223 elsif ( $type eq '(' ) {
12224 if ( $next_nonblank_type eq '{' ) {
12225 $bond_str = NOMINAL;
12229 # break on something like '} (', but keep this stronger than a ','
12230 # example is in 'howe.pl'
12231 elsif ( $type eq 'R' or $type eq '}' ) {
12232 if ( $next_nonblank_type eq '(' ) {
12233 $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
12237 #-----------------------------------------------------------------
12238 # adjust bond strength bias
12239 #-----------------------------------------------------------------
12241 elsif ( $type eq 'f' ) {
12242 $bond_str += $f_bias;
12243 $f_bias += $delta_bias;
12246 # in long ?: conditionals, bias toward just one set per line (colon.t)
12247 elsif ( $type eq ':' ) {
12248 if ( !$want_break_before{$type} ) {
12249 $bond_str += $colon_bias;
12250 $colon_bias += $delta_bias;
12254 if ( $next_nonblank_type eq ':'
12255 && $want_break_before{$next_nonblank_type} )
12257 $bond_str += $colon_bias;
12258 $colon_bias += $delta_bias;
12261 # if leading '.' is used, align all but 'short' quotes;
12262 # the idea is to not place something like "\n" on a single line.
12263 elsif ( $next_nonblank_type eq '.' ) {
12264 if ( $want_break_before{'.'} ) {
12266 $last_nonblank_type eq '.'
12269 $rOpts_short_concatenation_item_length )
12270 && ( $token !~ /^[\)\]\}]$/ )
12273 $dot_bias += $delta_bias;
12275 $bond_str += $dot_bias;
12278 elsif ($next_nonblank_type eq '&&'
12279 && $want_break_before{$next_nonblank_type} )
12281 $bond_str += $amp_bias;
12282 $amp_bias += $delta_bias;
12284 elsif ($next_nonblank_type eq '||'
12285 && $want_break_before{$next_nonblank_type} )
12287 $bond_str += $bar_bias;
12288 $bar_bias += $delta_bias;
12290 elsif ( $next_nonblank_type eq 'k' ) {
12292 if ( $next_nonblank_token eq 'and'
12293 && $want_break_before{$next_nonblank_token} )
12295 $bond_str += $and_bias;
12296 $and_bias += $delta_bias;
12298 elsif ($next_nonblank_token =~ /^(or|err)$/
12299 && $want_break_before{$next_nonblank_token} )
12301 $bond_str += $or_bias;
12302 $or_bias += $delta_bias;
12305 # FIXME: needs more testing
12306 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
12307 $bond_str = $list_str if ( $bond_str > $list_str );
12309 elsif ( $token eq 'err'
12310 && !$want_break_before{$token} )
12312 $bond_str += $or_bias;
12313 $or_bias += $delta_bias;
12318 && !$want_break_before{$type} )
12320 $bond_str += $colon_bias;
12321 $colon_bias += $delta_bias;
12323 elsif ( $type eq '&&'
12324 && !$want_break_before{$type} )
12326 $bond_str += $amp_bias;
12327 $amp_bias += $delta_bias;
12329 elsif ( $type eq '||'
12330 && !$want_break_before{$type} )
12332 $bond_str += $bar_bias;
12333 $bar_bias += $delta_bias;
12335 elsif ( $type eq 'k' ) {
12337 if ( $token eq 'and'
12338 && !$want_break_before{$token} )
12340 $bond_str += $and_bias;
12341 $and_bias += $delta_bias;
12343 elsif ( $token eq 'or'
12344 && !$want_break_before{$token} )
12346 $bond_str += $or_bias;
12347 $or_bias += $delta_bias;
12351 # keep matrix and hash indices together
12352 # but make them a little below STRONG to allow breaking open
12353 # something like {'some-word'}{'some-very-long-word'} at the }{
12355 if ( ( $type eq ']' or $type eq 'R' )
12356 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
12359 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
12362 if ( $next_nonblank_token =~ /^->/ ) {
12364 # increase strength to the point where a break in the following
12365 # will be after the opening paren rather than at the arrow:
12367 if ( $type eq 'i' ) {
12368 $bond_str = 1.45 * STRONG;
12371 elsif ( $type =~ /^[\)\]\}R]$/ ) {
12372 $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
12375 # otherwise make strength before an '->' a little over a '+'
12377 if ( $bond_str <= NOMINAL ) {
12378 $bond_str = NOMINAL + 0.01;
12383 if ( $token eq ')' && $next_nonblank_token eq '[' ) {
12384 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
12387 # map1.t -- correct for a quirk in perl
12389 && $next_nonblank_type eq 'i'
12390 && $last_nonblank_type eq 'k'
12391 && $is_sort_map_grep{$last_nonblank_token} )
12393 # /^(sort|map|grep)$/ )
12395 $bond_str = NO_BREAK;
12398 # extrude.t: do not break before paren at:
12400 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
12401 $bond_str = NO_BREAK;
12404 # good to break after end of code blocks
12405 if ( $type eq '}' && $block_type ) {
12407 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
12408 $code_bias += $delta_bias;
12411 if ( $type eq 'k' ) {
12413 # allow certain control keywords to stand out
12414 if ( $next_nonblank_type eq 'k'
12415 && $is_last_next_redo_return{$token} )
12417 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
12420 # Don't break after keyword my. This is a quick fix for a
12421 # rare problem with perl. An example is this line from file
12423 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
12425 if ( $token eq 'my' ) {
12426 $bond_str = NO_BREAK;
12431 # good to break before 'if', 'unless', etc
12432 if ( $is_if_brace_follower{$next_nonblank_token} ) {
12433 $bond_str = VERY_WEAK;
12436 if ( $next_nonblank_type eq 'k' ) {
12438 # keywords like 'unless', 'if', etc, within statements
12440 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
12441 $bond_str = VERY_WEAK / 1.05;
12445 # try not to break before a comma-arrow
12446 elsif ( $next_nonblank_type eq '=>' ) {
12447 if ( $bond_str < STRONG ) { $bond_str = STRONG }
12450 #----------------------------------------------------------------------
12451 # only set NO_BREAK's from here on
12452 #----------------------------------------------------------------------
12453 if ( $type eq 'C' or $type eq 'U' ) {
12455 # use strict requires that bare word and => not be separated
12456 if ( $next_nonblank_type eq '=>' ) {
12457 $bond_str = NO_BREAK;
12462 # use strict requires that bare word within braces not start new line
12463 elsif ( $type eq 'L' ) {
12465 if ( $next_nonblank_type eq 'w' ) {
12466 $bond_str = NO_BREAK;
12470 # in older version of perl, use strict can cause problems with
12471 # breaks before bare words following opening parens. For example,
12472 # this will fail under older versions if a break is made between
12475 # open( MAIL, "a long filename or command");
12477 elsif ( $type eq '{' ) {
12479 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
12481 # but it's fine to break if the word is followed by a '=>'
12482 # or if it is obviously a sub call
12483 my $i_next_next_nonblank = $i_next_nonblank + 1;
12484 my $next_next_type = $types_to_go[$i_next_next_nonblank];
12485 if ( $next_next_type eq 'b'
12486 && $i_next_nonblank < $max_index_to_go )
12488 $i_next_next_nonblank++;
12489 $next_next_type = $types_to_go[$i_next_next_nonblank];
12492 ##if ( $next_next_type ne '=>' ) {
12493 # these are ok: '->xxx', '=>', '('
12495 # We'll check for an old breakpoint and keep a leading
12496 # bareword if it was that way in the input file.
12497 # Presumably it was ok that way. For example, the
12498 # following would remain unchanged:
12501 # January, February, March, April,
12502 # May, June, July, August,
12503 # September, October, November, December,
12506 # This should be sufficient:
12507 if ( !$old_breakpoint_to_go[$i]
12508 && ( $next_next_type eq ',' || $next_next_type eq '}' )
12511 $bond_str = NO_BREAK;
12516 elsif ( $type eq 'w' ) {
12518 if ( $next_nonblank_type eq 'R' ) {
12519 $bond_str = NO_BREAK;
12522 # use strict requires that bare word and => not be separated
12523 if ( $next_nonblank_type eq '=>' ) {
12524 $bond_str = NO_BREAK;
12528 # in fact, use strict hates bare words on any new line. For
12529 # example, a break before the underscore here provokes the
12530 # wrath of use strict:
12531 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
12532 elsif ( $type eq 'F' ) {
12533 $bond_str = NO_BREAK;
12536 # use strict does not allow separating type info from trailing { }
12537 # testfile is readmail.pl
12538 elsif ( $type eq 't' or $type eq 'i' ) {
12540 if ( $next_nonblank_type eq 'L' ) {
12541 $bond_str = NO_BREAK;
12545 # Do not break between a possible filehandle and a ? or / and do
12546 # not introduce a break after it if there is no blank
12548 elsif ( $type eq 'Z' ) {
12553 # if there is no blank and we do not want one. Examples:
12554 # print $x++ # do not break after $x
12555 # print HTML"HELLO" # break ok after HTML
12558 && defined( $want_left_space{$next_type} )
12559 && $want_left_space{$next_type} == WS_NO
12562 # or we might be followed by the start of a quote
12563 || $next_nonblank_type =~ /^[\/\?]$/
12566 $bond_str = NO_BREAK;
12570 # Do not break before a possible file handle
12571 if ( $next_nonblank_type eq 'Z' ) {
12572 $bond_str = NO_BREAK;
12575 # As a defensive measure, do not break between a '(' and a
12576 # filehandle. In some cases, this can cause an error. For
12577 # example, the following program works:
12584 # But this program fails:
12592 # This is normally only a problem with the 'extrude' option
12593 if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
12594 $bond_str = NO_BREAK;
12597 # patch to put cuddled elses back together when on multiple
12598 # lines, as in: } \n else \n { \n
12599 if ($rOpts_cuddled_else) {
12601 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
12602 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
12604 $bond_str = NO_BREAK;
12608 # keep '}' together with ';'
12609 if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
12610 $bond_str = NO_BREAK;
12613 # never break between sub name and opening paren
12614 if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
12615 $bond_str = NO_BREAK;
12618 #---------------------------------------------------------------
12620 # now take nesting depth into account
12621 #---------------------------------------------------------------
12622 # final strength incorporates the bond strength and nesting depth
12625 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
12626 if ( $total_nesting_depth > 0 ) {
12627 $strength = $bond_str + $total_nesting_depth;
12630 $strength = $bond_str;
12634 $strength = NO_BREAK;
12637 # always break after side comment
12638 if ( $type eq '#' ) { $strength = 0 }
12640 $bond_strength_to_go[$i] = $strength;
12642 FORMATTER_DEBUG_FLAG_BOND && do {
12643 my $str = substr( $token, 0, 15 );
12644 $str .= ' ' x ( 16 - length($str) );
12646 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
12653 sub pad_array_to_go {
12655 # to simplify coding in scan_list and set_bond_strengths, it helps
12656 # to create some extra blank tokens at the end of the arrays
12657 $tokens_to_go[ $max_index_to_go + 1 ] = '';
12658 $tokens_to_go[ $max_index_to_go + 2 ] = '';
12659 $types_to_go[ $max_index_to_go + 1 ] = 'b';
12660 $types_to_go[ $max_index_to_go + 2 ] = 'b';
12661 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
12662 $nesting_depth_to_go[$max_index_to_go];
12665 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
12666 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
12668 # shouldn't happen:
12669 unless ( get_saw_brace_error() ) {
12671 "Program bug in scan_list: hit nesting error which should have been caught\n"
12673 report_definite_bug();
12677 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
12682 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
12683 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
12687 { # begin scan_list
12690 $block_type, $current_depth,
12692 $i_last_nonblank_token, $last_colon_sequence_number,
12693 $last_nonblank_token, $last_nonblank_type,
12694 $last_old_breakpoint_count, $minimum_depth,
12695 $next_nonblank_block_type, $next_nonblank_token,
12696 $next_nonblank_type, $old_breakpoint_count,
12697 $starting_breakpoint_count, $starting_depth,
12703 @breakpoint_stack, @breakpoint_undo_stack,
12704 @comma_index, @container_type,
12705 @identifier_count_stack, @index_before_arrow,
12706 @interrupted_list, @item_count_stack,
12707 @last_comma_index, @last_dot_index,
12708 @last_nonblank_type, @old_breakpoint_count_stack,
12709 @opening_structure_index_stack, @rfor_semicolon_list,
12710 @has_old_logical_breakpoints, @rand_or_list,
12714 # routine to define essential variables when we go 'up' to
12716 sub check_for_new_minimum_depth {
12718 if ( $depth < $minimum_depth ) {
12720 $minimum_depth = $depth;
12722 # these arrays need not retain values between calls
12723 $breakpoint_stack[$depth] = $starting_breakpoint_count;
12724 $container_type[$depth] = "";
12725 $identifier_count_stack[$depth] = 0;
12726 $index_before_arrow[$depth] = -1;
12727 $interrupted_list[$depth] = 1;
12728 $item_count_stack[$depth] = 0;
12729 $last_nonblank_type[$depth] = "";
12730 $opening_structure_index_stack[$depth] = -1;
12732 $breakpoint_undo_stack[$depth] = undef;
12733 $comma_index[$depth] = undef;
12734 $last_comma_index[$depth] = undef;
12735 $last_dot_index[$depth] = undef;
12736 $old_breakpoint_count_stack[$depth] = undef;
12737 $has_old_logical_breakpoints[$depth] = 0;
12738 $rand_or_list[$depth] = [];
12739 $rfor_semicolon_list[$depth] = [];
12740 $i_equals[$depth] = -1;
12742 # these arrays must retain values between calls
12743 if ( !defined( $has_broken_sublist[$depth] ) ) {
12744 $dont_align[$depth] = 0;
12745 $has_broken_sublist[$depth] = 0;
12746 $want_comma_break[$depth] = 0;
12751 # routine to decide which commas to break at within a container;
12753 # $bp_count = number of comma breakpoints set
12754 # $do_not_break_apart = a flag indicating if container need not
12756 sub set_comma_breakpoints {
12760 my $do_not_break_apart = 0;
12761 if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
12763 my $fbc = $forced_breakpoint_count;
12765 # always open comma lists not preceded by keywords,
12766 # barewords, identifiers (that is, anything that doesn't
12767 # look like a function call)
12768 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
12770 set_comma_breakpoints_do(
12772 $opening_structure_index_stack[$dd],
12774 $item_count_stack[$dd],
12775 $identifier_count_stack[$dd],
12777 $next_nonblank_type,
12778 $container_type[$dd],
12779 $interrupted_list[$dd],
12780 \$do_not_break_apart,
12783 $bp_count = $forced_breakpoint_count - $fbc;
12784 $do_not_break_apart = 0 if $must_break_open;
12786 return ( $bp_count, $do_not_break_apart );
12789 my %is_logical_container;
12792 @_ = qw# if elsif unless while and or err not && | || ? : ! #;
12793 @is_logical_container{@_} = (1) x scalar(@_);
12796 sub set_for_semicolon_breakpoints {
12798 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
12799 set_forced_breakpoint($_);
12803 sub set_logical_breakpoints {
12806 $item_count_stack[$dd] == 0
12807 && $is_logical_container{ $container_type[$dd] }
12810 || $has_old_logical_breakpoints[$dd]
12814 # Look for breaks in this order:
12817 foreach my $i ( 0 .. 3 ) {
12818 if ( $rand_or_list[$dd][$i] ) {
12819 foreach ( @{ $rand_or_list[$dd][$i] } ) {
12820 set_forced_breakpoint($_);
12823 # break at any 'if' and 'unless' too
12824 foreach ( @{ $rand_or_list[$dd][4] } ) {
12825 set_forced_breakpoint($_);
12827 $rand_or_list[$dd] = [];
12834 sub is_unbreakable_container {
12836 # never break a container of one of these types
12837 # because bad things can happen (map1.t)
12839 $is_sort_map_grep{ $container_type[$dd] };
12844 # This routine is responsible for setting line breaks for all lists,
12845 # so that hierarchical structure can be displayed and so that list
12846 # items can be vertically aligned. The output of this routine is
12847 # stored in the array @forced_breakpoint_to_go, which is used to set
12848 # final breakpoints.
12850 $starting_depth = $nesting_depth_to_go[0];
12853 $current_depth = $starting_depth;
12855 $last_colon_sequence_number = -1;
12856 $last_nonblank_token = ';';
12857 $last_nonblank_type = ';';
12858 $last_old_breakpoint_count = 0;
12859 $minimum_depth = $current_depth + 1; # forces update in check below
12860 $old_breakpoint_count = 0;
12861 $starting_breakpoint_count = $forced_breakpoint_count;
12864 $type_sequence = '';
12866 check_for_new_minimum_depth($current_depth);
12868 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
12869 my $want_previous_breakpoint = -1;
12871 my $saw_good_breakpoint;
12872 my $i_line_end = -1;
12873 my $i_line_start = -1;
12875 # loop over all tokens in this batch
12876 while ( ++$i <= $max_index_to_go ) {
12877 if ( $type ne 'b' ) {
12878 $i_last_nonblank_token = $i - 1;
12879 $last_nonblank_type = $type;
12880 $last_nonblank_token = $token;
12882 $type = $types_to_go[$i];
12883 $block_type = $block_type_to_go[$i];
12884 $token = $tokens_to_go[$i];
12885 $type_sequence = $type_sequence_to_go[$i];
12886 my $next_type = $types_to_go[ $i + 1 ];
12887 my $next_token = $tokens_to_go[ $i + 1 ];
12888 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12889 $next_nonblank_type = $types_to_go[$i_next_nonblank];
12890 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12891 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
12893 # set break if flag was set
12894 if ( $want_previous_breakpoint >= 0 ) {
12895 set_forced_breakpoint($want_previous_breakpoint);
12896 $want_previous_breakpoint = -1;
12899 $last_old_breakpoint_count = $old_breakpoint_count;
12900 if ( $old_breakpoint_to_go[$i] ) {
12902 $i_line_start = $i_next_nonblank;
12904 $old_breakpoint_count++;
12906 # Break before certain keywords if user broke there and
12907 # this is a 'safe' break point. The idea is to retain
12908 # any preferred breaks for sequential list operations,
12909 # like a schwartzian transform.
12910 if ($rOpts_break_at_old_keyword_breakpoints) {
12912 $next_nonblank_type eq 'k'
12913 && $is_keyword_returning_list{$next_nonblank_token}
12914 && ( $type =~ /^[=\)\]\}Riw]$/
12916 && $is_keyword_returning_list{$token} )
12920 # we actually have to set this break next time through
12921 # the loop because if we are at a closing token (such
12922 # as '}') which forms a one-line block, this break might
12924 $want_previous_breakpoint = $i;
12928 next if ( $type eq 'b' );
12929 $depth = $nesting_depth_to_go[ $i + 1 ];
12931 # safety check - be sure we always break after a comment
12932 # Shouldn't happen .. an error here probably means that the
12933 # nobreak flag did not get turned off correctly during
12935 if ( $type eq '#' ) {
12936 if ( $i != $max_index_to_go ) {
12938 "Non-fatal program bug: backup logic needed to break after a comment\n"
12940 report_definite_bug();
12941 $nobreak_to_go[$i] = 0;
12942 set_forced_breakpoint($i);
12946 # Force breakpoints at certain tokens in long lines.
12947 # Note that such breakpoints will be undone later if these tokens
12948 # are fully contained within parens on a line.
12952 && $token =~ /^(if|unless)$/
12956 # or container is broken (by side-comment, etc)
12957 || ( $next_nonblank_token eq '('
12958 && $mate_index_to_go[$i_next_nonblank] < $i )
12962 set_forced_breakpoint( $i - 1 );
12965 # remember locations of '||' and '&&' for possible breaks if we
12966 # decide this is a long logical expression.
12967 if ( $type eq '||' ) {
12968 push @{ $rand_or_list[$depth][2] }, $i;
12969 ++$has_old_logical_breakpoints[$depth]
12970 if ( ( $i == $i_line_start || $i == $i_line_end )
12971 && $rOpts_break_at_old_logical_breakpoints );
12973 elsif ( $type eq '&&' ) {
12974 push @{ $rand_or_list[$depth][3] }, $i;
12975 ++$has_old_logical_breakpoints[$depth]
12976 if ( ( $i == $i_line_start || $i == $i_line_end )
12977 && $rOpts_break_at_old_logical_breakpoints );
12979 elsif ( $type eq 'f' ) {
12980 push @{ $rfor_semicolon_list[$depth] }, $i;
12982 elsif ( $type eq 'k' ) {
12983 if ( $token eq 'and' ) {
12984 push @{ $rand_or_list[$depth][1] }, $i;
12985 ++$has_old_logical_breakpoints[$depth]
12986 if ( ( $i == $i_line_start || $i == $i_line_end )
12987 && $rOpts_break_at_old_logical_breakpoints );
12990 # break immediately at 'or's which are probably not in a logical
12991 # block -- but we will break in logical breaks below so that
12992 # they do not add to the forced_breakpoint_count
12993 elsif ( $token eq 'or' ) {
12994 push @{ $rand_or_list[$depth][0] }, $i;
12995 ++$has_old_logical_breakpoints[$depth]
12996 if ( ( $i == $i_line_start || $i == $i_line_end )
12997 && $rOpts_break_at_old_logical_breakpoints );
12998 if ( $is_logical_container{ $container_type[$depth] } ) {
13001 if ($is_long_line) { set_forced_breakpoint($i) }
13002 elsif ( ( $i == $i_line_start || $i == $i_line_end )
13003 && $rOpts_break_at_old_logical_breakpoints )
13005 $saw_good_breakpoint = 1;
13009 elsif ( $token eq 'if' || $token eq 'unless' ) {
13010 push @{ $rand_or_list[$depth][4] }, $i;
13011 if ( ( $i == $i_line_start || $i == $i_line_end )
13012 && $rOpts_break_at_old_logical_breakpoints )
13014 set_forced_breakpoint($i);
13018 elsif ( $is_assignment{$type} ) {
13019 $i_equals[$depth] = $i;
13022 if ($type_sequence) {
13024 # handle any postponed closing breakpoints
13025 if ( $token =~ /^[\)\]\}\:]$/ ) {
13026 if ( $type eq ':' ) {
13027 $last_colon_sequence_number = $type_sequence;
13029 # TESTING: retain break at a ':' line break
13030 if ( ( $i == $i_line_start || $i == $i_line_end )
13031 && $rOpts_break_at_old_trinary_breakpoints )
13035 set_forced_breakpoint($i);
13037 # break at previous '='
13038 if ( $i_equals[$depth] > 0 ) {
13039 set_forced_breakpoint( $i_equals[$depth] );
13040 $i_equals[$depth] = -1;
13044 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
13045 my $inc = ( $type eq ':' ) ? 0 : 1;
13046 set_forced_breakpoint( $i - $inc );
13047 delete $postponed_breakpoint{$type_sequence};
13051 # set breaks at ?/: if they will get separated (and are
13052 # not a ?/: chain), or if the '?' is at the end of the
13054 elsif ( $token eq '?' ) {
13055 my $i_colon = $mate_index_to_go[$i];
13057 $i_colon <= 0 # the ':' is not in this batch
13058 || $i == 0 # this '?' is the first token of the line
13060 $max_index_to_go # or this '?' is the last token
13064 # don't break at a '?' if preceded by ':' on
13065 # this line of previous ?/: pair on this line.
13066 # This is an attempt to preserve a chain of ?/:
13067 # expressions (elsif2.t). And don't break if
13068 # this has a side comment.
13069 set_forced_breakpoint($i)
13071 $type_sequence == (
13072 $last_colon_sequence_number +
13073 TYPE_SEQUENCE_INCREMENT
13075 || $tokens_to_go[$max_index_to_go] eq '#'
13077 set_closing_breakpoint($i);
13082 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
13084 #------------------------------------------------------------
13085 # Handle Increasing Depth..
13087 # prepare for a new list when depth increases
13088 # token $i is a '(','{', or '['
13089 #------------------------------------------------------------
13090 if ( $depth > $current_depth ) {
13092 $breakpoint_stack[$depth] = $forced_breakpoint_count;
13093 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
13094 $has_broken_sublist[$depth] = 0;
13095 $identifier_count_stack[$depth] = 0;
13096 $index_before_arrow[$depth] = -1;
13097 $interrupted_list[$depth] = 0;
13098 $item_count_stack[$depth] = 0;
13099 $last_comma_index[$depth] = undef;
13100 $last_dot_index[$depth] = undef;
13101 $last_nonblank_type[$depth] = $last_nonblank_type;
13102 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
13103 $opening_structure_index_stack[$depth] = $i;
13104 $rand_or_list[$depth] = [];
13105 $rfor_semicolon_list[$depth] = [];
13106 $i_equals[$depth] = -1;
13107 $want_comma_break[$depth] = 0;
13108 $container_type[$depth] =
13109 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
13110 ? $last_nonblank_token
13112 $has_old_logical_breakpoints[$depth] = 0;
13114 # if line ends here then signal closing token to break
13115 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
13117 set_closing_breakpoint($i);
13120 # Not all lists of values should be vertically aligned..
13121 $dont_align[$depth] =
13123 # code BLOCKS are handled at a higher level
13124 ( $block_type ne "" )
13126 # certain paren lists
13127 || ( $type eq '(' ) && (
13129 # it does not usually look good to align a list of
13130 # identifiers in a parameter list, as in:
13131 # my($var1, $var2, ...)
13132 # (This test should probably be refined, for now I'm just
13133 # testing for any keyword)
13134 ( $last_nonblank_type eq 'k' )
13136 # a trailing '(' usually indicates a non-list
13137 || ( $next_nonblank_type eq '(' )
13140 # patch to outdent opening brace of long if/for/..
13141 # statements (like this one). See similar coding in
13142 # set_continuation breaks. We have also catch it here for
13143 # short line fragments which otherwise will not go through
13144 # set_continuation_breaks.
13148 # if we have the ')' but not its '(' in this batch..
13149 && ( $last_nonblank_token eq ')' )
13150 && $mate_index_to_go[$i_last_nonblank_token] < 0
13152 # and user wants brace to left
13153 && !$rOpts->{'opening-brace-always-on-right'}
13155 && ( $type eq '{' ) # should be true
13156 && ( $token eq '{' ) # should be true
13159 set_forced_breakpoint( $i - 1 );
13163 #------------------------------------------------------------
13164 # Handle Decreasing Depth..
13166 # finish off any old list when depth decreases
13167 # token $i is a ')','}', or ']'
13168 #------------------------------------------------------------
13169 elsif ( $depth < $current_depth ) {
13171 check_for_new_minimum_depth($depth);
13173 # force all outer logical containers to break after we see on
13175 $has_old_logical_breakpoints[$depth] ||=
13176 $has_old_logical_breakpoints[$current_depth];
13178 # Patch to break between ') {' if the paren list is broken.
13179 # There is similar logic in set_continuation_breaks for
13180 # non-broken lists.
13182 && $next_nonblank_block_type
13183 && $interrupted_list[$current_depth]
13184 && $next_nonblank_type eq '{'
13185 && !$rOpts->{'opening-brace-always-on-right'} )
13187 set_forced_breakpoint($i);
13190 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
13192 # set breaks at commas if necessary
13193 my ( $bp_count, $do_not_break_apart ) =
13194 set_comma_breakpoints($current_depth);
13196 my $i_opening = $opening_structure_index_stack[$current_depth];
13197 my $saw_opening_structure = ( $i_opening >= 0 );
13199 # this term is long if we had to break at interior commas..
13200 my $is_long_term = $bp_count > 0;
13202 # ..or if the length between opening and closing parens exceeds
13203 # allowed line length
13204 if ( !$is_long_term && $saw_opening_structure ) {
13205 my $i_opening_minus = find_token_starting_list($i_opening);
13207 # Note: we have to allow for one extra space after a
13208 # closing token so that we do not strand a comma or
13209 # semicolon, hence the '>=' here (oneline.t)
13211 excess_line_length( $i_opening_minus, $i ) >= 0;
13214 # We've set breaks after all comma-arrows. Now we have to
13215 # undo them if this can be a one-line block
13216 # (the only breakpoints set will be due to comma-arrows)
13219 # user doesn't require breaking after all comma-arrows
13220 ( $rOpts_comma_arrow_breakpoints != 0 )
13222 # and if the opening structure is in this batch
13223 && $saw_opening_structure
13225 # and either on the same old line
13227 $old_breakpoint_count_stack[$current_depth] ==
13228 $last_old_breakpoint_count
13230 # or user wants to form long blocks with arrows
13231 || $rOpts_comma_arrow_breakpoints == 2
13234 # and we made some breakpoints between the opening and closing
13235 && ( $breakpoint_undo_stack[$current_depth] <
13236 $forced_breakpoint_undo_count )
13238 # and this block is short enough to fit on one line
13239 # Note: use < because need 1 more space for possible comma
13244 undo_forced_breakpoint_stack(
13245 $breakpoint_undo_stack[$current_depth] );
13248 # now see if we have any comma breakpoints left
13249 my $has_comma_breakpoints =
13250 ( $breakpoint_stack[$current_depth] !=
13251 $forced_breakpoint_count );
13253 # update broken-sublist flag of the outer container
13254 $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
13255 || $has_broken_sublist[$current_depth]
13257 || $has_comma_breakpoints;
13259 # Having come to the closing ')', '}', or ']', now we have to decide if we
13260 # should 'open up' the structure by placing breaks at the opening and
13261 # closing containers. This is a tricky decision. Here are some of the
13262 # basic considerations:
13264 # -If this is a BLOCK container, then any breakpoints will have already
13265 # been set (and according to user preferences), so we need do nothing here.
13267 # -If we have a comma-separated list for which we can align the list items,
13268 # then we need to do so because otherwise the vertical aligner cannot
13269 # currently do the alignment.
13271 # -If this container does itself contain a container which has been broken
13272 # open, then it should be broken open to properly show the structure.
13274 # -If there is nothing to align, and no other reason to break apart,
13275 # then do not do it.
13277 # We will not break open the parens of a long but 'simple' logical expression.
13280 # This is an example of a simple logical expression and its formatting:
13282 # if ( $bigwasteofspace1 && $bigwasteofspace2
13283 # || $bigwasteofspace3 && $bigwasteofspace4 )
13285 # Most people would prefer this than the 'spacey' version:
13288 # $bigwasteofspace1 && $bigwasteofspace2
13289 # || $bigwasteofspace3 && $bigwasteofspace4
13292 # To illustrate the rules for breaking logical expressions, consider:
13296 # and ( exists $ids_excl_uc{$id_uc}
13297 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
13299 # This is on the verge of being difficult to read. The current default is to
13300 # open it up like this:
13305 # and ( exists $ids_excl_uc{$id_uc}
13306 # or grep $id_uc =~ /$_/, @ids_excl_uc )
13309 # This is a compromise which tries to avoid being too dense and to spacey.
13310 # A more spaced version would be:
13316 # exists $ids_excl_uc{$id_uc}
13317 # or grep $id_uc =~ /$_/, @ids_excl_uc
13321 # Some people might prefer the spacey version -- an option could be added. The
13322 # innermost expression contains a long block '( exists $ids_... ')'.
13324 # Here is how the logic goes: We will force a break at the 'or' that the
13325 # innermost expression contains, but we will not break apart its opening and
13326 # closing containers because (1) it contains no multi-line sub-containers itself,
13327 # and (2) there is no alignment to be gained by breaking it open like this
13330 # exists $ids_excl_uc{$id_uc}
13331 # or grep $id_uc =~ /$_/, @ids_excl_uc
13334 # (although this looks perfectly ok and might be good for long expressions). The
13335 # outer 'if' container, though, contains a broken sub-container, so it will be
13336 # broken open to avoid too much density. Also, since it contains no 'or's, there
13337 # will be a forced break at its 'and'.
13339 # set some flags telling something about this container..
13340 my $is_simple_logical_expression = 0;
13341 if ( $item_count_stack[$current_depth] == 0
13342 && $saw_opening_structure
13343 && $tokens_to_go[$i_opening] eq '('
13344 && $is_logical_container{ $container_type[$current_depth] }
13348 # This seems to be a simple logical expression with
13349 # no existing breakpoints. Set a flag to prevent
13351 if ( !$has_comma_breakpoints ) {
13352 $is_simple_logical_expression = 1;
13355 # This seems to be a simple logical expression with
13356 # breakpoints (broken sublists, for example). Break
13357 # at all 'or's and '||'s.
13359 set_logical_breakpoints($current_depth);
13364 && @{ $rfor_semicolon_list[$current_depth] } )
13366 set_for_semicolon_breakpoints($current_depth);
13368 # open up a long 'for' or 'foreach' container to allow
13369 # leading term alignment unless -lp is used.
13370 $has_comma_breakpoints = 1
13371 unless $rOpts_line_up_parentheses;
13376 # breaks for code BLOCKS are handled at a higher level
13379 # we do not need to break at the top level of an 'if'
13381 && !$is_simple_logical_expression
13383 ## modification to keep ': (' containers vertically tight;
13384 ## but probably better to let user set -vt=1 to avoid
13385 ## inconsistency with other paren types
13386 ## && ($container_type[$current_depth] ne ':')
13388 # otherwise, we require one of these reasons for breaking:
13391 # - this term has forced line breaks
13392 $has_comma_breakpoints
13394 # - the opening container is separated from this batch
13395 # for some reason (comment, blank line, code block)
13396 # - this is a non-paren container spanning multiple lines
13397 || !$saw_opening_structure
13399 # - this is a long block contained in another breakable
13402 && $container_environment_to_go[$i_opening] ne
13408 # For -lp option, we must put a breakpoint before
13409 # the token which has been identified as starting
13410 # this indentation level. This is necessary for
13411 # proper alignment.
13412 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
13414 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
13415 if ( defined($item) ) {
13416 my $i_start_2 = $item->get_STARTING_INDEX();
13418 defined($i_start_2)
13420 # we are breaking after an opening brace, paren,
13421 # so don't break before it too
13422 && $i_start_2 ne $i_opening
13426 # Only break for breakpoints at the same
13427 # indentation level as the opening paren
13428 my $test1 = $nesting_depth_to_go[$i_opening];
13429 my $test2 = $nesting_depth_to_go[$i_start_2];
13430 if ( $test2 == $test1 ) {
13431 set_forced_breakpoint( $i_start_2 - 1 );
13437 # break after opening structure.
13438 # note: break before closing structure will be automatic
13439 if ( $minimum_depth <= $current_depth ) {
13441 set_forced_breakpoint($i_opening)
13442 unless ( $do_not_break_apart
13443 || is_unbreakable_container($current_depth) );
13445 # break at '.' of lower depth level before opening token
13446 if ( $last_dot_index[$depth] ) {
13447 set_forced_breakpoint( $last_dot_index[$depth] );
13450 # break before opening structure if preeced by another
13451 # closing structure and a comma. This is normally
13452 # done by the previous closing brace, but not
13453 # if it was a one-line block.
13454 if ( $i_opening > 2 ) {
13456 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
13460 if ( $types_to_go[$i_prev] eq ','
13461 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
13463 set_forced_breakpoint($i_prev);
13466 # also break before something like ':(' or '?('
13469 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
13471 my $token_prev = $tokens_to_go[$i_prev];
13472 if ( $want_break_before{$token_prev} ) {
13473 set_forced_breakpoint($i_prev);
13479 # break after comma following closing structure
13480 if ( $next_type eq ',' ) {
13481 set_forced_breakpoint( $i + 1 );
13484 # break before an '=' following closing structure
13486 $is_assignment{$next_nonblank_type}
13487 && ( $breakpoint_stack[$current_depth] !=
13488 $forced_breakpoint_count )
13491 set_forced_breakpoint($i);
13494 # break at any comma before the opening structure Added
13495 # for -lp, but seems to be good in general. It isn't
13496 # obvious how far back to look; the '5' below seems to
13497 # work well and will catch the comma in something like
13498 # push @list, myfunc( $param, $param, ..
13500 my $icomma = $last_comma_index[$depth];
13501 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
13502 unless ( $forced_breakpoint_to_go[$icomma] ) {
13503 set_forced_breakpoint($icomma);
13506 } # end logic to open up a container
13508 # Break open a logical container open if it was already open
13509 elsif ($is_simple_logical_expression
13510 && $has_old_logical_breakpoints[$current_depth] )
13512 set_logical_breakpoints($current_depth);
13515 # Handle long container which does not get opened up
13516 elsif ($is_long_term) {
13518 # must set fake breakpoint to alert outer containers that
13520 set_fake_breakpoint();
13524 #------------------------------------------------------------
13525 # Handle this token
13526 #------------------------------------------------------------
13528 $current_depth = $depth;
13530 # handle comma-arrow
13531 if ( $type eq '=>' ) {
13532 next if ( $last_nonblank_type eq '=>' );
13533 next if $rOpts_break_at_old_comma_breakpoints;
13534 next if $rOpts_comma_arrow_breakpoints == 3;
13535 $want_comma_break[$depth] = 1;
13536 $index_before_arrow[$depth] = $i_last_nonblank_token;
13540 elsif ( $type eq '.' ) {
13541 $last_dot_index[$depth] = $i;
13544 # Turn off alignment if we are sure that this is not a list
13545 # environment. To be safe, we will do this if we see certain
13546 # non-list tokens, such as ';', and also the environment is
13547 # not a list. Note that '=' could be in any of the = operators
13548 # (lextest.t). We can't just use the reported environment
13549 # because it can be incorrect in some cases.
13550 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
13551 && $container_environment_to_go[$i] ne 'LIST' )
13553 $dont_align[$depth] = 1;
13554 $want_comma_break[$depth] = 0;
13555 $index_before_arrow[$depth] = -1;
13558 # now just handle any commas
13559 next unless ( $type eq ',' );
13561 $last_dot_index[$depth] = undef;
13562 $last_comma_index[$depth] = $i;
13564 # break here if this comma follows a '=>'
13565 # but not if there is a side comment after the comma
13566 if ( $want_comma_break[$depth] ) {
13568 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
13569 $want_comma_break[$depth] = 0;
13570 $index_before_arrow[$depth] = -1;
13574 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13576 # break before the previous token if it looks safe
13577 # Example of something that we will not try to break before:
13578 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
13579 my $ibreak = $index_before_arrow[$depth] - 1;
13581 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
13583 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
13584 if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) {
13585 set_forced_breakpoint($ibreak);
13589 $want_comma_break[$depth] = 0;
13590 $index_before_arrow[$depth] = -1;
13592 # handle list which mixes '=>'s and ','s:
13593 # treat any list items so far as an interrupted list
13594 $interrupted_list[$depth] = 1;
13598 # skip past these commas if we are not supposed to format them
13599 next if ( $dont_align[$depth] );
13601 # break after all commas above starting depth
13602 if ( $depth < $starting_depth ) {
13603 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13607 # add this comma to the list..
13608 my $item_count = $item_count_stack[$depth];
13609 if ( $item_count == 0 ) {
13611 # but do not form a list with no opening structure
13614 # open INFILE_COPY, ">$input_file_copy"
13615 # or die ("very long message");
13617 if ( ( $opening_structure_index_stack[$depth] < 0 )
13618 && $container_environment_to_go[$i] eq 'BLOCK' )
13620 $dont_align[$depth] = 1;
13625 $comma_index[$depth][$item_count] = $i;
13626 ++$item_count_stack[$depth];
13627 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
13628 $identifier_count_stack[$depth]++;
13632 #-------------------------------------------
13633 # end of loop over all tokens in this batch
13634 #-------------------------------------------
13636 # set breaks for any unfinished lists ..
13637 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
13639 $interrupted_list[$dd] = 1;
13640 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
13641 set_comma_breakpoints($dd);
13642 set_logical_breakpoints($dd)
13643 if ( $has_old_logical_breakpoints[$dd] );
13644 set_for_semicolon_breakpoints($dd);
13646 # break open container...
13647 my $i_opening = $opening_structure_index_stack[$dd];
13648 set_forced_breakpoint($i_opening)
13650 is_unbreakable_container($dd)
13652 # Avoid a break which would place an isolated ' or "
13655 && $i_opening >= $max_index_to_go - 2
13656 && $token =~ /^['"]$/ )
13660 # Return a flag indicating if the input file had some good breakpoints.
13661 # This flag will be used to force a break in a line shorter than the
13662 # allowed line length.
13663 if ( $has_old_logical_breakpoints[$current_depth] ) {
13664 $saw_good_breakpoint = 1;
13666 return $saw_good_breakpoint;
13670 sub find_token_starting_list {
13672 # When testing to see if a block will fit on one line, some
13673 # previous token(s) may also need to be on the line; particularly
13674 # if this is a sub call. So we will look back at least one
13675 # token. NOTE: This isn't perfect, but not critical, because
13676 # if we mis-identify a block, it will be wrapped and therefore
13677 # fixed the next time it is formatted.
13678 my $i_opening_paren = shift;
13679 my $i_opening_minus = $i_opening_paren;
13680 my $im1 = $i_opening_paren - 1;
13681 my $im2 = $i_opening_paren - 2;
13682 my $im3 = $i_opening_paren - 3;
13683 my $typem1 = $types_to_go[$im1];
13684 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
13685 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
13686 $i_opening_minus = $i_opening_paren;
13688 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
13689 $i_opening_minus = $im1 if $im1 >= 0;
13691 # walk back to improve length estimate
13692 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
13693 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
13694 $i_opening_minus = $j;
13696 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
13698 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
13699 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
13700 $i_opening_minus = $im2;
13702 return $i_opening_minus;
13705 { # begin set_comma_breakpoints_do
13707 my %is_keyword_with_special_leading_term;
13711 # These keywords have prototypes which allow a special leading item
13712 # followed by a list
13714 qw(formline grep kill map printf sprintf push chmod join pack unshift);
13715 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
13718 sub set_comma_breakpoints_do {
13720 # Given a list with some commas, set breakpoints at some of the
13721 # commas, if necessary, to make it easy to read. This list is
13724 $depth, $i_opening_paren, $i_closing_paren,
13725 $item_count, $identifier_count, $rcomma_index,
13726 $next_nonblank_type, $list_type, $interrupted,
13727 $rdo_not_break_apart, $must_break_open,
13730 # nothing to do if no commas seen
13731 return if ( $item_count < 1 );
13732 my $i_first_comma = $$rcomma_index[0];
13733 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
13734 my $i_last_comma = $i_true_last_comma;
13735 if ( $i_last_comma >= $max_index_to_go ) {
13736 $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
13737 return if ( $item_count < 1 );
13740 #---------------------------------------------------------------
13741 # find lengths of all items in the list to calculate page layout
13742 #---------------------------------------------------------------
13743 my $comma_count = $item_count;
13749 my @max_length = ( 0, 0 );
13750 my $first_term_length;
13751 my $i = $i_opening_paren;
13754 for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
13755 $is_odd = 1 - $is_odd;
13756 $i_prev_plus = $i + 1;
13757 $i = $$rcomma_index[$j];
13760 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
13762 ( $types_to_go[$i_prev_plus] eq 'b' )
13765 push @i_term_begin, $i_term_begin;
13766 push @i_term_end, $i_term_end;
13767 push @i_term_comma, $i;
13769 # note: currently adding 2 to all lengths (for comma and space)
13771 2 + token_sequence_length( $i_term_begin, $i_term_end );
13772 push @item_lengths, $length;
13775 $first_term_length = $length;
13779 if ( $length > $max_length[$is_odd] ) {
13780 $max_length[$is_odd] = $length;
13785 # now we have to make a distinction between the comma count and item
13786 # count, because the item count will be one greater than the comma
13787 # count if the last item is not terminated with a comma
13789 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
13790 ? $i_last_comma + 1
13793 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
13794 ? $i_closing_paren - 2
13795 : $i_closing_paren - 1;
13796 my $i_effective_last_comma = $i_last_comma;
13798 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
13800 if ( $last_item_length > 0 ) {
13802 # add 2 to length because other lengths include a comma and a blank
13803 $last_item_length += 2;
13804 push @item_lengths, $last_item_length;
13805 push @i_term_begin, $i_b + 1;
13806 push @i_term_end, $i_e;
13807 push @i_term_comma, undef;
13809 my $i_odd = $item_count % 2;
13811 if ( $last_item_length > $max_length[$i_odd] ) {
13812 $max_length[$i_odd] = $last_item_length;
13816 $i_effective_last_comma = $i_e + 1;
13818 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
13819 $identifier_count++;
13823 #---------------------------------------------------------------
13824 # End of length calculations
13825 #---------------------------------------------------------------
13827 #---------------------------------------------------------------
13828 # Compound List Rule 1:
13829 # Break at (almost) every comma for a list containing a broken
13830 # sublist. This has higher priority than the Interrupted List
13832 #---------------------------------------------------------------
13833 if ( $has_broken_sublist[$depth] ) {
13835 # Break at every comma except for a comma between two
13836 # simple, small terms. This prevents long vertical
13837 # columns of, say, just 0's.
13838 my $small_length = 10; # 2 + actual maximum length wanted
13840 # We'll insert a break in long runs of small terms to
13841 # allow alignment in uniform tables.
13842 my $skipped_count = 0;
13843 my $columns = table_columns_available($i_first_comma);
13844 my $fields = int( $columns / $small_length );
13845 if ( $rOpts_maximum_fields_per_table
13846 && $fields > $rOpts_maximum_fields_per_table )
13848 $fields = $rOpts_maximum_fields_per_table;
13850 my $max_skipped_count = $fields - 1;
13852 my $is_simple_last_term = 0;
13853 my $is_simple_next_term = 0;
13854 foreach my $j ( 0 .. $item_count ) {
13855 $is_simple_last_term = $is_simple_next_term;
13856 $is_simple_next_term = 0;
13857 if ( $j < $item_count
13858 && $i_term_end[$j] == $i_term_begin[$j]
13859 && $item_lengths[$j] <= $small_length )
13861 $is_simple_next_term = 1;
13864 if ( $is_simple_last_term
13865 && $is_simple_next_term
13866 && $skipped_count < $max_skipped_count )
13871 $skipped_count = 0;
13872 my $i = $i_term_comma[ $j - 1 ];
13873 last unless defined $i;
13874 set_forced_breakpoint($i);
13878 # always break at the last comma if this list is
13879 # interrupted; we wouldn't want to leave a terminal '{', for
13881 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
13885 #my ( $a, $b, $c ) = caller();
13886 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
13887 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
13888 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
13890 #---------------------------------------------------------------
13891 # Interrupted List Rule:
13892 # A list is is forced to use old breakpoints if it was interrupted
13893 # by side comments or blank lines, or requested by user.
13894 #---------------------------------------------------------------
13895 if ( $rOpts_break_at_old_comma_breakpoints
13897 || $i_opening_paren < 0 )
13899 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
13903 #---------------------------------------------------------------
13904 # Looks like a list of items. We have to look at it and size it up.
13905 #---------------------------------------------------------------
13907 my $opening_token = $tokens_to_go[$i_opening_paren];
13908 my $opening_environment =
13909 $container_environment_to_go[$i_opening_paren];
13911 #-------------------------------------------------------------------
13912 # Return if this will fit on one line
13913 #-------------------------------------------------------------------
13915 my $i_opening_minus = find_token_starting_list($i_opening_paren);
13917 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
13919 #-------------------------------------------------------------------
13920 # Now we know that this block spans multiple lines; we have to set
13921 # at least one breakpoint -- real or fake -- as a signal to break
13922 # open any outer containers.
13923 #-------------------------------------------------------------------
13924 set_fake_breakpoint();
13926 # be sure we do not extend beyond the current list length
13927 if ( $i_effective_last_comma >= $max_index_to_go ) {
13928 $i_effective_last_comma = $max_index_to_go - 1;
13931 # Set a flag indicating if we need to break open to keep -lp
13932 # items aligned. This is necessary if any of the list terms
13933 # exceeds the available space after the '('.
13934 my $need_lp_break_open = $must_break_open;
13935 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
13936 my $columns_if_unbroken = $rOpts_maximum_line_length -
13937 total_line_length( $i_opening_minus, $i_opening_paren );
13938 $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
13939 || ( $max_length[1] > $columns_if_unbroken )
13940 || ( $first_term_length > $columns_if_unbroken );
13943 # Specify if the list must have an even number of fields or not.
13944 # It is generally safest to assume an even number, because the
13945 # list items might be a hash list. But if we can be sure that
13946 # it is not a hash, then we can allow an odd number for more
13948 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
13950 if ( $identifier_count >= $item_count - 1
13951 || $is_assignment{$next_nonblank_type}
13952 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
13958 # do we have a long first term which should be
13959 # left on a line by itself?
13960 my $use_separate_first_term = (
13961 $odd_or_even == 1 # only if we can use 1 field/line
13962 && $item_count > 3 # need several items
13963 && $first_term_length >
13964 2 * $max_length[0] - 2 # need long first term
13965 && $first_term_length >
13966 2 * $max_length[1] - 2 # need long first term
13969 # or do we know from the type of list that the first term should
13971 if ( !$use_separate_first_term ) {
13972 if ( $is_keyword_with_special_leading_term{$list_type} ) {
13973 $use_separate_first_term = 1;
13975 # should the container be broken open?
13976 if ( $item_count < 3 ) {
13977 if ( $i_first_comma - $i_opening_paren < 4 ) {
13978 $$rdo_not_break_apart = 1;
13981 elsif ($first_term_length < 20
13982 && $i_first_comma - $i_opening_paren < 4 )
13984 my $columns = table_columns_available($i_first_comma);
13985 if ( $first_term_length < $columns ) {
13986 $$rdo_not_break_apart = 1;
13993 if ($use_separate_first_term) {
13995 # ..set a break and update starting values
13996 $use_separate_first_term = 1;
13997 set_forced_breakpoint($i_first_comma);
13998 $i_opening_paren = $i_first_comma;
13999 $i_first_comma = $$rcomma_index[1];
14001 return if $comma_count == 1;
14002 shift @item_lengths;
14003 shift @i_term_begin;
14005 shift @i_term_comma;
14008 # if not, update the metrics to include the first term
14010 if ( $first_term_length > $max_length[0] ) {
14011 $max_length[0] = $first_term_length;
14015 # Field width parameters
14016 my $pair_width = ( $max_length[0] + $max_length[1] );
14018 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
14020 # Number of free columns across the page width for laying out tables
14021 my $columns = table_columns_available($i_first_comma);
14023 # Estimated maximum number of fields which fit this space
14024 # This will be our first guess
14025 my $number_of_fields_max =
14026 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
14028 my $number_of_fields = $number_of_fields_max;
14030 # Find the best-looking number of fields
14031 # and make this our second guess if possible
14032 my ( $number_of_fields_best, $ri_ragged_break_list,
14033 $new_identifier_count )
14034 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
14037 if ( $number_of_fields_best != 0
14038 && $number_of_fields_best < $number_of_fields_max )
14040 $number_of_fields = $number_of_fields_best;
14043 # ----------------------------------------------------------------------
14044 # If we are crowded and the -lp option is being used, try to
14045 # undo some indentation
14046 # ----------------------------------------------------------------------
14048 $rOpts_line_up_parentheses
14050 $number_of_fields == 0
14051 || ( $number_of_fields == 1
14052 && $number_of_fields != $number_of_fields_best )
14056 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
14057 if ( $available_spaces > 0 ) {
14059 my $spaces_wanted = $max_width - $columns; # for 1 field
14061 if ( $number_of_fields_best == 0 ) {
14062 $number_of_fields_best =
14063 get_maximum_fields_wanted( \@item_lengths );
14066 if ( $number_of_fields_best != 1 ) {
14067 my $spaces_wanted_2 =
14068 1 + $pair_width - $columns; # for 2 fields
14069 if ( $available_spaces > $spaces_wanted_2 ) {
14070 $spaces_wanted = $spaces_wanted_2;
14074 if ( $spaces_wanted > 0 ) {
14075 my $deleted_spaces =
14076 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
14079 if ( $deleted_spaces > 0 ) {
14080 $columns = table_columns_available($i_first_comma);
14081 $number_of_fields_max =
14082 maximum_number_of_fields( $columns, $odd_or_even,
14083 $max_width, $pair_width );
14084 $number_of_fields = $number_of_fields_max;
14086 if ( $number_of_fields_best == 1
14087 && $number_of_fields >= 1 )
14089 $number_of_fields = $number_of_fields_best;
14096 # try for one column if two won't work
14097 if ( $number_of_fields <= 0 ) {
14098 $number_of_fields = int( $columns / $max_width );
14101 # The user can place an upper bound on the number of fields,
14102 # which can be useful for doing maintenance on tables
14103 if ( $rOpts_maximum_fields_per_table
14104 && $number_of_fields > $rOpts_maximum_fields_per_table )
14106 $number_of_fields = $rOpts_maximum_fields_per_table;
14109 # How many columns (characters) and lines would this container take
14110 # if no additional whitespace were added?
14111 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
14112 $i_effective_last_comma + 1 );
14113 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
14114 my $packed_lines = 1 + int( $packed_columns / $columns );
14116 # are we an item contained in an outer list?
14117 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
14119 if ( $number_of_fields <= 0 ) {
14121 # #---------------------------------------------------------------
14122 # # We're in trouble. We can't find a single field width that works.
14123 # # There is no simple answer here; we may have a single long list
14125 # #---------------------------------------------------------------
14127 # In many cases, it may be best to not force a break if there is just one
14128 # comma, because the standard continuation break logic will do a better
14131 # In the common case that all but one of the terms can fit
14132 # on a single line, it may look better not to break open the
14133 # containing parens. Consider, for example
14137 # sort { $color_value{$::a} <=> $color_value{$::b}; }
14140 # which will look like this with the container broken:
14144 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
14147 # Here is an example of this rule for a long last term:
14149 # log_message( 0, 256, 128,
14150 # "Number of routes in adj-RIB-in to be considered: $peercount" );
14152 # And here is an example with a long first term:
14155 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
14156 # $r, $pu, $ps, $cu, $cs, $tt
14158 # if $style eq 'all';
14160 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
14161 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
14162 my $long_first_term =
14163 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
14165 # break at every comma ...
14168 # if requested by user or is best looking
14169 $number_of_fields_best == 1
14171 # or if this is a sublist of a larger list
14172 || $in_hierarchical_list
14174 # or if multiple commas and we dont have a long first or last
14176 || ( $comma_count > 1
14177 && !( $long_last_term || $long_first_term ) )
14180 foreach ( 0 .. $comma_count - 1 ) {
14181 set_forced_breakpoint( $$rcomma_index[$_] );
14184 elsif ($long_last_term) {
14186 set_forced_breakpoint($i_last_comma);
14187 $$rdo_not_break_apart = 1 unless $must_break_open;
14189 elsif ($long_first_term) {
14191 set_forced_breakpoint($i_first_comma);
14195 # let breaks be defined by default bond strength logic
14200 # --------------------------------------------------------
14201 # We have a tentative field count that seems to work.
14202 # How many lines will this require?
14203 # --------------------------------------------------------
14204 my $formatted_lines = $item_count / ($number_of_fields);
14205 if ( $formatted_lines != int $formatted_lines ) {
14206 $formatted_lines = 1 + int $formatted_lines;
14209 # So far we've been trying to fill out to the right margin. But
14210 # compact tables are easier to read, so let's see if we can use fewer
14211 # fields without increasing the number of lines.
14212 $number_of_fields =
14213 compactify_table( $item_count, $number_of_fields, $formatted_lines,
14216 # How many spaces across the page will we fill?
14217 my $columns_per_line =
14218 ( int $number_of_fields / 2 ) * $pair_width +
14219 ( $number_of_fields % 2 ) * $max_width;
14221 my $formatted_columns;
14223 if ( $number_of_fields > 1 ) {
14224 $formatted_columns =
14225 ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
14229 $formatted_columns = $max_width * $item_count;
14231 if ( $formatted_columns < $packed_columns ) {
14232 $formatted_columns = $packed_columns;
14235 my $unused_columns = $formatted_columns - $packed_columns;
14237 # set some empirical parameters to help decide if we should try to
14238 # align; high sparsity does not look good, especially with few lines
14239 my $sparsity = ($unused_columns) / ($formatted_columns);
14240 my $max_allowed_sparsity =
14241 ( $item_count < 3 ) ? 0.1
14242 : ( $packed_lines == 1 ) ? 0.15
14243 : ( $packed_lines == 2 ) ? 0.4
14246 # Begin check for shortcut methods, which avoid treating a list
14247 # as a table for relatively small parenthesized lists. These
14248 # are usually easier to read if not formatted as tables.
14250 $packed_lines <= 2 # probably can fit in 2 lines
14251 && $item_count < 9 # doesn't have too many items
14252 && $opening_environment eq 'BLOCK' # not a sub-container
14253 && $opening_token eq '(' # is paren list
14257 # Shortcut method 1: for -lp and just one comma:
14258 # This is a no-brainer, just break at the comma.
14260 $rOpts_line_up_parentheses # -lp
14261 && $item_count == 2 # two items, one comma
14262 && !$must_break_open
14265 my $i_break = $$rcomma_index[0];
14266 set_forced_breakpoint($i_break);
14267 $$rdo_not_break_apart = 1;
14268 set_non_alignment_flags( $comma_count, $rcomma_index );
14273 # method 2 is for most small ragged lists which might look
14274 # best if not displayed as a table.
14276 ( $number_of_fields == 2 && $item_count == 3 )
14278 $new_identifier_count > 0 # isn't all quotes
14279 && $sparsity > 0.15
14280 ) # would be fairly spaced gaps if aligned
14285 set_ragged_breakpoints( \@i_term_comma,
14286 $ri_ragged_break_list );
14287 ++$break_count if ($use_separate_first_term);
14289 # NOTE: we should really use the true break count here,
14290 # which can be greater if there are large terms and
14291 # little space, but usually this will work well enough.
14292 unless ($must_break_open) {
14294 if ( $break_count <= 1 ) {
14295 $$rdo_not_break_apart = 1;
14297 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14299 $$rdo_not_break_apart = 1;
14302 set_non_alignment_flags( $comma_count, $rcomma_index );
14306 } # end shortcut methods
14310 FORMATTER_DEBUG_FLAG_SPARSE && do {
14312 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
14316 #---------------------------------------------------------------
14317 # Compound List Rule 2:
14318 # If this list is too long for one line, and it is an item of a
14319 # larger list, then we must format it, regardless of sparsity
14320 # (ian.t). One reason that we have to do this is to trigger
14321 # Compound List Rule 1, above, which causes breaks at all commas of
14322 # all outer lists. In this way, the structure will be properly
14324 #---------------------------------------------------------------
14326 # Decide if this list is too long for one line unless broken
14327 my $total_columns = table_columns_available($i_opening_paren);
14328 my $too_long = $packed_columns > $total_columns;
14330 # For a paren list, include the length of the token just before the
14331 # '(' because this is likely a sub call, and we would have to
14332 # include the sub name on the same line as the list. This is still
14333 # imprecise, but not too bad. (steve.t)
14334 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
14337 excess_line_length( $i_opening_minus,
14338 $i_effective_last_comma + 1 ) > 0;
14341 # FIXME: For an item after a '=>', try to include the length of the
14342 # thing before the '=>'. This is crude and should be improved by
14343 # actually looking back token by token.
14344 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
14345 my $i_opening_minus = $i_opening_paren - 4;
14346 if ( $i_opening_minus >= 0 ) {
14348 excess_line_length( $i_opening_minus,
14349 $i_effective_last_comma + 1 ) > 0;
14353 # Always break lists contained in '[' and '{' if too long for 1 line,
14354 # and always break lists which are too long and part of a more complex
14356 my $must_break_open_container = $must_break_open
14358 && ( $in_hierarchical_list || $opening_token ne '(' ) );
14360 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
14362 #---------------------------------------------------------------
14363 # The main decision:
14364 # Now decide if we will align the data into aligned columns. Do not
14365 # attempt to align columns if this is a tiny table or it would be
14366 # too spaced. It seems that the more packed lines we have, the
14367 # sparser the list that can be allowed and still look ok.
14368 #---------------------------------------------------------------
14370 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
14371 || ( $formatted_lines < 2 )
14372 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
14376 #---------------------------------------------------------------
14377 # too sparse: would look ugly if aligned in a table;
14378 #---------------------------------------------------------------
14380 # use old breakpoints if this is a 'big' list
14381 # FIXME: goal is to improve set_ragged_breakpoints so that
14382 # this is not necessary.
14383 if ( $packed_lines > 2 && $item_count > 10 ) {
14384 write_logfile_entry("List sparse: using old breakpoints\n");
14385 copy_old_breakpoints( $i_first_comma, $i_last_comma );
14388 # let the continuation logic handle it if 2 lines
14392 set_ragged_breakpoints( \@i_term_comma,
14393 $ri_ragged_break_list );
14394 ++$break_count if ($use_separate_first_term);
14396 unless ($must_break_open_container) {
14397 if ( $break_count <= 1 ) {
14398 $$rdo_not_break_apart = 1;
14400 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14402 $$rdo_not_break_apart = 1;
14405 set_non_alignment_flags( $comma_count, $rcomma_index );
14410 #---------------------------------------------------------------
14411 # go ahead and format as a table
14412 #---------------------------------------------------------------
14413 write_logfile_entry(
14414 "List: auto formatting with $number_of_fields fields/row\n");
14416 my $j_first_break =
14417 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
14420 my $j = $j_first_break ;
14421 $j < $comma_count ;
14422 $j += $number_of_fields
14425 my $i = $$rcomma_index[$j];
14426 set_forced_breakpoint($i);
14432 sub set_non_alignment_flags {
14434 # set flag which indicates that these commas should not be
14436 my ( $comma_count, $rcomma_index ) = @_;
14437 foreach ( 0 .. $comma_count - 1 ) {
14438 $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
14442 sub study_list_complexity {
14444 # Look for complex tables which should be formatted with one term per line.
14445 # Returns the following:
14447 # \@i_ragged_break_list = list of good breakpoints to avoid lines
14448 # which are hard to read
14449 # $number_of_fields_best = suggested number of fields based on
14450 # complexity; = 0 if any number may be used.
14452 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
14453 my $item_count = @{$ri_term_begin};
14454 my $complex_item_count = 0;
14455 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
14456 my $i_max = @{$ritem_lengths} - 1;
14457 ##my @item_complexity;
14459 my $i_last_last_break = -3;
14460 my $i_last_break = -2;
14461 my @i_ragged_break_list;
14463 my $definitely_complex = 30;
14464 my $definitely_simple = 12;
14465 my $quote_count = 0;
14467 for my $i ( 0 .. $i_max ) {
14468 my $ib = $ri_term_begin->[$i];
14469 my $ie = $ri_term_end->[$i];
14471 # define complexity: start with the actual term length
14472 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
14474 ##TBD: join types here and check for variations
14475 ##my $str=join "", @tokens_to_go[$ib..$ie];
14478 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
14482 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
14486 if ( $ib eq $ie ) {
14487 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
14488 $complex_item_count++;
14489 $weighted_length *= 2;
14495 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
14496 $complex_item_count++;
14497 $weighted_length *= 2;
14499 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
14500 $weighted_length += 4;
14504 # add weight for extra tokens.
14505 $weighted_length += 2 * ( $ie - $ib );
14507 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
14508 ## print "# COMPLEXITY:$weighted_length $BUB\n";
14510 ##push @item_complexity, $weighted_length;
14512 # now mark a ragged break after this item it if it is 'long and
14514 if ( $weighted_length >= $definitely_complex ) {
14516 # if we broke after the previous term
14517 # then break before it too
14518 if ( $i_last_break == $i - 1
14520 && $i_last_last_break != $i - 2 )
14523 ## FIXME: don't strand a small term
14524 pop @i_ragged_break_list;
14525 push @i_ragged_break_list, $i - 2;
14526 push @i_ragged_break_list, $i - 1;
14529 push @i_ragged_break_list, $i;
14530 $i_last_last_break = $i_last_break;
14531 $i_last_break = $i;
14534 # don't break before a small last term -- it will
14535 # not look good on a line by itself.
14536 elsif ($i == $i_max
14537 && $i_last_break == $i - 1
14538 && $weighted_length <= $definitely_simple )
14540 pop @i_ragged_break_list;
14544 my $identifier_count = $i_max + 1 - $quote_count;
14546 # Need more tuning here..
14547 if ( $max_width > 12
14548 && $complex_item_count > $item_count / 2
14549 && $number_of_fields_best != 2 )
14551 $number_of_fields_best = 1;
14554 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
14557 sub get_maximum_fields_wanted {
14559 # Not all tables look good with more than one field of items.
14560 # This routine looks at a table and decides if it should be
14561 # formatted with just one field or not.
14562 # This coding is still under development.
14563 my ($ritem_lengths) = @_;
14565 my $number_of_fields_best = 0;
14567 # For just a few items, we tentatively assume just 1 field.
14568 my $item_count = @{$ritem_lengths};
14569 if ( $item_count <= 5 ) {
14570 $number_of_fields_best = 1;
14573 # For larger tables, look at it both ways and see what looks best
14577 my @max_length = ( 0, 0 );
14578 my @last_length_2 = ( undef, undef );
14579 my @first_length_2 = ( undef, undef );
14580 my $last_length = undef;
14581 my $total_variation_1 = 0;
14582 my $total_variation_2 = 0;
14583 my @total_variation_2 = ( 0, 0 );
14584 for ( my $j = 0 ; $j < $item_count ; $j++ ) {
14586 $is_odd = 1 - $is_odd;
14587 my $length = $ritem_lengths->[$j];
14588 if ( $length > $max_length[$is_odd] ) {
14589 $max_length[$is_odd] = $length;
14592 if ( defined($last_length) ) {
14593 my $dl = abs( $length - $last_length );
14594 $total_variation_1 += $dl;
14596 $last_length = $length;
14598 my $ll = $last_length_2[$is_odd];
14599 if ( defined($ll) ) {
14600 my $dl = abs( $length - $ll );
14601 $total_variation_2[$is_odd] += $dl;
14604 $first_length_2[$is_odd] = $length;
14606 $last_length_2[$is_odd] = $length;
14608 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
14610 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
14611 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
14612 $number_of_fields_best = 1;
14615 return ($number_of_fields_best);
14618 sub table_columns_available {
14619 my $i_first_comma = shift;
14621 $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
14623 # Patch: the vertical formatter does not line up lines whose lengths
14624 # exactly equal the available line length because of allowances
14625 # that must be made for side comments. Therefore, the number of
14626 # available columns is reduced by 1 character.
14631 sub maximum_number_of_fields {
14633 # how many fields will fit in the available space?
14634 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
14635 my $max_pairs = int( $columns / $pair_width );
14636 my $number_of_fields = $max_pairs * 2;
14637 if ( $odd_or_even == 1
14638 && $max_pairs * $pair_width + $max_width <= $columns )
14640 $number_of_fields++;
14642 return $number_of_fields;
14645 sub compactify_table {
14647 # given a table with a certain number of fields and a certain number
14648 # of lines, see if reducing the number of fields will make it look
14650 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
14651 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
14655 $min_fields = $number_of_fields ;
14656 $min_fields >= $odd_or_even
14657 && $min_fields * $formatted_lines >= $item_count ;
14658 $min_fields -= $odd_or_even
14661 $number_of_fields = $min_fields;
14664 return $number_of_fields;
14667 sub set_ragged_breakpoints {
14669 # Set breakpoints in a list that cannot be formatted nicely as a
14671 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
14673 my $break_count = 0;
14674 foreach (@$ri_ragged_break_list) {
14675 my $j = $ri_term_comma->[$_];
14677 set_forced_breakpoint($j);
14681 return $break_count;
14684 sub copy_old_breakpoints {
14685 my ( $i_first_comma, $i_last_comma ) = @_;
14686 for my $i ( $i_first_comma .. $i_last_comma ) {
14687 if ( $old_breakpoint_to_go[$i] ) {
14688 set_forced_breakpoint($i);
14694 my ( $i, $j ) = @_;
14695 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
14697 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14698 my ( $a, $b, $c ) = caller();
14700 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
14704 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
14707 # shouldn't happen; non-critical error
14709 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14710 my ( $a, $b, $c ) = caller();
14712 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
14718 sub set_fake_breakpoint {
14720 # Just bump up the breakpoint count as a signal that there are breaks.
14721 # This is useful if we have breaks but may want to postpone deciding where
14723 $forced_breakpoint_count++;
14726 sub set_forced_breakpoint {
14729 return unless defined $i && $i >= 0;
14731 # when called with certain tokens, use bond strengths to decide
14732 # if we break before or after it
14733 my $token = $tokens_to_go[$i];
14735 if ( $token =~ /^([\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
14736 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14739 # breaks are forced before 'if' and 'unless'
14740 elsif ( $is_if_unless{$token} ) { $i-- }
14742 if ( $i >= 0 && $i <= $max_index_to_go ) {
14743 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14745 FORMATTER_DEBUG_FLAG_FORCE && do {
14746 my ( $a, $b, $c ) = caller();
14748 "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
14751 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
14752 $forced_breakpoint_to_go[$i_nonblank] = 1;
14754 if ( $i_nonblank > $index_max_forced_break ) {
14755 $index_max_forced_break = $i_nonblank;
14757 $forced_breakpoint_count++;
14758 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
14761 # if we break at an opening container..break at the closing
14762 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
14763 set_closing_breakpoint($i_nonblank);
14769 sub clear_breakpoint_undo_stack {
14770 $forced_breakpoint_undo_count = 0;
14773 sub undo_forced_breakpoint_stack {
14775 my $i_start = shift;
14776 if ( $i_start < 0 ) {
14778 my ( $a, $b, $c ) = caller();
14780 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
14784 while ( $forced_breakpoint_undo_count > $i_start ) {
14786 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14787 if ( $i >= 0 && $i <= $max_index_to_go ) {
14788 $forced_breakpoint_to_go[$i] = 0;
14789 $forced_breakpoint_count--;
14791 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14792 my ( $a, $b, $c ) = caller();
14794 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
14799 # shouldn't happen, but not a critical error
14801 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14802 my ( $a, $b, $c ) = caller();
14804 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
14811 sub recombine_breakpoints {
14813 # sub set_continuation_breaks is very liberal in setting line breaks
14814 # for long lines, always setting breaks at good breakpoints, even
14815 # when that creates small lines. Occasionally small line fragments
14816 # are produced which would look better if they were combined.
14817 # That's the task of this routine, recombine_breakpoints.
14818 my ( $ri_first, $ri_last ) = @_;
14819 my $more_to_do = 1;
14821 # We keep looping over all of the lines of this batch
14822 # until there are no more possible recombinations
14823 my $nmax_last = @$ri_last;
14824 while ($more_to_do) {
14828 my $nmax = @$ri_last - 1;
14830 # safety check for infinite loop
14831 unless ( $nmax < $nmax_last ) {
14833 # shouldn't happen because splice below decreases nmax on each pass:
14834 # but i get paranoid sometimes
14835 die "Program bug-infinite loop in recombine breakpoints\n";
14837 $nmax_last = $nmax;
14839 my $previous_outdentable_closing_paren;
14840 my $leading_amp_count = 0;
14841 my $this_line_is_semicolon_terminated;
14843 # loop over all remaining lines in this batch
14844 for $n ( 1 .. $nmax ) {
14846 #----------------------------------------------------------
14847 # If we join the current pair of lines,
14848 # line $n-1 will become the left part of the joined line
14849 # line $n will become the right part of the joined line
14851 # Here are Indexes of the endpoint tokens of the two lines:
14853 # ---left---- | ---right---
14854 # $if $imid | $imidr $il
14856 # We want to decide if we should join tokens $imid to $imidr
14858 # We will apply a number of ad-hoc tests to see if joining
14859 # here will look ok. The code will just issue a 'next'
14860 # command if the join doesn't look good. If we get through
14861 # the gauntlet of tests, the lines will be recombined.
14862 #----------------------------------------------------------
14863 my $if = $$ri_first[ $n - 1 ];
14864 my $il = $$ri_last[$n];
14865 my $imid = $$ri_last[ $n - 1 ];
14866 my $imidr = $$ri_first[$n];
14868 #my $depth_increase=( $nesting_depth_to_go[$imidr] -
14869 # $nesting_depth_to_go[$if] );
14871 ##print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
14873 # If line $n is the last line, we set some flags and
14874 # do any special checks for it
14875 if ( $n == $nmax ) {
14877 # a terminal '{' should stay where it is
14878 next if $types_to_go[$imidr] eq '{';
14880 # set flag if statement $n ends in ';'
14881 $this_line_is_semicolon_terminated = $types_to_go[$il] eq ';'
14883 # with possible side comment
14884 || ( $types_to_go[$il] eq '#'
14885 && $il - $imidr >= 2
14886 && $types_to_go[ $il - 2 ] eq ';'
14887 && $types_to_go[ $il - 1 ] eq 'b' );
14890 #----------------------------------------------------------
14891 # Section 1: examine token at $imid (right end of first line
14893 #----------------------------------------------------------
14895 # an isolated '}' may join with a ';' terminated segment
14896 if ( $types_to_go[$imid] eq '}' ) {
14898 # Check for cases where combining a semicolon terminated
14899 # statement with a previous isolated closing paren will
14900 # allow the combined line to be outdented. This is
14901 # generally a good move. For example, we can join up
14902 # the last two lines here:
14904 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
14905 # $size, $atime, $mtime, $ctime, $blksize, $blocks
14911 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
14912 # $size, $atime, $mtime, $ctime, $blksize, $blocks
14915 # which makes the parens line up.
14917 # Another example, from Joe Matarazzo, probably looks best
14918 # with the 'or' clause appended to the trailing paren:
14919 # $self->some_method(
14922 # ) or die "Some_method didn't work";
14924 $previous_outdentable_closing_paren =
14925 $this_line_is_semicolon_terminated # ends in ';'
14926 && $if == $imid # only one token on last line
14927 && $tokens_to_go[$imid] eq ')' # must be structural paren
14929 # only &&, ||, and : if no others seen
14930 # (but note: our count made below could be wrong
14931 # due to intervening comments)
14932 && ( $leading_amp_count == 0
14933 || $types_to_go[$imidr] !~ /^(:|\&\&|\|\|)$/ )
14935 # but leading colons probably line up with with a
14936 # previous colon or question (count could be wrong).
14937 && $types_to_go[$imidr] ne ':'
14939 # only one step in depth allowed. this line must not
14940 # begin with a ')' itself.
14941 && ( $nesting_depth_to_go[$imid] ==
14942 $nesting_depth_to_go[$il] + 1 );
14946 $previous_outdentable_closing_paren
14948 # handle '.' and '?' specially below
14949 || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
14953 # do not recombine lines with ending &&, ||, or :
14954 elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
14955 next unless $want_break_before{ $types_to_go[$imid] };
14958 # for lines ending in a comma...
14959 elsif ( $types_to_go[$imid] eq ',' ) {
14961 # an isolated '},' may join with an identifier + ';'
14962 # this is useful for the class of a 'bless' statement (bless.t)
14963 if ( $types_to_go[$if] eq '}'
14964 && $types_to_go[$imidr] eq 'i' )
14967 unless ( ( $if == ( $imid - 1 ) )
14968 && ( $il == ( $imidr + 1 ) )
14969 && $this_line_is_semicolon_terminated );
14971 # override breakpoint
14972 $forced_breakpoint_to_go[$imid] = 0;
14975 # but otherwise, do not recombine unless this will leave
14978 next unless ( $n + 1 >= $nmax );
14983 elsif ( $types_to_go[$imid] eq '(' ) {
14985 # No longer doing this
14988 elsif ( $types_to_go[$imid] eq ')' ) {
14990 # No longer doing this
14993 # keep a terminal colon
14994 elsif ( $types_to_go[$imid] eq ':' ) {
14998 # keep a terminal for-semicolon
14999 elsif ( $types_to_go[$imid] eq 'f' ) {
15003 # if '=' at end of line ...
15004 elsif ( $is_assignment{ $types_to_go[$imid] } ) {
15006 # otherwise always ok to join isolated '='
15007 unless ( $if == $imid ) {
15010 ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ )
15012 # note no '$' in pattern because -> can
15013 # start long identifier
15014 && !grep { $_ =~ /^(->|=>|[\,])/ }
15015 @types_to_go[ $imidr .. $il ]
15018 # retain the break after the '=' unless ...
15022 # '=' is followed by a number and looks like math
15023 ( $types_to_go[$imidr] eq 'n' && $is_math )
15025 # or followed by a scalar and looks like math
15026 || ( ( $types_to_go[$imidr] eq 'i' )
15027 && ( $tokens_to_go[$imidr] =~ /^\$/ )
15030 # or followed by a single "short" token
15031 # ('12' is arbitrary)
15033 && token_sequence_length( $imidr, $imidr ) < 12 )
15037 unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
15038 $forced_breakpoint_to_go[$imid] = 0;
15043 elsif ( $types_to_go[$imid] eq 'k' ) {
15045 # make major control keywords stand out
15050 #/^(last|next|redo|return)$/
15051 $is_last_next_redo_return{ $tokens_to_go[$imid] }
15054 if ( $is_and_or{ $tokens_to_go[$imid] } ) {
15055 next unless $want_break_before{ $tokens_to_go[$imid] };
15059 #----------------------------------------------------------
15060 # Section 2: Now examine token at $imidr (left end of second
15062 #----------------------------------------------------------
15064 # join lines identified above as capable of
15065 # causing an outdented line with leading closing paren
15066 if ($previous_outdentable_closing_paren) {
15067 $forced_breakpoint_to_go[$imid] = 0;
15070 # do not recombine lines with leading &&, ||, or :
15071 elsif ( $types_to_go[$imidr] =~ /^(:|\&\&|\|\|)$/ ) {
15072 $leading_amp_count++;
15073 next if $want_break_before{ $types_to_go[$imidr] };
15076 # Identify and recombine a broken ?/: chain
15077 elsif ( $types_to_go[$imidr] eq '?' ) {
15079 # indexes of line first tokens --
15080 # mm - line before previous line
15081 # f - previous line
15084 # fff - line after next
15085 my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
15086 my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
15087 my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1;
15088 my $seqno = $type_sequence_to_go[$imidr];
15090 ( $types_to_go[$if] eq ':'
15091 && $type_sequence_to_go[$if] ==
15092 $seqno - TYPE_SEQUENCE_INCREMENT );
15095 && $types_to_go[$imm] eq ':'
15096 && $type_sequence_to_go[$imm] ==
15097 $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
15101 && $types_to_go[$iff] eq ':'
15102 && $type_sequence_to_go[$iff] == $seqno );
15105 && $types_to_go[$ifff] eq ':'
15106 && $type_sequence_to_go[$ifff] ==
15107 $seqno + TYPE_SEQUENCE_INCREMENT );
15109 # we require that this '?' be part of a correct sequence
15110 # of 3 in a row or else no recombination is done.
15112 unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
15113 $forced_breakpoint_to_go[$imid] = 0;
15116 # do not recombine lines with leading '.'
15117 elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
15118 my $i_next_nonblank = $imidr + 1;
15119 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
15120 $i_next_nonblank++;
15126 # ... unless there is just one and we can reduce
15127 # this to two lines if we do. For example, this
15131 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
15133 # looks better than this:
15134 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
15135 # . '$args .= $pat;'
15140 && $types_to_go[$if] ne $types_to_go[$imidr]
15143 # ... or this would strand a short quote , like this
15144 # . "some long qoute"
15147 || ( $types_to_go[$i_next_nonblank] eq 'Q'
15148 && $i_next_nonblank >= $il - 1
15149 && length( $tokens_to_go[$i_next_nonblank] ) <
15150 $rOpts_short_concatenation_item_length )
15154 # handle leading keyword..
15155 elsif ( $types_to_go[$imidr] eq 'k' ) {
15157 # handle leading "and" and "or"
15158 if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
15160 # Decide if we will combine a single terminal 'and' and
15161 # 'or' after an 'if' or 'unless'. We should consider the
15162 # possible vertical alignment, and visual clutter.
15164 # This looks best with the 'and' on the same
15165 # line as the 'if':
15168 # if $seconds and $nu < 2;
15170 # But this looks better as shown:
15173 # if !$this->{Parents}{$_}
15174 # or $this->{Parents}{$_} eq $_;
15176 # Eventually, it would be nice to look for
15177 # similarities (such as 'this' or 'Parents'), but
15178 # for now I'm using a simple rule that says that
15179 # the resulting line length must not be more than
15180 # half the maximum line length (making it 80/2 =
15181 # 40 characters by default).
15184 $this_line_is_semicolon_terminated
15187 # following 'if' or 'unless'
15188 $types_to_go[$if] eq 'k'
15189 && $is_if_unless{ $tokens_to_go[$if] }
15194 # override breakpoint
15195 ##$forced_breakpoint_to_go[$imid] = 0;
15198 # handle leading "if" and "unless"
15199 elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
15201 # FIXME: This is still experimental..may not be too useful
15204 $this_line_is_semicolon_terminated
15206 # previous line begins with 'and' or 'or'
15207 && $types_to_go[$if] eq 'k'
15208 && $is_and_or{ $tokens_to_go[$if] }
15212 # override breakpoint
15213 ##$forced_breakpoint_to_go[$imid] = 0;
15217 # handle all other leading keywords
15220 # keywords look best at start of lines,
15221 # but combine things like "1 while"
15222 unless ( $is_assignment{ $types_to_go[$imid] } ) {
15224 if ( ( $types_to_go[$imid] ne 'k' )
15225 && ( $tokens_to_go[$imidr] ne 'while' ) );
15230 # similar treatment of && and || as above for 'and' and 'or':
15231 # NOTE: This block of code is currently bypassed because
15232 # of a previous block but is retained for possible future use.
15233 elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
15235 # maybe looking at something like:
15236 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
15240 $this_line_is_semicolon_terminated
15242 # previous line begins with an 'if' or 'unless' keyword
15243 && $types_to_go[$if] eq 'k'
15244 && $is_if_unless{ $tokens_to_go[$if] }
15248 # override breakpoint
15249 ##$forced_breakpoint_to_go[$imid] = 0;
15252 #----------------------------------------------------------
15254 # Combine the lines if we arrive here and it is possible
15255 #----------------------------------------------------------
15257 # honor hard breakpoints
15258 next if ( $forced_breakpoint_to_go[$imid] > 0 );
15260 my $bs = $bond_strength_to_go[$imid];
15262 # combined line cannot be too long
15264 if excess_line_length( $if, $il ) > 0;
15266 # do not recombine if we would skip in indentation levels
15267 if ( $n < $nmax ) {
15268 my $if_next = $$ri_first[ $n + 1 ];
15271 $levels_to_go[$if] < $levels_to_go[$imidr]
15272 && $levels_to_go[$imidr] < $levels_to_go[$if_next]
15274 # but an isolated 'if (' is undesirable
15277 && $imid - $if <= 2
15278 && $types_to_go[$if] eq 'k'
15279 && $tokens_to_go[$if] eq 'if'
15280 && $tokens_to_go[$imid] ne '('
15286 next if ( $bs == NO_BREAK );
15288 # remember the pair with the greatest bond strength
15295 if ( $bs > $bs_best ) {
15300 # we have 2 or more candidates, so need another pass
15305 # recombine the pair with the greatest bond strength
15307 splice @$ri_first, $n_best, 1;
15308 splice @$ri_last, $n_best - 1, 1;
15311 return ( $ri_first, $ri_last );
15314 sub set_continuation_breaks {
15316 # Define an array of indexes for inserting newline characters to
15317 # keep the line lengths below the maximum desired length. There is
15318 # an implied break after the last token, so it need not be included.
15319 # We'll break at points where the bond strength is lowest.
15321 my $saw_good_break = shift;
15322 my @i_first = (); # the first index to output
15323 my @i_last = (); # the last index to output
15324 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
15325 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
15327 set_bond_strengths();
15330 my $imax = $max_index_to_go;
15331 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
15332 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
15333 my $i_begin = $imin;
15335 my $leading_spaces = leading_spaces_to_go($imin);
15336 my $line_count = 0;
15337 my $last_break_strength = NO_BREAK;
15338 my $i_last_break = -1;
15339 my $max_bias = 0.001;
15340 my $tiny_bias = 0.0001;
15341 my $leading_alignment_token = "";
15342 my $leading_alignment_type = "";
15344 # see if any ?/:'s are in order
15345 my $colons_in_order = 1;
15347 my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
15348 foreach (@colon_list) {
15349 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
15353 # This is a sufficient but not necessary condition for colon chain
15354 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
15356 while ( $i_begin <= $imax ) {
15357 my $lowest_strength = NO_BREAK;
15358 my $starting_sum = $lengths_to_go[$i_begin];
15361 my $lowest_next_token = '';
15362 my $lowest_next_type = 'b';
15363 my $i_lowest_next_nonblank = -1;
15365 # loop to find next break point
15366 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
15367 my $type = $types_to_go[$i_test];
15368 my $token = $tokens_to_go[$i_test];
15369 my $next_type = $types_to_go[ $i_test + 1 ];
15370 my $next_token = $tokens_to_go[ $i_test + 1 ];
15371 my $i_next_nonblank =
15372 ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
15373 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
15374 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15375 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15376 my $strength = $bond_strength_to_go[$i_test];
15377 my $must_break = 0;
15379 # FIXME: TESTING: Might want to be able to break after these
15380 # force an immediate break at certain operators
15381 # with lower level than the start of the line
15384 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
15385 || ( $next_nonblank_type eq 'k'
15386 && $next_nonblank_token =~ /^(and|or)$/ )
15388 && ( $nesting_depth_to_go[$i_begin] >
15389 $nesting_depth_to_go[$i_next_nonblank] )
15392 set_forced_breakpoint($i_next_nonblank);
15397 # Try to put a break where requested by scan_list
15398 $forced_breakpoint_to_go[$i_test]
15400 # break between ) { in a continued line so that the '{' can
15402 # See similar logic in scan_list which catches instances
15403 # where a line is just something like ') {'
15405 && ( $token eq ')' )
15406 && ( $next_nonblank_type eq '{' )
15407 && ($next_nonblank_block_type)
15408 && !$rOpts->{'opening-brace-always-on-right'} )
15410 # There is an implied forced break at a terminal opening brace
15411 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
15416 # Forced breakpoints must sometimes be overridden, for example
15417 # because of a side comment causing a NO_BREAK. It is easier
15418 # to catch this here than when they are set.
15419 if ( $strength < NO_BREAK ) {
15420 $strength = $lowest_strength - $tiny_bias;
15425 # quit if a break here would put a good terminal token on
15426 # the next line and we already have a possible break
15429 && ( $next_nonblank_type =~ /^[\;\,]$/ )
15432 $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ]
15434 ) > $rOpts_maximum_line_length
15438 last if ( $i_lowest >= 0 );
15441 # Avoid a break which would strand a single punctuation
15442 # token. For example, we do not want to strand a leading
15443 # '.' which is followed by a long quoted string.
15446 && ( $i_test == $i_begin )
15447 && ( $i_test < $imax )
15448 && ( $token eq $type )
15451 $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
15453 ) <= $rOpts_maximum_line_length
15459 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
15465 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
15468 # break at previous best break if it would have produced
15469 # a leading alignment of certain common tokens, and it
15470 # is different from the latest candidate break
15472 if ($leading_alignment_type);
15474 # Force at least one breakpoint if old code had good
15475 # break It is only called if a breakpoint is required or
15476 # desired. This will probably need some adjustments
15477 # over time. A goal is to try to be sure that, if a new
15478 # side comment is introduced into formated text, then
15479 # the same breakpoints will occur. scbreak.t
15482 $i_test == $imax # we are at the end
15483 && !$forced_breakpoint_count #
15484 && $saw_good_break # old line had good break
15485 && $type =~ /^[#;\{]$/ # and this line ends in
15486 # ';' or side comment
15487 && $i_last_break < 0 # and we haven't made a break
15488 && $i_lowest > 0 # and we saw a possible break
15489 && $i_lowest < $imax - 1 # (but not just before this ;)
15490 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
15493 $lowest_strength = $strength;
15494 $i_lowest = $i_test;
15495 $lowest_next_token = $next_nonblank_token;
15496 $lowest_next_type = $next_nonblank_type;
15497 $i_lowest_next_nonblank = $i_next_nonblank;
15498 last if $must_break;
15500 # set flags to remember if a break here will produce a
15501 # leading alignment of certain common tokens
15502 if ( $line_count > 0
15504 && ( $lowest_strength - $last_break_strength <= $max_bias )
15507 my $i_last_end = $i_begin - 1;
15508 if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
15509 my $tok_beg = $tokens_to_go[$i_begin];
15510 my $type_beg = $types_to_go[$i_begin];
15513 # check for leading alignment of certain tokens
15515 $tok_beg eq $next_nonblank_token
15516 && $is_chain_operator{$tok_beg}
15517 && ( $type_beg eq 'k'
15518 || $type_beg eq $tok_beg )
15519 && $nesting_depth_to_go[$i_begin] >=
15520 $nesting_depth_to_go[$i_next_nonblank]
15523 || ( $tokens_to_go[$i_last_end] eq $token
15524 && $is_chain_operator{$token}
15525 && ( $type eq 'k' || $type eq $token )
15526 && $nesting_depth_to_go[$i_last_end] >=
15527 $nesting_depth_to_go[$i_test] )
15530 $leading_alignment_token = $next_nonblank_token;
15531 $leading_alignment_type = $next_nonblank_type;
15537 ( $i_test >= $imax )
15541 $leading_spaces + $lengths_to_go[ $i_test + 2 ] -
15543 ) > $rOpts_maximum_line_length
15546 FORMATTER_DEBUG_FLAG_BREAK
15548 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
15550 # allow one extra terminal token after exceeding line length
15551 # if it would strand this token.
15552 if ( $rOpts_fuzzy_line_length
15554 && ( $i_lowest == $i_test )
15555 && ( length($token) > 1 )
15556 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
15563 ( $i_test == $imax ) # we're done if no more tokens,
15565 ( $i_lowest >= 0 ) # or no more space and we have a break
15571 # it's always ok to break at imax if no other break was found
15572 if ( $i_lowest < 0 ) { $i_lowest = $imax }
15574 # semi-final index calculation
15575 my $i_next_nonblank = (
15576 ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
15580 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
15581 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15583 #-------------------------------------------------------
15584 # ?/: rule 1 : if a break here will separate a '?' on this
15585 # line from its closing ':', then break at the '?' instead.
15586 #-------------------------------------------------------
15588 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
15589 next unless ( $tokens_to_go[$i] eq '?' );
15591 # do not break if probable sequence of ?/: statements
15592 next if ($is_colon_chain);
15594 # do not break if statement is broken by side comment
15597 $tokens_to_go[$max_index_to_go] eq '#'
15598 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
15599 $max_index_to_go ) !~ /^[\;\}]$/
15602 # no break needed if matching : is also on the line
15604 if ( $mate_index_to_go[$i] >= 0
15605 && $mate_index_to_go[$i] <= $i_next_nonblank );
15608 if ( $want_break_before{'?'} ) { $i_lowest-- }
15612 # final index calculation
15613 $i_next_nonblank = (
15614 ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
15618 $next_nonblank_type = $types_to_go[$i_next_nonblank];
15619 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15621 FORMATTER_DEBUG_FLAG_BREAK
15622 && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
15624 #-------------------------------------------------------
15625 # ?/: rule 2 : if we break at a '?', then break at its ':'
15627 # Note: this rule is also in sub scan_list to handle a break
15628 # at the start and end of a line (in case breaks are dictated
15629 # by side comments).
15630 #-------------------------------------------------------
15631 if ( $next_nonblank_type eq '?' ) {
15632 set_closing_breakpoint($i_next_nonblank);
15634 elsif ( $types_to_go[$i_lowest] eq '?' ) {
15635 set_closing_breakpoint($i_lowest);
15638 #-------------------------------------------------------
15639 # ?/: rule 3 : if we break at a ':' then we save
15640 # its location for further work below. We may need to go
15641 # back and break at its '?'.
15642 #-------------------------------------------------------
15643 if ( $next_nonblank_type eq ':' ) {
15644 push @i_colon_breaks, $i_next_nonblank;
15646 elsif ( $types_to_go[$i_lowest] eq ':' ) {
15647 push @i_colon_breaks, $i_lowest;
15650 # here we should set breaks for all '?'/':' pairs which are
15651 # separated by this line
15655 # save this line segment, after trimming blanks at the ends
15657 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
15659 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
15661 # set a forced breakpoint at a container opening, if necessary, to
15662 # signal a break at a closing container. Excepting '(' for now.
15663 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
15664 && !$forced_breakpoint_to_go[$i_lowest] )
15666 set_closing_breakpoint($i_lowest);
15669 # get ready to go again
15670 $i_begin = $i_lowest + 1;
15671 $last_break_strength = $lowest_strength;
15672 $i_last_break = $i_lowest;
15673 $leading_alignment_token = "";
15674 $leading_alignment_type = "";
15675 $lowest_next_token = '';
15676 $lowest_next_type = 'b';
15678 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
15682 # update indentation size
15683 if ( $i_begin <= $imax ) {
15684 $leading_spaces = leading_spaces_to_go($i_begin);
15688 #-------------------------------------------------------
15689 # ?/: rule 4 -- if we broke at a ':', then break at
15690 # corresponding '?' unless this is a chain of ?: expressions
15691 #-------------------------------------------------------
15692 if (@i_colon_breaks) {
15694 # using a simple method for deciding if we are in a ?/: chain --
15695 # this is a chain if it has multiple ?/: pairs all in order;
15697 # Note that if line starts in a ':' we count that above as a break
15698 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
15700 unless ($is_chain) {
15701 my @insert_list = ();
15702 foreach (@i_colon_breaks) {
15703 my $i_question = $mate_index_to_go[$_];
15704 if ( $i_question >= 0 ) {
15705 if ( $want_break_before{'?'} ) {
15707 if ( $i_question > 0
15708 && $types_to_go[$i_question] eq 'b' )
15714 if ( $i_question >= 0 ) {
15715 push @insert_list, $i_question;
15718 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
15722 return \@i_first, \@i_last;
15725 sub insert_additional_breaks {
15727 # this routine will add line breaks at requested locations after
15728 # sub set_continuation_breaks has made preliminary breaks.
15730 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
15733 my $line_number = 0;
15735 foreach $i_break_left ( sort @$ri_break_list ) {
15737 $i_f = $$ri_first[$line_number];
15738 $i_l = $$ri_last[$line_number];
15739 while ( $i_break_left >= $i_l ) {
15742 # shouldn't happen unless caller passes bad indexes
15743 if ( $line_number >= @$ri_last ) {
15745 "Non-fatal program bug: couldn't set break at $i_break_left\n"
15747 report_definite_bug();
15750 $i_f = $$ri_first[$line_number];
15751 $i_l = $$ri_last[$line_number];
15754 my $i_break_right = $i_break_left + 1;
15755 if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
15757 if ( $i_break_left >= $i_f
15758 && $i_break_left < $i_l
15759 && $i_break_right > $i_f
15760 && $i_break_right <= $i_l )
15762 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
15763 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
15768 sub set_closing_breakpoint {
15770 # set a breakpoint at a matching closing token
15771 # at present, this is only used to break at a ':' which matches a '?'
15772 my $i_break = shift;
15774 if ( $mate_index_to_go[$i_break] >= 0 ) {
15776 # CAUTION: infinite recursion possible here:
15777 # set_closing_breakpoint calls set_forced_breakpoint, and
15778 # set_forced_breakpoint call set_closing_breakpoint
15779 # ( test files attrib.t, BasicLyx.pm.html).
15780 # Don't reduce the '2' in the statement below
15781 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
15783 # break before } ] and ), but sub set_forced_breakpoint will decide
15784 # to break before or after a ? and :
15785 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
15786 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
15790 my $type_sequence = $type_sequence_to_go[$i_break];
15791 if ($type_sequence) {
15792 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
15793 $postponed_breakpoint{$type_sequence} = 1;
15798 # check to see if output line tabbing agrees with input line
15799 # this can be very useful for debugging a script which has an extra
15801 sub compare_indentation_levels {
15803 my ( $python_indentation_level, $structural_indentation_level ) = @_;
15804 if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
15805 $last_tabbing_disagreement = $input_line_number;
15807 if ($in_tabbing_disagreement) {
15810 $tabbing_disagreement_count++;
15812 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15813 write_logfile_entry(
15814 "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
15817 $in_tabbing_disagreement = $input_line_number;
15818 $first_tabbing_disagreement = $in_tabbing_disagreement
15819 unless ($first_tabbing_disagreement);
15824 if ($in_tabbing_disagreement) {
15826 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15827 write_logfile_entry(
15828 "End indentation disagreement from input line $in_tabbing_disagreement\n"
15831 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
15832 write_logfile_entry(
15833 "No further tabbing disagreements will be noted\n");
15836 $in_tabbing_disagreement = 0;
15841 #####################################################################
15843 # the Perl::Tidy::IndentationItem class supplies items which contain
15844 # how much whitespace should be used at the start of a line
15846 #####################################################################
15848 package Perl::Tidy::IndentationItem;
15850 # Indexes for indentation items
15851 use constant SPACES => 0; # total leading white spaces
15852 use constant LEVEL => 1; # the indentation 'level'
15853 use constant CI_LEVEL => 2; # the 'continuation level'
15854 use constant AVAILABLE_SPACES => 3; # how many left spaces available
15856 use constant CLOSED => 4; # index where we saw closing '}'
15857 use constant COMMA_COUNT => 5; # how many commas at this level?
15858 use constant SEQUENCE_NUMBER => 6; # output batch number
15859 use constant INDEX => 7; # index in output batch list
15860 use constant HAVE_CHILD => 8; # any dependents?
15861 use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
15862 # we would like to move to get
15863 # alignment (negative if left)
15864 use constant ALIGN_PAREN => 10; # do we want to try to align
15865 # with an opening structure?
15866 use constant MARKED => 11; # if visited by corrector logic
15867 use constant STACK_DEPTH => 12; # indentation nesting depth
15868 use constant STARTING_INDEX => 13; # first token index of this level
15869 use constant ARROW_COUNT => 14; # how many =>'s
15873 # Create an 'indentation_item' which describes one level of leading
15874 # whitespace when the '-lp' indentation is used. We return
15875 # a reference to an anonymous array of associated variables.
15876 # See above constants for storage scheme.
15878 $class, $spaces, $level,
15879 $ci_level, $available_spaces, $index,
15880 $gnu_sequence_number, $align_paren, $stack_depth,
15884 my $arrow_count = 0;
15885 my $comma_count = 0;
15886 my $have_child = 0;
15887 my $want_right_spaces = 0;
15890 $spaces, $level, $ci_level,
15891 $available_spaces, $closed, $comma_count,
15892 $gnu_sequence_number, $index, $have_child,
15893 $want_right_spaces, $align_paren, $marked,
15894 $stack_depth, $starting_index, $arrow_count,
15898 sub permanently_decrease_AVAILABLE_SPACES {
15900 # make a permanent reduction in the available indentation spaces
15901 # at one indentation item. NOTE: if there are child nodes, their
15902 # total SPACES must be reduced by the caller.
15904 my ( $item, $spaces_needed ) = @_;
15905 my $available_spaces = $item->get_AVAILABLE_SPACES();
15906 my $deleted_spaces =
15907 ( $available_spaces > $spaces_needed )
15909 : $available_spaces;
15910 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
15911 $item->decrease_SPACES($deleted_spaces);
15912 $item->set_RECOVERABLE_SPACES(0);
15914 return $deleted_spaces;
15917 sub tentatively_decrease_AVAILABLE_SPACES {
15919 # We are asked to tentatively delete $spaces_needed of indentation
15920 # for a indentation item. We may want to undo this later. NOTE: if
15921 # there are child nodes, their total SPACES must be reduced by the
15923 my ( $item, $spaces_needed ) = @_;
15924 my $available_spaces = $item->get_AVAILABLE_SPACES();
15925 my $deleted_spaces =
15926 ( $available_spaces > $spaces_needed )
15928 : $available_spaces;
15929 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
15930 $item->decrease_SPACES($deleted_spaces);
15931 $item->increase_RECOVERABLE_SPACES($deleted_spaces);
15932 return $deleted_spaces;
15935 sub get_STACK_DEPTH {
15937 return $self->[STACK_DEPTH];
15942 return $self->[SPACES];
15947 return $self->[MARKED];
15951 my ( $self, $value ) = @_;
15952 if ( defined($value) ) {
15953 $self->[MARKED] = $value;
15955 return $self->[MARKED];
15958 sub get_AVAILABLE_SPACES {
15960 return $self->[AVAILABLE_SPACES];
15963 sub decrease_SPACES {
15964 my ( $self, $value ) = @_;
15965 if ( defined($value) ) {
15966 $self->[SPACES] -= $value;
15968 return $self->[SPACES];
15971 sub decrease_AVAILABLE_SPACES {
15972 my ( $self, $value ) = @_;
15973 if ( defined($value) ) {
15974 $self->[AVAILABLE_SPACES] -= $value;
15976 return $self->[AVAILABLE_SPACES];
15979 sub get_ALIGN_PAREN {
15981 return $self->[ALIGN_PAREN];
15984 sub get_RECOVERABLE_SPACES {
15986 return $self->[RECOVERABLE_SPACES];
15989 sub set_RECOVERABLE_SPACES {
15990 my ( $self, $value ) = @_;
15991 if ( defined($value) ) {
15992 $self->[RECOVERABLE_SPACES] = $value;
15994 return $self->[RECOVERABLE_SPACES];
15997 sub increase_RECOVERABLE_SPACES {
15998 my ( $self, $value ) = @_;
15999 if ( defined($value) ) {
16000 $self->[RECOVERABLE_SPACES] += $value;
16002 return $self->[RECOVERABLE_SPACES];
16007 return $self->[CI_LEVEL];
16012 return $self->[LEVEL];
16015 sub get_SEQUENCE_NUMBER {
16017 return $self->[SEQUENCE_NUMBER];
16022 return $self->[INDEX];
16025 sub get_STARTING_INDEX {
16027 return $self->[STARTING_INDEX];
16030 sub set_HAVE_CHILD {
16031 my ( $self, $value ) = @_;
16032 if ( defined($value) ) {
16033 $self->[HAVE_CHILD] = $value;
16035 return $self->[HAVE_CHILD];
16038 sub get_HAVE_CHILD {
16040 return $self->[HAVE_CHILD];
16043 sub set_ARROW_COUNT {
16044 my ( $self, $value ) = @_;
16045 if ( defined($value) ) {
16046 $self->[ARROW_COUNT] = $value;
16048 return $self->[ARROW_COUNT];
16051 sub get_ARROW_COUNT {
16053 return $self->[ARROW_COUNT];
16056 sub set_COMMA_COUNT {
16057 my ( $self, $value ) = @_;
16058 if ( defined($value) ) {
16059 $self->[COMMA_COUNT] = $value;
16061 return $self->[COMMA_COUNT];
16064 sub get_COMMA_COUNT {
16066 return $self->[COMMA_COUNT];
16070 my ( $self, $value ) = @_;
16071 if ( defined($value) ) {
16072 $self->[CLOSED] = $value;
16074 return $self->[CLOSED];
16079 return $self->[CLOSED];
16082 #####################################################################
16084 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
16085 # contain a single output line
16087 #####################################################################
16089 package Perl::Tidy::VerticalAligner::Line;
16096 use constant JMAX => 0;
16097 use constant JMAX_ORIGINAL_LINE => 1;
16098 use constant RTOKENS => 2;
16099 use constant RFIELDS => 3;
16100 use constant RPATTERNS => 4;
16101 use constant INDENTATION => 5;
16102 use constant LEADING_SPACE_COUNT => 6;
16103 use constant OUTDENT_LONG_LINES => 7;
16104 use constant LIST_TYPE => 8;
16105 use constant IS_HANGING_SIDE_COMMENT => 9;
16106 use constant RALIGNMENTS => 10;
16107 use constant MAXIMUM_LINE_LENGTH => 11;
16108 use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
16111 $_index_map{jmax} = JMAX;
16112 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
16113 $_index_map{rtokens} = RTOKENS;
16114 $_index_map{rfields} = RFIELDS;
16115 $_index_map{rpatterns} = RPATTERNS;
16116 $_index_map{indentation} = INDENTATION;
16117 $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
16118 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
16119 $_index_map{list_type} = LIST_TYPE;
16120 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
16121 $_index_map{ralignments} = RALIGNMENTS;
16122 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
16123 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
16125 my @_default_data = ();
16126 $_default_data[JMAX] = undef;
16127 $_default_data[JMAX_ORIGINAL_LINE] = undef;
16128 $_default_data[RTOKENS] = undef;
16129 $_default_data[RFIELDS] = undef;
16130 $_default_data[RPATTERNS] = undef;
16131 $_default_data[INDENTATION] = undef;
16132 $_default_data[LEADING_SPACE_COUNT] = undef;
16133 $_default_data[OUTDENT_LONG_LINES] = undef;
16134 $_default_data[LIST_TYPE] = undef;
16135 $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
16136 $_default_data[RALIGNMENTS] = [];
16137 $_default_data[MAXIMUM_LINE_LENGTH] = undef;
16138 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
16142 # methods to count object population
16144 sub get_count { $_count; }
16145 sub _increment_count { ++$_count }
16146 sub _decrement_count { --$_count }
16149 # Constructor may be called as a class method
16151 my ( $caller, %arg ) = @_;
16152 my $caller_is_obj = ref($caller);
16153 my $class = $caller_is_obj || $caller;
16155 my $self = bless [], $class;
16157 $self->[RALIGNMENTS] = [];
16160 foreach ( keys %_index_map ) {
16161 $index = $_index_map{$_};
16162 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
16163 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
16164 else { $self->[$index] = $_default_data[$index] }
16167 $self->_increment_count();
16172 $_[0]->_decrement_count();
16175 sub get_jmax { $_[0]->[JMAX] }
16176 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
16177 sub get_rtokens { $_[0]->[RTOKENS] }
16178 sub get_rfields { $_[0]->[RFIELDS] }
16179 sub get_rpatterns { $_[0]->[RPATTERNS] }
16180 sub get_indentation { $_[0]->[INDENTATION] }
16181 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
16182 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
16183 sub get_list_type { $_[0]->[LIST_TYPE] }
16184 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
16185 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
16187 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
16188 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
16189 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
16190 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
16192 sub get_starting_column {
16193 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
16196 sub increment_column {
16197 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
16199 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
16201 sub current_field_width {
16205 return $self->get_column($j);
16208 return $self->get_column($j) - $self->get_column( $j - 1 );
16212 sub field_width_growth {
16215 return $self->get_column($j) - $self->get_starting_column($j);
16218 sub starting_field_width {
16222 return $self->get_starting_column($j);
16225 return $self->get_starting_column($j) -
16226 $self->get_starting_column( $j - 1 );
16230 sub increase_field_width {
16233 my ( $j, $pad ) = @_;
16234 my $jmax = $self->get_jmax();
16235 for my $k ( $j .. $jmax ) {
16236 $self->increment_column( $k, $pad );
16240 sub get_available_space_on_right {
16242 my $jmax = $self->get_jmax();
16243 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
16246 sub set_jmax { $_[0]->[JMAX] = $_[1] }
16247 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
16248 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
16249 sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
16250 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
16251 sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
16252 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
16253 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
16254 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
16255 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
16256 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
16260 #####################################################################
16262 # the Perl::Tidy::VerticalAligner::Alignment class holds information
16263 # on a single column being aligned
16265 #####################################################################
16266 package Perl::Tidy::VerticalAligner::Alignment;
16274 # Symbolic array indexes
16275 use constant COLUMN => 0; # the current column number
16276 use constant STARTING_COLUMN => 1; # column number when created
16277 use constant MATCHING_TOKEN => 2; # what token we are matching
16278 use constant STARTING_LINE => 3; # the line index of creation
16279 use constant ENDING_LINE => 4; # the most recent line to use it
16280 use constant SAVED_COLUMN => 5; # the most recent line to use it
16281 use constant SERIAL_NUMBER => 6; # unique number for this alignment
16282 # (just its index in an array)
16284 # Correspondence between variables and array indexes
16286 $_index_map{column} = COLUMN;
16287 $_index_map{starting_column} = STARTING_COLUMN;
16288 $_index_map{matching_token} = MATCHING_TOKEN;
16289 $_index_map{starting_line} = STARTING_LINE;
16290 $_index_map{ending_line} = ENDING_LINE;
16291 $_index_map{saved_column} = SAVED_COLUMN;
16292 $_index_map{serial_number} = SERIAL_NUMBER;
16294 my @_default_data = ();
16295 $_default_data[COLUMN] = undef;
16296 $_default_data[STARTING_COLUMN] = undef;
16297 $_default_data[MATCHING_TOKEN] = undef;
16298 $_default_data[STARTING_LINE] = undef;
16299 $_default_data[ENDING_LINE] = undef;
16300 $_default_data[SAVED_COLUMN] = undef;
16301 $_default_data[SERIAL_NUMBER] = undef;
16303 # class population count
16306 sub get_count { $_count; }
16307 sub _increment_count { ++$_count }
16308 sub _decrement_count { --$_count }
16313 my ( $caller, %arg ) = @_;
16314 my $caller_is_obj = ref($caller);
16315 my $class = $caller_is_obj || $caller;
16317 my $self = bless [], $class;
16319 foreach ( keys %_index_map ) {
16320 my $index = $_index_map{$_};
16321 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
16322 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
16323 else { $self->[$index] = $_default_data[$index] }
16325 $self->_increment_count();
16330 $_[0]->_decrement_count();
16333 sub get_column { return $_[0]->[COLUMN] }
16334 sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
16335 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
16336 sub get_starting_line { return $_[0]->[STARTING_LINE] }
16337 sub get_ending_line { return $_[0]->[ENDING_LINE] }
16338 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
16340 sub set_column { $_[0]->[COLUMN] = $_[1] }
16341 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
16342 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
16343 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
16344 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
16345 sub increment_column { $_[0]->[COLUMN] += $_[1] }
16347 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
16348 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
16352 package Perl::Tidy::VerticalAligner;
16354 # The Perl::Tidy::VerticalAligner package collects output lines and
16355 # attempts to line up certain common tokens, such as => and #, which are
16356 # identified by the calling routine.
16358 # There are two main routines: append_line and flush. Append acts as a
16359 # storage buffer, collecting lines into a group which can be vertically
16360 # aligned. When alignment is no longer possible or desirable, it dumps
16361 # the group to flush.
16363 # append_line -----> flush
16371 # Caution: these debug flags produce a lot of output
16372 # They should all be 0 except when debugging small scripts
16374 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
16375 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
16377 my $debug_warning = sub {
16378 print "VALIGN_DEBUGGING with key $_[0]\n";
16381 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
16382 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
16387 $vertical_aligner_self
16389 $maximum_alignment_index
16393 $previous_minimum_jmax_seen
16394 $previous_maximum_jmax_seen
16395 $maximum_line_index
16400 $last_group_level_written
16401 $last_leading_space_count
16405 $last_comment_column
16406 $last_side_comment_line_number
16407 $last_side_comment_length
16408 $last_side_comment_level
16409 $outdented_line_count
16410 $first_outdented_line_at
16411 $last_outdented_line_at
16412 $diagnostics_object
16414 $file_writer_object
16415 @side_comment_history
16416 $comment_leading_space_count
16423 $cached_line_leading_space_count
16427 $rOpts_maximum_line_length
16428 $rOpts_continuation_indentation
16429 $rOpts_indent_columns
16431 $rOpts_entab_leading_whitespace
16433 $rOpts_minimum_space_to_comment
16441 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
16444 # variables describing the entire space group:
16446 $ralignment_list = [];
16448 $last_group_level_written = -1;
16449 $extra_indent_ok = 0; # can we move all lines to the right?
16450 $last_side_comment_length = 0;
16451 $maximum_jmax_seen = 0;
16452 $minimum_jmax_seen = 0;
16453 $previous_minimum_jmax_seen = 0;
16454 $previous_maximum_jmax_seen = 0;
16456 # variables describing each line of the group
16457 @group_lines = (); # list of all lines in group
16459 $outdented_line_count = 0;
16460 $first_outdented_line_at = 0;
16461 $last_outdented_line_at = 0;
16462 $last_side_comment_line_number = 0;
16463 $last_side_comment_level = -1;
16465 # most recent 3 side comments; [ line number, column ]
16466 $side_comment_history[0] = [ -300, 0 ];
16467 $side_comment_history[1] = [ -200, 0 ];
16468 $side_comment_history[2] = [ -100, 0 ];
16470 # write_leader_and_string cache:
16471 $cached_line_text = "";
16472 $cached_line_type = 0;
16473 $cached_line_flag = 0;
16475 $cached_line_valid = 0;
16476 $cached_line_leading_space_count = 0;
16478 # frequently used parameters
16479 $rOpts_indent_columns = $rOpts->{'indent-columns'};
16480 $rOpts_tabs = $rOpts->{'tabs'};
16481 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
16482 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
16483 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
16485 forget_side_comment();
16487 initialize_for_new_group();
16489 $vertical_aligner_self = {};
16490 bless $vertical_aligner_self, $class;
16491 return $vertical_aligner_self;
16494 sub initialize_for_new_group {
16495 $maximum_line_index = -1; # lines in the current group
16496 $maximum_alignment_index = -1; # alignments in current group
16497 $zero_count = 0; # count consecutive lines without tokens
16498 $current_line = undef; # line being matched for alignment
16499 $group_maximum_gap = 0; # largest gap introduced
16501 $marginal_match = 0;
16502 $comment_leading_space_count = 0;
16503 $last_leading_space_count = 0;
16506 # interface to Perl::Tidy::Diagnostics routines
16507 sub write_diagnostics {
16508 if ($diagnostics_object) {
16509 $diagnostics_object->write_diagnostics(@_);
16513 # interface to Perl::Tidy::Logger routines
16515 if ($logger_object) {
16516 $logger_object->warning(@_);
16520 sub write_logfile_entry {
16521 if ($logger_object) {
16522 $logger_object->write_logfile_entry(@_);
16526 sub report_definite_bug {
16527 if ($logger_object) {
16528 $logger_object->report_definite_bug();
16534 # return the number of leading spaces associated with an indentation
16535 # variable $indentation is either a constant number of spaces or an
16536 # object with a get_SPACES method.
16537 my $indentation = shift;
16538 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
16541 sub get_RECOVERABLE_SPACES {
16543 # return the number of spaces (+ means shift right, - means shift left)
16544 # that we would like to shift a group of lines with the same indentation
16545 # to get them to line up with their opening parens
16546 my $indentation = shift;
16547 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
16550 sub get_STACK_DEPTH {
16552 my $indentation = shift;
16553 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
16556 sub make_alignment {
16557 my ( $col, $token ) = @_;
16559 # make one new alignment at column $col which aligns token $token
16560 ++$maximum_alignment_index;
16561 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
16563 starting_column => $col,
16564 matching_token => $token,
16565 starting_line => $maximum_line_index,
16566 ending_line => $maximum_line_index,
16567 serial_number => $maximum_alignment_index,
16569 $ralignment_list->[$maximum_alignment_index] = $alignment;
16573 sub dump_alignments {
16575 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
16576 for my $i ( 0 .. $maximum_alignment_index ) {
16577 my $column = $ralignment_list->[$i]->get_column();
16578 my $starting_column = $ralignment_list->[$i]->get_starting_column();
16579 my $matching_token = $ralignment_list->[$i]->get_matching_token();
16580 my $starting_line = $ralignment_list->[$i]->get_starting_line();
16581 my $ending_line = $ralignment_list->[$i]->get_ending_line();
16583 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
16587 sub save_alignment_columns {
16588 for my $i ( 0 .. $maximum_alignment_index ) {
16589 $ralignment_list->[$i]->save_column();
16593 sub restore_alignment_columns {
16594 for my $i ( 0 .. $maximum_alignment_index ) {
16595 $ralignment_list->[$i]->restore_column();
16599 sub forget_side_comment {
16600 $last_comment_column = 0;
16605 # sub append is called to place one line in the current vertical group.
16607 # The input parameters are:
16608 # $level = indentation level of this line
16609 # $rfields = reference to array of fields
16610 # $rpatterns = reference to array of patterns, one per field
16611 # $rtokens = reference to array of tokens starting fields 1,2,..
16613 # Here is an example of what this package does. In this example,
16614 # we are trying to line up both the '=>' and the '#'.
16616 # '18' => 'grave', # \`
16617 # '19' => 'acute', # `'
16618 # '20' => 'caron', # \v
16619 # <-tabs-><f1-><--field 2 ---><-f3->
16622 # col1 col2 col3 col4
16624 # The calling routine has already broken the entire line into 3 fields as
16625 # indicated. (So the work of identifying promising common tokens has
16626 # already been done).
16628 # In this example, there will be 2 tokens being matched: '=>' and '#'.
16629 # They are the leading parts of fields 2 and 3, but we do need to know
16630 # what they are so that we can dump a group of lines when these tokens
16633 # The fields contain the actual characters of each field. The patterns
16634 # are like the fields, but they contain mainly token types instead
16635 # of tokens, so they have fewer characters. They are used to be
16636 # sure we are matching fields of similar type.
16638 # In this example, there will be 4 column indexes being adjusted. The
16639 # first one is always at zero. The interior columns are at the start of
16640 # the matching tokens, and the last one tracks the maximum line length.
16642 # Basically, each time a new line comes in, it joins the current vertical
16643 # group if possible. Otherwise it causes the current group to be dumped
16644 # and a new group is started.
16646 # For each new group member, the column locations are increased, as
16647 # necessary, to make room for the new fields. When the group is finally
16648 # output, these column numbers are used to compute the amount of spaces of
16649 # padding needed for each field.
16651 # Programming note: the fields are assumed not to have any tab characters.
16652 # Tabs have been previously removed except for tabs in quoted strings and
16653 # side comments. Tabs in these fields can mess up the column counting.
16654 # The log file warns the user if there are any such tabs.
16657 $level, $level_end,
16658 $indentation, $rfields,
16659 $rtokens, $rpatterns,
16660 $is_forced_break, $outdent_long_lines,
16661 $is_terminal_statement, $do_not_pad,
16662 $rvertical_tightness_flags, $level_jump,
16665 # number of fields is $jmax
16666 # number of tokens between fields is $jmax-1
16667 my $jmax = $#{$rfields};
16668 $previous_minimum_jmax_seen = $minimum_jmax_seen;
16669 $previous_maximum_jmax_seen = $maximum_jmax_seen;
16671 my $leading_space_count = get_SPACES($indentation);
16673 # set outdented flag to be sure we either align within statements or
16674 # across statement boundaries, but not both.
16675 my $is_outdented = $last_leading_space_count > $leading_space_count;
16676 $last_leading_space_count = $leading_space_count;
16678 # Patch: undo for hanging side comment
16679 my $is_hanging_side_comment =
16680 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
16681 $is_outdented = 0 if $is_hanging_side_comment;
16683 VALIGN_DEBUG_FLAG_APPEND0 && do {
16685 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
16688 # Validate cached line if necessary: If we can produce a container
16689 # with just 2 lines total by combining an existing cached opening
16690 # token with the closing token to follow, then we will mark both
16691 # cached flags as valid.
16692 if ($rvertical_tightness_flags) {
16693 if ( $maximum_line_index <= 0
16694 && $cached_line_type
16695 && $rvertical_tightness_flags->[2] == $cached_seqno )
16697 $rvertical_tightness_flags->[3] ||= 1;
16698 $cached_line_valid ||= 1;
16702 # do not join an opening block brace with an unbalanced line
16703 # unless requested with a flag value of 2
16704 if ( $cached_line_type == 3
16705 && $maximum_line_index < 0
16706 && $cached_line_flag < 2
16707 && $level_jump != 0 )
16709 $cached_line_valid = 0;
16712 # patch until new aligner is finished
16713 if ($do_not_pad) { my_flush() }
16715 # shouldn't happen:
16716 if ( $level < 0 ) { $level = 0 }
16718 # do not align code across indentation level changes
16719 if ( $level != $group_level || $is_outdented ) {
16721 # we are allowed to shift a group of lines to the right if its
16722 # level is greater than the previous and next group
16724 ( $level < $group_level && $last_group_level_written < $group_level );
16728 # If we know that this line will get flushed out by itself because
16729 # of level changes, we can leave the extra_indent_ok flag set.
16730 # That way, if we get an external flush call, we will still be
16731 # able to do some -lp alignment if necessary.
16732 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
16734 $group_level = $level;
16736 # wait until after the above flush to get the leading space
16737 # count because it may have been changed if the -icp flag is in
16739 $leading_space_count = get_SPACES($indentation);
16743 # --------------------------------------------------------------------
16744 # Patch to collect outdentable block COMMENTS
16745 # --------------------------------------------------------------------
16746 my $is_blank_line = "";
16747 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
16748 if ( $group_type eq 'COMMENT' ) {
16752 && $outdent_long_lines
16753 && $leading_space_count == $comment_leading_space_count
16758 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
16766 # --------------------------------------------------------------------
16767 # Step 1. Handle simple line of code with no fields to match.
16768 # --------------------------------------------------------------------
16769 if ( $jmax <= 0 ) {
16772 if ( $maximum_line_index >= 0
16773 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
16776 # flush the current group if it has some aligned columns..
16777 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
16779 # flush current group if we are just collecting side comments..
16782 # ...and we haven't seen a comment lately
16783 ( $zero_count > 3 )
16785 # ..or if this new line doesn't fit to the left of the comments
16786 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
16787 $group_lines[0]->get_column(0) )
16794 # patch to start new COMMENT group if this comment may be outdented
16795 if ( $is_block_comment
16796 && $outdent_long_lines
16797 && $maximum_line_index < 0 )
16799 $group_type = 'COMMENT';
16800 $comment_leading_space_count = $leading_space_count;
16801 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
16805 # just write this line directly if no current group, no side comment,
16806 # and no space recovery is needed.
16807 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
16809 write_leader_and_string( $leading_space_count, $$rfields[0], 0,
16810 $outdent_long_lines, $rvertical_tightness_flags );
16818 # programming check: (shouldn't happen)
16819 # an error here implies an incorrect call was made
16820 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
16822 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
16824 report_definite_bug();
16827 # --------------------------------------------------------------------
16828 # create an object to hold this line
16829 # --------------------------------------------------------------------
16830 my $new_line = new Perl::Tidy::VerticalAligner::Line(
16832 jmax_original_line => $jmax,
16833 rtokens => $rtokens,
16834 rfields => $rfields,
16835 rpatterns => $rpatterns,
16836 indentation => $indentation,
16837 leading_space_count => $leading_space_count,
16838 outdent_long_lines => $outdent_long_lines,
16840 is_hanging_side_comment => $is_hanging_side_comment,
16841 maximum_line_length => $rOpts->{'maximum-line-length'},
16842 rvertical_tightness_flags => $rvertical_tightness_flags,
16845 # --------------------------------------------------------------------
16846 # It simplifies things to create a zero length side comment
16848 # --------------------------------------------------------------------
16849 make_side_comment( $new_line, $level_end );
16851 # --------------------------------------------------------------------
16852 # Decide if this is a simple list of items.
16853 # There are 3 list types: none, comma, comma-arrow.
16854 # We use this below to be less restrictive in deciding what to align.
16855 # --------------------------------------------------------------------
16856 if ($is_forced_break) {
16857 decide_if_list($new_line);
16860 if ($current_line) {
16862 # --------------------------------------------------------------------
16863 # Allow hanging side comment to join current group, if any
16864 # This will help keep side comments aligned, because otherwise we
16865 # will have to start a new group, making alignment less likely.
16866 # --------------------------------------------------------------------
16867 join_hanging_comment( $new_line, $current_line )
16868 if $is_hanging_side_comment;
16870 # --------------------------------------------------------------------
16871 # If there is just one previous line, and it has more fields
16872 # than the new line, try to join fields together to get a match with
16873 # the new line. At the present time, only a single leading '=' is
16874 # allowed to be compressed out. This is useful in rare cases where
16875 # a table is forced to use old breakpoints because of side comments,
16876 # and the table starts out something like this:
16877 # my %MonthChars = ('0', 'Jan', # side comment
16880 # Eliminating the '=' field will allow the remaining fields to line up.
16881 # This situation does not occur if there are no side comments
16882 # because scan_list would put a break after the opening '('.
16883 # --------------------------------------------------------------------
16884 eliminate_old_fields( $new_line, $current_line );
16886 # --------------------------------------------------------------------
16887 # If the new line has more fields than the current group,
16888 # see if we can match the first fields and combine the remaining
16889 # fields of the new line.
16890 # --------------------------------------------------------------------
16891 eliminate_new_fields( $new_line, $current_line );
16893 # --------------------------------------------------------------------
16894 # Flush previous group unless all common tokens and patterns match..
16895 # --------------------------------------------------------------------
16896 check_match( $new_line, $current_line );
16898 # --------------------------------------------------------------------
16899 # See if there is space for this line in the current group (if any)
16900 # --------------------------------------------------------------------
16901 if ($current_line) {
16902 check_fit( $new_line, $current_line );
16906 # --------------------------------------------------------------------
16907 # Append this line to the current group (or start new group)
16908 # --------------------------------------------------------------------
16909 accept_line($new_line);
16911 # Future update to allow this to vary:
16912 $current_line = $new_line if ( $maximum_line_index == 0 );
16914 # --------------------------------------------------------------------
16915 # Step 8. Some old debugging stuff
16916 # --------------------------------------------------------------------
16917 VALIGN_DEBUG_FLAG_APPEND && do {
16918 print "APPEND fields:";
16919 dump_array(@$rfields);
16920 print "APPEND tokens:";
16921 dump_array(@$rtokens);
16922 print "APPEND patterns:";
16923 dump_array(@$rpatterns);
16928 sub join_hanging_comment {
16931 my $jmax = $line->get_jmax();
16932 return 0 unless $jmax == 1; # must be 2 fields
16933 my $rtokens = $line->get_rtokens();
16934 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
16935 my $rfields = $line->get_rfields();
16936 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
16937 my $old_line = shift;
16938 my $maximum_field_index = $old_line->get_jmax();
16940 unless $maximum_field_index > $jmax; # the current line has more fields
16941 my $rpatterns = $line->get_rpatterns();
16943 $line->set_is_hanging_side_comment(1);
16944 $jmax = $maximum_field_index;
16945 $line->set_jmax($jmax);
16946 $$rfields[$jmax] = $$rfields[1];
16947 $$rtokens[ $jmax - 1 ] = $$rtokens[0];
16948 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
16949 for ( my $j = 1 ; $j < $jmax ; $j++ ) {
16950 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
16951 $$rtokens[ $j - 1 ] = "";
16952 $$rpatterns[ $j - 1 ] = "";
16957 sub eliminate_old_fields {
16959 my $new_line = shift;
16960 my $jmax = $new_line->get_jmax();
16961 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
16962 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
16964 # there must be one previous line
16965 return unless ( $maximum_line_index == 0 );
16967 my $old_line = shift;
16968 my $maximum_field_index = $old_line->get_jmax();
16970 # this line must have fewer fields
16971 return unless $maximum_field_index > $jmax;
16973 # Identify specific cases where field elimination is allowed:
16974 # case=1: both lines have comma-separated lists, and the first
16975 # line has an equals
16976 # case=2: both lines have leading equals
16978 # case 1 is the default
16981 # See if case 2: both lines have leading '='
16982 # We'll require smiliar leading patterns in this case
16983 my $old_rtokens = $old_line->get_rtokens();
16984 my $rtokens = $new_line->get_rtokens();
16985 my $rpatterns = $new_line->get_rpatterns();
16986 my $old_rpatterns = $old_line->get_rpatterns();
16987 if ( $rtokens->[0] =~ /^=\d*$/
16988 && $old_rtokens->[0] eq $rtokens->[0]
16989 && $old_rpatterns->[0] eq $rpatterns->[0] )
16994 # not too many fewer fields in new line for case 1
16995 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
16997 # case 1 must have side comment
16998 my $old_rfields = $old_line->get_rfields();
17001 && length( $$old_rfields[$maximum_field_index] ) == 0 );
17003 my $rfields = $new_line->get_rfields();
17005 my $hid_equals = 0;
17007 my @new_alignments = ();
17008 my @new_fields = ();
17009 my @new_matching_patterns = ();
17010 my @new_matching_tokens = ();
17014 my $current_field = '';
17015 my $current_pattern = '';
17017 # loop over all old tokens
17019 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
17020 $current_field .= $$old_rfields[$k];
17021 $current_pattern .= $$old_rpatterns[$k];
17022 last if ( $j > $jmax - 1 );
17024 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
17026 $new_fields[$j] = $current_field;
17027 $new_matching_patterns[$j] = $current_pattern;
17028 $current_field = '';
17029 $current_pattern = '';
17030 $new_matching_tokens[$j] = $$old_rtokens[$k];
17031 $new_alignments[$j] = $old_line->get_alignment($k);
17036 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
17037 last if ( $case == 2 ); # avoid problems with stuff
17038 # like: $a=$b=$c=$d;
17042 if ( $in_match && $case == 1 )
17043 ; # disallow gaps in matching field types in case 1
17047 # Modify the current state if we are successful.
17048 # We must exactly reach the ends of both lists for success.
17049 if ( ( $j == $jmax )
17050 && ( $current_field eq '' )
17051 && ( $case != 1 || $hid_equals ) )
17053 $k = $maximum_field_index;
17054 $current_field .= $$old_rfields[$k];
17055 $current_pattern .= $$old_rpatterns[$k];
17056 $new_fields[$j] = $current_field;
17057 $new_matching_patterns[$j] = $current_pattern;
17059 $new_alignments[$j] = $old_line->get_alignment($k);
17060 $maximum_field_index = $j;
17062 $old_line->set_alignments(@new_alignments);
17063 $old_line->set_jmax($jmax);
17064 $old_line->set_rtokens( \@new_matching_tokens );
17065 $old_line->set_rfields( \@new_fields );
17066 $old_line->set_rpatterns( \@$rpatterns );
17070 # create an empty side comment if none exists
17071 sub make_side_comment {
17072 my $new_line = shift;
17073 my $level_end = shift;
17074 my $jmax = $new_line->get_jmax();
17075 my $rtokens = $new_line->get_rtokens();
17077 # if line does not have a side comment...
17078 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
17079 my $rfields = $new_line->get_rfields();
17080 my $rpatterns = $new_line->get_rpatterns();
17081 $$rtokens[$jmax] = '#';
17082 $$rfields[ ++$jmax ] = '';
17083 $$rpatterns[$jmax] = '#';
17084 $new_line->set_jmax($jmax);
17085 $new_line->set_jmax_original_line($jmax);
17088 # line has a side comment..
17091 # don't remember old side comment location for very long
17092 my $line_number = $vertical_aligner_self->get_output_line_number();
17093 my $rfields = $new_line->get_rfields();
17095 $line_number - $last_side_comment_line_number > 12
17097 # and don't remember comment location across block level changes
17098 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
17101 forget_side_comment();
17103 $last_side_comment_line_number = $line_number;
17104 $last_side_comment_level = $level_end;
17108 sub decide_if_list {
17112 # A list will be taken to be a line with a forced break in which all
17113 # of the field separators are commas or comma-arrows (except for the
17116 # List separator tokens are things like ',3' or '=>2',
17117 # where the trailing digit is the nesting depth. Allow braces
17118 # to allow nested list items.
17119 my $rtokens = $line->get_rtokens();
17120 my $test_token = $$rtokens[0];
17121 if ( $test_token =~ /^(\,|=>)/ ) {
17122 my $list_type = $test_token;
17123 my $jmax = $line->get_jmax();
17125 foreach ( 1 .. $jmax - 2 ) {
17126 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
17131 $line->set_list_type($list_type);
17135 sub eliminate_new_fields {
17137 return unless ( $maximum_line_index >= 0 );
17138 my $new_line = shift;
17139 my $old_line = shift;
17140 my $jmax = $new_line->get_jmax();
17142 my $old_rtokens = $old_line->get_rtokens();
17143 my $rtokens = $new_line->get_rtokens();
17144 my $is_assignment =
17145 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
17147 # must be monotonic variation
17148 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
17150 # must be more fields in the new line
17151 my $maximum_field_index = $old_line->get_jmax();
17152 return unless ( $maximum_field_index < $jmax );
17154 unless ($is_assignment) {
17156 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
17157 ; # only if monotonic
17159 # never combine fields of a comma list
17161 unless ( $maximum_field_index > 1 )
17162 && ( $new_line->get_list_type() !~ /^,/ );
17165 my $rfields = $new_line->get_rfields();
17166 my $rpatterns = $new_line->get_rpatterns();
17167 my $old_rpatterns = $old_line->get_rpatterns();
17169 # loop over all old tokens except comment
17172 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
17173 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
17174 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
17181 # first tokens agree, so combine new tokens
17183 for $k ( $maximum_field_index .. $jmax - 1 ) {
17185 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
17186 $$rfields[$k] = "";
17187 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
17188 $$rpatterns[$k] = "";
17191 $$rtokens[ $maximum_field_index - 1 ] = '#';
17192 $$rfields[$maximum_field_index] = $$rfields[$jmax];
17193 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
17194 $jmax = $maximum_field_index;
17196 $new_line->set_jmax($jmax);
17201 my $new_line = shift;
17202 my $old_line = shift;
17204 my $jmax = $new_line->get_jmax();
17205 my $maximum_field_index = $old_line->get_jmax();
17207 # flush if this line has too many fields
17208 if ( $jmax > $maximum_field_index ) { my_flush(); return }
17210 # flush if adding this line would make a non-monotonic field count
17212 ( $maximum_field_index > $jmax ) # this has too few fields
17214 ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic
17215 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
17223 # otherwise append this line if everything matches
17224 my $jmax_original_line = $new_line->get_jmax_original_line();
17225 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
17226 my $rtokens = $new_line->get_rtokens();
17227 my $rfields = $new_line->get_rfields();
17228 my $rpatterns = $new_line->get_rpatterns();
17229 my $list_type = $new_line->get_list_type();
17231 my $group_list_type = $old_line->get_list_type();
17232 my $old_rpatterns = $old_line->get_rpatterns();
17233 my $old_rtokens = $old_line->get_rtokens();
17235 my $jlimit = $jmax - 1;
17236 if ( $maximum_field_index > $jmax ) {
17237 $jlimit = $jmax_original_line;
17238 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
17241 my $everything_matches = 1;
17243 # common list types always match
17244 unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
17245 || $is_hanging_side_comment )
17248 my $leading_space_count = $new_line->get_leading_space_count();
17249 my $saw_equals = 0;
17250 for my $j ( 0 .. $jlimit ) {
17253 my $old_tok = $$old_rtokens[$j];
17254 my $new_tok = $$rtokens[$j];
17256 # dumb down the match after an equals
17257 if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
17259 $old_tok =~ s/\+.*$//;
17261 if ( $new_tok =~ /^=\d*$/ ) { $saw_equals = 1 }
17263 # we never match if the matching tokens differ
17265 && $old_tok ne $new_tok )
17270 # otherwise, if patterns match, we always have a match.
17271 # However, if patterns don't match, we have to be careful...
17272 elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
17274 # We have to be very careful about aligning commas when the
17275 # pattern's don't match, because it can be worse to create an
17276 # alignment where none is needed than to omit one. The current
17277 # rule: if we are within a matching sub call (indicated by '+'
17278 # in the matching token), we'll allow a marginal match, but
17281 # Here's an example where we'd like to align the '='
17282 # my $cfile = File::Spec->catfile( 't', 'callext.c' );
17283 # my $inc = File::Spec->catdir( 'Basic', 'Core' );
17284 # because the function names differ.
17285 # Future alignment logic should make this unnecessary.
17287 # Here's an example where the ','s are not contained in a call.
17288 # The first line below should probably not match the next two:
17289 # ( $a, $b ) = ( $b, $r );
17290 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
17291 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
17292 if ( $new_tok =~ /^,/ ) {
17293 if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
17294 $marginal_match = 1;
17301 # parens don't align well unless patterns match
17302 elsif ( $new_tok =~ /^\(/ ) {
17306 # Handle an '=' alignment with different patterns to
17308 elsif ( $new_tok =~ /^=\d*$/ ) {
17312 # It is best to be a little restrictive when
17313 # aligning '=' tokens. Here is an example of
17314 # two lines that we will not align:
17317 # The problem is that one is a 'my' declaration,
17318 # and the other isn't, so they're not very similar.
17319 # We will filter these out by comparing the first
17320 # letter of the pattern. This is crude, but works
17323 substr( $$old_rpatterns[$j], 0, 1 ) ne
17324 substr( $$rpatterns[$j], 0, 1 ) )
17329 # If we pass that test, we'll call it a marginal match.
17330 # Here is an example of a marginal match:
17332 # $op = compile_bblock($op);
17333 # The left tokens are both identifiers, but
17334 # one accesses a hash and the other doesn't.
17335 # We'll let this be a tentative match and undo
17336 # it later if we don't find more than 2 lines
17338 elsif ( $maximum_line_index == 0 ) {
17339 $marginal_match = 1;
17344 # Don't let line with fewer fields increase column widths
17346 if ( $maximum_field_index > $jmax ) {
17348 length( $$rfields[$j] ) - $old_line->current_field_width($j);
17351 $pad += $leading_space_count;
17354 # TESTING: suspend this rule to allow last lines to join
17355 if ( $pad > 0 ) { $match = 0; }
17359 $everything_matches = 0;
17365 if ( $maximum_field_index > $jmax ) {
17367 if ($everything_matches) {
17369 my $comment = $$rfields[$jmax];
17370 for $jmax ( $jlimit .. $maximum_field_index ) {
17371 $$rtokens[$jmax] = $$old_rtokens[$jmax];
17372 $$rfields[ ++$jmax ] = '';
17373 $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
17375 $$rfields[$jmax] = $comment;
17376 $new_line->set_jmax($jmax);
17380 my_flush() unless ($everything_matches);
17385 return unless ( $maximum_line_index >= 0 );
17386 my $new_line = shift;
17387 my $old_line = shift;
17389 my $jmax = $new_line->get_jmax();
17390 my $leading_space_count = $new_line->get_leading_space_count();
17391 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
17392 my $rtokens = $new_line->get_rtokens();
17393 my $rfields = $new_line->get_rfields();
17394 my $rpatterns = $new_line->get_rpatterns();
17396 my $group_list_type = $group_lines[0]->get_list_type();
17398 my $padding_so_far = 0;
17399 my $padding_available = $old_line->get_available_space_on_right();
17401 # save current columns in case this doesn't work
17402 save_alignment_columns();
17404 my ( $j, $pad, $eight );
17405 my $maximum_field_index = $old_line->get_jmax();
17406 for $j ( 0 .. $jmax ) {
17408 ## testing patch to avoid excessive gaps in previous lines,
17409 # due to a line of fewer fields.
17410 # return join( ".",
17411 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
17412 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
17413 ## MOVED BELOW AS A TEST
17414 ##next if ($jmax < $maximum_field_index && $j==$jmax-1);
17416 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
17419 $pad += $leading_space_count;
17422 # remember largest gap of the group, excluding gap to side comment
17424 && $group_maximum_gap < -$pad
17426 && $j < $jmax - 1 )
17428 $group_maximum_gap = -$pad;
17433 ## This patch helps sometimes, but it doesn't check to see if
17434 ## the line is too long even without the side comment. It needs
17436 ##don't let a long token with no trailing side comment push
17437 ##side comments out, or end a group. (sidecmt1.t)
17438 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
17440 # This line will need space; lets see if we want to accept it..
17443 # not if this won't fit
17444 ( $pad > $padding_available )
17446 # previously, there were upper bounds placed on padding here
17447 # (maximum_whitespace_columns), but they were not really helpful
17452 # revert to starting state then flush; things didn't work out
17453 restore_alignment_columns();
17458 # TESTING PATCH moved from above to be sure we fit
17459 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
17461 # looks ok, squeeze this field in
17462 $old_line->increase_field_width( $j, $pad );
17463 $padding_available -= $pad;
17465 # remember largest gap of the group, excluding gap to side comment
17466 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
17467 $group_maximum_gap = $pad;
17474 my $new_line = shift;
17475 $group_lines[ ++$maximum_line_index ] = $new_line;
17477 # initialize field lengths if starting new group
17478 if ( $maximum_line_index == 0 ) {
17480 my $jmax = $new_line->get_jmax();
17481 my $rfields = $new_line->get_rfields();
17482 my $rtokens = $new_line->get_rtokens();
17484 my $col = $new_line->get_leading_space_count();
17486 for $j ( 0 .. $jmax ) {
17487 $col += length( $$rfields[$j] );
17489 # create initial alignments for the new group
17491 if ( $j < $jmax ) { $token = $$rtokens[$j] }
17492 my $alignment = make_alignment( $col, $token );
17493 $new_line->set_alignment( $j, $alignment );
17496 $maximum_jmax_seen = $jmax;
17497 $minimum_jmax_seen = $jmax;
17500 # use previous alignments otherwise
17502 my @new_alignments =
17503 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
17504 $new_line->set_alignments(@new_alignments);
17510 # debug routine to dump array contents
17515 # flush() sends the current Perl::Tidy::VerticalAligner group down the
17516 # pipeline to Perl::Tidy::FileWriter.
17518 # This is the external flush, which also empties the cache
17521 if ( $maximum_line_index < 0 ) {
17522 if ($cached_line_type) {
17523 entab_and_output( $cached_line_text,
17524 $cached_line_leading_space_count,
17525 $last_group_level_written );
17526 $cached_line_type = 0;
17527 $cached_line_text = "";
17535 # This is the internal flush, which leaves the cache intact
17538 return if ( $maximum_line_index < 0 );
17540 # handle a group of comment lines
17541 if ( $group_type eq 'COMMENT' ) {
17543 VALIGN_DEBUG_FLAG_APPEND0 && do {
17544 my ( $a, $b, $c ) = caller();
17546 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
17549 my $leading_space_count = $comment_leading_space_count;
17550 my $leading_string = get_leading_string($leading_space_count);
17552 # zero leading space count if any lines are too long
17553 my $max_excess = 0;
17554 for my $i ( 0 .. $maximum_line_index ) {
17555 my $str = $group_lines[$i];
17557 length($str) + $leading_space_count - $rOpts_maximum_line_length;
17558 if ( $excess > $max_excess ) {
17559 $max_excess = $excess;
17563 if ( $max_excess > 0 ) {
17564 $leading_space_count -= $max_excess;
17565 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
17566 $last_outdented_line_at =
17567 $file_writer_object->get_output_line_number();
17568 unless ($outdented_line_count) {
17569 $first_outdented_line_at = $last_outdented_line_at;
17571 $outdented_line_count += ( $maximum_line_index + 1 );
17574 # write the group of lines
17575 my $outdent_long_lines = 0;
17576 for my $i ( 0 .. $maximum_line_index ) {
17577 write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
17578 $outdent_long_lines, "" );
17582 # handle a group of code lines
17585 VALIGN_DEBUG_FLAG_APPEND0 && do {
17586 my $group_list_type = $group_lines[0]->get_list_type();
17587 my ( $a, $b, $c ) = caller();
17588 my $maximum_field_index = $group_lines[0]->get_jmax();
17590 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
17594 # some small groups are best left unaligned
17595 my $do_not_align = decide_if_aligned();
17597 # optimize side comment location
17598 $do_not_align = adjust_side_comment($do_not_align);
17600 # recover spaces for -lp option if possible
17601 my $extra_leading_spaces = get_extra_leading_spaces();
17603 # all lines of this group have the same basic leading spacing
17604 my $group_leader_length = $group_lines[0]->get_leading_space_count();
17606 # add extra leading spaces if helpful
17608 improve_continuation_indentation( $do_not_align,
17609 $group_leader_length );
17611 # loop to output all lines
17612 for my $i ( 0 .. $maximum_line_index ) {
17613 my $line = $group_lines[$i];
17614 write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
17615 $group_leader_length, $extra_leading_spaces );
17618 initialize_for_new_group();
17621 sub decide_if_aligned {
17623 # Do not try to align two lines which are not really similar
17624 return unless $maximum_line_index == 1;
17626 my $group_list_type = $group_lines[0]->get_list_type();
17628 my $do_not_align = (
17630 # always align lists
17635 # don't align if it was just a marginal match
17638 # don't align two lines with big gap
17639 || $group_maximum_gap > 12
17641 # or lines with differing number of alignment tokens
17642 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
17646 # But try to convert them into a simple comment group if the first line
17647 # a has side comment
17648 my $rfields = $group_lines[0]->get_rfields();
17649 my $maximum_field_index = $group_lines[0]->get_jmax();
17651 && ( $maximum_line_index > 0 )
17652 && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
17657 return $do_not_align;
17660 sub adjust_side_comment {
17662 my $do_not_align = shift;
17664 # let's see if we can move the side comment field out a little
17665 # to improve readability (the last field is always a side comment field)
17666 my $have_side_comment = 0;
17667 my $first_side_comment_line = -1;
17668 my $maximum_field_index = $group_lines[0]->get_jmax();
17669 for my $i ( 0 .. $maximum_line_index ) {
17670 my $line = $group_lines[$i];
17672 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
17673 $have_side_comment = 1;
17674 $first_side_comment_line = $i;
17679 my $kmax = $maximum_field_index + 1;
17681 if ($have_side_comment) {
17683 my $line = $group_lines[0];
17685 # the maximum space without exceeding the line length:
17686 my $avail = $line->get_available_space_on_right();
17688 # try to use the previous comment column
17689 my $side_comment_column = $line->get_column( $kmax - 2 );
17690 my $move = $last_comment_column - $side_comment_column;
17692 ## my $sc_line0 = $side_comment_history[0]->[0];
17693 ## my $sc_col0 = $side_comment_history[0]->[1];
17694 ## my $sc_line1 = $side_comment_history[1]->[0];
17695 ## my $sc_col1 = $side_comment_history[1]->[1];
17696 ## my $sc_line2 = $side_comment_history[2]->[0];
17697 ## my $sc_col2 = $side_comment_history[2]->[1];
17699 ## # FUTURE UPDATES:
17700 ## # Be sure to ignore 'do not align' and '} # end comments'
17701 ## # Find first $move > 0 and $move <= $avail as follows:
17702 ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
17703 ## # 2. try sc_col2 if (line-sc_line2) < 12
17704 ## # 3. try min possible space, plus up to 8,
17705 ## # 4. try min possible space
17707 if ( $kmax > 0 && !$do_not_align ) {
17709 # but if this doesn't work, give up and use the minimum space
17710 if ( $move > $avail ) {
17711 $move = $rOpts_minimum_space_to_comment - 1;
17714 # but we want some minimum space to the comment
17715 my $min_move = $rOpts_minimum_space_to_comment - 1;
17717 && $last_side_comment_length > 0
17718 && ( $first_side_comment_line == 0 )
17719 && $group_level == $last_group_level_written )
17724 if ( $move < $min_move ) {
17728 # prevously, an upper bound was placed on $move here,
17729 # (maximum_space_to_comment), but it was not helpful
17731 # don't exceed the available space
17732 if ( $move > $avail ) { $move = $avail }
17734 # we can only increase space, never decrease
17736 $line->increase_field_width( $maximum_field_index - 1, $move );
17739 # remember this column for the next group
17740 $last_comment_column = $line->get_column( $kmax - 2 );
17744 # try to at least line up the existing side comment location
17745 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
17746 $line->increase_field_width( $maximum_field_index - 1, $move );
17750 # reset side comment column if we can't align
17752 forget_side_comment();
17756 return $do_not_align;
17759 sub improve_continuation_indentation {
17760 my ( $do_not_align, $group_leader_length ) = @_;
17762 # See if we can increase the continuation indentation
17763 # to move all continuation lines closer to the next field
17764 # (unless it is a comment).
17766 # '$min_ci_gap'is the extra indentation that we may need to introduce.
17767 # We will only introduce this to fields which already have some ci.
17768 # Without this variable, we would occasionally get something like this
17771 # use overload '+' => \&plus,
17773 # '*' => \&multiply,
17776 # 'atan2' => \&atan2,
17778 # Whereas with this variable, we can shift variables over to get this:
17780 # use overload '+' => \&plus,
17782 # '*' => \&multiply,
17785 # 'atan2' => \&atan2,
17787 ## BUB: Deactivated####################
17788 # The trouble with this patch is that it may, for example,
17789 # move in some 'or's or ':'s, and leave some out, so that the
17790 # left edge alignment suffers.
17792 ###########################################
17794 my $maximum_field_index = $group_lines[0]->get_jmax();
17796 my $min_ci_gap = $rOpts_maximum_line_length;
17797 if ( $maximum_field_index > 1 && !$do_not_align ) {
17799 for my $i ( 0 .. $maximum_line_index ) {
17800 my $line = $group_lines[$i];
17801 my $leading_space_count = $line->get_leading_space_count();
17802 my $rfields = $line->get_rfields();
17804 my $gap = $line->get_column(0) - $leading_space_count -
17805 length( $$rfields[0] );
17807 if ( $leading_space_count > $group_leader_length ) {
17808 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
17812 if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
17819 return $min_ci_gap;
17822 sub write_vertically_aligned_line {
17824 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
17825 $extra_leading_spaces )
17827 my $rfields = $line->get_rfields();
17828 my $leading_space_count = $line->get_leading_space_count();
17829 my $outdent_long_lines = $line->get_outdent_long_lines();
17830 my $maximum_field_index = $line->get_jmax();
17831 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
17833 # add any extra spaces
17834 if ( $leading_space_count > $group_leader_length ) {
17835 $leading_space_count += $min_ci_gap;
17838 my $str = $$rfields[0];
17840 # loop to concatenate all fields of this line and needed padding
17841 my $total_pad_count = 0;
17843 for $j ( 1 .. $maximum_field_index ) {
17845 # skip zero-length side comments
17847 if ( ( $j == $maximum_field_index )
17848 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
17851 # compute spaces of padding before this field
17852 my $col = $line->get_column( $j - 1 );
17853 $pad = $col - ( length($str) + $leading_space_count );
17855 if ($do_not_align) {
17857 ( $j < $maximum_field_index )
17859 : $rOpts_minimum_space_to_comment - 1;
17862 # accumulate the padding
17863 if ( $pad > 0 ) { $total_pad_count += $pad; }
17866 if ( !defined $$rfields[$j] ) {
17867 write_diagnostics("UNDEFined field at j=$j\n");
17870 # only add padding when we have a finite field;
17871 # this avoids extra terminal spaces if we have empty fields
17872 if ( length( $$rfields[$j] ) > 0 ) {
17873 $str .= ' ' x $total_pad_count;
17874 $total_pad_count = 0;
17875 $str .= $$rfields[$j];
17878 # update side comment history buffer
17879 if ( $j == $maximum_field_index ) {
17880 my $lineno = $file_writer_object->get_output_line_number();
17881 shift @side_comment_history;
17882 push @side_comment_history, [ $lineno, $col ];
17886 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
17888 # ship this line off
17889 write_leader_and_string( $leading_space_count + $extra_leading_spaces,
17890 $str, $side_comment_length, $outdent_long_lines,
17891 $rvertical_tightness_flags );
17894 sub get_extra_leading_spaces {
17896 #----------------------------------------------------------
17897 # Define any extra indentation space (for the -lp option).
17899 # If a list has side comments, sub scan_list must dump the
17900 # list before it sees everything. When this happens, it sets
17901 # the indentation to the standard scheme, but notes how
17902 # many spaces it would have liked to use. We may be able
17903 # to recover that space here in the event that that all of the
17904 # lines of a list are back together again.
17905 #----------------------------------------------------------
17907 my $extra_leading_spaces = 0;
17908 if ($extra_indent_ok) {
17909 my $object = $group_lines[0]->get_indentation();
17910 if ( ref($object) ) {
17911 my $extra_indentation_spaces_wanted =
17912 get_RECOVERABLE_SPACES($object);
17914 # all indentation objects must be the same
17916 for $i ( 1 .. $maximum_line_index ) {
17917 if ( $object != $group_lines[$i]->get_indentation() ) {
17918 $extra_indentation_spaces_wanted = 0;
17923 if ($extra_indentation_spaces_wanted) {
17925 # the maximum space without exceeding the line length:
17926 my $avail = $group_lines[0]->get_available_space_on_right();
17927 $extra_leading_spaces =
17928 ( $avail > $extra_indentation_spaces_wanted )
17929 ? $extra_indentation_spaces_wanted
17932 # update the indentation object because with -icp the terminal
17933 # ');' will use the same adjustment.
17934 $object->permanently_decrease_AVAILABLE_SPACES(
17935 -$extra_leading_spaces );
17939 return $extra_leading_spaces;
17942 sub combine_fields {
17944 # combine all fields except for the comment field ( sidecmt.t )
17946 my $maximum_field_index = $group_lines[0]->get_jmax();
17947 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
17948 my $line = $group_lines[$j];
17949 my $rfields = $line->get_rfields();
17950 foreach ( 1 .. $maximum_field_index - 1 ) {
17951 $$rfields[0] .= $$rfields[$_];
17953 $$rfields[1] = $$rfields[$maximum_field_index];
17955 $line->set_jmax(1);
17956 $line->set_column( 0, 0 );
17957 $line->set_column( 1, 0 );
17960 $maximum_field_index = 1;
17962 for $j ( 0 .. $maximum_line_index ) {
17963 my $line = $group_lines[$j];
17964 my $rfields = $line->get_rfields();
17965 for $k ( 0 .. $maximum_field_index ) {
17966 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
17968 $pad += $group_lines[$j]->get_leading_space_count();
17971 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
17977 sub get_output_line_number {
17979 # the output line number reported to a caller is the number of items
17980 # written plus the number of items in the buffer
17982 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
17985 sub write_leader_and_string {
17987 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
17988 $rvertical_tightness_flags )
17991 # handle outdenting of long lines:
17992 if ($outdent_long_lines) {
17994 length($str) - $side_comment_length + $leading_space_count -
17995 $rOpts_maximum_line_length;
17996 if ( $excess > 0 ) {
17997 $leading_space_count = 0;
17998 $last_outdented_line_at =
17999 $file_writer_object->get_output_line_number();
18001 unless ($outdented_line_count) {
18002 $first_outdented_line_at = $last_outdented_line_at;
18004 $outdented_line_count++;
18008 # Make preliminary leading whitespace. It could get changed
18009 # later by entabbing, so we have to keep track of any changes
18010 # to the leading_space_count from here on.
18011 my $leading_string =
18012 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
18014 # Unpack any recombination data; it was packed by
18015 # sub send_lines_to_vertical_aligner. Contents:
18017 # [0] type: 1=opening 2=closing 3=opening block brace
18018 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
18019 # if closing: spaces of padding to use
18020 # [2] sequence number of container
18021 # [3] valid flag: do not append if this flag is false
18023 my ( $open_or_close, $tightness_flag, $seqno, $valid );
18024 if ($rvertical_tightness_flags) {
18025 ( $open_or_close, $tightness_flag, $seqno, $valid ) =
18026 @{$rvertical_tightness_flags};
18029 # handle any cached line ..
18030 # either append this line to it or write it out
18031 if ( length($cached_line_text) ) {
18033 if ( !$cached_line_valid ) {
18034 entab_and_output( $cached_line_text,
18035 $cached_line_leading_space_count,
18036 $last_group_level_written );
18039 # handle cached line with opening container token
18040 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
18042 my $gap = $leading_space_count - length($cached_line_text);
18044 # handle option of just one tight opening per line:
18045 if ( $cached_line_flag == 1 ) {
18046 if ( defined($open_or_close) && $open_or_close == 1 ) {
18052 $leading_string = $cached_line_text . ' ' x $gap;
18053 $leading_space_count = $cached_line_leading_space_count;
18056 entab_and_output( $cached_line_text,
18057 $cached_line_leading_space_count,
18058 $last_group_level_written );
18062 # handle cached line to place before this closing container token
18064 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
18066 if ( length($test_line) <= $rOpts_maximum_line_length ) {
18068 $leading_string = "";
18069 $leading_space_count = $cached_line_leading_space_count;
18072 entab_and_output( $cached_line_text,
18073 $cached_line_leading_space_count,
18074 $last_group_level_written );
18078 $cached_line_type = 0;
18079 $cached_line_text = "";
18081 # make the line to be written
18082 my $line = $leading_string . $str;
18084 # write or cache this line
18085 if ( !$rvertical_tightness_flags || $side_comment_length > 0 ) {
18086 entab_and_output( $line, $leading_space_count, $group_level );
18089 $cached_line_text = $line;
18090 $cached_line_type = $open_or_close;
18091 $cached_line_flag = $tightness_flag;
18092 $cached_seqno = $seqno;
18093 $cached_line_valid = $valid;
18094 $cached_line_leading_space_count = $leading_space_count;
18097 $last_group_level_written = $group_level;
18098 $last_side_comment_length = $side_comment_length;
18099 $extra_indent_ok = 0;
18102 sub entab_and_output {
18103 my ( $line, $leading_space_count, $level ) = @_;
18105 # The line is currently correct if there is no tabbing (recommended!)
18106 # We may have to lop off some leading spaces and replace with tabs.
18107 if ( $leading_space_count > 0 ) {
18109 # Nothing to do if no tabs
18110 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
18111 || $rOpts_indent_columns <= 0 )
18117 # Handle entab option
18118 elsif ($rOpts_entab_leading_whitespace) {
18120 $leading_space_count % $rOpts_entab_leading_whitespace;
18122 int( $leading_space_count / $rOpts_entab_leading_whitespace );
18123 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
18124 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
18125 substr( $line, 0, $leading_space_count ) = $leading_string;
18129 # REMOVE AFTER TESTING
18130 # shouldn't happen - program error counting whitespace
18131 # we'll skip entabbing
18133 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
18138 # Handle option of one tab per level
18140 my $leading_string = ( "\t" x $level );
18142 $leading_space_count - $level * $rOpts_indent_columns;
18144 # shouldn't happen:
18145 if ( $space_count < 0 ) {
18147 "Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
18149 $leading_string = ( ' ' x $leading_space_count );
18152 $leading_string .= ( ' ' x $space_count );
18154 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
18155 substr( $line, 0, $leading_space_count ) = $leading_string;
18159 # REMOVE AFTER TESTING
18160 # shouldn't happen - program error counting whitespace
18161 # we'll skip entabbing
18163 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
18168 $file_writer_object->write_code_line( $line . "\n" );
18171 { # begin get_leading_string
18173 my @leading_string_cache;
18175 sub get_leading_string {
18177 # define the leading whitespace string for this line..
18178 my $leading_whitespace_count = shift;
18180 # Handle case of zero whitespace, which includes multi-line quotes
18181 # (which may have a finite level; this prevents tab problems)
18182 if ( $leading_whitespace_count <= 0 ) {
18186 # look for previous result
18187 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
18188 return $leading_string_cache[$leading_whitespace_count];
18191 # must compute a string for this number of spaces
18192 my $leading_string;
18194 # Handle simple case of no tabs
18195 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
18196 || $rOpts_indent_columns <= 0 )
18198 $leading_string = ( ' ' x $leading_whitespace_count );
18201 # Handle entab option
18202 elsif ($rOpts_entab_leading_whitespace) {
18204 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
18207 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
18208 $leading_string = "\t" x $tab_count . ' ' x $space_count;
18211 # Handle option of one tab per level
18213 $leading_string = ( "\t" x $group_level );
18215 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
18217 # shouldn't happen:
18218 if ( $space_count < 0 ) {
18220 "Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
18222 $leading_string = ( ' ' x $leading_whitespace_count );
18225 $leading_string .= ( ' ' x $space_count );
18228 $leading_string_cache[$leading_whitespace_count] = $leading_string;
18229 return $leading_string;
18231 } # end get_leading_string
18233 sub report_anything_unusual {
18235 if ( $outdented_line_count > 0 ) {
18236 write_logfile_entry(
18237 "$outdented_line_count long lines were outdented:\n");
18238 write_logfile_entry(
18239 " First at output line $first_outdented_line_at\n");
18241 if ( $outdented_line_count > 1 ) {
18242 write_logfile_entry(
18243 " Last at output line $last_outdented_line_at\n");
18245 write_logfile_entry(
18246 " use -noll to prevent outdenting, -l=n to increase line length\n"
18248 write_logfile_entry("\n");
18252 #####################################################################
18254 # the Perl::Tidy::FileWriter class writes the output file
18256 #####################################################################
18258 package Perl::Tidy::FileWriter;
18260 # Maximum number of little messages; probably need not be changed.
18261 use constant MAX_NAG_MESSAGES => 6;
18263 sub write_logfile_entry {
18265 my $logger_object = $self->{_logger_object};
18266 if ($logger_object) {
18267 $logger_object->write_logfile_entry(@_);
18273 my ( $line_sink_object, $rOpts, $logger_object ) = @_;
18276 _line_sink_object => $line_sink_object,
18277 _logger_object => $logger_object,
18279 _output_line_number => 1,
18280 _consecutive_blank_lines => 0,
18281 _consecutive_nonblank_lines => 0,
18282 _first_line_length_error => 0,
18283 _max_line_length_error => 0,
18284 _last_line_length_error => 0,
18285 _first_line_length_error_at => 0,
18286 _max_line_length_error_at => 0,
18287 _last_line_length_error_at => 0,
18288 _line_length_error_count => 0,
18289 _max_output_line_length => 0,
18290 _max_output_line_length_at => 0,
18296 $self->{_line_sink_object}->tee_on();
18301 $self->{_line_sink_object}->tee_off();
18304 sub get_output_line_number {
18306 return $self->{_output_line_number};
18309 sub decrement_output_line_number {
18311 $self->{_output_line_number}--;
18314 sub get_consecutive_nonblank_lines {
18316 return $self->{_consecutive_nonblank_lines};
18319 sub reset_consecutive_blank_lines {
18321 $self->{_consecutive_blank_lines} = 0;
18324 sub want_blank_line {
18326 unless ( $self->{_consecutive_blank_lines} ) {
18327 $self->write_blank_code_line();
18331 sub write_blank_code_line {
18333 my $rOpts = $self->{_rOpts};
18335 if ( $self->{_consecutive_blank_lines} >=
18336 $rOpts->{'maximum-consecutive-blank-lines'} );
18337 $self->{_consecutive_blank_lines}++;
18338 $self->{_consecutive_nonblank_lines} = 0;
18339 $self->write_line("\n");
18342 sub write_code_line {
18346 if ( $a =~ /^\s*$/ ) {
18347 my $rOpts = $self->{_rOpts};
18349 if ( $self->{_consecutive_blank_lines} >=
18350 $rOpts->{'maximum-consecutive-blank-lines'} );
18351 $self->{_consecutive_blank_lines}++;
18352 $self->{_consecutive_nonblank_lines} = 0;
18355 $self->{_consecutive_blank_lines} = 0;
18356 $self->{_consecutive_nonblank_lines}++;
18358 $self->write_line($a);
18365 # TODO: go through and see if the test is necessary here
18366 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
18368 $self->{_line_sink_object}->write_line($a);
18370 # This calculation of excess line length ignores any internal tabs
18371 my $rOpts = $self->{_rOpts};
18372 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
18373 if ( $a =~ /^\t+/g ) {
18374 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
18377 # Note that we just incremented output line number to future value
18378 # so we must subtract 1 for current line number
18379 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
18380 $self->{_max_output_line_length} = length($a) - 1;
18381 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
18384 if ( $exceed > 0 ) {
18385 my $output_line_number = $self->{_output_line_number};
18386 $self->{_last_line_length_error} = $exceed;
18387 $self->{_last_line_length_error_at} = $output_line_number - 1;
18388 if ( $self->{_line_length_error_count} == 0 ) {
18389 $self->{_first_line_length_error} = $exceed;
18390 $self->{_first_line_length_error_at} = $output_line_number - 1;
18394 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
18396 $self->{_max_line_length_error} = $exceed;
18397 $self->{_max_line_length_error_at} = $output_line_number - 1;
18400 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
18401 $self->write_logfile_entry(
18402 "Line length exceeded by $exceed characters\n");
18404 $self->{_line_length_error_count}++;
18409 sub report_line_length_errors {
18411 my $rOpts = $self->{_rOpts};
18412 my $line_length_error_count = $self->{_line_length_error_count};
18413 if ( $line_length_error_count == 0 ) {
18414 $self->write_logfile_entry(
18415 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
18416 my $max_output_line_length = $self->{_max_output_line_length};
18417 my $max_output_line_length_at = $self->{_max_output_line_length_at};
18418 $self->write_logfile_entry(
18419 " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
18425 my $word = ( $line_length_error_count > 1 ) ? "s" : "";
18426 $self->write_logfile_entry(
18427 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
18430 $word = ( $line_length_error_count > 1 ) ? "First" : "";
18431 my $first_line_length_error = $self->{_first_line_length_error};
18432 my $first_line_length_error_at = $self->{_first_line_length_error_at};
18433 $self->write_logfile_entry(
18434 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
18437 if ( $line_length_error_count > 1 ) {
18438 my $max_line_length_error = $self->{_max_line_length_error};
18439 my $max_line_length_error_at = $self->{_max_line_length_error_at};
18440 my $last_line_length_error = $self->{_last_line_length_error};
18441 my $last_line_length_error_at = $self->{_last_line_length_error_at};
18442 $self->write_logfile_entry(
18443 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
18445 $self->write_logfile_entry(
18446 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
18452 #####################################################################
18454 # The Perl::Tidy::Debugger class shows line tokenization
18456 #####################################################################
18458 package Perl::Tidy::Debugger;
18462 my ( $class, $filename ) = @_;
18465 _debug_file => $filename,
18466 _debug_file_opened => 0,
18471 sub really_open_debug_file {
18474 my $debug_file = $self->{_debug_file};
18476 unless ( $fh = IO::File->new("> $debug_file") ) {
18477 warn("can't open $debug_file: $!\n");
18479 $self->{_debug_file_opened} = 1;
18480 $self->{_fh} = $fh;
18482 "Use -dump-token-types (-dtt) to get a list of token type codes\n";
18485 sub close_debug_file {
18488 my $fh = $self->{_fh};
18489 if ( $self->{_debug_file_opened} ) {
18491 eval { $self->{_fh}->close() };
18495 sub write_debug_entry {
18497 # This is a debug dump routine which may be modified as necessary
18498 # to dump tokens on a line-by-line basis. The output will be written
18499 # to the .DEBUG file when the -D flag is entered.
18501 my $line_of_tokens = shift;
18503 my $input_line = $line_of_tokens->{_line_text};
18504 my $rtoken_type = $line_of_tokens->{_rtoken_type};
18505 my $rtokens = $line_of_tokens->{_rtokens};
18506 my $rlevels = $line_of_tokens->{_rlevels};
18507 my $rslevels = $line_of_tokens->{_rslevels};
18508 my $rblock_type = $line_of_tokens->{_rblock_type};
18509 my $input_line_number = $line_of_tokens->{_line_number};
18510 my $line_type = $line_of_tokens->{_line_type};
18514 my $token_str = "$input_line_number: ";
18515 my $reconstructed_original = "$input_line_number: ";
18516 my $block_str = "$input_line_number: ";
18518 #$token_str .= "$line_type: ";
18519 #$reconstructed_original .= "$line_type: ";
18522 my @next_char = ( '"', '"' );
18524 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
18525 my $fh = $self->{_fh};
18527 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
18530 if ( $$rtoken_type[$j] eq 'k' ) {
18531 $pattern .= $$rtokens[$j];
18534 $pattern .= $$rtoken_type[$j];
18536 $reconstructed_original .= $$rtokens[$j];
18537 $block_str .= "($$rblock_type[$j])";
18538 $num = length( $$rtokens[$j] );
18539 my $type_str = $$rtoken_type[$j];
18541 # be sure there are no blank tokens (shouldn't happen)
18542 # This can only happen if a programming error has been made
18543 # because all valid tokens are non-blank
18544 if ( $type_str eq ' ' ) {
18545 print $fh "BLANK TOKEN on the next line\n";
18546 $type_str = $next_char[$i_next];
18547 $i_next = 1 - $i_next;
18550 if ( length($type_str) == 1 ) {
18551 $type_str = $type_str x $num;
18553 $token_str .= $type_str;
18556 # Write what you want here ...
18557 # print $fh "$input_line\n";
18558 # print $fh "$pattern\n";
18559 print $fh "$reconstructed_original\n";
18560 print $fh "$token_str\n";
18562 #print $fh "$block_str\n";
18565 #####################################################################
18567 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
18568 # method for returning the next line to be parsed, as well as a
18569 # 'peek_ahead()' method
18571 # The input parameter is an object with a 'get_line()' method
18572 # which returns the next line to be parsed
18574 #####################################################################
18576 package Perl::Tidy::LineBuffer;
18581 my $line_source_object = shift;
18584 _line_source_object => $line_source_object,
18585 _rlookahead_buffer => [],
18591 my $buffer_index = shift;
18593 my $line_source_object = $self->{_line_source_object};
18594 my $rlookahead_buffer = $self->{_rlookahead_buffer};
18595 if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
18596 $line = $$rlookahead_buffer[$buffer_index];
18599 $line = $line_source_object->get_line();
18600 push( @$rlookahead_buffer, $line );
18608 my $line_source_object = $self->{_line_source_object};
18609 my $rlookahead_buffer = $self->{_rlookahead_buffer};
18611 if ( scalar(@$rlookahead_buffer) ) {
18612 $line = shift @$rlookahead_buffer;
18615 $line = $line_source_object->get_line();
18620 ########################################################################
18622 # the Perl::Tidy::Tokenizer package is essentially a filter which
18623 # reads lines of perl source code from a source object and provides
18624 # corresponding tokenized lines through its get_line() method. Lines
18625 # flow from the source_object to the caller like this:
18627 # source_object --> LineBuffer_object --> Tokenizer --> calling routine
18628 # get_line() get_line() get_line() line_of_tokens
18630 # The source object can be any object with a get_line() method which
18631 # supplies one line (a character string) perl call.
18632 # The LineBuffer object is created by the Tokenizer.
18633 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
18634 # containing one tokenized line for each call to its get_line() method.
18636 # WARNING: This is not a real class yet. Only one tokenizer my be used.
18638 ########################################################################
18640 package Perl::Tidy::Tokenizer;
18644 # Caution: these debug flags produce a lot of output
18645 # They should all be 0 except when debugging small scripts
18647 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
18648 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
18649 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
18650 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
18651 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
18653 my $debug_warning = sub {
18654 print "TOKENIZER_DEBUGGING with key $_[0]\n";
18657 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
18658 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
18659 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
18660 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
18661 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
18668 $level_in_tokenizer
18669 $slevel_in_tokenizer
18670 $nesting_token_string
18671 $nesting_type_string
18672 $nesting_block_string
18673 $nesting_block_flag
18674 $nesting_list_string
18676 $saw_negative_indentation
18678 $last_nonblank_token
18679 $last_nonblank_type
18680 $last_nonblank_block_type
18681 $last_nonblank_container_type
18682 $last_nonblank_type_sequence
18683 $last_last_nonblank_token
18684 $last_last_nonblank_type
18685 $last_last_nonblank_block_type
18686 $last_last_nonblank_container_type
18687 $last_last_nonblank_type_sequence
18688 $last_nonblank_prototype
18697 $allowed_quote_modifiers
18700 @paren_semicolon_count
18701 @paren_structural_type
18704 @brace_structural_type
18705 @brace_statement_type
18708 $square_bracket_depth
18709 @square_bracket_type
18710 @square_bracket_structural_type
18712 @starting_line_of_current_depth
18714 @current_sequence_number
18715 @nesting_sequence_number
18716 @lower_case_labels_at
18720 %user_function_prototype
18721 %saw_function_definition
18725 $unexpected_error_count
18734 $ci_string_in_tokenizer
18735 $continuation_string_in_tokenizer
18736 $in_statement_continuation
18737 $started_looking_for_here_target_at
18738 $nearly_matched_here_target_at
18740 %is_indirect_object_taker
18742 %expecting_operator_token
18743 %expecting_operator_types
18744 %expecting_term_types
18745 %expecting_term_token
18747 %is_block_list_function
18749 %is_file_test_operator
18751 %is_valid_token_type
18753 %is_code_block_token
18755 @opening_brace_names
18756 @closing_brace_names
18757 %is_keyword_taking_list
18758 %is_q_qq_qw_qx_qr_s_y_tr_m
18761 # possible values of operator_expected()
18762 use constant TERM => -1;
18763 use constant UNKNOWN => 0;
18764 use constant OPERATOR => 1;
18766 # possible values of context
18767 use constant SCALAR_CONTEXT => -1;
18768 use constant UNKNOWN_CONTEXT => 0;
18769 use constant LIST_CONTEXT => 1;
18771 # Maximum number of little messages; probably need not be changed.
18772 use constant MAX_NAG_MESSAGES => 6;
18776 # methods to count instances
18778 sub get_count { $_count; }
18779 sub _increment_count { ++$_count }
18780 sub _decrement_count { --$_count }
18784 $_[0]->_decrement_count();
18791 # Note: 'tabs' and 'indent_columns' are temporary and should be
18794 source_object => undef,
18795 debugger_object => undef,
18796 diagnostics_object => undef,
18797 logger_object => undef,
18798 starting_level => undef,
18799 indent_columns => 4,
18801 look_for_hash_bang => 0,
18803 look_for_autoloader => 1,
18804 look_for_selfloader => 1,
18806 my %args = ( %defaults, @_ );
18808 # we are given an object with a get_line() method to supply source lines
18809 my $source_object = $args{source_object};
18811 # we create another object with a get_line() and peek_ahead() method
18812 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
18814 # Tokenizer state data is as follows:
18815 # _rhere_target_list reference to list of here-doc targets
18816 # _here_doc_target the target string for a here document
18817 # _here_quote_character the type of here-doc quoting (" ' ` or none)
18818 # to determine if interpolation is done
18819 # _quote_target character we seek if chasing a quote
18820 # _line_start_quote line where we started looking for a long quote
18821 # _in_here_doc flag indicating if we are in a here-doc
18822 # _in_pod flag set if we are in pod documentation
18823 # _in_error flag set if we saw severe error (binary in script)
18824 # _in_data flag set if we are in __DATA__ section
18825 # _in_end flag set if we are in __END__ section
18826 # _in_format flag set if we are in a format description
18827 # _in_attribute_list flag telling if we are looking for attributes
18828 # _in_quote flag telling if we are chasing a quote
18829 # _starting_level indentation level of first line
18830 # _input_tabstr string denoting one indentation level of input file
18831 # _know_input_tabstr flag indicating if we know _input_tabstr
18832 # _line_buffer_object object with get_line() method to supply source code
18833 # _diagnostics_object place to write debugging information
18834 $tokenizer_self = {
18835 _rhere_target_list => undef,
18837 _here_doc_target => "",
18838 _here_quote_character => "",
18844 _in_attribute_list => 0,
18846 _quote_target => "",
18847 _line_start_quote => -1,
18848 _starting_level => $args{starting_level},
18849 _know_starting_level => defined( $args{starting_level} ),
18850 _tabs => $args{tabs},
18851 _indent_columns => $args{indent_columns},
18852 _look_for_hash_bang => $args{look_for_hash_bang},
18853 _trim_qw => $args{trim_qw},
18854 _input_tabstr => "",
18855 _know_input_tabstr => -1,
18856 _last_line_number => 0,
18857 _saw_perl_dash_P => 0,
18858 _saw_perl_dash_w => 0,
18859 _saw_use_strict => 0,
18860 _look_for_autoloader => $args{look_for_autoloader},
18861 _look_for_selfloader => $args{look_for_selfloader},
18862 _saw_autoloader => 0,
18863 _saw_selfloader => 0,
18864 _saw_hash_bang => 0,
18867 _saw_lc_filehandle => 0,
18868 _started_tokenizing => 0,
18869 _line_buffer_object => $line_buffer_object,
18870 _debugger_object => $args{debugger_object},
18871 _diagnostics_object => $args{diagnostics_object},
18872 _logger_object => $args{logger_object},
18875 prepare_for_a_new_file();
18876 find_starting_indentation_level();
18878 bless $tokenizer_self, $class;
18880 # This is not a full class yet, so die if an attempt is made to
18881 # create more than one object.
18883 if ( _increment_count() > 1 ) {
18885 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
18888 return $tokenizer_self;
18892 # interface to Perl::Tidy::Logger routines
18894 my $logger_object = $tokenizer_self->{_logger_object};
18895 if ($logger_object) {
18896 $logger_object->warning(@_);
18901 my $logger_object = $tokenizer_self->{_logger_object};
18902 if ($logger_object) {
18903 $logger_object->complain(@_);
18907 sub write_logfile_entry {
18908 my $logger_object = $tokenizer_self->{_logger_object};
18909 if ($logger_object) {
18910 $logger_object->write_logfile_entry(@_);
18914 sub interrupt_logfile {
18915 my $logger_object = $tokenizer_self->{_logger_object};
18916 if ($logger_object) {
18917 $logger_object->interrupt_logfile();
18921 sub resume_logfile {
18922 my $logger_object = $tokenizer_self->{_logger_object};
18923 if ($logger_object) {
18924 $logger_object->resume_logfile();
18928 sub increment_brace_error {
18929 my $logger_object = $tokenizer_self->{_logger_object};
18930 if ($logger_object) {
18931 $logger_object->increment_brace_error();
18935 sub report_definite_bug {
18936 my $logger_object = $tokenizer_self->{_logger_object};
18937 if ($logger_object) {
18938 $logger_object->report_definite_bug();
18942 sub brace_warning {
18943 my $logger_object = $tokenizer_self->{_logger_object};
18944 if ($logger_object) {
18945 $logger_object->brace_warning(@_);
18949 sub get_saw_brace_error {
18950 my $logger_object = $tokenizer_self->{_logger_object};
18951 if ($logger_object) {
18952 $logger_object->get_saw_brace_error();
18959 # interface to Perl::Tidy::Diagnostics routines
18960 sub write_diagnostics {
18961 if ( $tokenizer_self->{_diagnostics_object} ) {
18962 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
18966 sub report_tokenization_errors {
18970 my $level = get_indentation_level();
18971 if ( $level != $tokenizer_self->{_starting_level} ) {
18972 warning("final indentation level: $level\n");
18975 check_final_nesting_depths();
18977 if ( $tokenizer_self->{_look_for_hash_bang}
18978 && !$tokenizer_self->{_saw_hash_bang} )
18981 "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
18984 if ( $tokenizer_self->{_in_format} ) {
18985 warning("hit EOF while in format description\n");
18988 # this check may be removed after a year or so
18989 if ( $tokenizer_self->{_saw_lc_filehandle} ) {
18991 warning( <<'EOM' );
18992 ------------------------------------------------------------------------
18993 PLEASE NOTE: If you get this message, it is because perltidy noticed
18994 possible ambiguous syntax at one or more places in your script, as
18995 noted above. The problem is with statements accepting indirect objects,
18996 such as print and printf statements of the form
18998 print bareword ( $etc
19000 Perltidy needs your help in deciding if 'bareword' is a filehandle or a
19001 function call. The problem is the space between 'bareword' and '('. If
19002 'bareword' is a function call, you should remove the trailing space. If
19003 'bareword' is a filehandle, you should avoid the opening paren or else
19004 globally capitalize 'bareword' to be BAREWORD. So the above line
19007 print bareword( $etc # function
19009 print bareword @list # filehandle
19011 print BAREWORD ( $etc # filehandle
19013 If you want to keep the line as it is, and are sure it is correct,
19014 you can use -w=0 to prevent this message.
19015 ------------------------------------------------------------------------
19020 if ( $tokenizer_self->{_in_pod} ) {
19022 # Just write log entry if this is after __END__ or __DATA__
19023 # because this happens to often, and it is not likely to be
19025 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
19026 write_logfile_entry(
19027 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
19033 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
19039 if ( $tokenizer_self->{_in_here_doc} ) {
19040 my $here_doc_target = $tokenizer_self->{_here_doc_target};
19041 if ($here_doc_target) {
19043 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
19048 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
19051 if ($nearly_matched_here_target_at) {
19053 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
19058 if ( $tokenizer_self->{_in_quote} ) {
19059 my $line_start_quote = $tokenizer_self->{_line_start_quote};
19060 my $quote_target = $tokenizer_self->{_quote_target};
19062 ( $tokenizer_self->{_in_attribute_list} )
19066 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
19070 unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
19071 if ( $] < 5.006 ) {
19072 write_logfile_entry("Suggest including '-w parameter'\n");
19075 write_logfile_entry("Suggest including 'use warnings;'\n");
19079 if ( $tokenizer_self->{_saw_perl_dash_P} ) {
19080 write_logfile_entry("Use of -P parameter for defines is discouraged\n");
19083 unless ( $tokenizer_self->{_saw_use_strict} ) {
19084 write_logfile_entry("Suggest including 'use strict;'\n");
19087 # it is suggested that lables have at least one upper case character
19088 # for legibility and to avoid code breakage as new keywords are introduced
19089 if (@lower_case_labels_at) {
19090 my $num = @lower_case_labels_at;
19091 write_logfile_entry(
19092 "Suggest using upper case characters in label(s)\n");
19094 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
19098 sub report_v_string {
19100 # warn if this version can't handle v-strings
19102 $saw_v_string = $input_line_number;
19103 if ( $] < 5.006 ) {
19105 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
19110 sub get_input_line_number {
19111 return $tokenizer_self->{_last_line_number};
19114 # returns the next tokenized line
19119 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
19121 return undef unless ($input_line);
19123 $tokenizer_self->{_last_line_number}++;
19125 # Find and remove what characters terminate this line, including any
19127 my $input_line_separator = "";
19128 if ( chomp($input_line) ) { $input_line_separator = $/ }
19130 # TODO: what other characters should be included here?
19131 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
19132 $input_line_separator = $2 . $input_line_separator;
19135 # for backwards compatability we keep the line text terminated with
19136 # a newline character
19137 $input_line .= "\n";
19139 my $input_line_number = $tokenizer_self->{_last_line_number};
19141 # create a data structure describing this line which will be
19142 # returned to the caller.
19144 # _line_type codes are:
19145 # SYSTEM - system-specific code before hash-bang line
19146 # CODE - line of perl code (including comments)
19147 # POD_START - line starting pod, such as '=head'
19148 # POD - pod documentation text
19149 # POD_END - last line of pod section, '=cut'
19150 # HERE - text of here-document
19151 # HERE_END - last line of here-doc (target word)
19152 # FORMAT - format section
19153 # FORMAT_END - last line of format section, '.'
19154 # DATA_START - __DATA__ line
19155 # DATA - unidentified text following __DATA__
19156 # END_START - __END__ line
19157 # END - unidentified text following __END__
19158 # ERROR - we are in big trouble, probably not a perl script
19161 # _curly_brace_depth - depth of curly braces at start of line
19162 # _square_bracket_depth - depth of square brackets at start of line
19163 # _paren_depth - depth of parens at start of line
19164 # _starting_in_quote - this line continues a multi-line quote
19165 # (so don't trim leading blanks!)
19166 # _ending_in_quote - this line ends in a multi-line quote
19167 # (so don't trim trailing blanks!)
19168 my $line_of_tokens = {
19169 _line_type => 'EOF',
19170 _line_text => $input_line,
19171 _line_number => $input_line_number,
19172 _rtoken_type => undef,
19175 _rslevels => undef,
19176 _rblock_type => undef,
19177 _rcontainer_type => undef,
19178 _rcontainer_environment => undef,
19179 _rtype_sequence => undef,
19180 _rnesting_tokens => undef,
19181 _rci_levels => undef,
19182 _rnesting_blocks => undef,
19183 _python_indentation_level => -1, ## 0,
19184 _starting_in_quote =>
19185 ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ),
19186 _ending_in_quote => 0,
19187 _curly_brace_depth => $brace_depth,
19188 _square_bracket_depth => $square_bracket_depth,
19189 _paren_depth => $paren_depth,
19190 _quote_character => '',
19193 # must print line unchanged if we are in a here document
19194 if ( $tokenizer_self->{_in_here_doc} ) {
19196 $line_of_tokens->{_line_type} = 'HERE';
19197 my $here_doc_target = $tokenizer_self->{_here_doc_target};
19198 my $here_quote_character = $tokenizer_self->{_here_quote_character};
19199 my $candidate_target = $input_line;
19200 chomp $candidate_target;
19201 if ( $candidate_target eq $here_doc_target ) {
19202 $nearly_matched_here_target_at = undef;
19203 $line_of_tokens->{_line_type} = 'HERE_END';
19204 write_logfile_entry("Exiting HERE document $here_doc_target\n");
19206 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
19207 if (@$rhere_target_list) { # there can be multiple here targets
19208 ( $here_doc_target, $here_quote_character ) =
19209 @{ shift @$rhere_target_list };
19210 $tokenizer_self->{_here_doc_target} = $here_doc_target;
19211 $tokenizer_self->{_here_quote_character} =
19212 $here_quote_character;
19213 write_logfile_entry(
19214 "Entering HERE document $here_doc_target\n");
19215 $nearly_matched_here_target_at = undef;
19216 $started_looking_for_here_target_at = $input_line_number;
19219 $tokenizer_self->{_in_here_doc} = 0;
19220 $tokenizer_self->{_here_doc_target} = "";
19221 $tokenizer_self->{_here_quote_character} = "";
19225 # check for error of extra whitespace
19226 # note for PERL6: leading whitespace is allowed
19228 $candidate_target =~ s/\s*$//;
19229 $candidate_target =~ s/^\s*//;
19230 if ( $candidate_target eq $here_doc_target ) {
19231 $nearly_matched_here_target_at = $input_line_number;
19234 return $line_of_tokens;
19237 # must print line unchanged if we are in a format section
19238 elsif ( $tokenizer_self->{_in_format} ) {
19240 if ( $input_line =~ /^\.[\s#]*$/ ) {
19241 write_logfile_entry("Exiting format section\n");
19242 $tokenizer_self->{_in_format} = 0;
19243 $line_of_tokens->{_line_type} = 'FORMAT_END';
19246 $line_of_tokens->{_line_type} = 'FORMAT';
19248 return $line_of_tokens;
19251 # must print line unchanged if we are in pod documentation
19252 elsif ( $tokenizer_self->{_in_pod} ) {
19254 $line_of_tokens->{_line_type} = 'POD';
19255 if ( $input_line =~ /^=cut/ ) {
19256 $line_of_tokens->{_line_type} = 'POD_END';
19257 write_logfile_entry("Exiting POD section\n");
19258 $tokenizer_self->{_in_pod} = 0;
19260 if ( $input_line =~ /^\#\!.*perl\b/ ) {
19262 "Hash-bang in pod can cause older versions of perl to fail! \n"
19266 return $line_of_tokens;
19269 # must print line unchanged if we have seen a severe error (i.e., we
19270 # are seeing illegal tokens and connot continue. Syntax errors do
19271 # not pass this route). Calling routine can decide what to do, but
19272 # the default can be to just pass all lines as if they were after __END__
19273 elsif ( $tokenizer_self->{_in_error} ) {
19274 $line_of_tokens->{_line_type} = 'ERROR';
19275 return $line_of_tokens;
19278 # print line unchanged if we are __DATA__ section
19279 elsif ( $tokenizer_self->{_in_data} ) {
19281 # ...but look for POD
19282 # Note that the _in_data and _in_end flags remain set
19283 # so that we return to that state after seeing the
19284 # end of a pod section
19285 if ( $input_line =~ /^=(?!cut)/ ) {
19286 $line_of_tokens->{_line_type} = 'POD_START';
19287 write_logfile_entry("Entering POD section\n");
19288 $tokenizer_self->{_in_pod} = 1;
19289 return $line_of_tokens;
19292 $line_of_tokens->{_line_type} = 'DATA';
19293 return $line_of_tokens;
19297 # print line unchanged if we are in __END__ section
19298 elsif ( $tokenizer_self->{_in_end} ) {
19300 # ...but look for POD
19301 # Note that the _in_data and _in_end flags remain set
19302 # so that we return to that state after seeing the
19303 # end of a pod section
19304 if ( $input_line =~ /^=(?!cut)/ ) {
19305 $line_of_tokens->{_line_type} = 'POD_START';
19306 write_logfile_entry("Entering POD section\n");
19307 $tokenizer_self->{_in_pod} = 1;
19308 return $line_of_tokens;
19311 $line_of_tokens->{_line_type} = 'END';
19312 return $line_of_tokens;
19316 # check for a hash-bang line if we haven't seen one
19317 if ( !$tokenizer_self->{_saw_hash_bang} ) {
19318 if ( $input_line =~ /^\#\!.*perl\b/ ) {
19319 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
19321 # check for -w and -P flags
19322 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
19323 $tokenizer_self->{_saw_perl_dash_P} = 1;
19326 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
19327 $tokenizer_self->{_saw_perl_dash_w} = 1;
19330 if ( ( $input_line_number > 1 )
19331 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
19334 # this is helpful for VMS systems; we may have accidentally
19335 # tokenized some DCL commands
19336 if ( $tokenizer_self->{_started_tokenizing} ) {
19338 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
19342 complain("Useless hash-bang after line 1\n");
19346 # Report the leading hash-bang as a system line
19347 # This will prevent -dac from deleting it
19349 $line_of_tokens->{_line_type} = 'SYSTEM';
19350 return $line_of_tokens;
19355 # wait for a hash-bang before parsing if the user invoked us with -x
19356 if ( $tokenizer_self->{_look_for_hash_bang}
19357 && !$tokenizer_self->{_saw_hash_bang} )
19359 $line_of_tokens->{_line_type} = 'SYSTEM';
19360 return $line_of_tokens;
19363 # a first line of the form ': #' will be marked as SYSTEM
19364 # since lines of this form may be used by tcsh
19365 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
19366 $line_of_tokens->{_line_type} = 'SYSTEM';
19367 return $line_of_tokens;
19370 # now we know that it is ok to tokenize the line...
19371 # the line tokenizer will modify any of these private variables:
19372 # _rhere_target_list
19379 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
19380 tokenize_this_line($line_of_tokens);
19382 # Now finish defining the return structure and return it
19383 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
19385 # handle severe error (binary data in script)
19386 if ( $tokenizer_self->{_in_error} ) {
19387 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
19388 warning("Giving up after error\n");
19389 $line_of_tokens->{_line_type} = 'ERROR';
19390 reset_indentation_level(0); # avoid error messages
19391 return $line_of_tokens;
19394 # handle start of pod documentation
19395 if ( $tokenizer_self->{_in_pod} ) {
19397 # This gets tricky..above a __DATA__ or __END__ section, perl
19398 # accepts '=cut' as the start of pod section. But afterwards,
19399 # only pod utilities see it and they may ignore an =cut without
19400 # leading =head. In any case, this isn't good.
19401 if ( $input_line =~ /^=cut\b/ ) {
19402 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
19403 complain("=cut while not in pod ignored\n");
19404 $tokenizer_self->{_in_pod} = 0;
19405 $line_of_tokens->{_line_type} = 'POD_STOP';
19408 $line_of_tokens->{_line_type} = 'POD_END';
19410 "=cut starts a pod section .. this can fool pod utilities.\n"
19412 write_logfile_entry("Entering POD section\n");
19417 $line_of_tokens->{_line_type} = 'POD_START';
19418 write_logfile_entry("Entering POD section\n");
19421 return $line_of_tokens;
19424 # update indentation levels for log messages
19425 if ( $input_line !~ /^\s*$/ ) {
19426 my $rlevels = $line_of_tokens->{_rlevels};
19427 my $structural_indentation_level = $$rlevels[0];
19428 my ( $python_indentation_level, $msg ) =
19429 find_indentation_level( $input_line, $structural_indentation_level );
19430 if ($msg) { write_logfile_entry("$msg") }
19431 if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
19432 $line_of_tokens->{_python_indentation_level} =
19433 $python_indentation_level;
19437 # see if this line contains here doc targets
19438 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
19439 if (@$rhere_target_list) {
19441 #my $here_doc_target = shift @$rhere_target_list;
19442 my ( $here_doc_target, $here_quote_character ) =
19443 @{ shift @$rhere_target_list };
19444 $tokenizer_self->{_in_here_doc} = 1;
19445 $tokenizer_self->{_here_doc_target} = $here_doc_target;
19446 $tokenizer_self->{_here_quote_character} = $here_quote_character;
19447 write_logfile_entry("Entering HERE document $here_doc_target\n");
19448 $started_looking_for_here_target_at = $input_line_number;
19451 # NOTE: __END__ and __DATA__ statements are written unformatted
19452 # because they can theoretically contain additional characters
19453 # which are not tokenized (and cannot be read with <DATA> either!).
19454 if ( $tokenizer_self->{_in_data} ) {
19455 $line_of_tokens->{_line_type} = 'DATA_START';
19456 write_logfile_entry("Starting __DATA__ section\n");
19457 $tokenizer_self->{_saw_data} = 1;
19459 # keep parsing after __DATA__ if use SelfLoader was seen
19460 if ( $tokenizer_self->{_saw_selfloader} ) {
19461 $tokenizer_self->{_in_data} = 0;
19462 write_logfile_entry(
19463 "SelfLoader seen, continuing; -nlsl deactivates\n");
19466 return $line_of_tokens;
19469 elsif ( $tokenizer_self->{_in_end} ) {
19470 $line_of_tokens->{_line_type} = 'END_START';
19471 write_logfile_entry("Starting __END__ section\n");
19472 $tokenizer_self->{_saw_end} = 1;
19474 # keep parsing after __END__ if use AutoLoader was seen
19475 if ( $tokenizer_self->{_saw_autoloader} ) {
19476 $tokenizer_self->{_in_end} = 0;
19477 write_logfile_entry(
19478 "AutoLoader seen, continuing; -nlal deactivates\n");
19480 return $line_of_tokens;
19483 # now, finally, we know that this line is type 'CODE'
19484 $line_of_tokens->{_line_type} = 'CODE';
19486 # remember if we have seen any real code
19487 if ( !$tokenizer_self->{_started_tokenizing}
19488 && $input_line !~ /^\s*$/
19489 && $input_line !~ /^\s*#/ )
19491 $tokenizer_self->{_started_tokenizing} = 1;
19494 if ( $tokenizer_self->{_debugger_object} ) {
19495 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
19498 # Note: if keyword 'format' occurs in this line code, it is still CODE
19499 # (keyword 'format' need not start a line)
19500 if ( $tokenizer_self->{_in_format} ) {
19501 write_logfile_entry("Entering format section\n");
19504 if ( $tokenizer_self->{_in_quote}
19505 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
19508 if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
19509 $tokenizer_self->{_line_start_quote} = $input_line_number;
19510 $tokenizer_self->{_quote_target} = $quote_target;
19511 write_logfile_entry(
19512 "Start multi-line quote or pattern ending in $quote_target\n");
19515 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
19516 and !$tokenizer_self->{_in_quote} )
19518 $tokenizer_self->{_line_start_quote} = -1;
19519 write_logfile_entry("End of multi-line quote or pattern\n");
19522 # we are returning a line of CODE
19523 return $line_of_tokens;
19526 sub find_starting_indentation_level {
19528 my $starting_level = 0;
19529 my $know_input_tabstr = -1; # flag for find_indentation_level
19531 # use value if given as parameter
19532 if ( $tokenizer_self->{_know_starting_level} ) {
19533 $starting_level = $tokenizer_self->{_starting_level};
19536 # if we know there is a hash_bang line, the level must be zero
19537 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
19538 $tokenizer_self->{_know_starting_level} = 1;
19541 # otherwise figure it out from the input file
19545 my $structural_indentation_level = -1; # flag for find_indentation_level
19549 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
19552 # if first line is #! then assume starting level is zero
19553 if ( $i == 1 && $line =~ /^\#\!/ ) {
19554 $starting_level = 0;
19557 next if ( $line =~ /^\s*#/ ); # must not be comment
19558 next if ( $line =~ /^\s*$/ ); # must not be blank
19559 ( $starting_level, $msg ) =
19560 find_indentation_level( $line, $structural_indentation_level );
19561 if ($msg) { write_logfile_entry("$msg") }
19564 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
19566 if ( $starting_level > 0 ) {
19568 my $input_tabstr = $tokenizer_self->{_input_tabstr};
19569 if ( $input_tabstr eq "\t" ) {
19570 $msg .= "by guessing input tabbing uses 1 tab per level\n";
19573 my $cols = length($input_tabstr);
19575 "by guessing input tabbing uses $cols blanks per level\n";
19578 write_logfile_entry("$msg");
19580 $tokenizer_self->{_starting_level} = $starting_level;
19581 reset_indentation_level($starting_level);
19584 # Find indentation level given a input line. At the same time, try to
19585 # figure out the input tabbing scheme.
19587 # There are two types of calls:
19589 # Type 1: $structural_indentation_level < 0
19590 # In this case we have to guess $input_tabstr to figure out the level.
19592 # Type 2: $structural_indentation_level >= 0
19593 # In this case the level of this line is known, and this routine can
19594 # update the tabbing string, if still unknown, to make the level correct.
19596 sub find_indentation_level {
19597 my ( $line, $structural_indentation_level ) = @_;
19601 my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
19602 my $input_tabstr = $tokenizer_self->{_input_tabstr};
19604 # find leading whitespace
19605 my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
19607 # make first guess at input tabbing scheme if necessary
19608 if ( $know_input_tabstr < 0 ) {
19610 $know_input_tabstr = 0;
19612 if ( $tokenizer_self->{_tabs} ) {
19613 $input_tabstr = "\t";
19614 if ( length($leading_whitespace) > 0 ) {
19615 if ( $leading_whitespace !~ /\t/ ) {
19617 my $cols = $tokenizer_self->{_indent_columns};
19619 if ( length($leading_whitespace) < $cols ) {
19620 $cols = length($leading_whitespace);
19622 $input_tabstr = " " x $cols;
19627 $input_tabstr = " " x $tokenizer_self->{_indent_columns};
19629 if ( length($leading_whitespace) > 0 ) {
19630 if ( $leading_whitespace =~ /^\t/ ) {
19631 $input_tabstr = "\t";
19635 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
19636 $tokenizer_self->{_input_tabstr} = $input_tabstr;
19639 # determine the input tabbing scheme if possible
19640 if ( ( $know_input_tabstr == 0 )
19641 && ( length($leading_whitespace) > 0 )
19642 && ( $structural_indentation_level > 0 ) )
19644 my $saved_input_tabstr = $input_tabstr;
19646 # check for common case of one tab per indentation level
19647 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
19648 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
19649 $input_tabstr = "\t";
19650 $msg = "Guessing old indentation was tab character\n";
19656 # detab any tabs based on 8 blanks per tab
19658 if ( $leading_whitespace =~ s/^\t+/ /g ) {
19659 $entabbed = "entabbed";
19662 # now compute tabbing from number of spaces
19664 length($leading_whitespace) / $structural_indentation_level;
19665 if ( $columns == int $columns ) {
19667 "Guessing old indentation was $columns $entabbed spaces\n";
19670 $columns = int $columns;
19672 "old indentation is unclear, using $columns $entabbed spaces\n";
19674 $input_tabstr = " " x $columns;
19676 $know_input_tabstr = 1;
19677 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
19678 $tokenizer_self->{_input_tabstr} = $input_tabstr;
19680 # see if mistakes were made
19681 if ( ( $tokenizer_self->{_starting_level} > 0 )
19682 && !$tokenizer_self->{_know_starting_level} )
19685 if ( $input_tabstr ne $saved_input_tabstr ) {
19687 "I made a bad starting level guess; rerun with a value for -sil \n"
19693 # use current guess at input tabbing to get input indentation level
19695 # Patch to handle a common case of entabbed leading whitespace
19696 # If the leading whitespace equals 4 spaces and we also have
19697 # tabs, detab the input whitespace assuming 8 spaces per tab.
19698 if ( length($input_tabstr) == 4 ) {
19699 $leading_whitespace =~ s/^\t+/ /g;
19702 if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
19705 while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
19711 return ( $level, $msg );
19714 sub dump_token_types {
19718 # This should be the latest list of token types in use
19719 # adding NEW_TOKENS: add a comment here
19720 print $fh <<'END_OF_LIST';
19722 Here is a list of the token types currently used for lines of type 'CODE'.
19723 For the following tokens, the "type" of a token is just the token itself.
19725 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
19726 ( ) <= >= == =~ !~ != ++ -- /= x=
19727 ... **= <<= >>= &&= ||= //= <=>
19728 , + - / * | % ! x ~ = \ ? : . < > ^ &
19730 The following additional token types are defined:
19733 b blank (white space)
19734 { indent: opening structural curly brace or square bracket or paren
19735 (code block, anonymous hash reference, or anonymous array reference)
19736 } outdent: right structural curly brace or square bracket or paren
19737 [ left non-structural square bracket (enclosing an array index)
19738 ] right non-structural square bracket
19739 ( left non-structural paren (all but a list right of an =)
19740 ) right non-structural parena
19741 L left non-structural curly brace (enclosing a key)
19742 R right non-structural curly brace
19743 ; terminal semicolon
19744 f indicates a semicolon in a "for" statement
19745 h here_doc operator <<
19747 Q indicates a quote or pattern
19748 q indicates a qw quote block
19750 C user-defined constant or constant function (with void prototype = ())
19751 U user-defined function taking parameters
19752 G user-defined function taking block parameter (like grep/map/eval)
19753 M (unused, but reserved for subroutine definition name)
19754 P (unused, but -html uses it to label pod text)
19755 t type indicater such as %,$,@,*,&,sub
19756 w bare word (perhaps a subroutine call)
19757 i identifier of some type (with leading %, $, @, *, &, sub, -> )
19760 F a file test operator (like -e)
19762 Z identifier in indirect object slot: may be file handle, object
19763 J LABEL: code block label
19764 j LABEL after next, last, redo, goto
19767 pp pre-increment operator ++
19768 mm pre-decrement operator --
19769 A : used as attribute separator
19771 Here are the '_line_type' codes used internally:
19772 SYSTEM - system-specific code before hash-bang line
19773 CODE - line of perl code (including comments)
19774 POD_START - line starting pod, such as '=head'
19775 POD - pod documentation text
19776 POD_END - last line of pod section, '=cut'
19777 HERE - text of here-document
19778 HERE_END - last line of here-doc (target word)
19779 FORMAT - format section
19780 FORMAT_END - last line of format section, '.'
19781 DATA_START - __DATA__ line
19782 DATA - unidentified text following __DATA__
19783 END_START - __END__ line
19784 END - unidentified text following __END__
19785 ERROR - we are in big trouble, probably not a perl script
19789 # This is a currently unused debug routine
19790 sub dump_functions {
19794 foreach $pkg ( keys %is_user_function ) {
19795 print $fh "\nnon-constant subs in package $pkg\n";
19797 foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
19799 if ( $is_block_list_function{$pkg}{$sub} ) {
19800 $msg = 'block_list';
19803 if ( $is_block_function{$pkg}{$sub} ) {
19806 print $fh "$sub $msg\n";
19810 foreach $pkg ( keys %is_constant ) {
19811 print $fh "\nconstants and constant subs in package $pkg\n";
19813 foreach $sub ( keys %{ $is_constant{$pkg} } ) {
19814 print $fh "$sub\n";
19819 sub prepare_for_a_new_file {
19820 $saw_negative_indentation = 0;
19821 $id_scan_state = '';
19822 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
19823 $last_nonblank_token = ';'; # the only possible starting state which
19824 $last_nonblank_type = ';'; # will make a leading brace a code block
19825 $last_nonblank_block_type = '';
19826 $last_nonblank_container_type = '';
19827 $last_nonblank_type_sequence = '';
19828 $last_last_nonblank_token = ';';
19829 $last_last_nonblank_type = ';';
19830 $last_last_nonblank_block_type = '';
19831 $last_last_nonblank_container_type = '';
19832 $last_last_nonblank_type_sequence = '';
19833 $last_nonblank_prototype = "";
19835 $in_attribute_list = 0; # ATTRS
19836 $in_quote = 0; # flag telling if we are chasing a quote, and what kind
19838 $quote_character = ""; # character we seek if chasing a quote
19839 $quote_pos = 0; # next character index to check for case of alphanum char
19841 $allowed_quote_modifiers = "";
19844 $square_bracket_depth = 0;
19845 $current_package = "main";
19846 @current_depth[ 0 .. $#closing_brace_names ] =
19847 (0) x scalar @closing_brace_names;
19848 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
19849 ( 0 .. $#closing_brace_names );
19850 @current_sequence_number = ();
19852 $paren_type[$paren_depth] = '';
19853 $paren_semicolon_count[$paren_depth] = 0;
19854 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
19855 $brace_structural_type[$brace_depth] = '';
19856 $brace_statement_type[$brace_depth] = "";
19857 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
19858 $paren_structural_type[$brace_depth] = '';
19859 $square_bracket_type[$square_bracket_depth] = '';
19860 $square_bracket_structural_type[$square_bracket_depth] = '';
19861 $brace_package[$paren_depth] = $current_package;
19862 %is_constant = (); # user-defined constants
19863 %is_user_function = (); # user-defined functions
19864 %user_function_prototype = (); # their prototypes
19865 %is_block_function = ();
19866 %is_block_list_function = ();
19867 %saw_function_definition = ();
19868 $unexpected_error_count = 0;
19870 $context = UNKNOWN_CONTEXT;
19871 @slevel_stack = ();
19872 $ci_string_in_tokenizer = "";
19873 $continuation_string_in_tokenizer = "0";
19874 $in_statement_continuation = 0;
19875 @lower_case_labels_at = ();
19876 $saw_v_string = 0; # for warning of v-strings on older perl
19877 $nesting_token_string = "";
19878 $nesting_type_string = "";
19879 $nesting_block_string = '1'; # initially in a block
19880 $nesting_block_flag = 1;
19881 $nesting_list_string = '0'; # initially not in a list
19882 $nesting_list_flag = 0; # initially not in a list
19883 $nearly_matched_here_target_at = undef;
19886 sub get_quote_target {
19887 return matching_end_token($quote_character);
19890 sub get_indentation_level {
19891 return $level_in_tokenizer;
19894 sub reset_indentation_level {
19895 $level_in_tokenizer = $_[0];
19896 $slevel_in_tokenizer = $_[0];
19897 push @slevel_stack, $slevel_in_tokenizer;
19900 { # begin tokenize_this_line
19902 use constant BRACE => 0;
19903 use constant SQUARE_BRACKET => 1;
19904 use constant PAREN => 2;
19905 use constant QUESTION_COLON => 3;
19908 $block_type, $container_type, $expecting,
19909 $here_doc_target, $here_quote_character, $i,
19910 $i_tok, $last_nonblank_i, $next_tok,
19911 $next_type, $prototype, $rtoken_map,
19912 $rtoken_type, $rtokens, $tok,
19913 $type, $type_sequence,
19916 my @output_token_list = (); # stack of output token indexes
19917 my @output_token_type = (); # token types
19918 my @output_block_type = (); # types of code block
19919 my @output_container_type = (); # paren types, such as if, elsif, ..
19920 my @output_type_sequence = (); # nesting sequential number
19922 my @here_target_list = (); # list of here-doc target strings
19924 # ------------------------------------------------------------
19925 # beginning of various scanner interfaces to simplify coding
19926 # ------------------------------------------------------------
19927 sub scan_bare_identifier {
19928 ( $i, $tok, $type, $prototype ) =
19929 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
19933 sub scan_identifier {
19934 ( $i, $tok, $type, $id_scan_state, $identifier ) =
19935 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens );
19939 ( $i, $tok, $type, $id_scan_state ) =
19940 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
19947 ( $i, $type, $number ) =
19948 scan_number_do( $input_line, $i, $rtoken_map, $type );
19951 # a sub to warn if token found where term expected
19952 sub error_if_expecting_TERM {
19953 if ( $expecting == TERM ) {
19954 if ( $really_want_term{$last_nonblank_type} ) {
19955 unexpected( $tok, "term", $i_tok, $last_nonblank_i );
19961 # a sub to warn if token found where operator expected
19962 sub error_if_expecting_OPERATOR {
19963 if ( $expecting == OPERATOR ) {
19964 my $thing = defined $_[0] ? $_[0] : $tok;
19965 unexpected( $thing, "operator", $i_tok, $last_nonblank_i );
19966 if ( $i_tok == 0 ) {
19967 interrupt_logfile();
19968 warning("Missing ';' above?\n");
19975 # ------------------------------------------------------------
19976 # end scanner interfaces
19977 # ------------------------------------------------------------
19979 my %is_for_foreach;
19980 @_ = qw(for foreach);
19981 @is_for_foreach{@_} = (1) x scalar(@_);
19985 @is_my_our{@_} = (1) x scalar(@_);
19987 # These keywords may introduce blocks after parenthesized expressions,
19989 # keyword ( .... ) { BLOCK }
19990 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
19991 my %is_blocktype_with_paren;
19992 @_ = qw(if elsif unless while until for foreach switch case given when);
19993 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
19995 # ------------------------------------------------------------
19996 # begin hash of code for handling most token types
19997 # ------------------------------------------------------------
19998 my $tokenization_code = {
20000 # no special code for these types yet, but syntax checks
20033 error_if_expecting_TERM()
20034 if ( $expecting == TERM );
20037 error_if_expecting_TERM()
20038 if ( $expecting == TERM );
20042 # start looking for a scalar
20043 error_if_expecting_OPERATOR("Scalar")
20044 if ( $expecting == OPERATOR );
20047 if ( $identifier eq '$^W' ) {
20048 $tokenizer_self->{_saw_perl_dash_w} = 1;
20051 # Check for indentifier in indirect object slot
20052 # (vorboard.pl, sort.t). Something like:
20053 # /^(print|printf|sort|exec|system)$/
20055 $is_indirect_object_taker{$last_nonblank_token}
20057 || ( ( $last_nonblank_token eq '(' )
20058 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
20059 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
20068 $paren_semicolon_count[$paren_depth] = 0;
20070 $container_type = $want_paren;
20074 $container_type = $last_nonblank_token;
20076 # We can check for a syntax error here of unexpected '(',
20077 # but this is going to get messy...
20079 $expecting == OPERATOR
20081 # be sure this is not a method call of the form
20082 # &method(...), $method->(..), &{method}(...),
20083 # $ref[2](list) is ok & short for $ref[2]->(list)
20084 # NOTE: at present, braces in something like &{ xxx }
20085 # are not marked as a block, we might have a method call
20086 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
20091 # ref: camel 3 p 703.
20092 if ( $last_last_nonblank_token eq 'do' ) {
20094 "do SUBROUTINE is deprecated; consider & or -> notation\n"
20099 # if this is an empty list, (), then it is not an
20100 # error; for example, we might have a constant pi and
20101 # invoke it with pi() or just pi;
20102 my ( $next_nonblank_token, $i_next ) =
20103 find_next_nonblank_token( $i, $rtokens );
20104 if ( $next_nonblank_token ne ')' ) {
20106 error_if_expecting_OPERATOR('(');
20108 if ( $last_nonblank_type eq 'C' ) {
20110 "$last_nonblank_token has a void prototype\n";
20112 elsif ( $last_nonblank_type eq 'i' ) {
20114 && $last_nonblank_token =~ /^\$/ )
20117 "Do you mean '$last_nonblank_token->(' ?\n";
20121 interrupt_logfile();
20125 } ## end if ( $next_nonblank_token...
20126 } ## end else [ if ( $last_last_nonblank_token...
20127 } ## end if ( $expecting == OPERATOR...
20129 $paren_type[$paren_depth] = $container_type;
20130 $type_sequence = increase_nesting_depth( PAREN, $i_tok );
20132 # propagate types down through nested parens
20133 # for example: the second paren in 'if ((' would be structural
20134 # since the first is.
20136 if ( $last_nonblank_token eq '(' ) {
20137 $type = $last_nonblank_type;
20140 # We exclude parens as structural after a ',' because it
20141 # causes subtle problems with continuation indentation for
20142 # something like this, where the first 'or' will not get
20147 # ( not defined $check )
20149 # or $check eq "new"
20150 # or $check eq "old",
20153 # Likewise, we exclude parens where a statement can start
20154 # because of problems with continuation indentation, like
20157 # ($firstline =~ /^#\!.*perl/)
20158 # and (print $File::Find::name, "\n")
20161 # (ref($usage_fref) =~ /CODE/)
20163 # : (&blast_usage, &blast_params, &blast_general_params);
20169 if ( $last_nonblank_type eq ')' ) {
20171 "Syntax error? found token '$last_nonblank_type' then '('\n"
20174 $paren_structural_type[$paren_depth] = $type;
20178 $type_sequence = decrease_nesting_depth( PAREN, $i_tok );
20180 if ( $paren_structural_type[$paren_depth] eq '{' ) {
20184 $container_type = $paren_type[$paren_depth];
20186 # /^(for|foreach)$/
20187 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
20188 my $num_sc = $paren_semicolon_count[$paren_depth];
20189 if ( $num_sc > 0 && $num_sc != 2 ) {
20190 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
20194 if ( $paren_depth > 0 ) { $paren_depth-- }
20197 if ( $last_nonblank_type eq ',' ) {
20198 complain("Repeated ','s \n");
20201 # patch for operator_expected: note if we are in the list (use.t)
20202 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
20203 ## FIXME: need to move this elsewhere, perhaps check after a '('
20204 ## elsif ($last_nonblank_token eq '(') {
20205 ## warning("Leading ','s illegal in some versions of perl\n");
20209 $context = UNKNOWN_CONTEXT;
20210 $statement_type = '';
20212 # /^(for|foreach)$/
20213 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
20214 { # mark ; in for loop
20216 # Be careful: we do not want a semicolon such as the
20217 # following to be included:
20219 # for (sort {strcoll($a,$b);} keys %investments) {
20221 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
20222 && $square_bracket_depth ==
20223 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
20227 $paren_semicolon_count[$paren_depth]++;
20233 error_if_expecting_OPERATOR("String")
20234 if ( $expecting == OPERATOR );
20237 $allowed_quote_modifiers = "";
20240 error_if_expecting_OPERATOR("String")
20241 if ( $expecting == OPERATOR );
20244 $allowed_quote_modifiers = "";
20247 error_if_expecting_OPERATOR("String")
20248 if ( $expecting == OPERATOR );
20251 $allowed_quote_modifiers = "";
20256 if ( $expecting == UNKNOWN ) { # indeterminte, must guess..
20258 ( $is_pattern, $msg ) =
20259 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map );
20262 write_diagnostics("DIVIDE:$msg\n");
20263 write_logfile_entry($msg);
20266 else { $is_pattern = ( $expecting == TERM ) }
20271 $allowed_quote_modifiers = '[cgimosx]';
20273 else { # not a pattern; check for a /= token
20275 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
20281 #DEBUG - collecting info on what tokens follow a divide
20282 # for development of guessing algorithm
20283 #if ( numerator_expected( $i, $rtokens ) < 0 ) {
20284 # #write_diagnostics( "DIVIDE? $input_line\n" );
20290 # if we just saw a ')', we will label this block with
20291 # its type. We need to do this to allow sub
20292 # code_block_type to determine if this brace starts a
20293 # code block or anonymous hash. (The type of a paren
20294 # pair is the preceding token, such as 'if', 'else',
20296 $container_type = "";
20298 # ATTRS: for a '{' following an attribute list, reset
20299 # things to look like we just saw the sub name
20300 if ( $statement_type =~ /^sub/ ) {
20301 $last_nonblank_token = $statement_type;
20302 $last_nonblank_type = 'i';
20303 $statement_type = "";
20306 # patch for SWITCH/CASE: hide these keywords from an immediately
20307 # following opening brace
20308 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
20309 && $statement_type eq $last_nonblank_token )
20311 $last_nonblank_token = ";";
20314 elsif ( $last_nonblank_token eq ')' ) {
20315 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
20317 # defensive move in case of a nesting error (pbug.t)
20318 # in which this ')' had no previous '('
20319 # this nesting error will have been caught
20320 if ( !defined($last_nonblank_token) ) {
20321 $last_nonblank_token = 'if';
20324 # check for syntax error here;
20325 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
20326 my $list = join( ' ', sort keys %is_blocktype_with_paren );
20328 "syntax error at ') {', didn't see one of: $list\n");
20332 # patch for paren-less for/foreach glitch, part 2.
20333 # see note below under 'qw'
20334 elsif ($last_nonblank_token eq 'qw'
20335 && $is_for_foreach{$want_paren} )
20337 $last_nonblank_token = $want_paren;
20338 if ( $last_last_nonblank_token eq $want_paren ) {
20340 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
20347 # now identify which of the three possible types of
20348 # curly braces we have: hash index container, anonymous
20349 # hash reference, or code block.
20351 # non-structural (hash index) curly brace pair
20352 # get marked 'L' and 'R'
20353 if ( is_non_structural_brace() ) {
20356 # patch for SWITCH/CASE:
20357 # allow paren-less identifier after 'when'
20358 # if the brace is preceded by a space
20359 if ( $statement_type eq 'when'
20360 && $last_nonblank_type eq 'i'
20361 && $last_last_nonblank_type eq 'k'
20362 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
20365 $block_type = $statement_type;
20369 # code and anonymous hash have the same type, '{', but are
20370 # distinguished by 'block_type',
20371 # which will be blank for an anonymous hash
20374 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type );
20376 # patch to promote bareword type to function taking block
20378 && $last_nonblank_type eq 'w'
20379 && $last_nonblank_i >= 0 )
20381 if ( $output_token_type[$last_nonblank_i] eq 'w' ) {
20382 $output_token_type[$last_nonblank_i] = 'G';
20386 # patch for SWITCH/CASE: if we find a stray opening block brace
20387 # where we might accept a 'case' or 'when' block, then take it
20388 if ( $statement_type eq 'case'
20389 || $statement_type eq 'when' )
20391 if ( !$block_type || $block_type eq '}' ) {
20392 $block_type = $statement_type;
20396 $brace_type[ ++$brace_depth ] = $block_type;
20397 $brace_package[$brace_depth] = $current_package;
20398 $type_sequence = increase_nesting_depth( BRACE, $i_tok );
20399 $brace_structural_type[$brace_depth] = $type;
20400 $brace_context[$brace_depth] = $context;
20401 $brace_statement_type[$brace_depth] = $statement_type;
20404 $block_type = $brace_type[$brace_depth];
20405 if ($block_type) { $statement_type = '' }
20406 if ( defined( $brace_package[$brace_depth] ) ) {
20407 $current_package = $brace_package[$brace_depth];
20410 # can happen on brace error (caught elsewhere)
20413 $type_sequence = decrease_nesting_depth( BRACE, $i_tok );
20415 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
20419 # propagate type information for 'do' and 'eval' blocks.
20420 # This is necessary to enable us to know if an operator
20421 # or term is expected next
20422 if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
20423 $tok = $brace_type[$brace_depth];
20426 $context = $brace_context[$brace_depth];
20427 $statement_type = $brace_statement_type[$brace_depth];
20428 if ( $brace_depth > 0 ) { $brace_depth--; }
20430 '&' => sub { # maybe sub call? start looking
20432 # We have to check for sub call unless we are sure we
20433 # are expecting an operator. This example from s2p
20434 # got mistaken as a q operator in an early version:
20435 # print BODY &q(<<'EOT');
20436 if ( $expecting != OPERATOR ) {
20442 '<' => sub { # angle operator or less than?
20444 if ( $expecting != OPERATOR ) {
20446 find_angle_operator_termination( $input_line, $i, $rtoken_map,
20453 '?' => sub { # ?: conditional or starting pattern?
20457 if ( $expecting == UNKNOWN ) {
20460 ( $is_pattern, $msg ) =
20461 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map );
20463 if ($msg) { write_logfile_entry($msg) }
20465 else { $is_pattern = ( $expecting == TERM ) }
20470 $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this
20475 increase_nesting_depth( QUESTION_COLON, $i_tok );
20478 '*' => sub { # typeglob, or multiply?
20480 if ( $expecting == TERM ) {
20485 if ( $$rtokens[ $i + 1 ] eq '=' ) {
20490 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
20494 if ( $$rtokens[ $i + 1 ] eq '=' ) {
20502 '.' => sub { # what kind of . ?
20504 if ( $expecting != OPERATOR ) {
20506 if ( $type eq '.' ) {
20507 error_if_expecting_TERM()
20508 if ( $expecting == TERM );
20516 # if this is the first nonblank character, call it a label
20517 # since perl seems to just swallow it
20518 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
20522 # ATTRS: check for a ':' which introduces an attribute list
20523 # (this might eventually get its own token type)
20524 elsif ( $statement_type =~ /^sub/ ) {
20526 $in_attribute_list = 1;
20529 # check for scalar attribute, such as
20530 # my $foo : shared = 1;
20531 elsif ($is_my_our{$statement_type}
20532 && $current_depth[QUESTION_COLON] == 0 )
20535 $in_attribute_list = 1;
20538 # otherwise, it should be part of a ?/: operator
20541 decrease_nesting_depth( QUESTION_COLON, $i_tok );
20542 if ( $last_nonblank_token eq '?' ) {
20543 warning("Syntax error near ? :\n");
20547 '+' => sub { # what kind of plus?
20549 if ( $expecting == TERM ) {
20552 # unary plus is safest assumption if not a number
20553 if ( !defined($number) ) { $type = 'p'; }
20555 elsif ( $expecting == OPERATOR ) {
20558 if ( $next_type eq 'w' ) { $type = 'p' }
20563 error_if_expecting_OPERATOR("Array")
20564 if ( $expecting == OPERATOR );
20567 '%' => sub { # hash or modulo?
20569 # first guess is hash if no following blank
20570 if ( $expecting == UNKNOWN ) {
20571 if ( $next_type ne 'b' ) { $expecting = TERM }
20573 if ( $expecting == TERM ) {
20578 $square_bracket_type[ ++$square_bracket_depth ] =
20579 $last_nonblank_token;
20580 $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok );
20582 # It may seem odd, but structural square brackets have
20583 # type '{' and '}'. This simplifies the indentation logic.
20584 if ( !is_non_structural_brace() ) {
20587 $square_bracket_structural_type[$square_bracket_depth] = $type;
20590 $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok );
20592 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
20596 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
20598 '-' => sub { # what kind of minus?
20600 if ( ( $expecting != OPERATOR )
20601 && $is_file_test_operator{$next_tok} )
20607 elsif ( $expecting == TERM ) {
20610 # maybe part of bareword token? unary is safest
20611 if ( !defined($number) ) { $type = 'm'; }
20614 elsif ( $expecting == OPERATOR ) {
20618 if ( $next_type eq 'w' ) {
20626 # check for special variables like ${^WARNING_BITS}
20627 if ( $expecting == TERM ) {
20629 # FIXME: this should work but will not catch errors
20630 # because we also have to be sure that previous token is
20631 # a type character ($,@,%).
20632 if ( $last_nonblank_token eq '{'
20633 && ( $next_tok =~ /^[A-Za-z_]/ ) )
20636 if ( $next_tok eq 'W' ) {
20637 $tokenizer_self->{_saw_perl_dash_w} = 1;
20639 $tok = $tok . $next_tok;
20645 unless ( error_if_expecting_TERM() ) {
20647 # Something like this is valid but strange:
20649 complain("The '^' seems unusual here\n");
20655 '::' => sub { # probably a sub call
20656 scan_bare_identifier();
20658 '<<' => sub { # maybe a here-doc?
20660 unless ( $i < $max_token_index )
20661 ; # here-doc not possible if end of line
20663 if ( $expecting != OPERATOR ) {
20664 my ($found_target);
20665 ( $found_target, $here_doc_target, $here_quote_character, $i ) =
20666 find_here_doc( $expecting, $i, $rtokens, $rtoken_map );
20668 if ($found_target) {
20669 push @here_target_list,
20670 [ $here_doc_target, $here_quote_character ];
20672 if ( length($here_doc_target) > 80 ) {
20673 my $truncated = substr( $here_doc_target, 0, 80 );
20674 complain("Long here-target: '$truncated' ...\n");
20676 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
20678 "Unconventional here-target: '$here_doc_target'\n"
20682 elsif ( $expecting == TERM ) {
20684 # shouldn't happen..
20685 warning("Program bug; didn't find here doc target\n");
20686 report_definite_bug();
20694 # if -> points to a bare word, we must scan for an identifier,
20695 # otherwise something like ->y would look like the y operator
20699 # type = 'pp' for pre-increment, '++' for post-increment
20701 if ( $expecting == TERM ) { $type = 'pp' }
20702 elsif ( $expecting == UNKNOWN ) {
20703 my ( $next_nonblank_token, $i_next ) =
20704 find_next_nonblank_token( $i, $rtokens );
20705 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
20710 if ( $last_nonblank_type eq $tok ) {
20711 complain("Repeated '=>'s \n");
20714 # patch for operator_expected: note if we are in the list (use.t)
20715 # TODO: make version numbers a new token type
20716 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
20719 # type = 'mm' for pre-decrement, '--' for post-decrement
20722 if ( $expecting == TERM ) { $type = 'mm' }
20723 elsif ( $expecting == UNKNOWN ) {
20724 my ( $next_nonblank_token, $i_next ) =
20725 find_next_nonblank_token( $i, $rtokens );
20726 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
20731 error_if_expecting_TERM()
20732 if ( $expecting == TERM );
20736 error_if_expecting_TERM()
20737 if ( $expecting == TERM );
20741 error_if_expecting_TERM()
20742 if ( $expecting == TERM );
20746 # ------------------------------------------------------------
20747 # end hash of code for handling individual token types
20748 # ------------------------------------------------------------
20750 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
20752 # These block types terminate statements and do not need a trailing
20754 # patched for SWITCH/CASE:
20755 my %is_zero_continuation_block_type;
20756 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
20757 if elsif else unless while until for foreach switch case given when);
20758 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
20760 my %is_not_zero_continuation_block_type;
20761 @_ = qw(sort grep map do eval);
20762 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
20764 my %is_logical_container;
20765 @_ = qw(if elsif unless while and or err not && ! || for foreach);
20766 @is_logical_container{@_} = (1) x scalar(@_);
20768 my %is_binary_type;
20770 @is_binary_type{@_} = (1) x scalar(@_);
20772 my %is_binary_keyword;
20773 @_ = qw(and or err eq ne cmp);
20774 @is_binary_keyword{@_} = (1) x scalar(@_);
20776 # 'L' is token for opening { at hash key
20777 my %is_opening_type;
20778 @_ = qw" L { ( [ ";
20779 @is_opening_type{@_} = (1) x scalar(@_);
20781 # 'R' is token for closing } at hash key
20782 my %is_closing_type;
20783 @_ = qw" R } ) ] ";
20784 @is_closing_type{@_} = (1) x scalar(@_);
20786 my %is_redo_last_next_goto;
20787 @_ = qw(redo last next goto);
20788 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
20790 my %is_use_require;
20791 @_ = qw(use require);
20792 @is_use_require{@_} = (1) x scalar(@_);
20794 my %is_sub_package;
20795 @_ = qw(sub package);
20796 @is_sub_package{@_} = (1) x scalar(@_);
20798 # This hash holds the hash key in $tokenizer_self for these keywords:
20799 my %is_format_END_DATA = (
20800 'format' => '_in_format',
20801 '__END__' => '_in_end',
20802 '__DATA__' => '_in_data',
20805 # ref: camel 3 p 147,
20806 # but perl may accept undocumented flags
20807 my %quote_modifiers = (
20808 's' => '[cegimosx]',
20811 'm' => '[cgimosx]',
20819 # table showing how many quoted things to look for after quote operator..
20820 # s, y, tr have 2 (pattern and replacement)
20821 # others have 1 (pattern only)
20822 my %quote_items = (
20834 sub tokenize_this_line {
20836 # This routine breaks a line of perl code into tokens which are of use in
20837 # indentation and reformatting. One of my goals has been to define tokens
20838 # such that a newline may be inserted between any pair of tokens without
20839 # changing or invalidating the program. This version comes close to this,
20840 # although there are necessarily a few exceptions which must be caught by
20841 # the formatter. Many of these involve the treatment of bare words.
20843 # The tokens and their types are returned in arrays. See previous
20844 # routine for their names.
20846 # See also the array "valid_token_types" in the BEGIN section for an
20849 # To simplify things, token types are either a single character, or they
20850 # are identical to the tokens themselves.
20852 # As a debugging aid, the -D flag creates a file containing a side-by-side
20853 # comparison of the input string and its tokenization for each line of a file.
20854 # This is an invaluable debugging aid.
20856 # In addition to tokens, and some associated quantities, the tokenizer
20857 # also returns flags indication any special line types. These include
20858 # quotes, here_docs, formats.
20860 # -----------------------------------------------------------------------
20862 # How to add NEW_TOKENS:
20864 # New token types will undoubtedly be needed in the future both to keep up
20865 # with changes in perl and to help adapt the tokenizer to other applications.
20867 # Here are some notes on the minimal steps. I wrote these notes while
20868 # adding the 'v' token type for v-strings, which are things like version
20869 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
20870 # can use your editor to search for the string "NEW_TOKENS" to find the
20871 # appropriate sections to change):
20873 # *. Try to talk somebody else into doing it! If not, ..
20875 # *. Make a backup of your current version in case things don't work out!
20877 # *. Think of a new, unused character for the token type, and add to
20878 # the array @valid_token_types in the BEGIN section of this package.
20879 # For example, I used 'v' for v-strings.
20881 # *. Implement coding to recognize the $type of the token in this routine.
20882 # This is the hardest part, and is best done by immitating or modifying
20883 # some of the existing coding. For example, to recognize v-strings, I
20884 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
20885 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
20887 # *. Update sub operator_expected. This update is critically important but
20888 # the coding is trivial. Look at the comments in that routine for help.
20889 # For v-strings, which should behave like numbers, I just added 'v' to the
20890 # regex used to handle numbers and strings (types 'n' and 'Q').
20892 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
20893 # Perl::Tidy::Formatter for breaking lines around this token type. You can
20894 # skip this step and take the default at first, then adjust later to get
20895 # desired results. For adding type 'v', I looked at sub bond_strength and
20896 # saw that number type 'n' was using default strengths, so I didn't do
20897 # anything. I may tune it up someday if I don't like the way line
20898 # breaks with v-strings look.
20900 # *. Implement a 'whitespace' rule in sub set_white_space_flag in
20901 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
20902 # and saw that type 'n' used spaces on both sides, so I just added 'v'
20903 # to the array @spaces_both_sides.
20905 # *. Update HtmlWriter package so that users can colorize the token as
20906 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
20907 # that package. For v-strings, I initially chose to use a default color
20908 # equal to the default for numbers, but it might be nice to change that
20911 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
20913 # *. Run lots and lots of debug tests. Start with special files designed
20914 # to test the new token type. Run with the -D flag to create a .DEBUG
20915 # file which shows the tokenization. When these work ok, test as many old
20916 # scripts as possible. Start with all of the '.t' files in the 'test'
20917 # directory of the distribution file. Compare .tdy output with previous
20918 # version and updated version to see the differences. Then include as
20919 # many more files as possible. My own technique has been to collect a huge
20920 # number of perl scripts (thousands!) into one directory and run perltidy
20921 # *, then run diff between the output of the previous version and the
20924 # -----------------------------------------------------------------------
20926 my $line_of_tokens = shift;
20927 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
20929 # patch while coding change is underway
20930 # make callers private data to allow access
20931 # $tokenizer_self = $caller_tokenizer_self;
20933 # extract line number for use in error messages
20934 $input_line_number = $line_of_tokens->{_line_number};
20936 # check for pod documentation
20937 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
20939 # must not be in multi-line quote
20940 # and must not be in an eqn
20941 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
20943 $tokenizer_self->{_in_pod} = 1;
20948 $input_line = $untrimmed_input_line;
20952 # trim start of this line unless we are continuing a quoted line
20953 # do not trim end because we might end in a quote (test: deken4.pl)
20954 # Perl::Tidy::Formatter will delete needless trailing blanks
20955 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
20956 $input_line =~ s/^\s*//; # trim left end
20959 # re-initialize for the main loop
20960 @output_token_list = (); # stack of output token indexes
20961 @output_token_type = (); # token types
20962 @output_block_type = (); # types of code block
20963 @output_container_type = (); # paren types, such as if, elsif, ..
20964 @output_type_sequence = (); # nesting sequential number
20966 $tok = $last_nonblank_token;
20967 $type = $last_nonblank_type;
20968 $prototype = $last_nonblank_prototype;
20969 $last_nonblank_i = -1;
20970 $block_type = $last_nonblank_block_type;
20971 $container_type = $last_nonblank_container_type;
20972 $type_sequence = $last_nonblank_type_sequence;
20973 @here_target_list = (); # list of here-doc target strings
20977 # tokenization is done in two stages..
20978 # stage 1 is a very simple pre-tokenization
20979 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
20981 # a little optimization for a full-line comment
20982 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
20983 $max_tokens_wanted = 1 # no use tokenizing a comment
20986 # start by breaking the line into pre-tokens
20987 ( $rpretokens, $rpretoken_map, $rpretoken_type ) =
20988 pre_tokenize( $input_line, $max_tokens_wanted );
20990 $max_token_index = scalar(@$rpretokens) - 1;
20991 push( @$rpretokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
20992 push( @$rpretoken_map, 0, 0, 0 ); # shouldn't be referenced
20993 push( @$rpretoken_type, 'b', 'b', 'b' );
20995 # temporary copies while coding change is underway
20996 ( $rtokens, $rtoken_map, $rtoken_type ) =
20997 ( $rpretokens, $rpretoken_map, $rpretoken_type );
20999 # initialize for main loop
21000 for $i ( 0 .. $max_token_index + 3 ) {
21001 $output_token_type[$i] = "";
21002 $output_block_type[$i] = "";
21003 $output_container_type[$i] = "";
21004 $output_type_sequence[$i] = "";
21009 # ------------------------------------------------------------
21010 # begin main tokenization loop
21011 # ------------------------------------------------------------
21013 # we are looking at each pre-token of one line and combining them
21015 while ( ++$i <= $max_token_index ) {
21017 if ($in_quote) { # continue looking for end of a quote
21018 $type = $quote_type;
21020 unless (@output_token_list) { # initialize if continuation line
21021 push( @output_token_list, $i );
21022 $output_token_type[$i] = $type;
21025 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
21027 # scan for the end of the quote or pattern
21028 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
21029 do_quote( $i, $in_quote, $quote_character, $quote_pos,
21030 $quote_depth, $rtokens, $rtoken_map );
21032 # all done if we didn't find it
21033 last if ($in_quote);
21035 # re-initialize for next search
21036 $quote_character = '';
21039 last if ( ++$i > $max_token_index );
21041 # look for any modifiers
21042 if ($allowed_quote_modifiers) {
21044 # check for exact quote modifiers
21045 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
21046 my $str = $$rtokens[$i];
21047 while ( $str =~ /\G$allowed_quote_modifiers/gc ) { }
21049 if ( defined( pos($str) ) ) {
21052 if ( pos($str) == length($str) ) {
21053 last if ( ++$i > $max_token_index );
21056 # Looks like a joined quote modifier
21057 # and keyword, maybe something like
21058 # s/xxx/yyy/gefor @k=...
21059 # Example is "galgen.pl". Would have to split
21060 # the word and insert a new token in the
21061 # pre-token list. This is so rare that I haven't
21062 # done it. Will just issue a warning citation.
21064 # This error might also be triggered if my quote
21065 # modifier characters are incomplete
21069 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
21070 Please put a space between quote modifiers and trailing keywords.
21073 # print "token $$rtokens[$i]\n";
21074 # my $num = length($str) - pos($str);
21075 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
21076 # print "continuing with new token $$rtokens[$i]\n";
21078 # skipping past this token does least damage
21079 last if ( ++$i > $max_token_index );
21084 # example file: rokicki4.pl
21085 # This error might also be triggered if my quote
21086 # modifier characters are incomplete
21087 write_logfile_entry(
21088 "Note: found word $str at quote modifier location\n"
21094 $allowed_quote_modifiers = "";
21098 unless ( $tok =~ /^\s*$/ ) {
21100 # try to catch some common errors
21101 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
21103 if ( $last_nonblank_token eq 'eq' ) {
21104 complain("Should 'eq' be '==' here ?\n");
21106 elsif ( $last_nonblank_token eq 'ne' ) {
21107 complain("Should 'ne' be '!=' here ?\n");
21111 $last_last_nonblank_token = $last_nonblank_token;
21112 $last_last_nonblank_type = $last_nonblank_type;
21113 $last_last_nonblank_block_type = $last_nonblank_block_type;
21114 $last_last_nonblank_container_type =
21115 $last_nonblank_container_type;
21116 $last_last_nonblank_type_sequence =
21117 $last_nonblank_type_sequence;
21118 $last_nonblank_token = $tok;
21119 $last_nonblank_type = $type;
21120 $last_nonblank_prototype = $prototype;
21121 $last_nonblank_block_type = $block_type;
21122 $last_nonblank_container_type = $container_type;
21123 $last_nonblank_type_sequence = $type_sequence;
21124 $last_nonblank_i = $i_tok;
21127 # store previous token type
21128 if ( $i_tok >= 0 ) {
21129 $output_token_type[$i_tok] = $type;
21130 $output_block_type[$i_tok] = $block_type;
21131 $output_container_type[$i_tok] = $container_type;
21132 $output_type_sequence[$i_tok] = $type_sequence;
21134 my $pre_tok = $$rtokens[$i]; # get the next pre-token
21135 my $pre_type = $$rtoken_type[$i]; # and type
21137 $type = $pre_type; # to be modified as necessary
21138 $block_type = ""; # blank for all tokens except code block braces
21139 $container_type = ""; # blank for all tokens except some parens
21140 $type_sequence = ""; # blank for all tokens except ?/:
21141 $prototype = ""; # blank for all tokens except user defined subs
21144 # this pre-token will start an output token
21145 push( @output_token_list, $i_tok );
21147 # continue gathering identifier if necessary
21148 # but do not start on blanks and comments
21149 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
21151 if ( $id_scan_state =~ /^(sub|package)/ ) {
21158 last if ($id_scan_state);
21159 next if ( ( $i > 0 ) || $type );
21161 # didn't find any token; start over
21166 # handle whitespace tokens..
21167 next if ( $type eq 'b' );
21168 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
21169 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
21171 # Build larger tokens where possible, since we are not in a quote.
21173 # First try to assemble digraphs. The following tokens are
21174 # excluded and handled specially:
21175 # '/=' is excluded because the / might start a pattern.
21176 # 'x=' is excluded since it might be $x=, with $ on previous line
21177 # '**' and *= might be typeglobs of punctuation variables
21178 # I have allowed tokens starting with <, such as <=,
21179 # because I don't think these could be valid angle operators.
21180 # test file: storrs4.pl
21181 my $test_tok = $tok . $$rtokens[ $i + 1 ];
21182 my $combine_ok = $is_digraph{$test_tok};
21184 # check for special cases which cannot be combined
21187 # '//' must be defined_or operator if an operator is expected.
21188 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
21189 # could be migrated here for clarity
21190 if ( $test_tok eq '//' ) {
21191 my $next_type = $$rtokens[ $i + 1 ];
21193 operator_expected( $prev_type, $tok, $next_type );
21194 $combine_ok = 0 unless ( $expecting == OPERATOR );
21200 && ( $test_tok ne '/=' ) # might be pattern
21201 && ( $test_tok ne 'x=' ) # might be $x
21202 && ( $test_tok ne '**' ) # typeglob?
21203 && ( $test_tok ne '*=' ) # typeglob?
21209 # Now try to assemble trigraphs. Note that all possible
21210 # perl trigraphs can be constructed by appending a character
21212 $test_tok = $tok . $$rtokens[ $i + 1 ];
21214 if ( $is_trigraph{$test_tok} ) {
21221 $next_tok = $$rtokens[ $i + 1 ];
21222 $next_type = $$rtoken_type[ $i + 1 ];
21224 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
21227 $last_nonblank_token, $tok,
21228 $next_tok, $brace_depth,
21229 $brace_type[$brace_depth], $paren_depth,
21230 $paren_type[$paren_depth]
21232 print "TOKENIZE:(@debug_list)\n";
21235 # turn off attribute list on first non-blank, non-bareword
21236 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
21238 ###############################################################
21239 # We have the next token, $tok.
21240 # Now we have to examine this token and decide what it is
21241 # and define its $type
21243 # section 1: bare words
21244 ###############################################################
21246 if ( $pre_type eq 'w' ) {
21247 $expecting = operator_expected( $prev_type, $tok, $next_type );
21248 my ( $next_nonblank_token, $i_next ) =
21249 find_next_nonblank_token( $i, $rtokens );
21251 # ATTRS: handle sub and variable attributes
21252 if ($in_attribute_list) {
21254 # treat bare word followed by open paren like qw(
21255 if ( $next_nonblank_token eq '(' ) {
21256 $in_quote = $quote_items{q};
21257 $allowed_quote_modifiers = $quote_modifiers{q};
21263 # handle bareword not followed by open paren
21270 # quote a word followed by => operator
21271 if ( $next_nonblank_token eq '=' ) {
21273 if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
21274 if ( $is_constant{$current_package}{$tok} ) {
21277 elsif ( $is_user_function{$current_package}{$tok} ) {
21280 $user_function_prototype{$current_package}{$tok};
21282 elsif ( $tok =~ /^v\d+$/ ) {
21284 unless ($saw_v_string) { report_v_string($tok) }
21286 else { $type = 'w' }
21292 # quote a bare word within braces..like xxx->{s}; note that we
21293 # must be sure this is not a structural brace, to avoid
21294 # mistaking {s} in the following for a quoted bare word:
21295 # for(@[){s}bla}BLA}
21296 if ( ( $last_nonblank_type eq 'L' )
21297 && ( $next_nonblank_token eq '}' ) )
21303 # a bare word immediately followed by :: is not a keyword;
21304 # use $tok_kw when testing for keywords to avoid a mistake
21306 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
21311 # handle operator x (now we know it isn't $x=)
21312 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
21313 if ( $tok eq 'x' ) {
21315 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
21325 # FIXME: Patch: mark something like x4 as an integer for now
21326 # It gets fixed downstream. This is easier than
21327 # splitting the pretoken.
21333 elsif ( ( $tok eq 'strict' )
21334 and ( $last_nonblank_token eq 'use' ) )
21336 $tokenizer_self->{_saw_use_strict} = 1;
21337 scan_bare_identifier();
21340 elsif ( ( $tok eq 'warnings' )
21341 and ( $last_nonblank_token eq 'use' ) )
21343 $tokenizer_self->{_saw_perl_dash_w} = 1;
21345 # scan as identifier, so that we pick up something like:
21346 # use warnings::register
21347 scan_bare_identifier();
21351 $tok eq 'AutoLoader'
21352 && $tokenizer_self->{_look_for_autoloader}
21354 $last_nonblank_token eq 'use'
21356 # these regexes are from AutoSplit.pm, which we want
21358 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
21359 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
21363 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
21364 $tokenizer_self->{_saw_autoloader} = 1;
21365 $tokenizer_self->{_look_for_autoloader} = 0;
21366 scan_bare_identifier();
21370 $tok eq 'SelfLoader'
21371 && $tokenizer_self->{_look_for_selfloader}
21372 && ( $last_nonblank_token eq 'use'
21373 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
21374 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
21377 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
21378 $tokenizer_self->{_saw_selfloader} = 1;
21379 $tokenizer_self->{_look_for_selfloader} = 0;
21380 scan_bare_identifier();
21383 elsif ( ( $tok eq 'constant' )
21384 and ( $last_nonblank_token eq 'use' ) )
21386 scan_bare_identifier();
21387 my ( $next_nonblank_token, $i_next ) =
21388 find_next_nonblank_token( $i, $rtokens );
21390 if ($next_nonblank_token) {
21392 if ( $is_keyword{$next_nonblank_token} ) {
21394 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
21398 # FIXME: could check for error in which next token is
21399 # not a word (number, punctuation, ..)
21401 $is_constant{$current_package}
21402 {$next_nonblank_token} = 1;
21407 # various quote operators
21408 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
21409 if ( $expecting == OPERATOR ) {
21411 # patch for paren-less for/foreach glitch, part 1
21412 # perl will accept this construct as valid:
21414 # foreach my $key qw\Uno Due Tres Quadro\ {
21415 # print "Set $key\n";
21417 unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
21419 error_if_expecting_OPERATOR();
21422 $in_quote = $quote_items{$tok};
21423 $allowed_quote_modifiers = $quote_modifiers{$tok};
21425 # All quote types are 'Q' except possibly qw quotes.
21426 # qw quotes are special in that they may generally be trimmed
21427 # of leading and trailing whitespace. So they are given a
21428 # separate type, 'q', unless requested otherwise.
21430 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
21433 $quote_type = $type;
21436 # check for a statement label
21438 ( $next_nonblank_token eq ':' )
21439 && ( $$rtokens[ $i_next + 1 ] ne ':' )
21440 && ( $i_next <= $max_token_index ) # colon on same line
21444 if ( $tok !~ /A-Z/ ) {
21445 push @lower_case_labels_at, $input_line_number;
21453 # 'sub' || 'package'
21454 elsif ( $is_sub_package{$tok_kw} ) {
21455 error_if_expecting_OPERATOR()
21456 if ( $expecting == OPERATOR );
21460 # Note on token types for format, __DATA__, __END__:
21461 # It simplifies things to give these type ';', so that when we
21462 # start rescanning we will be expecting a token of type TERM.
21463 # We will switch to type 'k' before outputting the tokens.
21464 elsif ( $is_format_END_DATA{$tok_kw} ) {
21465 $type = ';'; # make tokenizer look for TERM next
21466 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
21470 elsif ( $is_keyword{$tok_kw} ) {
21473 # Since for and foreach may not be followed immediately
21474 # by an opening paren, we have to remember which keyword
21475 # is associated with the next '('
21476 if ( $is_for_foreach{$tok} ) {
21477 if ( new_statement_ok() ) {
21478 $want_paren = $tok;
21482 # recognize 'use' statements, which are special
21483 elsif ( $is_use_require{$tok} ) {
21484 $statement_type = $tok;
21485 error_if_expecting_OPERATOR()
21486 if ( $expecting == OPERATOR );
21489 # remember my and our to check for trailing ": shared"
21490 elsif ( $is_my_our{$tok} ) {
21491 $statement_type = $tok;
21494 # Check for misplaced 'elsif' and 'else', but allow isolated
21495 # else or elsif blocks to be formatted. This is indicated
21496 # by a last noblank token of ';'
21497 elsif ( $tok eq 'elsif' ) {
21498 if ( $last_nonblank_token ne ';'
21499 && $last_nonblank_block_type !~
21500 /^(if|elsif|unless)$/ )
21503 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
21507 elsif ( $tok eq 'else' ) {
21509 # patched for SWITCH/CASE
21510 if ( $last_nonblank_token ne ';'
21511 && $last_nonblank_block_type !~
21512 /^(if|elsif|unless|case|when)$/ )
21515 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
21519 elsif ( $tok eq 'continue' ) {
21520 if ( $last_nonblank_token ne ';'
21521 && $last_nonblank_block_type !~
21522 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
21525 # note: ';' '{' and '}' in list above
21526 # because continues can follow bare blocks;
21527 # ':' is labeled block
21528 warning("'$tok' should follow a block\n");
21532 # patch for SWITCH/CASE if 'case' and 'when are
21533 # treated as keywords.
21534 elsif ( $tok eq 'when' || $tok eq 'case' ) {
21535 $statement_type = $tok; # next '{' is block
21539 # check for inline label following
21540 # /^(redo|last|next|goto)$/
21541 elsif (( $last_nonblank_type eq 'k' )
21542 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
21548 # something else --
21551 scan_bare_identifier();
21552 if ( $type eq 'w' ) {
21554 if ( $expecting == OPERATOR ) {
21556 # don't complain about possible indirect object
21560 # sub new($) { ... }
21561 # $b = new A::; # calls A::new
21562 # $c = new A; # same thing but suspicious
21563 # This will call A::new but we have a 'new' in
21564 # main:: which looks like a constant.
21566 if ( $last_nonblank_type eq 'C' ) {
21567 if ( $tok !~ /::$/ ) {
21569 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
21570 Maybe indirectet object notation?
21575 error_if_expecting_OPERATOR("bareword");
21579 # mark bare words immediately followed by a paren as
21581 $next_tok = $$rtokens[ $i + 1 ];
21582 if ( $next_tok eq '(' ) {
21586 # mark bare words following a file test operator as
21587 # something that will expect an operator next.
21588 # patch 072901: unless followed immediately by a paren,
21589 # in which case it must be a function call (pid.t)
21590 if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) {
21594 # patch for SWITCH/CASE if 'case' and 'when are
21595 # not treated as keywords:
21599 && $brace_type[$brace_depth] eq 'switch'
21601 || ( $tok eq 'when'
21602 && $brace_type[$brace_depth] eq 'given' )
21605 $statement_type = $tok; # next '{' is block
21606 $type = 'k'; # for keyword syntax coloring
21609 # patch for SWITCH/CASE if switch and given not keywords
21610 # Switch is not a perl 5 keyword, but we will gamble
21611 # and mark switch followed by paren as a keyword. This
21612 # is only necessary to get html syntax coloring nice,
21613 # and does not commit this as being a switch/case.
21614 if ( $next_nonblank_token eq '('
21615 && ( $tok eq 'switch' || $tok eq 'given' ) )
21617 $type = 'k'; # for keyword syntax coloring
21623 ###############################################################
21624 # section 2: strings of digits
21625 ###############################################################
21626 elsif ( $pre_type eq 'd' ) {
21627 $expecting = operator_expected( $prev_type, $tok, $next_type );
21628 error_if_expecting_OPERATOR("Number")
21629 if ( $expecting == OPERATOR );
21631 if ( !defined($number) ) {
21633 # shouldn't happen - we should always get a number
21634 warning("non-number beginning with digit--program bug\n");
21635 report_definite_bug();
21639 ###############################################################
21640 # section 3: all other tokens
21641 ###############################################################
21644 last if ( $tok eq '#' );
21645 my $code = $tokenization_code->{$tok};
21648 operator_expected( $prev_type, $tok, $next_type );
21655 # -----------------------------
21656 # end of main tokenization loop
21657 # -----------------------------
21659 if ( $i_tok >= 0 ) {
21660 $output_token_type[$i_tok] = $type;
21661 $output_block_type[$i_tok] = $block_type;
21662 $output_container_type[$i_tok] = $container_type;
21663 $output_type_sequence[$i_tok] = $type_sequence;
21666 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
21667 $last_last_nonblank_token = $last_nonblank_token;
21668 $last_last_nonblank_type = $last_nonblank_type;
21669 $last_last_nonblank_block_type = $last_nonblank_block_type;
21670 $last_last_nonblank_container_type = $last_nonblank_container_type;
21671 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
21672 $last_nonblank_token = $tok;
21673 $last_nonblank_type = $type;
21674 $last_nonblank_block_type = $block_type;
21675 $last_nonblank_container_type = $container_type;
21676 $last_nonblank_type_sequence = $type_sequence;
21677 $last_nonblank_prototype = $prototype;
21680 # reset indentation level if necessary at a sub or package
21681 # in an attempt to recover from a nesting error
21682 if ( $level_in_tokenizer < 0 ) {
21683 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
21684 reset_indentation_level(0);
21685 brace_warning("resetting level to 0 at $1 $2\n");
21689 # all done tokenizing this line ...
21690 # now prepare the final list of tokens and types
21692 my @token_type = (); # stack of output token types
21693 my @block_type = (); # stack of output code block types
21694 my @container_type = (); # stack of output code container types
21695 my @type_sequence = (); # stack of output type sequence numbers
21696 my @tokens = (); # output tokens
21697 my @levels = (); # structural brace levels of output tokens
21698 my @slevels = (); # secondary nesting levels of output tokens
21699 my @nesting_tokens = (); # string of tokens leading to this depth
21700 my @nesting_types = (); # string of token types leading to this depth
21701 my @nesting_blocks = (); # string of block types leading to this depth
21702 my @nesting_lists = (); # string of list types leading to this depth
21703 my @ci_string = (); # string needed to compute continuation indentation
21704 my @container_environment = (); # BLOCK or LIST
21705 my $container_environment = '';
21706 my $im = -1; # previous $i value
21708 my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
21710 # =head1 Computing Token Indentation
21712 # The final section of the tokenizer forms tokens and also computes
21713 # parameters needed to find indentation. It is much easier to do it
21714 # in the tokenizer than elsewhere. Here is a brief description of how
21715 # indentation is computed. Perl::Tidy computes indentation as the sum
21718 # (1) structural indentation, such as if/else/elsif blocks
21719 # (2) continuation indentation, such as long parameter call lists.
21721 # These are occasionally called primary and secondary indentation.
21723 # Structural indentation is introduced by tokens of type '{', although
21724 # the actual tokens might be '{', '(', or '['. Structural indentation
21725 # is of two types: BLOCK and non-BLOCK. Default structural indentation
21726 # is 4 characters if the standard indentation scheme is used.
21728 # Continuation indentation is introduced whenever a line at BLOCK level
21729 # is broken before its termination. Default continuation indentation
21730 # is 2 characters in the standard indentation scheme.
21732 # Both types of indentation may be nested arbitrarily deep and
21733 # interlaced. The distinction between the two is somewhat arbitrary.
21735 # For each token, we will define two variables which would apply if
21736 # the current statement were broken just before that token, so that
21737 # that token started a new line:
21739 # $level = the structural indentation level,
21740 # $ci_level = the continuation indentation level
21742 # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
21743 # assuming defaults. However, in some special cases it is customary
21744 # to modify $ci_level from this strict value.
21746 # The total structural indentation is easy to compute by adding and
21747 # subtracting 1 from a saved value as types '{' and '}' are seen. The
21748 # running value of this variable is $level_in_tokenizer.
21750 # The total continuation is much more difficult to compute, and requires
21751 # several variables. These veriables are:
21753 # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
21754 # each indentation level, if there are intervening open secondary
21755 # structures just prior to that level.
21756 # $continuation_string_in_tokenizer = a string of 1's and 0's indicating
21757 # if the last token at that level is "continued", meaning that it
21758 # is not the first token of an expression.
21759 # $nesting_block_string = a string of 1's and 0's indicating, for each
21760 # indentation level, if the level is of type BLOCK or not.
21761 # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
21762 # $nesting_list_string = a string of 1's and 0's indicating, for each
21763 # indentation level, if it is is appropriate for list formatting.
21764 # If so, continuation indentation is used to indent long list items.
21765 # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
21766 # @slevel_stack = a stack of total nesting depths at each
21767 # structural indentation level, where "total nesting depth" means
21768 # the nesting depth that would occur if every nesting token -- '{', '[',
21769 # and '(' -- , regardless of context, is used to compute a nesting
21772 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
21773 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
21775 my ( $ci_string_i, $level_i, $nesting_block_string_i,
21776 $nesting_list_string_i, $nesting_token_string_i,
21777 $nesting_type_string_i, );
21779 foreach $i (@output_token_list) { # scan the list of pre-tokens indexes
21781 # self-checking for valid token types
21782 my $type = $output_token_type[$i];
21783 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
21784 $level_i = $level_in_tokenizer;
21786 # This can happen by running perltidy on non-scripts
21787 # although it could also be bug introduced by programming change.
21788 # Perl silently accepts a 032 (^Z) and takes it as the end
21789 if ( !$is_valid_token_type{$type} ) {
21790 my $val = ord($type);
21792 "unexpected character decimal $val ($type) in script\n");
21793 $tokenizer_self->{_in_error} = 1;
21796 # ----------------------------------------------------------------
21797 # TOKEN TYPE PATCHES
21798 # output __END__, __DATA__, and format as type 'k' instead of ';'
21799 # to make html colors correct, etc.
21800 my $fix_type = $type;
21801 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
21803 # output anonymous 'sub' as keyword
21804 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
21806 # -----------------------------------------------------------------
21808 $nesting_token_string_i = $nesting_token_string;
21809 $nesting_type_string_i = $nesting_type_string;
21810 $nesting_block_string_i = $nesting_block_string;
21811 $nesting_list_string_i = $nesting_list_string;
21813 # set primary indentation levels based on structural braces
21814 # Note: these are set so that the leading braces have a HIGHER
21815 # level than their CONTENTS, which is convenient for indentation
21816 # Also, define continuation indentation for each token.
21817 if ( $type eq '{' || $type eq 'L' ) {
21819 # use environment before updating
21820 $container_environment =
21821 $nesting_block_flag ? 'BLOCK'
21822 : $nesting_list_flag ? 'LIST'
21825 # if the difference between total nesting levels is not 1,
21826 # there are intervening non-structural nesting types between
21827 # this '{' and the previous unclosed '{'
21828 my $intervening_secondary_structure = 0;
21829 if (@slevel_stack) {
21830 $intervening_secondary_structure =
21831 $slevel_in_tokenizer - $slevel_stack[-1];
21834 # =head1 Continuation Indentation
21836 # Having tried setting continuation indentation both in the formatter and
21837 # in the tokenizer, I can say that setting it in the tokenizer is much,
21838 # much easier. The formatter already has too much to do, and can't
21839 # make decisions on line breaks without knowing what 'ci' will be at
21840 # arbitrary locations.
21842 # But a problem with setting the continuation indentation (ci) here
21843 # in the tokenizer is that we do not know where line breaks will actually
21844 # be. As a result, we don't know if we should propagate continuation
21845 # indentation to higher levels of structure.
21847 # For nesting of only structural indentation, we never need to do this.
21848 # For example, in a long if statement, like this
21850 # if ( !$output_block_type[$i]
21851 # && ($in_statement_continuation) )
21856 # the second line has ci but we do normally give the lines within the BLOCK
21857 # any ci. This would be true if we had blocks nested arbitrarily deeply.
21859 # But consider something like this, where we have created a break after
21860 # an opening paren on line 1, and the paren is not (currently) a
21861 # structural indentation token:
21863 # my $file = $menubar->Menubutton(
21864 # qw/-text File -underline 0 -menuitems/ => [
21866 # Cascade => '~View',
21870 # The second line has ci, so it would seem reasonable to propagate it
21871 # down, giving the third line 1 ci + 1 indentation. This suggests the
21872 # following rule, which is currently used to propagating ci down: if there
21873 # are any non-structural opening parens (or brackets, or braces), before
21874 # an opening structural brace, then ci is propagated down, and otherwise
21875 # not. The variable $intervening_secondary_structure contains this
21876 # information for the current token, and the string
21877 # "$ci_string_in_tokenizer" is a stack of previous values of this
21880 # save the current states
21881 push( @slevel_stack, 1 + $slevel_in_tokenizer );
21882 $level_in_tokenizer++;
21884 if ( $output_block_type[$i] ) {
21885 $nesting_block_flag = 1;
21886 $nesting_block_string .= '1';
21889 $nesting_block_flag = 0;
21890 $nesting_block_string .= '0';
21893 # we will use continuation indentation within containers
21894 # which are not blocks and not logical expressions
21896 if ( !$output_block_type[$i] ) {
21898 # propagate flag down at nested open parens
21899 if ( $output_container_type[$i] eq '(' ) {
21900 $bit = 1 if $nesting_list_flag;
21903 # use list continuation if not a logical grouping
21904 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
21908 $is_logical_container{ $output_container_type[$i] };
21911 $nesting_list_string .= $bit;
21912 $nesting_list_flag = $bit;
21914 $ci_string_in_tokenizer .=
21915 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
21916 $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
21917 $continuation_string_in_tokenizer .=
21918 ( $in_statement_continuation > 0 ) ? '1' : '0';
21920 # Sometimes we want to give an opening brace continuation indentation,
21921 # and sometimes not. For code blocks, we don't do it, so that the leading
21922 # '{' gets outdented, like this:
21924 # if ( !$output_block_type[$i]
21925 # && ($in_statement_continuation) )
21928 # For other types, we will give them continuation indentation. For example,
21929 # here is how a list looks with the opening paren indented:
21932 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
21933 # [ "homer", "marge", "bart" ], );
21935 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
21937 my $total_ci = $ci_string_sum;
21939 !$output_block_type[$i] # patch: skip for BLOCK
21940 && ($in_statement_continuation)
21943 $total_ci += $in_statement_continuation
21944 unless ( $ci_string_in_tokenizer =~ /1$/ );
21947 $ci_string_i = $total_ci;
21948 $in_statement_continuation = 0;
21951 elsif ( $type eq '}' || $type eq 'R' ) {
21953 # only a nesting error in the script would prevent popping here
21954 if ( @slevel_stack > 1 ) { pop(@slevel_stack); }
21956 $level_i = --$level_in_tokenizer;
21958 # restore previous level values
21959 if ( length($nesting_block_string) > 1 )
21960 { # true for valid script
21961 chop $nesting_block_string;
21962 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
21963 chop $nesting_list_string;
21964 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
21966 chop $ci_string_in_tokenizer;
21968 ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
21970 $in_statement_continuation =
21971 chop $continuation_string_in_tokenizer;
21973 # zero continuation flag at terminal BLOCK '}' which
21974 # ends a statement.
21975 if ( $output_block_type[$i] ) {
21977 # ...These include non-anonymous subs
21978 # note: could be sub ::abc { or sub 'abc
21979 if ( $output_block_type[$i] =~ m/^sub\s*/gc ) {
21981 # note: older versions of perl require the /gc modifier
21982 # here or else the \G does not work.
21983 if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) {
21984 $in_statement_continuation = 0;
21988 # ...and include all block types except user subs with
21989 # block prototypes and these: (sort|grep|map|do|eval)
21990 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
21992 $is_zero_continuation_block_type{ $output_block_type
21995 $in_statement_continuation = 0;
21998 # ..but these are not terminal types:
21999 # /^(sort|grep|map|do|eval)$/ )
22001 $is_not_zero_continuation_block_type{
22002 $output_block_type[$i] } )
22006 # ..and a block introduced by a label
22007 # /^\w+\s*:$/gc ) {
22008 elsif ( $output_block_type[$i] =~ /:$/ ) {
22009 $in_statement_continuation = 0;
22012 # ..nor user function with block prototype
22017 # If we are in a list, then
22018 # we must set continuatoin indentation at the closing
22019 # paren of something like this (paren after $check):
22022 # ( not defined $check )
22024 # or $check eq "new"
22025 # or $check eq "old",
22027 elsif ( $tok eq ')' ) {
22028 $in_statement_continuation = 1
22029 if $output_container_type[$i] =~ /^[;,\{\}]$/;
22033 # use environment after updating
22034 $container_environment =
22035 $nesting_block_flag ? 'BLOCK'
22036 : $nesting_list_flag ? 'LIST'
22038 $ci_string_i = $ci_string_sum + $in_statement_continuation;
22039 $nesting_block_string_i = $nesting_block_string;
22040 $nesting_list_string_i = $nesting_list_string;
22043 # not a structural indentation type..
22046 $container_environment =
22047 $nesting_block_flag ? 'BLOCK'
22048 : $nesting_list_flag ? 'LIST'
22051 # zero the continuation indentation at certain tokens so
22052 # that they will be at the same level as its container. For
22053 # commas, this simplifies the -lp indentation logic, which
22054 # counts commas. For ?: it makes them stand out.
22055 if ($nesting_list_flag) {
22056 if ( $type =~ /^[,\?\:]$/ ) {
22057 $in_statement_continuation = 0;
22061 # be sure binary operators get continuation indentation
22063 $container_environment
22064 && ( $type eq 'k' && $is_binary_keyword{$tok}
22065 || $is_binary_type{$type} )
22068 $in_statement_continuation = 1;
22071 # continuation indentation is sum of any open ci from previous
22072 # levels plus the current level
22073 $ci_string_i = $ci_string_sum + $in_statement_continuation;
22075 # update continuation flag ...
22076 # if this isn't a blank or comment..
22077 if ( $type ne 'b' && $type ne '#' ) {
22079 # and we are in a BLOCK
22080 if ($nesting_block_flag) {
22082 # the next token after a ';' and label starts a new stmt
22083 if ( $type eq ';' || $type eq 'J' ) {
22084 $in_statement_continuation = 0;
22087 # otherwise, we are continuing the current statement
22089 $in_statement_continuation = 1;
22093 # if we are not in a BLOCK..
22096 # do not use continuation indentation if not list
22097 # environment (could be within if/elsif clause)
22098 if ( !$nesting_list_flag ) {
22099 $in_statement_continuation = 0;
22102 # otherwise, the next token after a ',' starts a new term
22103 elsif ( $type eq ',' ) {
22104 $in_statement_continuation = 0;
22107 # otherwise, we are continuing the current term
22109 $in_statement_continuation = 1;
22115 if ( $level_in_tokenizer < 0 ) {
22116 unless ($saw_negative_indentation) {
22117 $saw_negative_indentation = 1;
22118 warning("Starting negative indentation\n");
22122 # set secondary nesting levels based on all continment token types
22123 # Note: these are set so that the nesting depth is the depth
22124 # of the PREVIOUS TOKEN, which is convenient for setting
22125 # the stength of token bonds
22126 my $slevel_i = $slevel_in_tokenizer;
22129 if ( $is_opening_type{$type} ) {
22130 $slevel_in_tokenizer++;
22131 $nesting_token_string .= $tok;
22132 $nesting_type_string .= $type;
22136 elsif ( $is_closing_type{$type} ) {
22137 $slevel_in_tokenizer--;
22138 my $char = chop $nesting_token_string;
22140 if ( $char ne $matching_start_token{$tok} ) {
22141 $nesting_token_string .= $char . $tok;
22142 $nesting_type_string .= $type;
22145 chop $nesting_type_string;
22149 push( @block_type, $output_block_type[$i] );
22150 push( @ci_string, $ci_string_i );
22151 push( @container_environment, $container_environment );
22152 push( @container_type, $output_container_type[$i] );
22153 push( @levels, $level_i );
22154 push( @nesting_tokens, $nesting_token_string_i );
22155 push( @nesting_types, $nesting_type_string_i );
22156 push( @slevels, $slevel_i );
22157 push( @token_type, $fix_type );
22158 push( @type_sequence, $output_type_sequence[$i] );
22159 push( @nesting_blocks, $nesting_block_string );
22160 push( @nesting_lists, $nesting_list_string );
22162 # now form the previous token
22165 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
22169 substr( $input_line, $$rtoken_map[$im], $num ) );
22175 $num = length($input_line) - $$rtoken_map[$im]; # make the last token
22177 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
22180 $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
22181 $tokenizer_self->{_in_quote} = $in_quote;
22182 $tokenizer_self->{_rhere_target_list} = \@here_target_list;
22184 $line_of_tokens->{_rtoken_type} = \@token_type;
22185 $line_of_tokens->{_rtokens} = \@tokens;
22186 $line_of_tokens->{_rblock_type} = \@block_type;
22187 $line_of_tokens->{_rcontainer_type} = \@container_type;
22188 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
22189 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
22190 $line_of_tokens->{_rlevels} = \@levels;
22191 $line_of_tokens->{_rslevels} = \@slevels;
22192 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
22193 $line_of_tokens->{_rci_levels} = \@ci_string;
22194 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
22198 } # end tokenize_this_line
22200 sub new_statement_ok {
22202 # return true if the current token can start a new statement
22204 return label_ok() # a label would be ok here
22206 || $last_nonblank_type eq 'J'; # or we follow a label
22212 # Decide if a bare word followed by a colon here is a label
22214 # if it follows an opening or closing code block curly brace..
22215 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
22216 && $last_nonblank_type eq $last_nonblank_token )
22219 # it is a label if and only if the curly encloses a code block
22220 return $brace_type[$brace_depth];
22223 # otherwise, it is a label if and only if it follows a ';'
22226 return ( $last_nonblank_type eq ';' );
22230 sub code_block_type {
22232 # Decide if this is a block of code, and its type.
22233 # Must be called only when $type = $token = '{'
22234 # The problem is to distinguish between the start of a block of code
22235 # and the start of an anonymous hash reference
22236 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
22237 # to indicate the type of code block. (For example, 'last_nonblank_token'
22238 # might be 'if' for an if block, 'else' for an else block, etc).
22240 # handle case of multiple '{'s
22242 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
22244 my ( $i, $rtokens, $rtoken_type ) = @_;
22245 if ( $last_nonblank_token eq '{'
22246 && $last_nonblank_type eq $last_nonblank_token )
22249 # opening brace where a statement may appear is probably
22250 # a code block but might be and anonymous hash reference
22251 if ( $brace_type[$brace_depth] ) {
22252 return decide_if_code_block( $i, $rtokens, $rtoken_type );
22255 # cannot start a code block within an anonymous hash
22261 elsif ( $last_nonblank_token eq ';' ) {
22263 # an opening brace where a statement may appear is probably
22264 # a code block but might be and anonymous hash reference
22265 return decide_if_code_block( $i, $rtokens, $rtoken_type );
22268 # handle case of '}{'
22269 elsif ($last_nonblank_token eq '}'
22270 && $last_nonblank_type eq $last_nonblank_token )
22273 # a } { situation ...
22274 # could be hash reference after code block..(blktype1.t)
22275 if ($last_nonblank_block_type) {
22276 return decide_if_code_block( $i, $rtokens, $rtoken_type );
22279 # must be a block if it follows a closing hash reference
22281 return $last_nonblank_token;
22285 # NOTE: braces after type characters start code blocks, but for
22286 # simplicity these are not identified as such. See also
22287 # sub is_non_structural_brace.
22288 # elsif ( $last_nonblank_type eq 't' ) {
22289 # return $last_nonblank_token;
22292 # brace after label:
22293 elsif ( $last_nonblank_type eq 'J' ) {
22294 return $last_nonblank_token;
22297 # otherwise, look at previous token. This must be a code block if
22298 # it follows any of these:
22299 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
22300 elsif ( $is_code_block_token{$last_nonblank_token} ) {
22301 return $last_nonblank_token;
22304 # or a sub definition
22305 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
22306 && $last_nonblank_token =~ /^sub\b/ )
22308 return $last_nonblank_token;
22311 # user-defined subs with block parameters (like grep/map/eval)
22312 elsif ( $last_nonblank_type eq 'G' ) {
22313 return $last_nonblank_token;
22317 elsif ( $last_nonblank_type eq 'w' ) {
22318 return decide_if_code_block( $i, $rtokens, $rtoken_type );
22321 # anything else must be anonymous hash reference
22327 sub decide_if_code_block {
22329 my ( $i, $rtokens, $rtoken_type ) = @_;
22330 my ( $next_nonblank_token, $i_next ) =
22331 find_next_nonblank_token( $i, $rtokens );
22333 # we are at a '{' where a statement may appear.
22334 # We must decide if this brace starts an anonymous hash or a code
22336 # return "" if anonymous hash, and $last_nonblank_token otherwise
22338 # initialize to be code BLOCK
22339 my $code_block_type = $last_nonblank_token;
22341 # Check for the common case of an empty anonymous hash reference:
22342 # Maybe something like sub { { } }
22343 if ( $next_nonblank_token eq '}' ) {
22344 $code_block_type = "";
22349 # To guess if this '{' is an anonymous hash reference, look ahead
22350 # and test as follows:
22352 # it is a hash reference if next come:
22353 # - a string or digit followed by a comma or =>
22354 # - bareword followed by =>
22355 # otherwise it is a code block
22357 # Examples of anonymous hash ref:
22361 # Examples of code blocks:
22362 # {1; print "hello\n", 1;}
22365 # We are only going to look ahead one more (nonblank/comment) line.
22366 # Strange formatting could cause a bad guess, but that's unlikely.
22367 my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
22368 my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
22369 my ( $rpre_tokens, $rpre_types ) =
22370 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
22371 # generous, and prevents
22373 # time in mangled files
22374 if ( defined($rpre_types) && @$rpre_types ) {
22375 push @pre_types, @$rpre_types;
22376 push @pre_tokens, @$rpre_tokens;
22379 # put a sentinal token to simplify stopping the search
22380 push @pre_types, '}';
22383 $jbeg = 1 if $pre_types[0] eq 'b';
22385 # first look for one of these
22387 # - bareword with leading -
22391 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
22393 # find the closing quote; don't worry about escapes
22394 my $quote_mark = $pre_types[$j];
22395 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
22396 if ( $pre_types[$k] eq $quote_mark ) {
22398 my $next = $pre_types[$j];
22403 elsif ( $pre_types[$j] eq 'd' ) {
22406 elsif ( $pre_types[$j] eq 'w' ) {
22407 unless ( $is_keyword{ $pre_tokens[$j] } ) {
22411 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
22414 if ( $j > $jbeg ) {
22416 $j++ if $pre_types[$j] eq 'b';
22418 # it's a hash ref if a comma or => follow next
22419 if ( $pre_types[$j] eq ','
22420 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
22422 $code_block_type = "";
22427 return $code_block_type;
22432 # report unexpected token type and show where it is
22433 my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_;
22434 $unexpected_error_count++;
22435 if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) {
22436 my $msg = "found $found where $expecting expected";
22437 my $pos = $$rpretoken_map[$i_tok];
22438 interrupt_logfile();
22439 my ( $offset, $numbered_line, $underline ) =
22440 make_numbered_line( $input_line_number, $input_line, $pos );
22441 $underline = write_on_underline( $underline, $pos - $offset, '^' );
22444 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
22445 my $pos_prev = $$rpretoken_map[$last_nonblank_i];
22447 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
22448 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
22451 $num = $pos - $pos_prev;
22453 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
22456 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
22457 $trailer = " (previous token underlined)";
22459 warning( $numbered_line . "\n" );
22460 warning( $underline . "\n" );
22461 warning( $msg . $trailer . "\n" );
22466 sub indicate_error {
22467 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
22468 interrupt_logfile();
22470 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
22474 sub write_error_indicator_pair {
22475 my ( $line_number, $input_line, $pos, $carrat ) = @_;
22476 my ( $offset, $numbered_line, $underline ) =
22477 make_numbered_line( $line_number, $input_line, $pos );
22478 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
22479 warning( $numbered_line . "\n" );
22480 $underline =~ s/\s*$//;
22481 warning( $underline . "\n" );
22484 sub make_numbered_line {
22486 # Given an input line, its line number, and a character position of
22487 # interest, create a string not longer than 80 characters of the form
22488 # $lineno: sub_string
22489 # such that the sub_string of $str contains the position of interest
22491 # Here is an example of what we want, in this case we add trailing
22492 # '...' because the line is long.
22494 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
22496 # Here is another example, this time in which we used leading '...'
22497 # because of excessive length:
22499 # 2: ... er of the World Wide Web Consortium's
22501 # input parameters are:
22502 # $lineno = line number
22503 # $str = the text of the line
22504 # $pos = position of interest (the error) : 0 = first character
22507 # - $offset = an offset which corrects the position in case we only
22508 # display part of a line, such that $pos-$offset is the effective
22509 # position from the start of the displayed line.
22510 # - $numbered_line = the numbered line as above,
22511 # - $underline = a blank 'underline' which is all spaces with the same
22512 # number of characters as the numbered line.
22514 my ( $lineno, $str, $pos ) = @_;
22515 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
22516 my $excess = length($str) - $offset - 68;
22517 my $numc = ( $excess > 0 ) ? 68 : undef;
22519 if ( defined($numc) ) {
22520 if ( $offset == 0 ) {
22521 $str = substr( $str, $offset, $numc - 4 ) . " ...";
22524 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
22529 if ( $offset == 0 ) {
22532 $str = "... " . substr( $str, $offset + 4 );
22536 my $numbered_line = sprintf( "%d: ", $lineno );
22537 $offset -= length($numbered_line);
22538 $numbered_line .= $str;
22539 my $underline = " " x length($numbered_line);
22540 return ( $offset, $numbered_line, $underline );
22543 sub write_on_underline {
22545 # The "underline" is a string that shows where an error is; it starts
22546 # out as a string of blanks with the same length as the numbered line of
22547 # code above it, and we have to add marking to show where an error is.
22548 # In the example below, we want to write the string '--^' just below
22549 # the line of bad code:
22551 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
22553 # We are given the current underline string, plus a position and a
22554 # string to write on it.
22556 # In the above example, there will be 2 calls to do this:
22557 # First call: $pos=19, pos_chr=^
22558 # Second call: $pos=16, pos_chr=---
22560 # This is a trivial thing to do with substr, but there is some
22563 my ( $underline, $pos, $pos_chr ) = @_;
22565 # check for error..shouldn't happen
22566 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
22569 my $excess = length($pos_chr) + $pos - length($underline);
22570 if ( $excess > 0 ) {
22571 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
22573 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
22574 return ($underline);
22577 sub is_non_structural_brace {
22579 # Decide if a brace or bracket is structural or non-structural
22580 # by looking at the previous token and type
22582 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
22583 # Tentatively deactivated because it caused the wrong operator expectation
22585 # $user = @vars[1] / 100;
22586 # Must update sub operator_expected before re-implementing.
22587 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
22591 # NOTE: braces after type characters start code blocks, but for
22592 # simplicity these are not identified as such. See also
22593 # sub code_block_type
22594 # if ($last_nonblank_type eq 't') {return 0}
22596 # otherwise, it is non-structural if it is decorated
22597 # by type information.
22598 # For example, the '{' here is non-structural: ${xxx}
22600 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
22602 # or if we follow a hash or array closing curly brace or bracket
22603 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
22604 # because the first '}' would have been given type 'R'
22605 || $last_nonblank_type =~ /^([R\]])$/
22609 sub operator_expected {
22611 # Many perl symbols have two or more meanings. For example, '<<'
22612 # can be a shift operator or a here-doc operator. The
22613 # interpretation of these symbols depends on the current state of
22614 # the tokenizer, which may either be expecting a term or an
22615 # operator. For this example, a << would be a shift if an operator
22616 # is expected, and a here-doc if a term is expected. This routine
22617 # is called to make this decision for any current token. It returns
22618 # one of three possible values:
22620 # OPERATOR - operator expected (or at least, not a term)
22621 # UNKNOWN - can't tell
22622 # TERM - a term is expected (or at least, not an operator)
22624 # The decision is based on what has been seen so far. This
22625 # information is stored in the "$last_nonblank_type" and
22626 # "$last_nonblank_token" variables. For example, if the
22627 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
22628 # if $last_nonblank_type is 'n' (numeric), we are expecting an
22631 # If a UNKNOWN is returned, the calling routine must guess. A major
22632 # goal of this tokenizer is to minimize the possiblity of returning
22633 # UNKNOWN, because a wrong guess can spoil the formatting of a
22636 # adding NEW_TOKENS: it is critically important that this routine be
22637 # updated to allow it to determine if an operator or term is to be
22638 # expected after the new token. Doing this simply involves adding
22639 # the new token character to one of the regexes in this routine or
22640 # to one of the hash lists
22641 # that it uses, which are initialized in the BEGIN section.
22643 my ( $prev_type, $tok, $next_type ) = @_;
22644 my $op_expected = UNKNOWN;
22646 #print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
22648 # Note: function prototype is available for token type 'U' for future
22649 # program development. It contains the leading and trailing parens,
22650 # and no blanks. It might be used to eliminate token type 'C', for
22651 # example (prototype = '()'). Thus:
22652 # if ($last_nonblank_type eq 'U') {
22653 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
22656 # A possible filehandle (or object) requires some care...
22657 if ( $last_nonblank_type eq 'Z' ) {
22660 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
22661 $op_expected = UNKNOWN;
22664 # For possible file handle like "$a", Perl uses weird parsing rules.
22666 # print $a/2,"/hi"; - division
22667 # print $a / 2,"/hi"; - division
22668 # print $a/ 2,"/hi"; - division
22669 # print $a /2,"/hi"; - pattern (and error)!
22670 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
22671 $op_expected = TERM;
22674 # Note when an operation is being done where a
22675 # filehandle might be expected, since a change in whitespace
22676 # could change the interpretation of the statement.
22678 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
22679 complain("operator in print statement not recommended\n");
22680 $op_expected = OPERATOR;
22685 # handle something after 'do' and 'eval'
22686 elsif ( $is_block_operator{$last_nonblank_token} ) {
22688 # something like $a = eval "expression";
22690 if ( $last_nonblank_type eq 'k' ) {
22691 $op_expected = TERM; # expression or list mode following keyword
22694 # something like $a = do { BLOCK } / 2;
22697 $op_expected = OPERATOR; # block mode following }
22701 # handle bare word..
22702 elsif ( $last_nonblank_type eq 'w' ) {
22704 # unfortunately, we can't tell what type of token to expect next
22705 # after most bare words
22706 $op_expected = UNKNOWN;
22709 # operator, but not term possible after these types
22710 # Note: moved ')' from type to token because parens in list context
22711 # get marked as '{' '}' now. This is a minor glitch in the following:
22712 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
22714 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
22715 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
22717 $op_expected = OPERATOR;
22719 # in a 'use' statement, numbers and v-strings are not true
22720 # numbers, so to avoid incorrect error messages, we will
22721 # mark them as unknown for now (use.t)
22722 # TODO: it would be much nicer to create a new token V for VERSION
22723 # number in a use statement. Then this could be a check on type V
22724 # and related patches which change $statement_type for '=>'
22725 # and ',' could be removed. Further, it would clean things up to
22726 # scan the 'use' statement with a separate subroutine.
22727 if ( ( $statement_type eq 'use' )
22728 && ( $last_nonblank_type =~ /^[nv]$/ ) )
22730 $op_expected = UNKNOWN;
22734 # no operator after many keywords, such as "die", "warn", etc
22735 elsif ( $expecting_term_token{$last_nonblank_token} ) {
22737 # patch for dor.t (defined or).
22738 # perl functions which may be unary operators
22739 # TODO: This list is incomplete, and these should be put
22742 && $next_type eq '/'
22743 && $last_nonblank_type eq 'k'
22744 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
22746 $op_expected = OPERATOR;
22749 $op_expected = TERM;
22753 # no operator after things like + - ** (i.e., other operators)
22754 elsif ( $expecting_term_types{$last_nonblank_type} ) {
22755 $op_expected = TERM;
22758 # a few operators, like "time", have an empty prototype () and so
22759 # take no parameters but produce a value to operate on
22760 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
22761 $op_expected = OPERATOR;
22764 # post-increment and decrement produce values to be operated on
22765 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
22766 $op_expected = OPERATOR;
22769 # no value to operate on after sub block
22770 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
22772 # a right brace here indicates the end of a simple block.
22773 # all non-structural right braces have type 'R'
22774 # all braces associated with block operator keywords have been given those
22775 # keywords as "last_nonblank_token" and caught above.
22776 # (This statement is order dependent, and must come after checking
22777 # $last_nonblank_token).
22778 elsif ( $last_nonblank_type eq '}' ) {
22780 # patch for dor.t (defined or).
22782 && $next_type eq '/'
22783 && $last_nonblank_token eq ']' )
22785 $op_expected = OPERATOR;
22788 $op_expected = TERM;
22792 # something else..what did I forget?
22795 # collecting diagnostics on unknown operator types..see what was missed
22796 $op_expected = UNKNOWN;
22798 "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
22802 TOKENIZER_DEBUG_FLAG_EXPECT && do {
22804 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
22806 return $op_expected;
22809 # The following routines keep track of nesting depths of the nesting
22810 # types, ( [ { and ?. This is necessary for determining the indentation
22811 # level, and also for debugging programs. Not only do they keep track of
22812 # nesting depths of the individual brace types, but they check that each
22813 # of the other brace types is balanced within matching pairs. For
22814 # example, if the program sees this sequence:
22818 # then it can determine that there is an extra left paren somewhere
22819 # between the { and the }. And so on with every other possible
22820 # combination of outer and inner brace types. For another
22825 # which has an extra ] within the parens.
22827 # The brace types have indexes 0 .. 3 which are indexes into
22830 # The pair ? : are treated as just another nesting type, with ? acting
22831 # as the opening brace and : acting as the closing brace.
22835 # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
22837 # saves the nesting depth of brace type $b (where $b is either of the other
22838 # nesting types) when brace type $a enters a new depth. When this depth
22839 # decreases, a check is made that the current depth of brace types $b is
22840 # unchanged, or otherwise there must have been an error. This can
22841 # be very useful for localizing errors, particularly when perl runs to
22842 # the end of a large file (such as this one) and announces that there
22843 # is a problem somewhere.
22845 # A numerical sequence number is maintained for every nesting type,
22846 # so that each matching pair can be uniquely identified in a simple
22849 sub increase_nesting_depth {
22850 my ( $a, $i_tok ) = @_;
22852 $current_depth[$a]++;
22854 # Sequence numbers increment by number of items. This keeps
22855 # a unique set of numbers but still allows the relative location
22856 # of any type to be determined.
22857 $nesting_sequence_number[$a] += scalar(@closing_brace_names);
22858 my $seqno = $nesting_sequence_number[$a];
22859 $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
22861 my $pos = $$rpretoken_map[$i_tok];
22862 $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
22863 [ $input_line_number, $input_line, $pos ];
22865 for $b ( 0 .. $#closing_brace_names ) {
22866 next if ( $b == $a );
22867 $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
22872 sub decrease_nesting_depth {
22874 my ( $a, $i_tok ) = @_;
22875 my $pos = $$rpretoken_map[$i_tok];
22879 if ( $current_depth[$a] > 0 ) {
22881 $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
22883 # check that any brace types $b contained within are balanced
22884 for $b ( 0 .. $#closing_brace_names ) {
22885 next if ( $b == $a );
22887 unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
22888 $current_depth[$b] )
22890 my $diff = $current_depth[$b] -
22891 $depth_array[$a][$b][ $current_depth[$a] ];
22893 # don't whine too many times
22894 my $saw_brace_error = get_saw_brace_error();
22896 $saw_brace_error <= MAX_NAG_MESSAGES
22898 # if too many closing types have occured, we probably
22899 # already caught this error
22900 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
22903 interrupt_logfile();
22905 $starting_line_of_current_depth[$a][ $current_depth[$a] ];
22907 my $rel = [ $input_line_number, $input_line, $pos ];
22911 if ( $diff == 1 || $diff == -1 ) {
22919 ? $opening_brace_names[$b]
22920 : $closing_brace_names[$b];
22921 write_error_indicator_pair( @$rsl, '^' );
22923 Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
22928 $starting_line_of_current_depth[$b]
22929 [ $current_depth[$b] ];
22932 " The most recent un-matched $bname is on line $ml\n";
22933 write_error_indicator_pair( @$rml, '^' );
22935 write_error_indicator_pair( @$rel, '^' );
22939 increment_brace_error();
22942 $current_depth[$a]--;
22946 my $saw_brace_error = get_saw_brace_error();
22947 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
22949 There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
22951 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
22953 increment_brace_error();
22958 sub check_final_nesting_depths {
22961 for $a ( 0 .. $#closing_brace_names ) {
22963 if ( $current_depth[$a] ) {
22964 my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
22967 Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
22968 The most recent un-matched $opening_brace_names[$a] is on line $sl
22970 indicate_error( $msg, @$rsl, '^' );
22971 increment_brace_error();
22976 sub numerator_expected {
22978 # this is a filter for a possible numerator, in support of guessing
22979 # for the / pattern delimiter token.
22984 # Note: I am using the convention that variables ending in
22985 # _expected have these 3 possible values.
22986 my ( $i, $rtokens ) = @_;
22987 my $next_token = $$rtokens[ $i + 1 ];
22988 if ( $next_token eq '=' ) { $i++; } # handle /=
22989 my ( $next_nonblank_token, $i_next ) =
22990 find_next_nonblank_token( $i, $rtokens );
22992 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
22997 if ( $next_nonblank_token =~ /^\s*$/ ) {
23006 sub pattern_expected {
23008 # This is the start of a filter for a possible pattern.
23009 # It looks at the token after a possbible pattern and tries to
23010 # determine if that token could end a pattern.
23015 my ( $i, $rtokens ) = @_;
23016 my $next_token = $$rtokens[ $i + 1 ];
23017 if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier
23018 my ( $next_nonblank_token, $i_next ) =
23019 find_next_nonblank_token( $i, $rtokens );
23021 # list of tokens which may follow a pattern
23022 # (can probably be expanded)
23023 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
23029 if ( $next_nonblank_token =~ /^\s*$/ ) {
23038 sub find_next_nonblank_token_on_this_line {
23039 my ( $i, $rtokens ) = @_;
23040 my $next_nonblank_token;
23042 if ( $i < $max_token_index ) {
23043 $next_nonblank_token = $$rtokens[ ++$i ];
23045 if ( $next_nonblank_token =~ /^\s*$/ ) {
23047 if ( $i < $max_token_index ) {
23048 $next_nonblank_token = $$rtokens[ ++$i ];
23053 $next_nonblank_token = "";
23055 return ( $next_nonblank_token, $i );
23058 sub find_next_nonblank_token {
23059 my ( $i, $rtokens ) = @_;
23061 if ( $i >= $max_token_index ) {
23063 if ( !$peeked_ahead ) {
23065 $rtokens = peek_ahead_for_nonblank_token($rtokens);
23068 my $next_nonblank_token = $$rtokens[ ++$i ];
23070 if ( $next_nonblank_token =~ /^\s*$/ ) {
23071 $next_nonblank_token = $$rtokens[ ++$i ];
23073 return ( $next_nonblank_token, $i );
23076 sub peek_ahead_for_n_nonblank_pre_tokens {
23078 # returns next n pretokens if they exist
23079 # returns undef's if hits eof without seeing any pretokens
23080 my $max_pretokens = shift;
23083 my ( $rpre_tokens, $rmap, $rpre_types );
23085 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23087 $line =~ s/^\s*//; # trim leading blanks
23088 next if ( length($line) <= 0 ); # skip blank
23089 next if ( $line =~ /^#/ ); # skip comment
23090 ( $rpre_tokens, $rmap, $rpre_types ) =
23091 pre_tokenize( $line, $max_pretokens );
23094 return ( $rpre_tokens, $rpre_types );
23097 # look ahead for next non-blank, non-comment line of code
23098 sub peek_ahead_for_nonblank_token {
23099 my $rtokens = shift;
23103 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23105 $line =~ s/^\s*//; # trim leading blanks
23106 next if ( length($line) <= 0 ); # skip blank
23107 next if ( $line =~ /^#/ ); # skip comment
23108 my ( $rtok, $rmap, $rtype ) =
23109 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
23110 my $j = $max_token_index + 1;
23113 foreach $tok (@$rtok) {
23114 last if ( $tok =~ "\n" );
23115 $$rtokens[ ++$j ] = $tok;
23124 # Break a string, $str, into a sequence of preliminary tokens. We
23125 # are interested in these types of tokens:
23126 # words (type='w'), example: 'max_tokens_wanted'
23127 # digits (type = 'd'), example: '0755'
23128 # whitespace (type = 'b'), example: ' '
23129 # any other single character (i.e. punct; type = the character itself).
23130 # We cannot do better than this yet because we might be in a quoted
23131 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
23133 my ( $str, $max_tokens_wanted ) = @_;
23135 # we return references to these 3 arrays:
23136 my @tokens = (); # array of the tokens themselves
23137 my @token_map = (0); # string position of start of each token
23138 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
23143 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
23146 # note that this must come before words!
23147 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
23150 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
23152 # single-character punctuation
23153 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
23157 return ( \@tokens, \@token_map, \@type );
23161 push @token_map, pos($str);
23163 } while ( --$max_tokens_wanted != 0 );
23165 return ( \@tokens, \@token_map, \@type );
23170 # this is an old debug routine
23171 my ( $rtokens, $rtoken_map ) = @_;
23172 my $num = scalar(@$rtokens);
23175 for ( $i = 0 ; $i < $num ; $i++ ) {
23176 my $len = length( $$rtokens[$i] );
23177 print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
23181 sub find_angle_operator_termination {
23183 # We are looking at a '<' and want to know if it is an angle operator.
23184 # We are to return:
23185 # $i = pretoken index of ending '>' if found, current $i otherwise
23186 # $type = 'Q' if found, '>' otherwise
23187 my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_;
23190 pos($input_line) = 1 + $$rtoken_map[$i];
23194 # we just have to find the next '>' if a term is expected
23195 if ( $expecting == TERM ) { $filter = '[\>]' }
23197 # we have to guess if we don't know what is expected
23198 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
23200 # shouldn't happen - we shouldn't be here if operator is expected
23201 else { warning("Program Bug in find_angle_operator_termination\n") }
23203 # To illustrate what we might be looking at, in case we are
23204 # guessing, here are some examples of valid angle operators
23211 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
23212 # <${PREFIX}*img*.$IMAGE_TYPE>
23213 # <img*.$IMAGE_TYPE>
23214 # <Timg*.$IMAGE_TYPE>
23215 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
23217 # Here are some examples of lines which do not have angle operators:
23218 # return undef unless $self->[2]++ < $#{$self->[1]};
23221 # the following line from dlister.pl caused trouble:
23222 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
23224 # If the '<' starts an angle operator, it must end on this line and
23225 # it must not have certain characters like ';' and '=' in it. I use
23226 # this to limit the testing. This filter should be improved if
23229 if ( $input_line =~ /($filter)/g ) {
23233 # We MAY have found an angle operator termination if we get
23234 # here, but we need to do more to be sure we haven't been
23236 my $pos = pos($input_line);
23238 my $pos_beg = $$rtoken_map[$i];
23239 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
23241 # Reject if the closing '>' follows a '-' as in:
23242 # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
23243 if ( $expecting eq UNKNOWN ) {
23244 my $check = substr( $input_line, $pos - 2, 1 );
23245 if ( $check eq '-' ) {
23246 return ( $i, $type );
23250 ######################################debug#####
23251 #write_diagnostics( "ANGLE? :$str\n");
23252 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
23253 ######################################debug#####
23256 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23258 # It may be possible that a quote ends midway in a pretoken.
23259 # If this happens, it may be necessary to split the pretoken.
23262 "Possible tokinization error..please check this line\n");
23263 report_possible_bug();
23266 # Now let's see where we stand....
23267 # OK if math op not possible
23268 if ( $expecting == TERM ) {
23271 # OK if there are no more than 2 pre-tokens inside
23272 # (not possible to write 2 token math between < and >)
23273 # This catches most common cases
23274 elsif ( $i <= $i_beg + 3 ) {
23275 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
23281 # Let's try a Brace Test: any braces inside must balance
23283 while ( $str =~ /\{/g ) { $br++ }
23284 while ( $str =~ /\}/g ) { $br-- }
23286 while ( $str =~ /\[/g ) { $sb++ }
23287 while ( $str =~ /\]/g ) { $sb-- }
23289 while ( $str =~ /\(/g ) { $pr++ }
23290 while ( $str =~ /\)/g ) { $pr-- }
23292 # if braces do not balance - not angle operator
23293 if ( $br || $sb || $pr ) {
23297 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
23300 # we should keep doing more checks here...to be continued
23301 # Tentatively accepting this as a valid angle operator.
23302 # There are lots more things that can be checked.
23305 "ANGLE-Guessing yes: $str expecting=$expecting\n");
23306 write_logfile_entry("Guessing angle operator here: $str\n");
23311 # didn't find ending >
23313 if ( $expecting == TERM ) {
23314 warning("No ending > for angle operator\n");
23318 return ( $i, $type );
23321 sub inverse_pretoken_map {
23323 # Starting with the current pre_token index $i, scan forward until
23324 # finding the index of the next pre_token whose position is $pos.
23325 my ( $i, $pos, $rtoken_map ) = @_;
23328 while ( ++$i <= $max_token_index ) {
23330 if ( $pos <= $$rtoken_map[$i] ) {
23332 # Let the calling routine handle errors in which we do not
23333 # land on a pre-token boundary. It can happen by running
23334 # perltidy on some non-perl scripts, for example.
23335 if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
23340 return ( $i, $error );
23343 sub guess_if_pattern_or_conditional {
23345 # this routine is called when we have encountered a ? following an
23346 # unknown bareword, and we must decide if it starts a pattern or not
23347 # input parameters:
23348 # $i - token index of the ? starting possible pattern
23349 # output parameters:
23350 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
23351 # msg = a warning or diagnostic message
23352 my ( $i, $rtokens, $rtoken_map ) = @_;
23353 my $is_pattern = 0;
23354 my $msg = "guessing that ? after $last_nonblank_token starts a ";
23356 if ( $i >= $max_token_index ) {
23357 $msg .= "conditional (no end to pattern found on the line)\n";
23362 my $next_token = $$rtokens[$i]; # first token after ?
23364 # look for a possible ending ? on this line..
23366 my $quote_depth = 0;
23367 my $quote_character = '';
23369 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
23370 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
23371 $quote_pos, $quote_depth );
23375 # we didn't find an ending ? on this line,
23376 # so we bias towards conditional
23378 $msg .= "conditional (no ending ? on this line)\n";
23380 # we found an ending ?, so we bias towards a pattern
23384 if ( pattern_expected( $i, $rtokens ) >= 0 ) {
23386 $msg .= "pattern (found ending ? and pattern expected)\n";
23389 $msg .= "pattern (uncertain, but found ending ?)\n";
23393 return ( $is_pattern, $msg );
23396 sub guess_if_pattern_or_division {
23398 # this routine is called when we have encountered a / following an
23399 # unknown bareword, and we must decide if it starts a pattern or is a
23401 # input parameters:
23402 # $i - token index of the / starting possible pattern
23403 # output parameters:
23404 # $is_pattern = 0 if probably division, =1 if probably a pattern
23405 # msg = a warning or diagnostic message
23406 my ( $i, $rtokens, $rtoken_map ) = @_;
23407 my $is_pattern = 0;
23408 my $msg = "guessing that / after $last_nonblank_token starts a ";
23410 if ( $i >= $max_token_index ) {
23411 "division (no end to pattern found on the line)\n";
23415 my $divide_expected = numerator_expected( $i, $rtokens );
23417 my $next_token = $$rtokens[$i]; # first token after slash
23419 # look for a possible ending / on this line..
23421 my $quote_depth = 0;
23422 my $quote_character = '';
23424 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
23425 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
23426 $quote_pos, $quote_depth );
23430 # we didn't find an ending / on this line,
23431 # so we bias towards division
23432 if ( $divide_expected >= 0 ) {
23434 $msg .= "division (no ending / on this line)\n";
23437 $msg = "multi-line pattern (division not possible)\n";
23443 # we found an ending /, so we bias towards a pattern
23446 if ( pattern_expected( $i, $rtokens ) >= 0 ) {
23448 if ( $divide_expected >= 0 ) {
23450 if ( $i - $ibeg > 60 ) {
23451 $msg .= "division (matching / too distant)\n";
23455 $msg .= "pattern (but division possible too)\n";
23461 $msg .= "pattern (division not possible)\n";
23466 if ( $divide_expected >= 0 ) {
23468 $msg .= "division (pattern not possible)\n";
23473 "pattern (uncertain, but division would not work here)\n";
23478 return ( $is_pattern, $msg );
23481 sub find_here_doc {
23483 # find the target of a here document, if any
23484 # input parameters:
23485 # $i - token index of the second < of <<
23486 # ($i must be less than the last token index if this is called)
23487 # output parameters:
23488 # $found_target = 0 didn't find target; =1 found target
23489 # HERE_TARGET - the target string (may be empty string)
23490 # $i - unchanged if not here doc,
23491 # or index of the last token of the here target
23492 my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
23494 my $found_target = 0;
23495 my $here_doc_target = '';
23496 my $here_quote_character = '';
23497 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
23498 $next_token = $$rtokens[ $i + 1 ];
23500 # perl allows a backslash before the target string (heredoc.t)
23502 if ( $next_token eq '\\' ) {
23504 $next_token = $$rtokens[ $i + 2 ];
23507 ( $next_nonblank_token, $i_next_nonblank ) =
23508 find_next_nonblank_token_on_this_line( $i, $rtokens );
23510 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
23513 my $quote_depth = 0;
23516 ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) =
23517 follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
23518 $here_quote_character, $quote_pos, $quote_depth );
23520 if ($in_quote) { # didn't find end of quote, so no target found
23523 else { # found ending quote
23528 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
23529 $tokj = $$rtokens[$j];
23531 # we have to remove any backslash before the quote character
23532 # so that the here-doc-target exactly matches this string
23536 && $$rtokens[ $j + 1 ] eq $here_quote_character );
23537 $here_doc_target .= $tokj;
23542 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
23544 write_logfile_entry(
23545 "found blank here-target after <<; suggest using \"\"\n");
23548 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
23550 my $here_doc_expected;
23551 if ( $expecting == UNKNOWN ) {
23552 $here_doc_expected = guess_if_here_doc($next_token);
23555 $here_doc_expected = 1;
23558 if ($here_doc_expected) {
23560 $here_doc_target = $next_token;
23567 if ( $expecting == TERM ) {
23569 write_logfile_entry("Note: bare here-doc operator <<\n");
23576 # patch to neglect any prepended backslash
23577 if ( $found_target && $backslash ) { $i++ }
23579 return ( $found_target, $here_doc_target, $here_quote_character, $i );
23582 # try to resolve here-doc vs. shift by looking ahead for
23583 # non-code or the end token (currently only looks for end token)
23584 # returns 1 if it is probably a here doc, 0 if not
23585 sub guess_if_here_doc {
23587 # This is how many lines we will search for a target as part of the
23588 # guessing strategy. It is a constant because there is probably
23589 # little reason to change it.
23590 use constant HERE_DOC_WINDOW => 40;
23592 my $next_token = shift;
23593 my $here_doc_expected = 0;
23596 my $msg = "checking <<";
23598 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
23602 if ( $line =~ /^$next_token$/ ) {
23603 $msg .= " -- found target $next_token ahead $k lines\n";
23604 $here_doc_expected = 1; # got it
23607 last if ( $k >= HERE_DOC_WINDOW );
23610 unless ($here_doc_expected) {
23612 if ( !defined($line) ) {
23613 $here_doc_expected = -1; # hit eof without seeing target
23614 $msg .= " -- must be shift; target $next_token not in file\n";
23617 else { # still unsure..taking a wild guess
23619 if ( !$is_constant{$current_package}{$next_token} ) {
23620 $here_doc_expected = 1;
23622 " -- guessing it's a here-doc ($next_token not a constant)\n";
23626 " -- guessing it's a shift ($next_token is a constant)\n";
23630 write_logfile_entry($msg);
23631 return $here_doc_expected;
23636 # follow (or continue following) quoted string or pattern
23637 # $in_quote return code:
23638 # 0 - ok, found end
23639 # 1 - still must find end of quote whose target is $quote_character
23640 # 2 - still looking for end of first of two quotes
23641 my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
23645 if ( $in_quote == 2 ) { # two quotes/patterns to follow
23647 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
23648 follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
23649 $quote_pos, $quote_depth );
23651 if ( $in_quote == 1 ) {
23652 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
23653 $quote_character = '';
23657 if ( $in_quote == 1 ) { # one (more) quote to follow
23659 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
23660 follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
23661 $quote_pos, $quote_depth );
23663 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
23666 sub scan_number_do {
23668 # scan a number in any of the formats that Perl accepts
23669 # Underbars (_) are allowed in decimal numbers.
23670 # input parameters -
23671 # $input_line - the string to scan
23672 # $i - pre_token index to start scanning
23673 # $rtoken_map - reference to the pre_token map giving starting
23674 # character position in $input_line of token $i
23675 # output parameters -
23676 # $i - last pre_token index of the number just scanned
23677 # number - the number (characters); or undef if not a number
23679 my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
23680 my $pos_beg = $$rtoken_map[$i];
23683 my $number = undef;
23684 my $type = $input_type;
23686 my $first_char = substr( $input_line, $pos_beg, 1 );
23688 # Look for bad starting characters; Shouldn't happen..
23689 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
23690 warning("Program bug - scan_number given character $first_char\n");
23691 report_definite_bug();
23692 return ( $i, $type, $number );
23695 # handle v-string without leading 'v' character ('Two Dot' rule)
23697 # TODO: v-strings may contain underscores
23698 pos($input_line) = $pos_beg;
23699 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
23700 $pos = pos($input_line);
23701 my $numc = $pos - $pos_beg;
23702 $number = substr( $input_line, $pos_beg, $numc );
23704 unless ($saw_v_string) { report_v_string($number) }
23707 # handle octal, hex, binary
23708 if ( !defined($number) ) {
23709 pos($input_line) = $pos_beg;
23710 if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
23712 $pos = pos($input_line);
23713 my $numc = $pos - $pos_beg;
23714 $number = substr( $input_line, $pos_beg, $numc );
23720 if ( !defined($number) ) {
23721 pos($input_line) = $pos_beg;
23723 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
23724 $pos = pos($input_line);
23726 # watch out for things like 0..40 which would give 0. by this;
23727 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
23728 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
23732 my $numc = $pos - $pos_beg;
23733 $number = substr( $input_line, $pos_beg, $numc );
23738 # filter out non-numbers like e + - . e2 .e3 +e6
23739 # the rule: at least one digit, and any 'e' must be preceded by a digit
23741 $number !~ /\d/ # no digits
23742 || ( $number =~ /^(.*)[eE]/
23743 && $1 !~ /\d/ ) # or no digits before the 'e'
23747 $type = $input_type;
23748 return ( $i, $type, $number );
23751 # Found a number; now we must convert back from character position
23752 # to pre_token index. An error here implies user syntax error.
23753 # An example would be an invalid octal number like '009'.
23755 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23756 if ($error) { warning("Possibly invalid number\n") }
23758 return ( $i, $type, $number );
23761 sub scan_bare_identifier_do {
23763 # this routine is called to scan a token starting with an alphanumeric
23764 # variable or package separator, :: or '.
23766 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_;
23768 my $package = undef;
23772 # we have to back up one pretoken at a :: since each : is one pretoken
23773 if ( $tok eq '::' ) { $i_beg-- }
23774 if ( $tok eq '->' ) { $i_beg-- }
23775 my $pos_beg = $$rtoken_map[$i_beg];
23776 pos($input_line) = $pos_beg;
23783 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
23785 my $pos = pos($input_line);
23786 my $numc = $pos - $pos_beg;
23787 $tok = substr( $input_line, $pos_beg, $numc );
23789 # type 'w' includes anything without leading type info
23790 # ($,%,@,*) including something like abc::def::ghi
23794 if ( defined($2) ) { $sub_name = $2; }
23795 if ( defined($1) ) {
23798 # patch: don't allow isolated package name which just ends
23799 # in the old style package separator (single quote). Example:
23801 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
23805 $package =~ s/\'/::/g;
23806 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
23807 $package =~ s/::$//;
23810 $package = $current_package;
23812 if ( $is_keyword{$tok} ) {
23817 # if it is a bareword..
23818 if ( $type eq 'w' ) {
23820 # check for v-string with leading 'v' type character
23821 # (This seems to have presidence over filehandle, type 'Y')
23822 if ( $tok =~ /^v\d[_\d]*$/ ) {
23824 # we only have the first part - something like 'v101' -
23826 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
23827 $pos = pos($input_line);
23828 $numc = $pos - $pos_beg;
23829 $tok = substr( $input_line, $pos_beg, $numc );
23833 # warn if this version can't handle v-strings
23834 unless ($saw_v_string) { report_v_string($tok) }
23837 elsif ( $is_constant{$package}{$sub_name} ) {
23841 # bareword after sort has implied empty prototype; for example:
23842 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
23843 # This has priority over whatever the user has specified.
23844 elsif ($last_nonblank_token eq 'sort'
23845 && $last_nonblank_type eq 'k' )
23850 # Note: strangely, perl does not seem to really let you create
23851 # functions which act like eval and do, in the sense that eval
23852 # and do may have operators following the final }, but any operators
23853 # that you create with prototype (&) apparently do not allow
23854 # trailing operators, only terms. This seems strange.
23855 # If this ever changes, here is the update
23856 # to make perltidy behave accordingly:
23858 # elsif ( $is_block_function{$package}{$tok} ) {
23859 # $tok='eval'; # patch to do braces like eval - doesn't work
23862 # FIXME: This could become a separate type to allow for different
23864 elsif ( $is_block_function{$package}{$sub_name} ) {
23868 elsif ( $is_block_list_function{$package}{$sub_name} ) {
23871 elsif ( $is_user_function{$package}{$sub_name} ) {
23873 $prototype = $user_function_prototype{$package}{$sub_name};
23876 # check for indirect object
23879 # added 2001-03-27: must not be followed immediately by '('
23881 ( $input_line !~ m/\G\(/gc )
23886 # preceded by keyword like 'print', 'printf' and friends
23887 $is_indirect_object_taker{$last_nonblank_token}
23889 # or preceded by something like 'print(' or 'printf('
23891 ( $last_nonblank_token eq '(' )
23892 && $is_indirect_object_taker{ $paren_type[$paren_depth]
23900 # may not be indirect object unless followed by a space
23901 if ( $input_line =~ m/\G\s+/gc ) {
23905 # Perl's indirect object notation is a very bad
23906 # thing and can cause subtle bugs, especially for
23907 # beginning programmers. And I haven't even been
23908 # able to figure out a sane warning scheme which
23909 # doesn't get in the way of good scripts.
23911 # Complain if a filehandle has any lower case
23912 # letters. This is suggested good practice, but the
23913 # main reason for this warning is that prior to
23914 # release 20010328, perltidy incorrectly parsed a
23915 # function call after a print/printf, with the
23916 # result that a space got added before the opening
23917 # paren, thereby converting the function name to a
23918 # filehandle according to perl's weird rules. This
23919 # will not usually generate a syntax error, so this
23920 # is a potentially serious bug. By warning
23921 # of filehandles with any lower case letters,
23922 # followed by opening parens, we will help the user
23923 # find almost all of these older errors.
23924 # use 'sub_name' because something like
23925 # main::MYHANDLE is ok for filehandle
23926 if ( $sub_name =~ /[a-z]/ ) {
23928 # could be bug caused by older perltidy if
23930 if ( $input_line =~ m/\G\s*\(/gc ) {
23932 "Caution: unknown word '$tok' in indirect object slot\n"
23938 # bareword not followed by a space -- may not be filehandle
23939 # (may be function call defined in a 'use' statement)
23946 # Now we must convert back from character position
23947 # to pre_token index.
23948 # I don't think an error flag can occur here ..but who knows
23950 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23952 warning("scan_bare_identifier: Possibly invalid tokenization\n");
23956 # no match but line not blank - could be syntax error
23957 # perl will take '::' alone without complaint
23961 # change this warning to log message if it becomes annoying
23962 warning("didn't find identifier after leading ::\n");
23964 return ( $i, $tok, $type, $prototype );
23969 # This is the new scanner and will eventually replace scan_identifier.
23970 # Only type 'sub' and 'package' are implemented.
23971 # Token types $ * % @ & -> are not yet implemented.
23973 # Scan identifier following a type token.
23974 # The type of call depends on $id_scan_state: $id_scan_state = ''
23975 # for starting call, in which case $tok must be the token defining
23978 # If the type token is the last nonblank token on the line, a value
23979 # of $id_scan_state = $tok is returned, indicating that further
23980 # calls must be made to get the identifier. If the type token is
23981 # not the last nonblank token on the line, the identifier is
23982 # scanned and handled and a value of '' is returned.
23984 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_;
23986 my ( $i_beg, $pos_beg );
23988 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
23989 #my ($a,$b,$c) = caller;
23990 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
23992 # on re-entry, start scanning at first token on the line
23993 if ($id_scan_state) {
23998 # on initial entry, start scanning just after type token
24001 $id_scan_state = $tok;
24005 # find $i_beg = index of next nonblank token,
24006 # and handle empty lines
24007 my $blank_line = 0;
24008 my $next_nonblank_token = $$rtokens[$i_beg];
24009 if ( $i_beg > $max_token_index ) {
24014 # only a '#' immediately after a '$' is not a comment
24015 if ( $next_nonblank_token eq '#' ) {
24016 unless ( $tok eq '$' ) {
24021 if ( $next_nonblank_token =~ /^\s/ ) {
24022 ( $next_nonblank_token, $i_beg ) =
24023 find_next_nonblank_token_on_this_line( $i_beg, $rtokens );
24024 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
24030 # handle non-blank line; identifier, if any, must follow
24031 unless ($blank_line) {
24033 if ( $id_scan_state eq 'sub' ) {
24034 ( $i, $tok, $type, $id_scan_state ) =
24035 do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens,
24036 $rtoken_map, $id_scan_state );
24039 elsif ( $id_scan_state eq 'package' ) {
24040 ( $i, $tok, $type ) =
24041 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
24043 $id_scan_state = '';
24047 warning("invalid token in scan_id: $tok\n");
24048 $id_scan_state = '';
24052 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
24054 # shouldn't happen:
24056 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
24058 report_definite_bug();
24061 TOKENIZER_DEBUG_FLAG_NSCAN && do {
24063 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
24065 return ( $i, $tok, $type, $id_scan_state );
24070 # saved package and subnames in case prototype is on separate line
24071 my ( $package_saved, $subname_saved );
24075 # do_scan_sub parses a sub name and prototype
24076 # it is called with $i_beg equal to the index of the first nonblank
24077 # token following a 'sub' token.
24079 # TODO: add future error checks to be sure we have a valid
24080 # sub name. For example, 'sub &doit' is wrong. Also, be sure
24081 # a name is given if and only if a non-anonymous sub is
24084 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
24087 $id_scan_state = ""; # normally we get everything in one call
24088 my $subname = undef;
24089 my $package = undef;
24094 my $pos_beg = $$rtoken_map[$i_beg];
24095 pos($input_line) = $pos_beg;
24097 # sub NAME PROTO ATTRS
24099 $input_line =~ m/\G\s*
24100 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
24101 (\w+) # NAME - required
24102 (\s*\([^){]*\))? # PROTO - something in parens
24103 (\s*:)? # ATTRS - leading : of attribute list
24112 $package = ( defined($1) && $1 ) ? $1 : $current_package;
24113 $package =~ s/\'/::/g;
24114 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
24115 $package =~ s/::$//;
24116 my $pos = pos($input_line);
24117 my $numc = $pos - $pos_beg;
24118 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
24122 # Look for prototype/attributes not preceded on this line by subname;
24123 # This might be an anonymous sub with attributes,
24124 # or a prototype on a separate line from its sub name
24126 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
24127 (\s*:)? # ATTRS leading ':'
24136 # Handle prototype on separate line from subname
24137 if ($subname_saved) {
24138 $package = $package_saved;
24139 $subname = $subname_saved;
24140 $tok = $last_nonblank_token;
24147 # ATTRS: if there are attributes, back up and let the ':' be
24148 # found later by the scanner.
24149 my $pos = pos($input_line);
24151 $pos -= length($attrs);
24154 my $next_nonblank_token = $tok;
24156 # catch case of line with leading ATTR ':' after anonymous sub
24157 if ( $pos == $pos_beg && $tok eq ':' ) {
24159 $in_attribute_list = 1;
24162 # We must convert back from character position
24163 # to pre_token index.
24166 # I don't think an error flag can occur here ..but ?
24168 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
24169 if ($error) { warning("Possibly invalid sub\n") }
24171 # check for multiple definitions of a sub
24172 ( $next_nonblank_token, my $i_next ) =
24173 find_next_nonblank_token_on_this_line( $i, $rtokens );
24176 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
24177 { # skip blank or side comment
24178 my ( $rpre_tokens, $rpre_types ) =
24179 peek_ahead_for_n_nonblank_pre_tokens(1);
24180 if ( defined($rpre_tokens) && @$rpre_tokens ) {
24181 $next_nonblank_token = $rpre_tokens->[0];
24184 $next_nonblank_token = '}';
24187 $package_saved = "";
24188 $subname_saved = "";
24189 if ( $next_nonblank_token eq '{' ) {
24191 if ( $saw_function_definition{$package}{$subname} ) {
24192 my $lno = $saw_function_definition{$package}{$subname};
24194 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
24197 $saw_function_definition{$package}{$subname} =
24198 $input_line_number;
24201 elsif ( $next_nonblank_token eq ';' ) {
24203 elsif ( $next_nonblank_token eq '}' ) {
24206 # ATTRS - if an attribute list follows, remember the name
24207 # of the sub so the next opening brace can be labeled.
24208 # Setting 'statement_type' causes any ':'s to introduce
24210 elsif ( $next_nonblank_token eq ':' ) {
24211 $statement_type = $tok;
24214 # see if PROTO follows on another line:
24215 elsif ( $next_nonblank_token eq '(' ) {
24216 if ( $attrs || $proto ) {
24218 "unexpected '(' after definition or declaration of sub '$subname'\n"
24222 $id_scan_state = 'sub'; # we must come back to get proto
24223 $statement_type = $tok;
24224 $package_saved = $package;
24225 $subname_saved = $subname;
24228 elsif ($next_nonblank_token) { # EOF technically ok
24230 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
24233 check_prototype( $proto, $package, $subname );
24236 # no match but line not blank
24239 return ( $i, $tok, $type, $id_scan_state );
24243 sub check_prototype {
24244 my ( $proto, $package, $subname ) = @_;
24245 return unless ( defined($package) && defined($subname) );
24246 if ( defined($proto) ) {
24247 $proto =~ s/^\s*\(\s*//;
24248 $proto =~ s/\s*\)$//;
24250 $is_user_function{$package}{$subname} = 1;
24251 $user_function_prototype{$package}{$subname} = "($proto)";
24253 # prototypes containing '&' must be treated specially..
24254 if ( $proto =~ /\&/ ) {
24256 # right curly braces of prototypes ending in
24257 # '&' may be followed by an operator
24258 if ( $proto =~ /\&$/ ) {
24259 $is_block_function{$package}{$subname} = 1;
24262 # right curly braces of prototypes NOT ending in
24263 # '&' may NOT be followed by an operator
24264 elsif ( $proto !~ /\&$/ ) {
24265 $is_block_list_function{$package}{$subname} = 1;
24270 $is_constant{$package}{$subname} = 1;
24274 $is_user_function{$package}{$subname} = 1;
24278 sub do_scan_package {
24280 # do_scan_package parses a package name
24281 # it is called with $i_beg equal to the index of the first nonblank
24282 # token following a 'package' token.
24284 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_;
24285 my $package = undef;
24286 my $pos_beg = $$rtoken_map[$i_beg];
24287 pos($input_line) = $pos_beg;
24289 # handle non-blank line; package name, if any, must follow
24290 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
24292 $package = ( defined($1) && $1 ) ? $1 : 'main';
24293 $package =~ s/\'/::/g;
24294 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
24295 $package =~ s/::$//;
24296 my $pos = pos($input_line);
24297 my $numc = $pos - $pos_beg;
24298 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
24301 # Now we must convert back from character position
24302 # to pre_token index.
24303 # I don't think an error flag can occur here ..but ?
24305 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
24306 if ($error) { warning("Possibly invalid package\n") }
24307 $current_package = $package;
24310 my ( $next_nonblank_token, $i_next ) =
24311 find_next_nonblank_token( $i, $rtokens );
24312 if ( $next_nonblank_token !~ /^[;\}]$/ ) {
24314 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
24319 # no match but line not blank --
24320 # could be a label with name package, like package: , for example.
24325 return ( $i, $tok, $type );
24328 sub scan_identifier_do {
24330 # This routine assembles tokens into identifiers. It maintains a
24331 # scan state, id_scan_state. It updates id_scan_state based upon
24332 # current id_scan_state and token, and returns an updated
24333 # id_scan_state and the next index after the identifier.
24335 my ( $i, $id_scan_state, $identifier, $rtokens ) = @_;
24338 my $tok_begin = $$rtokens[$i_begin];
24339 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
24340 my $id_scan_state_begin = $id_scan_state;
24341 my $identifier_begin = $identifier;
24342 my $tok = $tok_begin;
24345 # these flags will be used to help figure out the type:
24346 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
24349 # allow old package separator (') except in 'use' statement
24350 my $allow_tick = ( $last_nonblank_token ne 'use' );
24352 # get started by defining a type and a state if necessary
24353 unless ($id_scan_state) {
24354 $context = UNKNOWN_CONTEXT;
24356 # fixup for digraph
24357 if ( $tok eq '>' ) {
24361 $identifier = $tok;
24363 if ( $tok eq '$' || $tok eq '*' ) {
24364 $id_scan_state = '$';
24365 $context = SCALAR_CONTEXT;
24367 elsif ( $tok eq '%' || $tok eq '@' ) {
24368 $id_scan_state = '$';
24369 $context = LIST_CONTEXT;
24371 elsif ( $tok eq '&' ) {
24372 $id_scan_state = '&';
24374 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
24375 $saw_alpha = 0; # 'sub' is considered type info here
24376 $id_scan_state = '$';
24377 $identifier .= ' '; # need a space to separate sub from sub name
24379 elsif ( $tok eq '::' ) {
24380 $id_scan_state = 'A';
24382 elsif ( $tok =~ /^[A-Za-z_]/ ) {
24383 $id_scan_state = ':';
24385 elsif ( $tok eq '->' ) {
24386 $id_scan_state = '$';
24391 my ( $a, $b, $c ) = caller;
24392 warning("Program Bug: scan_identifier given bad token = $tok \n");
24393 warning(" called from sub $a line: $c\n");
24394 report_definite_bug();
24396 $saw_type = !$saw_alpha;
24400 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
24403 # now loop to gather the identifier
24406 while ( $i < $max_token_index ) {
24407 $i_save = $i unless ( $tok =~ /^\s*$/ );
24408 $tok = $$rtokens[ ++$i ];
24410 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
24415 if ( $id_scan_state eq '$' ) { # starting variable name
24417 if ( $tok eq '$' ) {
24419 $identifier .= $tok;
24421 # we've got a punctuation variable if end of line (punct.t)
24422 if ( $i == $max_token_index ) {
24424 $id_scan_state = '';
24428 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
24430 $id_scan_state = ':'; # now need ::
24431 $identifier .= $tok;
24433 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
24435 $id_scan_state = ':'; # now need ::
24436 $identifier .= $tok;
24438 # Perl will accept leading digits in identifiers,
24439 # although they may not always produce useful results.
24440 # Something like $main::0 is ok. But this also works:
24442 # sub howdy::123::bubba{ print "bubba $54321!\n" }
24443 # howdy::123::bubba();
24446 elsif ( $tok =~ /^[0-9]/ ) { # numeric
24448 $id_scan_state = ':'; # now need ::
24449 $identifier .= $tok;
24451 elsif ( $tok eq '::' ) {
24452 $id_scan_state = 'A';
24453 $identifier .= $tok;
24455 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
24456 $identifier .= $tok; # keep same state, a $ could follow
24458 elsif ( $tok eq '{' ) {
24460 # check for something like ${#} or ${©}
24461 if ( $identifier eq '$'
24462 && $i + 2 <= $max_token_index
24463 && $$rtokens[ $i + 2 ] eq '}'
24464 && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
24466 my $next2 = $$rtokens[ $i + 2 ];
24467 my $next1 = $$rtokens[ $i + 1 ];
24468 $identifier .= $tok . $next1 . $next2;
24470 $id_scan_state = '';
24474 # skip something like ${xxx} or ->{
24475 $id_scan_state = '';
24477 # if this is the first token of a line, any tokens for this
24478 # identifier have already been accumulated
24479 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
24484 # space ok after leading $ % * & @
24485 elsif ( $tok =~ /^\s*$/ ) {
24487 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
24489 if ( length($identifier) > 1 ) {
24490 $id_scan_state = '';
24492 $type = 'i'; # probably punctuation variable
24497 # spaces after $'s are common, and space after @
24498 # is harmless, so only complain about space
24499 # after other type characters. Space after $ and
24500 # @ will be removed in formatting. Report space
24501 # after % and * because they might indicate a
24502 # parsing error. In other words '% ' might be a
24503 # modulo operator. Delete this warning if it
24505 if ( $identifier !~ /^[\@\$]$/ ) {
24507 "Space in identifier, following $identifier\n";
24513 # space after '->' is ok
24515 elsif ( $tok eq '^' ) {
24517 # check for some special variables like $^W
24518 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
24519 $identifier .= $tok;
24520 $id_scan_state = 'A';
24523 $id_scan_state = '';
24526 else { # something else
24528 # check for various punctuation variables
24529 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
24530 $identifier .= $tok;
24533 elsif ( $identifier eq '$#' ) {
24535 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
24537 # perl seems to allow just these: $#: $#- $#+
24538 elsif ( $tok =~ /^[\:\-\+]$/ ) {
24540 $identifier .= $tok;
24544 write_logfile_entry( 'Use of $# is deprecated' . "\n" );
24547 elsif ( $identifier eq '$$' ) {
24549 # perl does not allow references to punctuation
24550 # variables without braces. For example, this
24554 # You would have to use
24558 if ( $tok eq '{' ) { $type = 't' }
24559 else { $type = 'i' }
24561 elsif ( $identifier eq '->' ) {
24566 if ( length($identifier) == 1 ) { $identifier = ''; }
24568 $id_scan_state = '';
24572 elsif ( $id_scan_state eq '&' ) { # starting sub call?
24574 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
24575 $id_scan_state = ':'; # now need ::
24577 $identifier .= $tok;
24579 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
24580 $id_scan_state = ':'; # now need ::
24582 $identifier .= $tok;
24584 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
24585 $id_scan_state = ':'; # now need ::
24587 $identifier .= $tok;
24589 elsif ( $tok =~ /^\s*$/ ) { # allow space
24591 elsif ( $tok eq '::' ) { # leading ::
24592 $id_scan_state = 'A'; # accept alpha next
24593 $identifier .= $tok;
24595 elsif ( $tok eq '{' ) {
24596 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
24598 $id_scan_state = '';
24603 # punctuation variable?
24604 # testfile: cunningham4.pl
24605 if ( $identifier eq '&' ) {
24606 $identifier .= $tok;
24613 $id_scan_state = '';
24617 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
24619 if ( $tok =~ /^[A-Za-z_]/ ) { # found it
24620 $identifier .= $tok;
24621 $id_scan_state = ':'; # now need ::
24624 elsif ( $tok eq "'" && $allow_tick ) {
24625 $identifier .= $tok;
24626 $id_scan_state = ':'; # now need ::
24629 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
24630 $identifier .= $tok;
24631 $id_scan_state = ':'; # now need ::
24634 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
24635 $id_scan_state = '(';
24636 $identifier .= $tok;
24638 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
24639 $id_scan_state = ')';
24640 $identifier .= $tok;
24643 $id_scan_state = '';
24648 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
24650 if ( $tok eq '::' ) { # got it
24651 $identifier .= $tok;
24652 $id_scan_state = 'A'; # now require alpha
24654 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
24655 $identifier .= $tok;
24656 $id_scan_state = ':'; # now need ::
24659 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
24660 $identifier .= $tok;
24661 $id_scan_state = ':'; # now need ::
24664 elsif ( $tok eq "'" && $allow_tick ) { # tick
24666 if ( $is_keyword{$identifier} ) {
24667 $id_scan_state = ''; # that's all
24671 $identifier .= $tok;
24674 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
24675 $id_scan_state = '(';
24676 $identifier .= $tok;
24678 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
24679 $id_scan_state = ')';
24680 $identifier .= $tok;
24683 $id_scan_state = ''; # that's all
24688 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
24690 if ( $tok eq '(' ) { # got it
24691 $identifier .= $tok;
24692 $id_scan_state = ')'; # now find the end of it
24694 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
24695 $identifier .= $tok;
24698 $id_scan_state = ''; # that's all - no prototype
24703 elsif ( $id_scan_state eq ')' ) { # looking for ) to end
24705 if ( $tok eq ')' ) { # got it
24706 $identifier .= $tok;
24707 $id_scan_state = ''; # all done
24710 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
24711 $identifier .= $tok;
24713 else { # probable error in script, but keep going
24714 warning("Unexpected '$tok' while seeking end of prototype\n");
24715 $identifier .= $tok;
24718 else { # can get here due to error in initialization
24719 $id_scan_state = '';
24725 if ( $id_scan_state eq ')' ) {
24726 warning("Hit end of line while seeking ) to end prototype\n");
24729 # once we enter the actual identifier, it may not extend beyond
24730 # the end of the current line
24731 if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
24732 $id_scan_state = '';
24734 if ( $i < 0 ) { $i = 0 }
24741 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
24744 else { $type = 'i' }
24746 elsif ( $identifier eq '->' ) {
24750 ( length($identifier) > 1 )
24752 # In something like '@$=' we have an identifier '@$'
24753 # In something like '$${' we have type '$$' (and only
24754 # part of an identifier)
24755 && !( $identifier =~ /\$$/ && $tok eq '{' )
24756 && ( $identifier !~ /^(sub |package )$/ )
24761 else { $type = 't' }
24763 elsif ($saw_alpha) {
24765 # type 'w' includes anything without leading type info
24766 # ($,%,@,*) including something like abc::def::ghi
24771 } # this can happen on a restart
24775 $tok = $identifier;
24776 if ($message) { write_logfile_entry($message) }
24783 TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
24784 my ( $a, $b, $c ) = caller;
24786 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
24788 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
24790 return ( $i, $tok, $type, $id_scan_state, $identifier );
24793 sub follow_quoted_string {
24795 # scan for a specific token, skipping escaped characters
24796 # if the quote character is blank, use the first non-blank character
24797 # input parameters:
24798 # $rtokens = reference to the array of tokens
24799 # $i = the token index of the first character to search
24800 # $in_quote = number of quoted strings being followed
24801 # $beginning_tok = the starting quote character
24802 # $quote_pos = index to check next for alphanumeric delimiter
24803 # output parameters:
24804 # $i = the token index of the ending quote character
24805 # $in_quote = decremented if found end, unchanged if not
24806 # $beginning_tok = the starting quote character
24807 # $quote_pos = index to check next for alphanumeric delimiter
24808 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
24809 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth )
24811 my ( $tok, $end_tok );
24812 my $i = $i_beg - 1;
24814 TOKENIZER_DEBUG_FLAG_QUOTE && do {
24816 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
24819 # get the corresponding end token
24820 if ( $beginning_tok !~ /^\s*$/ ) {
24821 $end_tok = matching_end_token($beginning_tok);
24824 # a blank token means we must find and use the first non-blank one
24826 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
24828 while ( $i < $max_token_index ) {
24829 $tok = $$rtokens[ ++$i ];
24831 if ( $tok !~ /^\s*$/ ) {
24833 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
24834 $i = $max_token_index;
24838 if ( length($tok) > 1 ) {
24839 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
24840 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
24843 $beginning_tok = $tok;
24846 $end_tok = matching_end_token($beginning_tok);
24852 $allow_quote_comments = 1;
24857 # There are two different loops which search for the ending quote
24858 # character. In the rare case of an alphanumeric quote delimiter, we
24859 # have to look through alphanumeric tokens character-by-character, since
24860 # the pre-tokenization process combines multiple alphanumeric
24861 # characters, whereas for a non-alphanumeric delimiter, only tokens of
24862 # length 1 can match.
24864 # loop for case of alphanumeric quote delimiter..
24865 # "quote_pos" is the position the current word to begin searching
24866 if ( $beginning_tok =~ /\w/ ) {
24868 # Note this because it is not recommended practice except
24869 # for obfuscated perl contests
24870 if ( $in_quote == 1 ) {
24871 write_logfile_entry(
24872 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
24875 while ( $i < $max_token_index ) {
24877 if ( $quote_pos == 0 || ( $i < 0 ) ) {
24878 $tok = $$rtokens[ ++$i ];
24880 if ( $tok eq '\\' ) {
24883 last if ( $i >= $max_token_index );
24884 $tok = $$rtokens[ ++$i ];
24888 my $old_pos = $quote_pos;
24890 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
24894 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
24896 if ( $quote_pos > 0 ) {
24900 if ( $quote_depth == 0 ) {
24908 # loop for case of a non-alphanumeric quote delimiter..
24911 while ( $i < $max_token_index ) {
24912 $tok = $$rtokens[ ++$i ];
24914 if ( $tok eq $end_tok ) {
24917 if ( $quote_depth == 0 ) {
24922 elsif ( $tok eq $beginning_tok ) {
24925 elsif ( $tok eq '\\' ) {
24930 if ( $i > $max_token_index ) { $i = $max_token_index }
24931 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth );
24934 sub matching_end_token {
24936 # find closing character for a pattern
24937 my $beginning_token = shift;
24939 if ( $beginning_token eq '{' ) {
24942 elsif ( $beginning_token eq '[' ) {
24945 elsif ( $beginning_token eq '<' ) {
24948 elsif ( $beginning_token eq '(' ) {
24958 # These names are used in error messages
24959 @opening_brace_names = qw# '{' '[' '(' '?' #;
24960 @closing_brace_names = qw# '}' ']' ')' ':' #;
24963 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
24964 <= >= == =~ !~ != ++ -- /= x=
24966 @is_digraph{@digraphs} = (1) x scalar(@digraphs);
24968 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> );
24969 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
24971 # make a hash of all valid token types for self-checking the tokenizer
24972 # (adding NEW_TOKENS : select a new character and add to this list)
24973 my @valid_token_types = qw#
24974 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
24975 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
24977 push( @valid_token_types, @digraphs );
24978 push( @valid_token_types, @trigraphs );
24979 push( @valid_token_types, '#' );
24980 push( @valid_token_types, ',' );
24981 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
24983 # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
24984 my @file_test_operators =
24985 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
24986 @is_file_test_operator{@file_test_operators} =
24987 (1) x scalar(@file_test_operators);
24989 # these functions have prototypes of the form (&), so when they are
24990 # followed by a block, that block MAY BE followed by an operator.
24991 @_ = qw( do eval );
24992 @is_block_operator{@_} = (1) x scalar(@_);
24994 # these functions allow an identifier in the indirect object slot
24995 @_ = qw( print printf sort exec system );
24996 @is_indirect_object_taker{@_} = (1) x scalar(@_);
24998 # These tokens may precede a code block
24999 # patched for SWITCH/CASE
25000 @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
25001 unless do while until eval for foreach map grep sort
25002 switch case given when);
25003 @is_code_block_token{@_} = (1) x scalar(@_);
25005 # I'll build the list of keywords incrementally
25008 # keywords and tokens after which a value or pattern is expected,
25009 # but not an operator. In other words, these should consume terms
25010 # to their right, or at least they are not expected to be followed
25011 # immediately by operators.
25012 my @value_requestor = qw(
25230 # patched above for SWITCH/CASE
25231 push( @Keywords, @value_requestor );
25233 # These are treated the same but are not keywords:
25238 push( @value_requestor, @extra_vr );
25240 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
25242 # this list contains keywords which do not look for arguments,
25243 # so that they might be followed by an operator, or at least
25245 my @operator_requestor = qw(
25269 push( @Keywords, @operator_requestor );
25271 # These are treated the same but are not considered keywords:
25278 push( @operator_requestor, @extra_or );
25280 @expecting_operator_token{@operator_requestor} =
25281 (1) x scalar(@operator_requestor);
25283 # these token TYPES expect trailing operator but not a term
25284 # note: ++ and -- are post-increment and decrement, 'C' = constant
25285 my @operator_requestor_types = qw( ++ -- C );
25286 @expecting_operator_types{@operator_requestor_types} =
25287 (1) x scalar(@operator_requestor_types);
25289 # these token TYPES consume values (terms)
25290 # note: pp and mm are pre-increment and decrement
25291 # f=semicolon in for, F=file test operator
25292 my @value_requestor_type = qw#
25293 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
25294 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
25295 <= >= == != => \ > < % * / ? & | ** <=>
25296 f F pp mm Y p m U J G
25298 push( @value_requestor_type, ',' )
25299 ; # (perl doesn't like a ',' in a qw block)
25300 @expecting_term_types{@value_requestor_type} =
25301 (1) x scalar(@value_requestor_type);
25303 # For simple syntax checking, it is nice to have a list of operators which
25304 # will really be unhappy if not followed by a term. This includes most
25306 %really_want_term = %expecting_term_types;
25308 # with these exceptions...
25309 delete $really_want_term{'U'}; # user sub, depends on prototype
25310 delete $really_want_term{'F'}; # file test works on $_ if no following term
25311 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
25314 @_ = qw(q qq qw qx qr s y tr m);
25315 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
25317 # These keywords are handled specially in the tokenizer code:
25318 my @special_keywords = qw(
25334 push( @Keywords, @special_keywords );
25336 # Keywords after which list formatting may be used
25337 # WARNING: do not include |map|grep|eval or perl may die on
25338 # syntax errors (map1.t).
25339 my @keyword_taking_list = qw(
25411 @is_keyword_taking_list{@keyword_taking_list} =
25412 (1) x scalar(@keyword_taking_list);
25414 # These are not used in any way yet
25415 # my @unused_keywords = qw(
25422 # The list of keywords was extracted from function 'keyword' in
25423 # perl file toke.c version 5.005.03, using this utility, plus a
25424 # little editing: (file getkwd.pl):
25425 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
25426 # Add 'get' prefix where necessary, then split into the above lists.
25427 # This list should be updated as necessary.
25428 # The list should not contain these special variables:
25429 # ARGV DATA ENV SIG STDERR STDIN STDOUT
25432 @is_keyword{@Keywords} = (1) x scalar(@Keywords);
25439 Perl::Tidy - Parses and beautifies perl source
25445 Perl::Tidy::perltidy(
25447 destination => $destination,
25450 perltidyrc => $perltidyrc,
25451 logfile => $logfile,
25452 errorfile => $errorfile,
25453 formatter => $formatter, # callback object (see below)
25454 dump_options => $dump_options,
25455 dump_options_type => $dump_options_type,
25460 This module makes the functionality of the perltidy utility available to perl
25461 scripts. Any or all of the input parameters may be omitted, in which case the
25462 @ARGV array will be used to provide input parameters as described
25463 in the perltidy(1) man page.
25465 For example, the perltidy script is basically just this:
25468 Perl::Tidy::perltidy();
25470 The module accepts input and output streams by a variety of methods.
25471 The following list of parameters may be any of a the following: a
25472 filename, an ARRAY reference, a SCALAR reference, or an object with
25473 either a B<getline> or B<print> method, as appropriate.
25475 source - the source of the script to be formatted
25476 destination - the destination of the formatted output
25477 stderr - standard error output
25478 perltidyrc - the .perltidyrc file
25479 logfile - the .LOG file stream, if any
25480 errorfile - the .ERR file stream, if any
25481 dump_options - ref to a hash to receive parameters (see below),
25482 dump_options_type - controls contents of dump_options
25483 dump_getopt_flags - ref to a hash to receive Getopt flags
25484 dump_options_category - ref to a hash giving category of options
25485 dump_abbreviations - ref to a hash giving all abbreviations
25487 The following chart illustrates the logic used to decide how to
25490 ref($param) $param is assumed to be:
25491 ----------- ---------------------
25493 SCALAR ref to string
25495 (other) object with getline (if source) or print method
25497 If the parameter is an object, and the object has a B<close> method, that
25498 close method will be called at the end of the stream.
25504 If the B<source> parameter is given, it defines the source of the
25509 If the B<destination> parameter is given, it will be used to define the
25510 file or memory location to receive output of perltidy.
25514 The B<stderr> parameter allows the calling program to capture the output
25515 to what would otherwise go to the standard error output device.
25519 If the B<perltidyrc> file is given, it will be used instead of any
25520 F<.perltidyrc> configuration file that would otherwise be used.
25524 If the B<argv> parameter is given, it will be used instead of the
25525 B<@ARGV> array. The B<argv> parameter may be a string, a reference to a
25526 string, or a reference to an array. If it is a string or reference to a
25527 string, it will be parsed into an array of items just as if it were a
25528 command line string.
25532 If the B<dump_options> parameter is given, it must be the reference to a hash.
25533 In this case, the parameters contained in any perltidyrc configuration file
25534 will be placed in this hash and perltidy will return immediately. This is
25535 equivalent to running perltidy with --dump-options, except that the perameters
25536 are returned in a hash rather than dumped to standard output. Also, by default
25537 only the parameters in the perltidyrc file are returned, but this can be
25538 changed (see the next parameter). This parameter provides a convenient method
25539 for external programs to read a perltidyrc file. An example program using
25540 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
25542 Any combination of the B<dump_> parameters may be used together.
25544 =item dump_options_type
25546 This parameter is a string which can be used to control the parameters placed
25547 in the hash reference supplied by B<dump_options>. The possible values are
25548 'perltidyrc' (default) and 'full'. The 'full' parameter causes both the
25549 default options plus any options found in a perltidyrc file to be returned.
25551 =item dump_getopt_flags
25553 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
25554 hash. This hash will receive all of the parameters that perltidy understands
25555 and flags that are passed to Getopt::Long. This parameter may be
25556 used alone or with the B<dump_options> flag. Perltidy will
25557 exit immediately after filling this hash. See the demo program
25558 F<perltidyrc_dump.pl> for example usage.
25560 =item dump_options_category
25562 If the B<dump_options_category> parameter is given, it must be the reference to a
25563 hash. This hash will receive a hash with keys equal to all long parameter names
25564 and values equal to the title of the corresponding section of the perltidy manual.
25565 See the demo program F<perltidyrc_dump.pl> for example usage.
25567 =item dump_abbreviations
25569 If the B<dump_abbreviations> parameter is given, it must be the reference to a
25570 hash. This hash will receive all abbreviations used by Perl::Tidy. See the
25571 demo program F<perltidyrc_dump.pl> for example usage.
25577 The following example passes perltidy a snippet as a reference
25578 to a string and receives the result back in a reference to
25583 # some messy source code to format
25584 my $source = <<'EOM';
25586 my @editors=('Emacs', 'Vi '); my $rand = rand();
25587 print "A poll of 10 random programmers gave these results:\n";
25589 my $i=int ($rand+rand());
25590 print " $editors[$i] users are from Venus" . ", " .
25591 "$editors[1-$i] users are from Mars" .
25596 # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
25598 perltidy( source => \$source, destination => \@dest );
25599 foreach (@dest) {print}
25601 =head1 Using the B<formatter> Callback Object
25603 The B<formatter> parameter is an optional callback object which allows
25604 the calling program to receive tokenized lines directly from perltidy for
25605 further specialized processing. When this parameter is used, the two
25606 formatting options which are built into perltidy (beautification or
25607 html) are ignored. The following diagram illustrates the logical flow:
25609 |-- (normal route) -> code beautification
25610 caller->perltidy->|-- (-html flag ) -> create html
25611 |-- (formatter given)-> callback to write_line
25613 This can be useful for processing perl scripts in some way. The
25614 parameter C<$formatter> in the perltidy call,
25616 formatter => $formatter,
25618 is an object created by the caller with a C<write_line> method which
25619 will accept and process tokenized lines, one line per call. Here is
25620 a simple example of a C<write_line> which merely prints the line number,
25621 the line type (as determined by perltidy), and the text of the line:
25625 # This is called from perltidy line-by-line
25627 my $line_of_tokens = shift;
25628 my $line_type = $line_of_tokens->{_line_type};
25629 my $input_line_number = $line_of_tokens->{_line_number};
25630 my $input_line = $line_of_tokens->{_line_text};
25631 print "$input_line_number:$line_type:$input_line";
25634 The complete program, B<perllinetype>, is contained in the examples section of
25635 the source distribution. As this example shows, the callback method
25636 receives a parameter B<$line_of_tokens>, which is a reference to a hash
25637 of other useful information. This example uses these hash entries:
25639 $line_of_tokens->{_line_number} - the line number (1,2,...)
25640 $line_of_tokens->{_line_text} - the text of the line
25641 $line_of_tokens->{_line_type} - the type of the line, one of:
25643 SYSTEM - system-specific code before hash-bang line
25644 CODE - line of perl code (including comments)
25645 POD_START - line starting pod, such as '=head'
25646 POD - pod documentation text
25647 POD_END - last line of pod section, '=cut'
25648 HERE - text of here-document
25649 HERE_END - last line of here-doc (target word)
25650 FORMAT - format section
25651 FORMAT_END - last line of format section, '.'
25652 DATA_START - __DATA__ line
25653 DATA - unidentified text following __DATA__
25654 END_START - __END__ line
25655 END - unidentified text following __END__
25656 ERROR - we are in big trouble, probably not a perl script
25658 Most applications will be only interested in lines of type B<CODE>. For
25659 another example, let's write a program which checks for one of the
25660 so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
25661 can slow down processing. Here is a B<write_line>, from the example
25662 program B<find_naughty.pl>, which does that:
25666 # This is called back from perltidy line-by-line
25667 # We're looking for $`, $&, and $'
25668 my ( $self, $line_of_tokens ) = @_;
25670 # pull out some stuff we might need
25671 my $line_type = $line_of_tokens->{_line_type};
25672 my $input_line_number = $line_of_tokens->{_line_number};
25673 my $input_line = $line_of_tokens->{_line_text};
25674 my $rtoken_type = $line_of_tokens->{_rtoken_type};
25675 my $rtokens = $line_of_tokens->{_rtokens};
25678 # skip comments, pod, etc
25679 return if ( $line_type ne 'CODE' );
25681 # loop over tokens looking for $`, $&, and $'
25682 for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
25684 # we only want to examine token types 'i' (identifier)
25685 next unless $$rtoken_type[$j] eq 'i';
25687 # pull out the actual token text
25688 my $token = $$rtokens[$j];
25691 if ( $token =~ /^\$[\`\&\']$/ ) {
25693 "$input_line_number: $token\n";
25698 This example pulls out these tokenization variables from the $line_of_tokens
25701 $rtoken_type = $line_of_tokens->{_rtoken_type};
25702 $rtokens = $line_of_tokens->{_rtokens};
25704 The variable C<$rtoken_type> is a reference to an array of token type codes,
25705 and C<$rtokens> is a reference to a corresponding array of token text.
25706 These are obviously only defined for lines of type B<CODE>.
25707 Perltidy classifies tokens into types, and has a brief code for each type.
25708 You can get a complete list at any time by running perltidy from the
25711 perltidy --dump-token-types
25713 In the present example, we are only looking for tokens of type B<i>
25714 (identifiers), so the for loop skips past all other types. When an
25715 identifier is found, its actual text is checked to see if it is one
25716 being sought. If so, the above write_line prints the token and its
25719 The B<formatter> feature is relatively new in perltidy, and further
25720 documentation needs to be written to complete its description. However,
25721 several example programs have been written and can be found in the
25722 B<examples> section of the source distribution. Probably the best way
25723 to get started is to find one of the examples which most closely matches
25724 your application and start modifying it.
25726 For help with perltidy's pecular way of breaking lines into tokens, you
25727 might run, from the command line,
25729 perltidy -D filename
25731 where F<filename> is a short script of interest. This will produce
25732 F<filename.DEBUG> with interleaved lines of text and their token types.
25733 The -D flag has been in perltidy from the beginning for this purpose.
25734 If you want to see the code which creates this file, it is
25735 C<write_debug_entry> in Tidy.pm.
25743 Thanks to Hugh Myers who developed the initial modular interface
25748 This man page documents Perl::Tidy version 20060614.
25753 perltidy at users.sourceforge.net
25757 The perltidy(1) man page describes all of the features of perltidy. It
25758 can be found at http://perltidy.sourceforge.net.