2 ############################################################
4 # perltidy - a perl script indenter and formatter
6 # Copyright (c) 2000-2012 by Steve Hancock
7 # Distributed under the GPL license agreement; see file COPYING
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 # For brief instructions, try 'perltidy -h'.
24 # For more complete documentation, try 'man perltidy'
25 # or visit http://perltidy.sourceforge.net
27 # This script is an example of the default style. It was formatted with:
31 # Code Contributions: See ChangeLog.html for a complete history.
32 # Michael Cartmell supplied code for adaptation to VMS and helped with
34 # Hugh S. Myers supplied sub streamhandle and the supporting code to
35 # create a Perl::Tidy module which can operate on strings, arrays, etc.
36 # Yves Orton supplied coding to help detect Windows versions.
37 # Axel Rose supplied a patch for MacPerl.
38 # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
39 # Dan Tyrell contributed a patch for binary I/O.
40 # Ueli Hugenschmidt contributed a patch for -fpsc
41 # Sam Kington supplied a patch to identify the initial indentation of
43 # jonathan swartz supplied patches for:
44 # * .../ pattern, which looks upwards from directory
45 # * --notidy, to be used in directories where we want to avoid
46 # accidentally tidying
47 # * prefilter and postfilter
50 # Many others have supplied key ideas, suggestions, and bug reports;
51 # see the CHANGES file.
53 ############################################################
56 use 5.004; # need IO::File from 5.004 or later
57 BEGIN { $^W = 1; } # turn on warnings
71 @ISA = qw( Exporter );
72 @EXPORT = qw( &perltidy );
80 ( $VERSION = q($Id: Tidy.pm,v 1.74 2012/07/01 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
85 # given filename and mode (r or w), create an object which:
86 # has a 'getline' method if mode='r', and
87 # has a 'print' method if mode='w'.
88 # The objects also need a 'close' method.
90 # How the object is made:
92 # if $filename is: Make object using:
93 # ---------------- -----------------
94 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
96 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
97 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
99 # (check for 'print' method for 'w' mode)
100 # (check for 'getline' method for 'r' mode)
101 my $ref = ref( my $filename = shift );
108 if ( $ref eq 'ARRAY' ) {
109 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
111 elsif ( $ref eq 'SCALAR' ) {
112 $New = sub { Perl::Tidy::IOScalar->new(@_) };
116 # Accept an object with a getline method for reading. Note:
117 # IO::File is built-in and does not respond to the defined
118 # operator. If this causes trouble, the check can be
119 # skipped and we can just let it crash if there is no
121 if ( $mode =~ /[rR]/ ) {
122 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
123 $New = sub { $filename };
126 $New = sub { undef };
128 ------------------------------------------------------------------------
129 No 'getline' method is defined for object of class $ref
130 Please check your call to Perl::Tidy::perltidy. Trace follows.
131 ------------------------------------------------------------------------
136 # Accept an object with a print method for writing.
137 # See note above about IO::File
138 if ( $mode =~ /[wW]/ ) {
139 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
140 $New = sub { $filename };
143 $New = sub { undef };
145 ------------------------------------------------------------------------
146 No 'print' method is defined for object of class $ref
147 Please check your call to Perl::Tidy::perltidy. Trace follows.
148 ------------------------------------------------------------------------
157 if ( $filename eq '-' ) {
158 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
161 $New = sub { IO::File->new(@_) };
164 $fh = $New->( $filename, $mode )
165 or warn "Couldn't open file:$filename in mode:$mode : $!\n";
166 return $fh, ( $ref or $filename );
169 sub find_input_line_ending {
171 # Peek at the start of a file and return the first line ending found there,
172 # one of "\015\012", "\015" or "\012". Quietly return undef in case of any trouble.
173 my ($input_file) = @_;    # the input to examine; only a plain file name is opened below
176 # silently ignore input from object or stdin
177 if ( ref($input_file) || $input_file eq '-' ) {
180 open( INFILE, $input_file ) || return $ending;    # cannot open: hand back $ending unchanged
184 read( INFILE, $buf, 1024 );    # sample at most the first 1024 bytes of the file
186 if ( $buf && $buf =~ /([\012\015]+)/ ) {    # capture the first run of CR and/or LF characters
190 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }    # only CR LF pairs ($test is presumably the captured run; confirm)
193 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }    # only CR characters
196 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }    # only LF characters; other mixtures match none of these three tests
210 # concatenate a path and file basename
211 # returns undef in case of error
213 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
215 # use File::Spec if we can
216 unless ($missing_file_spec) {
217 return File::Spec->catfile(@_);
220 # Perl 5.004 systems may not have File::Spec so we'll make
221 # a simple try. We assume File::Basename is available.
222 # return undef if not successful.
224 my $path = join '/', @_;
225 my $test_file = $path . $name;
226 my ( $test_name, $test_path ) = fileparse($test_file);
227 return $test_file if ( $test_name eq $name );
228 return undef if ( $^O eq 'VMS' );
230 # this should work at least for Windows and Unix:
231 $test_file = $path . '/' . $name;
232 ( $test_name, $test_path ) = fileparse($test_file);
233 return $test_file if ( $test_name eq $name );
237 sub make_temporary_filename {
239 # Make a temporary filename.
240 # FIXME: return both a name and opened filehandle
242 # The POSIX tmpnam() function tends to be unreliable for non-unix systems
243 # (at least for the win32 systems that I've tested), so use a pre-defined
244 # name for them. A disadvantage of this is that two perltidy
245 # runs in the same working directory may conflict. However, the chance of
246 # that is small and manageable by the user, especially on systems for which
247 # the POSIX tmpnam function doesn't work.
248 my $name = "perltidy.TMP";    # the pre-defined fallback name
249 if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {    # NOTE(review): classic Mac OS reports $^O as 'MacOS', so 'MacOs' looks like it can never match; confirm
252 eval "use POSIX qw(tmpnam)";    # string eval: a failure to load POSIX lands in $@ instead of being fatal
253 if ($@) { return $name }    # no tmpnam available, so fall back to the pre-defined name
256 # just make a couple of tries before giving up and using the default
258 my $tmpname = tmpnam();
259 my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );    # O_CREAT|O_EXCL: the open fails if the file already exists
269 # Here is a map of the flow of data from the input source to the output
272 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
273 #          input                                    groups       output
274 #          lines      tokens       lines           of lines       lines
277 # The names correspond to the package names responsible for the unit processes.
279 # The overall process is controlled by the "main" package.
281 # LineSource is the stream of input lines
283 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
284 # if necessary. A token is any section of the input line which should be
285 # manipulated as a single entity during formatting. For example, a single
286 # ',' character is a token, and so is an entire side comment. It handles
287 # the complexities of Perl syntax, such as distinguishing between '<<' as
288 # a shift operator and as a here-document, or distinguishing between '/'
289 # as a divide symbol and as a pattern delimiter.
291 # Formatter inserts and deletes whitespace between tokens, and breaks
292 # sequences of tokens at appropriate points as output lines. It bases its
293 # decisions on the default rules as modified by any command-line options.
295 # VerticalAligner collects groups of lines together and tries to line up
296 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
298 # FileWriter simply writes lines to the output stream.
300 # The Logger package, not shown, records significant events and warning
301 # messages. It writes a .LOG file, which may be saved with a
302 # '-log' or a '-g' flag.
306 # variables needed by interrupt handler:
310 # this routine may be called to give a status report if interrupted. If a
311 # parameter is given, it will call exit with that parameter. This is no
312 # longer used because it works under Unix but not under Windows.
313 sub interrupt_handler {
315 my $exit_flag = shift;    # optional exit status; if defined, exit is called with it after the report
316 print STDERR "perltidy interrupted";    # the report is built up piecewise on STDERR
318 my $input_line_number =
319 Perl::Tidy::Tokenizer::get_input_line_number();
320 print STDERR " at line $input_line_number";
324 if ( ref $input_file ) { print STDERR " of reference to:" }    # $input_file is not declared in this sub; the input may be a reference rather than a file name
325 else { print STDERR " of file:" }
326 print STDERR " $input_file";
329 exit $exit_flag if defined($exit_flag);    # without an argument, nothing is exited here
336 destination => undef,
343 dump_options => undef,
344 dump_options_type => undef,
345 dump_getopt_flags => undef,
346 dump_options_category => undef,
347 dump_options_range => undef,
348 dump_abbreviations => undef,
353 # don't overwrite caller's ARGV
358 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
360 my @good_keys = sort keys %defaults;
361 @bad_keys = sort @bad_keys;
363 ------------------------------------------------------------------------
364 Unknown perltidy parameter : (@bad_keys)
365 perltidy only understands : (@good_keys)
366 ------------------------------------------------------------------------
371 my $get_hash_ref = sub {
373 my $hash_ref = $input_hash{$key};
374 if ( defined($hash_ref) ) {
375 unless ( ref($hash_ref) eq 'HASH' ) {
376 my $what = ref($hash_ref);
378 $what ? "but is ref to $what" : "but is not a reference";
380 ------------------------------------------------------------------------
381 error in call to perltidy:
382 -$key must be reference to HASH $but_is
383 ------------------------------------------------------------------------
390 %input_hash = ( %defaults, %input_hash );
391 my $argv = $input_hash{'argv'};
392 my $destination_stream = $input_hash{'destination'};
393 my $errorfile_stream = $input_hash{'errorfile'};
394 my $logfile_stream = $input_hash{'logfile'};
395 my $perltidyrc_stream = $input_hash{'perltidyrc'};
396 my $source_stream = $input_hash{'source'};
397 my $stderr_stream = $input_hash{'stderr'};
398 my $user_formatter = $input_hash{'formatter'};
399 my $prefilter = $input_hash{'prefilter'};
400 my $postfilter = $input_hash{'postfilter'};
402 # various dump parameters
403 my $dump_options_type = $input_hash{'dump_options_type'};
404 my $dump_options = $get_hash_ref->('dump_options');
405 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
406 my $dump_options_category = $get_hash_ref->('dump_options_category');
407 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
408 my $dump_options_range = $get_hash_ref->('dump_options_range');
410 # validate dump_options_type
411 if ( defined($dump_options) ) {
412 unless ( defined($dump_options_type) ) {
413 $dump_options_type = 'perltidyrc';
415 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
417 ------------------------------------------------------------------------
418 Please check value of -dump_options_type in call to perltidy;
419 saw: '$dump_options_type'
420 expecting: 'perltidyrc' or 'full'
421 ------------------------------------------------------------------------
427 $dump_options_type = "";
430 if ($user_formatter) {
432 # if the user defines a formatter, there is no output stream,
433 # but we need a null stream to keep coding simple
434 $destination_stream = Perl::Tidy::DevNull->new();
437 # see if ARGV is overridden
438 if ( defined($argv) ) {
440 my $rargv = ref $argv;
441 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
445 if ( $rargv eq 'ARRAY' ) {
450 ------------------------------------------------------------------------
451 Please check value of -argv in call to perltidy;
452 it must be a string or ref to ARRAY but is: $rargv
453 ------------------------------------------------------------------------
460 my ( $rargv, $msg ) = parse_args($argv);
463 Error parsing this string passed to to perltidy with 'argv':
471 # redirect STDERR if requested
472 if ($stderr_stream) {
473 my $ref_type = ref($stderr_stream);
474 if ( $ref_type eq 'SCALAR' or $ref_type eq 'ARRAY' ) {
476 ------------------------------------------------------------------------
477 You are trying to redirect STDERR to a reference of type $ref_type
478 It can only be redirected to a file
479 Please check value of -stderr in call to perltidy
480 ------------------------------------------------------------------------
483 my ( $fh_stderr, $stderr_file ) =
484 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
485 if ($fh_stderr) { *STDERR = $fh_stderr }
488 ------------------------------------------------------------------------
489 Unable to redirect STDERR to $stderr_stream
490 Please check value of -stderr in call to perltidy
491 ------------------------------------------------------------------------
496 my $rpending_complaint;
497 $$rpending_complaint = "";
498 my $rpending_logfile_message;
499 $$rpending_logfile_message = "";
501 my ( $is_Windows, $Windows_type ) =
502 look_for_Windows($rpending_complaint);
504 # VMS file names are restricted to a 40.40 format, so we append _tdy
505 # instead of .tdy, etc. (but see also sub check_vms_filename)
508 if ( $^O eq 'VMS' ) {
514 $dot_pattern = '\.'; # must escape for use in regex
517 #---------------------------------------------------------------
518 # get command line options
519 #---------------------------------------------------------------
520 my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
521 $rexpansion, $roption_category, $roption_range )
522 = process_command_line(
523 $perltidyrc_stream, $is_Windows, $Windows_type,
524 $rpending_complaint, $dump_options_type,
527 #---------------------------------------------------------------
528 # Handle requests to dump information
529 #---------------------------------------------------------------
531 # return or exit immediately after all dumps
534 # Getopt parameters and their flags
535 if ( defined($dump_getopt_flags) ) {
537 foreach my $op ( @{$roption_string} ) {
546 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
550 $dump_getopt_flags->{$opt} = $flag;
554 if ( defined($dump_options_category) ) {
556 %{$dump_options_category} = %{$roption_category};
559 if ( defined($dump_options_range) ) {
561 %{$dump_options_range} = %{$roption_range};
564 if ( defined($dump_abbreviations) ) {
566 %{$dump_abbreviations} = %{$rexpansion};
569 if ( defined($dump_options) ) {
571 %{$dump_options} = %{$rOpts};
574 return if ($quit_now);
576 # make printable string of options for this run as possible diagnostic
577 my $readable_options = readable_options( $rOpts, $roption_string );
579 # dump from command line
580 if ( $rOpts->{'dump-options'} ) {
581 print STDOUT $readable_options;
585 #---------------------------------------------------------------
586 # check parameters and their interactions
587 #---------------------------------------------------------------
588 check_options( $rOpts, $is_Windows, $Windows_type,
589 $rpending_complaint );
591 if ($user_formatter) {
592 $rOpts->{'format'} = 'user';
595 # there must be one entry here for every possible format
596 my %default_file_extension = (
602 # be sure we have a valid output format
603 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
604 my $formats = join ' ',
605 sort map { "'" . $_ . "'" } keys %default_file_extension;
606 my $fmt = $rOpts->{'format'};
607 die "-format='$fmt' but must be one of: $formats\n";
610 my $output_extension =
611 make_extension( $rOpts->{'output-file-extension'},
612 $default_file_extension{ $rOpts->{'format'} }, $dot );
614 # If the backup extension contains a / character then the backup should
615 # be deleted when the -b option is used. On older versions of
616 # perltidy this will generate an error message due to an illegal
619 # A backup file will still be generated but will be deleted
620 # at the end. If -bext='/' then this extension will be
621 # the default 'bak'. Otherwise it will be whatever characters
622 # remain after all '/' characters are removed. For example:
623 # -bext extension slashes
627 # '/dev/null' devnull 2 (Currently not allowed)
628 my $bext = $rOpts->{'backup-file-extension'};
629 my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
631 # At present only one forward slash is allowed. In the future multiple
632 # slashes may be allowed to allow for other options
633 if ( $delete_backup > 1 ) {
634 die "-bext=$bext contains more than one '/'\n";
637 my $backup_extension =
638 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
640 my $html_toc_extension =
641 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
643 my $html_src_extension =
644 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
646 # check for -b option;
647 # silently ignore unless beautify mode
648 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
649 && $rOpts->{'format'} eq 'tidy';
651 # turn off -b with warnings in case of conflicts with other options
652 if ($in_place_modify) {
653 if ( $rOpts->{'standard-output'} ) {
654 warn "Ignoring -b; you may not use -b and -st together\n";
655 $in_place_modify = 0;
657 if ($destination_stream) {
659 "Ignoring -b; you may not specify a destination stream and -b together\n";
660 $in_place_modify = 0;
662 if ( ref($source_stream) ) {
664 "Ignoring -b; you may not specify a source array and -b together\n";
665 $in_place_modify = 0;
667 if ( $rOpts->{'outfile'} ) {
668 warn "Ignoring -b; you may not use -b and -o together\n";
669 $in_place_modify = 0;
671 if ( defined( $rOpts->{'output-path'} ) ) {
672 warn "Ignoring -b; you may not use -b and -opath together\n";
673 $in_place_modify = 0;
677 Perl::Tidy::Formatter::check_options($rOpts);
678 if ( $rOpts->{'format'} eq 'html' ) {
679 Perl::Tidy::HtmlWriter->check_options($rOpts);
682 # make the pattern of file extensions that we shouldn't touch
683 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
684 if ($output_extension) {
685 my $ext = quotemeta($output_extension);
686 $forbidden_file_extensions .= "|$ext";
688 if ( $in_place_modify && $backup_extension ) {
689 my $ext = quotemeta($backup_extension);
690 $forbidden_file_extensions .= "|$ext";
692 $forbidden_file_extensions .= ')$';
694 # Create a diagnostics object if requested;
695 # This is only useful for code development
696 my $diagnostics_object = undef;
697 if ( $rOpts->{'DIAGNOSTICS'} ) {
698 $diagnostics_object = Perl::Tidy::Diagnostics->new();
701 # no filenames should be given if input is from an array
702 if ($source_stream) {
705 "You may not specify any filenames when a source array is given\n";
708 # we'll stuff the source array into ARGV
709 unshift( @ARGV, $source_stream );
711 # No special treatment for source stream which is a filename.
712 # This will enable checks for binary files and other bad stuff.
713 $source_stream = undef unless ref($source_stream);
716 # use stdin by default if no source array and no args
718 unshift( @ARGV, '-' ) unless @ARGV;
721 #---------------------------------------------------------------
723 # main loop to process all files in argument list
724 #---------------------------------------------------------------
725 my $number_of_files = @ARGV;
726 my $formatter = undef;
728 while ( $input_file = shift @ARGV ) {
730 my $input_file_permissions;
732 #---------------------------------------------------------------
733 # prepare this input stream
734 #---------------------------------------------------------------
735 if ($source_stream) {
736 $fileroot = "perltidy";
738 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
739 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
740 $in_place_modify = 0;
743 $fileroot = $input_file;
744 unless ( -e $input_file ) {
746 # file doesn't exist - check for a file glob
747 if ( $input_file =~ /([\?\*\[\{])/ ) {
749 # Windows shell may not remove quotes, so do it
750 my $input_file = $input_file;
751 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
752 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
753 my $pattern = fileglob_to_re($input_file);
755 if ( !$@ && opendir( DIR, './' ) ) {
757 grep { /$pattern/ && !-d $_ } readdir(DIR);
760 unshift @ARGV, @files;
765 print "skipping file: '$input_file': no matches found\n";
769 unless ( -f $input_file ) {
770 print "skipping file: $input_file: not a regular file\n";
774 # As a safety precaution, skip zero length files.
775 # If for example a source file got clobbered somehow,
776 # the old .tdy or .bak files might still exist so we
777 # shouldn't overwrite them with zero length files.
778 unless ( -s $input_file ) {
779 print "skipping file: $input_file: Zero size\n";
783 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
785 "skipping file: $input_file: Non-text (override with -f)\n";
789 # we should have a valid filename now
790 $fileroot = $input_file;
791 $input_file_permissions = ( stat $input_file )[2] & 07777;
793 if ( $^O eq 'VMS' ) {
794 ( $fileroot, $dot ) = check_vms_filename($fileroot);
797 # add option to change path here
798 if ( defined( $rOpts->{'output-path'} ) ) {
800 my ( $base, $old_path ) = fileparse($fileroot);
801 my $new_path = $rOpts->{'output-path'};
802 unless ( -d $new_path ) {
803 unless ( mkdir $new_path, 0777 ) {
804 die "unable to create directory $new_path: $!\n";
807 my $path = $new_path;
808 $fileroot = catfile( $path, $base );
811 ------------------------------------------------------------------------
812 Problem combining $new_path and $base to make a filename; check -opath
813 ------------------------------------------------------------------------
819 # Skip files with same extension as the output files because
820 # this can lead to a messy situation with files like
821 # script.tdy.tdy.tdy ... or worse problems ... when you
822 # rerun perltidy over and over with wildcard input.
825 && ( $input_file =~ /$forbidden_file_extensions/o
826 || $input_file eq 'DIAGNOSTICS' )
829 print "skipping file: $input_file: wrong extension\n";
833 # the 'source_object' supplies a method to read the input file
835 Perl::Tidy::LineSource->new( $input_file, $rOpts,
836 $rpending_logfile_message );
837 next unless ($source_object);
839 # Prefilters and postfilters: The prefilter is a code reference
840 # that will be applied to the source before tidying, and the
841 # postfilter is a code reference that will be applied to the result before outputting.
844 while ( my $line = $source_object->get_line() ) {
847 $buf = $prefilter->($buf);
849 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
850 $rpending_logfile_message );
853 # register this file name with the Diagnostics package
854 $diagnostics_object->set_input_file($input_file)
855 if $diagnostics_object;
857 #---------------------------------------------------------------
858 # prepare the output stream
859 #---------------------------------------------------------------
860 my $output_file = undef;
861 my $actual_output_extension;
863 if ( $rOpts->{'outfile'} ) {
865 if ( $number_of_files <= 1 ) {
867 if ( $rOpts->{'standard-output'} ) {
868 die "You may not use -o and -st together\n";
870 elsif ($destination_stream) {
872 "You may not specify a destination array and -o together\n";
874 elsif ( defined( $rOpts->{'output-path'} ) ) {
875 die "You may not specify -o and -opath together\n";
877 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
878 die "You may not specify -o and -oext together\n";
880 $output_file = $rOpts->{outfile};
882 # make sure user gives a file name after -o
883 if ( $output_file =~ /^-/ ) {
884 die "You must specify a valid filename after -o\n";
887 # do not overwrite input file with -o
888 if ( defined($input_file_permissions)
889 && ( $output_file eq $input_file ) )
892 "Use 'perltidy -b $input_file' to modify in-place\n";
896 die "You may not use -o with more than one input file\n";
899 elsif ( $rOpts->{'standard-output'} ) {
900 if ($destination_stream) {
902 "You may not specify a destination array and -st together\n";
906 if ( $number_of_files <= 1 ) {
909 die "You may not use -st with more than one input file\n";
912 elsif ($destination_stream) {
913 $output_file = $destination_stream;
915 elsif ($source_stream) { # source but no destination goes to stdout
918 elsif ( $input_file eq '-' ) {
922 if ($in_place_modify) {
923 $output_file = IO::File->new_tmpfile()
924 or die "cannot open temp file for -b option: $!\n";
927 $actual_output_extension = $output_extension;
928 $output_file = $fileroot . $output_extension;
932 # the 'sink_object' knows how to write the output file
933 my $tee_file = $fileroot . $dot . "TEE";
935 my $line_separator = $rOpts->{'output-line-ending'};
936 if ( $rOpts->{'preserve-line-endings'} ) {
937 $line_separator = find_input_line_ending($input_file);
940 # Eventually all I/O may be done with binmode, but for now it is
941 # only done when a user requests a particular line separator
942 # through the -ple or -ole flags
944 if ( defined($line_separator) ) { $binmode = 1 }
945 else { $line_separator = "\n" }
947 my ( $sink_object, $postfilter_buffer );
950 Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
951 $line_separator, $rOpts, $rpending_logfile_message,
956 Perl::Tidy::LineSink->new( $output_file, $tee_file,
957 $line_separator, $rOpts, $rpending_logfile_message,
961 #---------------------------------------------------------------
962 # initialize the error logger
963 #---------------------------------------------------------------
964 my $warning_file = $fileroot . $dot . "ERR";
965 if ($errorfile_stream) { $warning_file = $errorfile_stream }
966 my $log_file = $fileroot . $dot . "LOG";
967 if ($logfile_stream) { $log_file = $logfile_stream }
970 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
972 write_logfile_header(
973 $rOpts, $logger_object, $config_file,
974 $rraw_options, $Windows_type, $readable_options,
976 if ($$rpending_logfile_message) {
977 $logger_object->write_logfile_entry($$rpending_logfile_message);
979 if ($$rpending_complaint) {
980 $logger_object->complain($$rpending_complaint);
983 #---------------------------------------------------------------
984 # initialize the debug object, if any
985 #---------------------------------------------------------------
986 my $debugger_object = undef;
987 if ( $rOpts->{DEBUG} ) {
989 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
992 #---------------------------------------------------------------
993 # loop over iterations for one source stream
994 #---------------------------------------------------------------
996 # We will do a convergence test if 3 or more iterations are allowed.
997 # It would be pointless for fewer because we have to make at least
998 # two passes before we can see if we are converged, and the test
999 # would just slow things down.
1000 my $max_iterations = $rOpts->{'iterations'};
1001 my $convergence_log_message;
1003 my $do_convergence_test = $max_iterations > 2;
1004 if ($do_convergence_test) {
1005 eval "use Digest::MD5 qw(md5_hex)";
1006 $do_convergence_test = !$@;
1009 # save objects to allow redirecting output during iterations
1010 my $sink_object_final = $sink_object;
1011 my $debugger_object_final = $debugger_object;
1012 my $logger_object_final = $logger_object;
1014 for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
1016 # send output stream to temp buffers until last iteration
1018 if ( $iter < $max_iterations ) {
1020 Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
1021 $line_separator, $rOpts, $rpending_logfile_message,
1025 $sink_object = $sink_object_final;
1028 # Save logger, debugger output only on pass 1 because:
1029 # (1) line number references must be to the starting
1030 # source, not an intermediate result, and
1031 # (2) we need to know if there are errors so we can stop the
1032 # iterations early if necessary.
1034 $debugger_object = undef;
1035 $logger_object = undef;
1038 #------------------------------------------------------------
1039 # create a formatter for this file : html writer or
1041 #------------------------------------------------------------
1043 # we have to delete any old formatter because, for safety,
1044 # the formatter will check to see that there is only one.
1047 if ($user_formatter) {
1048 $formatter = $user_formatter;
1050 elsif ( $rOpts->{'format'} eq 'html' ) {
1052 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1053 $actual_output_extension, $html_toc_extension,
1054 $html_src_extension );
1056 elsif ( $rOpts->{'format'} eq 'tidy' ) {
1057 $formatter = Perl::Tidy::Formatter->new(
1058 logger_object => $logger_object,
1059 diagnostics_object => $diagnostics_object,
1060 sink_object => $sink_object,
1064 die "I don't know how to do -format=$rOpts->{'format'}\n";
1067 unless ($formatter) {
1069 "Unable to continue with $rOpts->{'format'} formatting\n";
1072 #---------------------------------------------------------------
1073 # create the tokenizer for this file
1074 #---------------------------------------------------------------
1075 $tokenizer = undef; # must destroy old tokenizer
1076 $tokenizer = Perl::Tidy::Tokenizer->new(
1077 source_object => $source_object,
1078 logger_object => $logger_object,
1079 debugger_object => $debugger_object,
1080 diagnostics_object => $diagnostics_object,
1081 starting_level => $rOpts->{'starting-indentation-level'},
1082 tabs => $rOpts->{'tabs'},
1083 entab_leading_space => $rOpts->{'entab-leading-whitespace'},
1084 indent_columns => $rOpts->{'indent-columns'},
1085 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
1086 look_for_autoloader => $rOpts->{'look-for-autoloader'},
1087 look_for_selfloader => $rOpts->{'look-for-selfloader'},
1088 trim_qw => $rOpts->{'trim-qw'},
1091 #---------------------------------------------------------------
1093 #---------------------------------------------------------------
1094 process_this_file( $tokenizer, $formatter );
1096 #---------------------------------------------------------------
1097 # close the input source and report errors
1098 #---------------------------------------------------------------
1099 $source_object->close_input_file();
1101 # line source for next iteration (if any) comes from the current
1102 # temporary output buffer
1103 if ( $iter < $max_iterations ) {
1105 $sink_object->close_output_file();
1107 Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1108 $rpending_logfile_message );
1110 # stop iterations if errors or converged
1111 my $stop_now = $logger_object->{_warning_count};
1113 $convergence_log_message = <<EOM;
1114 Stopping iterations because of errors.
1117 elsif ($do_convergence_test) {
1118 my $digest = md5_hex($sink_buffer);
1119 if ( !$saw_md5{$digest} ) {
1120 $saw_md5{$digest} = $iter;
1124 # Saw this result before, stop iterating
1126 my $iterm = $iter - 1;
1127 if ( $saw_md5{$digest} != $iterm ) {
1129 # Blinking (oscillating) between two stable
1130 # end states. This has happened in the past
1131 # but at present there are no known instances.
1132 $convergence_log_message = <<EOM;
1133 Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1135 $diagnostics_object->write_diagnostics(
1136 $convergence_log_message)
1137 if $diagnostics_object;
1140 $convergence_log_message = <<EOM;
1141 Converged. Output for iteration $iter same as for iter $iterm.
1143 $diagnostics_object->write_diagnostics(
1144 $convergence_log_message)
1145 if $diagnostics_object && $iterm > 2;
1148 } ## end if ($do_convergence_test)
1152 # we are stopping the iterations early;
1153 # copy the output stream to its final destination
1154 $sink_object = $sink_object_final;
1155 while ( my $line = $source_object->get_line() ) {
1156 $sink_object->write_line($line);
1158 $source_object->close_input_file();
1161 } ## end if ( $iter < $max_iterations)
1162 } # end loop over iterations for one source file
1164 # restore objects which have been temporarily undefined
1165 # for second and higher iterations
1166 $debugger_object = $debugger_object_final;
1167 $logger_object = $logger_object_final;
1169 $logger_object->write_logfile_entry($convergence_log_message)
1170 if $convergence_log_message;
1172 #---------------------------------------------------------------
1173 # Perform any postfilter operation
1174 #---------------------------------------------------------------
1176 $sink_object->close_output_file();
1178 Perl::Tidy::LineSink->new( $output_file, $tee_file,
1179 $line_separator, $rOpts, $rpending_logfile_message,
1181 my $buf = $postfilter->($postfilter_buffer);
1183 Perl::Tidy::LineSource->new( \$buf, $rOpts,
1184 $rpending_logfile_message );
1186 ##foreach my $line ( split( "\n", $buf , -1) ) {
1187 while ( my $line = $source_object->get_line() ) {
1188 $sink_object->write_line($line);
1190 $source_object->close_input_file();
1193 # Save names of the input and output files for syntax check
1194 my $ifname = $input_file;
1195 my $ofname = $output_file;
1197 #---------------------------------------------------------------
1198 # handle the -b option (backup and modify in-place)
1199 #---------------------------------------------------------------
1200 if ($in_place_modify) {
1201 unless ( -f $input_file ) {
1203 # oh, oh, no real file to backup ..
1204 # shouldn't happen because of numerous preliminary checks
1206 "problem with -b backing up input file '$input_file': not a file\n";
1208 my $backup_name = $input_file . $backup_extension;
1209 if ( -f $backup_name ) {
1210 unlink($backup_name)
1212 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1215 # backup the input file
1216 # we use copy for symlinks, move for regular files
1217 if ( -l $input_file ) {
1218 File::Copy::copy( $input_file, $backup_name )
1219 or die "File::Copy failed trying to backup source: $!";
1222 rename( $input_file, $backup_name )
1224 "problem renaming $input_file to $backup_name for -b option: $!\n";
1226 $ifname = $backup_name;
1228 # copy the output to the original input file
1229 # NOTE: it would be nice to just close $output_file and use
1230 # File::Copy::copy here, but in this case $output_file is the
1231 # handle of an open nameless temporary file so we would lose
1232 # everything if we closed it.
1233 seek( $output_file, 0, 0 )
1235 "unable to rewind a temporary file for -b option: $!\n";
1236 my $fout = IO::File->new("> $input_file")
1238 "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1241 while ( $line = $output_file->getline() ) {
1242 $fout->print($line);
1245 $output_file = $input_file;
1246 $ofname = $input_file;
1249 #---------------------------------------------------------------
1250 # clean up and report errors
1251 #---------------------------------------------------------------
1252 $sink_object->close_output_file() if $sink_object;
1253 $debugger_object->close_debug_file() if $debugger_object;
1255 # set output file permissions
1256 if ( $output_file && -f $output_file && !-l $output_file ) {
1257 if ($input_file_permissions) {
1259 # give output script same permissions as input script, but
1260 # make it user-writable or else we can't run perltidy again.
1261 # Thus we retain whatever executable flags were set.
1262 if ( $rOpts->{'format'} eq 'tidy' ) {
1263 chmod( $input_file_permissions | 0600, $output_file );
1266 # else use default permissions for html and any other format
1270 #---------------------------------------------------------------
1271 # Do syntax check if requested and possible
1272 #---------------------------------------------------------------
1273 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
1275 && $rOpts->{'check-syntax'}
1280 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1283 #---------------------------------------------------------------
1284 # remove the original file for in-place modify as follows:
1285 # $delete_backup=0 never
1286 # $delete_backup=1 only if no errors
1287 # $delete_backup>1 always : CURRENTLY NOT ALLOWED, see above
1288 #---------------------------------------------------------------
1289 if ( $in_place_modify
1292 && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1295 # As an added safety precaution, do not delete the source file
1296 # if its size has dropped from positive to zero, since this
1297 # could indicate a disaster of some kind, including a hardware
1298 # failure. Actually, this could happen if you had a file of
1299 # all comments (or pod) and deleted everything with -dac (-dap)
1301 if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1303 "output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1309 "unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1313 $logger_object->finish( $infile_syntax_ok, $formatter )
1315 } # end of main loop to process all files
1316 } # end of main program perltidy
1319 sub get_stream_as_named_file {
1321 # Return the name of a file containing a stream of data, creating
1322 # a temporary file if necessary.
# Given:
1324 # $stream - the name of a file or stream
# Returns the list ( $fname, $is_tmpfile ):
1326 # $fname = name of file if possible, or undef
1327 # $is_tmpfile = true if temp file, undef if not temp file
1329 # This routine is needed for passing actual files to Perl for
# NOTE(review): the statements that unpack $stream from @_ and that assign
# $fname / $is_tmpfile are not visible here -- confirm against the full file.

# Case 1: $stream is a reference.  Perl::Tidy::streamhandle supplies a
# handle opened in 'r' mode for it, and every line read from that handle is
# printed to a file opened for writing under the name returned by
# Perl::Tidy::make_temporary_filename().
1335 if ( ref($stream) ) {
1336 my ( $fh_stream, $fh_name ) =
1337 Perl::Tidy::streamhandle( $stream, 'r' );
1339 my ( $fout, $tmpnam );
1341 # FIXME: fix the tmpnam routine to return an open filehandle
1342 $tmpnam = Perl::Tidy::make_temporary_filename();
1343 $fout = IO::File->new( $tmpnam, 'w' );
# Copy the stream line by line into the temporary file.
1349 while ( my $line = $fh_stream->getline() ) {
1350 $fout->print($line);
1354 $fh_stream->close();
# Case 2: $stream is a plain string other than '-' which names an existing
# regular file (-f test).
# NOTE(review): presumably $fname is simply set to $stream in this branch;
# the assignment is not visible here -- confirm.
1357 elsif ( $stream ne '-' && -f $stream ) {
1361 return ( $fname, $is_tmpfile );
1364 sub fileglob_to_re {
# Convert the shell-style file glob held in $x into the text of an
# anchored regular expression:
#   - the characters . / ^ $ ( ) are backslash-escaped,
#   - '*' becomes '.*' and '?' becomes '.',
#   - the result is wrapped as ^...\z so the whole name must match.
# The resulting string is the value of the final expression below.
# NOTE(review): how $x is obtained from @_ is not visible here -- confirm.
# NOTE(review): other regex metacharacters (for example + | [ ] { }) are
# passed through unescaped; confirm this is intended for the globs used.
1366 # modified (corrected) from version in find2perl
1368 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
1369 $x =~ s#\*#.*#g; # '*' -> '.*'
1370 $x =~ s#\?#.#g; # '?' -> '.'
1371 "^$x\\z"; # match whole word
1374 sub make_extension {
1376 # Make a file extension, including any leading '.' if necessary
1377 # The '.' may actually be an '_' under VMS
# Parameters:
#   $extension - the requested extension (may be undefined or empty)
#   $default   - the extension to use when $extension is false
#   $dot       - the separator to prepend (see the VMS remark above)
# NOTE(review): the return statement is not visible here; presumably the
# resulting $extension is returned -- confirm.
1378 my ( $extension, $default, $dot ) = @_;
1380 # Use the default if none specified
# This is a plain truth test, so an extension of '' or '0' also selects
# the default.
1381 $extension = $default unless ($extension);
1383 # Only extensions with these leading characters get a '.'
1384 # This rule gives the user some freedom
# An extension that starts with any other character (for example one that
# already begins with '.' or '_') is left exactly as given.
1385 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1386 $extension = $dot . $extension;
1391 sub write_logfile_header {
# Write the opening entries of the log file.  Every entry goes through
# $logger_object->write_logfile_entry.
# Values used (NOTE(review): presumably unpacked from @_ in this order; the
# enclosing 'my ( ... ) = @_;' is not visible here -- confirm):
#   $rOpts            - hash ref of options; 'DEBUG' and 'show-options' are
#                       read and 'logfile' may be set to 1 (see below)
#   $logger_object    - object providing write_logfile_entry
#   $config_file      - interpolated into the configuration-file entry
#   $rraw_options     - array ref; its elements are joined with single spaces
#   $Windows_type     - if true, reported in its own entry
#   $readable_options - text written as the final parameter set
1393 $rOpts, $logger_object, $config_file,
1394 $rraw_options, $Windows_type, $readable_options
# First entry: program version, operating system name ($^O) and the perl
# version number ($]).
1396 $logger_object->write_logfile_entry(
1397 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1399 if ($Windows_type) {
1400 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1402 my $options_string = join( ' ', @$rraw_options );
# NOTE(review): presumably the next entry is written only when $config_file
# is set; the guarding condition is not visible here -- confirm.
1405 $logger_object->write_logfile_entry(
1406 "Found Configuration File >>> $config_file \n");
1408 $logger_object->write_logfile_entry(
1409 "Configuration and command line parameters for this run:\n");
1410 $logger_object->write_logfile_entry("$options_string\n");
# With DEBUG or show-options the log file is forced on and the readable
# parameter set is written between two separator lines.
# NOTE(review): the closing brace of this conditional is not visible here,
# so which of the following entries it encloses should be confirmed.
1412 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1413 $rOpts->{'logfile'} = 1; # force logfile to be saved
1414 $logger_object->write_logfile_entry(
1415 "Final parameter set for this run\n");
1416 $logger_object->write_logfile_entry(
1417 "------------------------------------\n");
1419 $logger_object->write_logfile_entry($readable_options);
1421 $logger_object->write_logfile_entry(
1422 "------------------------------------\n");
1424 $logger_object->write_logfile_entry(
1425 "To find error messages search for 'WARNING' with your editor\n");
1428 sub generate_options {
1430 ######################################################################
1431 # Generate and return references to:
1432 # @option_string - the list of options to be passed to Getopt::Long
1433 # @defaults - the list of default options
1434 # %expansion - a hash showing how all abbreviations are expanded
1435 # %category - a hash giving the general category of each option
1436 # %option_range - a hash giving the valid ranges of certain options
1438 # Note: a few options are not documented in the man page and usage
1439 # message. This is because these are experimental or debug options and
1440 # may or may not be retained in future versions.
1442 # Here are the undocumented flags as far as I know. Any of them
1443 # may disappear at any time. They are mainly for fine-tuning
1446 # fll --> fuzzy-line-length # a trivial parameter which gets
1447 # turned off for the extrude option
1448 # which is mainly for debugging
1449 # chk --> check-multiline-quotes # check for old bug; to be deleted
1450 # scl --> short-concatenation-item-length # helps break at '.'
1451 # recombine # for debugging line breaks
1452 # valign # for debugging vertical alignment
1453 # I --> DIAGNOSTICS # for debugging
1454 ######################################################################
# This routine takes no arguments.  The five references are returned in
# the order: \@option_string, \@defaults, \%expansion, \%option_category,
# \%option_range.
1456 # here is a summary of the Getopt codes:
1457 # <none> does not take an argument
1458 # =s takes a mandatory string
1459 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1460 # =i takes a mandatory integer
1461 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1462 # ! does not take an argument and may be negated
1463 # i.e., -foo and -nofoo are allowed
1464 # a double dash signals the end of the options list
1466 #---------------------------------------------------------------
1467 # Define the option string passed to GetOptions.
1468 #---------------------------------------------------------------
1470 my @option_string = ();
1472 my %option_category = ();
1473 my %option_range = ();
1474 my $rexpansion = \%expansion;
1476 # names of categories in manual
1477 # leading integers will allow sorting
1478 my @category_name = (
1480 '1. Basic formatting options',
1481 '2. Code indentation control',
1482 '3. Whitespace control',
1483 '4. Comment controls',
1484 '5. Linebreak controls',
1485 '6. Controlling list formatting',
1486 '7. Retaining or ignoring existing line breaks',
1487 '8. Blank line control',
1488 '9. Other controls',
1490 '11. pod2html options',
1491 '12. Controlling HTML properties',
1495 # These options are parsed directly by perltidy:
1498 # However, they are included in the option set so that they will
1499 # be seen in the options dump.
1501 # These long option names have no abbreviations or are treated specially
1502 @option_string = qw(
1512 my $category = 13; # Debugging
1513 foreach (@option_string) {
1514 my $opt = $_; # must avoid changing the actual flag
1516 $option_category{$opt} = $category_name[$category];
1519 $category = 11; # HTML
1520 $option_category{html} = $category_name[$category];
1522 # routine to install and check options
# Arguments of the closure:
#   $long_name  - full option name, without any Getopt::Long suffix
#   $short_name - abbreviation; stored in %expansion as [$long_name]
#   $flag       - Getopt::Long suffix appended to $long_name when it is
#                 pushed onto @option_string ('!', '=s', '=i', ':i' or '')
# The option is filed under the category selected by the current value of
# $category.  For '!' flags the negated abbreviation 'n' . $short_name is
# also stored, expanding to 'no' . $long_name.  An abbreviation that is
# already present in %expansion is reported with a message naming the
# existing owner.
1523 my $add_option = sub {
1524 my ( $long_name, $short_name, $flag ) = @_;
1525 push @option_string, $long_name . $flag;
1526 $option_category{$long_name} = $category_name[$category];
1528 if ( $expansion{$short_name} ) {
1529 my $existing_name = $expansion{$short_name}[0];
1531 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1533 $expansion{$short_name} = [$long_name];
1534 if ( $flag eq '!' ) {
1535 my $nshort_name = 'n' . $short_name;
1536 my $nolong_name = 'no' . $long_name;
1537 if ( $expansion{$nshort_name} ) {
1538 my $existing_name = $expansion{$nshort_name}[0];
1540 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1542 $expansion{$nshort_name} = [$nolong_name];
1547 # Install long option names which have a simple abbreviation.
1548 # Options with code '!' get standard negation ('no' for long names,
1549 # 'n' for abbreviations). Categories follow the manual.
1551 ###########################
1552 $category = 0; # I/O_Control
1553 ###########################
1554 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1555 $add_option->( 'backup-file-extension', 'bext', '=s' );
1556 $add_option->( 'force-read-binary', 'f', '!' );
1557 $add_option->( 'format', 'fmt', '=s' );
1558 $add_option->( 'iterations', 'it', '=i' );
1559 $add_option->( 'logfile', 'log', '!' );
1560 $add_option->( 'logfile-gap', 'g', ':i' );
1561 $add_option->( 'outfile', 'o', '=s' );
1562 $add_option->( 'output-file-extension', 'oext', '=s' );
1563 $add_option->( 'output-path', 'opath', '=s' );
1564 $add_option->( 'profile', 'pro', '=s' );
1565 $add_option->( 'quiet', 'q', '!' );
1566 $add_option->( 'standard-error-output', 'se', '!' );
1567 $add_option->( 'standard-output', 'st', '!' );
1568 $add_option->( 'warning-output', 'w', '!' );
1570 # options which are both toggle switches and values moved here
1571 # to hide from tidyview (which does not show category 0 flags):
1572 # -ole moved here from category 1
1573 # -sil moved here from category 2
1574 $add_option->( 'output-line-ending', 'ole', '=s' );
1575 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1577 ########################################
1578 $category = 1; # Basic formatting options
1579 ########################################
1580 $add_option->( 'check-syntax', 'syn', '!' );
1581 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1582 $add_option->( 'indent-columns', 'i', '=i' );
1583 $add_option->( 'maximum-line-length', 'l', '=i' );
1584 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1585 $add_option->( 'preserve-line-endings', 'ple', '!' );
1586 $add_option->( 'tabs', 't', '!' );
1588 ########################################
1589 $category = 2; # Code indentation control
1590 ########################################
1591 $add_option->( 'continuation-indentation', 'ci', '=i' );
1592 $add_option->( 'line-up-parentheses', 'lp', '!' );
1593 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1594 $add_option->( 'outdent-keywords', 'okw', '!' );
1595 $add_option->( 'outdent-labels', 'ola', '!' );
1596 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1597 $add_option->( 'indent-closing-brace', 'icb', '!' );
1598 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1599 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1600 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1601 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1602 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1603 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1605 ########################################
1606 $category = 3; # Whitespace control
1607 ########################################
1608 $add_option->( 'add-semicolons', 'asc', '!' );
1609 $add_option->( 'add-whitespace', 'aws', '!' );
1610 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1611 $add_option->( 'brace-tightness', 'bt', '=i' );
1612 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1613 $add_option->( 'delete-semicolons', 'dsm', '!' );
1614 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1615 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1616 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1617 $add_option->( 'paren-tightness', 'pt', '=i' );
1618 $add_option->( 'space-after-keyword', 'sak', '=s' );
1619 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1620 $add_option->( 'space-function-paren', 'sfp', '!' );
1621 $add_option->( 'space-keyword-paren', 'skp', '!' );
1622 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1623 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1624 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1625 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1626 $add_option->( 'trim-qw', 'tqw', '!' );
1627 $add_option->( 'want-left-space', 'wls', '=s' );
1628 $add_option->( 'want-right-space', 'wrs', '=s' );
1630 ########################################
1631 $category = 4; # Comment controls
1632 ########################################
1633 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1634 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1635 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1636 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1637 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1638 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1639 $add_option->( 'closing-side-comments', 'csc', '!' );
1640 $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
1641 $add_option->( 'format-skipping', 'fs', '!' );
1642 $add_option->( 'format-skipping-begin', 'fsb', '=s' );
1643 $add_option->( 'format-skipping-end', 'fse', '=s' );
1644 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1645 $add_option->( 'indent-block-comments', 'ibc', '!' );
1646 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1647 $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
1648 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1649 $add_option->( 'outdent-long-comments', 'olc', '!' );
1650 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1651 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1652 $add_option->( 'static-block-comments', 'sbc', '!' );
1653 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1654 $add_option->( 'static-side-comments', 'ssc', '!' );
1656 ########################################
1657 $category = 5; # Linebreak controls
1658 ########################################
1659 $add_option->( 'add-newlines', 'anl', '!' );
1660 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1661 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1662 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1663 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1664 $add_option->( 'cuddled-else', 'ce', '!' );
1665 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1666 $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
1667 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1668 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1669 $add_option->( 'opening-paren-right', 'opr', '!' );
1670 $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1671 $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
1672 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1673 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1674 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1675 $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1676 $add_option->( 'stack-closing-paren', 'scp', '!' );
1677 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1678 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1679 $add_option->( 'stack-opening-paren', 'sop', '!' );
1680 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1681 $add_option->( 'vertical-tightness', 'vt', '=i' );
1682 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1683 $add_option->( 'want-break-after', 'wba', '=s' );
1684 $add_option->( 'want-break-before', 'wbb', '=s' );
1685 $add_option->( 'break-after-all-operators', 'baao', '!' );
1686 $add_option->( 'break-before-all-operators', 'bbao', '!' );
1687 $add_option->( 'keep-interior-semicolons', 'kis', '!' );
1689 ########################################
1690 $category = 6; # Controlling list formatting
1691 ########################################
1692 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1693 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1694 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1696 ########################################
1697 $category = 7; # Retaining or ignoring existing line breaks
1698 ########################################
1699 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1700 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1701 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1702 $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
1703 $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1705 ########################################
1706 $category = 8; # Blank line control
1707 ########################################
1708 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1709 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1710 $add_option->( 'blank-lines-before-subs', 'blbs', '=i' );
1711 $add_option->( 'blank-lines-before-packages', 'blbp', '=i' );
1712 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1713 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1714 $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
1716 ########################################
1717 $category = 9; # Other controls
1718 ########################################
1719 $add_option->( 'delete-block-comments', 'dbc', '!' );
1720 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1721 $add_option->( 'delete-pod', 'dp', '!' );
1722 $add_option->( 'delete-side-comments', 'dsc', '!' );
1723 $add_option->( 'tee-block-comments', 'tbc', '!' );
1724 $add_option->( 'tee-pod', 'tp', '!' );
1725 $add_option->( 'tee-side-comments', 'tsc', '!' );
1726 $add_option->( 'look-for-autoloader', 'lal', '!' );
1727 $add_option->( 'look-for-hash-bang', 'x', '!' );
1728 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1729 $add_option->( 'pass-version-line', 'pvl', '!' );
1731 ########################################
1732 $category = 13; # Debugging
1733 ########################################
1734 $add_option->( 'DEBUG', 'D', '!' );
1735 $add_option->( 'DIAGNOSTICS', 'I', '!' );
1736 $add_option->( 'check-multiline-quotes', 'chk', '!' );
1737 $add_option->( 'dump-defaults', 'ddf', '!' );
1738 $add_option->( 'dump-long-names', 'dln', '!' );
1739 $add_option->( 'dump-options', 'dop', '!' );
1740 $add_option->( 'dump-profile', 'dpro', '!' );
1741 $add_option->( 'dump-short-names', 'dsn', '!' );
1742 $add_option->( 'dump-token-types', 'dtt', '!' );
1743 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1744 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1745 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1746 $add_option->( 'help', 'h', '' );
1747 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1748 $add_option->( 'show-options', 'opt', '!' );
1749 $add_option->( 'version', 'v', '' );
1751 #---------------------------------------------------------------------
1753 # The Perl::Tidy::HtmlWriter will add its own options to the string
1754 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1756 ########################################
1757 # Set categories 10, 11, 12
1758 ########################################
1759 # Based on their known order
# Only options which still have no category are touched here.  The
# category starts at 12 and is switched when a name beginning with
# 'html-linked' or 'pod2html' is met, so the result relies on the order
# in which the names appear in @option_string.
1760 $category = 12; # HTML properties
1761 foreach my $opt (@option_string) {
1762 my $long_name = $opt;
# strip the Getopt::Long suffix to recover the bare long name
1763 $long_name =~ s/(!|=.*|:.*)$//;
1764 unless ( defined( $option_category{$long_name} ) ) {
1765 if ( $long_name =~ /^html-linked/ ) {
1766 $category = 10; # HTML options
1768 elsif ( $long_name =~ /^pod2html/ ) {
1769 $category = 11; # Pod2html
1771 $option_category{$long_name} = $category_name[$category];
1775 #---------------------------------------------------------------
1776 # Assign valid ranges to certain options
1777 #---------------------------------------------------------------
1778 # In the future, these may be used to make preliminary checks
1779 # hash keys are long names
1780 # If key or value is undefined:
1781 # strings may have any value
1782 # integer ranges are >=0
1783 # If value is defined:
1784 # value is [qw(any valid words)] for strings
1785 # value is [min, max] for integers
1786 # if min is undefined, there is no lower limit
1787 # if max is undefined, there is no upper limit
1788 # Parameters not listed here have defaults
1790 'format' => [ 'tidy', 'html', 'user' ],
1791 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
1793 'block-brace-tightness' => [ 0, 2 ],
1794 'brace-tightness' => [ 0, 2 ],
1795 'paren-tightness' => [ 0, 2 ],
1796 'square-bracket-tightness' => [ 0, 2 ],
1798 'block-brace-vertical-tightness' => [ 0, 2 ],
1799 'brace-vertical-tightness' => [ 0, 2 ],
1800 'brace-vertical-tightness-closing' => [ 0, 2 ],
1801 'paren-vertical-tightness' => [ 0, 2 ],
1802 'paren-vertical-tightness-closing' => [ 0, 2 ],
1803 'square-bracket-vertical-tightness' => [ 0, 2 ],
1804 'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1805 'vertical-tightness' => [ 0, 2 ],
1806 'vertical-tightness-closing' => [ 0, 2 ],
1808 'closing-brace-indentation' => [ 0, 3 ],
1809 'closing-paren-indentation' => [ 0, 3 ],
1810 'closing-square-bracket-indentation' => [ 0, 3 ],
1811 'closing-token-indentation' => [ 0, 3 ],
1813 'closing-side-comment-else-flag' => [ 0, 2 ],
1814 'comma-arrow-breakpoints' => [ 0, 3 ],
1817 # Note: we could actually allow negative ci if someone really wants it:
1818 # $option_range{'continuation-indentation'} = [ undef, undef ];
1820 #---------------------------------------------------------------
1821 # Assign default values to the above options here, except
1822 # for 'outfile' and 'help'.
1823 # These settings should approximate the perlstyle(1) suggestions.
1824 #---------------------------------------------------------------
1829 blanks-before-blocks
1830 blanks-before-comments
1831 blank-lines-before-subs=1
1832 blank-lines-before-packages=1
1833 block-brace-tightness=0
1834 block-brace-vertical-tightness=0
1836 brace-vertical-tightness-closing=0
1837 brace-vertical-tightness=0
1838 break-at-old-logical-breakpoints
1839 break-at-old-ternary-breakpoints
1840 break-at-old-attribute-breakpoints
1841 break-at-old-keyword-breakpoints
1842 comma-arrow-breakpoints=1
1844 closing-side-comment-interval=6
1845 closing-side-comment-maximum-text=20
1846 closing-side-comment-else-flag=0
1847 closing-side-comments-balanced
1848 closing-paren-indentation=0
1849 closing-brace-indentation=0
1850 closing-square-bracket-indentation=0
1851 continuation-indentation=2
1855 hanging-side-comments
1856 indent-block-comments
1859 keep-old-blank-lines=1
1860 long-block-line-count=8
1863 maximum-consecutive-blank-lines=1
1864 maximum-fields-per-table=0
1865 maximum-line-length=80
1866 minimum-space-to-comment=4
1867 nobrace-left-and-indent
1869 nodelete-old-whitespace
1874 nostatic-side-comments
1879 outdent-long-comments
1881 paren-vertical-tightness-closing=0
1882 paren-vertical-tightness=0
1886 short-concatenation-item-length=8
1888 square-bracket-tightness=1
1889 square-bracket-vertical-tightness-closing=0
1890 square-bracket-vertical-tightness=0
1891 static-block-comments
1894 backup-file-extension=bak
1898 html-table-of-contents
1902 push @defaults, "perl-syntax-check-flags=-c -T";
1904 #---------------------------------------------------------------
1905 # Define abbreviations which will be expanded into the above primitives.
1906 # These may be defined recursively.
1907 #---------------------------------------------------------------
1910 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1911 'fnl' => [qw(freeze-newlines)],
1912 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1913 'fws' => [qw(freeze-whitespace)],
1914 'freeze-blank-lines' =>
1915 [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1916 'fbl' => [qw(freeze-blank-lines)],
1917 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1918 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1919 'nooutdent-long-lines' =>
1920 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1921 'noll' => [qw(nooutdent-long-lines)],
1922 'io' => [qw(indent-only)],
1923 'delete-all-comments' =>
1924 [qw(delete-block-comments delete-side-comments delete-pod)],
1925 'nodelete-all-comments' =>
1926 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1927 'dac' => [qw(delete-all-comments)],
1928 'ndac' => [qw(nodelete-all-comments)],
1929 'gnu' => [qw(gnu-style)],
1930 'pbp' => [qw(perl-best-practices)],
1931 'tee-all-comments' =>
1932 [qw(tee-block-comments tee-side-comments tee-pod)],
1933 'notee-all-comments' =>
1934 [qw(notee-block-comments notee-side-comments notee-pod)],
1935 'tac' => [qw(tee-all-comments)],
1936 'ntac' => [qw(notee-all-comments)],
1937 'html' => [qw(format=html)],
1938 'nhtml' => [qw(format=tidy)],
1939 'tidy' => [qw(format=tidy)],
1941 'swallow-optional-blank-lines' => [qw(kbl=0)],
1942 'noswallow-optional-blank-lines' => [qw(kbl=1)],
1943 'sob' => [qw(kbl=0)],
1944 'nsob' => [qw(kbl=1)],
1946 'break-after-comma-arrows' => [qw(cab=0)],
1947 'nobreak-after-comma-arrows' => [qw(cab=1)],
1948 'baa' => [qw(cab=0)],
1949 'nbaa' => [qw(cab=1)],
1951 'blanks-before-subs' => [qw(blbs=1 blbp=1)],
1952 'bbs' => [qw(blbs=1 blbp=1)],
1953 'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
1954 'nbbs' => [qw(blbs=0 blbp=0)],
1956 'break-at-old-trinary-breakpoints' => [qw(bot)],
1958 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1959 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1960 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1961 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1962 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1964 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1965 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1966 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1967 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1968 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1970 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1971 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1972 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1974 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1975 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1976 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1978 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1979 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1980 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1982 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1983 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1984 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1986 'otr' => [qw(opr ohbr osbr)],
1987 'opening-token-right' => [qw(opr ohbr osbr)],
1988 'notr' => [qw(nopr nohbr nosbr)],
1989 'noopening-token-right' => [qw(nopr nohbr nosbr)],
1991 'sot' => [qw(sop sohb sosb)],
1992 'nsot' => [qw(nsop nsohb nsosb)],
1993 'stack-opening-tokens' => [qw(sop sohb sosb)],
1994 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
# The long negated form for the closing tokens needs its own key,
# 'nostack-closing-tokens'.  A hash list keeps only the last value given
# for a repeated key, so keying this entry 'nostack-opening-tokens' would
# replace the opening-token expansion defined just above.
1996 'sct' => [qw(scp schb scsb)],
1997 'stack-closing-tokens' => [qw(scp schb scsb)],
1998 'nsct' => [qw(nscp nschb nscsb)],
1999 'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
2001 # 'mangle' originally deleted pod and comments, but to keep it
2002 # reversible, it no longer does. But if you really want to
2003 # delete them, just use:
2006 # An interesting use for 'mangle' is to do this:
2007 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
2008 # which will form as many one-line blocks as possible
2013 keep-old-blank-lines=0
2015 delete-old-whitespace
2018 maximum-consecutive-blank-lines=0
2019 maximum-line-length=100000
2023 noblanks-before-blocks
2024 blank-lines-before-subs=0
2025 blank-lines-before-packages=0
2030 # 'extrude' originally deleted pod and comments, but to keep it
2031 # reversible, it no longer does. But if you really want to
2032 # delete them, just use
2035 # An interesting use for 'extrude' is to do this:
2036 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
2037 # which will break up all one-line blocks.
2044 delete-old-whitespace
2047 maximum-consecutive-blank-lines=0
2048 maximum-line-length=1
2051 noblanks-before-blocks
2052 blank-lines-before-subs=0
2053 blank-lines-before-packages=0
2060 # this style tries to follow the GNU Coding Standards (which do
2061 # not really apply to perl but which are followed by some perl
2065 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
2069 # Style suggested in Damian Conway's Perl Best Practices
2070 'perl-best-practices' => [
2071 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
2072 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
2075 # Additional styles can be added here
2078 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
2080 # Uncomment next line to dump all expansions for debugging:
2081 # dump_short_names(\%expansion);
2083 \@option_string, \@defaults, \%expansion,
2084 \%option_category, \%option_range
2087 } # end of generate_options
2089 sub process_command_line {
2092 $perltidyrc_stream, $is_Windows, $Windows_type,
2093 $rpending_complaint, $dump_options_type
2099 $roption_string, $rdefaults, $rexpansion,
2100 $roption_category, $roption_range
2101 ) = generate_options();
2103 #---------------------------------------------------------------
2104 # set the defaults by passing the above list through GetOptions
2105 #---------------------------------------------------------------
2111 # do not load the defaults if we are just dumping perltidyrc
2112 unless ( $dump_options_type eq 'perltidyrc' ) {
2113 for $i (@$rdefaults) { push @ARGV, "--" . $i }
2116 # Patch to save users Getopt::Long configuration
2117 # and set to Getopt::Long defaults. Use eval to avoid
2118 # breaking old versions of Perl without these routines.
2120 eval { $glc = Getopt::Long::Configure() };
2122 eval { Getopt::Long::ConfigDefaults() };
2124 else { $glc = undef }
2126 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2127 die "Programming Bug: error in setting default options";
2130 # Patch to put the previous Getopt::Long configuration back
2131 eval { Getopt::Long::Configure($glc) } if defined $glc;
2135 my @raw_options = ();
2136 my $config_file = "";
2137 my $saw_ignore_profile = 0;
2138 my $saw_extrude = 0;
2139 my $saw_dump_profile = 0;
2142 #---------------------------------------------------------------
2143 # Take a first look at the command-line parameters. Do as many
2144 # immediate dumps as possible, which can avoid confusion if the
2145 # perltidyrc file has an error.
2146 #---------------------------------------------------------------
2147 foreach $i (@ARGV) {
2150 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
2151 $saw_ignore_profile = 1;
2154 # note: this must come before -pro and -profile, below:
2155 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
2156 $saw_dump_profile = 1;
2158 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
2161 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
2165 # resolve <dir>/.../<file>, meaning look upwards from directory
2166 if ( defined($config_file) ) {
2167 if ( my ( $start_dir, $search_file ) =
2168 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2170 $start_dir = '.' if !$start_dir;
2171 $start_dir = Cwd::realpath($start_dir);
2172 if ( my $found_file =
2173 find_file_upwards( $start_dir, $search_file ) )
2175 $config_file = $found_file;
2179 unless ( -e $config_file ) {
2180 warn "cannot find file given with -pro=$config_file: $!\n";
2184 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
2185 die "usage: -pro=filename or --profile=filename, no spaces\n";
2187 elsif ( $i =~ /^-extrude$/ ) {
2190 elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
2194 elsif ( $i =~ /^-(version|v)$/ ) {
2198 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
2199 dump_defaults(@$rdefaults);
2202 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
2203 dump_long_names(@$roption_string);
2206 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
2207 dump_short_names($rexpansion);
2210 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
2211 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
2216 if ( $saw_dump_profile && $saw_ignore_profile ) {
2217 warn "No profile to dump because of -npro\n";
2221 #---------------------------------------------------------------
2222 # read any .perltidyrc configuration file
2223 #---------------------------------------------------------------
2224 unless ($saw_ignore_profile) {
2226 # resolve possible conflict between $perltidyrc_stream passed
2227 # as call parameter to perltidy and -pro=filename on command
2229 if ($perltidyrc_stream) {
2232 Conflict: a perltidyrc configuration file was specified both as this
2233 perltidy call parameter: $perltidyrc_stream
2234 and with this -profile=$config_file.
2235 Using -profile=$config_file.
2239 $config_file = $perltidyrc_stream;
2243 # look for a config file if we don't have one yet
2244 my $rconfig_file_chatter;
2245 $$rconfig_file_chatter = "";
2247 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2248 $rpending_complaint )
2249 unless $config_file;
2251 # open any config file
2254 ( $fh_config, $config_file ) =
2255 Perl::Tidy::streamhandle( $config_file, 'r' );
2256 unless ($fh_config) {
2257 $$rconfig_file_chatter .=
2258 "# $config_file exists but cannot be opened\n";
2262 if ($saw_dump_profile) {
2263 dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
2269 my ( $rconfig_list, $death_message ) =
2270 read_config_file( $fh_config, $config_file, $rexpansion );
2271 die $death_message if ($death_message);
2273 # process any .perltidyrc parameters right now so we can
2275 if (@$rconfig_list) {
2276 local @ARGV = @$rconfig_list;
2278 expand_command_abbreviations( $rexpansion, \@raw_options,
2281 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2283 "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
2286 # Anything left in this local @ARGV is an error and must be
2287 # invalid bare words from the configuration file. We cannot
2288 # check this earlier because bare words may have been valid
2289 # values for parameters. We had to wait for GetOptions to have
2293 my $str = "\'" . pop(@ARGV) . "\'";
2294 while ( my $param = pop(@ARGV) ) {
2295 if ( length($str) < 70 ) {
2296 $str .= ", '$param'";
2304 There are $count unrecognized values in the configuration file '$config_file':
2306 Use leading dashes for parameters. Use -npro to ignore this file.
2310 # Undo any options which cause premature exit. They are not
2311 # appropriate for a config file, and it could be hard to
2312 # diagnose the cause of the premature exit.
2321 dump-want-left-space
2322 dump-want-right-space
2330 if ( defined( $Opts{$_} ) ) {
2332 warn "ignoring --$_ in config file: $config_file\n";
2339 #---------------------------------------------------------------
2340 # now process the command line parameters
2341 #---------------------------------------------------------------
2342 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2344 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2345 die "Error on command line; for help try 'perltidy -h'\n";
2348 return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
2349 $rexpansion, $roption_category, $roption_range );
2350 } # end of process_command_line
2354 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2356 #---------------------------------------------------------------
2357 # check and handle any interactions among the basic options..
2358 #---------------------------------------------------------------
2360 # Since -vt, -vtc, and -cti are abbreviations, but under
2361 # msdos, an unquoted input parameter like vtc=1 will be
2362 # seen as 2 parameters, vtc and 1, so the abbreviations
2363 # won't be seen. Therefore, we will catch them here if
2366 if ( defined $rOpts->{'vertical-tightness'} ) {
2367 my $vt = $rOpts->{'vertical-tightness'};
2368 $rOpts->{'paren-vertical-tightness'} = $vt;
2369 $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2370 $rOpts->{'brace-vertical-tightness'} = $vt;
2373 if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2374 my $vtc = $rOpts->{'vertical-tightness-closing'};
2375 $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
2376 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2377 $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
2380 if ( defined $rOpts->{'closing-token-indentation'} ) {
2381 my $cti = $rOpts->{'closing-token-indentation'};
2382 $rOpts->{'closing-square-bracket-indentation'} = $cti;
2383 $rOpts->{'closing-brace-indentation'} = $cti;
2384 $rOpts->{'closing-paren-indentation'} = $cti;
2387 # In quiet mode, there is no log file and hence no way to report
2388 # results of syntax check, so don't do it.
2389 if ( $rOpts->{'quiet'} ) {
2390 $rOpts->{'check-syntax'} = 0;
2393 # can't check syntax if no output
2394 if ( $rOpts->{'format'} ne 'tidy' ) {
2395 $rOpts->{'check-syntax'} = 0;
2398 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2399 # wide variety of nasty problems on these systems, because they cannot
2400 # reliably run backticks. Don't even think about changing this!
2401 if ( $rOpts->{'check-syntax'}
2403 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2405 $rOpts->{'check-syntax'} = 0;
2408 # It's really a bad idea to check syntax as root unless you wrote
2409 # the script yourself. FIXME: not sure if this works with VMS
2410 unless ($is_Windows) {
2412 if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2413 $rOpts->{'check-syntax'} = 0;
2414 $$rpending_complaint .=
2415 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2419 # check iteration count and quietly fix if necessary:
2420 # - iterations option only applies to code beautification mode
2421 # - the convergence check should stop most runs on iteration 2, and
2422 # virtually all on iteration 3. But we'll allow up to 6.
2423 if ( $rOpts->{'format'} ne 'tidy' ) {
2424 $rOpts->{'iterations'} = 1;
2426 elsif ( defined( $rOpts->{'iterations'} ) ) {
2427 if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2428 elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 }
2431 $rOpts->{'iterations'} = 1;
2434 # check for reasonable number of blank lines and fix to avoid problems
2435 if ( $rOpts->{'blank-lines-before-subs'} ) {
2436 if ( $rOpts->{'blank-lines-before-subs'} < 0 ) {
2437 $rOpts->{'blank-lines-before-subs'} = 0;
2438 warn "negative value of -blbs, setting 0\n";
2440 if ( $rOpts->{'blank-lines-before-subs'} > 100 ) {
2441 warn "unreasonably large value of -blbs, reducing\n";
2442 $rOpts->{'blank-lines-before-subs'} = 100;
2445 if ( $rOpts->{'blank-lines-before-packages'} ) {
2446 if ( $rOpts->{'blank-lines-before-packages'} < 0 ) {
2447 warn "negative value of -blbp, setting 0\n";
2448 $rOpts->{'blank-lines-before-packages'} = 0;
2450 if ( $rOpts->{'blank-lines-before-packages'} > 100 ) {
2451 warn "unreasonably large value of -blbp, reducing\n";
2452 $rOpts->{'blank-lines-before-packages'} = 100;
2456 # see if user set a non-negative logfile-gap
2457 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2459 # a zero gap will be taken as a 1
2460 if ( $rOpts->{'logfile-gap'} == 0 ) {
2461 $rOpts->{'logfile-gap'} = 1;
2464 # setting a non-negative logfile gap causes logfile to be saved
2465 $rOpts->{'logfile'} = 1;
2468 # not setting logfile gap, or setting it negative, causes default of 50
2470 $rOpts->{'logfile-gap'} = 50;
2473 # set short-cut flag when only indentation is to be done.
2474 # Note that the user may or may not have already set the
2476 if ( !$rOpts->{'add-whitespace'}
2477 && !$rOpts->{'delete-old-whitespace'}
2478 && !$rOpts->{'add-newlines'}
2479 && !$rOpts->{'delete-old-newlines'} )
2481 $rOpts->{'indent-only'} = 1;
2484 # -isbc implies -ibc
2485 if ( $rOpts->{'indent-spaced-block-comments'} ) {
2486 $rOpts->{'indent-block-comments'} = 1;
2489 # -bli flag implies -bl
2490 if ( $rOpts->{'brace-left-and-indent'} ) {
2491 $rOpts->{'opening-brace-on-new-line'} = 1;
2494 if ( $rOpts->{'opening-brace-always-on-right'}
2495 && $rOpts->{'opening-brace-on-new-line'} )
2498 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2499 'opening-brace-on-new-line' (-bl). Ignoring -bl.
2501 $rOpts->{'opening-brace-on-new-line'} = 0;
2504 # it simplifies things if -bl is 0 rather than undefined
2505 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2506 $rOpts->{'opening-brace-on-new-line'} = 0;
2509 # -sbl defaults to -bl if not defined
2510 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2511 $rOpts->{'opening-sub-brace-on-new-line'} =
2512 $rOpts->{'opening-brace-on-new-line'};
2515 if ( $rOpts->{'entab-leading-whitespace'} ) {
2516 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2517 warn "-et=n must use a positive integer; ignoring -et\n";
2518 $rOpts->{'entab-leading-whitespace'} = undef;
2521 # entab leading whitespace has priority over the older 'tabs' option
2522 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
# Search for $search_file in $search_dir and then in each successive parent
# directory.
#
# Parameters:
#   $search_dir  - directory in which to start looking
#   $search_file - file name to look for (leading slashes are ignored)
#
# Returns the path of the first match, or nothing (undef in scalar context)
# if the top of the directory tree is reached without finding the file.
sub find_file_upwards {
    my ( $search_dir, $search_file ) = @_;

    # Drop trailing slashes, but leave a bare root directory ('/') intact so
    # that the termination test below can still recognize it.
    $search_dir  =~ s{(?<=[^/])/+$}{};
    $search_file =~ s{^/+}{};

    while (1) {
        my $try_path = "$search_dir/$search_file";
        return $try_path if -f $try_path;

        # Stop at the root, or whenever dirname() can no longer move upwards
        # (for example at '.'); otherwise the loop would never terminate.
        my $parent_dir = dirname($search_dir);
        return if $search_dir eq '/' || $parent_dir eq $search_dir;

        $search_dir = $parent_dir;
    }
}
2546 sub expand_command_abbreviations {
2548 # go through @ARGV and expand any abbreviations
2550 my ( $rexpansion, $rraw_options, $config_file ) = @_;
2553 # set a pass limit to prevent an infinite loop;
2554 # 10 should be plenty, but it may be increased to allow deeply
2555 # nested expansions.
2556 my $max_passes = 10;
2559 # keep looping until all expansions have been converted into actual
2561 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2563 my $abbrev_count = 0;
2565 # loop over each item in @ARGV..
2566 foreach $word (@ARGV) {
2568 # convert any leading 'no-' to just 'no'
2569 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2571 # if it is a dash flag (instead of a file name)..
2572 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2577 # save the raw input for debug output in case of circular refs
2578 if ( $pass_count == 0 ) {
2579 push( @$rraw_options, $word );
2582 # recombine abbreviation and flag, if necessary,
2583 # to allow abbreviations with arguments such as '-vt=1'
2584 if ( $rexpansion->{ $abr . $flags } ) {
2585 $abr = $abr . $flags;
2589 # if we see this dash item in the expansion hash..
2590 if ( $rexpansion->{$abr} ) {
2593 # stuff all of the words that it expands to into the
2594 # new arg list for the next pass
2595 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2596 next unless $abbrev; # for safety; shouldn't happen
2597 push( @new_argv, '--' . $abbrev . $flags );
2601 # not in expansion hash, must be actual long name
2603 push( @new_argv, $word );
2607 # not a dash item, so just save it for the next pass
2609 push( @new_argv, $word );
2611 } # end of this pass
2613 # update parameter list @ARGV to the new one
2615 last unless ( $abbrev_count > 0 );
2617 # make sure we are not in an infinite loop
2618 if ( $pass_count == $max_passes ) {
2620 "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2621 print STDERR "Here are the raw options\n";
2623 print STDERR "(@$rraw_options)\n";
2624 my $num = @new_argv;
2627 print STDERR "After $max_passes passes here is ARGV\n";
2628 print STDERR "(@new_argv)\n";
2631 print STDERR "After $max_passes passes ARGV has $num entries\n";
2636 Please check your configuration file $config_file for circular-references.
2637 To deactivate it, use -npro.
2642 Program bug - circular-references in the %expansion hash, probably due to
2643 a recent program change.
2646 } # end of check for circular references
2647 } # end of loop over all passes
# Debugging aid: print every abbreviation in the expansion hash together
# with the list of names it expands to.
#
# Parameter:
#   $rexpansion - reference to a hash which maps each abbreviation to a
#                 reference to the list of its replacement names
sub dump_short_names {
    my ($rexpansion) = @_;

    # fixed explanatory heading
    my $banner = <<'END_OF_BANNER';
List of short names. This list shows how all abbreviations are
translated into other abbreviations and, eventually, into long names.
New abbreviations may be defined in a .perltidyrc file.
For a list of all long names, use perltidy --dump-long-names (-dln).
--------------------------------------------------------------------------
END_OF_BANNER
    print STDOUT $banner;

    # one line per abbreviation, in sorted order
    for my $short_name ( sort keys %{$rexpansion} ) {
        print STDOUT "$short_name --> @{ $rexpansion->{$short_name} }\n";
    }
}
2666 sub check_vms_filename {
2668 # given a valid filename (the perltidy input file)
2669 # create a modified filename and separator character
2672 # Contributed by Michael Cartmell
2674 my ( $base, $path ) = fileparse( $_[0] );
2676 # remove explicit ; version
2677 $base =~ s/;-?\d*$//
2679 # remove explicit . version ie two dots in filename NB ^ escapes a dot
2680 or $base =~ s/( # begin capture $1
2681 (?:^|[^^])\. # match a dot not preceded by a caret
2682 (?: # followed by nothing
2684 .*[^^] # anything ending in a non caret
2687 \.-?\d*$ # match . version number
2690 # normalise filename, if there are no unescaped dots then append one
2691 $base .= '.' unless $base =~ /(?:^|[^^])\./;
2693 # if we don't already have an extension then we just append the extension
2694 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2695 return ( $path . $base, $separator );
2700 # TODO: are these more standard names?
2701 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2703 # Returns a string that determines what MS OS we are on.
2704 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2705 # Returns blank string if not an MS system.
2706 # Original code contributed by: Yves Orton
2707 # We need to know this to decide where to look for config files
2709 my $rpending_complaint = shift;
2711 return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2713 # Systems built from Perl source may not have Win32.pm
2714 # But probably have Win32::GetOSVersion() anyway so the
2715 # following line is not 'required':
2716 # return $os unless eval('require Win32');
2718 # Use the standard API call to determine the version
2719 my ( $undef, $major, $minor, $build, $id );
2720 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2723 # NAME ID MAJOR MINOR
2724 # Windows NT 4 2 4 0
2725 # Windows 2000 2 5 0
2727 # Windows Server 2003 2 5 2
2729 return "win32s" unless $id; # If id==0 then its a win32s box.
2730 $os = { # Magic numbers from MSDN
2731 # documentation of GetOSVersion
2738 0 => "2000", # or NT 4, see below
2745 # If $os is undefined, the above code is out of date. Suggested updates
2747 unless ( defined $os ) {
2749 $$rpending_complaint .= <<EOS;
2750 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2751 We won't be able to look for a system-wide config file.
2755 # Unfortunately the logic used for the various versions isn't so clever..
2756 # so we have to handle an outside case.
2757 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2762 ( $^O !~ /win32|dos/i )
2765 && ( $^O ne 'MacOS' );
# Determine whether we are running under a Microsoft OS and, if so, which
# one.
#
# Parameter:
#   $rpending_complaint - reference to a string of deferred warnings; it is
#                         handed on to Win_OS_Type
#
# Returns the list ( $is_Windows, $Windows_type ); $Windows_type is undef
# when $is_Windows is false.
sub look_for_Windows {

    # determine Windows sub-type and location of
    # system-wide configuration files
    my $rpending_complaint = shift;
    my $is_Windows         = ( $^O =~ /win32|dos/i );

    # Declare first and assign conditionally: 'my $x = ... if COND' has
    # undefined behavior in Perl when COND is false.
    my $Windows_type;
    $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;

    return ( $is_Windows, $Windows_type );
}
2778 sub find_config_file {
2780 # look for a .perltidyrc configuration file
2781 # For Windows also look for a file named perltidy.ini
2782 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2783 $rpending_complaint ) = @_;
2785 $$rconfig_file_chatter .= "# Config file search...system reported as:";
2787 $$rconfig_file_chatter .= "Windows $Windows_type\n";
2790 $$rconfig_file_chatter .= " $^O\n";
2793 # sub to check file existence and record all tests
2794 my $exists_config_file = sub {
2795 my $config_file = shift;
2796 return 0 unless $config_file;
2797 $$rconfig_file_chatter .= "# Testing: $config_file\n";
2798 return -f $config_file;
2803 # look in current directory first
2804 $config_file = ".perltidyrc";
2805 return $config_file if $exists_config_file->($config_file);
2807 $config_file = "perltidy.ini";
2808 return $config_file if $exists_config_file->($config_file);
2811 # Default environment vars.
2812 my @envs = qw(PERLTIDY HOME);
2814 # Check the NT/2k/XP locations, first a local machine def, then a
2816 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2818 # Now go through the environment ...
2819 foreach my $var (@envs) {
2820 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2821 if ( defined( $ENV{$var} ) ) {
2822 $$rconfig_file_chatter .= " = $ENV{$var}\n";
2824 # test ENV{ PERLTIDY } as file:
2825 if ( $var eq 'PERLTIDY' ) {
2826 $config_file = "$ENV{$var}";
2827 return $config_file if $exists_config_file->($config_file);
2830 # test ENV as directory:
2831 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2832 return $config_file if $exists_config_file->($config_file);
2835 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2836 return $config_file if $exists_config_file->($config_file);
2840 $$rconfig_file_chatter .= "\n";
2844 # then look for a system-wide definition
2845 # where to look varies with OS
2848 if ($Windows_type) {
2849 my ( $os, $system, $allusers ) =
2850 Win_Config_Locs( $rpending_complaint, $Windows_type );
2852 # Check All Users directory, if there is one.
2853 # i.e. C:\Documents and Settings\User\perltidy.ini
2856 $config_file = catfile( $allusers, ".perltidyrc" );
2857 return $config_file if $exists_config_file->($config_file);
2859 $config_file = catfile( $allusers, "perltidy.ini" );
2860 return $config_file if $exists_config_file->($config_file);
2863 # Check system directory.
2864 # retain old code in case someone has been able to create
2865 # a file with a leading period.
2866 $config_file = catfile( $system, ".perltidyrc" );
2867 return $config_file if $exists_config_file->($config_file);
2869 $config_file = catfile( $system, "perltidy.ini" );
2870 return $config_file if $exists_config_file->($config_file);
2874 # Place to add customization code for other systems
2875 elsif ( $^O eq 'OS2' ) {
2877 elsif ( $^O eq 'MacOS' ) {
2879 elsif ( $^O eq 'VMS' ) {
2882 # Assume some kind of Unix
2885 $config_file = "/usr/local/etc/perltidyrc";
2886 return $config_file if $exists_config_file->($config_file);
2888 $config_file = "/etc/perltidyrc";
2889 return $config_file if $exists_config_file->($config_file);
2892 # Couldn't find a config file
2896 sub Win_Config_Locs {
2898 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2899 # or undef if it's not a win32 OS. In list context returns OS, System
2900 # Directory, and All Users Directory. All Users will be empty on a
2901 # 9x/Me box. Contributed by: Yves Orton.
2903 my $rpending_complaint = shift;
2904 my $os = (@_) ? shift : Win_OS_Type();
2910 if ( $os =~ /9[58]|Me/ ) {
2911 $system = "C:/Windows";
2913 elsif ( $os =~ /NT|XP|200?/ ) {
2914 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2917 ? "C:/WinNT/profiles/All Users/"
2918 : "C:/Documents and Settings/All Users/";
2922 # This currently would only happen on a win32s computer. I don't have
2923 # one to test, so I am unsure how to proceed. Suggestions welcome!
2924 $$rpending_complaint .=
2925 "I dont know a sensible place to look for config files on an $os system.\n";
2928 return wantarray ? ( $os, $system, $allusers ) : $os;
# Print the results of the config file search, followed by the contents of
# the config file itself (if one was opened), to STDOUT.
#
# Parameters:
#   $fh                   - handle of the open config file (it must support
#                           getline and close), or a false value if none
#   $config_file          - name of the config file, used in the heading
#   $rconfig_file_chatter - reference to a string describing the search
sub dump_config_file {
    my ( $fh, $config_file, $rconfig_file_chatter ) = @_;

    print STDOUT "$$rconfig_file_chatter";
    if ($fh) {
        print STDOUT "# Dump of file: '$config_file'\n";

        # Test for definedness, not truth: a final line consisting of "0"
        # with no newline is false but must still be printed.
        while ( defined( my $line = $fh->getline() ) ) { print STDOUT $line }

        # closing is best-effort; any error is deliberately ignored
        eval { $fh->close() };
    }
    else {
        print STDOUT "# ...no config file found\n";
    }
}
2946 sub read_config_file {
2948 my ( $fh, $config_file, $rexpansion ) = @_;
2949 my @config_list = ();
2951 # file is bad if non-empty $death_message is returned
2952 my $death_message = "";
2956 while ( my $line = $fh->getline() ) {
2959 ( $line, $death_message ) =
2960 strip_comment( $line, $config_file, $line_no );
2961 last if ($death_message);
2963 $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
2966 # look for something of the general form
2973 if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) {
2974 ( $newname, $body ) = ( $2, $3, );
2978 # handle a new alias definition
2982 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
2987 if ( ${$rexpansion}{$name} ) {
2989 my @names = sort keys %$rexpansion;
2991 "Here is a list of all installed aliases\n(@names)\n"
2992 . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2995 ${$rexpansion}{$name} = [];
3001 my ( $rbody_parts, $msg ) = parse_args($body);
3003 $death_message = <<EOM;
3004 Error reading file '$config_file' at line number $line_no.
3006 Please fix this line or use -npro to avoid reading this file
3013 # remove leading dashes if this is an alias
3014 foreach (@$rbody_parts) { s/^\-+//; }
3015 push @{ ${$rexpansion}{$name} }, @$rbody_parts;
3018 push( @config_list, @$rbody_parts );
3023 eval { $fh->close() };
3024 return ( \@config_list, $death_message );
3029 # Strip any comment from a command line
3030 my ( $instr, $config_file, $line_no ) = @_;
3033 # check for full-line comment
3034 if ( $instr =~ /^\s*#/ ) {
3035 return ( "", $msg );
3038 # nothing to do if no comments
3039 if ( $instr !~ /#/ ) {
3040 return ( $instr, $msg );
3043 # handle case of no quotes
3044 elsif ( $instr !~ /['"]/ ) {
3046 # We now require a space before the # of a side comment
3047 # this allows something like:
3049 # Otherwise, it would have to be quoted:
3051 $instr =~ s/\s+\#.*$//;
3052 return ( $instr, $msg );
3055 # handle comments and quotes
3057 my $quote_char = "";
3060 # looking for ending quote character
3062 if ( $instr =~ /\G($quote_char)/gc ) {
3066 elsif ( $instr =~ /\G(.)/gc ) {
3070 # error..we reached the end without seeing the ending quote char
3073 Error reading file $config_file at line number $line_no.
3074 Did not see ending quote character <$quote_char> in this text:
3076 Please fix this line or use -npro to avoid reading this file
3082 # accumulating characters and looking for start of a quoted string
3084 if ( $instr =~ /\G([\"\'])/gc ) {
3089 # Note: not yet enforcing the space-before-hash rule for side
3090 # comments if the parameter is quoted.
3091 elsif ( $instr =~ /\G#/gc ) {
3094 elsif ( $instr =~ /\G(.)/gc ) {
3102 return ( $outstr, $msg );
3107 # Parse a command string containing multiple strings with possible
3108 # quotes, into individual commands. It might look like this, for example:
3110 # -wba=" + - " -some-thing -wbb='. && ||'
3112 # There is no need, at present, to handle escaped quote characters.
3113 # (They are not perltidy tokens, so needn't be in strings).
3116 my @body_parts = ();
3117 my $quote_char = "";
3122 # looking for ending quote character
3124 if ( $body =~ /\G($quote_char)/gc ) {
3127 elsif ( $body =~ /\G(.)/gc ) {
3131 # error..we reached the end without seeing the ending quote char
3133 if ( length($part) ) { push @body_parts, $part; }
3135 Did not see ending quote character <$quote_char> in this text:
3142 # accumulating characters and looking for start of a quoted string
3144 if ( $body =~ /\G([\"\'])/gc ) {
3147 elsif ( $body =~ /\G(\s+)/gc ) {
3148 if ( length($part) ) { push @body_parts, $part; }
3151 elsif ( $body =~ /\G(.)/gc ) {
3155 if ( length($part) ) { push @body_parts, $part; }
3160 return ( \@body_parts, $msg );
3163 sub dump_long_names {
3165 my @names = sort @_;
3167 # Command line long names (passed to GetOptions)
3168 #---------------------------------------------------------------
3169 # here is a summary of the Getopt codes:
3170 # <none> does not take an argument
3171 # =s takes a mandatory string
3172 # :s takes an optional string
3173 # =i takes a mandatory integer
3174 # :i takes an optional integer
3175 # ! does not take an argument and may be negated
3176 # i.e., -foo and -nofoo are allowed
3177 # a double dash signals the end of the options list
3179 #---------------------------------------------------------------
3182 foreach (@names) { print STDOUT "$_\n" }
# Print a heading followed by the default command line options, sorted,
# one per line, to STDOUT.
#
# Parameters: the list of default option strings.
sub dump_defaults {
    my @defaults = sort @_;
    print STDOUT "Default command line options:\n";

    # print the sorted copy; iterating over @_ would ignore the sort
    foreach my $default (@defaults) { print STDOUT "$default\n" }
}
# Return the options in effect for this run as a string which could be
# put in a perltidyrc file.
#
# Parameters:
#   $rOpts          - reference to the hash of option values, keyed by long
#                     option name
#   $roption_string - reference to the list of option specifications; each
#                     is a long name optionally followed by a flag ('!' or
#                     '=...')
#
# Returns a multi-line string: two comment lines followed by one '--name',
# '--noname' or '--name=value' line per key of %$rOpts, sorted by name.
# The caller's option list is left unmodified.
sub readable_options {
    my ( $rOpts, $roption_string ) = @_;

    # Record the flag of every option which has a defined value.
    my %flag_of;
    foreach my $spec ( @{$roption_string} ) {

        # Work on copies: the loop variable aliases the caller's array
        # element, so assigning to it would strip the flag from the
        # caller's option specification.
        my ( $name, $flag ) = ( $spec, "" );
        if ( $spec =~ /(.*)(!|=.*)$/ ) {
            ( $name, $flag ) = ( $1, $2 );
        }
        if ( defined( $rOpts->{$name} ) ) {
            $flag_of{$name} = $flag;
        }
    }

    my $readable_options = "# Final parameter set for this run.\n";
    $readable_options .=
      "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";

    foreach my $key ( sort keys %{$rOpts} ) {
        my $flag   = $flag_of{$key};
        my $value  = $rOpts->{$key};
        my $prefix = '--';
        my $suffix = "";
        if ($flag) {
            if ( $flag =~ /^=/ ) {

                # anything other than an unsigned integer is quoted
                if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
                $suffix = "=" . $value;
            }
            elsif ( $flag =~ /^!/ ) {

                # a false value of a negatable switch becomes --noname
                $prefix .= "no" unless ($value);
            }
            else {

                # shouldn't happen
                $readable_options .=
                  "# ERROR in dump_options: unrecognized flag $flag for $key\n";
            }
        }
        $readable_options .= $prefix . $key . $suffix . "\n";
    }
    return $readable_options;
}
3238 This is perltidy, v$VERSION
3240 Copyright 2000-2012, Steve Hancock
3242 Perltidy is free software and may be copied under the terms of the GNU
3243 General Public License, which is included in the distribution files.
3245 Complete documentation for perltidy can be found using 'man perltidy'
3246 or on the internet at http://perltidy.sourceforge.net.
3253 This is perltidy version $VERSION, a perl script indenter. Usage:
3255 perltidy [ options ] file1 file2 file3 ...
3256 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
3257 perltidy [ options ] file1 -o outfile
3258 perltidy [ options ] file1 -st >outfile
3259 perltidy [ options ] <infile >outfile
3261 Options have short and long forms. Short forms are shown; see
3262 man pages for long forms. Note: '=s' indicates a required string,
3263 and '=n' indicates a required integer.
3267 -o=file name of the output file (only if single input file)
3268 -oext=s change output extension from 'tdy' to s
3269 -opath=path change path to be 'path' for output files
3270 -b backup original to .bak and modify file in-place
3271 -bext=s change default backup extension from 'bak' to s
3272 -q deactivate error messages (for running under editor)
3273 -w include non-critical warning messages in the .ERR error output
3274 -syn run perl -c to check syntax (default under unix systems)
3275 -log save .LOG file, which has useful diagnostics
3276 -f force perltidy to read a binary file
3277 -g like -log but writes more detailed .LOG file, for debugging scripts
3278 -opt write the set of options actually used to a .LOG file
3279 -npro ignore .perltidyrc configuration command file
3280 -pro=file read configuration commands from file instead of .perltidyrc
3281 -st send output to standard output, STDOUT
3282 -se send error output to standard error output, STDERR
3283 -v display version number to standard output and quit
3286 -i=n use n columns per indentation level (default n=4)
3287 -t tabs: use one tab character per indentation level, not recommeded
3288 -nt no tabs: use n spaces per indentation level (default)
3289 -et=n entab leading whitespace n spaces per tab; not recommended
3290 -io "indent only": just do indentation, no other formatting.
3291 -sil=n set starting indentation level to n; use if auto detection fails
3292 -ole=s specify output line ending (s=dos or win, mac, unix)
3293 -ple keep output line endings same as input (input must be filename)
3296 -fws freeze whitespace; this disables all whitespace changes
3297 and disables the following switches:
3298 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
3299 -bbt same as -bt but for code block braces; same as -bt if not given
3300 -bbvt block braces vertically tight; use with -bl or -bli
3301 -bbvtl=s make -bbvt to apply to selected list of block types
3302 -pt=n paren tightness (n=0, 1 or 2)
3303 -sbt=n square bracket tightness (n=0, 1, or 2)
3304 -bvt=n brace vertical tightness,
3305 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3306 -pvt=n paren vertical tightness (see -bvt for n)
3307 -sbvt=n square bracket vertical tightness (see -bvt for n)
3308 -bvtc=n closing brace vertical tightness:
3309 n=(0=open, 1=sometimes close, 2=always close)
3310 -pvtc=n closing paren vertical tightness, see -bvtc for n.
3311 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3312 -ci=n sets continuation indentation=n, default is n=2 spaces
3313 -lp line up parentheses, brackets, and non-BLOCK braces
3314 -sfs add space before semicolon in for( ; ; )
3315 -aws allow perltidy to add whitespace (default)
3316 -dws delete all old non-essential whitespace
3317 -icb indent closing brace of a code block
3318 -cti=n closing indentation of paren, square bracket, or non-block brace:
3319 n=0 none, =1 align with opening, =2 one full indentation level
3320 -icp equivalent to -cti=2
3321 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
3322 -wrs=s want space right of tokens in string;
3323 -sts put space before terminal semicolon of a statement
3324 -sak=s put space between keywords given in s and '(';
3325 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3328 -fnl freeze newlines; this disables all line break changes
3329 and disables the following switches:
3330 -anl add newlines; ok to introduce new line breaks
3331 -bbs add blank line before subs and packages
3332 -bbc add blank line before block comments
3333 -bbb add blank line between major blocks
3334 -kbl=n keep old blank lines? 0=no, 1=some, 2=all
3335 -mbl=n maximum consecutive blank lines to output (default=1)
3336 -ce cuddled else; use this style: '} else {'
3337 -dnl delete old newlines (default)
3338 -l=n maximum line length; default n=80
3339 -bl opening brace on new line
3340 -sbl opening sub brace on new line. value of -bl is used if not given.
3341 -bli opening brace on new line and indented
3342 -bar opening brace always on right, even for long clauses
3343 -vt=n vertical tightness (requires -lp); n controls break after opening
3344 token: 0=never 1=no break if next line balanced 2=no break
3345 -vtc=n vertical tightness of closing container; n controls if closing
3346 token starts new line: 0=always 1=not unless list 1=never
3347 -wba=s want break after tokens in string; i.e. wba=': .'
3348 -wbb=s want break before tokens in string
3350 Following Old Breakpoints
3351 -kis keep interior semicolons. Allows multiple statements per line.
3352 -boc break at old comma breaks: turns off all automatic list formatting
3353 -bol break at old logical breakpoints: or, and, ||, && (default)
3354 -bok break at old list keyword breakpoints such as map, sort (default)
3355 -bot break at old conditional (ternary ?:) operator breakpoints (default)
3356 -boa break at old attribute breakpoints
3357 -cab=n break at commas after a comma-arrow (=>):
3358 n=0 break at all commas after =>
3359 n=1 stable: break unless this breaks an existing one-line container
3360 n=2 break only if a one-line container cannot be formed
3361 n=3 do not treat commas after => specially at all
3364 -ibc indent block comments (default)
3365 -isbc indent spaced block comments; may indent unless no leading space
3366 -msc=n minimum desired spaces to side comment, default 4
3367 -fpsc=n fix position for side comments; default 0;
3368 -csc add or update closing side comments after closing BLOCK brace
3369 -dcsc delete closing side comments created by a -csc command
3370 -cscp=s change closing side comment prefix to be other than '## end'
3371 -cscl=s change closing side comment to apply to selected list of blocks
3372 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3373 -csct=n maximum number of columns of appended text, default n=20
3374 -cscw causes warning if old side comment is overwritten with -csc
3376 -sbc use 'static block comments' identified by leading '##' (default)
3377 -sbcp=s change static block comment identifier to be other than '##'
3378 -osbc outdent static block comments
3380 -ssc use 'static side comments' identified by leading '##' (default)
3381 -sscp=s change static side comment identifier to be other than '##'
3383 Delete selected text
3384 -dac delete all comments AND pod
3385 -dbc delete block comments
3386 -dsc delete side comments
3389 Send selected text to a '.TEE' file
3390 -tac tee all comments AND pod
3391 -tbc tee block comments
3392 -tsc tee side comments
3396 -olq outdent long quoted strings (default)
3397 -olc outdent a long block comment line
3398 -ola outdent statement labels
3399 -okw outdent control keywords (redo, next, last, goto, return)
3400 -okwl=s specify alternative keywords for -okw command
3403 -mft=n maximum fields per table; default n=40
3404 -x do not format lines before hash-bang line (i.e., for VMS)
3405 -asc allows perltidy to add a ';' when missing (default)
3406 -dsm allows perltidy to delete an unnecessary ';' (default)
3408 Combinations of other parameters
3409 -gnu attempt to follow GNU Coding Standards as applied to perl
3410 -mangle remove as many newlines as possible (but keep comments and pods)
3411 -extrude insert as many newlines as possible
3413 Dump and die, debugging
3414 -dop dump options used in this run to standard output and quit
3415 -ddf dump default options to standard output and quit
3416 -dsn dump all option short names to standard output and quit
3417 -dln dump option long names to standard output and quit
3418 -dpro dump whatever configuration file is in effect to standard output
3419 -dtt dump all token types to standard output and quit
3422 -html write an html file (see 'man perl2web' for many options)
3423 Note: when -html is used, no indentation or formatting are done.
3424 Hint: try perltidy -html -css=mystyle.css filename.pl
3425 and edit mystyle.css to change the appearance of filename.html.
3426 -nnn gives line numbers
3427 -pre only writes out <pre>..</pre> code section
3428 -toc places a table of contents to subs at the top (default)
3429 -pod passes pod text through pod2html (default)
3430 -frm write html as a frame (3 files)
3431 -text=s extra extension for table of contents if -frm, default='toc'
3432 -sext=s extra extension for file content if -frm, default='src'
3434 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3435 negates the long forms. For example, -nasc means don't add missing
3438 If you are unable to see this entire text, try "perltidy -h | more"
3439 For more detailed information, and additional options, try "man perltidy",
3440 or go to the perltidy home page at http://perltidy.sourceforge.net
# Drive one input stream through the formatter.
#
#   $truth  - line source; must provide get_line() and
#             report_tokenization_errors()
#   $beauty - line consumer; must provide write_line() and
#             finish_formatting()
#
# Returns whatever $truth->report_tokenization_errors() returns.
sub process_this_file {
    my ( $truth, $beauty ) = @_;

    # hand each line to the formatter until get_line() returns a false value
    my $line_of_tokens;
    $beauty->write_line($line_of_tokens)
      while ( $line_of_tokens = $truth->get_line() );

    # finish up; an exception from finish_formatting() is trapped by the
    # eval, so the tokenization report below always runs
    eval { $beauty->finish_formatting() };
    return $truth->report_tokenization_errors();
}
# Use 'perl -c' to make sure that we did not create bad syntax.
# This is a very good independent check for programming errors.
#
# Given names of the input and output files, ($istream, $ostream),
# we do the following:
# - check syntax of the input file
# - if bad, all done (could be an incomplete code snippet)
# - if infile syntax ok, then check syntax of the output file;
#   - if outfile syntax bad, issue warning; this implies a code bug!
# - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
#
# $logger_object must provide write_logfile_entry(), complain(), warning()
# and report_definite_bug(); $rOpts is the options hash, from which
# 'perl-syntax-check-flags' and 'look-for-hash-bang' are read.
sub check_syntax {
    my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
    my $infile_syntax_ok = 0;
    my $line_of_dashes   = '-' x 42 . "\n";

    my $flags = $rOpts->{'perl-syntax-check-flags'};

    # Be sure we invoke perl with -c.  Perl accepts repeated flags like
    # '-c -c', so appending another -c is safer than trying to find an
    # interior bundled c, as in -Tc, because such a 'c' might be in a
    # quoted string, for example.
    $flags .= " -c" unless ( $flags =~ /(^-c|\s+-c)/ );

    # Be sure we invoke perl with -x if requested; the same remark about
    # repeated parameters applies.
    if ( $rOpts->{'look-for-hash-bang'} && $flags !~ /(^-x|\s+-x)/ ) {
        $flags .= " -x";
    }

    # this shouldn't happen unless a temporary file couldn't be made
    if ( $istream eq '-' ) {
        $logger_object->write_logfile_entry(
            "Cannot run perl -c on STDIN and STDOUT\n");
        return $infile_syntax_ok;
    }

    $logger_object->write_logfile_entry(
        "checking input file syntax with perl $flags\n");

    # Not all operating systems/shells support redirection of the standard
    # error output, so no redirection is requested under VMS.
    my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';

    my ( $istream_filename, $input_report ) =
      do_syntax_check( $istream, $flags, $error_redirection );
    $logger_object->write_logfile_entry(
        "Input stream passed to Perl as file $istream_filename\n");
    $logger_object->write_logfile_entry($line_of_dashes);
    $logger_object->write_logfile_entry("$input_report\n");

    # The input did not pass: say so and stop without checking the output.
    if ( $input_report !~ /syntax\s*OK/ ) {

        # Only warn of perl -c syntax errors.  Other messages,
        # such as missing modules, are too common.  They can be
        # seen by running with perltidy -w
        $logger_object->complain("A syntax check using perl $flags\n");
        $logger_object->complain(
            "for the output in file $istream_filename gives:\n");
        $logger_object->complain($line_of_dashes);
        $logger_object->complain("$input_report\n");
        $logger_object->complain($line_of_dashes);
        $infile_syntax_ok = -1;
        $logger_object->write_logfile_entry($line_of_dashes);
        $logger_object->write_logfile_entry(
"The output file will not be checked because of input file problems\n"
        );
        return $infile_syntax_ok;
    }

    # The input passed, so the output must pass too.
    $infile_syntax_ok = 1;
    $logger_object->write_logfile_entry($line_of_dashes);
    $logger_object->write_logfile_entry(
        "checking output file syntax with perl $flags ...\n");
    my ( $ostream_filename, $output_report ) =
      do_syntax_check( $ostream, $flags, $error_redirection );
    $logger_object->write_logfile_entry(
        "Output stream passed to Perl as file $ostream_filename\n");
    $logger_object->write_logfile_entry($line_of_dashes);
    $logger_object->write_logfile_entry("$output_report\n");

    if ( $output_report !~ /syntax\s*OK/ ) {
        $logger_object->write_logfile_entry($line_of_dashes);
        $logger_object->warning(
"The output file has a syntax error when tested with perl $flags $ostream !\n"
        );
        $logger_object->warning(
            "This implies an error in perltidy; the file $ostream is bad\n"
        );
        $logger_object->report_definite_bug();

        # the perl version number will be helpful for diagnosing the problem
        $logger_object->write_logfile_entry(
            qx/perl -v $error_redirection/ . "\n" );
    }
    return $infile_syntax_ok;
}
# Run perl with the given flags on one stream and capture what it prints.
#
#   $stream            - stream to check; get_stream_as_named_file() turns
#                        it into a named file
#   $flags             - perl command line flags (passed as one quoted word)
#   $error_redirection - text appended to the command line, e.g. '2>&1'
#
# Returns the list ( $stream_filename, $perl_output ).  The output is ""
# when no named file could be obtained.
sub do_syntax_check {
    my ( $stream, $flags, $error_redirection ) = @_;

    # We need a named input file for executing perl
    my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);

    # TODO: Need to add name of file to log somewhere
    # otherwise Perl output is hard to read
    return ( $stream_filename, "" ) unless ($stream_filename);

    # We have to quote the filename in case it has unusual characters
    # or spaces.  Example: this filename #CM11.pm# gives trouble.
    my $quoted_filename = qq("$stream_filename");

    # Under VMS something like -T will become -t (and an error) so we
    # will put quotes around the flags.  Double quotes seem to work on
    # Unix/Windows/VMS, but this may not work on all systems.  (Single
    # quotes do not work under Windows).  It could become necessary to
    # put double quotes around each flag, such as:  -"c"  -"T"
    # We may eventually need some system-dependent coding here.
    my $quoted_flags = qq("$flags");

    # now wish for luck...
    my $command = join ' ', 'perl', $quoted_flags, $quoted_filename,
      $error_redirection;
    my $msg = qx/$command/;

    if ($is_tmpfile) { unlink $stream_filename }
    return ( $stream_filename, $msg );
}
#####################################################################
#
# This is a stripped down version of IO::Scalar
# Given a reference to a scalar, it supplies either:
# a getline method which reads lines (mode='r'), or
# a print method which writes lines (mode='w')
#
# The object is a blessed array:
#   mode 'w':  [ $rscalar, $mode ]
#   mode 'r':  [ \@lines,  $mode, $index_of_next_line ]
#
#####################################################################
package Perl::Tidy::IOScalar;
use Carp;

# Constructor.
#   $rscalar - reference to the scalar to read from or write to
#   $mode    - 'r' or 'w'
# Dies (with a stack trace) if $rscalar is not a SCALAR ref or the mode
# is not recognized.
sub new {
    my ( $package, $rscalar, $mode ) = @_;
    my $ref = ref $rscalar;
    if ( $ref ne 'SCALAR' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to SCALAR but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM

    }
    if ( $mode eq 'w' ) {

        # output always starts from an empty buffer
        ${$rscalar} = "";
        return bless [ $rscalar, $mode ], $package;
    }
    elsif ( $mode eq 'r' ) {

        # Convert a scalar to an array.
        # This avoids looking for "\n" on each call to getline
        #
        # NOTES: The -1 count is needed to avoid loss of trailing blank lines
        # (which might be important in a DATA section).
        my @array;

        # Test for defined/non-empty rather than truth, so that a buffer
        # holding just "0" is still read as one line.
        if ( defined ${$rscalar} && length ${$rscalar} ) {
            @array = map { $_ . "\n" } split /\n/, ${$rscalar}, -1;

            # remove possible extra blank line introduced with split
            if ( @array && $array[-1] eq "\n" ) { pop @array }
        }
        my $i_next = 0;
        return bless [ \@array, $mode, $i_next ], $package;
    }
    else {
        confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
}

# Return the next line (with its trailing "\n"), or undef when the lines
# are exhausted.  Dies if the object was not opened with mode 'r'.
sub getline {
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline call requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    my $i = $self->[2]++;
    return $self->[0]->[$i];
}

# Append the first argument to the target scalar.
# Dies if the object was not opened with mode 'w'.
sub print {
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print call requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    ${ $self->[0] } .= $_[0];
}

# Nothing to release; present so the object can be used like a file handle
sub close { return }
#####################################################################
#
# This is a stripped down version of IO::ScalarArray
# Given a reference to an array, it supplies either:
# a getline method which reads lines (mode='r'), or
# a print method which writes lines (mode='w')
#
# NOTE: this routine assumes that there aren't any embedded
# newlines within any of the array elements.  There are no checks
# for that.
#
# The object is a blessed array:
#   mode 'w':  [ $rarray, $mode ]
#   mode 'r':  [ $rarray, $mode, $index_of_next_line ]
#
#####################################################################
package Perl::Tidy::IOScalarArray;
use Carp;

# Constructor.
#   $rarray - reference to the array of lines to read or to fill
#   $mode   - 'r' or 'w'
# Dies (with a stack trace) if $rarray is not an ARRAY ref or the mode
# is not recognized.
sub new {
    my ( $package, $rarray, $mode ) = @_;
    my $ref = ref $rarray;
    unless ( $ref eq 'ARRAY' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to ARRAY but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM
    }

    if ( $mode eq 'r' ) {
        my $i_next = 0;
        return bless [ $rarray, $mode, $i_next ], $package;
    }

    if ( $mode eq 'w' ) {

        # output always starts from an empty array
        @{$rarray} = ();
        return bless [ $rarray, $mode ], $package;
    }

    confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
}

# Return the next array element, or undef past the end of the array.
# Dies if the object was not opened with mode 'r'.
sub getline {
    my $self = shift;
    my $mode = $self->[1];
    unless ( $mode eq 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    my $lines = $self->[0];
    my $index = $self->[2]++;
    return $lines->[$index];
}

# Append the first argument to the target array.
# Dies if the object was not opened with mode 'w'.
sub print {
    my $self = shift;
    my $mode = $self->[1];
    unless ( $mode eq 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    push @{ $self->[0] }, $_[0];
}

# Nothing to release; present so the object can be used like a file handle
sub close { return }
3738 #####################################################################
3740 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3741 # which returns the next line to be parsed
3743 #####################################################################
3745 package Perl::Tidy::LineSource;
# Constructor for a line source.
#
#   $input_file               - input stream specification, handed to
#                               Perl::Tidy::streamhandle() in mode 'r'
#   $rOpts                    - options hash; reads 'preserve-line-endings'
#                               and reads/clears 'check-syntax'
#   $rpending_logfile_message - ref to a string; a note is appended when
#                               the syntax check has to be turned off
#
# Returns the new object, or undef when no handle could be obtained.
sub new {
    my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;

    # remember the line ending style of the input only when asked to keep it
    my $input_line_ending;
    $input_line_ending = Perl::Tidy::find_input_line_ending($input_file)
      if ( $rOpts->{'preserve-line-endings'} );

    ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
    return undef unless $fh;

    # The syntax check needs a named file.  When reading standard input or
    # an object the check is turned off, because temporary files cause
    # problems on many systems.
    my $input_is_unnamed = ( $input_file eq '-' || ref $input_file );
    if ( $input_is_unnamed && $rOpts->{'check-syntax'} ) {
        $rOpts->{'check-syntax'} = 0;

        ${$rpending_logfile_message} .= <<EOM;
Note: --syntax check will be skipped because standard input is used
EOM
    }

    my $self = {
        _fh                => $fh,
        _filename          => $input_file,
        _input_line_ending => $input_line_ending,
        _rinput_buffer     => [],
        _started           => 0,
    };
    return bless $self, $class;
}
# Close the input handle; any error raised by close() is trapped.
sub close_input_file {
    my ($self) = @_;
    my $fh = $self->{_fh};
    return eval { $fh->close() };
}
# Return the next input line, or a false value at end of input.
#
# Lines come from the internal buffer first (filled when a raw Mac file
# was detected), otherwise from the underlying handle's getline().
sub get_line {
    my $self          = shift;
    my $fh            = $self->{_fh};
    my $rinput_buffer = $self->{_rinput_buffer};
    my $line;

    if ( scalar( @{$rinput_buffer} ) ) {
        $line = shift @{$rinput_buffer};
    }
    else {
        $line = $fh->getline();

        # patch to read raw mac files under unix, dos
        # see if the first line has embedded \r's
        if ( $line && !$self->{_started} ) {
            if ( $line =~ /[\015][^\015\012]/ ) {

                # found one -- break the line up and store in a buffer
                @{$rinput_buffer} = map { $_ . "\n" } split /\015/, $line;
                $line = shift @{$rinput_buffer};
            }

            # only the first line read is examined
            $self->{_started}++;
        }
    }
    return $line;
}
3817 #####################################################################
3819 # the Perl::Tidy::LineSink class supplies a write_line method for
3820 # actual file writing
3822 #####################################################################
3824 package Perl::Tidy::LineSink;
# Constructor for a line sink.
#
#   $output_file              - output stream specification, handed to
#                               Perl::Tidy::streamhandle() in mode 'w'
#   $tee_file                 - name of the tee file (opened on demand)
#   $line_separator           - string appended to every line written
#   $rOpts                    - options hash; reads 'format' and
#                               reads/clears 'check-syntax'
#   $rpending_logfile_message - ref to a string; a note is appended when
#                               the syntax check has to be turned off
#   $binmode                  - true to put the output handles in binmode
#
# Dies if the output stream cannot be opened for the 'tidy' format.
sub new {
    my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
        $rpending_logfile_message, $binmode )
      = @_;
    my $fh;
    my $fh_tee;
    my $output_file_open = 0;

    # an output handle is opened here only for the 'tidy' format
    if ( $rOpts->{'format'} eq 'tidy' ) {
        ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
        die "Cannot write to output stream\n" unless ($fh);
        $output_file_open = 1;
        if ($binmode) {
            binmode $fh    if ( ref($fh) eq 'IO::File' );
            binmode STDOUT if ( $output_file eq '-' );
        }
    }

    # The syntax check needs a named file.  When writing to standard output
    # or to an object the check is turned off, because temporary files
    # cause problems on many systems.
    my $output_is_unnamed = ( $output_file eq '-' || ref $output_file );
    if ( $output_is_unnamed && $rOpts->{'check-syntax'} ) {
        $rOpts->{'check-syntax'} = 0;
        ${$rpending_logfile_message} .= <<EOM;
Note: --syntax check will be skipped because standard output is used
EOM
    }

    my $self = {
        _fh               => $fh,
        _fh_tee           => $fh_tee,
        _output_file      => $output_file,
        _output_file_open => $output_file_open,
        _tee_flag         => 0,
        _tee_file         => $tee_file,
        _tee_file_opened  => 0,
        _line_separator   => $line_separator,
        _binmode          => $binmode,
    };
    return bless $self, $class;
}
# Write one line to the output file and, when tee is on, to the tee file.
#
# NOTE: the line is modified in place through the @_ alias: its trailing
# newline is chomped and the configured line separator is appended, so the
# caller's variable changes too.
sub write_line {
    my $self             = shift;
    my $output_file_open = $self->{_output_file_open};

    chomp $_[0];
    $_[0] .= $self->{_line_separator};

    if ($output_file_open) { $self->{_fh}->print( $_[0] ) }

    if ( $self->{_tee_flag} ) {

        # the tee file is opened the first time it is needed
        $self->really_open_tee_file() unless ( $self->{_tee_file_opened} );
        my $fh_tee = $self->{_fh_tee};
        print $fh_tee $_[0];
    }
}
# Turn on copying of output lines to the tee file (write_line checks the
# _tee_flag field).
sub tee_on {
    my ($self) = @_;
    $self->{_tee_flag} = 1;
}
# Turn off copying of output lines to the tee file (write_line checks the
# _tee_flag field).
sub tee_off {
    my ($self) = @_;
    $self->{_tee_flag} = 0;
}
# Open the tee file for writing and remember the handle in the object.
# The file is put in binmode when the object's _binmode flag is set.
# Dies if the file cannot be opened.
sub really_open_tee_file {
    my $self     = shift;
    my $tee_file = $self->{_tee_file};

    # Pass the mode as a separate argument so that IO::File does a
    # 3-argument open.  The single string ">$tee_file" is parsed like a
    # 2-argument open, which strips leading/trailing whitespace from the
    # name and gives special meaning to some characters in it.
    my $fh_tee = IO::File->new( $tee_file, '>' )
      or die("couldn't open TEE file $tee_file: $!\n");
    binmode $fh_tee if $self->{_binmode};
    $self->{_tee_file_opened} = 1;
    $self->{_fh_tee}          = $fh_tee;
}
# Close the output handle (if one was opened) and then the tee file.
# An error raised while closing the output handle is trapped.
sub close_output_file {
    my ($self) = @_;
    if ( $self->{_output_file_open} ) {
        eval { $self->{_fh}->close() };
    }
    return $self->close_tee_file();
}
# Close the tee file if it was opened and clear the "opened" flag.
# An error raised by close() is trapped.
sub close_tee_file {
    my ($self) = @_;

    if ( $self->{_tee_file_opened} ) {
        my $fh_tee = $self->{_fh_tee};
        eval { $fh_tee->close() };
        $self->{_tee_file_opened} = 0;
    }
}
3931 #####################################################################
3933 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3934 # useful for program development.
3936 # Only one such file is created regardless of the number of input
3937 # files processed. This allows the results of processing many files
3938 # to be summarized in a single file.
3940 #####################################################################
3942 package Perl::Tidy::Diagnostics;
# Constructor: a diagnostics object starts with no messages written and no
# current input file.
sub new {
    my ($class) = @_;
    my %self = (
        _write_diagnostics_count => 0,
        _last_diagnostic_file    => "",
        _input_file              => "",
        _fh                      => undef,
    );
    return bless \%self, $class;
}
# Record the name of the file currently being processed; write_diagnostics
# uses it to label its output.
sub set_input_file {
    my ( $self, $input_file ) = @_;
    $self->{_input_file} = $input_file;
}
# This is a diagnostic routine which is useful for program development.
# Output from debug messages go to a file named DIAGNOSTICS, where
# they are labeled by file and line.  This allows many files to be
# scanned at once for some particular condition of interest.
#
# The arguments are the message text.  The file is (re)created on the
# first call; a "FILE:" header is written whenever the input file differs
# from the one of the previous message.  Dies if the file cannot be opened.
sub write_diagnostics {
    my $self = shift;

    unless ( $self->{_write_diagnostics_count} ) {

        # 3-argument open; failure is reported with die, like the other
        # open failures in this file
        open( DIAGNOSTICS, '>', 'DIAGNOSTICS' )
          or die("couldn't open DIAGNOSTICS: $!\n");
    }

    my $last_diagnostic_file = $self->{_last_diagnostic_file};
    my $input_file           = $self->{_input_file};
    if ( $last_diagnostic_file ne $input_file ) {
        print DIAGNOSTICS "\nFILE:$input_file\n";
    }
    $self->{_last_diagnostic_file} = $input_file;
    my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
    print DIAGNOSTICS "$input_line_number:\t@_";
    $self->{_write_diagnostics_count}++;
}
3983 #####################################################################
3985 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3987 #####################################################################
3989 package Perl::Tidy::Logger;
# Constructor for the logger.
#
#   $rOpts        - options hash
#   $log_file     - name of the .LOG file
#   $warning_file - name of the .ERR file, or a reference when the
#                   warnings go to an object
#   $saw_extrude  - true if the -extrude option is in effect
#
# An existing warning file of that name is removed.
sub new {
    my $class = shift;
    my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;

    # remove any old error output file
    if ( !ref($warning_file) ) {
        unlink($warning_file) if ( -e $warning_file );
    }

    my %self = (
        _log_file                      => $log_file,
        _rOpts                         => $rOpts,
        _fh_warnings                   => undef,
        _last_input_line_written       => 0,
        _at_end_of_file                => 0,
        _use_prefix                    => 1,
        _block_log_output              => 0,
        _line_of_tokens                => undef,
        _output_line_number            => undef,
        _wrote_line_information_string => 0,
        _wrote_column_headings         => 0,
        _warning_file                  => $warning_file,
        _warning_count                 => 0,
        _complaint_count               => 0,
        _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
        _saw_brace_error => 0,
        _saw_extrude     => $saw_extrude,
        _output_array    => [],
    );
    return bless \%self, $class;
}
# Close the warnings handle, if one was opened, and forget it.
# An error raised by close() is trapped.
sub close_log_file {
    my ($self) = @_;

    if ( $self->{_fh_warnings} ) {
        my $fh_warnings = $self->{_fh_warnings};
        eval { $fh_warnings->close() };
        $self->{_fh_warnings} = undef;
    }
}
# Accessor: number of warnings counted so far.
sub get_warning_count {
    my ($self) = @_;
    return $self->{_warning_count};
}
# Accessor: true when log entries get the line information prefix.
sub get_use_prefix {
    my ($self) = @_;
    return $self->{_use_prefix};
}
# Suspend logging: logfile_output returns immediately while the
# _block_log_output flag is set.
sub block_log_output {
    my ($self) = @_;
    $self->{_block_log_output} = 1;
}
# Resume logging by clearing the _block_log_output flag.
sub unblock_log_output {
    my ($self) = @_;
    $self->{_block_log_output} = 0;
}
# Start a warning section in the log: prefixes are switched off, an empty
# warning line is issued and a banner line is written to the log.
sub interrupt_logfile {
    my ($self) = @_;
    my $banner = '#' x 24 . " WARNING " . '#' x 25 . "\n";

    $self->{_use_prefix} = 0;
    $self->warning("\n");
    return $self->write_logfile_entry($banner);
}
# End a warning section in the log: write a closing rule of 60 '#'
# characters and switch prefixes back on.
sub resume_logfile {
    my ($self) = @_;
    my $rule = '#' x 60 . "\n";

    $self->write_logfile_entry($rule);
    $self->{_use_prefix} = 1;
}
# Note that the end of the input has been reached.  A "Last line" entry is
# logged first, unless a line information string was already written for
# the current line.
sub we_are_at_the_last_line {
    my ($self) = @_;
    if ( !$self->{_wrote_line_information_string} ) {
        $self->write_logfile_entry("Last line\n\n");
    }
    $self->{_at_end_of_file} = 1;
}
# record some stuff in case we go down in flames
#
#   $line_of_tokens     - hash ref for the current line; reads _line_text,
#                         _line_number and _rlevels
#   $output_line_number - corresponding output line number
#
# The line information is saved in the object.  An abbreviated copy of the
# line goes to the log when at least 'logfile-gap' lines have passed since
# the last one logged, or when the line starts a sub or package.
sub black_box {
    my ( $self, $line_of_tokens, $output_line_number ) = @_;
    my $input_line        = $line_of_tokens->{_line_text};
    my $input_line_number = $line_of_tokens->{_line_number};

    # save line information in case we have to write a logfile message
    $self->{_line_of_tokens}                = $line_of_tokens;
    $self->{_output_line_number}            = $output_line_number;
    $self->{_wrote_line_information_string} = 0;

    my $lines_since_last_entry =
      $input_line_number - $self->{_last_input_line_written};
    my $rOpts = $self->{_rOpts};

    if (   ( $lines_since_last_entry >= $rOpts->{'logfile-gap'} )
        || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) )
    {
        my $structural_indentation_level = $line_of_tokens->{_rlevels}->[0];
        $self->{_last_input_line_written} = $input_line_number;

        # strip leading whitespace and the newline, then show the
        # indentation level as one '.' per level
        my $out_str = $input_line;
        $out_str =~ s/^\s*//;
        chomp $out_str;
        $out_str = ( '.' x $structural_indentation_level ) . $out_str;

        # keep the log entry short
        if ( length($out_str) > 35 ) {
            $out_str = substr( $out_str, 0, 35 ) . " ....";
        }
        $self->logfile_output( "", "$out_str\n" );
    }
}
# Write a message to the log.  The arguments are joined into one string
# (with the list separator) and passed to logfile_output.
sub write_logfile_entry {
    my ( $self, @parts ) = @_;

    # add leading >>> to avoid confusing error messages and code
    my $msg = "@parts";
    return $self->logfile_output( ">>>", $msg );
}
# Mark the column headings as written (_wrote_column_headings = 1) and
# append the explanatory text and the column header of the log table to
# the logger's output array (_output_array).
# NOTE(review): the heredoc below is a column-aligned table; its exact
# spacing cannot be confirmed from this listing.
4117 sub write_column_headings {
4120 $self->{_wrote_column_headings} = 1;
4121 my $routput_array = $self->{_output_array};
4122 push @{$routput_array}, <<EOM;
4123 The nesting depths in the table below are at the start of the lines.
4124 The indicated output line numbers are not always exact.
4125 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
4127 in:out indent c b nesting code + messages; (messages begin with >>>)
4128 lines levels i k (code begins with one '.' per indent level)
4129 ------ ----- - - -------- -------------------------------------------
# Make the columns of information that prefix a logfile message.
#
# Reads the line saved in _line_of_tokens and the saved output line number.
# Writes the column headings first if that has not been done yet.
# Returns the prefix string, or "" when the saved line has no (or a zero)
# line number.
sub make_line_information_string {
    my $self                    = shift;
    my $line_of_tokens          = $self->{_line_of_tokens};
    my $input_line_number       = $line_of_tokens->{_line_number};
    my $line_information_string = "";
    if ($input_line_number) {

        my $output_line_number   = $self->{_output_line_number};
        my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
        my $paren_depth          = $line_of_tokens->{_paren_depth};
        my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
        my $python_indentation_level =
          $line_of_tokens->{_python_indentation_level};
        my $rlevels         = $line_of_tokens->{_rlevels};
        my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
        my $rci_levels      = $line_of_tokens->{_rci_levels};
        my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};

        my $structural_indentation_level = $$rlevels[0];

        $self->write_column_headings() unless $self->{_wrote_column_headings};

        # keep logfile columns aligned for scripts up to 999 lines;
        # for longer scripts it doesn't really matter.
        # Each number is padded to three columns: two blanks for one
        # digit, one blank for two digits, none otherwise.
        my $extra_space = "";
        $extra_space .=
            ( $input_line_number < 10 )  ? "  "
          : ( $input_line_number < 100 ) ? " "
          :                                "";
        $extra_space .=
            ( $output_line_number < 10 )  ? "  "
          : ( $output_line_number < 100 ) ? " "
          :                                 "";

        # there are 2 possible nesting strings:
        # the original which looks like this:  (0 [1 {2
        # the new one, which looks like this:  {{[
        # the new one is easier to read, and shows the order, but
        # could be arbitrarily long, so we use it unless it is too long
        my $nesting_string =
          "($paren_depth [$square_bracket_depth {$brace_depth";
        my $nesting_string_new = $$rnesting_tokens[0];

        # a continuation indentation level above 9 is shown as '*' so
        # that the column stays one character wide
        my $ci_level = $$rci_levels[0];
        if ( $ci_level > 9 ) { $ci_level = '*' }
        my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';

        if ( length($nesting_string_new) <= 8 ) {
            $nesting_string =
              $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
        }
        if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
        $line_information_string =
"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
    }
    return $line_information_string;
}
# Append one entry to the in-memory log (_output_array).
#
#   $prompt - text placed between the line information and the message
#   $msg    - the message
#
# Nothing is stored while log output is blocked.  At end of file, or when
# prefixes are off, the bare message is stored; otherwise it is prefixed
# with the line information string when that string is not empty.
sub logfile_output {
    my ( $self, $prompt, $msg ) = @_;
    return if ( $self->{_block_log_output} );

    my $routput_array = $self->{_output_array};
    my $entry         = "$msg";

    unless ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
        my $line_information_string = $self->make_line_information_string();
        $self->{_wrote_line_information_string} = 1;

        $entry = "$line_information_string $prompt$msg"
          if ($line_information_string);
    }
    push @{$routput_array}, $entry;
}
# Accessor: number of brace errors counted so far.
sub get_saw_brace_error {
    my ($self) = @_;
    return $self->{_saw_brace_error};
}
# Count one more brace error; returns the count before the increment.
sub increment_brace_error {
    my ($self) = @_;
    return $self->{_saw_brace_error}++;
}
# Issue a warning about a brace error, but only for the first
# BRACE_WARNING_LIMIT errors.  The arguments are passed on to warning().
# When the limit is reached, a final notice is issued.
sub brace_warning {
    my $self = shift;
    use constant BRACE_WARNING_LIMIT => 10;

    my $errors_before = $self->{_saw_brace_error};
    $self->warning(@_) if ( $errors_before < BRACE_WARNING_LIMIT );

    my $errors_now = $errors_before + 1;
    $self->{_saw_brace_error} = $errors_now;

    if ( $errors_now == BRACE_WARNING_LIMIT ) {
        $self->warning("No further warnings of this type will be given\n");
    }
}
# Handle non-critical warning messages based on the 'warning-output'
# option.  The arguments are the message text.
sub complain {
    my $self  = shift;
    my $rOpts = $self->{_rOpts};

    # these appear in .ERR output only if -w flag is used
    return $self->warning(@_) if ( $rOpts->{'warning-output'} );

    # otherwise, they are counted and go to the .LOG file
    $self->{_complaint_count}++;
    return $self->write_logfile_entry(@_);
}
# Report errors to the .ERR file (or to STDERR when the
# 'standard-error-output' option is set).  The arguments are the message.
#
# Nothing is done when the 'quiet' option is set.  The warnings handle is
# opened on the first warning.  Only the first WARNING_LIMIT warnings are
# written (each one is also copied to the log); all of them are counted.
# Dies if the warning file cannot be opened.
sub warning {
    my $self = shift;
    use constant WARNING_LIMIT => 50;

    my $rOpts = $self->{_rOpts};
    unless ( $rOpts->{'quiet'} ) {

        my $warning_count = $self->{_warning_count};

        # first warning: decide where the warnings go
        if ( !$warning_count ) {
            my $warning_file = $self->{_warning_file};
            my $fh_new;
            if ( $rOpts->{'standard-error-output'} ) {
                $fh_new = *STDERR;
            }
            else {
                ( $fh_new, my $filename ) =
                  Perl::Tidy::streamhandle( $warning_file, 'w' );
                $fh_new or die("couldn't open $filename $!\n");
                warn "## Please see file $filename\n" unless ref($warning_file);
            }
            $self->{_fh_warnings} = $fh_new;
        }

        my $fh_warnings = $self->{_fh_warnings};
        if ( $warning_count < WARNING_LIMIT ) {
            if ( $self->get_use_prefix() > 0 ) {

                # label the message with the current input line number
                my $input_line_number =
                  Perl::Tidy::Tokenizer::get_input_line_number();
                $fh_warnings->print("$input_line_number:\t@_");
                $self->write_logfile_entry("WARNING: @_");
            }
            else {
                $fh_warnings->print(@_);
                $self->write_logfile_entry(@_);
            }
        }
        $warning_count++;
        $self->{_warning_count} = $warning_count;

        if ( $warning_count == WARNING_LIMIT ) {
            $fh_warnings->print("No further warnings will be given\n");
        }
    }
}
# programming bug codes kept in _saw_code_bug:
#   -1 = no bug
#    0 = maybe, not sure.
#    1 = definitely
#
# Raise the code from "no bug" to "maybe"; a higher code is left alone.
sub report_possible_bug {
    my ($self) = @_;
    my $current = $self->{_saw_code_bug};
    my $updated = ( $current < 0 ) ? 0 : $current;
    return $self->{_saw_code_bug} = $updated;
}
# Record that a bug has definitely been seen (_saw_code_bug = 1).
sub report_definite_bug {
    my ($self) = @_;
    $self->{_saw_code_bug} = 1;
}
# Issue a warning asking the user to report a bug, depending on the
# object's _saw_code_bug code:
#   0 (maybe), and $infile_syntax_ok == 1:
#       warn that a code bug may have been encountered
#   1 (for sure):
#       with _saw_extrude set, warn that the problem may instead be with
#       perl or one of its modules; otherwise warn that a perltidy bug
#       was encountered
# The code also asks $formatter for get_added_semicolon_count() and, when
# that count is positive, issues a further warning suggesting a rerun
# with -nasc.
# NOTE(review): closing braces and heredoc terminators are not visible in
# this listing, so the exact nesting of the semicolon check and the exact
# spacing of the messages should be confirmed against the full file.
4321 sub ask_user_for_bug_report {
4324 my ( $infile_syntax_ok, $formatter ) = @_;
4325 my $saw_code_bug = $self->{_saw_code_bug};
4326 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4327 $self->warning(<<EOM);
4329 You may have encountered a code bug in perltidy. If you think so, and
4330 the problem is not listed in the BUGS file at
4331 http://perltidy.sourceforge.net, please report it so that it can be
4332 corrected. Include the smallest possible script which has the problem,
4333 along with the .LOG file. See the manual pages for contact information.
4338 elsif ( $saw_code_bug == 1 ) {
4339 if ( $self->{_saw_extrude} ) {
4340 $self->warning(<<EOM);
4342 You may have encountered a bug in perltidy. However, since you are using the
4343 -extrude option, the problem may be with perl or one of its modules, which have
4344 occasional problems with this type of file. If you believe that the
4345 problem is with perltidy, and the problem is not listed in the BUGS file at
4346 http://perltidy.sourceforge.net, please report it so that it can be corrected.
4347 Include the smallest possible script which has the problem, along with the .LOG
4348 file. See the manual pages for contact information.
4353 $self->warning(<<EOM);
4355 Oops, you seem to have encountered a bug in perltidy. Please check the
4356 BUGS file at http://perltidy.sourceforge.net. If the problem is not
4357 listed there, please report it so that it can be corrected. Include the
4358 smallest possible script which produces this message, along with the
4359 .LOG file if appropriate. See the manual pages for contact information.
4360 Your efforts are appreciated.
4363 my $added_semicolon_count = 0;
4365 $added_semicolon_count =
4366 $formatter->get_added_semicolon_count();
4368 if ( $added_semicolon_count > 0 ) {
4369 $self->warning(<<EOM);
4371 The log file shows that perltidy added $added_semicolon_count semicolons.
4372 Please rerun with -nasc to see if that is the cause of the syntax error. Even
4373 if that is the problem, please report it so that it can be fixed.
4383 # called after all formatting to summarize errors
4385 my ( $infile_syntax_ok, $formatter ) = @_;
4387 my $rOpts = $self->{_rOpts};
4388 my $warning_count = $self->{_warning_count};
4389 my $saw_code_bug = $self->{_saw_code_bug};
4392 ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4393 || $saw_code_bug == 1
4394 || $rOpts->{'logfile'};
4395 my $log_file = $self->{_log_file};
4396 if ($warning_count) {
4397 if ($save_logfile) {
4398 $self->block_log_output(); # avoid echoing this to the logfile
4400 "The logfile $log_file may contain useful information\n");
4401 $self->unblock_log_output();
4404 if ( $self->{_complaint_count} > 0 ) {
4406 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4410 if ( $self->{_saw_brace_error}
4411 && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
4413 $self->warning("To save a full .LOG file rerun with -g\n");
4416 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4418 if ($save_logfile) {
4419 my $log_file = $self->{_log_file};
4420 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4422 my $routput_array = $self->{_output_array};
4423 foreach ( @{$routput_array} ) { $fh->print($_) }
4424 eval { $fh->close() };
4429 #####################################################################
4431 # The Perl::Tidy::DevNull class supplies a dummy print method
4433 #####################################################################
4435 package Perl::Tidy::DevNull;
# Do-nothing output sink.
#   new   - blesses an empty hash into the invoking class ($_[0])
#   print - ignores its arguments and returns nothing
#   close - returns nothing
4436 sub new { return bless {}, $_[0] }
4437 sub print { return }
4438 sub close { return }
4440 #####################################################################
4442 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4444 #####################################################################
4446 package Perl::Tidy::HtmlWriter;
4456 %short_to_long_names
4460 $missing_html_entities
4463 # replace unsafe characters with HTML entity representation if HTML::Entities
4465 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4469 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4470 $html_src_extension )
4473 my $html_file_opened = 0;
4475 ( $html_fh, my $html_filename ) =
4476 Perl::Tidy::streamhandle( $html_file, 'w' );
4478 warn("can't open $html_file: $!\n");
4481 $html_file_opened = 1;
4483 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4484 $input_file = "NONAME";
4487 # write the table of contents to a string
4489 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4492 my @pre_string_stack;
4493 if ( $rOpts->{'html-pre-only'} ) {
4495 # pre section goes directly to the output stream
4496 $html_pre_fh = $html_fh;
4497 $html_pre_fh->print( <<"PRE_END");
4503 # pre section goes out to a temporary string
4505 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4506 push @pre_string_stack, \$pre_string;
4509 # pod text gets diverted if the 'pod2html' is used
4512 if ( $rOpts->{'pod2html'} ) {
4513 if ( $rOpts->{'html-pre-only'} ) {
4514 undef $rOpts->{'pod2html'};
4517 eval "use Pod::Html";
4520 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4521 undef $rOpts->{'pod2html'};
4524 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4531 if ( $rOpts->{'frames'} ) {
4532 unless ($extension) {
4534 "cannot use frames without a specified output extension; ignoring -frm\n";
4535 undef $rOpts->{'frames'};
4538 $toc_filename = $input_file . $html_toc_extension . $extension;
4539 $src_filename = $input_file . $html_src_extension . $extension;
4543 # ----------------------------------------------------------
4544 # Output is now directed as follows:
4545 # html_toc_fh <-- table of contents items
4546 # html_pre_fh <-- the <pre> section of formatted code, except:
4547 # html_pod_fh <-- pod goes here with the pod2html option
4548 # ----------------------------------------------------------
4550 my $title = $rOpts->{'title'};
4552 ( $title, my $path ) = fileparse($input_file);
4554 my $toc_item_count = 0;
4555 my $in_toc_package = "";
4558 _input_file => $input_file, # name of input file
4559 _title => $title, # title, unescaped
4560 _html_file => $html_file, # name of .html output file
4561 _toc_filename => $toc_filename, # for frames option
4562 _src_filename => $src_filename, # for frames option
4563 _html_file_opened => $html_file_opened, # a flag
4564 _html_fh => $html_fh, # the output stream
4565 _html_pre_fh => $html_pre_fh, # pre section goes here
4566 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
4567 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4568 _rpod_string => \$pod_string, # string holding pod
4569 _pod_cut_count => 0, # how many =cut's?
4570 _html_toc_fh => $html_toc_fh, # fh for table of contents
4571 _rtoc_string => \$toc_string, # string holding toc
4572 _rtoc_item_count => \$toc_item_count, # how many toc items
4573 _rin_toc_package => \$in_toc_package, # package name
4574 _rtoc_name_count => {}, # hash to track unique names
4575 _rpackage_stack => [], # stack to check for package
4577 _rlast_level => \$last_level, # brace indentation level
4583 # Add an item to the html table of contents.
4584 # This is called even if no table of contents is written,
4585 # because we still want to put the anchors in the <pre> text.
4586 # We are given an anchor name and its type; types are:
4587 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
4588 # There must be an 'EOF' call at the end to wrap things up.
4590 my ( $name, $type ) = @_;
4591 my $html_toc_fh = $self->{_html_toc_fh};
4592 my $html_pre_fh = $self->{_html_pre_fh};
4593 my $rtoc_name_count = $self->{_rtoc_name_count};
4594 my $rtoc_item_count = $self->{_rtoc_item_count};
4595 my $rlast_level = $self->{_rlast_level};
4596 my $rin_toc_package = $self->{_rin_toc_package};
4597 my $rpackage_stack = $self->{_rpackage_stack};
4599 # packages contain sublists of subs, so to avoid errors all package
4600 # items are written and finished with the following routines
4601 my $end_package_list = sub {
4602 if ($$rin_toc_package) {
4603 $html_toc_fh->print("</ul>\n</li>\n");
4604 $$rin_toc_package = "";
4608 my $start_package_list = sub {
4609 my ( $unique_name, $package ) = @_;
4610 if ($$rin_toc_package) { $end_package_list->() }
4611 $html_toc_fh->print(<<EOM);
4612 <li><a href=\"#$unique_name\">package $package</a>
4615 $$rin_toc_package = $package;
4618 # start the table of contents on the first item
4619 unless ($$rtoc_item_count) {
4621 # but just quit if we hit EOF without any other entries
4622 # in this case, there will be no toc
4623 return if ( $type eq 'EOF' );
4624 $html_toc_fh->print( <<"TOC_END");
4625 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4629 $$rtoc_item_count++;
4631 # make a unique anchor name for this location:
4632 # - packages get a 'package-' prefix
4633 # - subs use their names
4634 my $unique_name = $name;
4635 if ( $type eq 'package' ) { $unique_name = "package-$name" }
4637 # append '-1', '-2', etc if necessary to make unique; this will
4638 # be unique because subs and packages cannot have a '-'
4639 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4640 $unique_name .= "-$count";
4643 # - all names get terminal '-' if pod2html is used, to avoid
4644 # conflicts with anchor names created by pod2html
4645 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4647 # start/stop lists of subs
4648 if ( $type eq 'sub' ) {
4649 my $package = $rpackage_stack->[$$rlast_level];
4650 unless ($package) { $package = 'main' }
4652 # if we're already in a package/sub list, be sure it's the right
4653 # package or else close it
4654 if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4655 $end_package_list->();
4658 # start a package/sub list if necessary
4659 unless ($$rin_toc_package) {
4660 $start_package_list->( $unique_name, $package );
4664 # now write an entry in the toc for this item
4665 if ( $type eq 'package' ) {
4666 $start_package_list->( $unique_name, $name );
4668 elsif ( $type eq 'sub' ) {
4669 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4672 $end_package_list->();
4673 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4676 # write the anchor in the <pre> section
4677 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4679 # end the table of contents, if any, on the end of file
4680 if ( $type eq 'EOF' ) {
4681 $html_toc_fh->print( <<"TOC_END");
4683 <!-- END CODE INDEX -->
4690 # This is the official list of tokens which may be identified by the
4691 # user. Long names are used as getopt keys. Short names are
4692 # convenient short abbreviations for specifying input. Short names
4693 # somewhat resemble token type characters, but are often different
4694 # because they may only be alphanumeric, to allow command line
4695 # input. Also, note that because of case insensitivity of html,
4696 # this table must be in a single case only (I've chosen to use all
4698 # When adding NEW_TOKENS: update this hash table
4699 # short names => long names
4700 %short_to_long_names = (
4710 'pu' => 'punctuation',
4711 'i' => 'identifier',
4713 'h' => 'here-doc-target',
4714 'hh' => 'here-doc-text',
4716 'sc' => 'semicolon',
4717 'm' => 'subroutine',
4721 # Now we have to map actual token types into one of the above short
4722 # names; any token types not mapped will get 'punctuation'
4725 # The values of this hash table correspond to the keys of the
4726 # previous hash table.
4727 # The keys of this hash table are token types and can be seen
4728 # by running with --dump-token-types (-dtt).
4730 # When adding NEW_TOKENS: update this hash table
4731 # $type => $short_name
4732 %token_short_names = (
4757 # These token types will all be called identifiers for now
4758 # FIXME: need to separate user defined modules as separate type
4759 my @identifier = qw" i t U C Y Z G :: ";
4760 @token_short_names{@identifier} = ('i') x scalar(@identifier);
4762 # These token types will be called 'structure'
4763 my @structure = qw" { } ";
4764 @token_short_names{@structure} = ('s') x scalar(@structure);
4766 # OLD NOTES: save for reference
4767 # Any of these could be added later if it would be useful.
4768 # For now, they will by default become punctuation
4769 # my @list = qw" L R [ ] ";
4770 # @token_long_names{@list} = ('non-structure') x scalar(@list);
4773 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4775 # @token_long_names{@list} = ('math') x scalar(@list);
4777 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
4778 # @token_long_names{@list} = ('bit') x scalar(@list);
4780 # my @list = qw" == != < > <= <=> ";
4781 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4783 # my @list = qw" && || ! &&= ||= //= ";
4784 # @token_long_names{@list} = ('logical') x scalar(@list);
4786 # my @list = qw" . .= =~ !~ x x= ";
4787 # @token_long_names{@list} = ('string-operators') x scalar(@list);
4790 # my @list = qw" .. -> <> ... \ ? ";
4791 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
4795 sub make_getopt_long_names {
# Append the html-related option specifications to the array referenced
# by $rgetopt_names (the array is modified in place).
#
# For every long token name in %short_to_long_names three options are
# added: html-color-NAME (takes a string, '=s') and the negatable flags
# html-italic-NAME and html-bold-NAME ('!'). Only the long name is used
# in the loop; the short name is read from the hash but not needed.
# The fixed html options and the Pod::Html pass-through options follow.
4797 my ($rgetopt_names) = @_;
4798 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4799 push @$rgetopt_names, "html-color-$name=s";
4800 push @$rgetopt_names, "html-italic-$name!";
4801 push @$rgetopt_names, "html-bold-$name!";
4803 push @$rgetopt_names, "html-color-background=s";
4804 push @$rgetopt_names, "html-linked-style-sheet=s";
4805 push @$rgetopt_names, "nohtml-style-sheets";
4806 push @$rgetopt_names, "html-pre-only";
4807 push @$rgetopt_names, "html-line-numbers";
4808 push @$rgetopt_names, "html-entities!";
4809 push @$rgetopt_names, "stylesheet";
4810 push @$rgetopt_names, "html-table-of-contents!";
4811 push @$rgetopt_names, "pod2html!";
4812 push @$rgetopt_names, "frames!";
4813 push @$rgetopt_names, "html-toc-extension=s";
4814 push @$rgetopt_names, "html-src-extension=s";
4816 # Pod::Html parameters:
4817 push @$rgetopt_names, "backlink=s";
4818 push @$rgetopt_names, "cachedir=s";
4819 push @$rgetopt_names, "htmlroot=s";
4820 push @$rgetopt_names, "libpods=s";
4821 push @$rgetopt_names, "podpath=s";
4822 push @$rgetopt_names, "podroot=s";
4823 push @$rgetopt_names, "title=s";
4825 # Pod::Html parameters with leading 'pod' which will be removed
4826 # before the call to Pod::Html
4827 push @$rgetopt_names, "podquiet!";
4828 push @$rgetopt_names, "podverbose!";
4829 push @$rgetopt_names, "podrecurse!";
4830 push @$rgetopt_names, "podflush";
4831 push @$rgetopt_names, "podheader!";
4832 push @$rgetopt_names, "podindex!";
4835 sub make_abbreviated_names {
# Add the short command-line abbreviations for the html options to the
# hash referenced by $rexpansion (modified in place). Each value is an
# array reference holding the single long option name the abbreviation
# expands to.
4837 # We're appending things like this to the expansion list:
4838 # 'hcc' => [qw(html-color-comment)],
4839 # 'hck' => [qw(html-color-keyword)],
4842 my ($rexpansion) = @_;
4844 # abbreviations for color/bold/italic properties
# For each token short name: hcX, hbX, hiX plus the negated forms nhbX
# and nhiX (there is no negated form for the color option).
4845 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4846 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
4847 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
4848 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
4849 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4850 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4853 # abbreviations for all other html options
4854 ${$rexpansion}{"hcbg"} = ["html-color-background"];
4855 ${$rexpansion}{"pre"} = ["html-pre-only"];
4856 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
4857 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
4858 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
4859 ${$rexpansion}{"hent"} = ["html-entities"];
4860 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4861 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
4862 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
4863 ${$rexpansion}{"ss"} = ["stylesheet"];
4864 ${$rexpansion}{"pod"} = ["pod2html"];
4865 ${$rexpansion}{"npod"} = ["nopod2html"];
4866 ${$rexpansion}{"frm"} = ["frames"];
4867 ${$rexpansion}{"nfrm"} = ["noframes"];
4868 ${$rexpansion}{"text"} = ["html-toc-extension"];
4869 ${$rexpansion}{"sext"} = ["html-src-extension"];
4874 # This will be called once after options have been parsed
4878 # X11 color names for default settings that seemed to look ok
4879 # (these color names are only used for programming clarity; the hex
4880 # numbers are actually written)
4881 use constant ForestGreen => "#228B22";
4882 use constant SaddleBrown => "#8B4513";
4883 use constant magenta4 => "#8B008B";
4884 use constant IndianRed3 => "#CD5555";
4885 use constant DeepSkyBlue4 => "#00688B";
4886 use constant MediumOrchid3 => "#B452CD";
4887 use constant black => "#000000";
4888 use constant white => "#FFFFFF";
4889 use constant red => "#FF0000";
4891 # set default color, bold, italic properties
4892 # anything not listed here will be given the default (punctuation) color --
4893 # these types currently not listed and get default: ws pu s sc cm co p
4894 # When adding NEW_TOKENS: add an entry here if you don't want defaults
4896 # set_default_properties( $short_name, default_color, bold?, italic? );
4897 set_default_properties( 'c', ForestGreen, 0, 0 );
4898 set_default_properties( 'pd', ForestGreen, 0, 1 );
4899 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
4900 set_default_properties( 'q', IndianRed3, 0, 0 );
4901 set_default_properties( 'hh', IndianRed3, 0, 1 );
4902 set_default_properties( 'h', IndianRed3, 1, 0 );
4903 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
4904 set_default_properties( 'w', black, 0, 0 );
4905 set_default_properties( 'n', MediumOrchid3, 0, 0 );
4906 set_default_properties( 'v', MediumOrchid3, 0, 0 );
4907 set_default_properties( 'j', IndianRed3, 1, 0 );
4908 set_default_properties( 'm', red, 1, 0 );
4910 set_default_color( 'html-color-background', white );
4911 set_default_color( 'html-color-punctuation', black );
4913 # setup property lookup tables for tokens based on their short names
4914 # every token type has a short name, and will use these tables
4915 # to do the html markup
4916 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4917 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
4918 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
4919 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4922 # write style sheet to STDOUT and die if requested
4923 if ( defined( $rOpts->{'stylesheet'} ) ) {
4924 write_style_sheet_file('-');
4928 # make sure user gives a file name after -css
4929 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4930 $css_linkname = $rOpts->{'html-linked-style-sheet'};
4931 if ( $css_linkname =~ /^-/ ) {
4932 die "You must specify a valid filename after -css\n";
4936 # check for conflict
4937 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4938 $rOpts->{'nohtml-style-sheets'} = 0;
4939 warning("You can't specify both -css and -nss; -nss ignored\n");
4942 # write a style sheet file if necessary
4943 if ($css_linkname) {
4945 # if the selected filename exists, don't write, because user may
4946 # have done some work by hand to create it; use backup name instead
4947 # Also, this will avoid a potential disaster in which the user
4948 # forgets to specify the style sheet, like this:
4949 # perltidy -html -css myfile1.pl myfile2.pl
4950 # This would cause myfile1.pl to be parsed as the style sheet by GetOpts
4951 my $css_filename = $css_linkname;
4952 unless ( -e $css_filename ) {
4953 write_style_sheet_file($css_filename);
4956 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4959 sub write_style_sheet_file {
# Open $css_filename for writing, emit the style sheet through
# write_style_sheet_data, and close the handle. Dies if the open fails;
# an exception from close is trapped by eval and its result is ignored.
#
# NOTE(review): the write mode is embedded in the single string given to
# IO::File->new ("> $css_filename"). This appears to be what allows the
# caller that passes '-' to send the style sheet to STDOUT, so confirm
# that use before changing to a separate mode argument.
4961 my $css_filename = shift;
4963 unless ( $fh = IO::File->new("> $css_filename") ) {
4964 die "can't open $css_filename: $!\n";
4966 write_style_sheet_data($fh);
4967 eval { $fh->close };
4970 sub write_style_sheet_data {
4972 # write the style sheet data to an open file handle
#
# Output, in order:
#  - a body rule and a pre rule built from the html-color-background and
#    html-color-punctuation options; the pre background comes from
#    html-pre-color-background when that is true, else the body background
#  - one rule per token short name, in sorted order: the color from
#    %html_color (the punctuation color when undefined), plus
#    font-weight:bold / font-style:italic when %html_bold / %html_italic
#    hold a true value, with the long name as a trailing CSS comment
# NOTE(review): $fh is not assigned on any line shown in this listing;
# presumably it is the handle passed in by the caller.
4975 my $bg_color = $rOpts->{'html-color-background'};
4976 my $text_color = $rOpts->{'html-color-punctuation'};
4978 # pre-bgcolor is new, and may not be defined
4979 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4980 $pre_bg_color = $bg_color unless $pre_bg_color;
4982 $fh->print(<<"EOM");
4983 /* default style sheet generated by perltidy */
4984 body {background: $bg_color; color: $text_color}
4985 pre { color: $text_color;
4986 background: $pre_bg_color;
4987 font-family: courier;
4992 foreach my $short_name ( sort keys %short_to_long_names ) {
4993 my $long_name = $short_to_long_names{$short_name};
4995 my $abbrev = '.' . $short_name;
4996 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
4997 my $color = $html_color{$short_name};
4998 if ( !defined($color) ) { $color = $text_color }
4999 $fh->print("$abbrev \{ color: $color;");
5001 if ( $html_bold{$short_name} ) {
5002 $fh->print(" font-weight:bold;");
5005 if ( $html_italic{$short_name} ) {
5006 $fh->print(" font-style:italic;");
5008 $fh->print("} /* $long_name */\n");
5012 sub set_default_color {
5014 # make sure that options hash $rOpts->{$key} contains a valid color
#
# $key is the option name and $color the default to fall back on.
# A true value already stored in $rOpts->{$key} takes precedence over
# the default; whichever is chosen is passed through check_RGB and the
# result is stored back in $rOpts->{$key}.
5015 my ( $key, $color ) = @_;
5016 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
5017 $rOpts->{$key} = check_RGB($color);
5022 # if color is a 6 digit hex RGB value, prepend a #, otherwise
5023 # assume that it is a valid ascii color name
5025 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
5029 sub set_default_properties {
# Install the default color, bold and italic settings for one token type.
# $short_name is mapped through %short_to_long_names to build the option
# keys html-color-NAME, html-bold-NAME and html-italic-NAME.
# The color is handled by set_default_color; the bold and italic options
# receive $bold / $italic only when they are not already defined, so an
# explicit 0 already in $rOpts is kept.
5030 my ( $short_name, $color, $bold, $italic ) = @_;
5032 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
5034 $key = "html-bold-$short_to_long_names{$short_name}";
5035 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
5036 $key = "html-italic-$short_to_long_names{$short_name}";
5037 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
5042 # Use Pod::Html to process the pod and make the page
5043 # then merge the perltidy code sections into it.
5044 # return 1 if success, 0 otherwise
5046 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
5047 my $input_file = $self->{_input_file};
5048 my $title = $self->{_title};
5049 my $success_flag = 0;
5051 # don't try to use pod2html if no pod
5052 unless ($pod_string) {
5053 return $success_flag;
5056 # Pod::Html requires a real temporary filename
5057 # If we are making a frame, we have a name available
5058 # Otherwise, we have to find one
5060 if ( $rOpts->{'frames'} ) {
5061 $tmpfile = $self->{_toc_filename};
5064 $tmpfile = Perl::Tidy::make_temporary_filename();
5066 my $fh_tmp = IO::File->new( $tmpfile, 'w' );
5068 warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
5069 return $success_flag;
5072 #------------------------------------------------------------------
5073 # Warning: a temporary file is open; we have to clean up if
5074 # things go bad. From here on all returns should be by going to
5075 # RETURN so that the temporary file gets unlinked.
5076 #------------------------------------------------------------------
5078 # write the pod text to the temporary file
5079 $fh_tmp->print($pod_string);
5082 # Hand off the pod to pod2html.
5083 # Note that we can use the same temporary filename for input and output
5084 # because of the way pod2html works.
5088 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
5091 # Flags with string args:
5092 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
5093 # "podpath=s", "podroot=s"
5094 # Note: -css=s is handled by perltidy itself
5095 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
5096 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
5099 # Toggle switches; these have extra leading 'pod'
5100 # "header!", "index!", "recurse!", "quiet!", "verbose!"
5101 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
5102 my $kwd = $kw; # allows us to strip 'pod'
5103 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
5104 elsif ( defined( $rOpts->{$kw} ) ) {
5106 push @args, "--no$kwd";
5112 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
5114 # Must clean up if pod2html dies (it can);
5115 # Be careful not to overwrite callers __DIE__ routine
5116 local $SIG{__DIE__} = sub {
5118 unlink $tmpfile if -e $tmpfile;
5124 $fh_tmp = IO::File->new( $tmpfile, 'r' );
5127 # this error shouldn't happen ... we just used this filename
5128 warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
5132 my $html_fh = $self->{_html_fh};
5137 # This routine will write the html selectively and store the toc
5138 my $html_print = sub {
5140 $html_fh->print($_) unless ($no_print);
5141 if ($in_toc) { push @toc, $_ }
5145 # loop over lines of html output from pod2html and merge in
5146 # the necessary perltidy html sections
5147 my ( $saw_body, $saw_index, $saw_body_end );
5148 while ( my $line = $fh_tmp->getline() ) {
5150 if ( $line =~ /^\s*<html>\s*$/i ) {
5151 my $date = localtime;
5152 $html_print->("<!-- Generated by perltidy on $date -->\n");
5153 $html_print->($line);
5156 # Copy the perltidy css, if any, after <body> tag
5157 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
5159 $html_print->($css_string) if $css_string;
5160 $html_print->($line);
5162 # add a top anchor and heading
5163 $html_print->("<a name=\"-top-\"></a>\n");
5164 $title = escape_html($title);
5165 $html_print->("<h1>$title</h1>\n");
5167 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
5170 # when frames are used, an extra table of contents in the
5171 # contents panel is confusing, so don't print it
5172 $no_print = $rOpts->{'frames'}
5173 || !$rOpts->{'html-table-of-contents'};
5174 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5175 $html_print->($line);
5178 # Copy the perltidy toc, if any, after the Pod::Html toc
5179 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
5181 $html_print->($line);
5183 $html_print->("<hr />\n") if $rOpts->{'frames'};
5184 $html_print->("<h2>Code Index:</h2>\n");
5185 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5186 $html_print->(@toc);
5192 # Copy one perltidy section after each marker
5193 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
5195 $html_print->($1) if $1;
5197 # Intermingle code and pod sections if we saw multiple =cut's.
5198 if ( $self->{_pod_cut_count} > 1 ) {
5199 my $rpre_string = shift(@$rpre_string_stack);
5200 if ($$rpre_string) {
5201 $html_print->('<pre>');
5202 $html_print->($$rpre_string);
5203 $html_print->('</pre>');
5207 # shouldn't happen: we stored a string before writing
5210 "Problem merging html stream with pod2html; order may be wrong\n";
5212 $html_print->($line);
5215 # If we didn't see multiple =cut lines, we'll put the pod out first
5216 # and then the code, because it's less confusing.
5219 # since we are not intermixing code and pod, we don't need
5220 # or want any <hr> lines which separated pod and code
5221 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
5225 # Copy any remaining code section before the </body> tag
5226 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
5228 if (@$rpre_string_stack) {
5229 unless ( $self->{_pod_cut_count} > 1 ) {
5230 $html_print->('<hr />');
5232 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
5233 $html_print->('<pre>');
5234 $html_print->($$rpre_string);
5235 $html_print->('</pre>');
5238 $html_print->($line);
5241 $html_print->($line);
5246 unless ($saw_body) {
5247 warn "Did not see <body> in pod2html output\n";
5250 unless ($saw_body_end) {
5251 warn "Did not see </body> in pod2html output\n";
5254 unless ($saw_index) {
5255 warn "Did not find INDEX END in pod2html output\n";
5260 eval { $html_fh->close() };
5262 # note that we have to unlink tmpfile before making frames
5263 # because the tmpfile may be one of the names used for frames
5264 unlink $tmpfile if -e $tmpfile;
5265 if ( $success_flag && $rOpts->{'frames'} ) {
5266 $self->make_frame( \@toc );
5268 return $success_flag;
5273 # Make a frame with table of contents in the left panel
5274 # and the text in the right panel.
5276 # $html_filename contains the no-frames html output
5277 # $rtoc is a reference to an array with the table of contents
5280 my $input_file = $self->{_input_file};
5281 my $html_filename = $self->{_html_file};
5282 my $toc_filename = $self->{_toc_filename};
5283 my $src_filename = $self->{_src_filename};
5284 my $title = $self->{_title};
5285 $title = escape_html($title);
5287 # FUTURE input parameter:
5288 my $top_basename = "";
5290 # We need to produce 3 html files:
5291 # 1. - the table of contents
5292 # 2. - the contents (source code) itself
5293 # 3. - the frame which contains them
5295 # get basenames for relative links
5296 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5297 my ( $src_basename, $src_path ) = fileparse($src_filename);
5299 # 1. Make the table of contents panel, with appropriate changes
5300 # to the anchor names
5301 my $src_frame_name = 'SRC';
5303 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5306 # 2. The current .html filename is renamed to be the contents panel
5307 rename( $html_filename, $src_filename )
5308 or die "Cannot rename $html_filename to $src_filename:$!\n";
5310 # 3. Then use the original html filename for the frame
5312 $title, $html_filename, $top_basename,
5313 $toc_basename, $src_basename, $src_frame_name
5317 sub write_toc_html {
5319 # write a separate html table of contents file for frames
#
# Arguments: $title (used in <title> and the heading link),
# $toc_filename (file to create), $src_basename (link prefix for the
# source page), $rtoc (array reference holding the table of contents
# lines) and $src_frame_name (target frame for the links).
#
# Dies if $toc_filename cannot be opened for writing.
# The lines in @$rtoc are rewritten in place by change_anchor_names
# before being joined and printed to the file.
5320 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5321 my $fh = IO::File->new( $toc_filename, 'w' )
5322 or die "Cannot open $toc_filename:$!\n";
5326 <title>$title</title>
5329 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5333 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5334 $fh->print( join "", @$rtoc );
sub write_frame_html {

    # Write the frameset page that ties the table of contents panel and
    # the source code panel together.
    #
    # Arguments (all plain strings, interpolated as given):
    #   $title          - page title, written inside <title>
    #   $frame_filename - path of the frameset file to create
    #   $top_basename   - optional master index page; when true, a second
    #                     left-hand panel is stacked above the contents
    #   $toc_basename   - link to the table of contents page
    #   $src_basename   - link to the source code page
    #   $src_frame_name - name given to the source frame
    #
    # Dies if $frame_filename cannot be opened for writing.
    my (
        $title,        $frame_filename, $top_basename,
        $toc_basename, $src_basename,   $src_frame_name
    ) = @_;

    # Name the file we actually tried to open in the error message; the
    # toc basename is only a link target in this routine.
    my $fh = IO::File->new( $frame_filename, 'w' )
      or die "Cannot open $frame_filename:$!\n";

    $fh->print(<<EOM);
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
<?xml version="1.0" encoding="iso-8859-1" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
</head>
EOM

    # two left panels, one right, if master index file
    if ($top_basename) {
        $fh->print(<<EOM);
<frameset cols="20%,80%">
<frameset rows="30%,70%">
<frame src = "$top_basename" />
<frame src = "$toc_basename" />
</frameset>
EOM
    }

    # one left panel, one right, if no master index file
    else {
        $fh->print(<<EOM);
<frameset cols="20%,*">
<frame src = "$toc_basename" />
EOM
    }

    # the source panel, plus a fallback for clients without frame support
    $fh->print(<<EOM);
<frame src = "$src_basename" name = "$src_frame_name" />
<noframes>
<body>
<p>If you see this message, you are using a non-frame-capable web client.</p>
<p>This document contains:</p>
<ul>
<li><a href="$toc_basename">A table of contents</a></li>
<li><a href="$src_basename">The source code</a></li>
</ul>
</body>
</noframes>
</frameset>
</html>
EOM
}
5399 sub change_anchor_names {
5401 # add a filename and target to anchors
5402 # also return the first anchor
#
# $rlines is an array reference; its elements are modified in place,
# because the foreach variable aliases each element. A matching line is
# rebuilt as: the text before the <a> tag, a new tag of the form
# <a href="$filename#NAME" target="$target">, the text after the
# original tag, and a trailing newline. Non-matching lines are untouched.
# The return value is the first rewritten href, or undef if none matched.
# NOTE(review): $pre, $name, $post and $first_anchor are set up on lines
# not shown in this listing; presumably $pre, $name and $post hold
# capture groups 1, 4 and 5 of the pattern below.
5403 my ( $rlines, $filename, $target ) = @_;
5405 foreach my $line (@$rlines) {
5407 # We're looking for lines like this:
5408 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5409 # ---- - -------- -----------------
5411 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5415 my $href = "$filename#$name";
5416 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5417 unless ($first_anchor) { $first_anchor = $href }
5420 return $first_anchor;
5423 sub close_html_file {
# Finish the html output for this file and close the html file handle.
# Does nothing unless $self->{_html_file_opened} is set.  As the comments
# below describe, there are three output paths: '-pre' only output,
# output by way of pod2html, and a plain html page holding the marked-up
# code.  In the last path the page header, the css (if used), the table
# of contents (when requested and not using frames) and the accumulated
# <pre> sections are printed in that order.  When the 'frames' option is
# set, the table of contents is split into lines and handed to
# $self->make_frame at the end.
# NOTE(review): $self, $css_string and $css_linkname are used below but
# their declarations are not among the lines visible here; several
# here-document bodies, terminators and closing braces are likewise not
# visible.
5425 return unless $self->{_html_file_opened};
5427 my $html_fh = $self->{_html_fh};
5428 my $rtoc_string = $self->{_rtoc_string};
5430 # There are 3 basic paths to html output...
5432 # ---------------------------------
5433 # Path 1: finish up if in -pre mode
5434 # ---------------------------------
5435 if ( $rOpts->{'html-pre-only'} ) {
5436 $html_fh->print( <<"PRE_END");
5439 eval { $html_fh->close() };
5444 $self->add_toc_item( 'EOF', 'EOF' );
5446 my $rpre_string_stack = $self->{_rpre_string_stack};
5448 # Patch to darken the <pre> background color in case of pod2html and
5449 # interleaved code/documentation. Otherwise, the distinction
5450 # between code and documentation is blurred.
5451 if ( $rOpts->{pod2html}
5452 && $self->{_pod_cut_count} >= 1
5453 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5455 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5458 # put the css or its link into a string, if used
5460 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5462 # use css linked to another file
5463 if ( $rOpts->{'html-linked-style-sheet'} ) {
5465 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5469 # use css embedded in this file
5470 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5471 $fh_css->print( <<'ENDCSS');
5472 <style type="text/css">
5475 write_style_sheet_data($fh_css);
5476 $fh_css->print( <<"ENDCSS");
5482 # -----------------------------------------------------------
5483 # path 2: use pod2html if requested
5484 # If we fail for some reason, continue on to path 3
5485 # -----------------------------------------------------------
5486 if ( $rOpts->{'pod2html'} ) {
5487 my $rpod_string = $self->{_rpod_string};
5488 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5489 $rpre_string_stack )
5493 # --------------------------------------------------
5494 # path 3: write code in html, with pod only in italics
5495 # --------------------------------------------------
5496 my $input_file = $self->{_input_file};
5497 my $title = escape_html($input_file);
5498 my $date = localtime;
5499 $html_fh->print( <<"HTML_START");
5500 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5501 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5502 <!-- Generated by perltidy on $date -->
5503 <html xmlns="http://www.w3.org/1999/xhtml">
5505 <title>$title</title>
5508 # output the css, if used
5510 $html_fh->print($css_string);
5511 $html_fh->print( <<"ENDCSS");
5518 $html_fh->print( <<"HTML_START");
5520 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5524 $html_fh->print("<a name=\"-top-\"></a>\n");
5525 $html_fh->print( <<"EOM");
5529 # copy the table of contents
5531 && !$rOpts->{'frames'}
5532 && $rOpts->{'html-table-of-contents'} )
5534 $html_fh->print($$rtoc_string);
5537 # copy the pre section(s)
5538 my $fname_comment = $input_file;
5539 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
5540 $html_fh->print( <<"END_PRE");
5542 <!-- contents of filename: $fname_comment -->
5546 foreach my $rpre_string (@$rpre_string_stack) {
5547 $html_fh->print($$rpre_string);
5550 # and finish the html page
5551 $html_fh->print( <<"HTML_END");
5556 eval { $html_fh->close() }; # could be object without close method
5558 if ( $rOpts->{'frames'} ) {
5559 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5560 $self->make_frame( \@toc );
# NOTE(review): the sub header for these lines is not visible.  The call
# "$self->markup_tokens( $rtokens, $rtoken_type, $rlevels )" made by the
# line markup code in this file matches this parameter list, so this is
# presumably the body of markup_tokens.
#
# Converts the tokens of one line into html-marked-up strings, keeping the
# package stack and the table of contents up to date along the way.
# Given:
#   $rtokens     - reference to the array of token strings
#   $rtoken_type - reference to the array of token types
#   $rlevels     - reference to the array of token nesting levels
# Returns a reference to the list of marked-up tokens.
5566 my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5567 my ( @colored_tokens, $j, $string, $type, $token, $level );
5568 my $rlast_level = $self->{_rlast_level};
5569 my $rpackage_stack = $self->{_rpackage_stack};
5571 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5572 $type = $$rtoken_type[$j];
5573 $token = $$rtokens[$j];
5574 $level = $$rlevels[$j];
# a negative level is treated as level 0
5575 $level = 0 if ( $level < 0 );
5577 #-------------------------------------------------------
5578 # Update the package stack. The package stack is needed to keep
5579 # the toc correct because some packages may be declared within
5580 # blocks and go out of scope when we leave the block.
5581 #-------------------------------------------------------
5582 if ( $level > $$rlast_level ) {
5583 unless ( $rpackage_stack->[ $level - 1 ] ) {
5584 $rpackage_stack->[ $level - 1 ] = 'main';
5586 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5588 elsif ( $level < $$rlast_level ) {
5589 my $package = $rpackage_stack->[$level];
5590 unless ($package) { $package = 'main' }
5592 # if we change packages due to a nesting change, we
5593 # have to make an entry in the toc
5594 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5595 $self->add_toc_item( $package, 'package' );
5598 $$rlast_level = $level;
5600 #-------------------------------------------------------
5601 # Intercept a sub name here; split it
5602 # into keyword 'sub' and sub name; and add an
5604 #-------------------------------------------------------
5605 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5606 $token = $self->markup_html_element( $1, 'k' );
5607 push @colored_tokens, $token;
5611 # but don't include sub declarations in the toc;
5612 # these will have leading token types 'i;'
5613 my $signature = join "", @$rtoken_type;
5614 unless ( $signature =~ /^i;/ ) {
5615 my $subname = $token;
5616 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5617 $self->add_toc_item( $subname, 'sub' );
5621 #-------------------------------------------------------
5622 # Intercept a package name here; split it
5623 # into keyword 'package' and name; add to the toc,
5624 # and update the package stack
5625 #-------------------------------------------------------
5626 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5627 $token = $self->markup_html_element( $1, 'k' );
5628 push @colored_tokens, $token;
5631 $self->add_toc_item( "$token", 'package' );
5632 $rpackage_stack->[$level] = $token;
5635 $token = $self->markup_html_element( $token, $type );
5636 push @colored_tokens, $token;
5638 return ( \@colored_tokens );
sub markup_html_element {

    # Wrap a single token in the html markup for its token type.
    #
    # Given:
    #   $token - the text of the token
    #   $type  - the token type code; it is mapped to a short name by
    #            %token_short_names, and unknown types are treated as
    #            punctuation ('pu')
    # Returns the marked-up token text.  A blank token (type 'b') and a
    # token consisting only of whitespace are returned unchanged, without
    # html escaping.
    my $self = shift;
    my ( $token, $type ) = @_;

    return $token if ( $type eq 'b' );         # skip a blank token
    return $token if ( $token =~ /^\s*$/ );    # skip a blank line
    $token = escape_html($token);

    # get the short abbreviation for this token type
    my $short_name = $token_short_names{$type};
    if ( !defined($short_name) ) {
        $short_name = "pu";                    # punctuation is default
    }

    # handle style sheets..
    # (punctuation is left bare; only other types get a <span>)
    if ( !$rOpts->{'nohtml-style-sheets'} ) {
        if ( $short_name ne 'pu' ) {
            $token = qq(<span class="$short_name">) . $token . "</span>";
        }
    }

    # handle no style sheets..
    else {
        my $color = $html_color{$short_name};

        # no <font> tag is needed for text in the punctuation color
        if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
            $token = qq(<font color="$color">) . $token . "</font>";
        }
        if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
        if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
    }
    return $token;
}
sub escape_html {

    # Return a copy of a string in which the characters which are special
    # in html have been replaced by character entities.
    #
    # Given:   $token - the text to escape
    # Returns: the escaped text
    #
    # HTML::Entities is used unless $missing_html_entities is set; in that
    # case a minimal substitution of the four characters & < > " is made
    # here.
    my $token = shift;

    if ($missing_html_entities) {

        # the ampersand must be handled first so that the entities made
        # by the following substitutions are not escaped a second time
        $token =~ s/\&/&amp;/g;
        $token =~ s/\</&lt;/g;
        $token =~ s/\>/&gt;/g;
        $token =~ s/\"/&quot;/g;
    }
    else {

        # encode_entities modifies its argument in place in void context
        HTML::Entities::encode_entities($token);
    }
    return $token;
}
5690 sub finish_formatting {
# Wind up the html output by calling close_html_file on this object.
# NOTE(review): the line which sets $self is not among the visible lines.
5692 # called after last line
5694 $self->close_html_file();
# NOTE(review): the sub header for these lines is not visible, and neither
# are the declarations of $self, $html_line, $line_character and
# $extra_space which are used below.
#
# Marks up one input line as html and prints it to the current <pre>
# section handle, $self->{_html_pre_fh}.  Nothing is done unless
# $self->{_html_file_opened} is set.
# Given: $line_of_tokens - hash reference describing the line; the keys
#        _line_type, _line_text and _line_number are read for every line,
#        and _rtoken_type, _rtokens and _rlevels for lines of type 'CODE'.
# A line of type 'CODE' is marked up token by token with markup_tokens;
# any other line is marked up as a whole, using a one-character pseudo
# token type chosen from its line type.  With the 'pod2html' option, pod
# lines are also printed to $self->{_html_pod_fh}.  With the
# 'html-line-numbers' option the line number is put in front of the line.
5701 return unless $self->{_html_file_opened};
5702 my $html_pre_fh = $self->{_html_pre_fh};
5703 my ($line_of_tokens) = @_;
5704 my $line_type = $line_of_tokens->{_line_type};
5705 my $input_line = $line_of_tokens->{_line_text};
5706 my $line_number = $line_of_tokens->{_line_number};
5709 # markup line of code..
5711 if ( $line_type eq 'CODE' ) {
5712 my $rtoken_type = $line_of_tokens->{_rtoken_type};
5713 my $rtokens = $line_of_tokens->{_rtokens};
5714 my $rlevels = $line_of_tokens->{_rlevels};
5716 if ( $input_line =~ /(^\s*)/ ) {
5722 my ($rcolored_tokens) =
5723 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5724 $html_line .= join '', @$rcolored_tokens;
5727 # markup line of non-code..
5730 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
5731 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
5732 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
5733 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5734 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
5735 elsif ( $line_type eq 'END_START' ) {
5736 $line_character = 'k';
5737 $self->add_toc_item( '__END__', '__END__' );
5739 elsif ( $line_type eq 'DATA_START' ) {
5740 $line_character = 'k';
5741 $self->add_toc_item( '__DATA__', '__DATA__' );
5743 elsif ( $line_type =~ /^POD/ ) {
5744 $line_character = 'P';
5745 if ( $rOpts->{'pod2html'} ) {
5746 my $html_pod_fh = $self->{_html_pod_fh};
5747 if ( $line_type eq 'POD_START' ) {
5749 my $rpre_string_stack = $self->{_rpre_string_stack};
5750 my $rpre_string = $rpre_string_stack->[-1];
5752 # if we have written any non-blank lines to the
5753 # current pre section, start writing to a new output
5755 if ( $$rpre_string =~ /\S/ ) {
5758 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5759 $self->{_html_pre_fh} = $html_pre_fh;
5760 push @$rpre_string_stack, \$pre_string;
5762 # leave a marker in the pod stream so we know
5763 # where to put the pre section we just
5765 my $for_html = '=for html'; # don't confuse pod utils
5766 $html_pod_fh->print(<<EOM);
5769 <!-- pERLTIDY sECTION -->
5774 # otherwise, just clear the current string and start
5778 $html_pod_fh->print("\n");
5781 $html_pod_fh->print( $input_line . "\n" );
5782 if ( $line_type eq 'POD_END' ) {
5783 $self->{_pod_cut_count}++;
5784 $html_pod_fh->print("\n");
5789 else { $line_character = 'Q' }
5790 $html_line = $self->markup_html_element( $input_line, $line_character );
5793 # add the line number if requested
5794 if ( $rOpts->{'html-line-numbers'} ) {
5796 ( $line_number < 10 ) ? " "
5797 : ( $line_number < 100 ) ? " "
5798 : ( $line_number < 1000 ) ? " "
5800 $html_line = $extra_space . $line_number . " " . $html_line;
5804 $html_pre_fh->print("$html_line\n");
5807 #####################################################################
5809 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5810 # line breaks to the token stream
5812 # WARNING: This is not a real class for speed reasons. Only one
5813 # Formatter may be used.
5815 #####################################################################
5817 package Perl::Tidy::Formatter;
5821 # Caution: these debug flags produce a lot of output
5822 # They should all be 0 except when debugging small scripts
# Each flag below is a constant.  The statements which follow the constant
# definitions call $debug_warning with the key of every flag which has a
# true value, so that an enabled flag is announced by a printed message.
5823 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
5824 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
5825 use constant FORMATTER_DEBUG_FLAG_CI => 0;
5826 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
5827 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
5828 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
5829 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5830 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
5831 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
5832 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
5833 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
5834 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
# prints "FORMATTER_DEBUGGING with key <key>" for the key passed to it
5836 my $debug_warning = sub {
5837 print "FORMATTER_DEBUGGING with key $_[0]\n";
# with a flag of 0 the right-hand side of '&&' is never evaluated
5840 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
5841 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
5842 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
5843 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
5844 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
5845 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
5846 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5847 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
5848 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
5849 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
5850 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
5851 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
5858 $max_gnu_stack_index
5859 $gnu_position_predictor
5860 $line_start_index_to_go
5861 $last_indentation_written
5862 $last_unadjusted_indentation
5864 $last_output_short_opening_token
5866 $saw_VERSION_in_this_file
5871 $gnu_sequence_number
5872 $last_output_indentation
5878 @type_sequence_to_go
5879 @container_environment_to_go
5880 @bond_strength_to_go
5881 @forced_breakpoint_to_go
5884 @leading_spaces_to_go
5885 @reduced_spaces_to_go
5886 @matching_token_to_go
5888 @nesting_blocks_to_go
5890 @nesting_depth_to_go
5892 @old_breakpoint_to_go
5896 %saved_opening_indentation
5899 $comma_count_in_batch
5900 $old_line_count_in_batch
5901 $last_nonblank_index_to_go
5902 $last_nonblank_type_to_go
5903 $last_nonblank_token_to_go
5904 $last_last_nonblank_index_to_go
5905 $last_last_nonblank_type_to_go
5906 $last_last_nonblank_token_to_go
5907 @nonblank_lines_at_depth
5911 $in_format_skipping_section
5912 $format_skipping_pattern_begin
5913 $format_skipping_pattern_end
5915 $forced_breakpoint_count
5916 $forced_breakpoint_undo_count
5917 @forced_breakpoint_undo_stack
5918 %postponed_breakpoint
5922 $first_embedded_tab_at
5923 $last_embedded_tab_at
5924 $deleted_semicolon_count
5925 $first_deleted_semicolon_at
5926 $last_deleted_semicolon_at
5927 $added_semicolon_count
5928 $first_added_semicolon_at
5929 $last_added_semicolon_at
5930 $first_tabbing_disagreement
5931 $last_tabbing_disagreement
5932 $in_tabbing_disagreement
5933 $tabbing_disagreement_count
5937 $last_line_leading_type
5938 $last_line_leading_level
5939 $last_last_line_leading_level
5942 %block_opening_line_number
5943 $csc_new_statement_ok
5946 $accumulating_text_for_block
5948 $rleading_block_if_elsif_text
5949 $leading_block_text_level
5950 $leading_block_text_length_exceeded
5951 $leading_block_text_line_length
5952 $leading_block_text_line_number
5953 $closing_side_comment_prefix_pattern
5954 $closing_side_comment_list_pattern
5956 $last_nonblank_token
5958 $last_last_nonblank_token
5959 $last_last_nonblank_type
5960 $last_nonblank_block_type
5963 %is_if_brace_follower
5964 %space_after_keyword
5967 %is_last_next_redo_return
5968 %is_other_brace_follower
5969 %is_else_brace_follower
5970 %is_anon_sub_brace_follower
5971 %is_anon_sub_1_brace_follower
5973 %is_sort_map_grep_eval
5974 %is_sort_map_grep_eval_do
5975 %is_block_without_semicolon
5980 %is_if_unless_and_or_last_next_redo_return
5981 %is_until_while_for_if_elsif_else
5987 $is_static_block_comment
5988 $index_start_one_line_block
5989 $semicolons_before_block_self_destruct
5990 $index_max_forced_break
5993 $vertical_aligner_object
5998 $last_line_had_side_comment
6001 $static_block_comment_pattern
6002 $static_side_comment_pattern
6003 %opening_vertical_tightness
6004 %closing_vertical_tightness
6005 %closing_token_indentation
6006 $some_closing_token_indentation
6008 %opening_token_right
6009 %stack_opening_token
6010 %stack_closing_token
6012 $block_brace_vertical_tightness_pattern
6015 $rOpts_add_whitespace
6016 $rOpts_block_brace_tightness
6017 $rOpts_block_brace_vertical_tightness
6018 $rOpts_brace_left_and_indent
6019 $rOpts_comma_arrow_breakpoints
6020 $rOpts_break_at_old_keyword_breakpoints
6021 $rOpts_break_at_old_comma_breakpoints
6022 $rOpts_break_at_old_logical_breakpoints
6023 $rOpts_break_at_old_ternary_breakpoints
6024 $rOpts_break_at_old_attribute_breakpoints
6025 $rOpts_closing_side_comment_else_flag
6026 $rOpts_closing_side_comment_maximum_text
6027 $rOpts_continuation_indentation
6029 $rOpts_delete_old_whitespace
6030 $rOpts_fuzzy_line_length
6031 $rOpts_indent_columns
6032 $rOpts_line_up_parentheses
6033 $rOpts_maximum_fields_per_table
6034 $rOpts_maximum_line_length
6035 $rOpts_short_concatenation_item_length
6036 $rOpts_keep_old_blank_lines
6037 $rOpts_ignore_old_breakpoints
6038 $rOpts_format_skipping
6039 $rOpts_space_function_paren
6040 $rOpts_space_keyword_paren
6041 $rOpts_keep_interior_semicolons
6043 $half_maximum_line_length
6047 %is_keyword_returning_list
6051 %right_bond_strength
# The statements below fill package lookup tables.  Each hash slice
# assignment of the form "@hash{@_} = (1) x scalar(@_)" stores a true
# value in the named hash for every word currently held in @_.
# NOTE(review): several of the lines which open the word lists (and the
# start of the enclosing block, if any) are not among the visible lines.
6068 # default list of block types for which -bli would apply
6069 $bli_list_string = 'if else elsif unless while for foreach do : sub';
6072 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
6073 <= >= == =~ !~ != ++ -- /= x=
6075 @is_digraph{@_} = (1) x scalar(@_);
6077 @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
6078 @is_trigraph{@_} = (1) x scalar(@_);
6081 = **= += *= &= <<= &&=
6082 -= /= |= >>= ||= //=
6086 @is_assignment{@_} = (1) x scalar(@_);
6096 @is_keyword_returning_list{@_} = (1) x scalar(@_);
# NOTE(review): 'is' is not one of the keywords named by the hash filled
# below; it may be a stray word -- confirm before relying on it.
6098 @_ = qw(is if unless and or err last next redo return);
6099 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
6101 # always break after a closing curly of these block types:
6102 @_ = qw(until while for if elsif else);
6103 @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
6105 @_ = qw(last next redo return);
6106 @is_last_next_redo_return{@_} = (1) x scalar(@_);
6108 @_ = qw(sort map grep);
6109 @is_sort_map_grep{@_} = (1) x scalar(@_);
6111 @_ = qw(sort map grep eval);
6112 @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
6114 @_ = qw(sort map grep eval do);
6115 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
6118 @is_if_unless{@_} = (1) x scalar(@_);
6120 @_ = qw(and or err);
6121 @is_and_or{@_} = (1) x scalar(@_);
6123 # Identify certain operators which often occur in chains.
6124 # Note: the minus (-) causes a side effect of padding of the first line in
6125 # something like this (by sub set_logical_padding):
6126 # Checkbutton => 'Transmission checked',
6127 # -variable => \$TRANS
6128 # This usually improves appearance so it seems ok.
6129 @_ = qw(&& || and or : ? . + - * /);
6130 @is_chain_operator{@_} = (1) x scalar(@_);
6132 # We can remove semicolons after blocks preceded by these keywords
6134 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6135 unless while until for foreach given when default);
6136 @is_block_without_semicolon{@_} = (1) x scalar(@_);
6138 # 'L' is token for opening { at hash key
6140 @is_opening_type{@_} = (1) x scalar(@_);
6142 # 'R' is token for closing } at hash key
6144 @is_closing_type{@_} = (1) x scalar(@_);
6147 @is_opening_token{@_} = (1) x scalar(@_);
6150 @is_closing_token{@_} = (1) x scalar(@_);
# Three-valued codes: yes (1), optional (0) and no (-1).
# NOTE(review): the WS_ prefix suggests that these are whitespace codes;
# confirm against the code which uses them.
6154 use constant WS_YES => 1;
6155 use constant WS_OPTIONAL => 0;
6156 use constant WS_NO => -1;
6158 # Token bond strengths.
# The values increase from VERY_WEAK to NO_BREAK, which is far larger
# than all of the others.
6159 use constant NO_BREAK => 10000;
6160 use constant VERY_STRONG => 100;
6161 use constant STRONG => 2.1;
6162 use constant NOMINAL => 1.1;
6163 use constant WEAK => 0.8;
6164 use constant VERY_WEAK => 0.55;
6166 # values for testing indexes in output array
6167 use constant UNDEFINED_INDEX => -1;
6169 # Maximum number of little messages; probably need not be changed.
6170 use constant MAX_NAG_MESSAGES => 6;
6172 # increment between sequence numbers for each type
6173 # For example, ?: pairs might have numbers 7,11,15,...
6174 use constant TYPE_SEQUENCE_INCREMENT => 4;
6178 # methods to count instances
# All three subs work on the same variable, $_count, and each returns its
# value: get_count as it is, the other two after changing it by one.
# NOTE(review): the declaration of $_count is not among the visible lines.
6180 sub get_count { $_count; }
6181 sub _increment_count { ++$_count }
6182 sub _decrement_count { --$_count }
6187 # trim leading and trailing whitespace from a string
6195 # given a string containing words separated by whitespace,
6196 # return the list of words
6201 return split( /\s+/, $str );
6204 # interface to Perl::Tidy::Logger routines
# Each wrapper below passes its call on to the method of the same purpose
# of $logger_object, and does nothing at all when no logger object has
# been set.  Some of the wrappers forward their arguments (@_), the
# others call the logger method without arguments.
# NOTE(review): the sub headers in front of the first, second and fourth
# "if ($logger_object)" tests are not among the visible lines.
6206 if ($logger_object) {
6207 $logger_object->warning(@_);
6212 if ($logger_object) {
6213 $logger_object->complain(@_);
6217 sub write_logfile_entry {
6218 if ($logger_object) {
6219 $logger_object->write_logfile_entry(@_);
6224 if ($logger_object) {
6225 $logger_object->black_box(@_);
6229 sub report_definite_bug {
6230 if ($logger_object) {
6231 $logger_object->report_definite_bug();
6235 sub get_saw_brace_error {
6236 if ($logger_object) {
6237 $logger_object->get_saw_brace_error();
6241 sub we_are_at_the_last_line {
6242 if ($logger_object) {
6243 $logger_object->we_are_at_the_last_line();
6247 # interface to Perl::Tidy::Diagnostics routine
# forwards its arguments only when a diagnostics object has been set
6248 sub write_diagnostics {
6250 if ($diagnostics_object) {
6251 $diagnostics_object->write_diagnostics(@_);
6255 sub get_added_semicolon_count {
# accessor: returns the current value of the package variable
# $added_semicolon_count
6257 return $added_semicolon_count;
6261 $_[0]->_decrement_count();
# NOTE(review): the sub header for these lines is not visible; the code
# blesses $formatter_self into $class and returns it, so this is
# presumably the body of the Formatter constructor.  The declaration of
# $class and the opening of the %defaults list are not visible either.
#
# Named arguments, all defaulting to undef:
#   sink_object        - handed to the Perl::Tidy::FileWriter created here
#   diagnostics_object - stored in $diagnostics_object
#   logger_object      - stored in $logger_object
# The code resets the package variables of the formatter to their
# starting values, creates the file writer and the vertical aligner,
# writes a note about the indentation scheme to the log file, and checks
# with _increment_count that no more than one object is created.
6268 # we are given an object with a write_line() method to take lines
6270 sink_object => undef,
6271 diagnostics_object => undef,
6272 logger_object => undef,
# arguments given by the caller override the defaults
6274 my %args = ( %defaults, @_ );
6276 $logger_object = $args{logger_object};
6277 $diagnostics_object = $args{diagnostics_object};
6279 # we create another object with a get_line() and peek_ahead() method
6280 my $sink_object = $args{sink_object};
6281 $file_writer_object =
6282 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6284 # initialize the leading whitespace stack to negative levels
6285 # so that we can never run off the end of the stack
6286 $gnu_position_predictor = 0; # where the current token is predicted to be
6287 $max_gnu_stack_index = 0;
6288 $max_gnu_item_index = -1;
6289 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6290 @gnu_item_list = ();
6291 $last_output_indentation = 0;
6292 $last_indentation_written = 0;
6293 $last_unadjusted_indentation = 0;
6294 $last_leading_token = "";
6295 $last_output_short_opening_token = 0;
6297 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6298 $saw_END_or_DATA_ = 0;
6300 @block_type_to_go = ();
6301 @type_sequence_to_go = ();
6302 @container_environment_to_go = ();
6303 @bond_strength_to_go = ();
6304 @forced_breakpoint_to_go = ();
6305 @lengths_to_go = (); # line length to start of ith token
6307 @matching_token_to_go = ();
6308 @mate_index_to_go = ();
6309 @nesting_blocks_to_go = ();
6310 @ci_levels_to_go = ();
6311 @nesting_depth_to_go = (0);
6312 @nobreak_to_go = ();
6313 @old_breakpoint_to_go = ();
6316 @leading_spaces_to_go = ();
6317 @reduced_spaces_to_go = ();
6320 @has_broken_sublist = ();
6321 @want_comma_break = ();
6324 $first_tabbing_disagreement = 0;
6325 $last_tabbing_disagreement = 0;
6326 $tabbing_disagreement_count = 0;
6327 $in_tabbing_disagreement = 0;
6328 $input_line_tabbing = undef;
6330 $last_line_type = "";
6331 $last_last_line_leading_level = 0;
6332 $last_line_leading_level = 0;
6333 $last_line_leading_type = '#';
6335 $last_nonblank_token = ';';
6336 $last_nonblank_type = ';';
6337 $last_last_nonblank_token = ';';
6338 $last_last_nonblank_type = ';';
6339 $last_nonblank_block_type = "";
6340 $last_output_level = 0;
6341 $looking_for_else = 0;
6342 $embedded_tab_count = 0;
6343 $first_embedded_tab_at = 0;
6344 $last_embedded_tab_at = 0;
6345 $deleted_semicolon_count = 0;
6346 $first_deleted_semicolon_at = 0;
6347 $last_deleted_semicolon_at = 0;
6348 $added_semicolon_count = 0;
6349 $first_added_semicolon_at = 0;
6350 $last_added_semicolon_at = 0;
6351 $last_line_had_side_comment = 0;
6352 $is_static_block_comment = 0;
6353 %postponed_breakpoint = ();
6355 # variables for adding side comments
6356 %block_leading_text = ();
6357 %block_opening_line_number = ();
6358 $csc_new_statement_ok = 1;
6359 %csc_block_label = ();
6361 %saved_opening_indentation = ();
6362 $in_format_skipping_section = 0;
6364 reset_block_text_accumulator();
6366 prepare_for_new_input_lines();
6368 $vertical_aligner_object =
6369 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6370 $logger_object, $diagnostics_object );
# record in the log file which kind of indentation will be used
6372 if ( $rOpts->{'entab-leading-whitespace'} ) {
6373 write_logfile_entry(
6374 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6377 elsif ( $rOpts->{'tabs'} ) {
6378 write_logfile_entry("Indentation will be with a tab character\n");
6381 write_logfile_entry(
6382 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6385 # This was the start of a formatter referent, but object-oriented
6386 # coding has turned out to be too slow here.
6387 $formatter_self = {};
6389 bless $formatter_self, $class;
6391 # Safety check..this is not a class yet
# NOTE(review): the statement which uses the message below is not among
# the visible lines.
6392 if ( _increment_count() > 1 ) {
6394 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6396 return $formatter_self;
sub prepare_for_new_input_lines {

    # Reset the package variables which describe the current batch of
    # output tokens: the batch counter is advanced, the per-batch tables
    # are emptied, the indexes into the '_to_go' arrays are set back to
    # UNDEFINED_INDEX, the counters are set back to their starting
    # values, and any one-line block in progress is forgotten.
    #
    # Takes no arguments.

    $gnu_sequence_number++;    # increment output batch counter
    %last_gnu_equals                = ();
    %gnu_comma_count                = ();
    %gnu_arrow_count                = ();
    $line_start_index_to_go         = 0;
    $max_gnu_item_index             = UNDEFINED_INDEX;
    $index_max_forced_break         = UNDEFINED_INDEX;
    $max_index_to_go                = UNDEFINED_INDEX;
    $last_nonblank_index_to_go      = UNDEFINED_INDEX;
    $last_nonblank_type_to_go       = '';
    $last_nonblank_token_to_go      = '';
    $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
    $last_last_nonblank_type_to_go  = '';
    $last_last_nonblank_token_to_go = '';
    $forced_breakpoint_count        = 0;
    $forced_breakpoint_undo_count   = 0;
    $rbrace_follower                = undef;
    $lengths_to_go[0]               = 0;
    $old_line_count_in_batch        = 1;
    $comma_count_in_batch           = 0;
    $starting_in_quote              = 0;

    destroy_one_line_block();
}
# NOTE(review): the sub header for these lines is not visible, and neither
# are the declarations of $skip_line and $tee_line which are used below.
#
# Handles one input line, described by the hash reference
# $line_of_tokens (the keys _line_type and _line_text are read here).
# With the 'notidy' option the line is written out unindented.  Otherwise
# a line of type 'CODE' is passed on to print_line_of_tokens, and a line
# of any other type is written out unindented unless it is to be skipped
# (pod lines with the 'delete-pod' option).  Pod lines are tee'd when the
# 'tee-pod' option is set.  The line type is saved in $last_line_type.
6429 my ($line_of_tokens) = @_;
6431 my $line_type = $line_of_tokens->{_line_type};
6432 my $input_line = $line_of_tokens->{_line_text};
6434 if ( $rOpts->{notidy} ) {
6435 write_unindented_line($input_line);
6436 $last_line_type = $line_type;
6440 # _line_type codes are:
6441 # SYSTEM - system-specific code before hash-bang line
6442 # CODE - line of perl code (including comments)
6443 # POD_START - line starting pod, such as '=head'
6444 # POD - pod documentation text
6445 # POD_END - last line of pod section, '=cut'
6446 # HERE - text of here-document
6447 # HERE_END - last line of here-doc (target word)
6448 # FORMAT - format section
6449 # FORMAT_END - last line of format section, '.'
6450 # DATA_START - __DATA__ line
6451 # DATA - unidentified text following __DATA__
6452 # END_START - __END__ line
6453 # END - unidentified text following __END__
6454 # ERROR - we are in big trouble, probably not a perl script
6456 # put a blank line after an =cut which comes before __END__ and __DATA__
6457 # (required by podchecker)
6458 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6459 $file_writer_object->reset_consecutive_blank_lines();
6460 if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6463 # handle line of code..
6464 if ( $line_type eq 'CODE' ) {
6466 # let logger see all non-blank lines of code
6467 if ( $input_line !~ /^\s*$/ ) {
6468 my $output_line_number =
6469 $vertical_aligner_object->get_output_line_number();
6470 black_box( $line_of_tokens, $output_line_number );
6472 print_line_of_tokens($line_of_tokens);
6475 # handle line of non-code..
6481 if ( $line_type =~ /^POD/ ) {
6483 # Pod docs should have a preceding blank line. But stay
6484 # out of __END__ and __DATA__ sections, because
6485 # the user may be using this section for any purpose whatsoever
6486 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6487 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
6489 && $line_type eq 'POD_START'
6490 # If the previous line is a __DATA__ line (or data
6491 # contents), it's not valid to change it at all, no
6492 # matter what is in the data
6493 && !$saw_END_or_DATA_ )
6499 # leave the blank counters in a predictable state
6500 # after __END__ or __DATA__
6501 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6502 $file_writer_object->reset_consecutive_blank_lines();
6503 $saw_END_or_DATA_ = 1;
6506 # write unindented non-code line
6507 if ( !$skip_line ) {
6508 if ($tee_line) { $file_writer_object->tee_on() }
6509 write_unindented_line($input_line);
6510 if ($tee_line) { $file_writer_object->tee_off() }
6513 $last_line_type = $line_type;
sub create_one_line_block {

    # Record the start of a one-line block in two package variables.
    #
    # Given:
    #   $index           - stored in $index_start_one_line_block
    #   $semicolon_count - stored in
    #                      $semicolons_before_block_self_destruct
    my ( $index, $semicolon_count ) = @_;
    $index_start_one_line_block            = $index;
    $semicolons_before_block_self_destruct = $semicolon_count;
}
sub destroy_one_line_block {

    # Forget any one-line block which was recorded by
    # create_one_line_block: the start index goes back to UNDEFINED_INDEX
    # and the semicolon count to zero.  Takes no arguments.
    $index_start_one_line_block            = UNDEFINED_INDEX;
    $semicolons_before_block_self_destruct = 0;
}
sub leading_spaces_to_go {

    # return the number of indentation spaces for a token in the output stream;
    # these were previously stored by 'set_leading_whitespace'.
    #
    # Given: the index of the token in @leading_spaces_to_go.  The stored
    # entry may be a plain number or an indentation object; get_SPACES
    # handles both.
    return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
}
sub get_SPACES {

    # return the number of leading spaces associated with an indentation
    # variable $indentation is either a constant number of spaces or an object
    # with a get_SPACES method.
    #
    # Given:   $indentation - a number, or a reference to such an object
    # Returns: the number itself, or the result of the object's
    #          get_SPACES method
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
}
sub get_RECOVERABLE_SPACES {

    # return the number of spaces (+ means shift right, - means shift left)
    # that we would like to shift a group of lines with the same indentation
    # to get them to line up with their opening parens
    #
    # Given:   $indentation - a plain number of spaces, or a reference to
    #          an object with a get_RECOVERABLE_SPACES method
    # Returns: the result of the object's method, or 0 for a plain number
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
}
sub get_AVAILABLE_SPACES_to_go {

    # Given: the index of a token in @leading_spaces_to_go.
    my $item = $leading_spaces_to_go[ $_[0] ];

    # return the number of available leading spaces associated with an
    # indentation variable.  $indentation is either a constant number of
    # spaces or an object with a get_AVAILABLE_SPACES method.
    # (a constant number of spaces has no available spaces, so 0 is
    # returned for it)
    return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
}
6563 sub new_lp_indentation_item {
# Creates a Perl::Tidy::IndentationItem from the given values together
# with the current values of $gnu_sequence_number, $max_gnu_stack_index
# and $line_start_index_to_go.  When $level is not negative, the item
# index counter $max_gnu_item_index is advanced and the new item is also
# stored in @gnu_item_list at that index.
# NOTE(review): the declaration of $index, the first line of the
# argument list of the constructor call and the statement which returns
# the item are not among the visible lines.
6565 # this is an interface to the IndentationItem class
6566 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6568 # A negative level implies not to store the item in the item_list
6570 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6572 my $item = Perl::Tidy::IndentationItem->new(
6574 $ci_level, $available_spaces,
6575 $index, $gnu_sequence_number,
6576 $align_paren, $max_gnu_stack_index,
6577 $line_start_index_to_go,
6580 if ( $level >= 0 ) {
6581 $gnu_item_list[$max_gnu_item_index] = $item;
6587 sub set_leading_whitespace {
6589 # This routine defines leading whitespace
6590 # given: the level and continuation_level of a token,
6591 # define: space count of leading string which would apply if it
6592 # were the first token of a new line.
6594 my ( $level, $ci_level, $in_continued_quote ) = @_;
6596 # modify for -bli, which adds one continuation indentation for
6598 if ( $rOpts_brace_left_and_indent
6599 && $max_index_to_go == 0
6600 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6605 # patch to avoid trouble when input file has negative indentation.
6606 # other logic should catch this error.
6607 if ( $level < 0 ) { $level = 0 }
6609 #-------------------------------------------
6610 # handle the standard indentation scheme
6611 #-------------------------------------------
6612 unless ($rOpts_line_up_parentheses) {
6614 $ci_level * $rOpts_continuation_indentation +
6615 $level * $rOpts_indent_columns;
6617 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6619 if ($in_continued_quote) {
6623 $leading_spaces_to_go[$max_index_to_go] = $space_count;
6624 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6628 #-------------------------------------------------------------
6629 # handle case of -lp indentation..
6630 #-------------------------------------------------------------
6632 # The continued_quote flag means that this is the first token of a
6633 # line, and it is the continuation of some kind of multi-line quote
6634 # or pattern. It requires special treatment because it must have no
6635 # added leading whitespace. So we create a special indentation item
6636 # which is not in the stack.
6637 if ($in_continued_quote) {
6638 my $space_count = 0;
6639 my $available_space = 0;
6640 $level = -1; # flag to prevent storing in item_list
6641 $leading_spaces_to_go[$max_index_to_go] =
6642 $reduced_spaces_to_go[$max_index_to_go] =
6643 new_lp_indentation_item( $space_count, $level, $ci_level,
6644 $available_space, 0 );
6648 # get the top state from the stack
6649 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6650 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6651 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6653 my $type = $types_to_go[$max_index_to_go];
6654 my $token = $tokens_to_go[$max_index_to_go];
6655 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6657 if ( $type eq '{' || $type eq '(' ) {
6659 $gnu_comma_count{ $total_depth + 1 } = 0;
6660 $gnu_arrow_count{ $total_depth + 1 } = 0;
6662 # If we come to an opening token after an '=' token of some type,
6663 # see if it would be helpful to 'break' after the '=' to save space
6664 my $last_equals = $last_gnu_equals{$total_depth};
6665 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6667 # find the position if we break at the '='
6668 my $i_test = $last_equals;
6669 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6672 ##my $too_close = ($i_test==$max_index_to_go-1);
6674 my $test_position = total_line_length( $i_test, $max_index_to_go );
6678 # the equals is not just before an open paren (testing)
6681 # if we are beyond the midpoint
6682 $gnu_position_predictor > $half_maximum_line_length
6684 # or we are beyond the 1/4 point and there was an old
6685 # break at the equals
6687 $gnu_position_predictor > $half_maximum_line_length / 2
6689 $old_breakpoint_to_go[$last_equals]
6690 || ( $last_equals > 0
6691 && $old_breakpoint_to_go[ $last_equals - 1 ] )
6692 || ( $last_equals > 1
6693 && $types_to_go[ $last_equals - 1 ] eq 'b'
6694 && $old_breakpoint_to_go[ $last_equals - 2 ] )
6700 # then make the switch -- note that we do not set a real
6701 # breakpoint here because we may not really need one; sub
6702 # scan_list will do that if necessary
6703 $line_start_index_to_go = $i_test + 1;
6704 $gnu_position_predictor = $test_position;
6709 # Check for decreasing depth ..
6710 # Note that one token may have both decreasing and then increasing
6711 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
6712 # in this example we would first go back to (1,0) then up to (2,0)
6714 if ( $level < $current_level || $ci_level < $current_ci_level ) {
6716 # loop to find the first entry at or completely below this level
6717 my ( $lev, $ci_lev );
6719 if ($max_gnu_stack_index) {
6721 # save index of token which closes this level
6722 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6724 # Undo any extra indentation if we saw no commas
6725 my $available_spaces =
6726 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6728 my $comma_count = 0;
6729 my $arrow_count = 0;
6730 if ( $type eq '}' || $type eq ')' ) {
6731 $comma_count = $gnu_comma_count{$total_depth};
6732 $arrow_count = $gnu_arrow_count{$total_depth};
6733 $comma_count = 0 unless $comma_count;
6734 $arrow_count = 0 unless $arrow_count;
6736 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6737 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6739 if ( $available_spaces > 0 ) {
6741 if ( $comma_count <= 0 || $arrow_count > 0 ) {
6743 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6745 $gnu_stack[$max_gnu_stack_index]
6746 ->get_SEQUENCE_NUMBER();
6748 # Be sure this item was created in this batch. This
6749 # should be true because we delete any available
6750 # space from open items at the end of each batch.
6751 if ( $gnu_sequence_number != $seqno
6752 || $i > $max_gnu_item_index )
6755 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6757 report_definite_bug();
6761 if ( $arrow_count == 0 ) {
6763 ->permanently_decrease_AVAILABLE_SPACES(
6768 ->tentatively_decrease_AVAILABLE_SPACES(
6775 $j <= $max_gnu_item_index ;
6780 ->decrease_SPACES($available_spaces);
6787 --$max_gnu_stack_index;
6788 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6789 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6791 # stop when we reach a level at or below the current level
6792 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6794 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6795 $current_level = $lev;
6796 $current_ci_level = $ci_lev;
6801 # reached bottom of stack .. should never happen because
6802 # only negative levels can get here, and $level was forced
6803 # to be positive above.
6806 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6808 report_definite_bug();
6814 # handle increasing depth
6815 if ( $level > $current_level || $ci_level > $current_ci_level ) {
6817 # Compute the standard incremental whitespace. This will be
6818 # the minimum incremental whitespace that will be used. This
6819 # choice results in a smooth transition between the gnu-style
6820 # and the standard style.
6821 my $standard_increment =
6822 ( $level - $current_level ) * $rOpts_indent_columns +
6823 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6825 # Now we have to define how much extra incremental space
6826 # ("$available_space") we want. This extra space will be
6827 # reduced as necessary when long lines are encountered or when
6828 # it becomes clear that we do not have a good list.
6829 my $available_space = 0;
6830 my $align_paren = 0;
6833 # initialization on empty stack..
6834 if ( $max_gnu_stack_index == 0 ) {
6835 $space_count = $level * $rOpts_indent_columns;
6838 # if this is a BLOCK, add the standard increment
6839 elsif ($last_nonblank_block_type) {
6840 $space_count += $standard_increment;
6843 # if last nonblank token was not structural indentation,
6844 # just use standard increment
6845 elsif ( $last_nonblank_type ne '{' ) {
6846 $space_count += $standard_increment;
6849 # otherwise use the space to the first non-blank level change token
6852 $space_count = $gnu_position_predictor;
6854 my $min_gnu_indentation =
6855 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6857 $available_space = $space_count - $min_gnu_indentation;
6858 if ( $available_space >= $standard_increment ) {
6859 $min_gnu_indentation += $standard_increment;
6861 elsif ( $available_space > 1 ) {
6862 $min_gnu_indentation += $available_space + 1;
6864 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6865 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6866 $min_gnu_indentation += 2;
6869 $min_gnu_indentation += 1;
6873 $min_gnu_indentation += $standard_increment;
6875 $available_space = $space_count - $min_gnu_indentation;
6877 if ( $available_space < 0 ) {
6878 $space_count = $min_gnu_indentation;
6879 $available_space = 0;
6884 # update state, but not on a blank token
6885 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6887 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6889 ++$max_gnu_stack_index;
6890 $gnu_stack[$max_gnu_stack_index] =
6891 new_lp_indentation_item( $space_count, $level, $ci_level,
6892 $available_space, $align_paren );
6894 # If the opening paren is beyond the half-line length, then
6895 # we will use the minimum (standard) indentation. This will
6896 # help avoid problems associated with running out of space
6897 # near the end of a line. As a result, in deeply nested
6898 # lists, there will be some indentations which are limited
6899 # to this minimum standard indentation. But the most deeply
6900 # nested container will still probably be able to shift its
6901 # parameters to the right for proper alignment, so in most
6902 # cases this will not be noticeable.
6903 if ( $available_space > 0
6904 && $space_count > $half_maximum_line_length )
6906 $gnu_stack[$max_gnu_stack_index]
6907 ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6912 # Count commas and look for non-list characters. Once we see a
6913 # non-list character, we give up and don't look for any more commas.
6914 if ( $type eq '=>' ) {
6915 $gnu_arrow_count{$total_depth}++;
6917 # tentatively treating '=>' like '=' for estimating breaks
6918 # TODO: this could use some experimentation
6919 $last_gnu_equals{$total_depth} = $max_index_to_go;
6922 elsif ( $type eq ',' ) {
6923 $gnu_comma_count{$total_depth}++;
6926 elsif ( $is_assignment{$type} ) {
6927 $last_gnu_equals{$total_depth} = $max_index_to_go;
6930 # this token might start a new line
6931 # if this is a non-blank..
6932 if ( $type ne 'b' ) {
6937 # this is the first nonblank token of the line
6938 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6940 # or previous character was one of these:
6941 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6943 # or previous character was opening and this does not close it
6944 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6945 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6947 # or this token is one of these:
6948 || $type =~ /^([\.]|\|\||\&\&)$/
6950 # or this is a closing structure
6951 || ( $last_nonblank_type_to_go eq '}'
6952 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6954 # or previous token was keyword 'return'
6955 || ( $last_nonblank_type_to_go eq 'k'
6956 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6958 # or starting a new line at certain keywords is fine
6960 && $is_if_unless_and_or_last_next_redo_return{$token} )
6962 # or this is after an assignment after a closing structure
6964 $is_assignment{$last_nonblank_type_to_go}
6966 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6968 # and it is significantly to the right
6969 || $gnu_position_predictor > $half_maximum_line_length
6974 check_for_long_gnu_style_lines();
6975 $line_start_index_to_go = $max_index_to_go;
6977 # back up 1 token if we want to break before that type
6978 # otherwise, we may strand tokens like '?' or ':' on a line
6979 if ( $line_start_index_to_go > 0 ) {
6980 if ( $last_nonblank_type_to_go eq 'k' ) {
6982 if ( $want_break_before{$last_nonblank_token_to_go} ) {
6983 $line_start_index_to_go--;
6986 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6987 $line_start_index_to_go--;
6993 # remember the predicted position of this token on the output line
6994 if ( $max_index_to_go > $line_start_index_to_go ) {
6995 $gnu_position_predictor =
6996 total_line_length( $line_start_index_to_go, $max_index_to_go );
6999 $gnu_position_predictor = $space_count +
7000 token_sequence_length( $max_index_to_go, $max_index_to_go );
7003 # store the indentation object for this token
7004 # this allows us to manipulate the leading whitespace
7005 # (in case we have to reduce indentation to fit a line) without
7006 # having to change any token values
7007 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
7008 $reduced_spaces_to_go[$max_index_to_go] =
7009 ( $max_gnu_stack_index > 0 && $ci_level )
7010 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
7011 : $gnu_stack[$max_gnu_stack_index];
7015 sub check_for_long_gnu_style_lines {
# Purpose: if the predicted output position ($gnu_position_predictor)
# runs past the maximum line length, take back "available" spaces from
# the still-open indentation items in @gnu_item_list, largest first,
# until enough has been recovered or no candidates remain.
# Takes no arguments; it works entirely on file-scoped state and
# updates $gnu_position_predictor as spaces are removed.
7017 # look at the current estimated maximum line length, and
7018 # remove some whitespace if it exceeds the desired maximum
7020 # this is only for the '-lp' style
7021 return unless ($rOpts_line_up_parentheses);
7023 # nothing can be done if no stack items defined for this line
7024 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7026 # see if we have exceeded the maximum desired line length
7027 # keep 2 extra free because they are needed in some cases
7028 # (result of trial-and-error testing)
7030 $gnu_position_predictor - $rOpts_maximum_line_length + 2;
# a non-positive value means the line already fits
7032 return if ( $spaces_needed <= 0 );
7034 # We are over the limit, so try to remove a requested number of
7035 # spaces from leading whitespace. We are only allowed to remove
7036 # from whitespace items created on this batch, since others have
7037 # already been used and cannot be undone.
# each candidate is an [ item index, available spaces ] pair
7038 my @candidates = ();
7041 # loop over all whitespace items created for the current batch
7042 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7043 my $item = $gnu_item_list[$i];
7045 # item must still be open to be a candidate (otherwise it
7046 # cannot influence the current token)
7047 next if ( $item->get_CLOSED() >= 0 );
7049 my $available_spaces = $item->get_AVAILABLE_SPACES();
7051 if ( $available_spaces > 0 ) {
7052 push( @candidates, [ $i, $available_spaces ] );
7056 return unless (@candidates);
7058 # sort by available whitespace so that we can remove whitespace
7059 # from the maximum available first
# (numeric descending sort on the second element of each pair)
7060 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
7062 # keep removing whitespace until we are done or have no more
7064 foreach $candidate (@candidates) {
7065 my ( $i, $available_spaces ) = @{$candidate};
# amount to take from this item; presumably the smaller of what is
# available and what is still needed -- TODO confirm against the
# full ternary
7066 my $deleted_spaces =
7067 ( $available_spaces > $spaces_needed )
7069 : $available_spaces;
7071 # remove the incremental space from this item
7072 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
7076 # update the leading whitespace of this item and all items
7077 # that came after it
# note: this inner loop advances the $i unpacked from the candidate
7078 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
7080 my $old_spaces = $gnu_item_list[$i]->get_SPACES();
7081 if ( $old_spaces >= $deleted_spaces ) {
7082 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
7085 # shouldn't happen except for code bug:
7087 my $level = $gnu_item_list[$i_debug]->get_LEVEL();
7088 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
7089 my $old_level = $gnu_item_list[$i]->get_LEVEL();
7090 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
# NOTE(review): $old_ci_level is assigned above but the message below
# interpolates $ci_level in both "ci=" fields; the second one
# presumably should be $old_ci_level -- confirm and correct the message.
7092 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
7094 report_definite_bug();
# account for the spaces just removed, and stop once the line fits
7097 $gnu_position_predictor -= $deleted_spaces;
7098 $spaces_needed -= $deleted_spaces;
7099 last unless ( $spaces_needed > 0 );
7103 sub finish_lp_batch {
7105 # This routine is called once after each output stream batch is
7106 # finished to undo indentation for all incomplete -lp
7107 # indentation levels. It is too risky to leave a level open,
7108 # because then we can't backtrack in case of a long line to follow.
7109 # This means that comments and blank lines will disrupt this
7110 # indentation style. But the vertical aligner may be able to
7111 # get the space back if there are side comments.
# Takes no arguments; operates on @gnu_item_list entries with indexes
# 0 .. $max_gnu_item_index.
7113 # this is only for the 'lp' style
7114 return unless ($rOpts_line_up_parentheses);
7116 # nothing can be done if no stack items defined for this line
7117 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7119 # loop over all whitespace items created for the current batch
7121 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7122 my $item = $gnu_item_list[$i];
7124 # only look for open items
# (get_CLOSED() >= 0 marks an item as closed and is skipped)
7125 next if ( $item->get_CLOSED() >= 0 );
7127 # Tentatively remove all of the available space
7128 # (The vertical aligner will try to get it back later)
7129 my $available_spaces = $item->get_AVAILABLE_SPACES();
7130 if ( $available_spaces > 0 ) {
7132 # delete incremental space for this item
7134 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
7136 # Reduce the total indentation space of any nodes that follow
7137 # Note that any such nodes must necessarily be dependents
# every later item in the list is shifted left by the same amount
7139 foreach ( $i + 1 .. $max_gnu_item_index ) {
7140 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
7147 sub reduce_lp_indentation {
7149 # reduce the leading whitespace at token $i if possible by $spaces_wanted
7150 # (a large value of $spaces_wanted will remove all excess space)
7151 # NOTE: to be called from scan_list only for a sequence of tokens
7152 # contained between opening and closing parens/braces/brackets
#
# Parameters:
#   $i             - index into @leading_spaces_to_go
#   $spaces_wanted - number of spaces the caller would like removed
# Returns $deleted_spaces, which starts at 0 and so stays 0 when the
# condition below does not hold.
7154 my ( $i, $spaces_wanted ) = @_;
7155 my $deleted_spaces = 0;
7157 my $item = $leading_spaces_to_go[$i];
7158 my $available_spaces = $item->get_AVAILABLE_SPACES();
# only act when the item has space to give AND either the request fits
# within that space or the item has no child recorded (get_HAVE_CHILD)
7161 $available_spaces > 0
7162 && ( ( $spaces_wanted <= $available_spaces )
7163 || !$item->get_HAVE_CHILD() )
7167 # we'll remove these spaces, but mark them as recoverable
7169 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
7172 return $deleted_spaces;
7175 sub token_sequence_length {
7177 # return length of tokens ($ifirst .. $ilast) including first & last
7178 # returns 0 if $ifirst > $ilast
# also returns 0 when $ilast is negative
7181 return 0 if ( $ilast < 0 || $ifirst > $ilast );
# a negative $ifirst yields $lengths_to_go[ $ilast + 1 ] with nothing
# subtracted
7182 return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
# otherwise the result is a difference of two @lengths_to_go entries;
# presumably @lengths_to_go holds cumulative lengths -- TODO confirm
7183 return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
7186 sub total_line_length {
7188 # return length of a line of tokens ($ifirst .. $ilast)
# The result is the leading whitespace at $ifirst plus the token
# sequence length from $ifirst through $ilast.
# a negative $ifirst is clamped to 0 before use
7191 if ( $ifirst < 0 ) { $ifirst = 0 }
7193 return leading_spaces_to_go($ifirst) +
7194 token_sequence_length( $ifirst, $ilast );
7197 sub excess_line_length {
7199 # return number of characters by which a line of tokens ($ifirst..$ilast)
7200 # exceeds the allowable line length.
# The result is negative or zero when the line fits within
# $rOpts_maximum_line_length.
# a negative $ifirst is clamped to 0 before use
7203 if ( $ifirst < 0 ) { $ifirst = 0 }
7204 return leading_spaces_to_go($ifirst) +
7205 token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
7208 sub finish_formatting {
7210 # flush buffer and write any informative messages
# The summary entries below go to the logfile via write_logfile_entry:
# counts and input line numbers for added semicolons, deleted
# semicolons, embedded tabs, and indentation disagreements. The routine
# then asks the vertical aligner and the file writer to report their
# own findings.
7214 $file_writer_object->decrement_output_line_number()
7215 ; # fix up line number since it was incremented
7216 we_are_at_the_last_line();
# --- semicolons added by the formatter ---
7217 if ( $added_semicolon_count > 0 ) {
# "First" is only printed when there is more than one occurrence
7218 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
7220 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
7221 write_logfile_entry("$added_semicolon_count $what added:\n");
7222 write_logfile_entry(
7223 " $first at input line $first_added_semicolon_at\n");
7225 if ( $added_semicolon_count > 1 ) {
7226 write_logfile_entry(
7227 " Last at input line $last_added_semicolon_at\n");
7229 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
7230 write_logfile_entry("\n");
# --- semicolons deleted by the formatter ---
7233 if ( $deleted_semicolon_count > 0 ) {
7234 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
7236 ( $deleted_semicolon_count > 1 )
7239 write_logfile_entry(
7240 "$deleted_semicolon_count unnecessary $what deleted:\n");
7241 write_logfile_entry(
7242 " $first at input line $first_deleted_semicolon_at\n");
7244 if ( $deleted_semicolon_count > 1 ) {
7245 write_logfile_entry(
7246 " Last at input line $last_deleted_semicolon_at\n");
7248 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
7249 write_logfile_entry("\n");
# --- quotes or patterns containing tab characters ---
7252 if ( $embedded_tab_count > 0 ) {
7253 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
7255 ( $embedded_tab_count > 1 )
7256 ? "quotes or patterns"
7257 : "quote or pattern";
7258 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
7259 write_logfile_entry(
7260 "This means the display of this script could vary with device or software\n"
7262 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
7264 if ( $embedded_tab_count > 1 ) {
7265 write_logfile_entry(
7266 " Last at input line $last_embedded_tab_at\n");
7268 write_logfile_entry("\n");
# --- indentation (tabbing) disagreements with the input ---
7271 if ($first_tabbing_disagreement) {
7272 write_logfile_entry(
7273 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
# a disagreement that was still in progress when input ended
7277 if ($in_tabbing_disagreement) {
7278 write_logfile_entry(
7279 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
7284 if ($last_tabbing_disagreement) {
7286 write_logfile_entry(
7287 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
7291 write_logfile_entry("No indentation disagreement seen\n");
7294 write_logfile_entry("\n");
# let the other output-stage objects report their own findings
7296 $vertical_aligner_object->report_anything_unusual();
7298 $file_writer_object->report_line_length_errors();
7303 # This routine is called to check the Opts hash after it is defined
7306 my ( $tabbing_string, $tab_msg );
7308 make_static_block_comment_pattern();
7309 make_static_side_comment_pattern();
7310 make_closing_side_comment_prefix();
7311 make_closing_side_comment_list_pattern();
7312 $format_skipping_pattern_begin =
7313 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7314 $format_skipping_pattern_end =
7315 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7317 # If closing side comments ARE selected, then we can safely
7318 # delete old closing side comments unless closing side comment
7319 # warnings are requested. This is a good idea because it will
7320 # eliminate any old csc's which fall below the line count threshold.
7321 # We cannot do this if warnings are turned on, though, because we
7322 # might delete some text which has been added. So that must
7323 # be handled when comments are created.
7324 if ( $rOpts->{'closing-side-comments'} ) {
7325 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7326 $rOpts->{'delete-closing-side-comments'} = 1;
7330 # If closing side comments ARE NOT selected, but warnings ARE
7331 # selected and we ARE DELETING csc's, then we will pretend to be
7332 # adding with a huge interval. This will force the comments to be
7333 # generated for comparison with the old comments, but not added.
7334 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7335 if ( $rOpts->{'delete-closing-side-comments'} ) {
7336 $rOpts->{'delete-closing-side-comments'} = 0;
7337 $rOpts->{'closing-side-comments'} = 1;
7338 $rOpts->{'closing-side-comment-interval'} = 100000000;
7343 make_block_brace_vertical_tightness_pattern();
7345 if ( $rOpts->{'line-up-parentheses'} ) {
7347 if ( $rOpts->{'indent-only'}
7348 || !$rOpts->{'add-newlines'}
7349 || !$rOpts->{'delete-old-newlines'} )
7352 -----------------------------------------------------------------------
7353 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7355 The -lp indentation logic requires that perltidy be able to coordinate
7356 arbitrarily large numbers of line breakpoints. This isn't possible
7357 with these flags. Sometimes an acceptable workaround is to use -wocb=3
7358 -----------------------------------------------------------------------
7360 $rOpts->{'line-up-parentheses'} = 0;
7364 # At present, tabs are not compatible with the line-up-parentheses style
7365 # (it would be possible to entab the total leading whitespace
7366 # just prior to writing the line, if desired).
7367 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7369 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
7371 $rOpts->{'tabs'} = 0;
7374 # Likewise, tabs are not compatible with outdenting..
7375 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7377 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7379 $rOpts->{'tabs'} = 0;
7382 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7384 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
7386 $rOpts->{'tabs'} = 0;
7389 if ( !$rOpts->{'space-for-semicolon'} ) {
7390 $want_left_space{'f'} = -1;
7393 if ( $rOpts->{'space-terminal-semicolon'} ) {
7394 $want_left_space{';'} = 1;
7397 # implement outdenting preferences for keywords
7398 %outdent_keyword = ();
7399 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7400 @_ = qw(next last redo goto return); # defaults
7403 # FUTURE: if not a keyword, assume that it is an identifier
7405 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7406 $outdent_keyword{$_} = 1;
7409 warn "ignoring '$_' in -okwl list; not a perl keyword";
7413 # implement user whitespace preferences
7414 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7415 @want_left_space{@_} = (1) x scalar(@_);
7418 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7419 @want_right_space{@_} = (1) x scalar(@_);
7422 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7423 @want_left_space{@_} = (-1) x scalar(@_);
7426 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7427 @want_right_space{@_} = (-1) x scalar(@_);
7429 if ( $rOpts->{'dump-want-left-space'} ) {
7430 dump_want_left_space(*STDOUT);
7434 if ( $rOpts->{'dump-want-right-space'} ) {
7435 dump_want_right_space(*STDOUT);
7439 # default keywords for which space is introduced before an opening paren
7440 # (at present, including them messes up vertical alignment)
7441 @_ = qw(my local our and or err eq ne if else elsif until
7442 unless while for foreach return switch case given when);
7443 @space_after_keyword{@_} = (1) x scalar(@_);
7445 # first remove any or all of these if desired
7446 if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7448 # -nsak='*' selects all the above keywords
7449 if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
7450 @space_after_keyword{@_} = (0) x scalar(@_);
7453 # then allow user to add to these defaults
7454 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7455 @space_after_keyword{@_} = (1) x scalar(@_);
7458 # implement user break preferences
7459 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7460 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7461 . : ? && || and or err xor
7464 my $break_after = sub {
7465 foreach my $tok (@_) {
7466 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
7467 my $lbs = $left_bond_strength{$tok};
7468 my $rbs = $right_bond_strength{$tok};
7469 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7470 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7476 my $break_before = sub {
7477 foreach my $tok (@_) {
7478 my $lbs = $left_bond_strength{$tok};
7479 my $rbs = $right_bond_strength{$tok};
7480 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7481 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7487 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7488 $break_before->(@all_operators)
7489 if ( $rOpts->{'break-before-all-operators'} );
7491 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7492 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7494 # make note if breaks are before certain key types
7495 %want_break_before = ();
7496 foreach my $tok ( @all_operators, ',' ) {
7497 $want_break_before{$tok} =
7498 $left_bond_strength{$tok} < $right_bond_strength{$tok};
7501 # Coordinate ?/: breaks, which must be similar
7502 if ( !$want_break_before{':'} ) {
7503 $want_break_before{'?'} = $want_break_before{':'};
7504 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7505 $left_bond_strength{'?'} = NO_BREAK;
7508 # Define here tokens which may follow the closing brace of a do statement
7509 # on the same line, as in:
7510 # } while ( $something);
7511 @_ = qw(until while unless if ; : );
7513 @is_do_follower{@_} = (1) x scalar(@_);
7515 # These tokens may follow the closing brace of an if or elsif block.
7516 # In other words, for cuddled else we want code to look like:
7517 # } elsif ( $something) {
7519 if ( $rOpts->{'cuddled-else'} ) {
7520 @_ = qw(else elsif);
7521 @is_if_brace_follower{@_} = (1) x scalar(@_);
7524 %is_if_brace_follower = ();
7527 # nothing can follow the closing curly of an else { } block:
7528 %is_else_brace_follower = ();
7530 # what can follow a multi-line anonymous sub definition closing curly:
7531 @_ = qw# ; : => or and && || ~~ !~~ ) #;
7533 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7535 # what can follow a one-line anonymous sub closing curly:
7536 # one-line anonymous subs also have ']' here...
7537 # see tk3.t and PP.pm
7538 @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
7540 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7542 # What can follow a closing curly of a block
7543 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7544 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7545 @_ = qw# ; : => or and && || ) #;
7548 # allow cuddled continue if cuddled else is specified
7549 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7551 @is_other_brace_follower{@_} = (1) x scalar(@_);
7553 $right_bond_strength{'{'} = WEAK;
7554 $left_bond_strength{'{'} = VERY_STRONG;
7556 # make -l=0 equal to -l=infinite
7557 if ( !$rOpts->{'maximum-line-length'} ) {
7558 $rOpts->{'maximum-line-length'} = 1000000;
7561 # make -lbl=0 equal to -lbl=infinite
7562 if ( !$rOpts->{'long-block-line-count'} ) {
7563 $rOpts->{'long-block-line-count'} = 1000000;
7566 my $ole = $rOpts->{'output-line-ending'};
7575 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7576 my $str = join " ", keys %endings;
7578 Unrecognized line ending '$ole'; expecting one of: $str
7581 if ( $rOpts->{'preserve-line-endings'} ) {
7582 warn "Ignoring -ple; conflicts with -ole\n";
7583 $rOpts->{'preserve-line-endings'} = undef;
7587 # hashes used to simplify setting whitespace
7589 '{' => $rOpts->{'brace-tightness'},
7590 '}' => $rOpts->{'brace-tightness'},
7591 '(' => $rOpts->{'paren-tightness'},
7592 ')' => $rOpts->{'paren-tightness'},
7593 '[' => $rOpts->{'square-bracket-tightness'},
7594 ']' => $rOpts->{'square-bracket-tightness'},
7603 # frequently used parameters
7604 $rOpts_add_newlines = $rOpts->{'add-newlines'};
7605 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
7606 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7607 $rOpts_block_brace_vertical_tightness =
7608 $rOpts->{'block-brace-vertical-tightness'};
7609 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
7610 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7611 $rOpts_break_at_old_ternary_breakpoints =
7612 $rOpts->{'break-at-old-ternary-breakpoints'};
7613 $rOpts_break_at_old_attribute_breakpoints =
7614 $rOpts->{'break-at-old-attribute-breakpoints'};
7615 $rOpts_break_at_old_comma_breakpoints =
7616 $rOpts->{'break-at-old-comma-breakpoints'};
7617 $rOpts_break_at_old_keyword_breakpoints =
7618 $rOpts->{'break-at-old-keyword-breakpoints'};
7619 $rOpts_break_at_old_logical_breakpoints =
7620 $rOpts->{'break-at-old-logical-breakpoints'};
7621 $rOpts_closing_side_comment_else_flag =
7622 $rOpts->{'closing-side-comment-else-flag'};
7623 $rOpts_closing_side_comment_maximum_text =
7624 $rOpts->{'closing-side-comment-maximum-text'};
7625 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7626 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
7627 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
7628 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
7629 $rOpts_indent_columns = $rOpts->{'indent-columns'};
7630 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
7631 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7632 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
7633 $rOpts_short_concatenation_item_length =
7634 $rOpts->{'short-concatenation-item-length'};
7635 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
7636 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
7637 $rOpts_format_skipping = $rOpts->{'format-skipping'};
7638 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
7639 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
7640 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7641 $half_maximum_line_length = $rOpts_maximum_line_length / 2;
7643 # Note that both opening and closing tokens can access the opening
7644 # and closing flags of their container types.
7645 %opening_vertical_tightness = (
7646 '(' => $rOpts->{'paren-vertical-tightness'},
7647 '{' => $rOpts->{'brace-vertical-tightness'},
7648 '[' => $rOpts->{'square-bracket-vertical-tightness'},
7649 ')' => $rOpts->{'paren-vertical-tightness'},
7650 '}' => $rOpts->{'brace-vertical-tightness'},
7651 ']' => $rOpts->{'square-bracket-vertical-tightness'},
7654 %closing_vertical_tightness = (
7655 '(' => $rOpts->{'paren-vertical-tightness-closing'},
7656 '{' => $rOpts->{'brace-vertical-tightness-closing'},
7657 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7658 ')' => $rOpts->{'paren-vertical-tightness-closing'},
7659 '}' => $rOpts->{'brace-vertical-tightness-closing'},
7660 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7663 # assume flag for '>' same as ')' for closing qw quotes
7664 %closing_token_indentation = (
7665 ')' => $rOpts->{'closing-paren-indentation'},
7666 '}' => $rOpts->{'closing-brace-indentation'},
7667 ']' => $rOpts->{'closing-square-bracket-indentation'},
7668 '>' => $rOpts->{'closing-paren-indentation'},
7671 # flag indicating if any closing tokens are indented
7672 $some_closing_token_indentation =
7673 $rOpts->{'closing-paren-indentation'}
7674 || $rOpts->{'closing-brace-indentation'}
7675 || $rOpts->{'closing-square-bracket-indentation'}
7676 || $rOpts->{'indent-closing-brace'};
7678 %opening_token_right = (
7679 '(' => $rOpts->{'opening-paren-right'},
7680 '{' => $rOpts->{'opening-hash-brace-right'},
7681 '[' => $rOpts->{'opening-square-bracket-right'},
7684 %stack_opening_token = (
7685 '(' => $rOpts->{'stack-opening-paren'},
7686 '{' => $rOpts->{'stack-opening-hash-brace'},
7687 '[' => $rOpts->{'stack-opening-square-bracket'},
7690 %stack_closing_token = (
7691 ')' => $rOpts->{'stack-closing-paren'},
7692 '}' => $rOpts->{'stack-closing-hash-brace'},
7693 ']' => $rOpts->{'stack-closing-square-bracket'},
7697 sub make_static_block_comment_pattern {
7699 # create the pattern used to identify static block comments
# Sets the file-scoped $static_block_comment_pattern: first to the
# default below, then to a pattern built from the
# 'static-block-comment-prefix' option when that option is set.
7700 $static_block_comment_pattern = '^\s*##';
7702 # allow the user to change it
7703 if ( $rOpts->{'static-block-comment-prefix'} ) {
7704 my $prefix = $rOpts->{'static-block-comment-prefix'};
# strip leading whitespace from the user's prefix
7705 $prefix =~ s/^\s*//;
7706 my $pattern = $prefix;
7708 # user may give leading caret to force matching left comments only
# a prefix already starting with '^#' is used as the pattern unchanged
7709 if ( $prefix !~ /^\^#/ ) {
7710 if ( $prefix !~ /^#/ ) {
7712 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
# otherwise allow optional leading whitespace before the prefix
7714 $pattern = '^\s*' . $prefix;
# trial use of the pattern, so that an invalid regex is reported here
# NOTE(review): the user-supplied prefix is interpolated into a string
# eval; a prefix containing '/' could end the regex early and run
# arbitrary code. Consider a block eval around qr/$pattern/ instead.
7716 eval "'##'=~/$pattern/";
# NOTE(review): this message names the option '-sbc' while the one
# above says '-sbcp'; presumably both should read '-sbcp' -- confirm.
7719 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7721 $static_block_comment_pattern = $pattern;
# Build and validate the regex for a format-skipping marker comment.
#
# Arguments:
#   $opt_name - key into $rOpts naming the option to read; also used in the
#               error messages
#   $default  - value used when $rOpts->{$opt_name} is false
#
# Dies if the chosen value does not begin with '#', and (judging by the
# second message) if the resulting pattern is not a valid regex.
# NOTE(review): the statement that hands $pattern back to the caller is not
# visible in this listing - presumably the pattern is returned; confirm.
7725 sub make_format_skipping_pattern {
7726 my ( $opt_name, $default ) = @_;
7727 my $param = $rOpts->{$opt_name};
7728 unless ($param) { $param = $default }
# the marker must look like a comment
7730 if ( $param !~ /^#/ ) {
7731 die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
# anchor at the start and require whitespace right after the marker text
7733 my $pattern = '^' . $param . '\s';
# test-compile the pattern by matching it against a dummy string
7734 eval "'#'=~/$pattern/";
7737 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
# Set the global $closing_side_comment_list_pattern.
# Takes no arguments; reads $rOpts->{'closing-side-comment-list'}.
7742 sub make_closing_side_comment_list_pattern {
7744 # turn any input list into a regex for recognizing selected block types
# default when no list is given: '^\w+'
7745 $closing_side_comment_list_pattern = '^\w+';
# a defined, non-empty list replaces the default with the regex built by
# make_block_pattern (the '-cscl' tag is passed along for its warnings)
7746 if ( defined( $rOpts->{'closing-side-comment-list'} )
7747 && $rOpts->{'closing-side-comment-list'} )
7749 $closing_side_comment_list_pattern =
7750 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
# Set the global $bli_pattern from the block-type list in $bli_list_string.
# A defined, non-empty $rOpts->{'brace-left-and-indent-list'} replaces
# $bli_list_string first; otherwise whatever value $bli_list_string already
# holds is used (its default is assigned outside this sub, not shown here).
7754 sub make_bli_pattern {
7756 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7757 && $rOpts->{'brace-left-and-indent-list'} )
7759 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
# convert the keyword list to a regex; '-blil' tags any warning about it
7762 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
# Set the global $block_brace_vertical_tightness_pattern.
# Takes no arguments; reads $rOpts->{'block-brace-vertical-tightness-list'}.
7765 sub make_block_brace_vertical_tightness_pattern {
7767 # turn any input list into a regex for recognizing selected block types
# default: the listed control keywords and labels (\w+:) matched in full,
# or any block type beginning with 'sub'
7768 $block_brace_vertical_tightness_pattern =
7769 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
# a defined, non-empty list replaces the default with the regex built by
# make_block_pattern (the '-bbvtl' tag is passed along for its warnings)
7771 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7772 && $rOpts->{'block-brace-vertical-tightness-list'} )
7774 $block_brace_vertical_tightness_pattern =
7775 make_block_pattern( '-bbvtl',
7776 $rOpts->{'block-brace-vertical-tightness-list'} );
# Convert a user-supplied list of block types into an anchored regex.
#
# Arguments:
#   $abbrev - short option name (e.g. '-cscl'); used only in the warning text
#   $string - list of block-type keywords, split with split_words()
7780 sub make_block_pattern {
7782 # given a string of block-type keywords, return a regex to match them
7783 # The only tricky part is that labels are indicated with a single ':'
7784 # and the 'sub' token text may have additional text after it (name of
7789 # input string: "if else elsif unless while for foreach do : sub";
7790 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7792 my ( $abbrev, $string ) = @_;
7793 my @list = split_words($string);
# Classify each list item $i:
#   'sub'          - handled at the end through $seen{'sub'}
#   ':'            - stands for any label, added to @words as '\w+:'
#   starts with \w - an ordinary keyword
#   anything else  - reported with a warning and ignored
7799 if ( $i eq 'sub' ) {
7801 elsif ( $i eq ':' ) {
7802 push @words, '\w+:';
7804 elsif ( $i =~ /^\w/ ) {
7808 warn "unrecognized block type $i after $abbrev, ignoring\n";
# the collected words must match through the end of the block-type text
7811 my $pattern = '(' . join( '|', @words ) . ')$';
# 'sub' is added outside the '$' anchor so that a sub name may follow it
7812 if ( $seen{'sub'} ) {
7813 $pattern = '(' . $pattern . '|sub)';
7815 $pattern = '^' . $pattern;
# Set the global $static_side_comment_pattern.
# Takes no arguments; reads $rOpts->{'static-side-comment-prefix'} (-sscp).
# NOTE(review): unlike the -sbcp handling, no check that the prefix begins
# with '#' is visible here - confirm whether that is intended.
7819 sub make_static_side_comment_pattern {
7821 # create the pattern used to identify static side comments
7822 $static_side_comment_pattern = '^##';
7824 # allow the user to change it
7825 if ( $rOpts->{'static-side-comment-prefix'} ) {
7826 my $prefix = $rOpts->{'static-side-comment-prefix'};
# drop leading whitespace, then anchor the prefix at the start
7827 $prefix =~ s/^\s*//;
7828 my $pattern = '^' . $prefix;
# test-compile the pattern by matching it against a dummy string
7829 eval "'##'=~/$pattern/";
7832 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
7834 $static_side_comment_pattern = $pattern;
# Validate the closing side comment prefix (-cscp) and derive the regex
# used to recognize it.
#
# Inputs (globals):
#   $rOpts->{'closing-side-comment-prefix'}  user prefix, may be undefined
#
# Outputs (globals):
#   $rOpts->{'closing-side-comment-prefix'}  normalized prefix (a leading
#                                            '#' is added if missing)
#   $closing_side_comment_prefix_pattern     regex matching that prefix
#
# When no prefix is given, or when the derived regex fails to compile, the
# defaults '## end' / '^##\s+end' are used.
sub make_closing_side_comment_prefix {

    # Be sure we have a valid closing side comment prefix
    my $default_csc_prefix         = '## end';
    my $default_csc_prefix_pattern = '^##\s+end';

    my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
    my $csc_prefix_pattern;
    if ( !defined($csc_prefix) ) {
        $csc_prefix         = $default_csc_prefix;
        $csc_prefix_pattern = $default_csc_prefix_pattern;
    }
    else {
        my $test_csc_prefix = $csc_prefix;
        if ( $test_csc_prefix !~ /^#/ ) {
            $test_csc_prefix = '#' . $test_csc_prefix;
        }

        # make a regex to recognize the prefix
        my $test_csc_prefix_pattern = $test_csc_prefix;

        # escape any special characters
        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;

        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;

        # allow exact number of intermediate spaces to vary
        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;

        # make sure we have a good pattern
        # if we fail this we probably have an error in escaping
        # characters.
        eval "'##'=~/$test_csc_prefix_pattern/";
        if ($@) {

            # shouldn't happen..must have screwed up escaping, above
            report_definite_bug();

            # report the pattern that was actually tested; the final
            # $csc_prefix_pattern has not been assigned on this path
            warn
"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$test_csc_prefix_pattern'\n";

            # just warn and keep going with defaults
            warn "Please consider using a simpler -cscp prefix\n";
            warn "Using default -cscp instead; please check output\n";

            # really do fall back, so that the stored pattern is never undef
            $csc_prefix         = $default_csc_prefix;
            $csc_prefix_pattern = $default_csc_prefix_pattern;
        }
        else {
            $csc_prefix         = $test_csc_prefix;
            $csc_prefix_pattern = $test_csc_prefix_pattern;
        }
    }
    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
}
# Print the %want_left_space table to the filehandle $fh: first the
# explanatory banner text below, then one "token-type<TAB>value" line per
# key, in sorted key order.  (Where $fh comes from is not visible in this
# listing - presumably it is the sub's argument; confirm.)
7888 sub dump_want_left_space {
7892 These values are the main control of whitespace to the left of a token type;
7893 They may be altered with the -wls parameter.
7894 For a list of token types, use perltidy --dump-token-types (-dtt)
7895 1 means the token wants a space to its left
7896 -1 means the token does not want a space to its left
7897 ------------------------------------------------------------------------
7899 foreach ( sort keys %want_left_space ) {
7900 print $fh "$_\t$want_left_space{$_}\n";
# Print the %want_right_space table to the filehandle $fh: first the
# explanatory banner text below, then one "token-type<TAB>value" line per
# key, in sorted key order.  (Where $fh comes from is not visible in this
# listing - presumably it is the sub's argument; confirm.)
7904 sub dump_want_right_space {
7908 These values are the main control of whitespace to the right of a token type;
7909 They may be altered with the -wrs parameter.
7910 For a list of token types, use perltidy --dump-token-types (-dtt)
7911 1 means the token wants a space to its right
7912 -1 means the token does not want a space to its right
7913 ------------------------------------------------------------------------
7915 foreach ( sort keys %want_right_space ) {
7916 print $fh "$_\t$want_right_space{$_}\n";
7920 { # begin is_essential_whitespace
7922 my %is_sort_grep_map;
7927 @_ = qw(sort grep map);
7928 @is_sort_grep_map{@_} = (1) x scalar(@_);
7930 @_ = qw(for foreach);
7931 @is_for_foreach{@_} = (1) x scalar(@_);
7935 sub is_essential_whitespace {
7937 # Essential whitespace means whitespace which cannot be safely deleted
7938 # without risking the introduction of a syntax error.
7939 # We are given three tokens and their types:
7940 # ($tokenl, $typel) is the token to the left of the space in question
7941 # ($tokenr, $typer) is the token to the right of the space in question
7942 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7944 # This is a slow routine but is not needed too often except when -mangle
7947 # Note: This routine should almost never need to be changed. It is
7948 # for avoiding syntax problems rather than for formatting.
7949 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7953 # never combine two bare words or numbers
7954 # examples: and ::ok(1)
7956 # for bla::bla:: abc
7957 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7958 # $input eq"quit" to make $inputeq"quit"
7959 # my $size=-s::SINK if $file; <==OK but we won't do it
7960 # don't join something like: for bla::bla:: abc
7961 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7962 ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
7964 # do not combine a number with a concatenation dot
7965 # example: pom.caputo:
7966 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7967 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7968 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7970 # do not join a minus with a bare word, because you might form
7971 # a file test operator. Example from Complex.pm:
7972 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7973 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7975 # and something like this could become ambiguous without space
7977 # use constant III=>1;
7981 || ( ( $tokenl eq '-' )
7982 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7984 # '= -' should not become =- or you will get a warning
7986 # || ($tokenr eq '-')
7988 # keep a space between a quote and a bareword to prevent the
7989 # bareword from becoming a quote modifier.
7990 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7992 # keep a space between a token ending in '$' and any word;
7993 # this caused trouble: "die @$ if $@"
7994 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7995 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7997 # perl is very fussy about spaces before <<
7998 || ( $tokenr =~ /^\<\</ )
8000 # avoid combining tokens to create new meanings. Example:
8001 # $a+ +$b must not become $a++$b
8002 || ( $is_digraph{ $tokenl . $tokenr } )
8003 || ( $is_trigraph{ $tokenl . $tokenr } )
8005 # another example: do not combine these two &'s:
8006 # allow_options & &OPT_EXECCGI
8007 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
8009 # don't combine $$ or $# with any alphanumeric
8010 # (testfile mangle.t with --mangle)
8011 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
8013 # retain any space after possible filehandle
8014 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
8015 || ( $typel eq 'Z' )
8017 # Perl is sensitive to whitespace after the + here:
8018 # $b = xvals $a + 0.1 * yvals $a;
8019 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
8021 # keep paren separate in 'use Foo::Bar ()'
8025 && $tokenll eq 'use' )
8027 # keep any space between filehandle and paren:
8028 # file mangle.t with --mangle:
8029 || ( $typel eq 'Y' && $tokenr eq '(' )
8031 # retain any space after here doc operator ( hereerr.t)
8032 || ( $typel eq 'h' )
8034 # be careful with a space around ++ and --, to avoid ambiguity as to
8035 # which token it applies
8036 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
8037 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
8039 # need space after foreach my; for example, this will fail in
8040 # older versions of Perl:
8041 # foreach my$ft(@filetypes)...
8046 && $is_for_foreach{$tokenll}
8050 # must have space between grep and left paren; "grep(" will fail
8051 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
8053 # don't stick numbers next to left parens, as in:
8054 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
8055 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
8057 # We must be sure that a space between a ? and a quoted string
8058 # remains if the space before the ? remains. [Loca.pm, lockarea]
8060 # $b=join $comma ? ',' : ':', @_; # ok
8061 # $b=join $comma?',' : ':', @_; # ok!
8062 # $b=join $comma ?',' : ':', @_; # error!
8063 # Not really required:
8064 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
8066 # do not remove space between an '&' and a bare word because
8067 # it may turn into a function evaluation, like here
8068 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
8069 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
8070 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8072 ; # the value of this long logic sequence is the result we want
8077 sub set_white_space_flag {
8079 # This routine examines each pair of nonblank tokens and
8080 # sets values for array @white_space_flag.
8082 # $white_space_flag[$j] is a flag indicating whether a white space
8083 # BEFORE token $j is needed, with the following values:
8085 # -1 do not want a space before token $j
8086 # 0 optional space or $j is a whitespace
8087 # 1 want a space before token $j
8090 # The values for the first token will be defined based
8091 # upon the contents of the "to_go" output array.
8093 # Note: retain debug print statements because they are usually
8094 # required after adding new token types.
8098 # initialize these global hashes, which control the use of
8099 # whitespace around tokens:
8104 # %space_after_keyword
8106 # Many token types are identical to the tokens themselves.
8107 # See the tokenizer for a complete list. Here are some special types:
8109 # f = semicolon in for statement
8112 # Note that :: is excluded since it should be contained in an identifier
8113 # Note that '->' is excluded because it never gets space
8114 # parentheses and brackets are excluded since they are handled specially
8115 # curly braces are included but may be overridden by logic, such as
8118 # NEW_TOKENS: create a whitespace rule here. This can be as
8119 # simple as adding your new letter to @spaces_both_sides, for
8123 @is_opening_type{@_} = (1) x scalar(@_);
8126 @is_closing_type{@_} = (1) x scalar(@_);
8128 my @spaces_both_sides = qw"
8129 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
8130 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
8131 &&= ||= //= <=> A k f w F n C Y U G v
8134 my @spaces_left_side = qw"
8135 t ! ~ m p { \ h pp mm Z j
8137 push( @spaces_left_side, '#' ); # avoids warning message
8139 my @spaces_right_side = qw"
8140 ; } ) ] R J ++ -- **=
8142 push( @spaces_right_side, ',' ); # avoids warning message
8143 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
8144 @want_right_space{@spaces_both_sides} =
8145 (1) x scalar(@spaces_both_sides);
8146 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
8147 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
8148 @want_left_space{@spaces_right_side} =
8149 (-1) x scalar(@spaces_right_side);
8150 @want_right_space{@spaces_right_side} =
8151 (1) x scalar(@spaces_right_side);
8152 $want_left_space{'L'} = WS_NO;
8153 $want_left_space{'->'} = WS_NO;
8154 $want_right_space{'->'} = WS_NO;
8155 $want_left_space{'**'} = WS_NO;
8156 $want_right_space{'**'} = WS_NO;
8158 # hash type information must stay tightly bound
8160 $binary_ws_rules{'i'}{'L'} = WS_NO;
8161 $binary_ws_rules{'i'}{'{'} = WS_YES;
8162 $binary_ws_rules{'k'}{'{'} = WS_YES;
8163 $binary_ws_rules{'U'}{'{'} = WS_YES;
8164 $binary_ws_rules{'i'}{'['} = WS_NO;
8165 $binary_ws_rules{'R'}{'L'} = WS_NO;
8166 $binary_ws_rules{'R'}{'{'} = WS_NO;
8167 $binary_ws_rules{'t'}{'L'} = WS_NO;
8168 $binary_ws_rules{'t'}{'{'} = WS_NO;
8169 $binary_ws_rules{'}'}{'L'} = WS_NO;
8170 $binary_ws_rules{'}'}{'{'} = WS_NO;
8171 $binary_ws_rules{'$'}{'L'} = WS_NO;
8172 $binary_ws_rules{'$'}{'{'} = WS_NO;
8173 $binary_ws_rules{'@'}{'L'} = WS_NO;
8174 $binary_ws_rules{'@'}{'{'} = WS_NO;
8175 $binary_ws_rules{'='}{'L'} = WS_YES;
8177 # the following includes ') {'
8178 # as in : if ( xxx ) { yyy }
8179 $binary_ws_rules{']'}{'L'} = WS_NO;
8180 $binary_ws_rules{']'}{'{'} = WS_NO;
8181 $binary_ws_rules{')'}{'{'} = WS_YES;
8182 $binary_ws_rules{')'}{'['} = WS_NO;
8183 $binary_ws_rules{']'}{'['} = WS_NO;
8184 $binary_ws_rules{']'}{'{'} = WS_NO;
8185 $binary_ws_rules{'}'}{'['} = WS_NO;
8186 $binary_ws_rules{'R'}{'['} = WS_NO;
8188 $binary_ws_rules{']'}{'++'} = WS_NO;
8189 $binary_ws_rules{']'}{'--'} = WS_NO;
8190 $binary_ws_rules{')'}{'++'} = WS_NO;
8191 $binary_ws_rules{')'}{'--'} = WS_NO;
8193 $binary_ws_rules{'R'}{'++'} = WS_NO;
8194 $binary_ws_rules{'R'}{'--'} = WS_NO;
8196 ########################################################
8197 # should no longer be necessary (see niek.pl)
8198 ##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
8199 ##$binary_ws_rules{'w'}{':'} = WS_NO;
8200 ########################################################
8201 $binary_ws_rules{'i'}{'Q'} = WS_YES;
8202 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
8204 # FIXME: we need to split 'i' into variables and functions
8205 # and have no space for functions but space for variables. For now,
8206 # I have a special patch in the special rules below
8207 $binary_ws_rules{'i'}{'('} = WS_NO;
8209 $binary_ws_rules{'w'}{'('} = WS_NO;
8210 $binary_ws_rules{'w'}{'{'} = WS_YES;
8212 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
8213 my ( $last_token, $last_type, $last_block_type, $token, $type,
8215 my (@white_space_flag);
8216 my $j_tight_closing_paren = -1;
8218 if ( $max_index_to_go >= 0 ) {
8219 $token = $tokens_to_go[$max_index_to_go];
8220 $type = $types_to_go[$max_index_to_go];
8221 $block_type = $block_type_to_go[$max_index_to_go];
8229 # loop over all tokens
8232 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8234 if ( $$rtoken_type[$j] eq 'b' ) {
8235 $white_space_flag[$j] = WS_OPTIONAL;
8239 # set a default value, to be changed as needed
8241 $last_token = $token;
8243 $last_block_type = $block_type;
8244 $token = $$rtokens[$j];
8245 $type = $$rtoken_type[$j];
8246 $block_type = $$rblock_type[$j];
8248 #---------------------------------------------------------------
8250 # handle space on the inside of opening braces
8251 #---------------------------------------------------------------
8254 if ( $is_opening_type{$last_type} ) {
8256 $j_tight_closing_paren = -1;
8258 # let's keep empty matched braces together: () {} []
8260 if ( $token eq $matching_token{$last_token} ) {
8270 # we're considering the right of an opening brace
8271 # tightness = 0 means always pad inside with space
8272 # tightness = 1 means pad inside if "complex"
8273 # tightness = 2 means never pad inside with space
8276 if ( $last_type eq '{'
8277 && $last_token eq '{'
8278 && $last_block_type )
8280 $tightness = $rOpts_block_brace_tightness;
8282 else { $tightness = $tightness{$last_token} }
8284 #=================================================================
8285 # Patch for fabrice_bug.pl
8286 # We must always avoid spaces around a bare word beginning with ^ as in:
8287 # my $before = ${^PREMATCH};
8288 # Because all of the following cause an error in perl:
8289 # my $before = ${ ^PREMATCH };
8290 # my $before = ${ ^PREMATCH};
8291 # my $before = ${^PREMATCH };
8292 # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1.
8293 # Note that here we must set tightness=1 and not 2 so that the closing space
8294 # is also avoided (via the $j_tight_closing_paren flag in coding)
8295 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8297 #=================================================================
8299 if ( $tightness <= 0 ) {
8302 elsif ( $tightness > 1 ) {
8307 # Patch to count '-foo' as single token so that
8308 # each of $a{-foo} and $a{foo} and $a{'foo'} do
8309 # not get spaces with default formatting.
8313 && $last_token eq '{'
8314 && $$rtoken_type[ $j + 1 ] eq 'w' );
8316 # $j_next is where a closing token should be if
8317 # the container has a single token
8319 ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8322 my $tok_next = $$rtokens[$j_next];
8323 my $type_next = $$rtoken_type[$j_next];
8325 # for tightness = 1, if there is just one token
8326 # within the matching pair, we will keep it tight
8328 $tok_next eq $matching_token{$last_token}
8330 # but watch out for this: [ [ ] (misc.t)
8331 && $last_token ne $token
8335 # remember where to put the space for the closing paren
8336 $j_tight_closing_paren = $j_next;
8344 } # done with opening braces and brackets
8346 if FORMATTER_DEBUG_FLAG_WHITE;
8348 #---------------------------------------------------------------
8350 # handle space on inside of closing brace pairs
8351 #---------------------------------------------------------------
8354 if ( $is_closing_type{$type} ) {
8356 if ( $j == $j_tight_closing_paren ) {
8358 $j_tight_closing_paren = -1;
8363 if ( !defined($ws) ) {
8366 if ( $type eq '}' && $token eq '}' && $block_type ) {
8367 $tightness = $rOpts_block_brace_tightness;
8369 else { $tightness = $tightness{$token} }
8371 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8377 if FORMATTER_DEBUG_FLAG_WHITE;
8379 #---------------------------------------------------------------
8381 # use the binary table
8382 #---------------------------------------------------------------
8383 if ( !defined($ws) ) {
8384 $ws = $binary_ws_rules{$last_type}{$type};
8387 if FORMATTER_DEBUG_FLAG_WHITE;
8389 #---------------------------------------------------------------
8391 # some special cases
8392 #---------------------------------------------------------------
8393 if ( $token eq '(' ) {
8395 # This will have to be tweaked as tokenization changes.
8396 # We usually want a space at '} (', for example:
8397 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8400 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8401 # At present, the above & block is marked as type L/R so this case
8402 # won't go through here.
8403 if ( $last_type eq '}' ) { $ws = WS_YES }
8405 # NOTE: some older versions of Perl had occasional problems if
8406 # spaces are introduced between keywords or functions and opening
8407 # parens. So the default is not to do this except in certain
8408 # cases. The current Perl seems to tolerate spaces.
8410 # Space between keyword and '('
8411 elsif ( $last_type eq 'k' ) {
8413 unless ( $rOpts_space_keyword_paren
8414 || $space_after_keyword{$last_token} );
8417 # Space between function and '('
8418 # -----------------------------------------------------
8419 # 'w' and 'i' checks for something like:
8420 # myfun( &myfun( ->myfun(
8421 # -----------------------------------------------------
8422 elsif (( $last_type =~ /^[wUG]$/ )
8423 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8425 $ws = WS_NO unless ($rOpts_space_function_paren);
8428 # space between something like $i and ( in
8429 # for $i ( 0 .. 20 ) {
8430 # FIXME: eventually, type 'i' needs to be split into multiple
8431 # token types so this can be a hardwired rule.
8432 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8436 # allow constant function followed by '()' to retain no space
8437 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8442 # patch for SWITCH/CASE: make space at ']{' optional
8443 # since the '{' might begin a case or when block
8444 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8448 # keep space between 'sub' and '{' for anonymous sub definition
8449 if ( $type eq '{' ) {
8450 if ( $last_token eq 'sub' ) {
8454 # this is needed to avoid no space in '){'
8455 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8457 # avoid any space before the brace or bracket in something like
8458 # @opts{'a','b',...}
8459 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8464 elsif ( $type eq 'i' ) {
8466 # never a space before ->
8467 if ( $token =~ /^\-\>/ ) {
8472 # retain any space between '-' and bare word
8473 elsif ( $type eq 'w' || $type eq 'C' ) {
8474 $ws = WS_OPTIONAL if $last_type eq '-';
8476 # never a space before ->
8477 if ( $token =~ /^\-\>/ ) {
8482 # retain any space between '-' and bare word
8483 # example: avoid space between 'USER' and '-' here:
8484 # $myhash{USER-NAME}='steve';
8485 elsif ( $type eq 'm' || $type eq '-' ) {
8486 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8489 # always space before side comment
8490 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8492 # always preserve whatever space was used after a possible
8493 # filehandle (except _) or here doc operator
8496 && ( ( $last_type eq 'Z' && $last_token ne '_' )
8497 || $last_type eq 'h' )
8504 if FORMATTER_DEBUG_FLAG_WHITE;
8506 #---------------------------------------------------------------
8508 # default rules not covered above
8509 #---------------------------------------------------------------
8510 # if we fall through to here,
8511 # look at the pre-defined hash tables for the two tokens, and
8512 # if (they are equal) use the common value
8513 # if (either is zero or undef) use the other
8514 # if (either is -1) use it
8528 if ( !defined($ws) ) {
8529 my $wl = $want_left_space{$type};
8530 my $wr = $want_right_space{$last_type};
8531 if ( !defined($wl) ) { $wl = 0 }
8532 if ( !defined($wr) ) { $wr = 0 }
8533 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8536 if ( !defined($ws) ) {
8539 "WS flag is undefined for tokens $last_token $token\n");
8542 # Treat newline as a whitespace. Otherwise, we might combine
8543 # 'Send' and '-recipients' here according to the above rules:
8544 # my $msg = new Fax::Send
8545 # -recipients => $to,
8547 if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8552 && ( $last_type !~ /^[Zh]$/ ) )
8555 # If this happens, we have a non-fatal but undesirable
8556 # hole in the above rules which should be patched.
8558 "WS flag is zero for tokens $last_token $token\n");
8560 $white_space_flag[$j] = $ws;
8562 FORMATTER_DEBUG_FLAG_WHITE && do {
8563 my $str = substr( $last_token, 0, 15 );
8564 $str .= ' ' x ( 16 - length($str) );
8565 if ( !defined($ws_1) ) { $ws_1 = "*" }
8566 if ( !defined($ws_2) ) { $ws_2 = "*" }
8567 if ( !defined($ws_3) ) { $ws_3 = "*" }
8568 if ( !defined($ws_4) ) { $ws_4 = "*" }
8570 "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8573 return \@white_space_flag;
8576 { # begin print_line_of_tokens
8583 my $rcontainer_type;
8584 my $rcontainer_environment;
8587 my $rnesting_tokens;
8589 my $rnesting_blocks;
8592 my $python_indentation_level;
8594 # These local token variables are stored by store_token_to_go:
8597 my $container_environment;
8599 my $in_continued_quote;
8602 my $no_internal_newlines;
8608 # routine to pull the jth token from the line of tokens
8611 $token = $$rtokens[$j];
8612 $type = $$rtoken_type[$j];
8613 $block_type = $$rblock_type[$j];
8614 $container_type = $$rcontainer_type[$j];
8615 $container_environment = $$rcontainer_environment[$j];
8616 $type_sequence = $$rtype_sequence[$j];
8617 $level = $$rlevels[$j];
8618 $slevel = $$rslevels[$j];
8619 $nesting_blocks = $$rnesting_blocks[$j];
8620 $ci_level = $$rci_levels[$j];
# Helper called by insert_new_token_to_go before it overwrites the shared
# current-token variables.  The variables listed below are the ones
# involved; the statement that stores them is not visible in this listing
# (presumably a list assignment into a saved-token array - confirm).
8626 sub save_current_token {
8629 $block_type, $ci_level,
8630 $container_environment, $container_type,
8631 $in_continued_quote, $level,
8632 $nesting_blocks, $no_internal_newlines,
8634 $type, $type_sequence,
# Helper called by insert_new_token_to_go after the inserted token has been
# stored.  The variables listed below are the ones involved; the statement
# that assigns them is not visible in this listing (presumably they are
# reloaded from the values kept by save_current_token - confirm).
8638 sub restore_current_token {
8640 $block_type, $ci_level,
8641 $container_environment, $container_type,
8642 $in_continued_quote, $level,
8643 $nesting_blocks, $no_internal_newlines,
8645 $type, $type_sequence,
8650 # Routine to place the current token into the output stream.
8651 # Called once per output token.
# Takes one optional argument: a true value forces the no-break flag on
# for this token.  All other inputs are the shared current-token variables
# ($token, $type, $level, ...).
8652 sub store_token_to_go {
# start from the current $no_internal_newlines setting; a true first
# argument overrides it
8654 my $flag = $no_internal_newlines;
8655 if ( $_[0] ) { $flag = 1 }
# advance $max_index_to_go and record this token's attributes in the
# parallel *_to_go arrays
8657 $tokens_to_go[ ++$max_index_to_go ] = $token;
8658 $types_to_go[$max_index_to_go] = $type;
8659 $nobreak_to_go[$max_index_to_go] = $flag;
8660 $old_breakpoint_to_go[$max_index_to_go] = 0;
8661 $forced_breakpoint_to_go[$max_index_to_go] = 0;
8662 $block_type_to_go[$max_index_to_go] = $block_type;
8663 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
8664 $container_environment_to_go[$max_index_to_go] = $container_environment;
8665 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
8666 $ci_levels_to_go[$max_index_to_go] = $ci_level;
8667 $mate_index_to_go[$max_index_to_go] = -1;
8668 $matching_token_to_go[$max_index_to_go] = '';
8669 $bond_strength_to_go[$max_index_to_go] = 0;
8671 # Note: negative levels are currently retained as a diagnostic so that
8672 # the 'final indentation level' is correctly reported for bad scripts.
8673 # But this means that every use of $level as an index must be checked.
8674 # If this becomes too much of a problem, we might give up and just clip
8676 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8677 $levels_to_go[$max_index_to_go] = $level;
# the nesting depth, unlike the level, is clipped at zero
8678 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
# running total: entry i+1 is entry i plus the length of this token
8679 $lengths_to_go[ $max_index_to_go + 1 ] =
8680 $lengths_to_go[$max_index_to_go] + length($token);
8682 # Define the indentation that this token would have if it started
8683 # a new line. We have to do this now because we need to know this
8684 # when considering one-line blocks.
8685 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
# remember the two most recent non-blank tokens (type 'b' is skipped)
# and count the commas seen in this batch
8687 if ( $type ne 'b' ) {
8688 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8689 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
8690 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8691 $last_nonblank_index_to_go = $max_index_to_go;
8692 $last_nonblank_type_to_go = $type;
8693 $last_nonblank_token_to_go = $token;
8694 if ( $type eq ',' ) {
8695 $comma_count_in_batch++;
# optional debug trace, active only when FORMATTER_DEBUG_FLAG_STORE is true
8699 FORMATTER_DEBUG_FLAG_STORE && do {
8700 my ( $a, $b, $c ) = caller();
8702 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
# Arguments: ( $token, $type, $slevel, $no_internal_newlines ) for the token
# to insert.  They are assigned straight into the shared current-token
# variables, so the caller's current token is set aside with
# save_current_token() first and brought back with restore_current_token()
# once the new token has been stored.
8706 sub insert_new_token_to_go {
8708 # insert a new token into the output stream. use same level as
8709 # previous token; assumes a character at max_index_to_go.
8710 save_current_token();
8711 ( $token, $type, $slevel, $no_internal_newlines ) = @_;
# there is no previous token to take attributes from in this case
8713 if ( $max_index_to_go == UNDEFINED_INDEX ) {
8714 warning("code bug: bad call to insert_new_token_to_go\n");
# the remaining attributes are copied from the token at $max_index_to_go
8716 $level = $levels_to_go[$max_index_to_go];
8718 # FIXME: it seems to be necessary to use the next, rather than
8719 # previous, value of this variable when creating a new blank (align.t)
8720 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
8721 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
8722 $ci_level = $ci_levels_to_go[$max_index_to_go];
8723 $container_environment = $container_environment_to_go[$max_index_to_go];
8724 $in_continued_quote = 0;
8726 $type_sequence = "";
8727 store_token_to_go();
8728 restore_current_token();
8732 sub print_line_of_tokens {
8734 my $line_of_tokens = shift;
8736 # This routine is called once per input line to process all of
8737 # the tokens on that line. This is the first stage of
8740 # Full-line comments and blank lines may be processed immediately.
8742 # For normal lines of code, the tokens are stored one-by-one,
8743 # via calls to 'sub store_token_to_go', until a known line break
8744 # point is reached. Then, the batch of collected tokens is
8745 # passed along to 'sub output_line_to_go' for further
8746 # processing. This routine decides if there should be
8747 # whitespace between each pair of non-white tokens, so later
8748 # routines only need to decide on any additional line breaks.
8749 # Any whitespace is initially a single space character. Later,
8750 # the vertical aligner may expand that to be multiple space
8751 # characters if necessary for alignment.
8753 # extract input line number for error messages
8754 $input_line_number = $line_of_tokens->{_line_number};
8756 $rtoken_type = $line_of_tokens->{_rtoken_type};
8757 $rtokens = $line_of_tokens->{_rtokens};
8758 $rlevels = $line_of_tokens->{_rlevels};
8759 $rslevels = $line_of_tokens->{_rslevels};
8760 $rblock_type = $line_of_tokens->{_rblock_type};
8761 $rcontainer_type = $line_of_tokens->{_rcontainer_type};
8762 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8763 $rtype_sequence = $line_of_tokens->{_rtype_sequence};
8764 $input_line = $line_of_tokens->{_line_text};
8765 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
8766 $rci_levels = $line_of_tokens->{_rci_levels};
8767 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
8769 $in_continued_quote = $starting_in_quote =
8770 $line_of_tokens->{_starting_in_quote};
8771 $in_quote = $line_of_tokens->{_ending_in_quote};
8772 $ending_in_quote = $in_quote;
8773 $python_indentation_level =
8774 $line_of_tokens->{_python_indentation_level};
8779 my $next_nonblank_token;
8780 my $next_nonblank_token_type;
8781 my $rwhite_space_flag;
8783 $jmax = @$rtokens - 1;
8785 $container_type = "";
8786 $container_environment = "";
8787 $type_sequence = "";
8788 $no_internal_newlines = 1 - $rOpts_add_newlines;
8789 $is_static_block_comment = 0;
8791 # Handle a continued quote..
8792 if ($in_continued_quote) {
8794 # A line which is entirely a quote or pattern must go out
8795 # verbatim. Note: the \n is contained in $input_line.
8797 if ( ( $input_line =~ "\t" ) ) {
8798 note_embedded_tab();
8800 write_unindented_line("$input_line");
8801 $last_line_had_side_comment = 0;
8805 # prior to version 20010406, perltidy had a bug which placed
8806 # continuation indentation before the last line of some multiline
8807 # quotes and patterns -- exactly the lines passing this way.
8808 # To help find affected lines in scripts run with these
8809 # versions, run with '-chk', and it will warn of any quotes or
8810 # patterns which might have been modified by these early
8812 if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8814 "-chk: please check this line for extra leading whitespace\n"
8819 # Write line verbatim if we are in a formatting skip section
8820 if ($in_format_skipping_section) {
8821 write_unindented_line("$input_line");
8822 $last_line_had_side_comment = 0;
8824 # Note: extra space appended to comment simplifies pattern matching
8826 && $$rtoken_type[0] eq '#'
8827 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8829 $in_format_skipping_section = 0;
8830 write_logfile_entry("Exiting formatting skip section\n");
8831 $file_writer_object->reset_consecutive_blank_lines();
8836 # See if we are entering a formatting skip section
8837 if ( $rOpts_format_skipping
8839 && $$rtoken_type[0] eq '#'
8840 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8843 $in_format_skipping_section = 1;
8844 write_logfile_entry("Entering formatting skip section\n");
8845 write_unindented_line("$input_line");
8846 $last_line_had_side_comment = 0;
8850 # delete trailing blank tokens
8851 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8853 # Handle a blank line..
8856 # If keep-old-blank-lines is zero, we delete all
8857 # old blank lines and let the blank line rules generate any needed blanks.
8859 if ($rOpts_keep_old_blank_lines) {
8861 $file_writer_object->write_blank_code_line(
8862 $rOpts_keep_old_blank_lines == 2 );
8863 $last_line_leading_type = 'b';
8865 $last_line_had_side_comment = 0;
8869 # see if this is a static block comment (starts with ## by default)
8870 my $is_static_block_comment_without_leading_space = 0;
8872 && $$rtoken_type[0] eq '#'
8873 && $rOpts->{'static-block-comments'}
8874 && $input_line =~ /$static_block_comment_pattern/o )
8876 $is_static_block_comment = 1;
8877 $is_static_block_comment_without_leading_space =
8878 substr( $input_line, 0, 1 ) eq '#';
8881 # Check for comments which are line directives
8882 # Treat exactly as static block comments without leading space
8883 # reference: perlsyn, near end, section Plain Old Comments (Not!)
8884 # example: '# line 42 "new_filename.plx"'
8887 && $$rtoken_type[0] eq '#'
8888 && $input_line =~ /^\# \s*
8890 (?:\s("?)([^"]+)\2)? \s*
8894 $is_static_block_comment = 1;
8895 $is_static_block_comment_without_leading_space = 1;
8898 # create a hanging side comment if appropriate
8901 && $$rtoken_type[0] eq '#' # only token is a comment
8902 && $last_line_had_side_comment # last line had side comment
8903 && $input_line =~ /^\s/ # there is some leading space
8904 && !$is_static_block_comment # do not make static comment hanging
8905 && $rOpts->{'hanging-side-comments'} # user is allowing
8906 # hanging side comments
8911 # We will insert an empty qw string at the start of the token list
8912 # to force this comment to be a side comment. The vertical aligner
8913 # should then line it up with the previous side comment.
8914 unshift @$rtoken_type, 'q';
8915 unshift @$rtokens, '';
8916 unshift @$rlevels, $$rlevels[0];
8917 unshift @$rslevels, $$rslevels[0];
8918 unshift @$rblock_type, '';
8919 unshift @$rcontainer_type, '';
8920 unshift @$rcontainer_environment, '';
8921 unshift @$rtype_sequence, '';
8922 unshift @$rnesting_tokens, $$rnesting_tokens[0];
8923 unshift @$rci_levels, $$rci_levels[0];
8924 unshift @$rnesting_blocks, $$rnesting_blocks[0];
8928 # remember if this line has a side comment
8929 $last_line_had_side_comment =
8930 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8932 # Handle a block (full-line) comment..
8933 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8935 if ( $rOpts->{'delete-block-comments'} ) { return }
8937 if ( $rOpts->{'tee-block-comments'} ) {
8938 $file_writer_object->tee_on();
8941 destroy_one_line_block();
8942 output_line_to_go();
8944 # output a blank line before block comments
8946 # unless we follow a blank or comment line
8947 $last_line_leading_type !~ /^[#b]$/
8950 && $rOpts->{'blanks-before-comments'}
8952 # not if this is an empty comment line
8953 && $$rtokens[0] ne '#'
8955 # not after a short line ending in an opening token
8956 # because we already have space above this comment.
8957 # Note that the first comment in this if block, after
8958 # the 'if (', does not get a blank line because of this.
8959 && !$last_output_short_opening_token
8961 # never before static block comments
8962 && !$is_static_block_comment
8965 flush(); # switching to new output stream
8966 $file_writer_object->write_blank_code_line();
8967 $last_line_leading_type = 'b';
8970 # TRIM COMMENTS -- This could be turned off as a option
8971 $$rtokens[0] =~ s/\s*$//; # trim right end
8974 $rOpts->{'indent-block-comments'}
8975 && ( !$rOpts->{'indent-spaced-block-comments'}
8976 || $input_line =~ /^\s+/ )
8977 && !$is_static_block_comment_without_leading_space
8981 store_token_to_go();
8982 output_line_to_go();
8985 flush(); # switching to new output stream
8986 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8987 $last_line_leading_type = '#';
8989 if ( $rOpts->{'tee-block-comments'} ) {
8990 $file_writer_object->tee_off();
8995 # compare input/output indentation except for continuation lines
8996 # (because they have an unknown amount of initial blank space)
8997 # and lines which are quotes (because they may have been outdented)
8998 # Note: this test is placed here because we know the continuation flag
8999 # at this point, which allows us to avoid non-meaningful checks.
9000 my $structural_indentation_level = $$rlevels[0];
9001 compare_indentation_levels( $python_indentation_level,
9002 $structural_indentation_level )
9003 unless ( $python_indentation_level < 0
9004 || ( $$rci_levels[0] > 0 )
9005 || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
9008 # Patch needed for MakeMaker. Do not break a statement
9009 # in which $VERSION may be calculated. See MakeMaker.pm;
9010 # this is based on the coding in it.
9011 # The first line of a file that matches this will be eval'd:
9012 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9014 # *VERSION = \'1.01';
9015 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
9016 # We will pass such a line straight through without breaking
9017 # it unless -npvl is used
9019 my $is_VERSION_statement = 0;
9022 !$saw_VERSION_in_this_file
9023 && $input_line =~ /VERSION/ # quick check to reject most lines
9024 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9027 $saw_VERSION_in_this_file = 1;
9028 $is_VERSION_statement = 1;
9029 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
9030 $no_internal_newlines = 1;
9033 # take care of indentation-only
9034 # NOTE: In previous versions we sent all qw lines out immediately here.
9035 # No longer doing this: also write a line which is entirely a 'qw' list
9036 # to allow stacking of opening and closing tokens. Note that interior
9037 # qw lines will still go out at the end of this routine.
9038 if ( $rOpts->{'indent-only'} ) {
9043 $token = $input_line;
9046 $container_type = "";
9047 $container_environment = "";
9048 $type_sequence = "";
9049 store_token_to_go();
9050 output_line_to_go();
9054 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
9055 push( @$rtoken_type, 'b', 'b' );
9056 ($rwhite_space_flag) =
9057 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
9059 # find input tabbing to allow checks for tabbing disagreement
9061 ##$input_line_tabbing = "";
9062 ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
9064 # if the buffer hasn't been flushed, add a leading space if
9065 # necessary to keep essential whitespace. This is really only
9066 # necessary if we are squeezing out all ws.
9067 if ( $max_index_to_go >= 0 ) {
9069 $old_line_count_in_batch++;
9072 is_essential_whitespace(
9073 $last_last_nonblank_token,
9074 $last_last_nonblank_type,
9075 $tokens_to_go[$max_index_to_go],
9076 $types_to_go[$max_index_to_go],
9082 my $slevel = $$rslevels[0];
9083 insert_new_token_to_go( ' ', 'b', $slevel,
9084 $no_internal_newlines );
9088 # If we just saw the end of an elsif block, write nag message
9089 # if we do not see another elseif or an else.
9090 if ($looking_for_else) {
9092 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
9093 write_logfile_entry("(No else block)\n");
9095 $looking_for_else = 0;
9098 # This is a good place to kill incomplete one-line blocks
9099 if ( ( $semicolons_before_block_self_destruct == 0 )
9100 && ( $max_index_to_go >= 0 )
9101 && ( $types_to_go[$max_index_to_go] eq ';' )
9102 && ( $$rtokens[0] ne '}' ) )
9104 destroy_one_line_block();
9105 output_line_to_go();
9108 # loop to process the tokens one-by-one
9112 foreach $j ( 0 .. $jmax ) {
9114 # pull out the local values for this token
9117 if ( $type eq '#' ) {
9119 # trim trailing whitespace
9120 # (there is no option at present to prevent this)
9124 $rOpts->{'delete-side-comments'}
9126 # delete closing side comments if necessary
9127 || ( $rOpts->{'delete-closing-side-comments'}
9128 && $token =~ /$closing_side_comment_prefix_pattern/o
9129 && $last_nonblank_block_type =~
9130 /$closing_side_comment_list_pattern/o )
9133 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9134 unstore_token_to_go();
9140 # If we are continuing after seeing a right curly brace, flush
9141 # buffer unless we see what we are looking for, as in
9143 if ( $rbrace_follower && $type ne 'b' ) {
9145 unless ( $rbrace_follower->{$token} ) {
9146 output_line_to_go();
9148 $rbrace_follower = undef;
9151 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
9152 $next_nonblank_token = $$rtokens[$j_next];
9153 $next_nonblank_token_type = $$rtoken_type[$j_next];
9155 #--------------------------------------------------------
9156 # Start of section to patch token text
9157 #--------------------------------------------------------
9159 # Modify certain tokens here for whitespace
9160 # The following is not yet done, but could be:
9162 if ( $type =~ /^[wit]$/ ) {
9165 # change '$ var' to '$var' etc
9166 # '-> new' to '->new'
9167 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
9171 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
9173 # trim identifiers of trailing blanks which can occur
9174 # under some unusual circumstances, such as if the
9175 # identifier 'witch' has trailing blanks on input here:
9179 # () # prototype may be on new line ...
9181 if ( $type eq 'i' ) { $token =~ s/\s+$//g }
9184 # change 'LABEL :' to 'LABEL:'
9185 elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
9187 # patch to add space to something like "x10"
9188 # This avoids having to split this token in the pre-tokenizer
9189 elsif ( $type eq 'n' ) {
9190 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
9193 elsif ( $type eq 'Q' ) {
9194 note_embedded_tab() if ( $token =~ "\t" );
9196 # make note of something like '$var = s/xxx/yyy/;'
9197 # in case it should have been '$var =~ s/xxx/yyy/;'
9199 $token =~ /^(s|tr|y|m|\/)/
9200 && $last_nonblank_token =~ /^(=|==|!=)$/
9202 # preceded by simple scalar
9203 && $last_last_nonblank_type eq 'i'
9204 && $last_last_nonblank_token =~ /^\$/
9206 # followed by some kind of termination
9207 # (but give complaint if we can't see far enough ahead)
9208 && $next_nonblank_token =~ /^[; \)\}]$/
9210 # scalar is not declared
9212 $types_to_go[0] eq 'k'
9213 && $tokens_to_go[0] =~ /^(my|our|local)$/
9217 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
9219 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
9224 # trim blanks from right of qw quotes
9225 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
9226 elsif ( $type eq 'q' ) {
9228 note_embedded_tab() if ( $token =~ "\t" );
9231 #--------------------------------------------------------
9232 # End of section to patch token text
9233 #--------------------------------------------------------
9235 # insert any needed whitespace
9236 if ( ( $type ne 'b' )
9237 && ( $max_index_to_go >= 0 )
9238 && ( $types_to_go[$max_index_to_go] ne 'b' )
9239 && $rOpts_add_whitespace )
9241 my $ws = $$rwhite_space_flag[$j];
9244 insert_new_token_to_go( ' ', 'b', $slevel,
9245 $no_internal_newlines );
9249 # Do not allow breaks which would promote a side comment to a
9250 # block comment. In order to allow a break before an opening
9251 # or closing BLOCK, followed by a side comment, those sections
9252 # of code will handle this flag separately.
9253 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
9254 my $is_opening_BLOCK =
9258 && $block_type ne 't' );
9259 my $is_closing_BLOCK =
9263 && $block_type ne 't' );
9265 if ( $side_comment_follows
9266 && !$is_opening_BLOCK
9267 && !$is_closing_BLOCK )
9269 $no_internal_newlines = 1;
9272 # We're only going to handle breaking for code BLOCKS at this
9273 # (top) level. Other indentation breaks will be handled by
9274 # sub scan_list, which is better suited to dealing with them.
9275 if ($is_opening_BLOCK) {
9277 # Tentatively output this token. This is required before
9278 # calling starting_one_line_block. We may have to unstore
9279 # it, though, if we have to break before it.
9280 store_token_to_go($side_comment_follows);
9282 # Look ahead to see if we might form a one-line block
9284 starting_one_line_block( $j, $jmax, $level, $slevel,
9285 $ci_level, $rtokens, $rtoken_type, $rblock_type );
9286 clear_breakpoint_undo_stack();
9288 # to simplify the logic below, set a flag to indicate if
9289 # this opening brace is far from the keyword which introduces it
9290 my $keyword_on_same_line = 1;
9291 if ( ( $max_index_to_go >= 0 )
9292 && ( $last_nonblank_type eq ')' ) )
9294 if ( $block_type =~ /^(if|else|elsif)$/
9295 && ( $tokens_to_go[0] eq '}' )
9296 && $rOpts_cuddled_else )
9298 $keyword_on_same_line = 1;
9300 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
9302 $keyword_on_same_line = 0;
9306 # decide if user requested break before '{'
9309 # use -bl flag if not a sub block of any type
9310 $block_type !~ /^sub/
9311 ? $rOpts->{'opening-brace-on-new-line'}
9313 # use -sbl flag for a named sub block
9314 : $block_type !~ /^sub\W*$/
9315 ? $rOpts->{'opening-sub-brace-on-new-line'}
9317 # use -asbl flag for an anonymous sub block
9318 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9320 # Break before an opening '{' ...
9326 # and we were unable to start looking for a block,
9327 && $index_start_one_line_block == UNDEFINED_INDEX
9329 # or if it will not be on same line as its keyword, so that
9330 # it will be outdented (eval.t, overload.t), and the user
9331 # has not insisted on keeping it on the right
9332 || ( !$keyword_on_same_line
9333 && !$rOpts->{'opening-brace-always-on-right'} )
9338 # but only if allowed
9339 unless ($no_internal_newlines) {
9341 # since we already stored this token, we must unstore it
9342 unstore_token_to_go();
9344 # then output the line
9345 output_line_to_go();
9347 # and now store this token at the start of a new line
9348 store_token_to_go($side_comment_follows);
9352 # Now update for side comment
9353 if ($side_comment_follows) { $no_internal_newlines = 1 }
9355 # now output this line
9356 unless ($no_internal_newlines) {
9357 output_line_to_go();
9361 elsif ($is_closing_BLOCK) {
9363 # If there is a pending one-line block ..
9364 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9366 # we have to terminate it if..
9369 # it is too long (final length may be different from
9370 # initial estimate). note: must allow 1 space for this token
9371 excess_line_length( $index_start_one_line_block,
9372 $max_index_to_go ) >= 0
9374 # or if it has too many semicolons
9375 || ( $semicolons_before_block_self_destruct == 0
9376 && $last_nonblank_type ne ';' )
9379 destroy_one_line_block();
9383 # put a break before this closing curly brace if appropriate
9384 unless ( $no_internal_newlines
9385 || $index_start_one_line_block != UNDEFINED_INDEX )
9388 # add missing semicolon if ...
9389 # there are some tokens
9391 ( $max_index_to_go > 0 )
9393 # and we don't have one
9394 && ( $last_nonblank_type ne ';' )
9396 # patch until some block type issues are fixed:
9397 # Do not add semi-colon for block types '{',
9398 # '}', and ';' because we cannot be sure yet
9399 # that this is a block and not an anonymous
9400 # hash (blktype.t, blktype1.t)
9401 && ( $block_type !~ /^[\{\};]$/ )
9403 # patch: and do not add semi-colons for recently
9404 # added block types (see tmp/semicolon.t)
9406 /^(switch|case|given|when|default)$/ )
9408 # it seems best not to add semicolons in these
9409 # special block types: sort|map|grep
9410 && ( !$is_sort_map_grep{$block_type} )
9412 # and we are allowed to do so.
9413 && $rOpts->{'add-semicolons'}
9417 save_current_token();
9420 $level = $levels_to_go[$max_index_to_go];
9421 $slevel = $nesting_depth_to_go[$max_index_to_go];
9423 $nesting_blocks_to_go[$max_index_to_go];
9424 $ci_level = $ci_levels_to_go[$max_index_to_go];
9426 $container_type = "";
9427 $container_environment = "";
9428 $type_sequence = "";
9430 # Note - we remove any blank AFTER extracting its
9431 # parameters such as level, etc, above
9432 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9433 unstore_token_to_go();
9435 store_token_to_go();
9437 note_added_semicolon();
9438 restore_current_token();
9441 # then write out everything before this closing curly brace
9442 output_line_to_go();
9446 # Now update for side comment
9447 if ($side_comment_follows) { $no_internal_newlines = 1 }
9449 # store the closing curly brace
9450 store_token_to_go();
9452 # ok, we just stored a closing curly brace. Often, but
9453 # not always, we want to end the line immediately.
9454 # So now we have to check for special cases.
9456 # if this '}' successfully ends a one-line block..
9457 my $is_one_line_block = 0;
9459 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9461 # Remember the type of token just before the
9462 # opening brace. It would be more general to use
9463 # a stack, but this will work for one-line blocks.
9464 $is_one_line_block =
9465 $types_to_go[$index_start_one_line_block];
9467 # we have to actually make it by removing tentative
9468 # breaks that were set within it
9469 undo_forced_breakpoint_stack(0);
9470 set_nobreaks( $index_start_one_line_block,
9471 $max_index_to_go - 1 );
9473 # then re-initialize for the next one-line block
9474 destroy_one_line_block();
9476 # then decide if we want to break after the '}' ..
9477 # We will keep going to allow certain brace followers as in:
9478 # do { $ifclosed = 1; last } unless $losing;
9480 # But make a line break if the curly ends a
9481 # significant block:
9483 $is_block_without_semicolon{$block_type}
9485 # if needless semicolon follows we handle it later
9486 && $next_nonblank_token ne ';'
9489 output_line_to_go() unless ($no_internal_newlines);
9493 # set string indicating what we need to look for brace follower
9495 if ( $block_type eq 'do' ) {
9496 $rbrace_follower = \%is_do_follower;
9498 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9499 $rbrace_follower = \%is_if_brace_follower;
9501 elsif ( $block_type eq 'else' ) {
9502 $rbrace_follower = \%is_else_brace_follower;
9505 # added eval for borris.t
9506 elsif ($is_sort_map_grep_eval{$block_type}
9507 || $is_one_line_block eq 'G' )
9509 $rbrace_follower = undef;
9514 elsif ( $block_type =~ /^sub\W*$/ ) {
9516 if ($is_one_line_block) {
9517 $rbrace_follower = \%is_anon_sub_1_brace_follower;
9520 $rbrace_follower = \%is_anon_sub_brace_follower;
9524 # None of the above: specify what can follow a closing
9525 # brace of a block which is not an
9526 # if/elsif/else/do/sort/map/grep/eval
9528 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9530 $rbrace_follower = \%is_other_brace_follower;
9533 # See if an elsif block is followed by another elsif or else;
9535 if ( $block_type eq 'elsif' ) {
9537 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
9538 $looking_for_else = 1; # ok, check on next line
9542 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9543 write_logfile_entry("No else block :(\n");
9548 # keep going after certain block types (map,sort,grep,eval)
9549 # added eval for borris.t
9555 # if no more tokens, postpone decision until re-entering
9556 elsif ( ( $next_nonblank_token_type eq 'b' )
9557 && $rOpts_add_newlines )
9559 unless ($rbrace_follower) {
9560 output_line_to_go() unless ($no_internal_newlines);
9564 elsif ($rbrace_follower) {
9566 unless ( $rbrace_follower->{$next_nonblank_token} ) {
9567 output_line_to_go() unless ($no_internal_newlines);
9569 $rbrace_follower = undef;
9573 output_line_to_go() unless ($no_internal_newlines);
9576 } # end treatment of closing block token
9579 elsif ( $type eq ';' ) {
9581 # kill one-line blocks with too many semicolons
9582 $semicolons_before_block_self_destruct--;
9584 ( $semicolons_before_block_self_destruct < 0 )
9585 || ( $semicolons_before_block_self_destruct == 0
9586 && $next_nonblank_token_type !~ /^[b\}]$/ )
9589 destroy_one_line_block();
9592 # Remove unnecessary semicolons, but not after bare
9593 # blocks, where it could be unsafe if the brace is
9597 $last_nonblank_token eq '}'
9599 $is_block_without_semicolon{
9600 $last_nonblank_block_type}
9601 || $last_nonblank_block_type =~ /^sub\s+\w/
9602 || $last_nonblank_block_type =~ /^\w+:$/ )
9604 || $last_nonblank_type eq ';'
9609 $rOpts->{'delete-semicolons'}
9611 # don't delete ; before a # because it would promote it
9612 # to a block comment
9613 && ( $next_nonblank_token_type ne '#' )
9616 note_deleted_semicolon();
9618 unless ( $no_internal_newlines
9619 || $index_start_one_line_block != UNDEFINED_INDEX );
9623 write_logfile_entry("Extra ';'\n");
9626 store_token_to_go();
9629 unless ( $no_internal_newlines
9630 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
9631 || ( $next_nonblank_token eq '}' ) );
9635 # handle here_doc target string
9636 elsif ( $type eq 'h' ) {
9637 $no_internal_newlines =
9638 1; # no newlines after seeing here-target
9639 destroy_one_line_block();
9640 store_token_to_go();
9643 # handle all other token types
9646 # if this is a blank...
9647 if ( $type eq 'b' ) {
9649 # make it just one character
9650 $token = ' ' if $rOpts_add_whitespace;
9652 # delete it if unwanted by whitespace rules
9653 # or we are deleting all whitespace
9654 my $ws = $$rwhite_space_flag[ $j + 1 ];
9655 if ( ( defined($ws) && $ws == -1 )
9656 || $rOpts_delete_old_whitespace )
9659 # unless it might make a syntax error
9661 unless is_essential_whitespace(
9662 $last_last_nonblank_token,
9663 $last_last_nonblank_type,
9664 $tokens_to_go[$max_index_to_go],
9665 $types_to_go[$max_index_to_go],
9666 $$rtokens[ $j + 1 ],
9667 $$rtoken_type[ $j + 1 ]
9671 store_token_to_go();
9674 # remember two previous nonblank OUTPUT tokens
9675 if ( $type ne '#' && $type ne 'b' ) {
9676 $last_last_nonblank_token = $last_nonblank_token;
9677 $last_last_nonblank_type = $last_nonblank_type;
9678 $last_nonblank_token = $token;
9679 $last_nonblank_type = $type;
9680 $last_nonblank_block_type = $block_type;
9683 # unset the continued-quote flag since it only applies to the
9684 # first token, and we want to resume normal formatting if
9685 # there are additional tokens on the line
9686 $in_continued_quote = 0;
9688 } # end of loop over all tokens in this 'line_of_tokens'
9690 # we have to flush ..
9693 # if there is a side comment
9694 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9696 # if this line ends in a quote
9697 # NOTE: This is critically important for insuring that quoted lines
9698 # do not get processed by things like -sot and -sct
9701 # if this is a VERSION statement
9702 || $is_VERSION_statement
9704 # to keep a label on one line if that is how it is now
9705 || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9707 # if we are instructed to keep all old line breaks
9708 || !$rOpts->{'delete-old-newlines'}
9711 destroy_one_line_block();
9712 output_line_to_go();
9715 # mark old line breakpoints in current output stream
9716 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9717 $old_breakpoint_to_go[$max_index_to_go] = 1;
9719 } # end sub print_line_of_tokens
9720 } # end print_line_of_tokens
9722 # sub output_line_to_go sends one logical line of tokens on down the
9723 # pipeline to the VerticalAligner package, breaking the line into continuation
9724 # lines as necessary. The line of tokens is ready to go in the "to_go" arrays.
9726 sub output_line_to_go {
# Send the batch of tokens currently held in the "_to_go" arrays
# (indexes 0 .. $max_index_to_go) to send_lines_to_vertical_aligner(),
# either as one line or as several line fragments, and then call
# prepare_for_new_input_lines().  No arguments are read in the code
# visible here; all state comes from variables declared outside this sub.
# NOTE(review): this copy of the sub appears to be missing some short
# lines (closing braces, an `else`, and the declarations of $imin,
# $want_blank and $do_not_pad are not visible) -- confirm against the
# complete file before relying on the control flow described below.
9728 # debug stuff; this routine can be called from many points
9729 FORMATTER_DEBUG_FLAG_OUTPUT && do {
# caller() in list context yields (package, filename, line); only the
# package ($a) and the line ($c) are interpolated into the message below.
9730 my ( $a, $b, $c ) = caller;
9732 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9734 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9735 write_diagnostics("$output_str\n");
9738 # just set a tentative breakpoint if we might be in a one-line block
9739 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9740 set_forced_breakpoint($max_index_to_go);
# add_closing_side_comment() is called only when the
# 'closing-side-comments' option is set and the batch is not empty.
# Whatever it returns is kept in $cscw_block_comment and, if true, is
# written as a separate code line at the very end of this sub.
9744 my $cscw_block_comment;
9745 $cscw_block_comment = add_closing_side_comment()
9746 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9748 match_opening_and_closing_tokens();
9750 # tell the -lp option we are outputting a batch so it can close
9751 # any unfinished items in its stack
9754 # If this line ends in a code block brace, set breaks at any
9755 # previous closing code block braces to breakup a chain of code
9756 # blocks on one line. This is very rare but can happen for
9757 # user-defined subs. For example we might be looking at this:
9758 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9759 my $saw_good_break = 0; # flag to force breaks even if short line
9762 # looking for opening or closing block brace
9763 $block_type_to_go[$max_index_to_go]
9765 # but not one of these which are never duplicated on a line:
9766 # until|while|for|if|elsif|else
9767 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
# NOTE(review): $lev is read from @nesting_depth_to_go but is compared
# with entries of @levels_to_go in the loop below -- confirm that the two
# arrays are meant to be interchangeable for this test.
9770 my $lev = $nesting_depth_to_go[$max_index_to_go];
9772 # Walk backwards from the end and
9773 # set break at any closing block braces at the same level.
9774 # But quit if we are not in a chain of blocks.
9775 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
9776 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
9777 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
9779 if ( $block_type_to_go[$i] ) {
9780 if ( $tokens_to_go[$i] eq '}' ) {
9781 set_forced_breakpoint($i);
9782 $saw_good_break = 1;
9786 # quit if we see anything besides words, function, blanks
9788 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
# $imin and $imax are the first and last token indexes to be written.
# Each end is moved inward by at most one position, and only when the
# token found there has type 'b' (blank).
9793 my $imax = $max_index_to_go;
9795 # trim any blank tokens
9796 if ( $max_index_to_go >= 0 ) {
9797 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9798 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9801 # anything left to write?
9802 if ( $imin <= $imax ) {
9804 # add a blank line before certain key types but not after a comment
# The active test below only excludes a preceding comment line ('#');
# the commented-out older form also excluded a preceding blank ('b').
9805 ##if ( $last_line_leading_type !~ /^[#b]/ ) {
9806 if ( $last_line_leading_type !~ /^[#]/ ) {
9808 my $leading_token = $tokens_to_go[$imin];
9809 my $leading_type = $types_to_go[$imin];
9811 # blank lines before subs except declarations and one-liners
9812 # MCONVERSION LOCATION - for sub tokenization change
9813 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9814 $want_blank = $rOpts->{'blank-lines-before-subs'}
9816 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9817 $imax ) !~ /^[\;\}]$/
9821 # break before all package declarations
9822 # MCONVERSION LOCATION - for tokenization change
9823 elsif ($leading_token =~ /^(package\s)/
9824 && $leading_type eq 'i' )
9826 $want_blank = $rOpts->{'blank-lines-before-packages'};
9829 # break before certain key blocks except one-liners
# Note that this is a plain `if`, not an `elsif`: it begins a second
# chain (BEGIN/END, then the keyword test further down) that is
# separate from the sub/package chain above.  BEGIN and END reuse the
# 'blank-lines-before-subs' option value.
9830 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9831 $want_blank = $rOpts->{'blank-lines-before-subs'}
9833 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9838 # Break before certain block types if we haven't had a
9839 # break at this level for a while. This is the
9840 # difficult decision..
9841 elsif ($leading_type eq 'k'
9842 && $last_line_leading_type ne 'b'
9843 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
# $lc is the counter stored in @nonblank_lines_at_depth for the level
# of the previous output line, defaulting to 0 when no counter exists.
# It is compared with the 'long-block-line-count' option below.
9845 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9846 if ( !defined($lc) ) { $lc = 0 }
9849 $rOpts->{'blanks-before-blocks'}
9850 && $lc >= $rOpts->{'long-block-line-count'}
9851 && $file_writer_object->get_consecutive_nonblank_lines() >=
9852 $rOpts->{'long-block-line-count'}
9854 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9861 # future: send blank line down normal path to VerticalAligner
# The vertical aligner is flushed first, then $want_blank is handed to
# the file writer's require_blank_code_lines().
9862 Perl::Tidy::VerticalAligner::flush();
9863 $file_writer_object->require_blank_code_lines($want_blank);
9867 # update blank line variables and count number of consecutive
9868 # non-blank, non-comment lines at this level
9869 $last_last_line_leading_level = $last_line_leading_level;
9870 $last_line_leading_level = $levels_to_go[$imin];
# negative levels are clamped to 0 so the value can index
# @nonblank_lines_at_depth
9871 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9872 $last_line_leading_type = $types_to_go[$imin];
# The per-level counter is incremented when this line is at the same
# level as the previous one, is neither blank nor comment, and a counter
# already exists; the assignment to 1 that follows is presumably the
# alternative branch (the `else` line is not visible here -- TODO confirm).
9873 if ( $last_line_leading_level == $last_last_line_leading_level
9874 && $last_line_leading_type ne 'b'
9875 && $last_line_leading_type ne '#'
9876 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9878 $nonblank_lines_at_depth[$last_line_leading_level]++;
9881 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9884 FORMATTER_DEBUG_FLAG_FLUSH && do {
9885 my ( $package, $file, $line ) = caller;
9887 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9890 # add a couple of extra terminal blank tokens
9893 # set all forced breakpoints for good list formatting
# $is_long_line is true when excess_line_length() for the tokens
# $imin .. $max_index_to_go is positive.
9894 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9897 $max_index_to_go > 0
9900 || $old_line_count_in_batch > 1
9901 || is_unbalanced_batch()
9903 $comma_count_in_batch
9904 && ( $rOpts_maximum_fields_per_table > 0
9905 || $rOpts_comma_arrow_breakpoints == 0 )
# NOTE(review): `||=` short-circuits, so scan_list() is not called at
# all when $saw_good_break was already set to 1 by the brace-chain loop
# above -- confirm that skipping scan_list() in that case is intended.
9910 $saw_good_break ||= scan_list();
9913 # let $ri_first and $ri_last be references to lists of
9914 # first and last tokens of line fragments to output..
9915 my ( $ri_first, $ri_last );
9917 # write a single line if..
9920 # we aren't allowed to add any newlines
9921 !$rOpts_add_newlines
9923 # or, we don't already have an interior breakpoint
9924 # and we didn't see a good breakpoint
9926 !$forced_breakpoint_count
9929 # and this line is 'short'
# single-line case: one fragment running from $imin to $imax
# (assigning through @$ri_first / @$ri_last autovivifies the array refs)
9934 @$ri_first = ($imin);
9935 @$ri_last = ($imax);
9938 # otherwise use multiple lines
# set_continuation_breaks() returns the two index lists plus a third
# value, captured as $colon_count, which gates insert_final_breaks()
# further down.
9941 ( $ri_first, $ri_last, my $colon_count ) =
9942 set_continuation_breaks($saw_good_break);
9944 break_all_chain_tokens( $ri_first, $ri_last );
9946 break_equals( $ri_first, $ri_last );
9948 # now we do a correction step to clean this up a bit
9949 # (The only time we would not do this is for debugging)
9950 if ( $rOpts->{'recombine'} ) {
9951 ( $ri_first, $ri_last ) =
9952 recombine_breakpoints( $ri_first, $ri_last );
9955 insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
9958 # do corrector step if -lp option is used
# $do_not_pad is only assigned here when $rOpts_line_up_parentheses is
# true; it is passed on to send_lines_to_vertical_aligner() either way.
9960 if ($rOpts_line_up_parentheses) {
9961 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9963 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9965 prepare_for_new_input_lines();
9967 # output any new -cscw block comment
9968 if ($cscw_block_comment) {
9970 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
sub note_added_semicolon {

    # Bookkeeping for a semicolon that has just been added to the output:
    # remember the input line of the first and of the most recent
    # addition, bump the running count, and mention it in the logfile.
    my $line_number = $input_line_number;

    # the first addition is recognized by the counter still being zero
    $first_added_semicolon_at = $line_number
      if ( $added_semicolon_count == 0 );
    $last_added_semicolon_at = $line_number;

    ++$added_semicolon_count;
    write_logfile_entry("Added ';' here\n");
}
sub note_deleted_semicolon {

    # Bookkeeping for a semicolon that has just been dropped from the
    # output: remember the input line of the first and of the most recent
    # deletion, bump the running count, and mention it in the logfile.
    my $line_number = $input_line_number;

    # the first deletion is recognized by the counter still being zero
    $first_deleted_semicolon_at = $line_number
      if ( $deleted_semicolon_count == 0 );
    $last_deleted_semicolon_at = $line_number;

    ++$deleted_semicolon_count;
    write_logfile_entry("Deleted unnecessary ';'\n");
}
sub note_embedded_tab {

    # Bookkeeping for a tab character found inside a quote or pattern:
    # count it, remember the input line of the first and of the most
    # recent occurrence, and write a logfile entry for it.
    ++$embedded_tab_count;
    $last_embedded_tab_at = $input_line_number;

    # only the first sighting sets this; later calls leave it alone
    $first_embedded_tab_at ||= $last_embedded_tab_at;

    # stop writing logfile entries once the count exceeds MAX_NAG_MESSAGES
    write_logfile_entry("Embedded tabs in quote or pattern\n")
      if ( $embedded_tab_count <= MAX_NAG_MESSAGES );
}
10004 sub starting_one_line_block {
10006 # after seeing an opening curly brace, look for the closing brace
10007 # and see if the entire block will fit on a line. This routine is
10008 # not always right because it uses the old whitespace, so a check
10009 # is made later (at the closing brace) to make sure we really
10010 # have a one-line block. We have to do this preliminary check,
10011 # though, because otherwise we would always break at a semicolon
10012 # within a one-line block if the block contains multiple statements.
# Of the arguments, the visible code uses: $j (index of the opening brace
# in the input token arrays), $jmax (last index scanned), and the array
# references $rtokens, $rtoken_type and $rblock_type.
10014 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
10018 # kill any current block - we can only go 1 deep
10019 destroy_one_line_block();
10022 # 1=distance from start of block to opening brace exceeds line length
10027 # shouldn't happen: there must have been a prior call to
10028 # store_token_to_go to put the opening brace in the output stream
10029 if ( $max_index_to_go < 0 ) {
10030 warning("program bug: store_token_to_go called incorrectly\n");
10031 report_definite_bug();
10035 # cannot use one-line blocks with cuddled else/elsif lines
10036 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
10041 my $block_type = $$rblock_type[$j];
10043 # find the starting keyword for this block (such as 'if', 'else', ...)
# a block type that is a single '{', '}', ';' or ':' starts at the
# most recently stored token
10045 if ( $block_type =~ /^[\{\}\;\:]$/ ) {
10046 $i_start = $max_index_to_go;
10049 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
10051 # For something like "if (xxx) {", the keyword "if" will be
10052 # just after the most recent break. This will be 0 unless
10053 # we have just killed a one-line block and are starting another.
10055 $i_start = $index_max_forced_break + 1;
10056 if ( $types_to_go[$i_start] eq 'b' ) {
10060 unless ( $tokens_to_go[$i_start] eq $block_type ) {
10065 # the previous nonblank token should start these block types
10067 ( $last_last_nonblank_token_to_go eq $block_type )
10068 || ( $block_type =~ /^sub/
10069 && $last_last_nonblank_token_to_go =~ /^sub/ )
10072 $i_start = $last_last_nonblank_index_to_go;
10075 # patch for SWITCH/CASE to retain one-line case/when blocks
10076 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
10077 $i_start = $index_max_forced_break + 1;
10078 if ( $types_to_go[$i_start] eq 'b' ) {
10081 unless ( $tokens_to_go[$i_start] eq $block_type ) {
# $pos is the running length estimate for the candidate one-line block;
# it starts from the length of what is already stored, from $i_start on
10090 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
10094 # see if length is too long to even start
10095 if ( $pos > $rOpts_maximum_line_length ) {
# scan the input tokens that follow the opening brace
10099 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
10101 # old whitespace could be arbitrarily large, so don't use it
10102 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
10103 else { $pos += length( $$rtokens[$i] ) }
10105 # Return false result if we exceed the maximum line length,
10106 if ( $pos > $rOpts_maximum_line_length ) {
10110 # or encounter another opening brace before finding the closing brace.
10111 elsif ($$rtokens[$i] eq '{'
10112 && $$rtoken_type[$i] eq '{'
10113 && $$rblock_type[$i] )
10118 # if we find our closing brace..
10119 elsif ($$rtokens[$i] eq '}'
10120 && $$rtoken_type[$i] eq '}'
10121 && $$rblock_type[$i] )
10124 # be sure any trailing comment also fits on the line
# (index of the token after the brace, stepping over one blank if present)
10126 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
10128 # Patch for one-line sort/map/grep/eval blocks with side comments:
10129 # We will ignore the side comment length for sort/map/grep/eval
10130 # because this can lead to statements which change every time
10131 # perltidy is run. Here is an example from Denis Moskowitz which
10132 # oscillates between these two states without this patch:
10135 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10139 ## $_->foo ne 'bar'
10140 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10144 # When the first line is input it gets broken apart by the main
10145 # line break logic in sub print_line_of_tokens.
10146 # When the second line is input it gets recombined by
10147 # print_line_of_tokens and passed to the output routines. The
10148 # output routines (set_continuation_breaks) do not break it apart
10149 # because the bond strengths are set to the highest possible value
10150 # for grep/map/eval/sort blocks, so the first version gets output.
10151 # It would be possible to fix this by changing bond strengths,
10152 # but they are high to prevent errors in older versions of perl.
10154 if ( $$rtoken_type[$i_nonblank] eq '#'
10155 && !$is_sort_map_grep{$block_type} )
10158 ## POSSIBLE FUTURE PATCH FOR IGNORING SIDE COMMENT LENGTHS
10159 ## WHEN CHECKING FOR ONE-LINE BLOCKS:
10160 ## if (flag set) then (just add 1 to pos)
10161 $pos += length( $$rtokens[$i_nonblank] );
10163 if ( $i_nonblank > $i + 1 ) {
10165 # source whitespace could be anything, assume
10166 # at least one space before the hash on output
10167 if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 }
10168 else { $pos += length( $$rtokens[ $i + 1 ] ) }
10171 if ( $pos >= $rOpts_maximum_line_length ) {
10176 # ok, it's a one-line block
# NOTE(review): the meaning of the second argument (20 here, 1 further
# down) is defined by create_one_line_block - confirm there
10177 create_one_line_block( $i_start, 20 );
10181 # just keep going for other characters
10186 # Allow certain types of new one-line blocks to form by joining
10187 # input lines. These can be safely done, but for other block types,
10188 # we keep old one-line blocks but do not form new ones. It is not
10189 # always a good idea to make as many one-line blocks as possible,
10190 # so other types are not done. The user can always use -mangle.
10191 if ( $is_sort_map_grep_eval{$block_type} ) {
10192 create_one_line_block( $i_start, 1 );
10198 sub unstore_token_to_go {
10200 # remove most recent token from output stream
# Takes no arguments; only $max_index_to_go is changed here.
# while the index is still positive it is simply stepped back by one
10201 if ( $max_index_to_go > 0 ) {
10202 $max_index_to_go--;
# presumably the alternative branch: the index is reset to the
# UNDEFINED_INDEX constant
10205 $max_index_to_go = UNDEFINED_INDEX;
10210 sub want_blank_line {
# Forward a blank-line request to $file_writer_object by calling its
# want_blank_line method. Takes no arguments.
10212 $file_writer_object->want_blank_line();
10215 sub write_unindented_line {
# Pass the first argument ($_[0]) unchanged to the write_line method of
# $file_writer_object.
10217 $file_writer_object->write_line( $_[0] );
10222 # Undo continuation indentation in certain sequences
10223 # For example, we can undo continuation indentation in sort/map/grep chains
10224 # my $dat1 = pack( "n*",
10225 # map { $_, $lookup->{$_} }
10226 # sort { $a <=> $b }
10227 # grep { $lookup->{$_} ne $default } keys %$lookup );
10228 # To align the map/sort/grep keywords like this:
10229 # my $dat1 = pack( "n*",
10230 # map { $_, $lookup->{$_} }
10231 # sort { $a <=> $b }
10232 # grep { $lookup->{$_} ne $default } keys %$lookup );
10233 my ( $ri_first, $ri_last ) = @_;
10234 my ( $line_1, $line_2, $lev_last );
10235 my $this_line_is_semicolon_terminated;
10236 my $max_line = @$ri_first - 1;
10238 # looking at each line of this batch..
10239 # We are looking at leading tokens and looking for a sequence
10240 # all at the same level and higher level than enclosing lines.
10241 foreach my $line ( 0 .. $max_line ) {
10243 my $ibeg = $$ri_first[$line];
10244 my $lev = $levels_to_go[$ibeg];
10247 # if we have started a chain..
10250 # see if it continues..
10251 if ( $lev == $lev_last ) {
10252 if ( $types_to_go[$ibeg] eq 'k'
10253 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10256 # chain continues...
10257 # check for chain ending at end of a statement
10258 if ( $line == $max_line ) {
10260 # see if this line ends a statement
10261 my $iend = $$ri_last[$line];
10262 $this_line_is_semicolon_terminated =
10263 $types_to_go[$iend] eq ';'
10265 # with possible side comment
10266 || ( $types_to_go[$iend] eq '#'
10267 && $iend - $ibeg >= 2
10268 && $types_to_go[ $iend - 2 ] eq ';'
10269 && $types_to_go[ $iend - 1 ] eq 'b' );
10271 $line_2 = $line if ($this_line_is_semicolon_terminated);
10279 elsif ( $lev < $lev_last ) {
10281 # chain ends with previous line
10282 $line_2 = $line - 1;
10284 elsif ( $lev > $lev_last ) {
10290 # undo the continuation indentation if a chain ends
10291 if ( defined($line_2) && defined($line_1) ) {
10292 my $continuation_line_count = $line_2 - $line_1 + 1;
10293 @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10294 (0) x ($continuation_line_count);
10295 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10296 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
10301 # not in a chain yet..
10304 # look for start of a new sort/map/grep chain
10305 if ( $lev > $lev_last ) {
10306 if ( $types_to_go[$ibeg] eq 'k'
10307 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10320 # If there is a single, long parameter within parens, like this:
10322 # $self->command( "/msg "
10323 # . $infoline->chan
10324 # . " You said $1, but did you know that it's square was "
10325 # . $1 * $1 . " ?" );
10327 # we can remove the continuation indentation of the 2nd and higher lines
10328 # to achieve this effect, which is more pleasing:
10330 # $self->command("/msg "
10331 # . $infoline->chan
10332 # . " You said $1, but did you know that it's square was "
10333 # . $1 * $1 . " ?");
10335 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
10336 my $max_line = @$ri_first - 1;
10338 # must be multiple lines
10339 return unless $max_line > $line_open;
10341 my $lev_start = $levels_to_go[$i_start];
10342 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
10344 # see if all additional lines in this container have continuation
10347 my $line_1 = 1 + $line_open;
10348 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
10349 my $ibeg = $$ri_first[$n];
10350 my $iend = $$ri_last[$n];
10351 if ( $ibeg eq $closing_index ) { $n--; last }
10352 return if ( $lev_start != $levels_to_go[$ibeg] );
10353 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10354 last if ( $closing_index <= $iend );
10357 # we can reduce the indentation of all continuation lines
10358 my $continuation_line_count = $n - $line_open;
10359 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10360 (0) x ($continuation_line_count);
10361 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10362 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
10365 sub set_logical_padding {
10367 # Look at a batch of lines and see if extra padding can improve the
10368 # alignment when there are certain leading operators. Here is an
10369 # example, in which some extra space is introduced before
10370 # '( $Year' to make it line up with the subsequent lines:
10372 # if ( ( $Year < 1601 )
10373 # || ( $Year > 2899 )
10374 # || ( $EndYear < 1601 )
10375 # || ( $EndYear > 2899 ) )
10377 # &Error_OutOfRange;
# Arguments: $ri_first and $ri_last are references to arrays giving, for
# each line of the batch, the index of its first and last token.
# Padding is applied by prepending spaces to $tokens_to_go[$ipad].
10380 my ( $ri_first, $ri_last ) = @_;
10381 my $max_line = @$ri_first - 1;
10383 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
10384 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10386 # looking at each line of this batch..
# (the last line is excluded because each pass also looks at line+1)
10387 foreach $line ( 0 .. $max_line - 1 ) {
10389 # see if the next line begins with a logical operator
10390 $ibeg = $$ri_first[$line];
10391 $iend = $$ri_last[$line];
10392 $ibeg_next = $$ri_first[ $line + 1 ];
10393 $tok_next = $tokens_to_go[$ibeg_next];
10394 $type_next = $types_to_go[$ibeg_next];
# tokens starting with a word character are looked up by token text,
# all others by token type
10396 $has_leading_op_next = ( $tok_next =~ /^\w/ )
10397 ? $is_chain_operator{$tok_next} # and, or
10398 : $is_chain_operator{$type_next}; # + - * / : ? && ||
10400 next unless ($has_leading_op_next);
10402 # next line must not be at lesser depth
10404 if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
10406 # identify the token in this line to be padded on the left
10409 # handle lines at same depth...
10410 if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
10412 # if this is not first line of the batch ...
10415 # and we have leading operator..
10416 next if $has_leading_op;
10418 # Introduce padding if..
10419 # 1. the previous line is at lesser depth, or
10420 # 2. the previous line ends in an assignment
10421 # 3. the previous line ends in a 'return'
10422 # 4. the previous line ends in a comma
10423 # Example 1: previous line at lesser depth
10424 # if ( ( $Year < 1601 ) # <- we are here but
10425 # || ( $Year > 2899 ) # list has not yet
10426 # || ( $EndYear < 1601 ) # collapsed vertically
10427 # || ( $EndYear > 2899 ) )
10430 # Example 2: previous line ending in assignment:
10432 # $year % 4 ? 0 # <- We are here
10433 # : $year % 100 ? 1
10434 # : $year % 400 ? 0
10437 # Example 3: previous line ending in comma:
10444 # be sure levels agree (do not indent after an indented 'if')
10445 next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
10447 # allow padding on first line after a comma but only if:
10448 # (1) this is line 2 and
10449 # (2) there are more than three lines and
10450 # (3) lines 3 and 4 have the same leading operator
10451 # These rules try to prevent padding within a long
10452 # comma-separated list.
10454 if ( $types_to_go[$iendm] eq ','
10458 my $ibeg_next_next = $$ri_first[ $line + 2 ];
10459 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
10460 $ok_comma = $tok_next_next eq $tok_next;
10465 $is_assignment{ $types_to_go[$iendm] }
10467 || ( $nesting_depth_to_go[$ibegm] <
10468 $nesting_depth_to_go[$ibeg] )
10469 || ( $types_to_go[$iendm] eq 'k'
10470 && $tokens_to_go[$iendm] eq 'return' )
10473 # we will add padding before the first token
10477 # for first line of the batch..
10480 # WARNING: Never indent if first line is starting in a
10481 # continued quote, which would change the quote.
10482 next if $starting_in_quote;
10484 # if this is text after closing '}'
10485 # then look for an interior token to pad
10486 if ( $types_to_go[$ibeg] eq '}' ) {
10490 # otherwise, we might pad if it looks really good
10493 # we might pad token $ibeg, so be sure that it
10494 # is at the same depth as the next line.
10496 if ( $nesting_depth_to_go[$ibeg] !=
10497 $nesting_depth_to_go[$ibeg_next] );
10499 # We can pad on line 1 of a statement if at least 3
10500 # lines will be aligned. Otherwise, it
10501 # can look very confusing.
10503 # We have to be careful not to pad if there are too few
10504 # lines. The current rule is:
10505 # (1) in general we require at least 3 consecutive lines
10506 # with the same leading chain operator token,
10507 # (2) but an exception is that we only require two lines
10508 # with leading colons if there are no more lines. For example,
10509 # the first $i in the following snippet would get padding
10510 # by the second rule:
10512 # $i == 1 ? ( "First", "Color" )
10513 # : $i == 2 ? ( "Then", "Rarity" )
10514 # : ( "Then", "Name" );
10516 if ( $max_line > 1 ) {
10517 my $leading_token = $tokens_to_go[$ibeg_next];
10520 # never indent line 1 of a '.' series because
10521 # previous line is most likely at same level.
10522 # TODO: we should also look at the leading_spaces
10523 # of the last output line and skip if it is same
10525 next if ( $leading_token eq '.' );
10528 foreach my $l ( 2 .. 3 ) {
10529 last if ( $line + $l > $max_line );
10530 my $ibeg_next_next = $$ri_first[ $line + $l ];
10531 if ( $tokens_to_go[$ibeg_next_next] ne
10534 $tokens_differ = 1;
10539 next if ($tokens_differ);
10540 next if ( $count < 3 && $leading_token ne ':' );
10550 # find interior token to pad if necessary
10551 if ( !defined($ipad) ) {
10553 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
10555 # find any unclosed container
10557 unless ( $type_sequence_to_go[$i]
10558 && $mate_index_to_go[$i] > $iend );
10560 # find next nonblank token to pad
10562 if ( $types_to_go[$ipad] eq 'b' ) {
10564 last if ( $ipad > $iend );
10570 # We cannot pad a leading token at the lowest level because
10571 # it could cause a bug in which the starting indentation
10572 # level is guessed incorrectly each time the code is run
10573 # through perltidy, thus causing the code to march off to
10574 # the right. For example, the following snippet would have
10577 ## ov_method mycan( $package, '(""' ), $package
10578 ## or ov_method mycan( $package, '(0+' ), $package
10579 ## or ov_method mycan( $package, '(bool' ), $package
10580 ## or ov_method mycan( $package, '(nomethod' ), $package;
10582 # If this snippet is within a block this won't happen
10583 # unless the user just processes the snippet alone within
10584 # an editor. In that case either the user will see and
10585 # fix the problem or it will be corrected next time the
10586 # entire file is processed with perltidy.
10587 next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
10589 # next line must not be at greater depth
10590 my $iend_next = $$ri_last[ $line + 1 ];
10592 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
10593 $nesting_depth_to_go[$ipad] );
10595 # lines must be somewhat similar to be padded..
10596 my $inext_next = $ibeg_next + 1;
10597 if ( $types_to_go[$inext_next] eq 'b' ) {
10600 my $type = $types_to_go[$ipad];
10601 my $type_next = $types_to_go[ $ipad + 1 ];
10603 # see if there are multiple continuation lines
10604 my $logical_continuation_lines = 1;
10605 if ( $line + 2 <= $max_line ) {
10606 my $leading_token = $tokens_to_go[$ibeg_next];
10607 my $ibeg_next_next = $$ri_first[ $line + 2 ];
10608 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
10609 && $nesting_depth_to_go[$ibeg_next] eq
10610 $nesting_depth_to_go[$ibeg_next_next] )
10612 $logical_continuation_lines++;
10616 # see if leading types match
10617 my $types_match = $types_to_go[$inext_next] eq $type;
10618 my $matches_without_bang;
10620 # if first line has leading ! then compare the following token
10621 if ( !$types_match && $type eq '!' ) {
10622 $types_match = $matches_without_bang =
10623 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
10628 # either we have multiple continuation lines to follow
10629 # and we are not padding the first token
10630 ( $logical_continuation_lines > 1 && $ipad > 0 )
10638 # and keywords must match if keyword
10641 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
10647 #----------------------begin special checks--------------
10650 # A check is needed before we can make the pad.
10651 # If we are in a list with some long items, we want each
10652 # item to stand out. So in the following example, the
10653 # first line beginning with '$casefold->' would look good
10654 # padded to align with the next line, but then it
10655 # would be indented more than the last line, so we
10659 # $casefold->{code} eq '0041'
10660 # && $casefold->{status} eq 'C'
10661 # && $casefold->{mapping} eq '0061',
10666 # It would be faster, and almost as good, to use a comma
10667 # count, and not pad if comma_count > 1 and the previous
10668 # line did not end with a comma.
10672 my $ibg = $$ri_first[ $line + 1 ];
10673 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
10675 # just use simplified formula for leading spaces to avoid
10676 # needless sub calls
10677 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
10679 # look at each line beyond the next ..
10681 foreach $l ( $line + 2 .. $max_line ) {
10682 my $ibg = $$ri_first[$l];
10684 # quit looking at the end of this container
10686 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
10687 || ( $nesting_depth_to_go[$ibg] < $depth );
10689 # cannot do the pad if a later line would be
10691 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
10697 # don't pad if we end in a broken list
10698 if ( $l == $max_line ) {
10699 my $i2 = $$ri_last[$l];
10700 if ( $types_to_go[$i2] eq '#' ) {
10701 my $i1 = $$ri_first[$l];
10704 terminal_type( \@types_to_go, \@block_type_to_go, $i1,
10711 # a minus may introduce a quoted variable, and we will
10712 # add the pad only if this line begins with a bare word,
10713 # such as for the word 'Button' here:
10715 # Button => "Print letter \"~$_\"",
10716 # -command => [ sub { print "$_[0]\n" }, $_ ],
10717 # -accelerator => "Meta+$_"
10720 # On the other hand, if 'Button' is quoted, it looks best
10723 # 'Button' => "Print letter \"~$_\"",
10724 # -command => [ sub { print "$_[0]\n" }, $_ ],
10725 # -accelerator => "Meta+$_"
10727 if ( $types_to_go[$ibeg_next] eq 'm' ) {
10728 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
10731 next unless $ok_to_pad;
10733 #----------------------end special check---------------
# the pad is the difference between the text lengths that precede the
# two tokens being lined up
10735 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
10736 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
10737 $pad_spaces = $length_2 - $length_1;
10739 # If the first line has a leading ! and the second does
10740 # not, then remove one space to try to align the next
10741 # leading characters, which are often the same. For example:
10743 # || $ts == $self->Holder
10744 # || $self->Holder->Type eq "Arena" )
10746 # This usually helps readability, but if there are subsequent
10747 # ! operators things will still get messed up. For example:
10749 # if ( !exists $Net::DNS::typesbyname{$qtype}
10750 # && exists $Net::DNS::classesbyname{$qtype}
10751 # && !exists $Net::DNS::classesbyname{$qclass}
10752 # && exists $Net::DNS::typesbyname{$qclass} )
10753 # We can't fix that.
10754 if ($matches_without_bang) { $pad_spaces-- }
10756 # make sure this won't change if -lp is used
10757 my $indentation_1 = $leading_spaces_to_go[$ibeg];
10758 if ( ref($indentation_1) ) {
10759 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
10760 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
10761 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
10767 # we might be able to handle a pad of -1 by removing a blank
10769 if ( $pad_spaces < 0 ) {
10771 if ( $pad_spaces == -1 ) {
10772 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
10773 $tokens_to_go[ $ipad - 1 ] = '';
10779 # now apply any padding for alignment
10780 if ( $ipad >= 0 && $pad_spaces ) {
# only pad when the padded line still fits in the maximum line length
10782 my $length_t = total_line_length( $ibeg, $iend );
10783 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
10784 $tokens_to_go[$ipad] =
10785 ' ' x $pad_spaces . $tokens_to_go[$ipad];
# carry this line's result forward for the next pass of the loop
10793 $has_leading_op = $has_leading_op_next;
10794 } # end of loop over lines
10798 sub correct_lp_indentation {
10800 # When the -lp option is used, we need to make a last pass through
10801 # each line to correct the indentation positions in case they differ
10802 # from the predictions. This is necessary because perltidy uses a
10803 # predictor/corrector method for aligning with opening parens. The
10804 # predictor is usually good, but sometimes stumbles. The corrector
10805 # tries to patch things up once the actual opening paren locations
# Arguments: $ri_first and $ri_last are references to arrays giving, for
# each line of the batch, the index of its first and last token.
# Returns: the $do_not_pad flag described below.
10807 my ( $ri_first, $ri_last ) = @_;
10808 my $do_not_pad = 0;
10810 # Note on flag '$do_not_pad':
10811 # We want to avoid a situation like this, where the aligner inserts
10812 # whitespace before the '=' to align it with a previous '=', because
10813 # otherwise the parens might become mis-aligned in a situation like
10814 # this, where the '=' has become aligned with the previous line,
10815 # pushing the opening '(' forward beyond where we want it.
10817 # $mkFloor::currentRoom = '';
10818 # $mkFloor::c_entry = $c->Entry(
10820 # -relief => 'sunken',
10824 # We leave it to the aligner to decide how to do this.
10826 # first remove continuation indentation if appropriate
10827 my $max_line = @$ri_first - 1;
10829 # looking at each line of this batch..
10830 my ( $ibeg, $iend );
10832 foreach $line ( 0 .. $max_line ) {
10833 $ibeg = $$ri_first[$line];
10834 $iend = $$ri_last[$line];
10836 # looking at each token in this output line..
10838 foreach $i ( $ibeg .. $iend ) {
10840 # How many space characters to place before this token
10841 # for special alignment. Actual padding is done in the
10844 # looking for next unvisited indentation item
10845 my $indentation = $leading_spaces_to_go[$i];
10846 if ( !$indentation->get_MARKED() ) {
10847 $indentation->set_MARKED(1);
10849 # looking for indentation item for which we are aligning
10850 # with parens, braces, and brackets
10851 next unless ( $indentation->get_ALIGN_PAREN() );
10853 # skip closed container on this line
10854 if ( $i > $ibeg ) {
10856 if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
10857 if ( $type_sequence_to_go[$im]
10858 && $mate_index_to_go[$im] <= $iend )
10864 if ( $line == 1 && $i == $ibeg ) {
10868 # Ok, let's see what the error is and try to fix it
10870 my $predicted_pos = $indentation->get_SPACES();
10871 if ( $i > $ibeg ) {
10873 # token is mid-line - use length to previous token
10874 $actual_pos = total_line_length( $ibeg, $i - 1 );
10876 # for mid-line token, we must check to see if all
10877 # additional lines have continuation indentation,
10878 # and remove it if so. Otherwise, we do not get
10880 my $closing_index = $indentation->get_CLOSED();
10881 if ( $closing_index > $iend ) {
10882 my $ibeg_next = $$ri_first[ $line + 1 ];
10883 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
10884 undo_lp_ci( $line, $i, $closing_index, $ri_first,
10889 elsif ( $line > 0 ) {
10891 # handle case where token starts a new line;
10892 # use length of previous line
10893 my $ibegm = $$ri_first[ $line - 1 ];
10894 my $iendm = $$ri_last[ $line - 1 ];
10895 $actual_pos = total_line_length( $ibegm, $iendm );
10899 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
10903 # token is first character of first line of batch
10904 $actual_pos = $predicted_pos;
# signed error: positive when the actual position is to the right of
# the predicted one
10907 my $move_right = $actual_pos - $predicted_pos;
10909 # done if no error to correct (gnu2.t)
10910 if ( $move_right == 0 ) {
10911 $indentation->set_RECOVERABLE_SPACES($move_right);
10915 # if we have not seen closure for this indentation in
10916 # this batch, we can only pass on a request to the
10918 my $closing_index = $indentation->get_CLOSED();
10920 if ( $closing_index < 0 ) {
10921 $indentation->set_RECOVERABLE_SPACES($move_right);
10925 # If necessary, look ahead to see if there is really any
10926 # leading whitespace dependent on this whitespace, and
10927 # also find the longest line using this whitespace.
10928 # Since it is always safe to move left if there are no
10929 # dependents, we only need to do this if we may have
10930 # dependent nodes or need to move right.
10932 my $right_margin = 0;
10933 my $have_child = $indentation->get_HAVE_CHILD();
# %saw_indentation collects each distinct indentation object seen,
# keyed by the object itself (stringified) with the object as value
10935 my %saw_indentation;
10936 my $line_count = 1;
10937 $saw_indentation{$indentation} = $indentation;
10939 if ( $have_child || $move_right > 0 ) {
10941 my $max_length = 0;
10942 if ( $i == $ibeg ) {
10943 $max_length = total_line_length( $ibeg, $iend );
10946 # look ahead at the rest of the lines of this batch..
10948 foreach $line_t ( $line + 1 .. $max_line ) {
10949 my $ibeg_t = $$ri_first[$line_t];
10950 my $iend_t = $$ri_last[$line_t];
10951 last if ( $closing_index <= $ibeg_t );
10953 # remember all different indentation objects
10954 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
10955 $saw_indentation{$indentation_t} = $indentation_t;
10958 # remember longest line in the group
10959 my $length_t = total_line_length( $ibeg_t, $iend_t );
10960 if ( $length_t > $max_length ) {
10961 $max_length = $length_t;
# room left between the longest line found and the maximum line
# length, never negative
10964 $right_margin = $rOpts_maximum_line_length - $max_length;
10965 if ( $right_margin < 0 ) { $right_margin = 0 }
10968 my $first_line_comma_count =
10969 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
10970 my $comma_count = $indentation->get_COMMA_COUNT();
10971 my $arrow_count = $indentation->get_ARROW_COUNT();
10973 # This is a simple approximate test for vertical alignment:
10974 # if we broke just after an opening paren, brace, bracket,
10975 # and there are 2 or more commas in the first line,
10976 # and there are no '=>'s,
10977 # then we are probably vertically aligned. We could set
10978 # an exact flag in sub scan_list, but this is good
10980 my $indentation_count = keys %saw_indentation;
10981 my $is_vertically_aligned =
10983 && $first_line_comma_count > 1
10984 && $indentation_count == 1
10985 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
10987 # Make the move if possible ..
10990 # we can always move left
10993 # but we should only move right if we are sure it will
10994 # not spoil vertical alignment
10995 || ( $comma_count == 0 )
10996 || ( $comma_count > 0 && !$is_vertically_aligned )
11000 ( $move_right <= $right_margin )
# apply the same shift to every indentation object collected above
11004 foreach ( keys %saw_indentation ) {
11005 $saw_indentation{$_}
11006 ->permanently_decrease_AVAILABLE_SPACES( -$move );
11010 # Otherwise, record what we want and the vertical aligner
11011 # will try to recover it.
11013 $indentation->set_RECOVERABLE_SPACES($move_right);
11018 return $do_not_pad;
11021 # flush is called to output any tokens in the pipeline, so that
11022 # an alternate source of lines can be written in the correct order
11025 destroy_one_line_block();
11026 output_line_to_go();
11027 Perl::Tidy::VerticalAligner::flush();
11030 sub reset_block_text_accumulator {
# Clear the state used while accumulating the text that leads up to a
# block. Takes no arguments.
11032 # save text after 'if' and 'elsif' to append after 'else'
11033 if ($accumulating_text_for_block) {
# only text collected for exactly 'if' or 'elsif' is pushed onto the
# saved list
11035 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
11036 push @{$rleading_block_if_elsif_text}, $leading_block_text;
# return every accumulator variable to its empty / zero value
11039 $accumulating_text_for_block = "";
11040 $leading_block_text = "";
11041 $leading_block_text_level = 0;
11042 $leading_block_text_length_exceeded = 0;
11043 $leading_block_text_line_number = 0;
11044 $leading_block_text_line_length = 0;
11047 sub set_block_text_accumulator {
# Begin accumulating leading text for the block keyword held in
# $tokens_to_go[$i].
# NOTE(review): $i is presumably the token index passed by the caller;
# the line that sets it is not visible here - confirm.
11049 $accumulating_text_for_block = $tokens_to_go[$i];
# a keyword not starting with 'els' (so not else/elsif) starts a fresh
# list of saved if/elsif text
11050 if ( $accumulating_text_for_block !~ /^els/ ) {
11051 $rleading_block_if_elsif_text = [];
11053 $leading_block_text = "";
11054 $leading_block_text_level = $levels_to_go[$i];
11055 $leading_block_text_line_number =
11056 $vertical_aligner_object->get_output_line_number();
11057 $leading_block_text_length_exceeded = 0;
11059 # this will contain the column number of the last character
11060 # of the closing side comment
11061 ##$csc_last_label="" unless $csc_last_label;
# the initial estimate adds the label length, the keyword length, the
# closing-side-comment prefix length, the indentation for this level,
# and 3 more columns
# NOTE(review): what the extra 3 columns stand for is not shown here
11062 $leading_block_text_line_length =
11063 length($csc_last_label) +
11064 length($accumulating_text_for_block) +
11065 length( $rOpts->{'closing-side-comment-prefix'} ) +
11066 $leading_block_text_level * $rOpts_indent_columns + 3;
11069 sub accumulate_block_text {
# Append the text of token $i to $leading_block_text while limits allow;
# once they do not, mark the text as truncated by appending '...'.
# NOTE(review): $i is presumably the token index passed by the caller;
# the line that sets it is not visible here - confirm.
11072 # accumulate leading text for -csc, ignoring any side comments
11073 if ( $accumulating_text_for_block
11074 && !$leading_block_text_length_exceeded
11075 && $types_to_go[$i] ne '#' )
# one extra column is counted for the first token of a batch, matching
# the space that is added for it further down
11078 my $added_length = length( $tokens_to_go[$i] );
11079 $added_length += 1 if $i == 0;
11080 my $new_line_length = $leading_block_text_line_length + $added_length;
11082 # we can add this text if we don't exceed some limits..
11085 # we must not have already exceeded the text length limit
11086 length($leading_block_text) <
11087 $rOpts_closing_side_comment_maximum_text
11090 # the new total line length must be below the line length limit
11091 # or the new length must be below the text length limit
11092 # (ie, we may allow one token to exceed the text length limit)
11093 && ( $new_line_length < $rOpts_maximum_line_length
11094 || length($leading_block_text) + $added_length <
11095 $rOpts_closing_side_comment_maximum_text )
11097 # UNLESS: we are adding a closing paren before the brace we seek.
11098 # This is an attempt to avoid situations where the ... to be
11099 # added are longer than the omitted right paren, as in:
11101 # foreach my $item (@a_rather_long_variable_name_here) {
11103 # } ## end foreach my $item (@a_rather_long_variable_name_here...
11106 $tokens_to_go[$i] eq ')'
# the brace being sought is recognised by a block type equal to the
# keyword being accumulated, one or two tokens ahead
11109 $i + 1 <= $max_index_to_go
11110 && $block_type_to_go[ $i + 1 ] eq
11111 $accumulating_text_for_block
11113 || ( $i + 2 <= $max_index_to_go
11114 && $block_type_to_go[ $i + 2 ] eq
11115 $accumulating_text_for_block )
11121 # add an extra space at each newline
11122 if ( $i == 0 ) { $leading_block_text .= ' ' }
11124 # add the token text
11125 $leading_block_text .= $tokens_to_go[$i];
11126 $leading_block_text_line_length = $new_line_length;
11129 # show that text was truncated if necessary
# (a blank token never triggers the truncation marker)
11130 elsif ( $types_to_go[$i] ne 'b' ) {
11131 $leading_block_text_length_exceeded = 1;
11132 $leading_block_text .= '...';
# Lookup table of the block keywords for which sub accumulate_csc_text
# may start accumulating leading text; each keyword below maps to 1.
11138 my %is_if_elsif_else_unless_while_until_for_foreach;
11142 # These block types may have text between the keyword and opening
11143 # curly. Note: 'else' does not, but must be included to allow trailing
11144 # if/elsif text to be appended.
11145 # patch for SWITCH/CASE: added 'case' and 'when'
11146 @_ = qw(if elsif else unless while until for foreach case when);
11147 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11150 sub accumulate_csc_text {
11152 # Called once per output buffer when -csc is used. Accumulates
11153 # the text placed after certain closing block braces.
11154 # Defines and returns the following for this buffer:
11156 my $block_leading_text = ""; # the leading text of the last '}'
11157 my $rblock_leading_if_elsif_text; # if/elsif text saved with that leading text
11158 my $i_block_leading_text =
11159 -1; # index of token owning block_leading_text
11160 my $block_line_count = 100; # how many lines the block spans
11161 my $terminal_type = 'b'; # type of last nonblank token
11162 my $i_terminal = 0; # index of last nonblank token
11163 my $terminal_block_type = ""; # block type of last nonblank token
11165 # update most recent statement label
11166 $csc_last_label = "" unless ($csc_last_label);
11167 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
11168 my $block_label = $csc_last_label;
11170 # Loop over all tokens of this batch
11171 for my $i ( 0 .. $max_index_to_go ) {
11172 my $type = $types_to_go[$i];
11173 my $block_type = $block_type_to_go[$i];
11174 my $token = $tokens_to_go[$i];
11176 # remember last nonblank token type
11177 if ( $type ne '#' && $type ne 'b' ) {
11178 $terminal_type = $type;
11179 $terminal_block_type = $block_type;
11183 my $type_sequence = $type_sequence_to_go[$i];
11184 if ( $block_type && $type_sequence ) {
11186 if ( $token eq '}' ) {
11188 # restore any leading text saved when we entered this block
11189 if ( defined( $block_leading_text{$type_sequence} ) ) {
11190 ( $block_leading_text, $rblock_leading_if_elsif_text ) =
11191 @{ $block_leading_text{$type_sequence} };
11192 $i_block_leading_text = $i;
11193 delete $block_leading_text{$type_sequence};
11194 $rleading_block_if_elsif_text =
11195 $rblock_leading_if_elsif_text;
# restore the label that was saved when this block was opened
11198 if ( defined( $csc_block_label{$type_sequence} ) ) {
11199 $block_label = $csc_block_label{$type_sequence};
11200 delete $csc_block_label{$type_sequence};
11203 # if we run into a '}' then we probably started accumulating
11204 # at something like a trailing 'if' clause..no harm done.
11205 if ( $accumulating_text_for_block
11206 && $levels_to_go[$i] <= $leading_block_text_level )
11208 my $lev = $levels_to_go[$i]; # NOTE(review): $lev is not used in the lines visible here
11209 reset_block_text_accumulator();
# the block line count is the current output line number minus the
# line number saved at the opening brace, plus 1
11212 if ( defined( $block_opening_line_number{$type_sequence} ) )
11214 my $output_line_number =
11215 $vertical_aligner_object->get_output_line_number();
11216 $block_line_count =
11217 $output_line_number -
11218 $block_opening_line_number{$type_sequence} + 1;
11219 delete $block_opening_line_number{$type_sequence};
11223 # Error: block opening line undefined for this line..
11224 # This shouldn't be possible, but it is not a
11225 # significant problem.
11229 elsif ( $token eq '{' ) {
11232 $vertical_aligner_object->get_output_line_number();
11233 $block_opening_line_number{$type_sequence} = $line_number;
11235 # set a label for this block, except for
11236 # a bare block which already has the label
11237 # A label can only be used on the next {
11238 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
11239 $csc_block_label{$type_sequence} = $csc_last_label;
11240 $csc_last_label = "";
11242 if ( $accumulating_text_for_block
11243 && $levels_to_go[$i] == $leading_block_text_level )
11246 if ( $accumulating_text_for_block eq $block_type ) {
11248 # save any leading text before we enter this block
11249 $block_leading_text{$type_sequence} = [
11250 $leading_block_text,
11251 $rleading_block_if_elsif_text
11253 $block_opening_line_number{$type_sequence} =
11254 $leading_block_text_line_number;
11255 reset_block_text_accumulator();
11259 # shouldn't happen, but not a serious error.
11260 # We were accumulating -csc text for block type
11261 # $accumulating_text_for_block and unexpectedly
11262 # encountered a '{' for block type $block_type.
11269 && $csc_new_statement_ok
11270 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
11271 && $token =~ /$closing_side_comment_list_pattern/o )
11273 set_block_text_accumulator($i);
11277 # note: ignoring type 'q' because of tricks being played
11278 # with 'q' for hanging side comments
11279 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
11280 $csc_new_statement_ok =
11281 ( $block_type || $type eq 'J' || $type eq ';' );
11284 && $accumulating_text_for_block
11285 && $levels_to_go[$i] == $leading_block_text_level )
11287 reset_block_text_accumulator();
11290 accumulate_block_text($i);
11295 # Treat an 'else' block specially by adding preceding 'if' and
11296 # 'elsif' text. Otherwise, the 'end else' is not helpful,
11297 # especially for cuddled-else formatting.
11298 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
11299 $block_leading_text =
11300 make_else_csc_text( $i_terminal, $terminal_block_type,
11301 $block_leading_text, $rblock_leading_if_elsif_text );
11304 # if this line ends in a label then remember it for the next pass
11305 $csc_last_label = "";
11306 if ( $terminal_type eq 'J' ) {
11307 $csc_last_label = $tokens_to_go[$i_terminal];
11310 return ( $terminal_type, $i_terminal, $i_block_leading_text,
11311 $block_leading_text, $block_line_count, $block_label );
11315 sub make_else_csc_text {
11317 # Create additional -csc text for an 'else' and optionally 'elsif',
11318 # depending on the value of switch
11319 # $rOpts_closing_side_comment_else_flag:
11321 # = 0 add 'if' text to trailing else
11322 # = 1 same as 0 plus:
11323 # add 'if' to 'elsif's if it can fit in line length
11324 # add last 'elsif' to trailing else if it can fit in one line
11325 # = 2 same as 1 but do not check whether the line length is exceeded
11327 # $rif_elsif_text = a reference to a list of all previous closing
11328 # side comments created for this if block
# Other arguments:
# $i_terminal = index of the terminal token; its level is used in the
# line length estimate below
# $block_type = block type of that token (the caller passes one
# matching /^els/)
# $block_leading_text = starting value of the text which is built
11330 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
11331 my $csc_text = $block_leading_text;
11333 if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
# nothing can be added if no previous if/elsif text was saved
11338 my $count = @{$rif_elsif_text};
11339 return $csc_text unless ($count);
11341 my $if_text = '[ if' . $rif_elsif_text->[0];
11343 # always show the leading 'if' text on 'else'
11344 if ( $block_type eq 'else' ) {
11345 $csc_text .= $if_text;
11348 # see if that's all
11349 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
# text for the most recent 'elsif'; a leading ' [...' shows that
# intermediate items were skipped
11353 my $last_elsif_text = "";
11354 if ( $count > 1 ) {
11355 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
11356 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
11359 # tentatively append one more item
11360 my $saved_text = $csc_text;
11361 if ( $block_type eq 'else' ) {
11362 $csc_text .= $last_elsif_text;
11365 $csc_text .= ' ' . $if_text;
11368 # all done if no length checks requested
11369 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
11373 # undo it if line length exceeded
# (the estimate sums the csc text, the block type, the csc prefix,
# the indentation for this level, and 3 more columns)
11375 length($csc_text) +
11376 length($block_type) +
11377 length( $rOpts->{'closing-side-comment-prefix'} ) +
11378 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
11379 if ( $length > $rOpts_maximum_line_length ) {
11380 $csc_text = $saved_text;
11385 { # sub balance_csc_text
# NOTE(review): %matching_char is used below but declared in lines not
# visible here; presumably it maps each bracket character to its
# mate -- confirm.
11400 sub balance_csc_text {
11402 # Append characters to balance a closing side comment so that editors
11403 # such as vim can correctly jump through code.
11405 # input = ## end foreach my $foo ( sort { $b ...
11406 # output = ## end foreach my $foo ( sort { $b ...})
11408 # NOTE: This routine does not currently filter out structures within
11409 # quoted text because the bounce algorithms in text editors do not
11410 # necessarily do this either (a version of vim was checked and
11411 # did not do this).
11413 # Some complex examples which will cause trouble for some editors:
11414 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
11415 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
11416 # if ( $1 eq '{' ) {
11417 # test file test1/braces.pl has many such examples.
11421 # loop to examine characters one-by-one, RIGHT to LEFT and
11422 # build a balancing ending, LEFT to RIGHT.
11423 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
11425 my $char = substr( $csc, $pos, 1 );
11427 # ignore everything except structural characters
11428 next unless ( $matching_char{$char} );
11430 # pop most recently appended character
11431 my $top = chop($csc);
11433 # push it back plus the mate to the newest character
11434 # unless they balance each other.
11435 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
11438 # return the balanced string
11443 sub add_closing_side_comment {
11445 # Add closing side comments after closing block braces if -csc used.
# Returns $cscw_block_comment, which is assigned only where an old side
# comment is saved below; otherwise it is returned undefined.
11446 my $cscw_block_comment;
11448 #---------------------------------------------------------------
11449 # Step 1: loop through all tokens of this line to accumulate
11450 # the text needed to create the closing side comments. Also see
11451 # how the line ends.
11452 #---------------------------------------------------------------
11454 my ( $terminal_type, $i_terminal, $i_block_leading_text,
11455 $block_leading_text, $block_line_count, $block_label )
11456 = accumulate_csc_text();
11458 #---------------------------------------------------------------
11459 # Step 2: make the closing side comment if this ends a block
11460 #---------------------------------------------------------------
# a side comment exists if the last nonblank, non-comment token is not
# the last token of the batch
11461 my $have_side_comment = $i_terminal != $max_index_to_go;
11463 # if this line might end in a block closure..
11465 $terminal_type eq '}'
11470 # the block is long enough
11471 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
11473 # or there is an existing comment to check
11474 || ( $have_side_comment
11475 && $rOpts->{'closing-side-comment-warnings'} )
11478 # .. and if this is one of the types of interest
11479 && $block_type_to_go[$i_terminal] =~
11480 /$closing_side_comment_list_pattern/o
11482 # .. but not an anonymous sub
11483 # These are not normally of interest, and their closing braces are
11484 # often followed by commas or semicolons anyway. This also avoids
11485 # possible erratic output due to line numbering inconsistencies
11486 # in the cases where their closing braces terminate a line.
11487 && $block_type_to_go[$i_terminal] ne 'sub'
11489 # ..and the corresponding opening brace must not be in this batch
11490 # (because we do not need to tag one-line blocks, although this
11491 # should also be caught with a positive -csci value)
11492 && $mate_index_to_go[$i_terminal] < 0
11497 # this is the last token (line doesn't have a side comment)
11498 !$have_side_comment
11500 # or the old side comment is a closing side comment
11501 || $tokens_to_go[$max_index_to_go] =~
11502 /$closing_side_comment_prefix_pattern/o
11507 # then make the closing side comment text
11508 if ($block_label) { $block_label .= " " }
11510 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
11512 # append any extra descriptive text collected above
11513 if ( $i_block_leading_text == $i_terminal ) {
11514 $token .= $block_leading_text;
11517 $token = balance_csc_text($token)
11518 if $rOpts->{'closing-side-comments-balanced'};
11520 $token =~ s/\s*$//; # trim any trailing whitespace
11522 # handle case of existing closing side comment
11523 if ($have_side_comment) {
11525 # warn if requested and tokens differ significantly
11526 if ( $rOpts->{'closing-side-comment-warnings'} ) {
11527 my $old_csc = $tokens_to_go[$max_index_to_go];
11528 my $new_csc = $token;
11529 $new_csc =~ s/\s+//g; # trim all whitespace
11530 $old_csc =~ s/\s+//g; # trim all whitespace
11531 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
11532 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
11533 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
11534 my $new_trailing_dots = $1; # holds '...' when the substitution above matched
11535 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
11537 # Patch to handle multiple closing side comments at
11538 # else and elsif's. These have become too complicated
11539 # to check, so if we see an indication of
11540 # '[ if' or '[ elsif', then assume they were made
11542 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
11543 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
11545 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
11546 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
11549 # if old comment is contained in new comment,
11550 # only compare the common part.
11551 if ( length($new_csc) > length($old_csc) ) {
11552 $new_csc = substr( $new_csc, 0, length($old_csc) );
11555 # if the new comment is shorter and has been limited,
11556 # only compare the common part.
11557 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
11559 $old_csc = substr( $old_csc, 0, length($new_csc) );
11562 # any remaining difference?
11563 if ( $new_csc ne $old_csc ) {
11565 # just leave the old comment if we are below the threshold
11566 # for creating side comments
11567 if ( $block_line_count <
11568 $rOpts->{'closing-side-comment-interval'} )
11573 # otherwise we'll make a note of it
11577 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
11580 # save the old side comment in a new trailing block comment
11581 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
11584 $cscw_block_comment =
11585 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
11590 # No differences.. we can safely delete old comment if we
11591 # are below the threshold
11592 if ( $block_line_count <
11593 $rOpts->{'closing-side-comment-interval'} )
11596 unstore_token_to_go()
11597 if ( $types_to_go[$max_index_to_go] eq '#' );
11598 unstore_token_to_go()
11599 if ( $types_to_go[$max_index_to_go] eq 'b' );
11604 # switch to the new csc (unless we deleted it!)
11605 $tokens_to_go[$max_index_to_go] = $token if $token;
11608 # handle case of NO existing closing side comment
11611 # insert the new side comment into the output token stream
11613 my $block_type = '';
11614 my $type_sequence = '';
11615 my $container_environment =
11616 $container_environment_to_go[$max_index_to_go];
11617 my $level = $levels_to_go[$max_index_to_go];
11618 my $slevel = $nesting_depth_to_go[$max_index_to_go];
11619 my $no_internal_newlines = 0;
11621 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
11622 my $ci_level = $ci_levels_to_go[$max_index_to_go];
11623 my $in_continued_quote = 0;
11625 # first insert a blank token
11626 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
11628 # then the side comment
11629 insert_new_token_to_go( $token, $type, $slevel,
11630 $no_internal_newlines );
11633 return $cscw_block_comment;
11636 sub previous_nonblank_token {
# Return the text of a preceding nonblank token, or "" if the search
# index runs off the start of the batch.
# NOTE(review): the lines which initialize $im and $name are not visible
# here; presumably $im starts just before the index passed in -- confirm.
11640 return "" if ( $im < 0 );
# step back over one blank token
11641 if ( $types_to_go[$im] eq 'b' ) { $im--; }
11642 return "" if ( $im < 0 );
11643 $name = $tokens_to_go[$im];
11645 # prepend any sub name to an isolated -> to avoid unwanted alignments
11646 # [test case is test8/penco.pl]
11647 if ( $name eq '->' ) {
11649 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
11650 $name = $tokens_to_go[$im] . $name;
11656 sub send_lines_to_vertical_aligner {
# Prepare each output line of the current batch and pass it to the
# vertical aligner.
# $ri_first - reference to list of the first token index of each line
# $ri_last - reference to list of the last token index of each line
11658 my ( $ri_first, $ri_last, $do_not_pad ) = @_;
# slot 0 of this list is the line-number cursor read and updated by
# sub lookup_opening_indentation
11660 my $rindentation_list = [0]; # ref to indentations for each line
11662 # define the array @matching_token_to_go for the output tokens
11663 # which will be non-blank for each special token (such as =>)
11664 # for which alignment is required.
11665 set_vertical_alignment_markers( $ri_first, $ri_last );
11667 # flush if necessary to avoid unwanted alignment
11668 my $must_flush = 0;
11669 if ( @$ri_first > 1 ) {
11671 # flush before a long if statement
11672 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
11677 Perl::Tidy::VerticalAligner::flush();
11680 undo_ci( $ri_first, $ri_last );
11682 set_logical_padding( $ri_first, $ri_last );
11684 # loop to prepare each line for shipment
11685 my $n_last_line = @$ri_first - 1;
11687 for my $n ( 0 .. $n_last_line ) {
11688 my $ibeg = $$ri_first[$n];
11689 my $iend = $$ri_last[$n];
11691 my ( $rtokens, $rfields, $rpatterns ) =
11692 make_alignment_patterns( $ibeg, $iend );
11694 # Set flag to show how much level changes between this line
11695 # and the next line, if we have it.
11697 if ( $n < $n_last_line ) {
11698 my $ibegp = $$ri_first[ $n + 1 ];
11699 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
11702 my ( $indentation, $lev, $level_end, $terminal_type,
11703 $is_semicolon_terminated, $is_outdented_line )
11704 = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
11705 $ri_first, $ri_last, $rindentation_list, $ljump );
11707 # we will allow outdenting of long lines..
11708 my $outdent_long_lines = (
11710 # which are long quotes, if allowed
11711 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
11713 # which are long block comments, if allowed
11715 $types_to_go[$ibeg] eq '#'
11716 && $rOpts->{'outdent-long-comments'}
11718 # but not if this is a static block comment
11719 && !$is_static_block_comment
11724 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
11726 my $rvertical_tightness_flags =
11727 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
11728 $ri_first, $ri_last );
11730 # flush an outdented line to avoid any unwanted vertical alignment
11731 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
# flag a line which begins with ':' (or follows a line ending in ':')
# and whose ending level drops relative to $lev
11733 my $is_terminal_ternary = 0;
11734 if ( $tokens_to_go[$ibeg] eq ':'
11735 || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
11737 if ( ( $terminal_type eq ';' && $level_end <= $lev )
11738 || ( $level_end < $lev ) )
11740 $is_terminal_ternary = 1;
11744 # send this new line down the pipe
11745 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
11746 Perl::Tidy::VerticalAligner::append_line(
11753 $forced_breakpoint_to_go[$iend] || $in_comma_list,
11754 $outdent_long_lines,
11755 $is_terminal_ternary,
11756 $is_semicolon_terminated,
11758 $rvertical_tightness_flags,
11762 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
11764 # flush an outdented line to avoid any unwanted vertical alignment
11765 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11769 # Set flag indicating if this line ends in an opening
11770 # token and is very short, so that a blank line is not
11771 # needed if the subsequent line is a comment.
11772 # Examples of what we are looking for:
11778 $last_output_short_opening_token
11780 # line ends in opening token
11781 = $types_to_go[$iend] =~ /^[\{\(\[L]$/
11785 # line has either single opening token
11788 # or is a single token followed by opening token.
11789 # Note that sub identifiers have blanks like 'sub doit'
11790 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
11793 # and limit total to 10 character widths
11794 && token_sequence_length( $ibeg, $iend ) <= 10;
# commented-out alternative form of the test above:
11796 ## $last_output_short_opening_token =
11797 ## $types_to_go[$iend] =~ /^[\{\(\[L]$/
11798 ## && $iend - $ibeg <= 2
11799 ## && $tokens_to_go[$ibeg] !~ /^sub/
11800 ## && token_sequence_length( $ibeg, $iend ) <= 10;
11802 } # end of loop to output each line
11804 # remember indentation of lines containing opening containers for
11805 # later use by sub set_adjusted_indentation
11806 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
11809 { # begin make_alignment_patterns
11811 my %block_type_map;
11816 # map related block names into a common name to
11818 %block_type_map = (
11829 # map certain keywords to the same 'if' class to align
11830 # long if/elsif sequences. [elsif.pl]
11836 'default' => 'given',
11837 'case' => 'switch',
11839 # treat an 'undef' similar to numbers and quotes
11844 sub make_alignment_patterns {
11846 # Here we do some important preliminary work for the
11847 # vertical aligner. We create three arrays for one
11848 # output line. These arrays contain strings that can
11849 # be tested by the vertical aligner to see if
11850 # consecutive lines can be aligned vertically.
11852 # The three arrays are indexed on the vertical
11853 # alignment fields and are:
11854 # @tokens - a list of any vertical alignment tokens for this line.
11855 # These are tokens, such as '=' '&&' '#' etc which
11856 # we might want to align vertically. These are
11857 # decorated with various information such as
11858 # nesting depth to prevent unwanted vertical
11859 # alignment matches.
11860 # @fields - the actual text of the line between the vertical alignment
11862 # @patterns - a modified list of token types, one for each alignment
11863 # field. These should normally each match before alignment is
11864 # allowed, even when the alignment tokens match.
# The arguments are the first and last token indexes of the line.
# Returns references to @tokens, @fields and @patterns, in that order.
11865 my ( $ibeg, $iend ) = @_;
11869 my $i_start = $ibeg;
11873 my @container_name = (""); # indexed by $depth
11874 my @multiple_comma_arrows = (undef); # indexed by $depth
11876 my $j = 0; # field index
11879 for $i ( $ibeg .. $iend ) {
11881 # Keep track of containers balanced on this line only.
11882 # These are used below to prevent unwanted cross-line alignments.
11883 # Unbalanced containers already avoid aligning across
11884 # container boundaries.
11885 if ( $tokens_to_go[$i] eq '(' ) {
11887 # if container is balanced on this line...
11888 my $i_mate = $mate_index_to_go[$i];
11889 if ( $i_mate > $i && $i_mate <= $iend ) {
11891 my $seqno = $type_sequence_to_go[$i];
11892 my $count = comma_arrow_count($seqno);
11893 $multiple_comma_arrows[$depth] = $count && $count > 1;
11895 # Append the previous token name to make the container name
11896 # more unique. This name will also be given to any commas
11897 # within this container, and it helps avoid undesirable
11898 # alignments of different types of containers.
11899 my $name = previous_nonblank_token($i);
11901 $container_name[$depth] = "+" . $name;
11903 # Make the container name even more unique if necessary.
11904 # If we are not vertically aligning this opening paren,
11905 # append a character count to avoid bad alignment because
11906 # it usually looks bad to align commas within containers
11907 # for which the opening parens do not align. Here
11908 # is an example of very BAD alignment of commas (because
11909 # the atan2 functions are not all aligned):
11911 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
11912 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
11913 # $X * atan2( $X, 1 ) -
11914 # $Y * atan2( $Y, 1 );
11916 # On the other hand, it is usually okay to align commas if
11917 # opening parens align, such as:
11918 # glVertex3d( $cx + $s * $xs, $cy, $z );
11919 # glVertex3d( $cx, $cy + $s * $ys, $z );
11920 # glVertex3d( $cx - $s * $xs, $cy, $z );
11921 # glVertex3d( $cx, $cy - $s * $ys, $z );
11923 # To distinguish between these situations, we will
11924 # append the length of the line from the previous matching
11925 # token, or beginning of line, to the function name. This
11926 # will allow the vertical aligner to reject undesirable
11929 # if we are not aligning on this paren...
11930 if ( $matching_token_to_go[$i] eq '' ) {
11932 # Sum length from previous alignment, or start of line.
11933 # Note that we have to sum token lengths here because
11934 # padding has been done and so array $lengths_to_go
11938 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11939 $len += leading_spaces_to_go($i_start)
11940 if ( $i_start == $ibeg );
11942 # tack length onto the container name to make unique
11943 $container_name[$depth] .= "-" . $len;
11947 elsif ( $tokens_to_go[$i] eq ')' ) {
11948 $depth-- if $depth > 0;
11951 # if we find a new synchronization token, we are done with
11953 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
11955 my $tok = my $raw_tok = $matching_token_to_go[$i];
11957 # make separators in different nesting depths unique
11958 # by appending the nesting depth digit.
11959 if ( $raw_tok ne '#' ) {
11960 $tok .= "$nesting_depth_to_go[$i]";
11963 # also decorate commas with any container name to avoid
11964 # unwanted cross-line alignments.
11965 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
11966 if ( $container_name[$depth] ) {
11967 $tok .= $container_name[$depth];
11971 # Patch to avoid aligning leading and trailing if, unless.
11972 # Mark trailing if, unless statements with container names.
11973 # This makes them different from leading if, unless which
11974 # are not so marked at present. If we ever need to name
11975 # them too, we could use ci to distinguish them.
11976 # Example problem to avoid:
11977 # return ( 2, "DBERROR" )
11978 # if ( $retval == 2 );
11979 # if ( scalar @_ ) {
11980 # my ( $a, $b, $c, $d, $e, $f ) = @_;
11982 if ( $raw_tok eq '(' ) {
11983 my $ci = $ci_levels_to_go[$ibeg];
11984 if ( $container_name[$depth] =~ /^\+(if|unless)/
11987 $tok .= $container_name[$depth];
11991 # Decorate block braces with block types to avoid
11992 # unwanted alignments such as the following:
11993 # foreach ( @{$routput_array} ) { $fh->print($_) }
11994 # eval { $fh->close() };
11995 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
11996 my $block_type = $block_type_to_go[$i];
11998 # map certain related block types to allow
11999 # else blocks to align
12000 $block_type = $block_type_map{$block_type}
12001 if ( defined( $block_type_map{$block_type} ) );
12003 # remove sub names to allow one-line sub braces to align
12004 # regardless of name
12005 if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
12007 # allow all control-type blocks to align
12008 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
12010 $tok .= $block_type;
12013 # concatenate the text of the consecutive tokens to form
12016 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
12018 # store the alignment token for this field
12019 push( @tokens, $tok );
12021 # get ready for the next batch
12024 $patterns[$j] = "";
12027 # continue accumulating tokens
12028 # handle non-keywords..
12029 if ( $types_to_go[$i] ne 'k' ) {
12030 my $type = $types_to_go[$i];
12032 # Mark most things before arrows as a quote to
12033 # get them to line up. Testfile: mixed.pl.
12034 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
12035 my $next_type = $types_to_go[ $i + 1 ];
12036 my $i_next_nonblank =
12037 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12039 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
12042 # Patch to ignore leading minus before words,
12043 # by changing pattern 'mQ' into just 'Q',
12044 # so that we can align things like this:
12045 # Button => "Print letter \"~$_\"",
12046 # -command => [ sub { print "$_[0]\n" }, $_ ],
12047 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
12051 # patch to make numbers and quotes align
12052 if ( $type eq 'n' ) { $type = 'Q' }
12054 # patch to ignore any ! in patterns
12055 if ( $type eq '!' ) { $type = '' }
12057 $patterns[$j] .= $type;
12060 # for keywords we have to use the actual text
12063 my $tok = $tokens_to_go[$i];
12065 # but map certain keywords to a common string to allow
12067 $tok = $keyword_map{$tok}
12068 if ( defined( $keyword_map{$tok} ) );
12069 $patterns[$j] .= $tok;
12073 # done with this line .. join text of tokens to make the last field
12074 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
12075 return ( \@tokens, \@fields, \@patterns );
12078 } # end make_alignment_patterns
12080 { # begin unmatched_indexes
12082 # closure to keep track of unbalanced containers.
12083 # arrays and hash shared by the routines in this block:
12084 my @unmatched_opening_indexes_in_this_batch;
12085 my @unmatched_closing_indexes_in_this_batch;
# number of '=>' tokens counted per container, keyed by the sequence
# number of the innermost unmatched opening token at that point
12086 my %comma_arrow_count;
12088 sub is_unbalanced_batch {
# total count of unmatched opening and closing tokens in this batch;
# nonzero (true) means the batch is unbalanced
12089 @unmatched_opening_indexes_in_this_batch +
12090 @unmatched_closing_indexes_in_this_batch;
12093 sub comma_arrow_count {
# return the '=>' count stored for sequence number $seqno (undefined
# if none was counted)
12095 return $comma_arrow_count{$seqno};
12098 sub match_opening_and_closing_tokens {
12100 # Match up indexes of opening and closing braces, etc, in this batch.
12101 # This has to be done after all tokens are stored because unstoring
12102 # of tokens would otherwise cause trouble.
12104 @unmatched_opening_indexes_in_this_batch = ();
12105 @unmatched_closing_indexes_in_this_batch = ();
12106 %comma_arrow_count = ();
12108 my ( $i, $i_mate, $token );
12109 foreach $i ( 0 .. $max_index_to_go ) {
12110 if ( $type_sequence_to_go[$i] ) {
12111 $token = $tokens_to_go[$i];
# opening tokens (including a sequenced '?') go on the stack
12112 if ( $token =~ /^[\(\[\{\?]$/ ) {
12113 push @unmatched_opening_indexes_in_this_batch, $i;
# closing tokens (including a sequenced ':') are matched against the
# most recent unmatched opening token
12115 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
12117 $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12118 if ( defined($i_mate) && $i_mate >= 0 ) {
12119 if ( $type_sequence_to_go[$i_mate] ==
12120 $type_sequence_to_go[$i] )
# record each token as the mate of the other
12122 $mate_index_to_go[$i] = $i_mate;
12123 $mate_index_to_go[$i_mate] = $i;
12126 push @unmatched_opening_indexes_in_this_batch,
12128 push @unmatched_closing_indexes_in_this_batch, $i;
12132 push @unmatched_closing_indexes_in_this_batch, $i;
12136 elsif ( $tokens_to_go[$i] eq '=>' ) {
12137 if (@unmatched_opening_indexes_in_this_batch) {
12138 my $j = $unmatched_opening_indexes_in_this_batch[-1];
12139 my $seqno = $type_sequence_to_go[$j];
12140 $comma_arrow_count{$seqno}++;
12146 sub save_opening_indentation {
12148 # This should be called after each batch of tokens is output. It
12149 # saves indentations of lines of all unmatched opening tokens.
12150 # These will be used by sub get_opening_indentation.
12152 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
12154 # We no longer need the saved indentations belonging to any
12155 # unmatched closing tokens in this batch, because we will
12156 # never encounter them again. So we can delete them to keep
12157 # the hash size down.
12158 foreach (@unmatched_closing_indexes_in_this_batch) {
12159 my $seqno = $type_sequence_to_go[$_];
12160 delete $saved_opening_indentation{$seqno};
12163 # we need to save indentations of any unmatched opening tokens
12164 # in this batch because we may need them in a subsequent batch.
12165 foreach (@unmatched_opening_indexes_in_this_batch) {
12166 my $seqno = $type_sequence_to_go[$_];
12167 $saved_opening_indentation{$seqno} = [
12168 lookup_opening_indentation(
12169 $_, $ri_first, $ri_last, $rindentation_list
12174 } # end unmatched_indexes
12176 sub get_opening_indentation {
12178 # get the indentation of the line which output the opening token
12179 # corresponding to a given closing token in the current output batch.
12182 # $i_closing - index in this line of a closing token ')' '}' or ']'
12184 # $ri_first - reference to list of the first index $i for each output
12185 # line in this batch
12186 # $ri_last - reference to list of the last index $i for each output line
12188 # $rindentation_list - reference to a list containing the indentation
12189 # used for each line.
12192 # -the indentation of the line which contained the opening token
12193 # which matches the token at index $i_closing
12194 # -and its offset (number of columns) from the start of the line
# -and the values $is_leading and $exists (see the return statement);
# $exists is assigned in lines not visible in this listing
12196 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
12198 # first, see if the opening token is in the current batch
12199 my $i_opening = $mate_index_to_go[$i_closing];
12200 my ( $indent, $offset, $is_leading, $exists );
12202 if ( $i_opening >= 0 ) {
12204 # it is..look up the indentation
12205 ( $indent, $offset, $is_leading ) =
12206 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
12207 $rindentation_list );
12210 # if not, it should have been stored in the hash by a previous batch
12212 my $seqno = $type_sequence_to_go[$i_closing];
12214 if ( $saved_opening_indentation{$seqno} ) {
12215 ( $indent, $offset, $is_leading ) =
12216 @{ $saved_opening_indentation{$seqno} };
12219 # some kind of serious error
12220 # (example is badfile.t)
12229 # if no sequence number it must be an unbalanced container
12237 return ( $indent, $offset, $is_leading, $exists );
12240 sub lookup_opening_indentation {
12242 # get the indentation of the line in the current output batch
12243 # which output a selected opening token
12246 # $i_opening - index of an opening token in the current output batch
12247 # whose line indentation we need
12248 # $ri_start - reference to list of the first index $i for each output
12249 # line in this batch (callers pass their $ri_first)
12250 # $ri_last - reference to list of the last index $i for each output line
12252 # $rindentation_list - reference to a list containing the indentation
12253 # used for each line. (NOTE: the first slot in
12254 # this list is the last returned line number, and this is
12255 # followed by the list of indentations).
12258 # -the indentation of the line which contained token $i_opening
12259 # -and its offset (number of columns) from the start of the line
# -and a flag which is true if $i_opening is the first token of that line
12261 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
12263 my $nline = $rindentation_list->[0]; # line number of previous lookup
12265 # reset line location if necessary
12266 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
12268 # find the correct line
12269 unless ( $i_opening > $ri_last->[-1] ) {
12270 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
12273 # error - token index is out of bounds - shouldn't happen
12276 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
12278 report_definite_bug();
12279 $nline = $#{$ri_last};
12282 $rindentation_list->[0] =
12283 $nline; # save line number to start looking next call
12284 my $ibeg = $ri_start->[$nline];
12285 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
12286 my $is_leading = ( $ibeg == $i_opening );
# the indentation of line $nline is in slot $nline + 1 because slot 0
# holds the saved line number
12287 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
12291 my %is_if_elsif_else_unless_while_until_for_foreach;
# Lookup set of block-type keywords: each word listed below is stored as
# a key with value 1. Despite the name, the set also holds case and when.
12295 # These block types may have text between the keyword and opening
12296 # curly. Note: 'else' does not, but must be included to allow trailing
12297 # if/elsif text to be appended.
12298 # patch for SWITCH/CASE: added 'case' and 'when'
12299 @_ = qw(if elsif else unless while until for foreach case when);
12300 @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
12303 sub set_adjusted_indentation {
# Returns the list ( $indentation, $lev, $level_end, $terminal_type,
# $is_semicolon_terminated, $is_outdented_line ) for the output line
# which spans tokens $ibeg .. $iend, and appends $indentation to
# @{$rindentation_list}.
# NOTE(review): this is a numbered listing with gaps in the embedded
# numbers, so a number of original lines (block delimiters among them,
# presumably) are not shown below.
12305 # This routine has the final say regarding the actual indentation of
12306 # a line. It starts with the basic indentation which has been
12307 # defined for the leading token, and then takes into account any
12308 # options that the user has set regarding special indenting and
12311 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
12312 $rindentation_list, $level_jump )
12315 # we need to know the last token of this line
12316 my ( $terminal_type, $i_terminal ) =
12317 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
12319 my $is_outdented_line = 0;
12321 my $is_semicolon_terminated = $terminal_type eq ';'
12322 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
12324 ##########################################################
12325 # Section 1: set a flag and a default indentation
12327 # Most lines are indented according to the initial token.
12328 # But it is common to outdent to the level just after the
12329 # terminal token in certain cases...
12330 # adjust_indentation flag:
12331 # 0 - do not adjust
12333 # 2 - vertically align with opening token
# (values 1 and 3 are also assigned below; their descriptions fall in
# numbering gaps 12332 and 12334. In Section 2, value 1 selects
# $reduced_spaces_to_go[$i_terminal]; value 3 presumably selects the
# full indentation of closing tokens handled at the end of Section 2.)
12335 ##########################################################
12336 my $adjust_indentation = 0;
12337 my $default_adjust_indentation = $adjust_indentation;
12340 $opening_indentation, $opening_offset,
12341 $is_leading, $opening_exists
12344 # if we are at a closing token of some type..
12345 if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
12347 # get the indentation of the line containing the corresponding
12350 $opening_indentation, $opening_offset,
12351 $is_leading, $opening_exists
12353 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12354 $rindentation_list );
12356 # First set the default behavior:
12359 # default behavior is to outdent closing lines
12360 # of the form: "); }; ]; )->xxx;"
12361 $is_semicolon_terminated
12363 # and 'cuddled parens' of the form: ")->pack("
12365 $terminal_type eq '('
12366 && $types_to_go[$ibeg] eq ')'
12367 && ( $nesting_depth_to_go[$iend] + 1 ==
12368 $nesting_depth_to_go[$ibeg] )
12371 # and when the next line is at a lower indentation level
12372 # PATCH: and only if the style allows undoing continuation
12373 # for all closing token types. We should really wait until
12374 # the indentation of the next line is known and then make
12375 # a decision, but that would require another pass.
12376 || ( $level_jump < 0 && !$some_closing_token_indentation )
12379 $adjust_indentation = 1;
12382 # outdent something like '),'
12384 $terminal_type eq ','
12386 # allow just one character before the comma
12387 && $i_terminal == $ibeg + 1
12389 # require LIST environment; otherwise, we may outdent too much --
12390 # this can happen in calls without parentheses (overload.t);
12391 && $container_environment_to_go[$i_terminal] eq 'LIST'
12394 $adjust_indentation = 1;
12397 # undo continuation indentation of a terminal closing token if
12398 # it is the last token before a level decrease. This will allow
12399 # a closing token to line up with its opening counterpart, and
12400 # avoids an indentation jump larger than 1 level.
12401 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
12402 && $i_terminal == $ibeg )
12404 my $ci = $ci_levels_to_go[$ibeg];
12405 my $lev = $levels_to_go[$ibeg];
12406 my $next_type = $types_to_go[ $ibeg + 1 ];
12407 my $i_next_nonblank =
12408 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
12409 if ( $i_next_nonblank <= $max_index_to_go
12410 && $levels_to_go[$i_next_nonblank] < $lev )
12412 $adjust_indentation = 1;
12416 # YVES patch 1 of 2:
12417 # Undo ci of line with leading closing eval brace,
12418 # but not beyond the indentation of the line with
12419 # the opening brace.
12420 if ( $block_type_to_go[$ibeg] eq 'eval'
12421 && !$rOpts->{'line-up-parentheses'}
12422 && !$rOpts->{'indent-closing-brace'} )
12425 $opening_indentation, $opening_offset,
12426 $is_leading, $opening_exists
12428 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12429 $rindentation_list );
12430 my $indentation = $leading_spaces_to_go[$ibeg];
12431 if ( defined($opening_indentation)
12432 && $indentation > $opening_indentation )
12434 $adjust_indentation = 1;
# remember the default choice; Section 2 falls back to it when
# alignment with the opening token (flag value 2) does not work
12438 $default_adjust_indentation = $adjust_indentation;
12440 # Now modify default behavior according to user request:
12441 # handle option to indent non-blocks of the form ); }; ];
12442 # But don't do special indentation to something like ')->pack('
12443 if ( !$block_type_to_go[$ibeg] ) {
12444 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
12446 if ( $i_terminal <= $ibeg + 1
12447 || $is_semicolon_terminated )
12449 $adjust_indentation = 2;
12452 $adjust_indentation = 0;
12455 elsif ( $cti == 2 ) {
12456 if ($is_semicolon_terminated) {
12457 $adjust_indentation = 3;
12460 $adjust_indentation = 0;
12463 elsif ( $cti == 3 ) {
12464 $adjust_indentation = 3;
12468 # handle option to indent blocks
12471 $rOpts->{'indent-closing-brace'}
12473 $i_terminal == $ibeg # isolated terminal '}'
12474 || $is_semicolon_terminated
12478 $adjust_indentation = 3;
12483 # if at ');', '};', '>;', and '];' of a terminal qw quote
12484 elsif ($$rpatterns[0] =~ /^qb*;$/
12485 && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
12487 if ( $closing_token_indentation{$1} == 0 ) {
12488 $adjust_indentation = 1;
12491 $adjust_indentation = 3;
12495 # if line begins with a ':', align it with any
12496 # previous line leading with corresponding ?
12497 elsif ( $types_to_go[$ibeg] eq ':' ) {
12499 $opening_indentation, $opening_offset,
12500 $is_leading, $opening_exists
12502 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12503 $rindentation_list );
12504 if ($is_leading) { $adjust_indentation = 2; }
12507 ##########################################################
12508 # Section 2: set indentation according to flag set above
12510 # Select the indentation object to define leading
12511 # whitespace. If we are outdenting something like '} } );'
12512 # then we want to use one level below the last token
12513 # ($i_terminal) in order to get it to fully outdent through
12515 ##########################################################
12518 my $level_end = $levels_to_go[$iend];
12520 if ( $adjust_indentation == 0 ) {
12521 $indentation = $leading_spaces_to_go[$ibeg];
12522 $lev = $levels_to_go[$ibeg];
12524 elsif ( $adjust_indentation == 1 ) {
12525 $indentation = $reduced_spaces_to_go[$i_terminal];
12526 $lev = $levels_to_go[$i_terminal];
12529 # handle indented closing token which aligns with opening token
12530 elsif ( $adjust_indentation == 2 ) {
12532 # handle option to align closing token with opening token
12533 $lev = $levels_to_go[$ibeg];
12535 # calculate spaces needed to align with opening token
12537 get_SPACES($opening_indentation) + $opening_offset;
12539 # Indent less than the previous line.
12541 # Problem: For -lp we don't exactly know what it was if there
12542 # were recoverable spaces sent to the aligner. A good solution
12543 # would be to force a flush of the vertical alignment buffer, so
12544 # that we would know. For now, this rule is used for -lp:
12546 # When the last line did not start with a closing token we will
12547 # be optimistic that the aligner will recover everything wanted.
12549 # This rule will prevent us from breaking a hierarchy of closing
12550 # tokens, and in a worst case will leave a closing paren too far
12551 # indented, but this is better than frequently leaving it not
12553 my $last_spaces = get_SPACES($last_indentation_written);
12554 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
12556 get_RECOVERABLE_SPACES($last_indentation_written);
12559 # reset the indentation to the new space count if it works
12560 # only options are all or none: nothing in-between looks good
12561 $lev = $levels_to_go[$ibeg];
12562 if ( $space_count < $last_spaces ) {
12563 if ($rOpts_line_up_parentheses) {
12564 my $lev = $levels_to_go[$ibeg];
12566 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12569 $indentation = $space_count;
12573 # revert to default if it does not work
12575 $space_count = leading_spaces_to_go($ibeg);
12576 if ( $default_adjust_indentation == 0 ) {
12577 $indentation = $leading_spaces_to_go[$ibeg];
12579 elsif ( $default_adjust_indentation == 1 ) {
12580 $indentation = $reduced_spaces_to_go[$i_terminal];
12581 $lev = $levels_to_go[$i_terminal];
12586 # Full indentation of closing tokens (-icb and -icp or -cti=2)
12589 # handle -icb (indented closing code block braces)
12590 # Updated method for indented block braces: indent one full level if
12591 # there is no continuation indentation. This will occur for major
12592 # structures such as sub, if, else, but not for things like map
12595 # Note: only code blocks without continuation indentation are
12596 # handled here (if, else, unless, ..). In the following snippet,
12597 # the terminal brace of the sort block will have continuation
12598 # indentation as shown so it will not be handled by the coding
12599 # here. We would have to undo the continuation indentation to do
12600 # this, but it probably looks ok as is. This is a possible future
12601 # update for semicolon terminated lines.
12603 # if ($sortby eq 'date' or $sortby eq 'size') {
12605 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
12610 if ( $block_type_to_go[$ibeg]
12611 && $ci_levels_to_go[$i_terminal] == 0 )
12613 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
12614 $indentation = $spaces + $rOpts_indent_columns;
12616 # NOTE: for -lp we could create a new indentation object, but
12617 # there is probably no need to do it
12620 # handle -icp and any -icb block braces which fall through above
12621 # test such as the 'sort' block mentioned above.
12624 # There are currently two ways to handle -icp...
12625 # One way is to use the indentation of the previous line:
12626 # $indentation = $last_indentation_written;
12628 # The other way is to use the indentation that the previous line
12629 # would have had if it hadn't been adjusted:
12630 $indentation = $last_unadjusted_indentation;
12632 # Current method: use the minimum of the two. This avoids
12633 # inconsistent indentation.
12634 if ( get_SPACES($last_indentation_written) <
12635 get_SPACES($indentation) )
12637 $indentation = $last_indentation_written;
12641 # use previous indentation but use own level
12642 # to cause list to be flushed properly
12643 $lev = $levels_to_go[$ibeg];
12646 # remember indentation except for multi-line quotes, which get
12648 unless ( $ibeg == 0 && $starting_in_quote ) {
12649 $last_indentation_written = $indentation;
12650 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
12651 $last_leading_token = $tokens_to_go[$ibeg];
12654 # be sure lines with leading closing tokens are not outdented more
12655 # than the line which contained the corresponding opening token.
12657 #############################################################
12658 # updated per bug report in alex_bug.pl: we must not
12659 # mess with the indentation of closing logical braces so
12660 # we must treat something like '} else {' as if it were
12661 # an isolated brace my $is_isolated_block_brace = (
12662 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
12663 #############################################################
12664 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
12665 && ( $iend == $ibeg
12666 || $is_if_elsif_else_unless_while_until_for_foreach{
12667 $block_type_to_go[$ibeg]
12670 # only do this for a ':' which is aligned with its leading '?'
12671 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
12672 if ( defined($opening_indentation)
12673 && !$is_isolated_block_brace
12674 && !$is_unaligned_colon )
12676 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
12677 $indentation = $opening_indentation;
12681 # remember the indentation of each line of this batch
12682 push @{$rindentation_list}, $indentation;
12684 # outdent lines with certain leading tokens...
12687 # must be first word of this batch
12693 # certain leading keywords if requested
12695 $rOpts->{'outdent-keywords'}
12696 && $types_to_go[$ibeg] eq 'k'
12697 && $outdent_keyword{ $tokens_to_go[$ibeg] }
12700 # or labels if requested
12701 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
12703 # or static block comments if requested
12704 || ( $types_to_go[$ibeg] eq '#'
12705 && $rOpts->{'outdent-static-block-comments'}
12706 && $is_static_block_comment )
12711 my $space_count = leading_spaces_to_go($ibeg);
12712 if ( $space_count > 0 ) {
12713 $space_count -= $rOpts_continuation_indentation;
12714 $is_outdented_line = 1;
12715 if ( $space_count < 0 ) { $space_count = 0 }
12717 # do not promote a spaced static block comment to non-spaced;
12718 # this is not normally necessary but could be for some
12719 # unusual user inputs (such as -ci = -i)
12720 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
12724 if ($rOpts_line_up_parentheses) {
12726 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12729 $indentation = $space_count;
12734 return ( $indentation, $lev, $level_end, $terminal_type,
12735 $is_semicolon_terminated, $is_outdented_line );
12739 sub set_vertical_tightness_flags {
# $n - index of this line within the batch
# $n_last_line - index of the last line of the batch
# $ibeg, $iend - first and last token index of line $n
# $ri_first, $ri_last - references to lists of the first and last token
# index of each line of the batch
# Returns a reference to the array of flags described below.
# NOTE(review): this is a numbered listing with gaps in the embedded
# numbers, so some original lines of this sub are not shown.
12741 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
12743 # Define vertical tightness controls for the nth line of a batch.
12744 # We create an array of parameters which tell the vertical aligner
12745 # if we should combine this line with the next line to achieve the
12746 # desired vertical tightness. The array of parameters contains:
12748 # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace
12749 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
12750 # if closing: spaces of padding to use
12751 # [2] sequence number of container
12752 # [3] valid flag: do not append if this flag is false. Will be
12753 # true if appropriate -vt flag is set. Otherwise, will be
12754 # made true only for 2 line container in parens with -lp
# [4] value of get_seqno($ibeg), stored just before returning
# [5] value of get_seqno($iend), stored just before returning
12756 # These flags are used by sub set_leading_whitespace in
12757 # the vertical aligner
12759 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
12761 # For non-BLOCK tokens, we will need to examine the next line
12762 # too, so we won't consider the last line.
12763 if ( $n < $n_last_line ) {
12765 # see if last token is an opening token...not a BLOCK...
12766 my $ibeg_next = $$ri_first[ $n + 1 ];
12767 my $token_end = $tokens_to_go[$iend];
12768 my $iend_next = $$ri_last[ $n + 1 ];
12770 $type_sequence_to_go[$iend]
12771 && !$block_type_to_go[$iend]
12772 && $is_opening_token{$token_end}
12774 $opening_vertical_tightness{$token_end} > 0
12776 # allow 2-line method call to be closed up
12777 || ( $rOpts_line_up_parentheses
12778 && $token_end eq '('
12780 && $types_to_go[ $iend - 1 ] ne 'b' )
12785 # avoid multiple jumps in nesting depth in one line if
12787 my $ovt = $opening_vertical_tightness{$token_end};
12788 my $iend_next = $$ri_last[ $n + 1 ];
12791 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
12792 $nesting_depth_to_go[$ibeg_next] )
12796 # If -vt flag has not been set, mark this as invalid
12797 # and aligner will validate it if it sees the closing paren
12799 my $valid_flag = $ovt;
12800 @{$rvertical_tightness_flags} =
12801 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
12805 # see if first token of next line is a closing token...
12806 # ..and be sure this line does not have a side comment
12807 my $token_next = $tokens_to_go[$ibeg_next];
12808 if ( $type_sequence_to_go[$ibeg_next]
12809 && !$block_type_to_go[$ibeg_next]
12810 && $is_closing_token{$token_next}
12811 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
12813 my $ovt = $opening_vertical_tightness{$token_next};
12814 my $cvt = $closing_vertical_tightness{$token_next};
12817 # never append a trailing line like )->pack(
12818 # because it will throw off later alignment
12820 $nesting_depth_to_go[$ibeg_next] ==
12821 $nesting_depth_to_go[ $iend_next + 1 ] + 1
12826 $container_environment_to_go[$ibeg_next] ne 'LIST'
12830 # allow closing up 2-line method calls
12831 || ( $rOpts_line_up_parentheses
12832 && $token_next eq ')' )
12839 # decide which trailing closing tokens to append..
12841 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
12843 my $str = join( '',
12844 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
12846 # append closing token if followed by comment or ';'
12847 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
12851 my $valid_flag = $cvt;
12852 @{$rvertical_tightness_flags} = (
12854 $tightness{$token_next} == 2 ? 0 : 1,
12855 $type_sequence_to_go[$ibeg_next], $valid_flag,
12861 # Opening Token Right
12862 # If requested, move an isolated trailing opening token to the end of
12863 # the previous line which ended in a comma. We could do this
12864 # in sub recombine_breakpoints but that would cause problems
12865 # with -lp formatting. The problem is that indentation will
12866 # quickly move far to the right in nested expressions. By
12867 # doing it after indentation has been set, we avoid changes
12868 # to the indentation. Actual movement of the token takes place
12869 # in sub write_leader_and_string.
12871 $opening_token_right{ $tokens_to_go[$ibeg_next] }
12873 # previous line is not opening
12874 # (use -sot to combine with it)
12875 && !$is_opening_token{$token_end}
12877 # previous line ended in one of these
12878 # (add other cases if necessary; '=>' and '.' are not necessary
12879 ##&& ($is_opening_token{$token_end} || $token_end eq ',')
12880 && !$block_type_to_go[$ibeg_next]
12882 # this is a line with just an opening token
12883 && ( $iend_next == $ibeg_next
12884 || $iend_next == $ibeg_next + 2
12885 && $types_to_go[$iend_next] eq '#' )
12887 # looks bad if we align vertically with the wrong container
12888 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
12891 my $valid_flag = 1;
12892 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12893 @{$rvertical_tightness_flags} =
12894 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
12897 # Stacking of opening and closing tokens
12899 my $token_beg_next = $tokens_to_go[$ibeg_next];
12901 # patch to make something like 'qw(' behave like an opening paren
12903 if ( $types_to_go[$ibeg_next] eq 'q' ) {
12904 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
12905 $token_beg_next = $1;
12909 if ( $is_closing_token{$token_end}
12910 && $is_closing_token{$token_beg_next} )
12912 $stackable = $stack_closing_token{$token_beg_next}
12913 unless ( $block_type_to_go[$ibeg_next] )
12914 ; # shouldn't happen; just checking
12916 elsif ($is_opening_token{$token_end}
12917 && $is_opening_token{$token_beg_next} )
12919 $stackable = $stack_opening_token{$token_beg_next}
12920 unless ( $block_type_to_go[$ibeg_next] )
12921 ; # shouldn't happen; just checking
12926 my $is_semicolon_terminated;
12927 if ( $n + 1 == $n_last_line ) {
12928 my ( $terminal_type, $i_terminal ) = terminal_type(
12929 \@types_to_go, \@block_type_to_go,
12930 $ibeg_next, $iend_next
12932 $is_semicolon_terminated = $terminal_type eq ';'
12933 && $nesting_depth_to_go[$iend_next] <
12934 $nesting_depth_to_go[$ibeg_next];
12937 # this must be a line with just an opening token
12938 # or end in a semicolon
12940 $is_semicolon_terminated
12941 || ( $iend_next == $ibeg_next
12942 || $iend_next == $ibeg_next + 2
12943 && $types_to_go[$iend_next] eq '#' )
12946 my $valid_flag = 1;
12947 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12948 @{$rvertical_tightness_flags} =
12949 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
12955 # Check for a last line with isolated opening BLOCK curly
12956 elsif ($rOpts_block_brace_vertical_tightness
12958 && $types_to_go[$iend] eq '{'
12959 && $block_type_to_go[$iend] =~
12960 /$block_brace_vertical_tightness_pattern/o )
12962 @{$rvertical_tightness_flags} =
12963 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
12966 # pack in the sequence numbers of the ends of this line
12967 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
12968 $rvertical_tightness_flags->[5] = get_seqno($iend);
12969 return $rvertical_tightness_flags;
12974 # get opening and closing sequence numbers of a token for the vertical
12975 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
12976 # to be treated somewhat like opening and closing tokens for stacking
12977 # tokens by the vertical aligner.
# NOTE(review): the sub header and its argument unpacking fall in
# numbering gaps of this listing (12970-12973 and 12978); this is
# presumably the body of get_seqno, with $ii the token index passed by
# the caller - TODO confirm against the full file.
# The default result is the type sequence number of token $ii. For a
# token of type q the value $SEQ_QW is substituted when the token text
# begins with qw followed by an opening paren, brace or bracket, or
# (only when not $ending_in_quote) when it ends with a closing one.
# Further conditions may sit in the gaps 12981-12982 and 12984-12985.
12979 my $seqno = $type_sequence_to_go[$ii];
12980 if ( $types_to_go[$ii] eq 'q' ) {
12983 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
12986 if ( !$ending_in_quote ) {
12987 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
12995 my %is_vertical_alignment_type;
# Lookup sets used when choosing vertical alignment tokens: every key
# stored below has the value 1. The first hash is keyed by token type,
# the second by keyword text.
# NOTE(review): numbered lines 13001-13002 below are word-list contents;
# the statement which opens and closes that list falls in numbering gaps
# of this listing (12997-13000 and 13003).
12996 my %is_vertical_alignment_keyword;
13001 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
13002 { ? : => =~ && || // ~~ !~~
13004 @is_vertical_alignment_type{@_} = (1) x scalar(@_);
13006 @_ = qw(if unless and or err eq ne for foreach while until);
13007 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
13010 sub set_vertical_alignment_markers {
# $ri_first, $ri_last - references to lists of the first and last token
# index of each output line of this batch
# Result: $matching_token_to_go[$i] is set for every token of every line,
# either to an alignment token or to the empty string.
# NOTE(review): this is a numbered listing with gaps in the embedded
# numbers, so some original lines of this sub are not shown.
13012 # This routine takes the first step toward vertical alignment of the
13013 # lines of output text. It looks for certain tokens which can serve as
13014 # vertical alignment markers (such as an '=').
13016 # Method: We look at each token $i in this output batch and set
13017 # $matching_token_to_go[$i] equal to those tokens at which we would
13018 # accept vertical alignment.
13020 # nothing to do if we aren't allowed to change whitespace
# (every marker is cleared; presumably the sub then returns in the
# lines missing at 13024-13027)
13021 if ( !$rOpts_add_whitespace ) {
13022 for my $i ( 0 .. $max_index_to_go ) {
13023 $matching_token_to_go[$i] = '';
13028 my ( $ri_first, $ri_last ) = @_;
13030 # remember the index of last nonblank token before any sidecomment
13031 my $i_terminal = $max_index_to_go;
13032 if ( $types_to_go[$i_terminal] eq '#' ) {
13033 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
13034 if ( $i_terminal > 0 ) { --$i_terminal }
13038 # look at each line of this batch..
13039 my $last_vertical_alignment_before_index;
13040 my $vert_last_nonblank_type;
13041 my $vert_last_nonblank_token;
13042 my $vert_last_nonblank_block_type;
13043 my $max_line = @$ri_first - 1;
13044 my ( $i, $type, $token, $block_type, $alignment_type );
13045 my ( $ibeg, $iend, $line );
13047 foreach $line ( 0 .. $max_line ) {
13048 $ibeg = $$ri_first[$line];
13049 $iend = $$ri_last[$line];
13050 $last_vertical_alignment_before_index = -1;
13051 $vert_last_nonblank_type = '';
13052 $vert_last_nonblank_token = '';
13053 $vert_last_nonblank_block_type = '';
13055 # look at each token in this output line..
13056 foreach $i ( $ibeg .. $iend ) {
13057 $alignment_type = '';
13058 $type = $types_to_go[$i];
13059 $block_type = $block_type_to_go[$i];
13060 $token = $tokens_to_go[$i];
13062 # check for flag indicating that we should not align
13064 if ( $matching_token_to_go[$i] ) {
13065 $matching_token_to_go[$i] = '';
13069 #--------------------------------------------------------
13070 # First see if we want to align BEFORE this token
13071 #--------------------------------------------------------
13073 # The first possible token that we can align before
13074 # is index 2 because: 1) it doesn't normally make sense to
13075 # align before the first token and 2) the second
13076 # token must be a blank if we are to align before
13078 if ( $i < $ibeg + 2 ) { }
13080 # must follow a blank token
13081 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
13083 # align a side comment --
13084 elsif ( $type eq '#' ) {
13088 # it is a static side comment
13090 $rOpts->{'static-side-comments'}
13091 && $token =~ /$static_side_comment_pattern/o
13094 # or a closing side comment
13095 || ( $vert_last_nonblank_block_type
13097 /$closing_side_comment_prefix_pattern/o )
13100 $alignment_type = $type;
13101 } ## Example of a static side comment
13104 # otherwise, do not align two in a row to create a
13106 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
13108 # align before one of these keywords
13109 # (within a line, since $i>1)
13110 elsif ( $type eq 'k' ) {
# the pattern quoted on the next line is only a subset of the keys of
# %is_vertical_alignment_keyword, which is what is actually tested
13112 # /^(if|unless|and|or|eq|ne)$/
13113 if ( $is_vertical_alignment_keyword{$token} ) {
13114 $alignment_type = $token;
13118 # align before one of these types..
13119 # Note: add '.' after new vertical aligner is operational
13120 elsif ( $is_vertical_alignment_type{$type} ) {
13121 $alignment_type = $token;
13123 # Do not align a terminal token. Although it might
13124 # occasionally look ok to do this, it has been found to be
13125 # a good general rule. The main problems are:
13126 # (1) that the terminal token (such as an = or :) might get
13127 # moved far to the right where it is hard to see because
13128 # nothing follows it, and
13129 # (2) doing so may prevent other good alignments.
13130 if ( $i == $iend || $i >= $i_terminal ) {
13131 $alignment_type = "";
13134 # Do not align leading ': (' or '. ('. This would prevent
13135 # alignment in something like the following:
13137 # ( $input_line_number < 10 ) ? " "
13138 # : ( $input_line_number < 100 ) ? " "
13142 # ( $case_matters ? $accessor : " lc($accessor) " )
13143 # . ( $yesno ? " eq " : " ne " )
13144 if ( $i == $ibeg + 2
13145 && $types_to_go[$ibeg] =~ /^[\.\:]$/
13146 && $types_to_go[ $i - 1 ] eq 'b' )
13148 $alignment_type = "";
13151 # For a paren after keyword, only align something like this:
13153 # elsif ( $b ) { &b }
13154 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
13155 $alignment_type = ""
13156 unless $vert_last_nonblank_token =~
13157 /^(if|unless|elsif)$/;
13160 # be sure the alignment tokens are unique
13161 # This didn't work well: reason not determined
13162 # if ($token ne $type) {$alignment_type .= $type}
13165 # NOTE: This is deactivated because it causes the previous
13166 # if/elsif alignment to fail
13167 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
13168 #{ $alignment_type = $type; }
13170 if ($alignment_type) {
13171 $last_vertical_alignment_before_index = $i;
13174 #--------------------------------------------------------
13175 # Next see if we want to align AFTER the previous nonblank
13176 #--------------------------------------------------------
13178 # We want to line up ',' and interior ';' tokens, with the added
13179 # space AFTER these tokens. (Note: interior ';' is included
13180 # because it may occur in short blocks).
13183 # we haven't already set it
13186 # and it is not the first token of the line
13189 # and it follows a blank
13190 && $types_to_go[ $i - 1 ] eq 'b'
13192 # and previous token IS one of these:
13193 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
13195 # and it's NOT one of these
13196 && ( $type !~ /^[b\#\)\]\}]$/ )
13198 # then go ahead and align
13202 $alignment_type = $vert_last_nonblank_type;
13205 #--------------------------------------------------------
13206 # then store the value
13207 #--------------------------------------------------------
13208 $matching_token_to_go[$i] = $alignment_type;
# track the most recent nonblank token for the tests made on later tokens
13209 if ( $type ne 'b' ) {
13210 $vert_last_nonblank_type = $type;
13211 $vert_last_nonblank_token = $token;
13212 $vert_last_nonblank_block_type = $block_type;
13219 sub terminal_type {
13221 # returns type of last token on this line (terminal token), as follows:
13222 # returns # for a full-line comment
13223 # returns ' ' for a blank line
13224 # otherwise returns final token type
# (a final '}' is reported as type 'b' when it has no block type or
# when its block type is a key of %is_sort_map_grep_eval_do)
#
# $rtype - reference to the list of token types
# $rblock_type - reference to the list of block types
# $ibeg, $iend - first and last token index of the line
#
# In list context the index of the terminal token is returned as a
# second value ($ibeg for a full-line comment or a blank line).
13226 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
13228 # check for full-line comment..
13229 if ( $$rtype[$ibeg] eq '#' ) {
13230 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
13234 # start at end and walk backwards..
13235 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
13237 # skip past any side comment and blanks
13238 next if ( $$rtype[$i] eq 'b' );
13239 next if ( $$rtype[$i] eq '#' );
13241 # found it..make sure it is a BLOCK termination,
13242 # but hide a terminal } after sort/grep/map because it is not
13243 # necessarily the end of the line. (terminal.t)
13244 my $terminal_type = $$rtype[$i];
13246 $terminal_type eq '}'
13247 && ( !$$rblock_type[$i]
13248 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
13251 $terminal_type = 'b';
13253 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
# no nonblank, non-comment token was found: report a blank line
13257 return wantarray ? ( ' ', $ibeg ) : ' ';
13262 my %is_good_keyword_breakpoint;
13263 my %is_lt_gt_le_ge;
13265 sub set_bond_strengths {
13269 @_ = qw(if unless while until for foreach);
13270 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
13272 @_ = qw(lt gt le ge);
13273 @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
13275 ###############################################################
13276 # NOTE: NO_BREAK's set here are HINTS which may not be honored;
13277 # essential NO_BREAKS's must be enforced in section 2, below.
13278 ###############################################################
13280 # adding NEW_TOKENS: add a left and right bond strength by
13281 # mimicking what is done for an existing token type. You
13282 # can skip this step at first and take the default, then
13283 # tweak later to get desired results.
13285 # The bond strengths should roughly follow precedence order where
13286 # possible. If you make changes, please check the results very
13287 # carefully on a variety of scripts.
13289 # no break around possible filehandle
13290 $left_bond_strength{'Z'} = NO_BREAK;
13291 $right_bond_strength{'Z'} = NO_BREAK;
13293 # never put a bare word on a new line:
13294 # example print (STDERR, "bla"); will fail with break after (
13295 $left_bond_strength{'w'} = NO_BREAK;
13297 # blanks always have infinite strength to force breaks after real tokens
13298 $right_bond_strength{'b'} = NO_BREAK;
13300 # try not to break on exponentiation
13301 @_ = qw" ** .. ... <=> ";
13302 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13303 @right_bond_strength{@_} = (STRONG) x scalar(@_);
13305 # The comma-arrow has very low precedence but not a good break point
13306 $left_bond_strength{'=>'} = NO_BREAK;
13307 $right_bond_strength{'=>'} = NOMINAL;
13309 # ok to break after label
13310 $left_bond_strength{'J'} = NO_BREAK;
13311 $right_bond_strength{'J'} = NOMINAL;
13312 $left_bond_strength{'j'} = STRONG;
13313 $right_bond_strength{'j'} = STRONG;
13314 $left_bond_strength{'A'} = STRONG;
13315 $right_bond_strength{'A'} = STRONG;
13317 $left_bond_strength{'->'} = STRONG;
13318 $right_bond_strength{'->'} = VERY_STRONG;
13320 # breaking AFTER modulus operator is ok:
13322 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13323 @right_bond_strength{@_} =
13324 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
13326 # Break AFTER math operators * and /
13328 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13329 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13331 # Break AFTER weakest math operators + and -
13332 # Make them weaker than * but a bit stronger than '.'
13334 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13335 @right_bond_strength{@_} =
13336 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
13338 # breaking BEFORE these is just ok:
13340 @right_bond_strength{@_} = (STRONG) x scalar(@_);
13341 @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
13343 # breaking before the string concatenation operator seems best
13344 # because it can be hard to see at the end of a line
13345 $right_bond_strength{'.'} = STRONG;
13346 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
13349 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13350 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13352 # make these a little weaker than nominal so that they get
13353 # favored for end-of-line characters
13354 @_ = qw"!= == =~ !~ ~~ !~~";
13355 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13356 @right_bond_strength{@_} =
13357 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
13359 # break AFTER these
13360 @_ = qw" < > | & >= <=";
13361 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
13362 @right_bond_strength{@_} =
13363 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
13365 # breaking either before or after a quote is ok
13366 # but bias for breaking before a quote
13367 $left_bond_strength{'Q'} = NOMINAL;
13368 $right_bond_strength{'Q'} = NOMINAL + 0.02;
13369 $left_bond_strength{'q'} = NOMINAL;
13370 $right_bond_strength{'q'} = NOMINAL;
13372 # starting a line with a keyword is usually ok
13373 $left_bond_strength{'k'} = NOMINAL;
13375 # we usually want to bond a keyword strongly to what immediately
13376 # follows, rather than leaving it stranded at the end of a line
13377 $right_bond_strength{'k'} = STRONG;
13379 $left_bond_strength{'G'} = NOMINAL;
13380 $right_bond_strength{'G'} = STRONG;
13382 # it is good to break AFTER various assignment operators
13384 = **= += *= &= <<= &&=
13385 -= /= |= >>= ||= //=
13389 @left_bond_strength{@_} = (STRONG) x scalar(@_);
13390 @right_bond_strength{@_} =
13391 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
13393 # break BEFORE '&&' and '||' and '//'
13394 # set strength of '||' to same as '=' so that chains like
13395 # $a = $b || $c || $d will break before the first '||'
13396 $right_bond_strength{'||'} = NOMINAL;
13397 $left_bond_strength{'||'} = $right_bond_strength{'='};
13399 # same thing for '//'
13400 $right_bond_strength{'//'} = NOMINAL;
13401 $left_bond_strength{'//'} = $right_bond_strength{'='};
13403 # set strength of && a little higher than ||
13404 $right_bond_strength{'&&'} = NOMINAL;
13405 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
13407 $left_bond_strength{';'} = VERY_STRONG;
13408 $right_bond_strength{';'} = VERY_WEAK;
13409 $left_bond_strength{'f'} = VERY_STRONG;
13411 # make right strength of for ';' a little less than '='
13412 # to make for contents break after the ';' to avoid this:
13413 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
13414 # $number_of_fields )
13415 # and make it weaker than ',' and 'and' too
13416 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
13418 # The strengths of ?/: should be somewhere between
13419 # an '=' and a quote (NOMINAL),
13420 # make strength of ':' slightly less than '?' to help
13421 # break long chains of ? : after the colons
13422 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
13423 $right_bond_strength{':'} = NO_BREAK;
13424 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
13425 $right_bond_strength{'?'} = NO_BREAK;
13427 $left_bond_strength{','} = VERY_STRONG;
13428 $right_bond_strength{','} = VERY_WEAK;
13430 # Set bond strengths of certain keywords
13431 # make 'or', 'err', 'and' slightly weaker than a ','
13432 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
13433 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
13434 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
13435 $left_bond_strength{'xor'} = NOMINAL;
13436 $right_bond_strength{'and'} = NOMINAL;
13437 $right_bond_strength{'or'} = NOMINAL;
13438 $right_bond_strength{'err'} = NOMINAL;
13439 $right_bond_strength{'xor'} = STRONG;
13442 # patch-its always ok to break at end of line
13443 $nobreak_to_go[$max_index_to_go] = 0;
13445 # adding a small 'bias' to strengths is a simple way to make a line
13446 # break at the first of a sequence of identical terms. For example,
13447 # to force long string of conditional operators to break with
13448 # each line ending in a ':', we can add a small number to the bond
13449 # strength of each ':'
13450 my $colon_bias = 0;
13457 my $code_bias = -.01;
13461 my $last_nonblank_type = $type;
13462 my $last_nonblank_token = $token;
13463 my $delta_bias = 0.0001;
13464 my $list_str = $left_bond_strength{'?'};
13466 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
13467 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
13470 # preliminary loop to compute bond strengths
13471 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
13472 $last_type = $type;
13473 if ( $type ne 'b' ) {
13474 $last_nonblank_type = $type;
13475 $last_nonblank_token = $token;
13477 $type = $types_to_go[$i];
13479 # strength on both sides of a blank is the same
13480 if ( $type eq 'b' && $last_type ne 'b' ) {
13481 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
13485 $token = $tokens_to_go[$i];
13486 $block_type = $block_type_to_go[$i];
13488 $next_type = $types_to_go[$i_next];
13489 $next_token = $tokens_to_go[$i_next];
13490 $total_nesting_depth = $nesting_depth_to_go[$i_next];
13491 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13492 $next_nonblank_type = $types_to_go[$i_next_nonblank];
13493 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
13495 # Some token chemistry... The decision about where to break a
13496 # line depends upon a "bond strength" between tokens. The LOWER
13497 # the bond strength, the MORE likely a break. The strength
13498 # values are based on trial-and-error, and need to be tweaked
13499 # occasionally to get desired results. Things to keep in mind
13501 # 1. relative strengths are important. small differences
13502 # in strengths can make big formatting differences.
13503 # 2. each indentation level adds one unit of bond strength
13504 # 3. a value of NO_BREAK makes an unbreakable bond
13505 # 4. a value of VERY_WEAK is the strength of a ','
13506 # 5. values below NOMINAL are considered ok break points
13507 # 6. values above NOMINAL are considered poor break points
13508 # We are computing the strength of the bond between the current
13509 # token and the NEXT token.
13510 my $bond_str = VERY_STRONG; # a default, high strength
13512 #---------------------------------------------------------------
13514 # use minimum of left and right bond strengths if defined;
13515 # digraphs and trigraphs like to break on their left
13516 #---------------------------------------------------------------
13517 my $bsr = $right_bond_strength{$type};
13519 if ( !defined($bsr) ) {
13521 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
13525 $bsr = VERY_STRONG;
13529 # define right bond strengths of certain keywords
13530 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
13531 $bsr = $right_bond_strength{$token};
13533 elsif ( $token eq 'ne' or $token eq 'eq' ) {
13536 my $bsl = $left_bond_strength{$next_nonblank_type};
13538 # set terminal bond strength to the nominal value
13539 # this will cause good preceding breaks to be retained
13540 if ( $i_next_nonblank > $max_index_to_go ) {
13544 if ( !defined($bsl) ) {
13546 if ( $is_digraph{$next_nonblank_type}
13547 || $is_trigraph{$next_nonblank_type} )
13552 $bsl = VERY_STRONG;
13556 # define right bond strengths of certain keywords
13557 if ( $next_nonblank_type eq 'k'
13558 && defined( $left_bond_strength{$next_nonblank_token} ) )
13560 $bsl = $left_bond_strength{$next_nonblank_token};
13562 elsif ($next_nonblank_token eq 'ne'
13563 or $next_nonblank_token eq 'eq' )
13567 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
13568 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
13571 # Note: it might seem that we would want to keep a NO_BREAK if
13572 # either token has this value. This didn't work, because in an
13573 # arrow list, it prevents the comma from separating from the
13574 # following bare word (which is probably quoted by its arrow).
13575 # So necessary NO_BREAK's have to be handled as special cases
13576 # in the final section.
13577 $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
13578 my $bond_str_1 = $bond_str;
13580 #---------------------------------------------------------------
13583 #---------------------------------------------------------------
13585 # allow long lines before final { in an if statement, as in:
13590 # Otherwise, the line before the { tends to be too short.
13591 if ( $type eq ')' ) {
13592 if ( $next_nonblank_type eq '{' ) {
13593 $bond_str = VERY_WEAK + 0.03;
13597 elsif ( $type eq '(' ) {
13598 if ( $next_nonblank_type eq '{' ) {
13599 $bond_str = NOMINAL;
13603 # break on something like '} (', but keep this stronger than a ','
13604 # example is in 'howe.pl'
13605 elsif ( $type eq 'R' or $type eq '}' ) {
13606 if ( $next_nonblank_type eq '(' ) {
13607 $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
13611 #-----------------------------------------------------------------
13612 # adjust bond strength bias
13613 #-----------------------------------------------------------------
13615 # add any bias set by sub scan_list at old comma break points.
13616 elsif ( $type eq ',' ) {
13617 $bond_str += $bond_strength_to_go[$i];
13620 elsif ( $type eq 'f' ) {
13621 $bond_str += $f_bias;
13622 $f_bias += $delta_bias;
13625 # in long ?: conditionals, bias toward just one set per line (colon.t)
13626 elsif ( $type eq ':' ) {
13627 if ( !$want_break_before{$type} ) {
13628 $bond_str += $colon_bias;
13629 $colon_bias += $delta_bias;
13633 if ( $next_nonblank_type eq ':'
13634 && $want_break_before{$next_nonblank_type} )
13636 $bond_str += $colon_bias;
13637 $colon_bias += $delta_bias;
13640 # if leading '.' is used, align all but 'short' quotes;
13641 # the idea is to not place something like "\n" on a single line.
13642 elsif ( $next_nonblank_type eq '.' ) {
13643 if ( $want_break_before{'.'} ) {
13645 $last_nonblank_type eq '.'
13648 $rOpts_short_concatenation_item_length )
13649 && ( $token !~ /^[\)\]\}]$/ )
13652 $dot_bias += $delta_bias;
13654 $bond_str += $dot_bias;
13657 elsif ($next_nonblank_type eq '&&'
13658 && $want_break_before{$next_nonblank_type} )
13660 $bond_str += $amp_bias;
13661 $amp_bias += $delta_bias;
13663 elsif ($next_nonblank_type eq '||'
13664 && $want_break_before{$next_nonblank_type} )
13666 $bond_str += $bar_bias;
13667 $bar_bias += $delta_bias;
13669 elsif ( $next_nonblank_type eq 'k' ) {
13671 if ( $next_nonblank_token eq 'and'
13672 && $want_break_before{$next_nonblank_token} )
13674 $bond_str += $and_bias;
13675 $and_bias += $delta_bias;
13677 elsif ($next_nonblank_token =~ /^(or|err)$/
13678 && $want_break_before{$next_nonblank_token} )
13680 $bond_str += $or_bias;
13681 $or_bias += $delta_bias;
13684 # FIXME: needs more testing
13685 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
13686 $bond_str = $list_str if ( $bond_str > $list_str );
13688 elsif ( $token eq 'err'
13689 && !$want_break_before{$token} )
13691 $bond_str += $or_bias;
13692 $or_bias += $delta_bias;
13697 && !$want_break_before{$type} )
13699 $bond_str += $colon_bias;
13700 $colon_bias += $delta_bias;
13702 elsif ( $type eq '&&'
13703 && !$want_break_before{$type} )
13705 $bond_str += $amp_bias;
13706 $amp_bias += $delta_bias;
13708 elsif ( $type eq '||'
13709 && !$want_break_before{$type} )
13711 $bond_str += $bar_bias;
13712 $bar_bias += $delta_bias;
13714 elsif ( $type eq 'k' ) {
13716 if ( $token eq 'and'
13717 && !$want_break_before{$token} )
13719 $bond_str += $and_bias;
13720 $and_bias += $delta_bias;
13722 elsif ( $token eq 'or'
13723 && !$want_break_before{$token} )
13725 $bond_str += $or_bias;
13726 $or_bias += $delta_bias;
13730 # keep matrix and hash indices together
13731 # but make them a little below STRONG to allow breaking open
13732 # something like {'some-word'}{'some-very-long-word'} at the }{
13734 if ( ( $type eq ']' or $type eq 'R' )
13735 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
13738 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
13741 if ( $next_nonblank_token =~ /^->/ ) {
13743 # increase strength to the point where a break in the following
13744 # will be after the opening paren rather than at the arrow:
13746 if ( $type eq 'i' ) {
13747 $bond_str = 1.45 * STRONG;
13750 elsif ( $type =~ /^[\)\]\}R]$/ ) {
13751 $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
13754 # otherwise make strength before an '->' a little over a '+'
13756 if ( $bond_str <= NOMINAL ) {
13757 $bond_str = NOMINAL + 0.01;
13762 if ( $token eq ')' && $next_nonblank_token eq '[' ) {
13763 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
13766 # map1.t -- correct for a quirk in perl
13768 && $next_nonblank_type eq 'i'
13769 && $last_nonblank_type eq 'k'
13770 && $is_sort_map_grep{$last_nonblank_token} )
13772 # /^(sort|map|grep)$/ )
13774 $bond_str = NO_BREAK;
13777 # extrude.t: do not break before paren at:
13779 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
13780 $bond_str = NO_BREAK;
13783 # good to break after end of code blocks
13784 if ( $type eq '}' && $block_type ) {
13786 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
13787 $code_bias += $delta_bias;
13790 if ( $type eq 'k' ) {
13792 # allow certain control keywords to stand out
13793 if ( $next_nonblank_type eq 'k'
13794 && $is_last_next_redo_return{$token} )
13796 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
13799 # Don't break after keyword my. This is a quick fix for a
13800 # rare problem with perl. An example is this line from file
13802 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
13804 if ( $token eq 'my' ) {
13805 $bond_str = NO_BREAK;
13810 # good to break before 'if', 'unless', etc
13811 if ( $is_if_brace_follower{$next_nonblank_token} ) {
13812 $bond_str = VERY_WEAK;
13815 if ( $next_nonblank_type eq 'k' ) {
13817 # keywords like 'unless', 'if', etc, within statements
13819 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
13820 $bond_str = VERY_WEAK / 1.05;
13824 # try not to break before a comma-arrow
13825 elsif ( $next_nonblank_type eq '=>' ) {
13826 if ( $bond_str < STRONG ) { $bond_str = STRONG }
13829 #----------------------------------------------------------------------
13830 # only set NO_BREAK's from here on
13831 #----------------------------------------------------------------------
13832 if ( $type eq 'C' or $type eq 'U' ) {
13834 # use strict requires that bare word and => not be separated
13835 if ( $next_nonblank_type eq '=>' ) {
13836 $bond_str = NO_BREAK;
13839 # Never break between a bareword and a following paren because
13840 # perl may give an error. For example, if a break is placed
13841 # between 'to_filehandle' and its '(' the following line will
13842 # give a syntax error [Carp.pm]: my( $no) =fileno(
13843 # to_filehandle( $in)) ;
13844 if ( $next_nonblank_token eq '(' ) {
13845 $bond_str = NO_BREAK;
13849 # use strict requires that bare word within braces not start new line
13850 elsif ( $type eq 'L' ) {
13852 if ( $next_nonblank_type eq 'w' ) {
13853 $bond_str = NO_BREAK;
13857 # in older version of perl, use strict can cause problems with
13858 # breaks before bare words following opening parens. For example,
13859 # this will fail under older versions if a break is made between
13862 # open( MAIL, "a long filename or command");
13864 elsif ( $type eq '{' ) {
13866 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
13868 # but it's fine to break if the word is followed by a '=>'
13869 # or if it is obviously a sub call
13870 my $i_next_next_nonblank = $i_next_nonblank + 1;
13871 my $next_next_type = $types_to_go[$i_next_next_nonblank];
13872 if ( $next_next_type eq 'b'
13873 && $i_next_nonblank < $max_index_to_go )
13875 $i_next_next_nonblank++;
13876 $next_next_type = $types_to_go[$i_next_next_nonblank];
13879 ##if ( $next_next_type ne '=>' ) {
13880 # these are ok: '->xxx', '=>', '('
13882 # We'll check for an old breakpoint and keep a leading
13883 # bareword if it was that way in the input file.
13884 # Presumably it was ok that way. For example, the
13885 # following would remain unchanged:
13888 # January, February, March, April,
13889 # May, June, July, August,
13890 # September, October, November, December,
13893 # This should be sufficient:
13894 if ( !$old_breakpoint_to_go[$i]
13895 && ( $next_next_type eq ',' || $next_next_type eq '}' )
13898 $bond_str = NO_BREAK;
13903 elsif ( $type eq 'w' ) {
13905 if ( $next_nonblank_type eq 'R' ) {
13906 $bond_str = NO_BREAK;
13909 # use strict requires that bare word and => not be separated
13910 if ( $next_nonblank_type eq '=>' ) {
13911 $bond_str = NO_BREAK;
13915 # in fact, use strict hates bare words on any new line. For
13916 # example, a break before the underscore here provokes the
13917 # wrath of use strict:
13918 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
13919 elsif ( $type eq 'F' ) {
13920 $bond_str = NO_BREAK;
13923 # use strict does not allow separating type info from trailing { }
13924 # testfile is readmail.pl
13925 elsif ( $type eq 't' or $type eq 'i' ) {
13927 if ( $next_nonblank_type eq 'L' ) {
13928 $bond_str = NO_BREAK;
13932 # Do not break between a possible filehandle and a ? or / and do
13933 # not introduce a break after it if there is no blank
13935 elsif ( $type eq 'Z' ) {
13940 # if there is no blank and we do not want one. Examples:
13941 # print $x++ # do not break after $x
13942 # print HTML"HELLO" # break ok after HTML
13945 && defined( $want_left_space{$next_type} )
13946 && $want_left_space{$next_type} == WS_NO
13949 # or we might be followed by the start of a quote
13950 || $next_nonblank_type =~ /^[\/\?]$/
13953 $bond_str = NO_BREAK;
13957 # Do not break before a possible file handle
13958 if ( $next_nonblank_type eq 'Z' ) {
13959 $bond_str = NO_BREAK;
13962 # As a defensive measure, do not break between a '(' and a
13963 # filehandle. In some cases, this can cause an error. For
13964 # example, the following program works:
13971 # But this program fails:
13979 # This is normally only a problem with the 'extrude' option
13980 if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
13981 $bond_str = NO_BREAK;
13984 # Breaking before a ++ can cause perl to guess wrong. For
13985 # example the following line will cause a syntax error
13986 # with -extrude if we break between '$i' and '++' [fixstyle2]
13987 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
13988 elsif ( $next_nonblank_type eq '++' ) {
13989 $bond_str = NO_BREAK;
13992 # Breaking before a ? before a quote can cause trouble if
13993 # they are not separated by a blank.
13994 # Example: a syntax error occurs if you break before the ? here
13995 # my$logic=join$all?' && ':' || ',@regexps;
13996 # From: Professional_Perl_Programming_Code/multifind.pl
13997 elsif ( $next_nonblank_type eq '?' ) {
13998 $bond_str = NO_BREAK
13999 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
14002 # Breaking before a . followed by a number
14003 # can cause trouble if there is no intervening space
14004 # Example: a syntax error occurs if you break before the .2 here
14005 # $str .= pack($endian.2, ensurrogate($ord));
14006 # From: perl58/Unicode.pm
14007 elsif ( $next_nonblank_type eq '.' ) {
14008 $bond_str = NO_BREAK
14009 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
14012 # patch to put cuddled elses back together when on multiple
14013 # lines, as in: } \n else \n { \n
14014 if ($rOpts_cuddled_else) {
14016 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
14017 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
14019 $bond_str = NO_BREAK;
14023 # keep '}' together with ';'
14024 if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
14025 $bond_str = NO_BREAK;
14028 # never break between sub name and opening paren
14029 if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
14030 $bond_str = NO_BREAK;
14033 #---------------------------------------------------------------
14035 # now take nesting depth into account
14036 #---------------------------------------------------------------
14037 # final strength incorporates the bond strength and nesting depth
14040 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
14041 if ( $total_nesting_depth > 0 ) {
14042 $strength = $bond_str + $total_nesting_depth;
14045 $strength = $bond_str;
14049 $strength = NO_BREAK;
14052 # always break after side comment
14053 if ( $type eq '#' ) { $strength = 0 }
14055 $bond_strength_to_go[$i] = $strength;
14057 FORMATTER_DEBUG_FLAG_BOND && do {
14058 my $str = substr( $token, 0, 15 );
14059 $str .= ' ' x ( 16 - length($str) );
14061 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
# pad_array_to_go
#
# Append two empty placeholder tokens of type 'b' (blank) just past
# $max_index_to_go, so that code which looks one or two tokens ahead does
# not have to special-case the end of the batch.  The nesting depth of the
# first extra slot starts as a copy of the last real token's depth and is
# then lowered by one if the last token is a closing type, or raised by one
# if it is an opening type.
14068 sub pad_array_to_go {
14070 # to simplify coding in scan_list and set_bond_strengths, it helps
14071 # to create some extra blank tokens at the end of the arrays
14072 $tokens_to_go[ $max_index_to_go + 1 ] = '';
14073 $tokens_to_go[ $max_index_to_go + 2 ] = '';
14074 $types_to_go[ $max_index_to_go + 1 ] = 'b';
14075 $types_to_go[ $max_index_to_go + 2 ] = 'b';
14076 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
14077 $nesting_depth_to_go[$max_index_to_go];
# a closing token at depth <= 0 means the nesting bookkeeping is broken
14080 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
14081 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
14083 # shouldn't happen:
# NOTE(review): the message below names scan_list although it is issued
# from pad_array_to_go -- confirm whether that is intentional
14084 unless ( get_saw_brace_error() ) {
14086 "Program bug in scan_list: hit nesting error which should have been caught\n"
14088 report_definite_bug();
14092 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
14097 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
14098 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
14102 { # begin scan_list
14105 $block_type, $current_depth,
14107 $i_last_nonblank_token, $last_colon_sequence_number,
14108 $last_nonblank_token, $last_nonblank_type,
14109 $last_old_breakpoint_count, $minimum_depth,
14110 $next_nonblank_block_type, $next_nonblank_token,
14111 $next_nonblank_type, $old_breakpoint_count,
14112 $starting_breakpoint_count, $starting_depth,
14118 @breakpoint_stack, @breakpoint_undo_stack,
14119 @comma_index, @container_type,
14120 @identifier_count_stack, @index_before_arrow,
14121 @interrupted_list, @item_count_stack,
14122 @last_comma_index, @last_dot_index,
14123 @last_nonblank_type, @old_breakpoint_count_stack,
14124 @opening_structure_index_stack, @rfor_semicolon_list,
14125 @has_old_logical_breakpoints, @rand_or_list,
14129 # routine to define essential variables when we go 'up' to
# check_for_new_minimum_depth
#
# When $depth is below the smallest depth recorded so far
# ($minimum_depth), record it as the new minimum and initialize every
# per-depth stack entry for that depth.  Entries that only matter within
# one call are always reset; entries that must persist between calls are
# set only if $has_broken_sublist[$depth] is still undefined.
14131 sub check_for_new_minimum_depth {
14133 if ( $depth < $minimum_depth ) {
14135 $minimum_depth = $depth;
14137 # these arrays need not retain values between calls
14138 $breakpoint_stack[$depth] = $starting_breakpoint_count;
14139 $container_type[$depth] = "";
14140 $identifier_count_stack[$depth] = 0;
14141 $index_before_arrow[$depth] = -1;
14142 $interrupted_list[$depth] = 1;
14143 $item_count_stack[$depth] = 0;
14144 $last_nonblank_type[$depth] = "";
14145 $opening_structure_index_stack[$depth] = -1;
14147 $breakpoint_undo_stack[$depth] = undef;
14148 $comma_index[$depth] = undef;
14149 $last_comma_index[$depth] = undef;
14150 $last_dot_index[$depth] = undef;
14151 $old_breakpoint_count_stack[$depth] = undef;
14152 $has_old_logical_breakpoints[$depth] = 0;
14153 $rand_or_list[$depth] = [];
14154 $rfor_semicolon_list[$depth] = [];
14155 $i_equals[$depth] = -1;
14157 # these arrays must retain values between calls
14158 if ( !defined( $has_broken_sublist[$depth] ) ) {
14159 $dont_align[$depth] = 0;
14160 $has_broken_sublist[$depth] = 0;
14161 $want_comma_break[$depth] = 0;
14166 # routine to decide which commas to break at within a container;
14168 # $bp_count = number of comma breakpoints set
14169 # $do_not_break_apart = a flag indicating if container need not
# set_comma_breakpoints
#
# Works on depth $dd.  Nothing is done unless $item_count_stack[$dd] is
# nonzero.  Commas flagged by $dont_align[$dd] go to
# do_uncontained_comma_breaks; otherwise set_comma_breakpoints_do is
# called, and $bp_count is the number of forced breakpoints it added.
# Returns the list ( $bp_count, $do_not_break_apart ).
14171 sub set_comma_breakpoints {
14175 my $do_not_break_apart = 0;
14178 if ( $item_count_stack[$dd] ) {
14180 # handle commas not in containers...
14181 if ( $dont_align[$dd] ) {
14182 do_uncontained_comma_breaks($dd);
14185 # handle commas within containers...
# snapshot the global count so the number of new breaks can be computed
14187 my $fbc = $forced_breakpoint_count;
14189 # always open comma lists not preceded by keywords,
14190 # barewords, identifiers (that is, anything that doesn't
14191 # look like a function call)
14192 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
14194 set_comma_breakpoints_do(
14196 $opening_structure_index_stack[$dd],
14198 $item_count_stack[$dd],
14199 $identifier_count_stack[$dd],
14201 $next_nonblank_type,
14202 $container_type[$dd],
14203 $interrupted_list[$dd],
14204 \$do_not_break_apart,
14207 $bp_count = $forced_breakpoint_count - $fbc;
# a list that must be opened overrides any "need not break apart" result
14208 $do_not_break_apart = 0 if $must_break_open;
14211 return ( $bp_count, $do_not_break_apart );
# do_uncontained_comma_breaks
#
# For the commas recorded in $comma_index[$dd], store a bias in
# @bond_strength_to_go at each comma that ended a line in the input
# ($old_breakpoint_to_go), and count those commas.  If the first comma was
# an old break point, scan backwards from it through earlier old break
# points and, when the conditions listed below hold, force one additional
# break at the position found ($ibreak).
14214 sub do_uncontained_comma_breaks {
14216 # Handle commas not in containers...
14217 # This is a catch-all routine for commas that we
14218 # don't know what to do with because they don't fall
14219 # within containers. We will bias the bond strength
14220 # to break at commas which ended lines in the input
14221 # file. This usually works better than just trying
14222 # to put as many items on a line as possible. A
14223 # downside is that if the input file is garbage it
14224 # won't work very well. However, the user can always
14225 # prevent following the old breakpoints with the
14229 my $old_comma_break_count = 0;
14230 foreach my $ii ( @{ $comma_index[$dd] } ) {
14231 if ( $old_breakpoint_to_go[$ii] ) {
14232 $old_comma_break_count++;
14233 $bond_strength_to_go[$ii] = $bias;
14235 # reduce bias magnitude to force breaks in order
14240 # Also put a break before the first comma if
14241 # (1) there was a break there in the input, and
14242 # (2) there was exactly one previous break in the input
14243 # (3) there are multiple old comma breaks
14245 # For example, we will follow the user and break after
14246 # 'print' in this snippet:
14248 # "conformability (Not the same dimension)\n",
14249 # "\t", $have, " is ", text_unit($hu), "\n",
14250 # "\t", $want, " is ", text_unit($wu), "\n",
14252 # But we will not force a break after the first comma here
14253 # (causes a blinker):
14254 # $heap->{stream}->set_output_filter(
14255 # poe::filter::reference->new('myotherfreezer') ),
14258 my $i_first_comma = $comma_index[$dd]->[0];
14259 if ( $old_breakpoint_to_go[$i_first_comma] ) {
14260 my $level_comma = $levels_to_go[$i_first_comma];
# walk backwards over the tokens before the first comma, looking at old
# break points and giving up once more than one has been seen
14263 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
14264 if ( $old_breakpoint_to_go[$ii] ) {
14266 last if ( $obp_count > 1 );
14268 if ( $levels_to_go[$ii] == $level_comma );
14271 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 1 )
14273 set_forced_breakpoint($ibreak);
# Set of container types treated as "logical" containers; each listed word
# or operator is stored as a key with the value 1.  The table is consulted
# by set_logical_breakpoints.
14278 my %is_logical_container;
14281 @_ = qw# if elsif unless while and or err not && | || ? : ! #;
14282 @is_logical_container{@_} = (1) x scalar(@_);
# set_for_semicolon_breakpoints
#
# Force a break at every index stored in $rfor_semicolon_list[$dd]
# (the list to which scan_list pushes the positions of type 'f' tokens).
14285 sub set_for_semicolon_breakpoints {
14287 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
14288 set_forced_breakpoint($_);
# set_logical_breakpoints
#
# Force line breaks at logical operators recorded for depth $dd.  The
# guard below involves: no list items at this depth, a container type found
# in %is_logical_container, and/or old logical break points seen in the
# input.  $rand_or_list[$dd] holds one index list per operator class; in
# the visible scan_list code slot 1 collects 'and', slot 2 '||' and
# slot 3 '&&', and slot 4 is described below as 'if'/'unless'.
14292 sub set_logical_breakpoints {
14295 $item_count_stack[$dd] == 0
14296 && $is_logical_container{ $container_type[$dd] }
14298 || $has_old_logical_breakpoints[$dd]
14302 # Look for breaks in this order:
14305 foreach my $i ( 0 .. 3 ) {
14306 if ( $rand_or_list[$dd][$i] ) {
14307 foreach ( @{ $rand_or_list[$dd][$i] } ) {
14308 set_forced_breakpoint($_);
14311 # break at any 'if' and 'unless' too
14312 foreach ( @{ $rand_or_list[$dd][4] } ) {
14313 set_forced_breakpoint($_);
# the recorded positions have been consumed; start over with an empty list
14315 $rand_or_list[$dd] = [];
# is_unbreakable_container
#
# Looks up the container type at depth $dd in %is_sort_map_grep; a true
# value marks a container that must never be broken open.
14322 sub is_unbreakable_container {
14324 # never break a container of one of these types
14325 # because bad things can happen (map1.t)
14327 $is_sort_map_grep{ $container_type[$dd] };
14332 # This routine is responsible for setting line breaks for all lists,
14333 # so that hierarchical structure can be displayed and so that list
14334 # items can be vertically aligned. The output of this routine is
14335 # stored in the array @forced_breakpoint_to_go, which is used to set
14336 # final breakpoints.
14338 $starting_depth = $nesting_depth_to_go[0];
14341 $current_depth = $starting_depth;
14343 $last_colon_sequence_number = -1;
14344 $last_nonblank_token = ';';
14345 $last_nonblank_type = ';';
14346 $last_nonblank_block_type = ' ';
14347 $last_old_breakpoint_count = 0;
14348 $minimum_depth = $current_depth + 1; # forces update in check below
14349 $old_breakpoint_count = 0;
14350 $starting_breakpoint_count = $forced_breakpoint_count;
14353 $type_sequence = '';
14355 check_for_new_minimum_depth($current_depth);
14357 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
14358 my $want_previous_breakpoint = -1;
14360 my $saw_good_breakpoint;
14361 my $i_line_end = -1;
14362 my $i_line_start = -1;
14364 # loop over all tokens in this batch
14365 while ( ++$i <= $max_index_to_go ) {
14366 if ( $type ne 'b' ) {
14367 $i_last_nonblank_token = $i - 1;
14368 $last_nonblank_type = $type;
14369 $last_nonblank_token = $token;
14370 $last_nonblank_block_type = $block_type;
14372 $type = $types_to_go[$i];
14373 $block_type = $block_type_to_go[$i];
14374 $token = $tokens_to_go[$i];
14375 $type_sequence = $type_sequence_to_go[$i];
14376 my $next_type = $types_to_go[ $i + 1 ];
14377 my $next_token = $tokens_to_go[ $i + 1 ];
14378 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
14379 $next_nonblank_type = $types_to_go[$i_next_nonblank];
14380 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14381 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
14383 # set break if flag was set
14384 if ( $want_previous_breakpoint >= 0 ) {
14385 set_forced_breakpoint($want_previous_breakpoint);
14386 $want_previous_breakpoint = -1;
14389 $last_old_breakpoint_count = $old_breakpoint_count;
14390 if ( $old_breakpoint_to_go[$i] ) {
14392 $i_line_start = $i_next_nonblank;
14394 $old_breakpoint_count++;
14396 # Break before certain keywords if user broke there and
14397 # this is a 'safe' break point. The idea is to retain
14398 # any preferred breaks for sequential list operations,
14399 # like a schwartzian transform.
14400 if ($rOpts_break_at_old_keyword_breakpoints) {
14402 $next_nonblank_type eq 'k'
14403 && $is_keyword_returning_list{$next_nonblank_token}
14404 && ( $type =~ /^[=\)\]\}Riw]$/
14406 && $is_keyword_returning_list{$token} )
14410 # we actually have to set this break next time through
14411 # the loop because if we are at a closing token (such
14412 # as '}') which forms a one-line block, this break might
14414 $want_previous_breakpoint = $i;
14418 # Break before attributes if user broke there
14419 if ($rOpts_break_at_old_attribute_breakpoints) {
14420 if ( $next_nonblank_type eq 'A' ) {
14421 $want_previous_breakpoint = $i;
14425 next if ( $type eq 'b' );
14426 $depth = $nesting_depth_to_go[ $i + 1 ];
14428 # safety check - be sure we always break after a comment
14429 # Shouldn't happen .. an error here probably means that the
14430 # nobreak flag did not get turned off correctly during
14432 if ( $type eq '#' ) {
14433 if ( $i != $max_index_to_go ) {
14435 "Non-fatal program bug: backup logic needed to break after a comment\n"
14437 report_definite_bug();
14438 $nobreak_to_go[$i] = 0;
14439 set_forced_breakpoint($i);
14443 # Force breakpoints at certain tokens in long lines.
14444 # Note that such breakpoints will be undone later if these tokens
14445 # are fully contained within parens on a line.
14448 # break before a keyword within a line
14452 # if one of these keywords:
14453 && $token =~ /^(if|unless|while|until|for)$/
14455 # but do not break at something like '1 while'
14456 && ( $last_nonblank_type ne 'n' || $i > 2 )
14458 # and let keywords follow a closing 'do' brace
14459 && $last_nonblank_block_type ne 'do'
14464 # or container is broken (by side-comment, etc)
14465 || ( $next_nonblank_token eq '('
14466 && $mate_index_to_go[$i_next_nonblank] < $i )
14470 set_forced_breakpoint( $i - 1 );
14473 # remember locations of '||' and '&&' for possible breaks if we
14474 # decide this is a long logical expression.
14475 if ( $type eq '||' ) {
14476 push @{ $rand_or_list[$depth][2] }, $i;
14477 ++$has_old_logical_breakpoints[$depth]
14478 if ( ( $i == $i_line_start || $i == $i_line_end )
14479 && $rOpts_break_at_old_logical_breakpoints );
14481 elsif ( $type eq '&&' ) {
14482 push @{ $rand_or_list[$depth][3] }, $i;
14483 ++$has_old_logical_breakpoints[$depth]
14484 if ( ( $i == $i_line_start || $i == $i_line_end )
14485 && $rOpts_break_at_old_logical_breakpoints );
14487 elsif ( $type eq 'f' ) {
14488 push @{ $rfor_semicolon_list[$depth] }, $i;
14490 elsif ( $type eq 'k' ) {
14491 if ( $token eq 'and' ) {
14492 push @{ $rand_or_list[$depth][1] }, $i;
14493 ++$has_old_logical_breakpoints[$depth]
14494 if ( ( $i == $i_line_start || $i == $i_line_end )
14495 && $rOpts_break_at_old_logical_breakpoints );
14498 # break immediately at 'or's which are probably not in a logical
14499 # block -- but we will break in logical breaks below so that
14500 # they do not add to the forced_breakpoint_count
14501 elsif ( $token eq 'or' ) {
14502 push @{ $rand_or_list[$depth][0] }, $i;
14503 ++$has_old_logical_breakpoints[$depth]
14504 if ( ( $i == $i_line_start || $i == $i_line_end )
14505 && $rOpts_break_at_old_logical_breakpoints );
14506 if ( $is_logical_container{ $container_type[$depth] } ) {
14509 if ($is_long_line) { set_forced_breakpoint($i) }
14510 elsif ( ( $i == $i_line_start || $i == $i_line_end )
14511 && $rOpts_break_at_old_logical_breakpoints )
14513 $saw_good_breakpoint = 1;
14517 elsif ( $token eq 'if' || $token eq 'unless' ) {
14518 push @{ $rand_or_list[$depth][4] }, $i;
14519 if ( ( $i == $i_line_start || $i == $i_line_end )
14520 && $rOpts_break_at_old_logical_breakpoints )
14522 set_forced_breakpoint($i);
14526 elsif ( $is_assignment{$type} ) {
14527 $i_equals[$depth] = $i;
14530 if ($type_sequence) {
14532 # handle any postponed closing breakpoints
14533 if ( $token =~ /^[\)\]\}\:]$/ ) {
14534 if ( $type eq ':' ) {
14535 $last_colon_sequence_number = $type_sequence;
14537 # retain break at a ':' line break
14538 if ( ( $i == $i_line_start || $i == $i_line_end )
14539 && $rOpts_break_at_old_ternary_breakpoints )
14543 set_forced_breakpoint($i);
14545 # break at previous '='
14546 if ( $i_equals[$depth] > 0 ) {
14547 set_forced_breakpoint( $i_equals[$depth] );
14548 $i_equals[$depth] = -1;
14552 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
14553 my $inc = ( $type eq ':' ) ? 0 : 1;
14554 set_forced_breakpoint( $i - $inc );
14555 delete $postponed_breakpoint{$type_sequence};
14559 # set breaks at ?/: if they will get separated (and are
14560 # not a ?/: chain), or if the '?' is at the end of the
14562 elsif ( $token eq '?' ) {
14563 my $i_colon = $mate_index_to_go[$i];
14565 $i_colon <= 0 # the ':' is not in this batch
14566 || $i == 0 # this '?' is the first token of the line
14568 $max_index_to_go # or this '?' is the last token
14572 # don't break at a '?' if preceded by ':' on
14573 # this line of previous ?/: pair on this line.
14574 # This is an attempt to preserve a chain of ?/:
14575 # expressions (elsif2.t). And don't break if
14576 # this has a side comment.
14577 set_forced_breakpoint($i)
14579 $type_sequence == (
14580 $last_colon_sequence_number +
14581 TYPE_SEQUENCE_INCREMENT
14583 || $tokens_to_go[$max_index_to_go] eq '#'
14585 set_closing_breakpoint($i);
14590 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
14592 #------------------------------------------------------------
14593 # Handle Increasing Depth..
14595 # prepare for a new list when depth increases
14596 # token $i is a '(','{', or '['
14597 #------------------------------------------------------------
14598 if ( $depth > $current_depth ) {
14600 $breakpoint_stack[$depth] = $forced_breakpoint_count;
14601 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
14602 $has_broken_sublist[$depth] = 0;
14603 $identifier_count_stack[$depth] = 0;
14604 $index_before_arrow[$depth] = -1;
14605 $interrupted_list[$depth] = 0;
14606 $item_count_stack[$depth] = 0;
14607 $last_comma_index[$depth] = undef;
14608 $last_dot_index[$depth] = undef;
14609 $last_nonblank_type[$depth] = $last_nonblank_type;
14610 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
14611 $opening_structure_index_stack[$depth] = $i;
14612 $rand_or_list[$depth] = [];
14613 $rfor_semicolon_list[$depth] = [];
14614 $i_equals[$depth] = -1;
14615 $want_comma_break[$depth] = 0;
14616 $container_type[$depth] =
14617 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
14618 ? $last_nonblank_token
14620 $has_old_logical_breakpoints[$depth] = 0;
14622 # if line ends here then signal closing token to break
14623 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
14625 set_closing_breakpoint($i);
14628 # Not all lists of values should be vertically aligned..
14629 $dont_align[$depth] =
14631 # code BLOCKS are handled at a higher level
14632 ( $block_type ne "" )
14634 # certain paren lists
14635 || ( $type eq '(' ) && (
14637 # it does not usually look good to align a list of
14638 # identifiers in a parameter list, as in:
14639 # my($var1, $var2, ...)
14640 # (This test should probably be refined, for now I'm just
14641 # testing for any keyword)
14642 ( $last_nonblank_type eq 'k' )
14644 # a trailing '(' usually indicates a non-list
14645 || ( $next_nonblank_type eq '(' )
14648 # patch to outdent opening brace of long if/for/..
14649 # statements (like this one). See similar coding in
14650 # set_continuation_breaks. We also have to catch it here for
14651 # short line fragments which otherwise will not go through
14652 # set_continuation_breaks.
14656 # if we have the ')' but not its '(' in this batch..
14657 && ( $last_nonblank_token eq ')' )
14658 && $mate_index_to_go[$i_last_nonblank_token] < 0
14660 # and user wants brace to left
14661 && !$rOpts->{'opening-brace-always-on-right'}
14663 && ( $type eq '{' ) # should be true
14664 && ( $token eq '{' ) # should be true
14667 set_forced_breakpoint( $i - 1 );
14671 #------------------------------------------------------------
14672 # Handle Decreasing Depth..
14674 # finish off any old list when depth decreases
14675 # token $i is a ')','}', or ']'
14676 #------------------------------------------------------------
14677 elsif ( $depth < $current_depth ) {
14679 check_for_new_minimum_depth($depth);
14681 # force all outer logical containers to break after we see on
14683 $has_old_logical_breakpoints[$depth] ||=
14684 $has_old_logical_breakpoints[$current_depth];
14686 # Patch to break between ') {' if the paren list is broken.
14687 # There is similar logic in set_continuation_breaks for
14688 # non-broken lists.
14690 && $next_nonblank_block_type
14691 && $interrupted_list[$current_depth]
14692 && $next_nonblank_type eq '{'
14693 && !$rOpts->{'opening-brace-always-on-right'} )
14695 set_forced_breakpoint($i);
14698 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
14700 # set breaks at commas if necessary
14701 my ( $bp_count, $do_not_break_apart ) =
14702 set_comma_breakpoints($current_depth);
14704 my $i_opening = $opening_structure_index_stack[$current_depth];
14705 my $saw_opening_structure = ( $i_opening >= 0 );
14707 # this term is long if we had to break at interior commas..
14708 my $is_long_term = $bp_count > 0;
14710 # ..or if the length between opening and closing parens exceeds
14711 # allowed line length
14712 if ( !$is_long_term && $saw_opening_structure ) {
14713 my $i_opening_minus = find_token_starting_list($i_opening);
14715 # Note: we have to allow for one extra space after a
14716 # closing token so that we do not strand a comma or
14717 # semicolon, hence the '>=' here (oneline.t)
14719 excess_line_length( $i_opening_minus, $i ) >= 0;
14722 # We've set breaks after all comma-arrows. Now we have to
14723 # undo them if this can be a one-line block
14724 # (the only breakpoints set will be due to comma-arrows)
14727 # user doesn't require breaking after all comma-arrows
14728 ( $rOpts_comma_arrow_breakpoints != 0 )
14730 # and if the opening structure is in this batch
14731 && $saw_opening_structure
14733 # and either on the same old line
14735 $old_breakpoint_count_stack[$current_depth] ==
14736 $last_old_breakpoint_count
14738 # or user wants to form long blocks with arrows
14739 || $rOpts_comma_arrow_breakpoints == 2
14742 # and we made some breakpoints between the opening and closing
14743 && ( $breakpoint_undo_stack[$current_depth] <
14744 $forced_breakpoint_undo_count )
14746 # and this block is short enough to fit on one line
14747 # Note: use < because need 1 more space for possible comma
14752 undo_forced_breakpoint_stack(
14753 $breakpoint_undo_stack[$current_depth] );
14756 # now see if we have any comma breakpoints left
14757 my $has_comma_breakpoints =
14758 ( $breakpoint_stack[$current_depth] !=
14759 $forced_breakpoint_count );
14761 # update broken-sublist flag of the outer container
14762 $has_broken_sublist[$depth] =
14763 $has_broken_sublist[$depth]
14764 || $has_broken_sublist[$current_depth]
14766 || $has_comma_breakpoints;
14768 # Having come to the closing ')', '}', or ']', now we have to decide if we
14769 # should 'open up' the structure by placing breaks at the opening and
14770 # closing containers. This is a tricky decision. Here are some of the
14771 # basic considerations:
14773 # -If this is a BLOCK container, then any breakpoints will have already
14774 # been set (and according to user preferences), so we need do nothing here.
14776 # -If we have a comma-separated list for which we can align the list items,
14777 # then we need to do so because otherwise the vertical aligner cannot
14778 # currently do the alignment.
14780 # -If this container does itself contain a container which has been broken
14781 # open, then it should be broken open to properly show the structure.
14783 # -If there is nothing to align, and no other reason to break apart,
14784 # then do not do it.
14786 # We will not break open the parens of a long but 'simple' logical expression.
14789 # This is an example of a simple logical expression and its formatting:
14791 # if ( $bigwasteofspace1 && $bigwasteofspace2
14792 # || $bigwasteofspace3 && $bigwasteofspace4 )
14794 # Most people would prefer this to the 'spacey' version:
14797 # $bigwasteofspace1 && $bigwasteofspace2
14798 # || $bigwasteofspace3 && $bigwasteofspace4
14801 # To illustrate the rules for breaking logical expressions, consider:
14805 # and ( exists $ids_excl_uc{$id_uc}
14806 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
14808 # This is on the verge of being difficult to read. The current default is to
14809 # open it up like this:
14814 # and ( exists $ids_excl_uc{$id_uc}
14815 # or grep $id_uc =~ /$_/, @ids_excl_uc )
14818 # This is a compromise which tries to avoid being too dense and too spacey.
14819 # A more spaced version would be:
14825 # exists $ids_excl_uc{$id_uc}
14826 # or grep $id_uc =~ /$_/, @ids_excl_uc
14830 # Some people might prefer the spacey version -- an option could be added. The
14831 # innermost expression contains a long block '( exists $ids_... ')'.
14833 # Here is how the logic goes: We will force a break at the 'or' that the
14834 # innermost expression contains, but we will not break apart its opening and
14835 # closing containers because (1) it contains no multi-line sub-containers itself,
14836 # and (2) there is no alignment to be gained by breaking it open like this
14839 # exists $ids_excl_uc{$id_uc}
14840 # or grep $id_uc =~ /$_/, @ids_excl_uc
14843 # (although this looks perfectly ok and might be good for long expressions). The
14844 # outer 'if' container, though, contains a broken sub-container, so it will be
14845 # broken open to avoid too much density. Also, since it contains no 'or's, there
14846 # will be a forced break at its 'and'.
14848 # set some flags telling something about this container..
14849 my $is_simple_logical_expression = 0;
14850 if ( $item_count_stack[$current_depth] == 0
14851 && $saw_opening_structure
14852 && $tokens_to_go[$i_opening] eq '('
14853 && $is_logical_container{ $container_type[$current_depth] }
14857 # This seems to be a simple logical expression with
14858 # no existing breakpoints. Set a flag to prevent
14860 if ( !$has_comma_breakpoints ) {
14861 $is_simple_logical_expression = 1;
14864 # This seems to be a simple logical expression with
14865 # breakpoints (broken sublists, for example). Break
14866 # at all 'or's and '||'s.
14868 set_logical_breakpoints($current_depth);
14873 && @{ $rfor_semicolon_list[$current_depth] } )
14875 set_for_semicolon_breakpoints($current_depth);
14877 # open up a long 'for' or 'foreach' container to allow
14878 # leading term alignment unless -lp is used.
14879 $has_comma_breakpoints = 1
14880 unless $rOpts_line_up_parentheses;
14885 # breaks for code BLOCKS are handled at a higher level
14888 # we do not need to break at the top level of an 'if'
14890 && !$is_simple_logical_expression
14892 ## modification to keep ': (' containers vertically tight;
14893 ## but probably better to let user set -vt=1 to avoid
14894 ## inconsistency with other paren types
14895 ## && ($container_type[$current_depth] ne ':')
14897 # otherwise, we require one of these reasons for breaking:
14900 # - this term has forced line breaks
14901 $has_comma_breakpoints
14903 # - the opening container is separated from this batch
14904 # for some reason (comment, blank line, code block)
14905 # - this is a non-paren container spanning multiple lines
14906 || !$saw_opening_structure
14908 # - this is a long block contained in another breakable
14911 && $container_environment_to_go[$i_opening] ne
14917 # For -lp option, we must put a breakpoint before
14918 # the token which has been identified as starting
14919 # this indentation level. This is necessary for
14920 # proper alignment.
14921 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
14923 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
14924 if ( $i_opening + 1 < $max_index_to_go
14925 && $types_to_go[ $i_opening + 1 ] eq 'b' )
14927 $item = $leading_spaces_to_go[ $i_opening + 2 ];
14929 if ( defined($item) ) {
14930 my $i_start_2 = $item->get_STARTING_INDEX();
14932 defined($i_start_2)
14934 # we are breaking after an opening brace, paren,
14935 # so don't break before it too
14936 && $i_start_2 ne $i_opening
14940 # Only break for breakpoints at the same
14941 # indentation level as the opening paren
14942 my $test1 = $nesting_depth_to_go[$i_opening];
14943 my $test2 = $nesting_depth_to_go[$i_start_2];
14944 if ( $test2 == $test1 ) {
14945 set_forced_breakpoint( $i_start_2 - 1 );
14951 # break after opening structure.
14952 # note: break before closing structure will be automatic
14953 if ( $minimum_depth <= $current_depth ) {
14955 set_forced_breakpoint($i_opening)
14956 unless ( $do_not_break_apart
14957 || is_unbreakable_container($current_depth) );
14959 # break at '.' of lower depth level before opening token
14960 if ( $last_dot_index[$depth] ) {
14961 set_forced_breakpoint( $last_dot_index[$depth] );
14964 # break before opening structure if preceded by another
14965 # closing structure and a comma. This is normally
14966 # done by the previous closing brace, but not
14967 # if it was a one-line block.
14968 if ( $i_opening > 2 ) {
14970 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
14974 if ( $types_to_go[$i_prev] eq ','
14975 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
14977 set_forced_breakpoint($i_prev);
14980 # also break before something like ':(' or '?('
14983 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
14985 my $token_prev = $tokens_to_go[$i_prev];
14986 if ( $want_break_before{$token_prev} ) {
14987 set_forced_breakpoint($i_prev);
14993 # break after comma following closing structure
14994 if ( $next_type eq ',' ) {
14995 set_forced_breakpoint( $i + 1 );
14998 # break before an '=' following closing structure
15000 $is_assignment{$next_nonblank_type}
15001 && ( $breakpoint_stack[$current_depth] !=
15002 $forced_breakpoint_count )
15005 set_forced_breakpoint($i);
15008 # break at any comma before the opening structure Added
15009 # for -lp, but seems to be good in general. It isn't
15010 # obvious how far back to look; the '5' below seems to
15011 # work well and will catch the comma in something like
15012 # push @list, myfunc( $param, $param, ..
15014 my $icomma = $last_comma_index[$depth];
15015 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
15016 unless ( $forced_breakpoint_to_go[$icomma] ) {
15017 set_forced_breakpoint($icomma);
15020 } # end logic to open up a container
15022 # Break open a logical container if it was already open
15023 elsif ($is_simple_logical_expression
15024 && $has_old_logical_breakpoints[$current_depth] )
15026 set_logical_breakpoints($current_depth);
15029 # Handle long container which does not get opened up
15030 elsif ($is_long_term) {
15032 # must set fake breakpoint to alert outer containers that
15034 set_fake_breakpoint();
15038 #------------------------------------------------------------
15039 # Handle this token
15040 #------------------------------------------------------------
15042 $current_depth = $depth;
15044 # handle comma-arrow
15045 if ( $type eq '=>' ) {
15046 next if ( $last_nonblank_type eq '=>' );
15047 next if $rOpts_break_at_old_comma_breakpoints;
15048 next if $rOpts_comma_arrow_breakpoints == 3;
15049 $want_comma_break[$depth] = 1;
15050 $index_before_arrow[$depth] = $i_last_nonblank_token;
15054 elsif ( $type eq '.' ) {
15055 $last_dot_index[$depth] = $i;
15058 # Turn off alignment if we are sure that this is not a list
15059 # environment. To be safe, we will do this if we see certain
15060 # non-list tokens, such as ';', and also the environment is
15061 # not a list. Note that '=' could be in any of the = operators
15062 # (lextest.t). We can't just use the reported environment
15063 # because it can be incorrect in some cases.
15064 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
15065 && $container_environment_to_go[$i] ne 'LIST' )
15067 $dont_align[$depth] = 1;
15068 $want_comma_break[$depth] = 0;
15069 $index_before_arrow[$depth] = -1;
15072 # now just handle any commas
15073 next unless ( $type eq ',' );
15075 $last_dot_index[$depth] = undef;
15076 $last_comma_index[$depth] = $i;
15078 # break here if this comma follows a '=>'
15079 # but not if there is a side comment after the comma
15080 if ( $want_comma_break[$depth] ) {
15082 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
15083 $want_comma_break[$depth] = 0;
15084 $index_before_arrow[$depth] = -1;
15088 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15090 # break before the previous token if it looks safe
15091 # Example of something that we will not try to break before:
15092 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
15093 # Also we don't want to break at a binary operator (like +):
15097 # $y - $R, -fill => 'black',
15099 my $ibreak = $index_before_arrow[$depth] - 1;
15101 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
15103 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
15104 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
15105 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
15107 # don't break pointer calls, such as the following:
15108 # File::Spec->curdir => 1,
15109 # (This is tokenized as adjacent 'w' tokens)
15110 if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
15111 set_forced_breakpoint($ibreak);
15116 $want_comma_break[$depth] = 0;
15117 $index_before_arrow[$depth] = -1;
15119 # handle list which mixes '=>'s and ','s:
15120 # treat any list items so far as an interrupted list
15121 $interrupted_list[$depth] = 1;
15125 # break after all commas above starting depth
15126 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
15127 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15131 # add this comma to the list..
15132 my $item_count = $item_count_stack[$depth];
15133 if ( $item_count == 0 ) {
15135 # but do not form a list with no opening structure
15138 # open INFILE_COPY, ">$input_file_copy"
15139 # or die ("very long message");
15141 if ( ( $opening_structure_index_stack[$depth] < 0 )
15142 && $container_environment_to_go[$i] eq 'BLOCK' )
15144 $dont_align[$depth] = 1;
15148 $comma_index[$depth][$item_count] = $i;
15149 ++$item_count_stack[$depth];
15150 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
15151 $identifier_count_stack[$depth]++;
15155 #-------------------------------------------
15156 # end of loop over all tokens in this batch
15157 #-------------------------------------------
15159 # set breaks for any unfinished lists ..
15160 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
15162 $interrupted_list[$dd] = 1;
15163 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
15164 set_comma_breakpoints($dd);
15165 set_logical_breakpoints($dd)
15166 if ( $has_old_logical_breakpoints[$dd] );
15167 set_for_semicolon_breakpoints($dd);
15169 # break open container...
15170 my $i_opening = $opening_structure_index_stack[$dd];
15171 set_forced_breakpoint($i_opening)
15173 is_unbreakable_container($dd)
15175 # Avoid a break which would place an isolated ' or "
15178 && $i_opening >= $max_index_to_go - 2
15179 && $token =~ /^['"]$/ )
15183 # Return a flag indicating if the input file had some good breakpoints.
15184 # This flag will be used to force a break in a line shorter than the
15185 # allowed line length.
15186 if ( $has_old_logical_breakpoints[$current_depth] ) {
15187 $saw_good_breakpoint = 1;
15189 return $saw_good_breakpoint;
sub find_token_starting_list {

    # When testing to see if a block will fit on one line, some
    # previous token(s) may also need to be on the line; particularly
    # if this is a sub call.  So we will look back at least one
    # token. NOTE: This isn't perfect, but not critical, because
    # if we mis-identify a block, it will be wrapped and therefore
    # fixed the next time it is formatted.
    #
    # Given:
    #   $i_opening_paren = index, in @types_to_go / @tokens_to_go, of the
    #                      token which opens the container
    # Returns:
    #   the index at which a length estimate for the container should
    #   start; this is always <= $i_opening_paren.
    my $i_opening_paren = shift;

    # Nothing precedes an opening token at the start of the batch.  We
    # must return here: index -1 would otherwise silently read the LAST
    # element of @types_to_go, and if that happened to be a keyword the
    # 'k' branch below would hand back -1 as the starting index.
    return $i_opening_paren if ( $i_opening_paren <= 0 );

    my $i_opening_minus = $i_opening_paren;
    my $im1             = $i_opening_paren - 1;
    my $im2             = $i_opening_paren - 2;
    my $typem1          = $types_to_go[$im1];

    # treat a missing second-previous token like a blank
    my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';

    # a container which follows a comma starts at its own opening token
    if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
        $i_opening_minus = $i_opening_paren;
    }

    # for a paren, include the preceding token(s)
    elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
        $i_opening_minus = $im1;

        # walk back to improve length estimate, stopping at any
        # container token, blank, or comma
        for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
            last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
            $i_opening_minus = $j;
        }

        # do not start the estimate on a blank
        if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
    }

    # otherwise include a keyword immediately before the opening token ..
    elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }

    # .. or a keyword separated from it by a single blank
    elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
        $i_opening_minus = $im2;
    }
    return $i_opening_minus;
}
15228 { # begin set_comma_breakpoints_do
15230 my %is_keyword_with_special_leading_term;
15234 # These keywords have prototypes which allow a special leading item
15235 # followed by a list
15237 qw(formline grep kill map printf sprintf push chmod join pack unshift);
15238 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
15241 sub set_comma_breakpoints_do {
15243 # Given a list with some commas, set breakpoints at some of the
15244 # commas, if necessary, to make it easy to read. This list is
15247 $depth, $i_opening_paren, $i_closing_paren,
15248 $item_count, $identifier_count, $rcomma_index,
15249 $next_nonblank_type, $list_type, $interrupted,
15250 $rdo_not_break_apart, $must_break_open,
15253 # nothing to do if no commas seen
15254 return if ( $item_count < 1 );
15255 my $i_first_comma = $$rcomma_index[0];
15256 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
15257 my $i_last_comma = $i_true_last_comma;
15258 if ( $i_last_comma >= $max_index_to_go ) {
15259 $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
15260 return if ( $item_count < 1 );
15263 #---------------------------------------------------------------
15264 # find lengths of all items in the list to calculate page layout
15265 #---------------------------------------------------------------
15266 my $comma_count = $item_count;
15272 my @max_length = ( 0, 0 );
15273 my $first_term_length;
15274 my $i = $i_opening_paren;
15277 for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
15278 $is_odd = 1 - $is_odd;
15279 $i_prev_plus = $i + 1;
15280 $i = $$rcomma_index[$j];
15283 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
15285 ( $types_to_go[$i_prev_plus] eq 'b' )
15288 push @i_term_begin, $i_term_begin;
15289 push @i_term_end, $i_term_end;
15290 push @i_term_comma, $i;
15292 # note: currently adding 2 to all lengths (for comma and space)
15294 2 + token_sequence_length( $i_term_begin, $i_term_end );
15295 push @item_lengths, $length;
15298 $first_term_length = $length;
15302 if ( $length > $max_length[$is_odd] ) {
15303 $max_length[$is_odd] = $length;
15308 # now we have to make a distinction between the comma count and item
15309 # count, because the item count will be one greater than the comma
15310 # count if the last item is not terminated with a comma
15312 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
15313 ? $i_last_comma + 1
15316 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
15317 ? $i_closing_paren - 2
15318 : $i_closing_paren - 1;
15319 my $i_effective_last_comma = $i_last_comma;
15321 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
15323 if ( $last_item_length > 0 ) {
15325 # add 2 to length because other lengths include a comma and a blank
15326 $last_item_length += 2;
15327 push @item_lengths, $last_item_length;
15328 push @i_term_begin, $i_b + 1;
15329 push @i_term_end, $i_e;
15330 push @i_term_comma, undef;
15332 my $i_odd = $item_count % 2;
15334 if ( $last_item_length > $max_length[$i_odd] ) {
15335 $max_length[$i_odd] = $last_item_length;
15339 $i_effective_last_comma = $i_e + 1;
15341 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
15342 $identifier_count++;
15346 #---------------------------------------------------------------
15347 # End of length calculations
15348 #---------------------------------------------------------------
15350 #---------------------------------------------------------------
15351 # Compound List Rule 1:
15352 # Break at (almost) every comma for a list containing a broken
15353 # sublist. This has higher priority than the Interrupted List
15355 #---------------------------------------------------------------
15356 if ( $has_broken_sublist[$depth] ) {
15358 # Break at every comma except for a comma between two
15359 # simple, small terms. This prevents long vertical
15360 # columns of, say, just 0's.
15361 my $small_length = 10; # 2 + actual maximum length wanted
15363 # We'll insert a break in long runs of small terms to
15364 # allow alignment in uniform tables.
15365 my $skipped_count = 0;
15366 my $columns = table_columns_available($i_first_comma);
15367 my $fields = int( $columns / $small_length );
15368 if ( $rOpts_maximum_fields_per_table
15369 && $fields > $rOpts_maximum_fields_per_table )
15371 $fields = $rOpts_maximum_fields_per_table;
15373 my $max_skipped_count = $fields - 1;
15375 my $is_simple_last_term = 0;
15376 my $is_simple_next_term = 0;
15377 foreach my $j ( 0 .. $item_count ) {
15378 $is_simple_last_term = $is_simple_next_term;
15379 $is_simple_next_term = 0;
15380 if ( $j < $item_count
15381 && $i_term_end[$j] == $i_term_begin[$j]
15382 && $item_lengths[$j] <= $small_length )
15384 $is_simple_next_term = 1;
15387 if ( $is_simple_last_term
15388 && $is_simple_next_term
15389 && $skipped_count < $max_skipped_count )
15394 $skipped_count = 0;
15395 my $i = $i_term_comma[ $j - 1 ];
15396 last unless defined $i;
15397 set_forced_breakpoint($i);
15401 # always break at the last comma if this list is
15402 # interrupted; we wouldn't want to leave a terminal '{', for
15404 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
15408 #my ( $a, $b, $c ) = caller();
15409 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
15410 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
15411 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
15413 #---------------------------------------------------------------
15414 # Interrupted List Rule:
15415 # A list is forced to use old breakpoints if it was interrupted
15416 # by side comments or blank lines, or requested by user.
15417 #---------------------------------------------------------------
15418 if ( $rOpts_break_at_old_comma_breakpoints
15420 || $i_opening_paren < 0 )
15422 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
15426 #---------------------------------------------------------------
15427 # Looks like a list of items. We have to look at it and size it up.
15428 #---------------------------------------------------------------
15430 my $opening_token = $tokens_to_go[$i_opening_paren];
15431 my $opening_environment =
15432 $container_environment_to_go[$i_opening_paren];
15434 #-------------------------------------------------------------------
15435 # Return if this will fit on one line
15436 #-------------------------------------------------------------------
15438 my $i_opening_minus = find_token_starting_list($i_opening_paren);
15440 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
15442 #-------------------------------------------------------------------
15443 # Now we know that this block spans multiple lines; we have to set
15444 # at least one breakpoint -- real or fake -- as a signal to break
15445 # open any outer containers.
15446 #-------------------------------------------------------------------
15447 set_fake_breakpoint();
15449 # be sure we do not extend beyond the current list length
15450 if ( $i_effective_last_comma >= $max_index_to_go ) {
15451 $i_effective_last_comma = $max_index_to_go - 1;
15454 # Set a flag indicating if we need to break open to keep -lp
15455 # items aligned. This is necessary if any of the list terms
15456 # exceeds the available space after the '('.
15457 my $need_lp_break_open = $must_break_open;
15458 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
15459 my $columns_if_unbroken = $rOpts_maximum_line_length -
15460 total_line_length( $i_opening_minus, $i_opening_paren );
15461 $need_lp_break_open =
15462 ( $max_length[0] > $columns_if_unbroken )
15463 || ( $max_length[1] > $columns_if_unbroken )
15464 || ( $first_term_length > $columns_if_unbroken );
15467 # Specify if the list must have an even number of fields or not.
15468 # It is generally safest to assume an even number, because the
15469 # list items might be a hash list. But if we can be sure that
15470 # it is not a hash, then we can allow an odd number for more
15472 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
15474 if ( $identifier_count >= $item_count - 1
15475 || $is_assignment{$next_nonblank_type}
15476 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
15482 # do we have a long first term which should be
15483 # left on a line by itself?
15484 my $use_separate_first_term = (
15485 $odd_or_even == 1 # only if we can use 1 field/line
15486 && $item_count > 3 # need several items
15487 && $first_term_length >
15488 2 * $max_length[0] - 2 # need long first term
15489 && $first_term_length >
15490 2 * $max_length[1] - 2 # need long first term
15493 # or do we know from the type of list that the first term should
15495 if ( !$use_separate_first_term ) {
15496 if ( $is_keyword_with_special_leading_term{$list_type} ) {
15497 $use_separate_first_term = 1;
15499 # should the container be broken open?
15500 if ( $item_count < 3 ) {
15501 if ( $i_first_comma - $i_opening_paren < 4 ) {
15502 $$rdo_not_break_apart = 1;
15505 elsif ($first_term_length < 20
15506 && $i_first_comma - $i_opening_paren < 4 )
15508 my $columns = table_columns_available($i_first_comma);
15509 if ( $first_term_length < $columns ) {
15510 $$rdo_not_break_apart = 1;
15517 if ($use_separate_first_term) {
15519 # ..set a break and update starting values
15520 $use_separate_first_term = 1;
15521 set_forced_breakpoint($i_first_comma);
15522 $i_opening_paren = $i_first_comma;
15523 $i_first_comma = $$rcomma_index[1];
15525 return if $comma_count == 1;
15526 shift @item_lengths;
15527 shift @i_term_begin;
15529 shift @i_term_comma;
15532 # if not, update the metrics to include the first term
15534 if ( $first_term_length > $max_length[0] ) {
15535 $max_length[0] = $first_term_length;
15539 # Field width parameters
15540 my $pair_width = ( $max_length[0] + $max_length[1] );
15542 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
15544 # Number of free columns across the page width for laying out tables
15545 my $columns = table_columns_available($i_first_comma);
15547 # Estimated maximum number of fields which fit this space
15548 # This will be our first guess
15549 my $number_of_fields_max =
15550 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
15552 my $number_of_fields = $number_of_fields_max;
15554 # Find the best-looking number of fields
15555 # and make this our second guess if possible
15556 my ( $number_of_fields_best, $ri_ragged_break_list,
15557 $new_identifier_count )
15558 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
15561 if ( $number_of_fields_best != 0
15562 && $number_of_fields_best < $number_of_fields_max )
15564 $number_of_fields = $number_of_fields_best;
15567 # ----------------------------------------------------------------------
15568 # If we are crowded and the -lp option is being used, try to
15569 # undo some indentation
15570 # ----------------------------------------------------------------------
15572 $rOpts_line_up_parentheses
15574 $number_of_fields == 0
15575 || ( $number_of_fields == 1
15576 && $number_of_fields != $number_of_fields_best )
15580 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
15581 if ( $available_spaces > 0 ) {
15583 my $spaces_wanted = $max_width - $columns; # for 1 field
15585 if ( $number_of_fields_best == 0 ) {
15586 $number_of_fields_best =
15587 get_maximum_fields_wanted( \@item_lengths );
15590 if ( $number_of_fields_best != 1 ) {
15591 my $spaces_wanted_2 =
15592 1 + $pair_width - $columns; # for 2 fields
15593 if ( $available_spaces > $spaces_wanted_2 ) {
15594 $spaces_wanted = $spaces_wanted_2;
15598 if ( $spaces_wanted > 0 ) {
15599 my $deleted_spaces =
15600 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
15603 if ( $deleted_spaces > 0 ) {
15604 $columns = table_columns_available($i_first_comma);
15605 $number_of_fields_max =
15606 maximum_number_of_fields( $columns, $odd_or_even,
15607 $max_width, $pair_width );
15608 $number_of_fields = $number_of_fields_max;
15610 if ( $number_of_fields_best == 1
15611 && $number_of_fields >= 1 )
15613 $number_of_fields = $number_of_fields_best;
15620 # try for one column if two won't work
15621 if ( $number_of_fields <= 0 ) {
15622 $number_of_fields = int( $columns / $max_width );
15625 # The user can place an upper bound on the number of fields,
15626 # which can be useful for doing maintenance on tables
15627 if ( $rOpts_maximum_fields_per_table
15628 && $number_of_fields > $rOpts_maximum_fields_per_table )
15630 $number_of_fields = $rOpts_maximum_fields_per_table;
15633 # How many columns (characters) and lines would this container take
15634 # if no additional whitespace were added?
15635 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
15636 $i_effective_last_comma + 1 );
15637 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
15638 my $packed_lines = 1 + int( $packed_columns / $columns );
15640 # are we an item contained in an outer list?
15641 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
15643 if ( $number_of_fields <= 0 ) {
15645 # #---------------------------------------------------------------
15646 # # We're in trouble. We can't find a single field width that works.
15647 # # There is no simple answer here; we may have a single long list
15649 # #---------------------------------------------------------------
15651 # In many cases, it may be best to not force a break if there is just one
15652 # comma, because the standard continuation break logic will do a better
15655 # In the common case that all but one of the terms can fit
15656 # on a single line, it may look better not to break open the
15657 # containing parens. Consider, for example
15661 # sort { $color_value{$::a} <=> $color_value{$::b}; }
15664 # which will look like this with the container broken:
15668 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
15671 # Here is an example of this rule for a long last term:
15673 # log_message( 0, 256, 128,
15674 # "Number of routes in adj-RIB-in to be considered: $peercount" );
15676 # And here is an example with a long first term:
15679 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
15680 # $r, $pu, $ps, $cu, $cs, $tt
15682 # if $style eq 'all';
15684 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
15685 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
15686 my $long_first_term =
15687 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
15689 # break at every comma ...
15692 # if requested by user or is best looking
15693 $number_of_fields_best == 1
15695 # or if this is a sublist of a larger list
15696 || $in_hierarchical_list
15698 # or if multiple commas and we don't have a long first or last
15700 || ( $comma_count > 1
15701 && !( $long_last_term || $long_first_term ) )
15704 foreach ( 0 .. $comma_count - 1 ) {
15705 set_forced_breakpoint( $$rcomma_index[$_] );
15708 elsif ($long_last_term) {
15710 set_forced_breakpoint($i_last_comma);
15711 $$rdo_not_break_apart = 1 unless $must_break_open;
15713 elsif ($long_first_term) {
15715 set_forced_breakpoint($i_first_comma);
15719 # let breaks be defined by default bond strength logic
15724 # --------------------------------------------------------
15725 # We have a tentative field count that seems to work.
15726 # How many lines will this require?
15727 # --------------------------------------------------------
15728 my $formatted_lines = $item_count / ($number_of_fields);
15729 if ( $formatted_lines != int $formatted_lines ) {
15730 $formatted_lines = 1 + int $formatted_lines;
15733 # So far we've been trying to fill out to the right margin. But
15734 # compact tables are easier to read, so let's see if we can use fewer
15735 # fields without increasing the number of lines.
15736 $number_of_fields =
15737 compactify_table( $item_count, $number_of_fields, $formatted_lines,
15740 # How many spaces across the page will we fill?
15741 my $columns_per_line =
15742 ( int $number_of_fields / 2 ) * $pair_width +
15743 ( $number_of_fields % 2 ) * $max_width;
15745 my $formatted_columns;
15747 if ( $number_of_fields > 1 ) {
15748 $formatted_columns =
15749 ( $pair_width * ( int( $item_count / 2 ) ) +
15750 ( $item_count % 2 ) * $max_width );
15753 $formatted_columns = $max_width * $item_count;
15755 if ( $formatted_columns < $packed_columns ) {
15756 $formatted_columns = $packed_columns;
15759 my $unused_columns = $formatted_columns - $packed_columns;
15761 # set some empirical parameters to help decide if we should try to
15762 # align; high sparsity does not look good, especially with few lines
15763 my $sparsity = ($unused_columns) / ($formatted_columns);
15764 my $max_allowed_sparsity =
15765 ( $item_count < 3 ) ? 0.1
15766 : ( $packed_lines == 1 ) ? 0.15
15767 : ( $packed_lines == 2 ) ? 0.4
15770 # Begin check for shortcut methods, which avoid treating a list
15771 # as a table for relatively small parenthesized lists. These
15772 # are usually easier to read if not formatted as tables.
15774 $packed_lines <= 2 # probably can fit in 2 lines
15775 && $item_count < 9 # doesn't have too many items
15776 && $opening_environment eq 'BLOCK' # not a sub-container
15777 && $opening_token eq '(' # is paren list
15781 # Shortcut method 1: for -lp and just one comma:
15782 # This is a no-brainer, just break at the comma.
15784 $rOpts_line_up_parentheses # -lp
15785 && $item_count == 2 # two items, one comma
15786 && !$must_break_open
15789 my $i_break = $$rcomma_index[0];
15790 set_forced_breakpoint($i_break);
15791 $$rdo_not_break_apart = 1;
15792 set_non_alignment_flags( $comma_count, $rcomma_index );
15797 # method 2 is for most small ragged lists which might look
15798 # best if not displayed as a table.
15800 ( $number_of_fields == 2 && $item_count == 3 )
15802 $new_identifier_count > 0 # isn't all quotes
15803 && $sparsity > 0.15
15804 ) # would be fairly spaced gaps if aligned
15808 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15809 $ri_ragged_break_list );
15810 ++$break_count if ($use_separate_first_term);
15812 # NOTE: we should really use the true break count here,
15813 # which can be greater if there are large terms and
15814 # little space, but usually this will work well enough.
15815 unless ($must_break_open) {
15817 if ( $break_count <= 1 ) {
15818 $$rdo_not_break_apart = 1;
15820 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15822 $$rdo_not_break_apart = 1;
15825 set_non_alignment_flags( $comma_count, $rcomma_index );
15829 } # end shortcut methods
15833 FORMATTER_DEBUG_FLAG_SPARSE && do {
15835 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
15839 #---------------------------------------------------------------
15840 # Compound List Rule 2:
15841 # If this list is too long for one line, and it is an item of a
15842 # larger list, then we must format it, regardless of sparsity
15843 # (ian.t). One reason that we have to do this is to trigger
15844 # Compound List Rule 1, above, which causes breaks at all commas of
15845 # all outer lists. In this way, the structure will be properly
15847 #---------------------------------------------------------------
15849 # Decide if this list is too long for one line unless broken
15850 my $total_columns = table_columns_available($i_opening_paren);
15851 my $too_long = $packed_columns > $total_columns;
15853 # For a paren list, include the length of the token just before the
15854 # '(' because this is likely a sub call, and we would have to
15855 # include the sub name on the same line as the list. This is still
15856 # imprecise, but not too bad. (steve.t)
15857 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
15859 $too_long = excess_line_length( $i_opening_minus,
15860 $i_effective_last_comma + 1 ) > 0;
15863 # FIXME: For an item after a '=>', try to include the length of the
15864 # thing before the '=>'. This is crude and should be improved by
15865 # actually looking back token by token.
15866 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
15867 my $i_opening_minus = $i_opening_paren - 4;
15868 if ( $i_opening_minus >= 0 ) {
15869 $too_long = excess_line_length( $i_opening_minus,
15870 $i_effective_last_comma + 1 ) > 0;
15874 # Always break lists contained in '[' and '{' if too long for 1 line,
15875 # and always break lists which are too long and part of a more complex
15877 my $must_break_open_container = $must_break_open
15879 && ( $in_hierarchical_list || $opening_token ne '(' ) );
15881 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
15883 #---------------------------------------------------------------
15884 # The main decision:
15885 # Now decide if we will align the data into aligned columns. Do not
15886 # attempt to align columns if this is a tiny table or it would be
15887 # too spaced. It seems that the more packed lines we have, the
15888 # sparser the list that can be allowed and still look ok.
15889 #---------------------------------------------------------------
15891 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
15892 || ( $formatted_lines < 2 )
15893 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
15897 #---------------------------------------------------------------
15898 # too sparse: would look ugly if aligned in a table;
15899 #---------------------------------------------------------------
15901 # use old breakpoints if this is a 'big' list
15902 # FIXME: goal is to improve set_ragged_breakpoints so that
15903 # this is not necessary.
15904 if ( $packed_lines > 2 && $item_count > 10 ) {
15905 write_logfile_entry("List sparse: using old breakpoints\n");
15906 copy_old_breakpoints( $i_first_comma, $i_last_comma );
15909 # let the continuation logic handle it if 2 lines
15912 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15913 $ri_ragged_break_list );
15914 ++$break_count if ($use_separate_first_term);
15916 unless ($must_break_open_container) {
15917 if ( $break_count <= 1 ) {
15918 $$rdo_not_break_apart = 1;
15920 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15922 $$rdo_not_break_apart = 1;
15925 set_non_alignment_flags( $comma_count, $rcomma_index );
15930 #---------------------------------------------------------------
15931 # go ahead and format as a table
15932 #---------------------------------------------------------------
15933 write_logfile_entry(
15934 "List: auto formatting with $number_of_fields fields/row\n");
15936 my $j_first_break =
15937 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
15940 my $j = $j_first_break ;
15941 $j < $comma_count ;
15942 $j += $number_of_fields
15945 my $i = $$rcomma_index[$j];
15946 set_forced_breakpoint($i);
sub set_non_alignment_flags {

    # Flag the commas of a list as non-aligning.
    #
    # Parameters:
    #   $comma_count  - number of commas to flag
    #   $rcomma_index - ref to array holding the token index of each comma
    #
    # For each of the first $comma_count entries of @$rcomma_index, the
    # corresponding slot of @matching_token_to_go is set to 1.
    my ( $comma_count, $rcomma_index ) = @_;
    for my $n ( 0 .. $comma_count - 1 ) {
        my $i_comma = $rcomma_index->[$n];
        $matching_token_to_go[$i_comma] = 1;
    }
}
15962 sub study_list_complexity {
15964 # Look for complex tables which should be formatted with one term per line.
15965 # Returns the following:
15967 # \@i_ragged_break_list = list of good breakpoints to avoid lines
15968 # which are hard to read
15969 # $number_of_fields_best = suggested number of fields based on
15970 # complexity; = 0 if any number may be used.
#
# Parameters:
#   $ri_term_begin - ref to array of the first token index of each term
#   $ri_term_end   - ref to array of the last token index of each term
#   $ritem_lengths - ref to array of item lengths (one per term)
#   $max_width     - width value used by the final complexity test below
#
# The actual return list has three elements, in this order:
#   ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count )
15972 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
15973 my $item_count = @{$ri_term_begin};
15974 my $complex_item_count = 0;
# start from the user's upper bound on fields per table
15975 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
15976 my $i_max = @{$ritem_lengths} - 1;
15977 ##my @item_complexity;
# initial values are chosen so that neither "previous break" test below
# can match for the first terms
15979 my $i_last_last_break = -3;
15980 my $i_last_break = -2;
15981 my @i_ragged_break_list;
# thresholds on the weighted length computed for each term
15983 my $definitely_complex = 30;
15984 my $definitely_simple = 12;
15985 my $quote_count = 0;
15987 for my $i ( 0 .. $i_max ) {
15988 my $ib = $ri_term_begin->[$i];
15989 my $ie = $ri_term_end->[$i];
15991 # define complexity: start with the actual term length
15992 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
15994 ##TBD: join types here and check for variations
15995 ##my $str=join "", @tokens_to_go[$ib..$ie];
# classify the first token of the term by its type
# NOTE(review): the statements inside these two branches (presumably
# setting $is_quote and bumping $quote_count) are not visible here --
# confirm against the full source.
15998 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
16002 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
# single-token term: a quote containing whitespace counts as complex
# and doubles the weight
16006 if ( $ib eq $ie ) {
16007 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
16008 $complex_item_count++;
16009 $weighted_length *= 2;
# a token of type 'b' anywhere in the term counts it as complex and
# doubles the weight
16015 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
16016 $complex_item_count++;
16017 $weighted_length *= 2;
# a token of type '..' adds a fixed extra weight
16019 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
16020 $weighted_length += 4;
16024 # add weight for extra tokens.
16025 $weighted_length += 2 * ( $ie - $ib );
16027 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
16028 ## print "# COMPLEXITY:$weighted_length $BUB\n";
16030 ##push @item_complexity, $weighted_length;
16032 # now mark a ragged break after this item if it is 'long and
16034 if ( $weighted_length >= $definitely_complex ) {
16036 # if we broke after the previous term
16037 # then break before it too
16038 if ( $i_last_break == $i - 1
16040 && $i_last_last_break != $i - 2 )
16043 ## FIXME: don't strand a small term
# replace the previous break entry with breaks after terms $i-2 and $i-1
16044 pop @i_ragged_break_list;
16045 push @i_ragged_break_list, $i - 2;
16046 push @i_ragged_break_list, $i - 1;
# record the break after this term and shift the break history
16049 push @i_ragged_break_list, $i;
16050 $i_last_last_break = $i_last_break;
16051 $i_last_break = $i;
16054 # don't break before a small last term -- it will
16055 # not look good on a line by itself.
16056 elsif ($i == $i_max
16057 && $i_last_break == $i - 1
16058 && $weighted_length <= $definitely_simple )
16060 pop @i_ragged_break_list;
# terms not counted in $quote_count are reported as identifiers
16064 my $identifier_count = $i_max + 1 - $quote_count;
16066 # Need more tuning here..
# suggest one field per line when items are wide and more than half of
# them were counted as complex, unless the starting value is exactly 2
16067 if ( $max_width > 12
16068 && $complex_item_count > $item_count / 2
16069 && $number_of_fields_best != 2 )
16071 $number_of_fields_best = 1;
16074 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
sub get_maximum_fields_wanted {

    # Not all tables look good with more than one field of items.
    # This routine looks at a table and decides if it should be
    # formatted with just one field or not.
    # This coding is still under development.
    #
    # Parameter:
    #   $ritem_lengths - ref to array of item lengths, in list order
    #
    # Returns:
    #   1 if the table should be formatted with one field per line,
    #   0 if no particular number of fields is preferred.
    my ($ritem_lengths) = @_;

    # For just a few items, we tentatively assume just 1 field.
    my $item_count = @{$ritem_lengths};
    return (1) if ( $item_count <= 5 );

    # For larger tables, look at it both ways and see what looks best.
    #
    # $total_variation_1: summed absolute length change between consecutive
    #   items, i.e. the raggedness of a single column.
    # @column_variation:  the same sum taken between items two apart, kept
    #   separately for even and odd positions, i.e. the raggedness of each
    #   column of a two-column layout.
    my $total_variation_1 = 0;
    my @column_variation  = ( 0, 0 );
    for my $j ( 1 .. $item_count - 1 ) {
        my $length = $ritem_lengths->[$j];
        $total_variation_1 += abs( $length - $ritem_lengths->[ $j - 1 ] );
        if ( $j >= 2 ) {
            $column_variation[ $j % 2 ] +=
              abs( $length - $ritem_lengths->[ $j - 2 ] );
        }
    }
    my $total_variation_2 = $column_variation[0] + $column_variation[1];

    # Two columns are only worthwhile if they are clearly smoother than
    # one; be more demanding for medium-sized lists (6 to 10 items).
    my $factor = ( $item_count > 10 ) ? 1 : 0.75;
    return ( $total_variation_2 < $factor * $total_variation_1 ) ? (0) : (1);
}
sub table_columns_available {

    # Return the number of columns available for a table, measured at
    # token index $i_first_comma: the maximum line length less the
    # leading spaces at that token, less one.
    #
    # Patch: the vertical formatter does not line up lines whose lengths
    # exactly equal the available line length because of allowances
    # that must be made for side comments.  Therefore, the number of
    # available columns is reduced by 1 character.
    my ($i_first_comma) = @_;
    my $indent_columns = leading_spaces_to_go($i_first_comma);
    return $rOpts_maximum_line_length - $indent_columns - 1;
}
sub maximum_number_of_fields {

    # How many table fields will fit in the available space?
    #
    # Parameters:
    #   $columns     - number of columns available
    #   $odd_or_even - 1 if an odd field count is acceptable; any other
    #                  value yields an even count
    #   $max_width   - width of a single field
    #   $pair_width  - width of a pair of fields
    #
    # Returns the field count: two per whole pair that fits, plus one
    # more single field when an odd count is allowed and it still fits.
    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;

    my $pair_count  = int( $columns / $pair_width );
    my $field_count = 2 * $pair_count;
    return $field_count unless ( $odd_or_even == 1 );

    my $columns_with_extra = $pair_count * $pair_width + $max_width;
    $field_count += 1 if ( $columns_with_extra <= $columns );
    return $field_count;
}
sub compactify_table {

    # Given a table with a certain number of fields and a certain number
    # of lines, find the smallest field count which still holds all of
    # the items in that many lines.
    #
    # Parameters:
    #   $item_count       - number of items in the table
    #   $number_of_fields - current number of fields per line
    #   $formatted_lines  - number of lines the table occupies
    #   $odd_or_even      - step by which the field count may be reduced
    #
    # Returns the (possibly reduced) number of fields.
    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;

    # nothing to do unless at least one reduction step is possible
    return $number_of_fields
      if ( $number_of_fields < $odd_or_even * 2 || $formatted_lines <= 0 );

    # keep stepping down while every item still fits in the same lines
    my $trial_fields = $number_of_fields;
    while ($trial_fields >= $odd_or_even
        && $trial_fields * $formatted_lines >= $item_count )
    {
        $number_of_fields = $trial_fields;
        $trial_fields -= $odd_or_even;
    }
    return $number_of_fields;
}
sub set_ragged_breakpoints {

    # Set breakpoints in a list that cannot be formatted nicely as a
    # table.
    #
    # Parameters:
    #   $ri_term_comma        - ref to array giving, for each term, the
    #                           token index of its comma
    #   $ri_ragged_break_list - ref to array of term numbers after which
    #                           a break is wanted
    #
    # Returns the number of terms for which a break was requested.
    my ( $ri_term_comma, $ri_ragged_break_list ) = @_;

    my $break_count = 0;
    for my $i_term ( @{$ri_ragged_break_list} ) {
        my $i_comma = $ri_term_comma->[$i_term];

        # skip terms which have no comma index
        next unless ($i_comma);
        set_forced_breakpoint($i_comma);
        ++$break_count;
    }
    return $break_count;
}
sub copy_old_breakpoints {

    # Re-apply the old breakpoints in a range of tokens: every token index
    # from $i_first_comma through $i_last_comma whose entry in
    # @old_breakpoint_to_go is true is passed to set_forced_breakpoint.
    my ( $i_first_comma, $i_last_comma ) = @_;
    foreach my $i_token ( $i_first_comma .. $i_last_comma ) {
        next unless ( $old_breakpoint_to_go[$i_token] );
        set_forced_breakpoint($i_token);
    }
}
# Mark the inclusive token index range $i .. $j in @nobreak_to_go with 1.
# The range is applied only when 0 <= $i <= $j <= $max_index_to_go; any
# other arguments are treated as a non-critical error.
# NOTE(review): the 'sub' header line of this routine and the output
# statements inside the debug blocks are not visible here -- confirm
# against the full source.
16214 my ( $i, $j ) = @_;
16215 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
# optional trace, active only when the debug constant is true
16217 FORMATTER_DEBUG_FLAG_NOBREAK && do {
16218 my ( $a, $b, $c ) = caller();
16220 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
# flag every token in the range with a single slice assignment
16224 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
16227 # shouldn't happen; non-critical error
16229 FORMATTER_DEBUG_FLAG_NOBREAK && do {
16230 my ( $a, $b, $c ) = caller();
16232 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
sub set_fake_breakpoint {

    # Record that this batch has a break without choosing its location:
    # only the forced breakpoint counter is incremented, no token is
    # marked.  This is useful if we have breaks but may want to postpone
    # deciding where to make them.
    return $forced_breakpoint_count++;
}
16246 sub set_forced_breakpoint {
# Force a line break at token index $i (or just before it for some
# tokens, see below) and record the break so that it can be undone.
# Does nothing if $i is undefined or negative.
# NOTE(review): the declaration of $i, the output statement of the debug
# block, and the value pushed on the undo stack are not visible here --
# confirm against the full source.
16249 return unless defined $i && $i >= 0;
16251 # when called with certain tokens, use bond strengths to decide
16252 # if we break before or after it
16253 my $token = $tokens_to_go[$i];
# for these operators, step back one token when the user wants the
# break placed before the operator
16255 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
16256 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
16259 # breaks are forced before 'if' and 'unless'
16260 elsif ( $is_if_unless{$token} ) { $i-- }
16262 if ( $i >= 0 && $i <= $max_index_to_go ) {
# if the chosen token has type 'b', use the token before it instead
16263 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
# optional trace, active only when the debug constant is true
16265 FORMATTER_DEBUG_FLAG_FORCE && do {
16266 my ( $a, $b, $c ) = caller();
16268 "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
# honor the no-break flag: only set the break where breaking is allowed
16271 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
16272 $forced_breakpoint_to_go[$i_nonblank] = 1;
# keep track of the highest token index with a forced break
16274 if ( $i_nonblank > $index_max_forced_break ) {
16275 $index_max_forced_break = $i_nonblank;
# count the break and add an entry to the undo stack
16277 $forced_breakpoint_count++;
16278 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
16281 # if we break at an opening container..break at the closing
16282 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
16283 set_closing_breakpoint($i_nonblank);
sub clear_breakpoint_undo_stack {

    # Empty the forced breakpoint undo stack by resetting its entry
    # count to zero; the stack array itself is left as it is.
    return ( $forced_breakpoint_undo_count = 0 );
}
16293 sub undo_forced_breakpoint_stack {
# Undo forced breakpoints, most recent first, until the undo stack holds
# no more than $i_start entries.
# NOTE(review): the output statements of the message blocks, the
# declaration receiving the popped index, and whatever is done with a
# negative $i_start besides reporting it are not visible here -- confirm
# against the full source.
16295 my $i_start = shift;
# a negative stack depth is reported as a program bug
16296 if ( $i_start < 0 ) {
16298 my ( $a, $b, $c ) = caller();
16300 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
# pop entries until the stack is back down to the requested depth
16304 while ( $forced_breakpoint_undo_count > $i_start ) {
16306 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
# valid token index: clear its forced break flag and uncount it
16307 if ( $i >= 0 && $i <= $max_index_to_go ) {
16308 $forced_breakpoint_to_go[$i] = 0;
16309 $forced_breakpoint_count--;
# optional trace, active only when the debug constant is true
16311 FORMATTER_DEBUG_FLAG_UNDOBP && do {
16312 my ( $a, $b, $c ) = caller();
16314 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
16319 # shouldn't happen, but not a critical error
16321 FORMATTER_DEBUG_FLAG_UNDOBP && do {
16322 my ( $a, $b, $c ) = caller();
16324 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
16331 { # begin recombine_breakpoints
16340 @is_amp_amp{@_} = (1) x scalar(@_);
16343 @is_ternary{@_} = (1) x scalar(@_);
16345 @_ = qw( + - * / );
16346 @is_math_op{@_} = (1) x scalar(@_);
16349 sub recombine_breakpoints {
16351 # sub set_continuation_breaks is very liberal in setting line breaks
16352 # for long lines, always setting breaks at good breakpoints, even
16353 # when that creates small lines. Occasionally small line fragments
16354 # are produced which would look better if they were combined.
16355 # That's the task of this routine, recombine_breakpoints.
16357 # $ri_beg = ref to array of BEGinning indexes of each line
16358 # $ri_end = ref to array of ENDing indexes of each line
16359 my ( $ri_beg, $ri_end ) = @_;
16361 my $more_to_do = 1;
16363 # We keep looping over all of the lines of this batch
16364 # until there are no more possible recombinations
16365 my $nmax_last = @$ri_end;
16366 while ($more_to_do) {
16370 my $nmax = @$ri_end - 1;
16372 # safety check for infinite loop
16373 unless ( $nmax < $nmax_last ) {
16375 # shouldn't happen because splice below decreases nmax on each pass:
16376 # but i get paranoid sometimes
16377 die "Program bug-infinite loop in recombine breakpoints\n";
16379 $nmax_last = $nmax;
16381 my $previous_outdentable_closing_paren;
16382 my $leading_amp_count = 0;
16383 my $this_line_is_semicolon_terminated;
16385 # loop over all remaining lines in this batch
16386 for $n ( 1 .. $nmax ) {
16388 #----------------------------------------------------------
16389 # If we join the current pair of lines,
16390 # line $n-1 will become the left part of the joined line
16391 # line $n will become the right part of the joined line
16393 # Here are Indexes of the endpoint tokens of the two lines:
16395 # -----line $n-1--- | -----line $n-----
16396 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
16399 # We want to decide if we should remove the line break
16400 # between the tokens at $iend_1 and $ibeg_2
16402 # We will apply a number of ad-hoc tests to see if joining
16403 # here will look ok. The code will just issue a 'next'
16404 # command if the join doesn't look good. If we get through
16405 # the gauntlet of tests, the lines will be recombined.
16406 #----------------------------------------------------------
16408 # beginning and ending tokens of the lines we are working on
16409 my $ibeg_1 = $$ri_beg[ $n - 1 ];
16410 my $iend_1 = $$ri_end[ $n - 1 ];
16411 my $iend_2 = $$ri_end[$n];
16412 my $ibeg_2 = $$ri_beg[$n];
16414 my $ibeg_nmax = $$ri_beg[$nmax];
16416 # some beginning indexes of other lines, which may not exist
16417 my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
16418 my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
16419 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
16423 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
16424 # $nesting_depth_to_go[$ibeg_1] );
16426 ##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
16428 # If line $n is the last line, we set some flags and
16429 # do any special checks for it
16430 if ( $n == $nmax ) {
16432 # a terminal '{' should stay where it is
16433 next if $types_to_go[$ibeg_2] eq '{';
16435 # set flag if statement $n ends in ';'
16436 $this_line_is_semicolon_terminated =
16437 $types_to_go[$iend_2] eq ';'
16439 # with possible side comment
16440 || ( $types_to_go[$iend_2] eq '#'
16441 && $iend_2 - $ibeg_2 >= 2
16442 && $types_to_go[ $iend_2 - 2 ] eq ';'
16443 && $types_to_go[ $iend_2 - 1 ] eq 'b' );
16446 #----------------------------------------------------------
16447 # Section 1: examine token at $iend_1 (right end of first line
16449 #----------------------------------------------------------
16451 # an isolated '}' may join with a ';' terminated segment
16452 if ( $types_to_go[$iend_1] eq '}' ) {
16454 # Check for cases where combining a semicolon terminated
16455 # statement with a previous isolated closing paren will
16456 # allow the combined line to be outdented. This is
16457 # generally a good move. For example, we can join up
16458 # the last two lines here:
16460 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
16461 # $size, $atime, $mtime, $ctime, $blksize, $blocks
16467 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
16468 # $size, $atime, $mtime, $ctime, $blksize, $blocks
16471 # which makes the parens line up.
16473 # Another example, from Joe Matarazzo, probably looks best
16474 # with the 'or' clause appended to the trailing paren:
16475 # $self->some_method(
16478 # ) or die "Some_method didn't work";
16480 $previous_outdentable_closing_paren =
16481 $this_line_is_semicolon_terminated # ends in ';'
16482 && $ibeg_1 == $iend_1 # only one token on last line
16483 && $tokens_to_go[$iend_1] eq
16484 ')' # must be structural paren
16486 # only &&, ||, and : if no others seen
16487 # (but note: our count made below could be wrong
16488 # due to intervening comments)
16489 && ( $leading_amp_count == 0
16490 || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
16492 # but leading colons probably line up with a
16493 # previous colon or question (count could be wrong).
16494 && $types_to_go[$ibeg_2] ne ':'
16496 # only one step in depth allowed. this line must not
16497 # begin with a ')' itself.
16498 && ( $nesting_depth_to_go[$iend_1] ==
16499 $nesting_depth_to_go[$iend_2] + 1 );
16501 # YVES patch 2 of 2:
16502 # Allow cuddled eval chains, like this:
16509 # This patch works together with a patch in
16510 # setting adjusted indentation (where the closing eval
16511 # brace is outdented if possible).
16512 # The problem is that an 'eval' block has continuation
16513 # indentation and it looks better to undo it in some
16514 # cases. If we do not use this patch we would get:
16522 # The alternative, for uncuddled style, is to create
16523 # a patch in set_adjusted_indentation which undoes
16524 # the indentation of a leading line like 'or do {'.
16525 # This doesn't work well with -icb through
16527 $block_type_to_go[$iend_1] eq 'eval'
16528 && !$rOpts->{'line-up-parentheses'}
16529 && !$rOpts->{'indent-closing-brace'}
16530 && $tokens_to_go[$iend_2] eq '{'
16532 ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ )
16533 || ( $types_to_go[$ibeg_2] eq 'k'
16534 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16535 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16539 $previous_outdentable_closing_paren ||= 1;
16544 $previous_outdentable_closing_paren
16546 # handle '.' and '?' specially below
16547 || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
16552 # honor breaks at opening brace
16553 # Added to prevent recombining something like this:
16554 # } || eval { package main;
16555 elsif ( $types_to_go[$iend_1] eq '{' ) {
16556 next if $forced_breakpoint_to_go[$iend_1];
16559 # do not recombine lines with ending &&, ||,
16560 elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
16561 next unless $want_break_before{ $types_to_go[$iend_1] };
16564 # keep a terminal colon
16565 elsif ( $types_to_go[$iend_1] eq ':' ) {
16566 next unless $want_break_before{ $types_to_go[$iend_1] };
16569 # Identify and recombine a broken ?/: chain
16570 elsif ( $types_to_go[$iend_1] eq '?' ) {
16572 # Do not recombine different levels
16574 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16576 # do not recombine unless next line ends in :
16577 next unless $types_to_go[$iend_2] eq ':';
16580 # for lines ending in a comma...
16581 elsif ( $types_to_go[$iend_1] eq ',' ) {
16583 # Do not recombine at comma which is following the
16585 # TODO: might be best to make a special flag
16586 next if ( $old_breakpoint_to_go[$iend_1] );
16588 # an isolated '},' may join with an identifier + ';'
16589 # this is useful for the class of a 'bless' statement (bless.t)
16590 if ( $types_to_go[$ibeg_1] eq '}'
16591 && $types_to_go[$ibeg_2] eq 'i' )
16594 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16595 && ( $iend_2 == ( $ibeg_2 + 1 ) )
16596 && $this_line_is_semicolon_terminated );
16598 # override breakpoint
16599 $forced_breakpoint_to_go[$iend_1] = 0;
16605 # do not recombine after a comma unless this will leave
16607 next unless ( $n + 1 >= $nmax );
16609 # do not recombine if there is a change in indentation depth
16612 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
16614 # do not recombine a "complex expression" after a
16615 # comma. "complex" means no parens.
16617 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16618 if ( $tokens_to_go[$ii] eq '(' ) {
16623 next if $saw_paren;
16628 elsif ( $types_to_go[$iend_1] eq '(' ) {
16630 # No longer doing this
16633 elsif ( $types_to_go[$iend_1] eq ')' ) {
16635 # No longer doing this
16638 # keep a terminal for-semicolon
16639 elsif ( $types_to_go[$iend_1] eq 'f' ) {
16643 # if '=' at end of line ...
16644 elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
16646 # keep break after = if it was in input stream
16647 # this helps prevent 'blinkers'
16648 next if $old_breakpoint_to_go[$iend_1]
16650 # don't strand an isolated '='
16651 && $iend_1 != $ibeg_1;
16653 my $is_short_quote =
16654 ( $types_to_go[$ibeg_2] eq 'Q'
16655 && $ibeg_2 == $iend_2
16656 && length( $tokens_to_go[$ibeg_2] ) <
16657 $rOpts_short_concatenation_item_length );
16659 ( $types_to_go[$ibeg_1] eq '?'
16660 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
16662 # always join an isolated '=', a short quote, or if this
16663 # will put ?/: at start of adjacent lines
16664 if ( $ibeg_1 != $iend_1
16665 && !$is_short_quote
16672 # unless we can reduce this to two lines
16675 # or three lines, the last with a leading semicolon
16676 || ( $nmax == $n + 2
16677 && $types_to_go[$ibeg_nmax] eq ';' )
16679 # or the next line ends with a here doc
16680 || $types_to_go[$iend_2] eq 'h'
16682 # or the next line ends in an open paren or brace
16683 # and the break hasn't been forced [dima.t]
16684 || ( !$forced_breakpoint_to_go[$iend_1]
16685 && $types_to_go[$iend_2] eq '{' )
16688 # do not recombine if the two lines might align well
16689 # this is a very approximate test for this
16691 && $types_to_go[$ibeg_2] ne
16692 $types_to_go[$ibeg_3] )
16695 # -lp users often prefer this:
16696 # my $title = function($env, $env, $sysarea,
16697 # "bubba Borrower Entry");
16698 # so we will recombine if -lp is used we have ending
16700 if ( !$rOpts_line_up_parentheses
16701 || $types_to_go[$iend_2] ne ',' )
16704 # otherwise, scan the rhs line up to last token for
16705 # complexity. Note that we are not counting the last
16706 # token in case it is an opening paren.
16708 my $depth = $nesting_depth_to_go[$ibeg_2];
16709 for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
16710 if ( $nesting_depth_to_go[$i] != $depth ) {
16712 last if ( $tv > 1 );
16714 $depth = $nesting_depth_to_go[$i];
16717 # ok to recombine if no level changes before last token
16720 # otherwise, do not recombine if more than two
16722 next if ( $tv > 1 );
16724 # check total complexity of the two adjacent lines
16725 # that will occur if we do this join
16727 ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
16728 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
16729 if ( $nesting_depth_to_go[$i] != $depth ) {
16731 last if ( $tv > 2 );
16733 $depth = $nesting_depth_to_go[$i];
16736 # do not recombine if total is more than 2 level changes
16737 next if ( $tv > 2 );
16742 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16743 $forced_breakpoint_to_go[$iend_1] = 0;
16748 elsif ( $types_to_go[$iend_1] eq 'k' ) {
16750 # make major control keywords stand out
16755 #/^(last|next|redo|return)$/
16756 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16758 # but only if followed by multiple lines
16762 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16764 unless $want_break_before{ $tokens_to_go[$iend_1] };
16768 # handle trailing + - * /
16769 elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
16771 # combine lines if next line has single number
16772 # or a short term followed by same operator
16773 my $i_next_nonblank = $ibeg_2;
16774 my $i_next_next = $i_next_nonblank + 1;
16775 $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16776 my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
16778 $i_next_nonblank == $iend_2
16779 || ( $i_next_next == $iend_2
16780 && $is_math_op{ $types_to_go[$i_next_next] } )
16781 || $types_to_go[$i_next_next] eq ';'
16784 # find token before last operator of previous line
16785 my $iend_1_minus = $iend_1;
16787 if ( $iend_1_minus > $ibeg_1 );
16789 if ( $types_to_go[$iend_1_minus] eq 'b'
16790 && $iend_1_minus > $ibeg_1 );
16792 my $short_term_follows =
16793 ( $types_to_go[$iend_2] eq $types_to_go[$iend_1]
16794 && $types_to_go[$iend_1_minus] =~ /^[in]$/
16795 && $iend_2 <= $ibeg_2 + 2
16796 && length( $tokens_to_go[$ibeg_2] ) <
16797 $rOpts_short_concatenation_item_length );
16800 unless ( $number_follows || $short_term_follows );
16803 #----------------------------------------------------------
16804 # Section 2: Now examine token at $ibeg_2 (left end of second
16806 #----------------------------------------------------------
16808 # join lines identified above as capable of
16809 # causing an outdented line with leading closing paren
16810 if ($previous_outdentable_closing_paren) {
16811 $forced_breakpoint_to_go[$iend_1] = 0;
16814 # do not recombine lines with leading :
16815 elsif ( $types_to_go[$ibeg_2] eq ':' ) {
16816 $leading_amp_count++;
16817 next if $want_break_before{ $types_to_go[$ibeg_2] };
16820 # handle lines with leading &&, ||
16821 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16823 $leading_amp_count++;
16825 # ok to recombine if it follows a ? or :
16826 # and is followed by an open paren..
16828 ( $is_ternary{ $types_to_go[$ibeg_1] }
16829 && $tokens_to_go[$iend_2] eq '(' )
16831 # or is followed by a ? or : at same depth
16833 # We are looking for something like this. We can
16834 # recombine the && line with the line above to make the
16835 # structure more clear:
16837 # exists $G->{Attr}->{V}
16838 # && exists $G->{Attr}->{V}->{$u}
16839 # ? %{ $G->{Attr}->{V}->{$u} }
16842 # We should probably leave something like this alone:
16844 # exists $G->{Attr}->{E}
16845 # && exists $G->{Attr}->{E}->{$u}
16846 # && exists $G->{Attr}->{E}->{$u}->{$v}
16847 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16849 # so that we either have all of the &&'s (or ||'s)
16850 # on one line, as in the first example, or break at
16851 # each one as in the second example. However, it
16852 # sometimes makes things worse to check for this because
16853 # it prevents multiple recombinations. So this is not done.
16855 && $is_ternary{ $types_to_go[$ibeg_3] }
16856 && $nesting_depth_to_go[$ibeg_3] ==
16857 $nesting_depth_to_go[$ibeg_2] );
16859 next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
16860 $forced_breakpoint_to_go[$iend_1] = 0;
16862 # tweak the bond strength to give this joint priority
16867 # Identify and recombine a broken ?/: chain
16868 elsif ( $types_to_go[$ibeg_2] eq '?' ) {
16870 # Do not recombine different levels
16871 my $lev = $levels_to_go[$ibeg_2];
16872 next if ( $lev ne $levels_to_go[$ibeg_1] );
16874 # Do not recombine a '?' if either next line or
16875 # previous line does not start with a ':'. The reasons
16876 # are that (1) no alignment of the ? will be possible
16877 # and (2) the expression is somewhat complex, so the
16878 # '?' is harder to see in the interior of the line.
16879 my $follows_colon =
16880 $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
16881 my $precedes_colon =
16882 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16883 next unless ( $follows_colon || $precedes_colon );
16885 # we will always combine a ? line following a : line
16886 if ( !$follows_colon ) {
16888 # ...otherwise recombine only if it looks like a chain.
16889 # we will just look at a few nearby lines to see if
16890 # this looks like a chain.
16891 my $local_count = 0;
16892 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
16895 && $types_to_go[$ii] eq ':'
16896 && $levels_to_go[$ii] == $lev;
16898 next unless ( $local_count > 1 );
16900 $forced_breakpoint_to_go[$iend_1] = 0;
16903 # do not recombine lines with leading '.'
16904 elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
16905 my $i_next_nonblank = $ibeg_2 + 1;
16906 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16907 $i_next_nonblank++;
16913 # ... unless there is just one and we can reduce
16914 # this to two lines if we do. For example, this
16918 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16920 # looks better than this:
16921 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16922 # . '$args .= $pat;'
16927 && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
16930 # ... or this would strand a short quote , like this
16931 # . "some long quote"
16933 || ( $types_to_go[$i_next_nonblank] eq 'Q'
16934 && $i_next_nonblank >= $iend_2 - 1
16935 && length( $tokens_to_go[$i_next_nonblank] ) <
16936 $rOpts_short_concatenation_item_length )
16940 # handle leading keyword..
16941 elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
16943 # handle leading "or"
16944 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16947 $this_line_is_semicolon_terminated
16950 # following 'if' or 'unless' or 'or'
16951 $types_to_go[$ibeg_1] eq 'k'
16952 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16954 # important: only combine a very simple or
16955 # statement because the step below may have
16956 # combined a trailing 'and' with this or,
16957 # and we do not want to then combine
16958 # everything together
16959 && ( $iend_2 - $ibeg_2 <= 7 )
16964 # handle leading 'and'
16965 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
16967 # Decide if we will combine a single terminal 'and'
16968 # after an 'if' or 'unless'.
16970 # This looks best with the 'and' on the same
16971 # line as the 'if':
16974 # if $seconds and $nu < 2;
16976 # But this looks better as shown:
16979 # if !$this->{Parents}{$_}
16980 # or $this->{Parents}{$_} eq $_;
16984 $this_line_is_semicolon_terminated
16987 # following 'if' or 'unless' or 'or'
16988 $types_to_go[$ibeg_1] eq 'k'
16989 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
16990 || $tokens_to_go[$ibeg_1] eq 'or' )
16995 # handle leading "if" and "unless"
16996 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16998 # FIXME: This is still experimental..may not be too useful
17001 $this_line_is_semicolon_terminated
17003 # previous line begins with 'and' or 'or'
17004 && $types_to_go[$ibeg_1] eq 'k'
17005 && $is_and_or{ $tokens_to_go[$ibeg_1] }
17010 # handle all other leading keywords
17013 # keywords look best at start of lines,
17014 # but combine things like "1 while"
17015 unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
17017 if ( ( $types_to_go[$iend_1] ne 'k' )
17018 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
17023 # similar treatment of && and || as above for 'and' and 'or':
17024 # NOTE: This block of code is currently bypassed because
17025 # of a previous block but is retained for possible future use.
17026 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
17028 # maybe looking at something like:
17029 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
17033 $this_line_is_semicolon_terminated
17035 # previous line begins with an 'if' or 'unless' keyword
17036 && $types_to_go[$ibeg_1] eq 'k'
17037 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
17042 # handle leading + - * /
17043 elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
17044 my $i_next_nonblank = $ibeg_2 + 1;
17045 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
17046 $i_next_nonblank++;
17049 my $i_next_next = $i_next_nonblank + 1;
17050 $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
17053 $types_to_go[$i_next_nonblank] eq 'n'
17054 && ( $i_next_nonblank >= $iend_2 - 1
17055 || $types_to_go[$i_next_next] eq ';' )
17058 my $iend_1_nonblank =
17059 $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
17060 my $iend_2_nonblank =
17061 $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
17063 my $is_short_term =
17064 ( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
17065 && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
17066 && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
17067 && $iend_2_nonblank <= $ibeg_2 + 2
17068 && length( $tokens_to_go[$iend_2_nonblank] ) <
17069 $rOpts_short_concatenation_item_length );
17071 # Combine these lines if this line is a single
17072 # number, or if it is a short term with same
17073 # operator as the previous line. For example, in
17074 # the following code we will combine all of the
17075 # short terms $A, $B, $C, $D, $E, $F, together
17076 # instead of leaving them one per line:
17078 # $A * $B * $C * $D * $E * $F *
17079 # ( 2. * $eps * $sigma * $area ) *
17080 # ( 1. / $tcold**3 - 1. / $thot**3 );
17081 # This can be important in math-intensive code.
17087 # or if we can reduce this to two lines if we do.
17090 && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
17094 # handle line with leading = or similar
17095 elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
17096 next unless ( $n == 1 || $n == $nmax );
17100 # unless we can reduce this to two lines
17103 # or three lines, the last with a leading semicolon
17104 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
17106 # or the next line ends with a here doc
17107 || $types_to_go[$iend_2] eq 'h'
17109 # or this is a short line ending in ;
17110 || ( $n == $nmax && $this_line_is_semicolon_terminated )
17112 $forced_breakpoint_to_go[$iend_1] = 0;
17115 #----------------------------------------------------------
17117 # Combine the lines if we arrive here and it is possible
17118 #----------------------------------------------------------
17120 # honor hard breakpoints
17121 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
17123 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
17125 # combined line cannot be too long
17126 my $excess = excess_line_length( $ibeg_1, $iend_2 );
17127 next if ( $excess > 0 );
17129 # Require a few extra spaces before recombining lines if we are
17130 # at an old breakpoint unless this is a simple list or terminal
17131 # line. The goal is to avoid oscillating between two
17132 # quasi-stable end states. For example this snippet caused
17136 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
17140 if ( $old_breakpoint_to_go[$iend_1]
17141 && !$this_line_is_semicolon_terminated
17144 && $types_to_go[$iend_2] ne ',' );
17146 # do not recombine if we would skip in indentation levels
17147 if ( $n < $nmax ) {
17148 my $if_next = $$ri_beg[ $n + 1 ];
17151 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
17152 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
17154 # but an isolated 'if (' is undesirable
17157 && $iend_1 - $ibeg_1 <= 2
17158 && $types_to_go[$ibeg_1] eq 'k'
17159 && $tokens_to_go[$ibeg_1] eq 'if'
17160 && $tokens_to_go[$iend_1] ne '('
17166 next if ( $bs == NO_BREAK );
17168 # remember the pair with the greatest bond strength
17175 if ( $bs > $bs_best ) {
17182 # recombine the pair with the greatest bond strength
17184 splice @$ri_beg, $n_best, 1;
17185 splice @$ri_end, $n_best - 1, 1;
17187 # keep going if we are still making progress
17191 return ( $ri_beg, $ri_end );
17193 } # end recombine_breakpoints
17195 sub break_all_chain_tokens {
17197 # scan the current breakpoints looking for breaks at certain "chain
17198 # operators" (. : && || + etc) which often occur repeatedly in a long
17199 # statement. If we see a break at any one, break at all similar tokens
17200 # within the same container.
#
# Arguments:
#   $ri_left  - reference to an array giving, for each current line, the
#               index of its first (left end) token
#   $ri_right - reference to an array giving, for each current line, the
#               index of its last (right end) token
# Any new break points that are found are passed, together with these
# same two references, to insert_additional_breaks.
17202 my ( $ri_left, $ri_right ) = @_;
# The hashes below are keyed by token type, after '-' has been mapped to
# '+' and '/' to '*':
#   %saw_chain_type      - set to 1 for each type seen at a line end
#   %left_chain_type     - indexes of tokens of that type which begin a line
#   %right_chain_type    - indexes of tokens of that type which end a line
#   %interior_chain_type - indexes of tokens of that type inside a line
17204 my %saw_chain_type;
17205 my %left_chain_type;
17206 my %right_chain_type;
17207 my %interior_chain_type;
17208 my $nmax = @$ri_right - 1;
17210 # scan the left and right end tokens of all lines
17212 for my $n ( 0 .. $nmax ) {
17213 my $il = $$ri_left[$n];
17214 my $ir = $$ri_right[$n];
17215 my $typel = $types_to_go[$il];
17216 my $typer = $types_to_go[$ir];
17217 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
17218 $typer = '+' if ( $typer eq '-' );
17219 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
17220 $typer = '*' if ( $typer eq '/' );
17221 my $tokenl = $tokens_to_go[$il];
17222 my $tokenr = $tokens_to_go[$ir];
# A leading token counts only if it is a chain operator (looked up by
# token text) for which a break BEFORE the token is wanted (looked up by
# type).  A leading '?' is ignored.
# NOTE(review): as written, this 'next' moves on to the next line, so the
# right end test below is also skipped for a line that begins with '?'.
17224 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
17225 next if ( $typel eq '?' );
17226 push @{ $left_chain_type{$typel} }, $il;
17227 $saw_chain_type{$typel} = 1;
# A trailing token counts only if it is a chain operator for which a
# break AFTER the token is wanted.  A trailing '?' is ignored.
17230 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
17231 next if ( $typer eq '?' );
17232 push @{ $right_chain_type{$typer} }, $ir;
17233 $saw_chain_type{$typer} = 1;
# NOTE(review): $count is presumably the number of chain types recorded
# so far; confirm against the statement which sets it.
17237 return unless $count;
17239 # now look for any interior tokens of the same types
# (only indexes strictly between the two ends of each line are examined)
17241 for my $n ( 0 .. $nmax ) {
17242 my $il = $$ri_left[$n];
17243 my $ir = $$ri_right[$n];
17244 for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
17245 my $type = $types_to_go[$i];
17246 $type = '+' if ( $type eq '-' );
17247 $type = '*' if ( $type eq '/' );
17248 if ( $saw_chain_type{$type} ) {
17249 push @{ $interior_chain_type{$type} }, $i;
17254 return unless $count;
17256 # now make a list of all new break points
17259 # loop over all chain types
17260 foreach my $type ( keys %saw_chain_type ) {
17262 # quit if just ONE continuation line with leading . For example--
17263 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
# Note that this applies to type '+' as well as '.' (and so to '-', which
# was mapped to '+' above), and that 'last' ends the scan of ALL remaining
# chain types, not just this one.
17265 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
17267 # loop over all interior chain tokens
17268 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
17270 # loop over all left end tokens of same type
# A break before token $itest is recorded as a break after token
# $itest - 1, so that is the index checked against $nobreak_to_go and
# pushed onto the list.
17271 if ( $left_chain_type{$type} ) {
17272 next if $nobreak_to_go[ $itest - 1 ];
17273 foreach my $i ( @{ $left_chain_type{$type} } ) {
17274 next unless in_same_container( $i, $itest );
17275 push @insert_list, $itest - 1;
17277 # Break at matching ? if this : is at a different level.
17278 # For example, the ? before $THRf_DEAD in the following
17279 # should get a break if its : gets a break.
17282 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
17283 # : ( $_ & 4 ) ? $THRf_R_DETACHED
17284 # : $THRf_R_JOINABLE;
17286 && $levels_to_go[$i] != $levels_to_go[$itest] )
# The mate must be at an index greater than 0 because the break is
# placed after the token which precedes the '?'.
17288 my $i_question = $mate_index_to_go[$itest];
17289 if ( $i_question > 0 ) {
17290 push @insert_list, $i_question - 1;
17297 # loop over all right end tokens of same type
# Here the break goes after token $itest itself.
17298 if ( $right_chain_type{$type} ) {
17299 next if $nobreak_to_go[$itest];
17300 foreach my $i ( @{ $right_chain_type{$type} } ) {
17301 next unless in_same_container( $i, $itest );
17302 push @insert_list, $itest;
17304 # break at matching ? if this : is at a different level
17306 && $levels_to_go[$i] != $levels_to_go[$itest] )
# The break goes after the '?' itself, so index 0 is acceptable here.
17308 my $i_question = $mate_index_to_go[$itest];
17309 if ( $i_question >= 0 ) {
17310 push @insert_list, $i_question;
17319 # insert any new break points
17320 if (@insert_list) {
17321 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
17327 # Look for assignment operators that could use a breakpoint.
17328 # For example, in the following snippet
17330 # $HOME = $ENV{HOME}
17333 # || die "no home directory for user $<";
17335 # we could break at the = to get this, which is a little nicer:
17340 # || die "no home directory for user $<";
17342 # The logic here follows the logic in set_logical_padding, which
17343 # will add the padding in the second line to improve alignment.
17345 my ( $ri_left, $ri_right ) = @_;
17346 my $nmax = @$ri_right - 1;
17347 return unless ( $nmax >= 2 );
17349 # scan the left ends of first two lines
17352 for my $n ( 1 .. 2 ) {
17353 my $il = $$ri_left[$n];
17354 my $typel = $types_to_go[$il];
17355 my $tokenl = $tokens_to_go[$il];
17357 my $has_leading_op = ( $tokenl =~ /^\w/ )
17358 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
17359 : $is_chain_operator{$typel}; # and, or
17360 return unless ($has_leading_op);
17363 unless ( $tokenl eq $tokbeg
17364 && $nesting_depth_to_go[$il] eq $depth_beg );
17367 $depth_beg = $nesting_depth_to_go[$il];
17370 # now look for any interior tokens of the same types
17371 my $il = $$ri_left[0];
17372 my $ir = $$ri_right[0];
17374 # now make a list of all new break points
17376 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
17377 my $type = $types_to_go[$i];
17378 if ( $is_assignment{$type}
17379 && $nesting_depth_to_go[$i] eq $depth_beg )
17381 if ( $want_break_before{$type} ) {
17382 push @insert_list, $i - 1;
17385 push @insert_list, $i;
17390 # Break after a 'return' followed by a chain of operators
17391 # return ( $^O !~ /win32|dos/i )
17392 # && ( $^O ne 'VMS' )
17393 # && ( $^O ne 'OS2' )
17394 # && ( $^O ne 'MacOS' );
17397 # ( $^O !~ /win32|dos/i )
17398 # && ( $^O ne 'VMS' )
17399 # && ( $^O ne 'OS2' )
17400 # && ( $^O ne 'MacOS' );
17402 if ( $types_to_go[$i] eq 'k'
17403 && $tokens_to_go[$i] eq 'return'
17405 && $nesting_depth_to_go[$i] eq $depth_beg )
17407 push @insert_list, $i;
17410 return unless (@insert_list);
17412 # One final check...
17413 # scan second and third lines and be sure there are no assignments
17414 # we want to avoid breaking at an = to make something like this:
17416 # $html_icons{"$type-$state"}
17417 # or $icon = $html_icons{$type}
17418 # or $icon = $html_icons{$state} )
17419 for my $n ( 1 .. 2 ) {
17420 my $il = $$ri_left[$n];
17421 my $ir = $$ri_right[$n];
17422 for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
17423 my $type = $types_to_go[$i];
17425 if ( $is_assignment{$type}
17426 && $nesting_depth_to_go[$i] eq $depth_beg );
17430 # ok, insert any new break point
17431 if (@insert_list) {
17432 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
17436 sub insert_final_breaks {
# Look for additional break points in front of the '?' which is paired
# with the first ':' found at the end of a line.
#
# Arguments:
#   $ri_left  - reference to an array giving, for each current line, the
#               index of its first (left end) token
#   $ri_right - reference to an array giving, for each current line, the
#               index of its last (right end) token
# Any new break points that are found are passed, together with these
# same two references, to insert_additional_breaks.
17438 my ( $ri_left, $ri_right ) = @_;
17440 my $nmax = @$ri_right - 1;
17442 # scan the left and right end tokens of all lines
# Give up at once if a line begins or ends with '?' before any line end
# ':' has been seen.  Otherwise stop at the first line which begins or
# ends with ':' and remember the index of that colon.
17444 my $i_first_colon = -1;
17445 for my $n ( 0 .. $nmax ) {
17446 my $il = $$ri_left[$n];
17447 my $ir = $$ri_right[$n];
17448 my $typel = $types_to_go[$il];
17449 my $typer = $types_to_go[$ir];
17450 return if ( $typel eq '?' );
17451 return if ( $typer eq '?' );
17452 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
17453 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
17456 # For long ternary chains,
17457 # if the first : we see has its ? in the interior
17458 # of a preceding line, then see if there are any good
17459 # breakpoints before the ?.
# Both tests below use '> 0', so a colon at index 0, or one whose mate
# index is 0 or negative, produces no new breaks.
17460 if ( $i_first_colon > 0 ) {
17461 my $i_question = $mate_index_to_go[$i_first_colon];
17462 if ( $i_question > 0 ) {
# walk backwards from the token just before the '?' down to index 0
17464 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
17465 my $token = $tokens_to_go[$ii];
17466 my $type = $types_to_go[$ii];
17468 # For now, a good break is either a comma or a 'return'.
# The candidate must also pass in_same_container when compared with
# the '?'.
17469 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
17470 && in_same_container( $ii, $i_question ) )
17472 push @insert_list, $ii;
17477 # insert any new break points
17478 if (@insert_list) {
17479 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
17485 sub in_same_container {
17487 # check to see if tokens at i1 and i2 are in the
17488 # same container, and not separated by a comma, ? or :
# (a '=>' counts as a comma; unless the token at i1 has type ':', an
# intervening '||' or 'or' at the same depth also counts as a separator;
# if the token at i1 has type ':', only a comma counts).
#
# Arguments: $i1, $i2 - token indexes; they may be given in either order.
# A bare 'return' (a false value) is used for every failure case below.
17489 my ( $i1, $i2 ) = @_;
# Note that $type and $depth are taken from the FIRST argument, before
# the two indexes are put into ascending order.
17490 my $type = $types_to_go[$i1];
17491 my $depth = $nesting_depth_to_go[$i1];
17492 return unless ( $nesting_depth_to_go[$i2] == $depth );
17493 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
17495 ###########################################################
17496 # This is potentially a very slow routine and not critical.
17497 # For safety just give up for large differences.
17498 # See test file 'infinite_loop.txt'
17499 # TODO: replace this loop with a data structure
17500 ###########################################################
# i.e. indexes more than 200 apart are reported as not being in the
# same container, without looking at the tokens between them.
17501 return if ( $i2 - $i1 > 200 );
# Examine only the tokens strictly between the two indexes.  Deeper
# tokens are skipped; a shallower token means the container was closed.
17503 for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
17504 next if ( $nesting_depth_to_go[$i] > $depth );
17505 return if ( $nesting_depth_to_go[$i] < $depth );
17507 my $tok = $tokens_to_go[$i];
17508 $tok = ',' if $tok eq '=>'; # treat => same as ,
17510 # Example: we would not want to break at any of these .'s
17511 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
17512 if ( $type ne ':' ) {
17513 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
17516 return if ( $tok =~ /^[\,]$/ );
17522 sub set_continuation_breaks {
17524 # Define an array of indexes for inserting newline characters to
17525 # keep the line lengths below the maximum desired length. There is
17526 # an implied break after the last token, so it need not be included.
17529 # This routine is part of series of routines which adjust line
17530 # lengths. It is only called if a statement is longer than the
17531 # maximum line length, or if a preliminary scanning located
17532 # desirable break points. Sub scan_list has already looked at
17533 # these tokens and set breakpoints (in array
17534 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17535 # after commas, after opening parens, and before closing parens).
17536 # This routine will honor these breakpoints and also add additional
17537 # breakpoints as necessary to keep the line length below the maximum
17538 # requested. It bases its decision on where the 'bond strength' is
17541 # Output: returns references to the arrays:
17544 # which contain the indexes $i of the first and last tokens on each
17547 # In addition, the array:
17548 # $forced_breakpoint_to_go[$i]
17549 # may be updated to be =1 for any index $i after which there must be
17550 # a break. This signals later routines not to undo the breakpoint.
17552 my $saw_good_break = shift;
17553 my @i_first = (); # the first index to output
17554 my @i_last = (); # the last index to output
17555 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
17556 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17558 set_bond_strengths();
17561 my $imax = $max_index_to_go;
17562 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17563 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17564 my $i_begin = $imin; # index for starting next iteration
17566 my $leading_spaces = leading_spaces_to_go($imin);
17567 my $line_count = 0;
17568 my $last_break_strength = NO_BREAK;
17569 my $i_last_break = -1;
17570 my $max_bias = 0.001;
17571 my $tiny_bias = 0.0001;
17572 my $leading_alignment_token = "";
17573 my $leading_alignment_type = "";
17575 # see if any ?/:'s are in order
17576 my $colons_in_order = 1;
17578 my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
17579 my $colon_count = @colon_list;
17580 foreach (@colon_list) {
17581 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17585 # This is a sufficient but not necessary condition for colon chain
17586 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
17588 #-------------------------------------------------------
17589 # BEGINNING of main loop to set continuation breakpoints
17590 # Keep iterating until we reach the end
17591 #-------------------------------------------------------
17592 while ( $i_begin <= $imax ) {
17593 my $lowest_strength = NO_BREAK;
17594 my $starting_sum = $lengths_to_go[$i_begin];
17597 my $lowest_next_token = '';
17598 my $lowest_next_type = 'b';
17599 my $i_lowest_next_nonblank = -1;
17601 #-------------------------------------------------------
17602 # BEGINNING of inner loop to find the best next breakpoint
17603 #-------------------------------------------------------
17604 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
17605 my $type = $types_to_go[$i_test];
17606 my $token = $tokens_to_go[$i_test];
17607 my $next_type = $types_to_go[ $i_test + 1 ];
17608 my $next_token = $tokens_to_go[ $i_test + 1 ];
17609 my $i_next_nonblank =
17610 ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
17611 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
17612 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17613 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17614 my $strength = $bond_strength_to_go[$i_test];
17616 # use old breaks as a tie-breaker. For example to
17617 # prevent blinkers with -pbp in this code:
17620 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
17623 # At the same time try to prevent a leading * in this code
17624 # with the default formatting:
17627 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
17628 ## * ( $x**( $a - 1 ) )
17629 ## * ( ( 1 - $x )**( $b - 1 ) );
17631 # reduce strength a bit to break ties at an old breakpoint ...
17632 $strength -= $tiny_bias
17633 if $old_breakpoint_to_go[$i_test]
17635 # which is a 'good' breakpoint, meaning ...
17636 # we don't want to break before it
17637 && !$want_break_before{$type}
17639 # and either we want to break before the next token
17640 # or the next token is not short (i.e. not a '*', '/' etc.)
17641 && $i_next_nonblank <= $imax
17643 $want_break_before{$next_nonblank_type}
17644 || ( $lengths_to_go[ $i_next_nonblank + 1 ] -
17645 $lengths_to_go[$i_next_nonblank] > 2 )
17646 || $next_nonblank_type =~ /^[\(\[\{L]$/
17649 my $must_break = 0;
17651 # FIXME: Might want to be able to break after these
17652 # force an immediate break at certain operators
17653 # with lower level than the start of the line
17656 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17657 || ( $next_nonblank_type eq 'k'
17658 && $next_nonblank_token =~ /^(and|or)$/ )
17660 && ( $nesting_depth_to_go[$i_begin] >
17661 $nesting_depth_to_go[$i_next_nonblank] )
17664 set_forced_breakpoint($i_next_nonblank);
17669 # Try to put a break where requested by scan_list
17670 $forced_breakpoint_to_go[$i_test]
17672 # break between ) { in a continued line so that the '{' can
17674 # See similar logic in scan_list which catches instances
17675 # where a line is just something like ') {'
17677 && ( $token eq ')' )
17678 && ( $next_nonblank_type eq '{' )
17679 && ($next_nonblank_block_type)
17680 && !$rOpts->{'opening-brace-always-on-right'} )
17682 # There is an implied forced break at a terminal opening brace
17683 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17687 # Forced breakpoints must sometimes be overridden, for example
17688 # because of a side comment causing a NO_BREAK. It is easier
17689 # to catch this here than when they are set.
17690 if ( $strength < NO_BREAK ) {
17691 $strength = $lowest_strength - $tiny_bias;
17696 # quit if a break here would put a good terminal token on
17697 # the next line and we already have a possible break
17700 && ( $next_nonblank_type =~ /^[\;\,]$/ )
17704 $lengths_to_go[ $i_next_nonblank + 1 ] -
17706 ) > $rOpts_maximum_line_length
17710 last if ( $i_lowest >= 0 );
17713 # Avoid a break which would strand a single punctuation
17714 # token. For example, we do not want to strand a leading
17715 # '.' which is followed by a long quoted string.
17716 # But note that we do want to do this with -extrude (l=1)
17717 # so please test any changes to this code on -extrude.
17720 && ( $i_test == $i_begin )
17721 && ( $i_test < $imax )
17722 && ( $token eq $type )
17726 $lengths_to_go[ $i_test + 1 ] -
17728 ) < $rOpts_maximum_line_length
17734 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
17740 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17743 # break at previous best break if it would have produced
17744 # a leading alignment of certain common tokens, and it
17745 # is different from the latest candidate break
17747 if ($leading_alignment_type);
17749 # Force at least one breakpoint if old code had good
17750 # break. It is only called if a breakpoint is required or
17751 # desired. This will probably need some adjustments
17752 # over time. A goal is to try to be sure that, if a new
17753 # side comment is introduced into formatted text, then
17754 # the same breakpoints will occur. scbreak.t
17757 $i_test == $imax # we are at the end
17758 && !$forced_breakpoint_count #
17759 && $saw_good_break # old line had good break
17760 && $type =~ /^[#;\{]$/ # and this line ends in
17761 # ';' or side comment
17762 && $i_last_break < 0 # and we haven't made a break
17763 && $i_lowest > 0 # and we saw a possible break
17764 && $i_lowest < $imax - 1 # (but not just before this ;)
17765 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17768 $lowest_strength = $strength;
17769 $i_lowest = $i_test;
17770 $lowest_next_token = $next_nonblank_token;
17771 $lowest_next_type = $next_nonblank_type;
17772 $i_lowest_next_nonblank = $i_next_nonblank;
17773 last if $must_break;
17775 # set flags to remember if a break here will produce a
17776 # leading alignment of certain common tokens
17777 if ( $line_count > 0
17779 && ( $lowest_strength - $last_break_strength <= $max_bias )
17782 my $i_last_end = $i_begin - 1;
17783 if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
17784 my $tok_beg = $tokens_to_go[$i_begin];
17785 my $type_beg = $types_to_go[$i_begin];
17788 # check for leading alignment of certain tokens
17790 $tok_beg eq $next_nonblank_token
17791 && $is_chain_operator{$tok_beg}
17792 && ( $type_beg eq 'k'
17793 || $type_beg eq $tok_beg )
17794 && $nesting_depth_to_go[$i_begin] >=
17795 $nesting_depth_to_go[$i_next_nonblank]
17798 || ( $tokens_to_go[$i_last_end] eq $token
17799 && $is_chain_operator{$token}
17800 && ( $type eq 'k' || $type eq $token )
17801 && $nesting_depth_to_go[$i_last_end] >=
17802 $nesting_depth_to_go[$i_test] )
17805 $leading_alignment_token = $next_nonblank_token;
17806 $leading_alignment_type = $next_nonblank_type;
17812 ( $i_test >= $imax )
17817 $lengths_to_go[ $i_test + 2 ] -
17819 ) > $rOpts_maximum_line_length
17822 FORMATTER_DEBUG_FLAG_BREAK
17824 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
17826 # allow one extra terminal token after exceeding line length
17827 # if it would strand this token.
17828 if ( $rOpts_fuzzy_line_length
17830 && ( $i_lowest == $i_test )
17831 && ( length($token) > 1 )
17832 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
17839 ( $i_test == $imax ) # we're done if no more tokens,
17841 ( $i_lowest >= 0 ) # or no more space and we have a break
17847 #-------------------------------------------------------
17848 # END of inner loop to find the best next breakpoint
17849 # Now decide exactly where to put the breakpoint
17850 #-------------------------------------------------------
17852 # it's always ok to break at imax if no other break was found
17853 if ( $i_lowest < 0 ) { $i_lowest = $imax }
17855 # semi-final index calculation
17856 my $i_next_nonblank = (
17857 ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17861 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
17862 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17864 #-------------------------------------------------------
17865 # ?/: rule 1 : if a break here will separate a '?' on this
17866 # line from its closing ':', then break at the '?' instead.
17867 #-------------------------------------------------------
17869 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17870 next unless ( $tokens_to_go[$i] eq '?' );
17872 # do not break if probable sequence of ?/: statements
17873 next if ($is_colon_chain);
17875 # do not break if statement is broken by side comment
17878 $tokens_to_go[$max_index_to_go] eq '#'
17879 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
17880 $max_index_to_go ) !~ /^[\;\}]$/
17883 # no break needed if matching : is also on the line
17885 if ( $mate_index_to_go[$i] >= 0
17886 && $mate_index_to_go[$i] <= $i_next_nonblank );
17889 if ( $want_break_before{'?'} ) { $i_lowest-- }
17893 #-------------------------------------------------------
17894 # END of inner loop to find the best next breakpoint:
17895 # Break the line after the token with index i=$i_lowest
17896 #-------------------------------------------------------
17898 # final index calculation
17899 $i_next_nonblank = (
17900 ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17904 $next_nonblank_type = $types_to_go[$i_next_nonblank];
17905 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17907 FORMATTER_DEBUG_FLAG_BREAK
17908 && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
17910 #-------------------------------------------------------
17911 # ?/: rule 2 : if we break at a '?', then break at its ':'
17913 # Note: this rule is also in sub scan_list to handle a break
17914 # at the start and end of a line (in case breaks are dictated
17915 # by side comments).
17916 #-------------------------------------------------------
17917 if ( $next_nonblank_type eq '?' ) {
17918 set_closing_breakpoint($i_next_nonblank);
17920 elsif ( $types_to_go[$i_lowest] eq '?' ) {
17921 set_closing_breakpoint($i_lowest);
17924 #-------------------------------------------------------
17925 # ?/: rule 3 : if we break at a ':' then we save
17926 # its location for further work below. We may need to go
17927 # back and break at its '?'.
17928 #-------------------------------------------------------
17929 if ( $next_nonblank_type eq ':' ) {
17930 push @i_colon_breaks, $i_next_nonblank;
17932 elsif ( $types_to_go[$i_lowest] eq ':' ) {
17933 push @i_colon_breaks, $i_lowest;
17936 # here we should set breaks for all '?'/':' pairs which are
17937 # separated by this line
17941 # save this line segment, after trimming blanks at the ends
17943 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17945 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17947 # set a forced breakpoint at a container opening, if necessary, to
17948 # signal a break at a closing container. Excepting '(' for now.
17949 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
17950 && !$forced_breakpoint_to_go[$i_lowest] )
17952 set_closing_breakpoint($i_lowest);
17955 # get ready to go again
17956 $i_begin = $i_lowest + 1;
17957 $last_break_strength = $lowest_strength;
17958 $i_last_break = $i_lowest;
17959 $leading_alignment_token = "";
17960 $leading_alignment_type = "";
17961 $lowest_next_token = '';
17962 $lowest_next_type = 'b';
17964 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17968 # update indentation size
17969 if ( $i_begin <= $imax ) {
17970 $leading_spaces = leading_spaces_to_go($i_begin);
17974 #-------------------------------------------------------
17975 # END of main loop to set continuation breakpoints
17976 # Now go back and make any necessary corrections
17977 #-------------------------------------------------------
17979 #-------------------------------------------------------
17980 # ?/: rule 4 -- if we broke at a ':', then break at
17981 # corresponding '?' unless this is a chain of ?: expressions
17982 #-------------------------------------------------------
17983 if (@i_colon_breaks) {
17985 # using a simple method for deciding if we are in a ?/: chain --
17986 # this is a chain if it has multiple ?/: pairs all in order;
17988 # Note that if line starts in a ':' we count that above as a break
17989 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17991 unless ($is_chain) {
17992 my @insert_list = ();
17993 foreach (@i_colon_breaks) {
17994 my $i_question = $mate_index_to_go[$_];
17995 if ( $i_question >= 0 ) {
17996 if ( $want_break_before{'?'} ) {
17998 if ( $i_question > 0
17999 && $types_to_go[$i_question] eq 'b' )
18005 if ( $i_question >= 0 ) {
18006 push @insert_list, $i_question;
18009 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
18013 return ( \@i_first, \@i_last, $colon_count );
18016 sub insert_additional_breaks {
18018 # this routine will add line breaks at requested locations after
18019 # sub set_continuation_breaks has made preliminary breaks.
#
# Parameters:
#   $ri_break_list - reference to a list of token indexes; a line is to
#                    end at each of them
#   $ri_first      - reference to an array indexed by line number, read
#                    here as the first token index of each line
#   $ri_last       - reference to an array indexed by line number, read
#                    here as the last token index of each line
# The two line arrays are modified in place (via splice) whenever a
# break is inserted.
18021 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
18024 my $line_number = 0;
18026 foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) { # ascending index order
18028 $i_f = $$ri_first[$line_number];
18029 $i_l = $$ri_last[$line_number];
18030 while ( $i_break_left >= $i_l ) { # break index is at/after this line's last token
18033 # shouldn't happen unless caller passes bad indexes
18034 if ( $line_number >= @$ri_last ) {
18036 "Non-fatal program bug: couldn't set break at $i_break_left\n"
18038 report_definite_bug();
18041 $i_f = $$ri_first[$line_number];
18042 $i_l = $$ri_last[$line_number];
18045 # Do not leave a blank at the end of a line; back up if necessary
18046 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
# the new line starts at the next token, skipping one blank if present
18048 my $i_break_right = $i_break_left + 1;
18049 if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
# split only if both sides of the break lie within the current line
18051 if ( $i_break_left >= $i_f
18052 && $i_break_left < $i_l
18053 && $i_break_right > $i_f
18054 && $i_break_right <= $i_l )
# replace this line's single entry with two entries:
# [ $i_f .. $i_break_left ] and [ $i_break_right .. $i_l ]
18056 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
18057 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
18062 sub set_closing_breakpoint {
18064 # set a breakpoint at a matching closing token
18065 # used to break at a ':' which matches a '?', and also called by
# set_continuation_breaks for an opening '{' or '[' at a line break
#
# $i_break is the index of the opening token in the *_to_go arrays
# ($mate_index_to_go, $tokens_to_go and $type_sequence_to_go are all
# subscripted with it below).
18066 my $i_break = shift;
18068 if ( $mate_index_to_go[$i_break] >= 0 ) { # a mate index is recorded for this token
18070 # CAUTION: infinite recursion possible here:
18071 # set_closing_breakpoint calls set_forced_breakpoint, and
18072 # set_forced_breakpoint calls set_closing_breakpoint
18073 # ( test files attrib.t, BasicLyx.pm.html).
18074 # Don't reduce the '2' in the statement below
18075 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
18077 # break before } ] and ), but sub set_forced_breakpoint will decide
18078 # to break before or after a ? and :
18079 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
18080 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
# record a postponed breakpoint under this token's sequence number,
# if it has one
18084 my $type_sequence = $type_sequence_to_go[$i_break];
18085 if ($type_sequence) {
# NOTE(review): $closing_token is not referenced again in the lines
# visible here -- confirm whether it is still needed
18086 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
18087 $postponed_breakpoint{$type_sequence} = 1;
18092 # check to see if output line tabbing agrees with input line
18093 # this can be very useful for debugging a script which has an extra
18095 sub compare_indentation_levels {
# Parameters (both compared as strings with 'ne'):
#   $python_indentation_level     - reported as "input" in the log entry
#   $structural_indentation_level - reported as "output" in the log entry
# Updates $last_tabbing_disagreement, $in_tabbing_disagreement,
# $first_tabbing_disagreement and $tabbing_disagreement_count, and
# writes logfile entries while the count is within MAX_NAG_MESSAGES.
18097 my ( $python_indentation_level, $structural_indentation_level ) = @_;
18098 if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
18099 $last_tabbing_disagreement = $input_line_number;
18101 if ($in_tabbing_disagreement) {
18104 $tabbing_disagreement_count++;
18106 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
18107 write_logfile_entry(
18108 "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
# remember the input line where this disagreement started; the first
# such line seen is kept in $first_tabbing_disagreement
18111 $in_tabbing_disagreement = $input_line_number;
18112 $first_tabbing_disagreement = $in_tabbing_disagreement
18113 unless ($first_tabbing_disagreement);
18118 if ($in_tabbing_disagreement) {
18120 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
18121 write_logfile_entry(
18122 "End indentation disagreement from input line $in_tabbing_disagreement\n"
# announce once, when the count reaches the limit, that later
# disagreements will not be logged
18125 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
18126 write_logfile_entry(
18127 "No further tabbing disagreements will be noted\n");
18130 $in_tabbing_disagreement = 0; # no disagreement in progress
18135 #####################################################################
18137 # the Perl::Tidy::IndentationItem class supplies items which contain
18138 # how much whitespace should be used at the start of a line
18140 #####################################################################
18142 package Perl::Tidy::IndentationItem;
18144 # Indexes for indentation items
18145 use constant SPACES => 0; # total leading white spaces
18146 use constant LEVEL => 1; # the indentation 'level'
18147 use constant CI_LEVEL => 2; # the 'continuation level'
18148 use constant AVAILABLE_SPACES => 3; # how many left spaces available
18150 use constant CLOSED => 4; # index where we saw closing '}'
18151 use constant COMMA_COUNT => 5; # how many commas at this level?
18152 use constant SEQUENCE_NUMBER => 6; # output batch number
18153 use constant INDEX => 7; # index in output batch list
18154 use constant HAVE_CHILD => 8; # any dependents?
18155 use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
18156 # we would like to move to get
18157 # alignment (negative if left)
18158 use constant ALIGN_PAREN => 10; # do we want to try to align
18159 # with an opening structure?
18160 use constant MARKED => 11; # if visited by corrector logic
18161 use constant STACK_DEPTH => 12; # indentation nesting depth
18162 use constant STARTING_INDEX => 13; # first token index of this level
18163 use constant ARROW_COUNT => 14; # how many =>'s
18167 # Create an 'indentation_item' which describes one level of leading
18168 # whitespace when the '-lp' indentation is used. We return
18169 # a reference to an anonymous array of associated variables.
18170 # See above constants for storage scheme.
18172 $class, $spaces, $level,
18173 $ci_level, $available_spaces, $index,
18174 $gnu_sequence_number, $align_paren, $stack_depth,
18178 my $arrow_count = 0;
18179 my $comma_count = 0;
18180 my $have_child = 0;
18181 my $want_right_spaces = 0;
18184 $spaces, $level, $ci_level,
18185 $available_spaces, $closed, $comma_count,
18186 $gnu_sequence_number, $index, $have_child,
18187 $want_right_spaces, $align_paren, $marked,
18188 $stack_depth, $starting_index, $arrow_count,
18192 sub permanently_decrease_AVAILABLE_SPACES {
18194 # make a permanent reduction in the available indentation spaces
18195 # at one indentation item. NOTE: if there are child nodes, their
18196 # total SPACES must be reduced by the caller.
#
# Parameters: $item (an indentation item object) and $spaces_needed
# (the requested reduction).
# Returns the number of spaces actually removed ($deleted_spaces).
18198 my ( $item, $spaces_needed ) = @_;
18199 my $available_spaces = $item->get_AVAILABLE_SPACES();
# never remove more than is available
# NOTE(review): the '?' branch of this ternary is missing from this
# listing; presumably it is $spaces_needed -- confirm
18200 my $deleted_spaces =
18201 ( $available_spaces > $spaces_needed )
18203 : $available_spaces;
18204 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
18205 $item->decrease_SPACES($deleted_spaces);
18206 $item->set_RECOVERABLE_SPACES(0); # clear the recoverable count
18208 return $deleted_spaces;
18211 sub tentatively_decrease_AVAILABLE_SPACES {
18213 # We are asked to tentatively delete $spaces_needed of indentation
18214 # for an indentation item. We may want to undo this later. NOTE: if
18215 # there are child nodes, their total SPACES must be reduced by the
#
# Parameters: $item (an indentation item object) and $spaces_needed
# (the requested reduction).
# Returns the number of spaces actually removed ($deleted_spaces).
18217 my ( $item, $spaces_needed ) = @_;
18218 my $available_spaces = $item->get_AVAILABLE_SPACES();
# never remove more than is available
# NOTE(review): the '?' branch of this ternary is missing from this
# listing; presumably it is $spaces_needed -- confirm
18219 my $deleted_spaces =
18220 ( $available_spaces > $spaces_needed )
18222 : $available_spaces;
18223 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
18224 $item->decrease_SPACES($deleted_spaces);
18225 $item->increase_RECOVERABLE_SPACES($deleted_spaces); # add the removed amount to the recoverable count
18226 return $deleted_spaces;
18229 sub get_STACK_DEPTH {
18231 return $self->[STACK_DEPTH];
18236 return $self->[SPACES];
18241 return $self->[MARKED];
18245 my ( $self, $value ) = @_;
18246 if ( defined($value) ) {
18247 $self->[MARKED] = $value;
18249 return $self->[MARKED];
18252 sub get_AVAILABLE_SPACES {
18254 return $self->[AVAILABLE_SPACES];
18257 sub decrease_SPACES {
# subtract $value (when defined) from this item's SPACES entry and
# return the resulting value
18258 my ( $self, $value ) = @_;
18259 if ( defined($value) ) {
18260 $self->[SPACES] -= $value;
18262 return $self->[SPACES];
18265 sub decrease_AVAILABLE_SPACES {
# subtract $value (when defined) from this item's AVAILABLE_SPACES
# entry and return the resulting value
18266 my ( $self, $value ) = @_;
18267 if ( defined($value) ) {
18268 $self->[AVAILABLE_SPACES] -= $value;
18270 return $self->[AVAILABLE_SPACES];
18273 sub get_ALIGN_PAREN {
18275 return $self->[ALIGN_PAREN];
18278 sub get_RECOVERABLE_SPACES {
18280 return $self->[RECOVERABLE_SPACES];
18283 sub set_RECOVERABLE_SPACES {
# store $value (when defined) in this item's RECOVERABLE_SPACES entry
# and return the current value
18284 my ( $self, $value ) = @_;
18285 if ( defined($value) ) {
18286 $self->[RECOVERABLE_SPACES] = $value;
18288 return $self->[RECOVERABLE_SPACES];
18291 sub increase_RECOVERABLE_SPACES {
# add $value (when defined) to this item's RECOVERABLE_SPACES entry
# and return the resulting value
18292 my ( $self, $value ) = @_;
18293 if ( defined($value) ) {
18294 $self->[RECOVERABLE_SPACES] += $value;
18296 return $self->[RECOVERABLE_SPACES];
18301 return $self->[CI_LEVEL];
18306 return $self->[LEVEL];
18309 sub get_SEQUENCE_NUMBER {
18311 return $self->[SEQUENCE_NUMBER];
18316 return $self->[INDEX];
18319 sub get_STARTING_INDEX {
18321 return $self->[STARTING_INDEX];
18324 sub set_HAVE_CHILD {
# store $value (when defined) in this item's HAVE_CHILD entry and
# return the current value
18325 my ( $self, $value ) = @_;
18326 if ( defined($value) ) {
18327 $self->[HAVE_CHILD] = $value;
18329 return $self->[HAVE_CHILD];
18332 sub get_HAVE_CHILD {
18334 return $self->[HAVE_CHILD];
18337 sub set_ARROW_COUNT {
# store $value (when defined) in this item's ARROW_COUNT entry and
# return the current value
18338 my ( $self, $value ) = @_;
18339 if ( defined($value) ) {
18340 $self->[ARROW_COUNT] = $value;
18342 return $self->[ARROW_COUNT];
18345 sub get_ARROW_COUNT {
18347 return $self->[ARROW_COUNT];
18350 sub set_COMMA_COUNT {
# store $value (when defined) in this item's COMMA_COUNT entry and
# return the current value
18351 my ( $self, $value ) = @_;
18352 if ( defined($value) ) {
18353 $self->[COMMA_COUNT] = $value;
18355 return $self->[COMMA_COUNT];
18358 sub get_COMMA_COUNT {
18360 return $self->[COMMA_COUNT];
18364 my ( $self, $value ) = @_;
18365 if ( defined($value) ) {
18366 $self->[CLOSED] = $value;
18368 return $self->[CLOSED];
18373 return $self->[CLOSED];
18376 #####################################################################
18378 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
18379 # contain a single output line
18381 #####################################################################
18383 package Perl::Tidy::VerticalAligner::Line;
18390 use constant JMAX => 0;
18391 use constant JMAX_ORIGINAL_LINE => 1;
18392 use constant RTOKENS => 2;
18393 use constant RFIELDS => 3;
18394 use constant RPATTERNS => 4;
18395 use constant INDENTATION => 5;
18396 use constant LEADING_SPACE_COUNT => 6;
18397 use constant OUTDENT_LONG_LINES => 7;
18398 use constant LIST_TYPE => 8;
18399 use constant IS_HANGING_SIDE_COMMENT => 9;
18400 use constant RALIGNMENTS => 10;
18401 use constant MAXIMUM_LINE_LENGTH => 11;
18402 use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
18405 $_index_map{jmax} = JMAX;
18406 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
18407 $_index_map{rtokens} = RTOKENS;
18408 $_index_map{rfields} = RFIELDS;
18409 $_index_map{rpatterns} = RPATTERNS;
18410 $_index_map{indentation} = INDENTATION;
18411 $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
18412 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
18413 $_index_map{list_type} = LIST_TYPE;
18414 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
18415 $_index_map{ralignments} = RALIGNMENTS;
18416 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
18417 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
18419 my @_default_data = ();
18420 $_default_data[JMAX] = undef;
18421 $_default_data[JMAX_ORIGINAL_LINE] = undef;
18422 $_default_data[RTOKENS] = undef;
18423 $_default_data[RFIELDS] = undef;
18424 $_default_data[RPATTERNS] = undef;
18425 $_default_data[INDENTATION] = undef;
18426 $_default_data[LEADING_SPACE_COUNT] = undef;
18427 $_default_data[OUTDENT_LONG_LINES] = undef;
18428 $_default_data[LIST_TYPE] = undef;
18429 $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
18430 $_default_data[RALIGNMENTS] = [];
18431 $_default_data[MAXIMUM_LINE_LENGTH] = undef;
18432 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
18436 # methods to count object population
18438 sub get_count { $_count; }
18439 sub _increment_count { ++$_count }
18440 sub _decrement_count { --$_count }
18443 # Constructor may be called as a class method
18445 my ( $caller, %arg ) = @_;
18446 my $caller_is_obj = ref($caller);
18447 my $class = $caller_is_obj || $caller;
18449 my $self = bless [], $class;
18451 $self->[RALIGNMENTS] = [];
18454 foreach ( keys %_index_map ) {
18455 $index = $_index_map{$_};
18456 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
18457 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
18458 else { $self->[$index] = $_default_data[$index] }
18461 $self->_increment_count();
18466 $_[0]->_decrement_count();
18469 sub get_jmax { $_[0]->[JMAX] }
18470 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
18471 sub get_rtokens { $_[0]->[RTOKENS] }
18472 sub get_rfields { $_[0]->[RFIELDS] }
18473 sub get_rpatterns { $_[0]->[RPATTERNS] }
18474 sub get_indentation { $_[0]->[INDENTATION] }
18475 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
18476 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
18477 sub get_list_type { $_[0]->[LIST_TYPE] }
18478 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
18479 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
18481 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
18482 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
18483 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
18484 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
18486 sub get_starting_column {
18487 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
18490 sub increment_column {
18491 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
18493 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
18495 sub current_field_width {
18499 return $self->get_column($j);
18502 return $self->get_column($j) - $self->get_column( $j - 1 );
18506 sub field_width_growth {
18509 return $self->get_column($j) - $self->get_starting_column($j);
18512 sub starting_field_width {
18516 return $self->get_starting_column($j);
18519 return $self->get_starting_column($j) -
18520 $self->get_starting_column( $j - 1 );
18524 sub increase_field_width {
# add $pad to the column of alignment $j and of every later alignment
# up to and including index jmax
18527 my ( $j, $pad ) = @_;
18528 my $jmax = $self->get_jmax();
18529 for my $k ( $j .. $jmax ) {
18530 $self->increment_column( $k, $pad );
18534 sub get_available_space_on_right {
# return the difference between this line's MAXIMUM_LINE_LENGTH entry
# and the column of its alignment at index jmax
18536 my $jmax = $self->get_jmax();
18537 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
18540 sub set_jmax { $_[0]->[JMAX] = $_[1] }
18541 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
18542 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
18543 sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
18544 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
18545 sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
18546 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
18547 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
18548 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
18549 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
18550 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
18554 #####################################################################
18556 # the Perl::Tidy::VerticalAligner::Alignment class holds information
18557 # on a single column being aligned
18559 #####################################################################
18560 package Perl::Tidy::VerticalAligner::Alignment;
18568 # Symbolic array indexes
18569 use constant COLUMN => 0; # the current column number
18570 use constant STARTING_COLUMN => 1; # column number when created
18571 use constant MATCHING_TOKEN => 2; # what token we are matching
18572 use constant STARTING_LINE => 3; # the line index of creation
18573 use constant ENDING_LINE => 4; # the most recent line to use it
18574 use constant SAVED_COLUMN => 5; # copy of COLUMN made by save_column and put back by restore_column
18575 use constant SERIAL_NUMBER => 6; # unique number for this alignment
18576 # (just its index in an array)
18578 # Correspondence between variables and array indexes
18580 $_index_map{column} = COLUMN;
18581 $_index_map{starting_column} = STARTING_COLUMN;
18582 $_index_map{matching_token} = MATCHING_TOKEN;
18583 $_index_map{starting_line} = STARTING_LINE;
18584 $_index_map{ending_line} = ENDING_LINE;
18585 $_index_map{saved_column} = SAVED_COLUMN;
18586 $_index_map{serial_number} = SERIAL_NUMBER;
18588 my @_default_data = ();
18589 $_default_data[COLUMN] = undef;
18590 $_default_data[STARTING_COLUMN] = undef;
18591 $_default_data[MATCHING_TOKEN] = undef;
18592 $_default_data[STARTING_LINE] = undef;
18593 $_default_data[ENDING_LINE] = undef;
18594 $_default_data[SAVED_COLUMN] = undef;
18595 $_default_data[SERIAL_NUMBER] = undef;
18597 # class population count
18600 sub get_count { $_count; }
18601 sub _increment_count { ++$_count }
18602 sub _decrement_count { --$_count }
18607 my ( $caller, %arg ) = @_;
18608 my $caller_is_obj = ref($caller);
18609 my $class = $caller_is_obj || $caller;
18611 my $self = bless [], $class;
18613 foreach ( keys %_index_map ) {
18614 my $index = $_index_map{$_};
18615 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
18616 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
18617 else { $self->[$index] = $_default_data[$index] }
18619 $self->_increment_count();
18624 $_[0]->_decrement_count();
18627 sub get_column { return $_[0]->[COLUMN] }
18628 sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
18629 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
18630 sub get_starting_line { return $_[0]->[STARTING_LINE] }
18631 sub get_ending_line { return $_[0]->[ENDING_LINE] }
18632 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
18634 sub set_column { $_[0]->[COLUMN] = $_[1] }
18635 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
18636 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
18637 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
18638 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
18639 sub increment_column { $_[0]->[COLUMN] += $_[1] }
18641 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
18642 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
18646 package Perl::Tidy::VerticalAligner;
18648 # The Perl::Tidy::VerticalAligner package collects output lines and
18649 # attempts to line up certain common tokens, such as => and #, which are
18650 # identified by the calling routine.
18652 # There are two main routines: append_line and flush. Append acts as a
18653 # storage buffer, collecting lines into a group which can be vertically
18654 # aligned. When alignment is no longer possible or desirable, it dumps
18655 # the group to flush.
18657 # append_line -----> flush
18665 # Caution: these debug flags produce a lot of output
18666 # They should all be 0 except when debugging small scripts
18668 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
18669 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
18670 use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
18672 my $debug_warning = sub {
18673 print "VALIGN_DEBUGGING with key $_[0]\n";
18676 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
18677 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
18682 $vertical_aligner_self
18684 $maximum_alignment_index
18688 $previous_minimum_jmax_seen
18689 $previous_maximum_jmax_seen
18690 $maximum_line_index
18695 $last_group_level_written
18696 $last_leading_space_count
18700 $last_comment_column
18701 $last_side_comment_line_number
18702 $last_side_comment_length
18703 $last_side_comment_level
18704 $outdented_line_count
18705 $first_outdented_line_at
18706 $last_outdented_line_at
18707 $diagnostics_object
18709 $file_writer_object
18710 @side_comment_history
18711 $comment_leading_space_count
18712 $is_matching_terminal_line
18719 $cached_line_leading_space_count
18720 $cached_seqno_string
18723 $last_nonblank_seqno_string
18727 $rOpts_maximum_line_length
18728 $rOpts_continuation_indentation
18729 $rOpts_indent_columns
18731 $rOpts_entab_leading_whitespace
18734 $rOpts_fixed_position_side_comment
18735 $rOpts_minimum_space_to_comment
18743 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
18746 # variables describing the entire space group:
18747 $ralignment_list = [];
18749 $last_group_level_written = -1;
18750 $extra_indent_ok = 0; # can we move all lines to the right?
18751 $last_side_comment_length = 0;
18752 $maximum_jmax_seen = 0;
18753 $minimum_jmax_seen = 0;
18754 $previous_minimum_jmax_seen = 0;
18755 $previous_maximum_jmax_seen = 0;
18757 # variables describing each line of the group
18758 @group_lines = (); # list of all lines in group
18760 $outdented_line_count = 0;
18761 $first_outdented_line_at = 0;
18762 $last_outdented_line_at = 0;
18763 $last_side_comment_line_number = 0;
18764 $last_side_comment_level = -1;
18765 $is_matching_terminal_line = 0;
18767 # most recent 3 side comments; [ line number, column ]
18768 $side_comment_history[0] = [ -300, 0 ];
18769 $side_comment_history[1] = [ -200, 0 ];
18770 $side_comment_history[2] = [ -100, 0 ];
18772 # write_leader_and_string cache:
18773 $cached_line_text = "";
18774 $cached_line_type = 0;
18775 $cached_line_flag = 0;
18777 $cached_line_valid = 0;
18778 $cached_line_leading_space_count = 0;
18779 $cached_seqno_string = "";
18781 # string of sequence numbers joined together
18782 $seqno_string = "";
18783 $last_nonblank_seqno_string = "";
18785 # frequently used parameters
18786 $rOpts_indent_columns = $rOpts->{'indent-columns'};
18787 $rOpts_tabs = $rOpts->{'tabs'};
18788 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
18789 $rOpts_fixed_position_side_comment =
18790 $rOpts->{'fixed-position-side-comment'};
18791 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
18792 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
18793 $rOpts_valign = $rOpts->{'valign'};
18795 forget_side_comment();
18797 initialize_for_new_group();
18799 $vertical_aligner_self = {};
18800 bless $vertical_aligner_self, $class;
18801 return $vertical_aligner_self;
18804 sub initialize_for_new_group {
# reset the per-group state variables before a new group of lines
# is collected
18805 $maximum_line_index = -1; # lines in the current group
18806 $maximum_alignment_index = -1; # alignments in current group (make_alignment pre-increments this before storing)
18807 $zero_count = 0; # count consecutive lines without tokens
18808 $current_line = undef; # line being matched for alignment
18809 $group_maximum_gap = 0; # largest gap introduced
18811 $marginal_match = 0;
18812 $comment_leading_space_count = 0;
18813 $last_leading_space_count = 0;
18816 # interface to Perl::Tidy::Diagnostics routines
18817 sub write_diagnostics {
18818 if ($diagnostics_object) {
18819 $diagnostics_object->write_diagnostics(@_);
18823 # interface to Perl::Tidy::Logger routines
18825 if ($logger_object) {
18826 $logger_object->warning(@_);
18830 sub write_logfile_entry {
18831 if ($logger_object) {
18832 $logger_object->write_logfile_entry(@_);
18836 sub report_definite_bug {
18837 if ($logger_object) {
18838 $logger_object->report_definite_bug();
18844 # return the number of leading spaces associated with an indentation
18845 # variable $indentation is either a constant number of spaces or an
18846 # object with a get_SPACES method.
18847 my $indentation = shift;
18848 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
18851 sub get_RECOVERABLE_SPACES {
18853 # return the number of spaces (+ means shift right, - means shift left)
18854 # that we would like to shift a group of lines with the same indentation
18855 # to get them to line up with their opening parens
# $indentation may be a plain value or a reference; for a plain value
# the result is 0, otherwise its get_RECOVERABLE_SPACES method is called
18856 my $indentation = shift;
18857 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
18860 sub get_STACK_DEPTH {
# return the result of $indentation->get_STACK_DEPTH() when
# $indentation is a reference, and 0 when it is a plain value
18862 my $indentation = shift;
18863 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
18866 sub make_alignment {
18867 my ( $col, $token ) = @_;
18869 # make one new alignment at column $col which aligns token $token
# The new object is stored in $ralignment_list at the next free index,
# which also becomes its serial number; its starting and ending line
# are both set to the current $maximum_line_index.
18870 ++$maximum_alignment_index;
# NOTE(review): indirect-object call; the equivalent arrow form is
# Perl::Tidy::VerticalAligner::Alignment->new(...)
18871 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
18873 starting_column => $col,
18874 matching_token => $token,
18875 starting_line => $maximum_line_index,
18876 ending_line => $maximum_line_index,
18877 serial_number => $maximum_alignment_index,
18879 $ralignment_list->[$maximum_alignment_index] = $alignment;
18883 sub dump_alignments {
# Debugging aid: a tab-separated header line, followed by one
# tab-separated row for each alignment 0 .. $maximum_alignment_index
# listing its index, matching token, starting column, current column,
# starting line and ending line.
18885 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
18886 for my $i ( 0 .. $maximum_alignment_index ) {
18887 my $column = $ralignment_list->[$i]->get_column();
18888 my $starting_column = $ralignment_list->[$i]->get_starting_column();
18889 my $matching_token = $ralignment_list->[$i]->get_matching_token();
18890 my $starting_line = $ralignment_list->[$i]->get_starting_line();
18891 my $ending_line = $ralignment_list->[$i]->get_ending_line();
18893 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
18897 sub save_alignment_columns {
# call save_column() on every alignment in the current group
# (indexes 0 .. $maximum_alignment_index of $ralignment_list)
18898 for my $i ( 0 .. $maximum_alignment_index ) {
18899 $ralignment_list->[$i]->save_column();
18903 sub restore_alignment_columns {
# call restore_column() on every alignment in the current group
# (indexes 0 .. $maximum_alignment_index of $ralignment_list)
18904 for my $i ( 0 .. $maximum_alignment_index ) {
18905 $ralignment_list->[$i]->restore_column();
18909 sub forget_side_comment {
# discard the remembered side comment column by resetting it to 0
18910 $last_comment_column = 0;
18915 # sub append is called to place one line in the current vertical group.
18917 # The input parameters are:
18918 # $level = indentation level of this line
18919 # $rfields = reference to array of fields
18920 # $rpatterns = reference to array of patterns, one per field
18921 # $rtokens = reference to array of tokens starting fields 1,2,..
18923 # Here is an example of what this package does. In this example,
18924 # we are trying to line up both the '=>' and the '#'.
18926 # '18' => 'grave', # \`
18927 # '19' => 'acute', # `'
18928 # '20' => 'caron', # \v
18929 # <-tabs-><f1-><--field 2 ---><-f3->
18932 # col1 col2 col3 col4
18934 # The calling routine has already broken the entire line into 3 fields as
18935 # indicated. (So the work of identifying promising common tokens has
18936 # already been done).
18938 # In this example, there will be 2 tokens being matched: '=>' and '#'.
18939 # They are the leading parts of fields 2 and 3, but we do need to know
18940 # what they are so that we can dump a group of lines when these tokens
18943 # The fields contain the actual characters of each field. The patterns
18944 # are like the fields, but they contain mainly token types instead
18945 # of tokens, so they have fewer characters. They are used to be
18946 # sure we are matching fields of similar type.
18948 # In this example, there will be 4 column indexes being adjusted. The
18949 # first one is always at zero. The interior columns are at the start of
18950 # the matching tokens, and the last one tracks the maximum line length.
18952 # Basically, each time a new line comes in, it joins the current vertical
18953 # group if possible. Otherwise it causes the current group to be dumped
18954 # and a new group is started.
18956 # For each new group member, the column locations are increased, as
18957 # necessary, to make room for the new fields. When the group is finally
18958 # output, these column numbers are used to compute the amount of spaces of
18959 # padding needed for each field.
18961 # Programming note: the fields are assumed not to have any tab characters.
18962 # Tabs have been previously removed except for tabs in quoted strings and
18963 # side comments. Tabs in these fields can mess up the column counting.
18964 # The log file warns the user if there are any such tabs.
# Further parameters that appear in the argument list below, described by
# how this sub uses them:
# $level_end = passed on to make_side_comment
# $indentation = number of spaces, or an object (see get_SPACES)
# $is_forced_break = when true, decide_if_list is run on the new line
# $outdent_long_lines = stored with the new line and passed to
#     write_leader_and_string
# $is_terminal_ternary = when true (and a current line exists) the
#     fields are adjusted by fix_terminal_ternary
# $is_terminal_statement = used when setting $extra_indent_ok
# $do_not_pad = when true, my_flush() is called first
# $rvertical_tightness_flags = array reference; elements [2] and [3]
#     are read/set here when validating a cached line
18967 $level, $level_end,
18968 $indentation, $rfields,
18969 $rtokens, $rpatterns,
18970 $is_forced_break, $outdent_long_lines,
18971 $is_terminal_ternary, $is_terminal_statement,
18972 $do_not_pad, $rvertical_tightness_flags,
18976 # number of fields is $jmax
18977 # number of tokens between fields is $jmax-1
18978 my $jmax = $#{$rfields};
18980 my $leading_space_count = get_SPACES($indentation);
18982 # set outdented flag to be sure we either align within statements or
18983 # across statement boundaries, but not both.
18984 my $is_outdented = $last_leading_space_count > $leading_space_count;
18985 $last_leading_space_count = $leading_space_count;
18987 # Patch: undo for hanging side comment
18988 my $is_hanging_side_comment =
18989 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
18990 $is_outdented = 0 if $is_hanging_side_comment;
18992 VALIGN_DEBUG_FLAG_APPEND0 && do {
18994 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
18997 # Validate cached line if necessary: If we can produce a container
18998 # with just 2 lines total by combining an existing cached opening
18999 # token with the closing token to follow, then we will mark both
19000 # cached flags as valid.
19001 if ($rvertical_tightness_flags) {
19002 if ( $maximum_line_index <= 0
19003 && $cached_line_type
19005 && $rvertical_tightness_flags->[2]
19006 && $rvertical_tightness_flags->[2] == $cached_seqno )
19008 $rvertical_tightness_flags->[3] ||= 1;
19009 $cached_line_valid ||= 1;
19013 # do not join an opening block brace with an unbalanced line
19014 # unless requested with a flag value of 2
19015 if ( $cached_line_type == 3
19016 && $maximum_line_index < 0
19017 && $cached_line_flag < 2
19018 && $level_jump != 0 )
19020 $cached_line_valid = 0;
19023 # patch until new aligner is finished
19024 if ($do_not_pad) { my_flush() }
19026 # shouldn't happen:
19027 if ( $level < 0 ) { $level = 0 }
19029 # do not align code across indentation level changes
19030 # or if vertical alignment is turned off for debugging
19031 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
19033 # we are allowed to shift a group of lines to the right if its
19034 # level is greater than the previous and next group
19036 ( $level < $group_level && $last_group_level_written < $group_level );
19040 # If we know that this line will get flushed out by itself because
19041 # of level changes, we can leave the extra_indent_ok flag set.
19042 # That way, if we get an external flush call, we will still be
19043 # able to do some -lp alignment if necessary.
19044 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
19046 $group_level = $level;
19048 # wait until after the above flush to get the leading space
19049 # count because it may have been changed if the -icp flag is in
19051 $leading_space_count = get_SPACES($indentation);
19055 # --------------------------------------------------------------------
19056 # Patch to collect outdentable block COMMENTS
19057 # --------------------------------------------------------------------
19058 my $is_blank_line = "";
19059 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
19060 if ( $group_type eq 'COMMENT' ) {
19064 && $outdent_long_lines
19065 && $leading_space_count == $comment_leading_space_count
19070 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
19078 # --------------------------------------------------------------------
19079 # add dummy fields for terminal ternary
19080 # --------------------------------------------------------------------
19081 my $j_terminal_match;
19082 if ( $is_terminal_ternary && $current_line ) {
19083 $j_terminal_match =
19084 fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
# the helper may have inserted fields, so recompute the last field index
19085 $jmax = @{$rfields} - 1;
19088 # --------------------------------------------------------------------
19089 # add dummy fields for else statement
19090 # --------------------------------------------------------------------
19091 if ( $rfields->[0] =~ /^else\s*$/
19093 && $level_jump == 0 )
19095 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
# the helper may have inserted fields, so recompute the last field index
19096 $jmax = @{$rfields} - 1;
19099 # --------------------------------------------------------------------
19100 # Step 1. Handle simple line of code with no fields to match.
19101 # --------------------------------------------------------------------
19102 if ( $jmax <= 0 ) {
19105 if ( $maximum_line_index >= 0
19106 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
19109 # flush the current group if it has some aligned columns..
19110 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
19112 # flush current group if we are just collecting side comments..
19115 # ...and we haven't seen a comment lately
19116 ( $zero_count > 3 )
19118 # ..or if this new line doesn't fit to the left of the comments
19119 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
19120 $group_lines[0]->get_column(0) )
19127 # patch to start new COMMENT group if this comment may be outdented
19128 if ( $is_block_comment
19129 && $outdent_long_lines
19130 && $maximum_line_index < 0 )
19132 $group_type = 'COMMENT';
19133 $comment_leading_space_count = $leading_space_count;
19134 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
19138 # just write this line directly if no current group, no side comment,
19139 # and no space recovery is needed.
19140 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
19142 write_leader_and_string( $leading_space_count, $$rfields[0], 0,
19143 $outdent_long_lines, $rvertical_tightness_flags );
19151 # programming check: (shouldn't happen)
19152 # an error here implies an incorrect call was made
19153 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
19155 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
19157 report_definite_bug();
19160 # --------------------------------------------------------------------
19161 # create an object to hold this line
19162 # --------------------------------------------------------------------
19163 my $new_line = new Perl::Tidy::VerticalAligner::Line(
19165 jmax_original_line => $jmax,
19166 rtokens => $rtokens,
19167 rfields => $rfields,
19168 rpatterns => $rpatterns,
19169 indentation => $indentation,
19170 leading_space_count => $leading_space_count,
19171 outdent_long_lines => $outdent_long_lines,
19173 is_hanging_side_comment => $is_hanging_side_comment,
19174 maximum_line_length => $rOpts->{'maximum-line-length'},
19175 rvertical_tightness_flags => $rvertical_tightness_flags,
19178 # Initialize a global flag saying if the last line of the group should
19179 # match end of group and also terminate the group. There should be no
19180 # returns between here and where the flag is handled at the bottom.
19181 my $col_matching_terminal = 0;
19182 if ( defined($j_terminal_match) ) {
19184 # remember the column of the terminal ? or { to match with
19185 $col_matching_terminal = $current_line->get_column($j_terminal_match);
19187 # set global flag for sub decide_if_aligned
19188 $is_matching_terminal_line = 1;
19191 # --------------------------------------------------------------------
19192 # It simplifies things to create a zero length side comment
19194 # --------------------------------------------------------------------
19195 make_side_comment( $new_line, $level_end );
19197 # --------------------------------------------------------------------
19198 # Decide if this is a simple list of items.
19199 # There are 3 list types: none, comma, comma-arrow.
19200 # We use this below to be less restrictive in deciding what to align.
19201 # --------------------------------------------------------------------
19202 if ($is_forced_break) {
19203 decide_if_list($new_line);
19206 if ($current_line) {
19208 # --------------------------------------------------------------------
19209 # Allow hanging side comment to join current group, if any
19210 # This will help keep side comments aligned, because otherwise we
19211 # will have to start a new group, making alignment less likely.
19212 # --------------------------------------------------------------------
19213 join_hanging_comment( $new_line, $current_line )
19214 if $is_hanging_side_comment;
19216 # --------------------------------------------------------------------
19217 # If there is just one previous line, and it has more fields
19218 # than the new line, try to join fields together to get a match with
19219 # the new line. At the present time, only a single leading '=' is
19220 # allowed to be compressed out. This is useful in rare cases where
19221 # a table is forced to use old breakpoints because of side comments,
19222 # and the table starts out something like this:
19223 # my %MonthChars = ('0', 'Jan', # side comment
19226 # Eliminating the '=' field will allow the remaining fields to line up.
19227 # This situation does not occur if there are no side comments
19228 # because scan_list would put a break after the opening '('.
19229 # --------------------------------------------------------------------
19230 eliminate_old_fields( $new_line, $current_line );
19232 # --------------------------------------------------------------------
19233 # If the new line has more fields than the current group,
19234 # see if we can match the first fields and combine the remaining
19235 # fields of the new line.
19236 # --------------------------------------------------------------------
19237 eliminate_new_fields( $new_line, $current_line );
19239 # --------------------------------------------------------------------
19240 # Flush previous group unless all common tokens and patterns match..
19241 # --------------------------------------------------------------------
19242 check_match( $new_line, $current_line );
19244 # --------------------------------------------------------------------
19245 # See if there is space for this line in the current group (if any)
19246 # --------------------------------------------------------------------
19247 if ($current_line) {
19248 check_fit( $new_line, $current_line );
19252 # --------------------------------------------------------------------
19253 # Append this line to the current group (or start new group)
19254 # --------------------------------------------------------------------
19255 accept_line($new_line);
19257 # Future update to allow this to vary:
19258 $current_line = $new_line if ( $maximum_line_index == 0 );
19260 # output this group if it ends in a terminal else or ternary line
19261 if ( defined($j_terminal_match) ) {
19263 # if there is only one line in the group (maybe due to failure to match
19264 # perfectly with previous lines), then align the ? or { of this
19265 # terminal line with the previous one unless that would make the line
19267 if ( $maximum_line_index == 0 ) {
19268 my $col_now = $current_line->get_column($j_terminal_match);
19269 my $pad = $col_matching_terminal - $col_now;
19270 my $padding_available =
19271 $current_line->get_available_space_on_right();
19272 if ( $pad > 0 && $pad <= $padding_available ) {
19273 $current_line->increase_field_width( $j_terminal_match, $pad );
19277 $is_matching_terminal_line = 0;
19280 # --------------------------------------------------------------------
19281 # Step 8. Some old debugging stuff
19282 # --------------------------------------------------------------------
19283 VALIGN_DEBUG_FLAG_APPEND && do {
19284 print "APPEND fields:";
19285 dump_array(@$rfields);
19286 print "APPEND tokens:";
19287 dump_array(@$rtokens);
19288 print "APPEND patterns:";
19289 dump_array(@$rpatterns);
19296 sub join_hanging_comment {
# Let a hanging side comment (a line whose only content is a side
# comment) join a group whose lines have more fields. The line qualifies
# when it has exactly two fields, its single token is '#', and its first
# field is blank. It is then padded with blank fields and empty
# tokens/patterns so that the comment moves to the last field index of
# the old line. Returns 0 when the line does not qualify.
19299 my $jmax = $line->get_jmax();
19300 return 0 unless $jmax == 1; # must be 2 fields
19301 my $rtokens = $line->get_rtokens();
19302 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
19303 my $rfields = $line->get_rfields();
19304 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
19305 my $old_line = shift;
19306 my $maximum_field_index = $old_line->get_jmax();
19308 unless $maximum_field_index > $jmax; # the current line has more fields
19309 my $rpatterns = $line->get_rpatterns();
19311 $line->set_is_hanging_side_comment(1);
# adopt the old line's field count and move the comment (with its token
# and pattern) to the last position
19312 $jmax = $maximum_field_index;
19313 $line->set_jmax($jmax);
19314 $$rfields[$jmax] = $$rfields[1];
19315 $$rtokens[ $jmax - 1 ] = $$rtokens[0];
19316 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
# fill the interior positions with placeholder values
19317 for ( my $j = 1 ; $j < $jmax ; $j++ ) {
19318 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
19319 $$rtokens[ $j - 1 ] = "";
19320 $$rpatterns[ $j - 1 ] = "";
19325 sub eliminate_old_fields {
# Record the new line's field count in $maximum_jmax_seen and
# $minimum_jmax_seen. Then, if the group holds exactly one previous line
# and that line has more fields than the new line, try to merge fields of
# the old line so that its tokens line up with the new line's tokens.
# On success the old line's alignments, jmax, tokens, fields and patterns
# are replaced.
19327 my $new_line = shift;
19328 my $jmax = $new_line->get_jmax();
19329 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
19330 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
19332 # there must be one previous line
19333 return unless ( $maximum_line_index == 0 );
19335 my $old_line = shift;
19336 my $maximum_field_index = $old_line->get_jmax();
19338 ###############################################
19339 # this line must have fewer fields
19340 return unless $maximum_field_index > $jmax;
19341 ###############################################
19343 # Identify specific cases where field elimination is allowed:
19344 # case=1: both lines have comma-separated lists, and the first
19345 # line has an equals
19346 # case=2: both lines have leading equals
19348 # case 1 is the default
19351 # See if case 2: both lines have leading '='
19352 # We'll require similar leading patterns in this case
19353 my $old_rtokens = $old_line->get_rtokens();
19354 my $rtokens = $new_line->get_rtokens();
19355 my $rpatterns = $new_line->get_rpatterns();
19356 my $old_rpatterns = $old_line->get_rpatterns();
19357 if ( $rtokens->[0] =~ /^=\d*$/
19358 && $old_rtokens->[0] eq $rtokens->[0]
19359 && $old_rpatterns->[0] eq $rpatterns->[0] )
19364 # not too many fewer fields in new line for case 1
19365 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
19367 # case 1 must have side comment
19368 my $old_rfields = $old_line->get_rfields();
19371 && length( $$old_rfields[$maximum_field_index] ) == 0 );
19373 my $rfields = $new_line->get_rfields();
19375 my $hid_equals = 0;
19377 my @new_alignments = ();
19378 my @new_fields = ();
19379 my @new_matching_patterns = ();
19380 my @new_matching_tokens = ();
19384 my $current_field = '';
19385 my $current_pattern = '';
19387 # loop over all old tokens
# Old fields and patterns are accumulated into $current_field and
# $current_pattern; when an old token equals the next new token the
# accumulated text becomes one merged field and the accumulators are
# cleared.
19389 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
19390 $current_field .= $$old_rfields[$k];
19391 $current_pattern .= $$old_rpatterns[$k];
19392 last if ( $j > $jmax - 1 );
19394 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
19396 $new_fields[$j] = $current_field;
19397 $new_matching_patterns[$j] = $current_pattern;
19398 $current_field = '';
19399 $current_pattern = '';
19400 $new_matching_tokens[$j] = $$old_rtokens[$k];
19401 $new_alignments[$j] = $old_line->get_alignment($k);
19406 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
19407 last if ( $case == 2 ); # avoid problems with stuff
19408 # like: $a=$b=$c=$d;
19412 if ( $in_match && $case == 1 )
19413 ; # disallow gaps in matching field types in case 1
19417 # Modify the current state if we are successful.
19418 # We must exactly reach the ends of both lists for success.
19419 if ( ( $j == $jmax )
19420 && ( $current_field eq '' )
19421 && ( $case != 1 || $hid_equals ) )
19423 $k = $maximum_field_index;
19424 $current_field .= $$old_rfields[$k];
19425 $current_pattern .= $$old_rpatterns[$k];
19426 $new_fields[$j] = $current_field;
19427 $new_matching_patterns[$j] = $current_pattern;
19429 $new_alignments[$j] = $old_line->get_alignment($k);
19430 $maximum_field_index = $j;
19432 $old_line->set_alignments(@new_alignments);
19433 $old_line->set_jmax($jmax);
19434 $old_line->set_rtokens( \@new_matching_tokens );
19435 $old_line->set_rfields( \@new_fields );
# NOTE(review): this installs the NEW line's pattern array (\@$rpatterns
# is the same reference as $rpatterns); @new_matching_patterns is filled
# above but not passed to any setter in the code visible here -- confirm
# that this is intended.
19436 $old_line->set_rpatterns( \@$rpatterns );
19440 # create an empty side comment if none exists
19441 sub make_side_comment {
# Parameters: $new_line is the line object to check; $level_end is the
# level value remembered for the next call when the line already has a
# side comment.
19442 my $new_line = shift;
19443 my $level_end = shift;
19444 my $jmax = $new_line->get_jmax();
19445 my $rtokens = $new_line->get_rtokens();
19447 # if line does not have a side comment...
# ...append a '#' token, an empty field and a '#' pattern, and store the
# increased field count as both jmax and jmax_original_line
19448 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
19449 my $rfields = $new_line->get_rfields();
19450 my $rpatterns = $new_line->get_rpatterns();
19451 $$rtokens[$jmax] = '#';
19452 $$rfields[ ++$jmax ] = '';
19453 $$rpatterns[$jmax] = '#';
19454 $new_line->set_jmax($jmax);
19455 $new_line->set_jmax_original_line($jmax);
19458 # line has a side comment..
19461 # don't remember old side comment location for very long
# (more than 12 output lines since the last side comment)
19462 my $line_number = $vertical_aligner_self->get_output_line_number();
19463 my $rfields = $new_line->get_rfields();
19465 $line_number - $last_side_comment_line_number > 12
19467 # and don't remember comment location across block level changes
19468 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
19471 forget_side_comment();
# remember where this side comment was seen for the next call
19473 $last_side_comment_line_number = $line_number;
19474 $last_side_comment_level = $level_end;
19478 sub decide_if_list {
19482 # A list will be taken to be a line with a forced break in which all
19483 # of the field separators are commas or comma-arrows (except for the
19486 # List separator tokens are things like ',3' or '=>2',
19487 # where the trailing digit is the nesting depth. Allow braces
19488 # to allow nested list items.
# The first token is the candidate list type. Tokens 1 .. $jmax - 2 are
# then tested against the separator pattern (comma, '=>' or '{') before
# the type is stored on the line with set_list_type.
19489 my $rtokens = $line->get_rtokens();
19490 my $test_token = $$rtokens[0];
19491 if ( $test_token =~ /^(\,|=>)/ ) {
19492 my $list_type = $test_token;
19493 my $jmax = $line->get_jmax();
19495 foreach ( 1 .. $jmax - 2 ) {
19496 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
19501 $line->set_list_type($list_type);
19505 sub eliminate_new_fields {
# If the new line has more fields than the old line and their leading
# tokens and patterns agree, fold the extra fields of the new line into
# the field just before the side comment so that both lines end up with
# the same field count. Does nothing when there is no current group.
19507 return unless ( $maximum_line_index >= 0 );
19508 my ( $new_line, $old_line ) = @_;
19509 my $jmax = $new_line->get_jmax();
19511 my $old_rtokens = $old_line->get_rtokens();
19512 my $rtokens = $new_line->get_rtokens();
# an "assignment" here means both lines start with the same '=' token
# (optionally followed by digits)
19513 my $is_assignment =
19514 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
19516 # must be monotonic variation
19517 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
19519 # must be more fields in the new line
19520 my $maximum_field_index = $old_line->get_jmax();
19521 return unless ( $maximum_field_index < $jmax );
19523 unless ($is_assignment) {
19525 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
19526 ; # only if monotonic
19528 # never combine fields of a comma list
19530 unless ( $maximum_field_index > 1 )
19531 && ( $new_line->get_list_type() !~ /^,/ );
19534 my $rfields = $new_line->get_rfields();
19535 my $rpatterns = $new_line->get_rpatterns();
19536 my $old_rpatterns = $old_line->get_rpatterns();
19538 # loop over all OLD tokens except comment and check match
19541 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
19542 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
19543 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
19550 # first tokens agree, so combine extra new tokens
# (each extra field and pattern is appended to position
# $maximum_field_index - 1 and then emptied)
19552 for $k ( $maximum_field_index .. $jmax - 1 ) {
19554 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
19555 $$rfields[$k] = "";
19556 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
19557 $$rpatterns[$k] = "";
# the last token becomes the side comment token, and the side comment
# field and pattern move down to the old line's last index
19560 $$rtokens[ $maximum_field_index - 1 ] = '#';
19561 $$rfields[$maximum_field_index] = $$rfields[$jmax];
19562 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
19563 $jmax = $maximum_field_index;
19565 $new_line->set_jmax($jmax);
19568 sub fix_terminal_ternary {
19570 # Add empty fields as necessary to align a ternary term
19575 # : $year % 100 ? 1
19576 # : $year % 400 ? 0
19579 # returns 1 if the terminal item should be indented
# NOTE(review): sub append stores this sub's return value in
# $j_terminal_match and passes it to get_column() as a field index --
# confirm that "returns 1" above still describes the return value.
#
# Parameters: references to the fields, tokens and patterns arrays of the
# terminal line. The arrays are modified in place, but only after all
# checks have passed (the work is done on copies).
19581 my ( $rfields, $rtokens, $rpatterns ) = @_;
19583 my $jmax = @{$rfields} - 1;
19584 my $old_line = $group_lines[$maximum_line_index];
19585 my $rfields_old = $old_line->get_rfields();
19587 my $rpatterns_old = $old_line->get_rpatterns();
19588 my $rtokens_old = $old_line->get_rtokens();
19589 my $maximum_field_index = $old_line->get_jmax();
19591 # look for the question mark after the :
19593 my $depth_question;
19595 for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
19596 my $tok = $rtokens_old->[$j];
19597 if ( $tok =~ /^\?(\d+)$/ ) {
19598 $depth_question = $1;
19600 # depth must be correct
19601 next unless ( $depth_question eq $group_level );
# $pad gets one space for each character of the leading '?' and the
# whitespace after it in the old line's field
19604 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
19605 $pad = " " x length($1);
19608 return; # shouldn't happen
19613 return unless ( defined($jquestion) ); # shouldn't happen
19615 # Now splice the tokens and patterns of the previous line
19616 # into the else line to ensure a match. Add empty fields
19618 my $jadd = $jquestion;
19620 # Work on copies of the actual arrays in case we have
19621 # to return due to an error
19622 my @fields = @{$rfields};
19623 my @patterns = @{$rpatterns};
19624 my @tokens = @{$rtokens};
19626 VALIGN_DEBUG_FLAG_TERNARY && do {
19628 print "CURRENT FIELDS=<@{$rfields_old}>\n";
19629 print "CURRENT TOKENS=<@{$rtokens_old}>\n";
19630 print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
19631 print "UNMODIFIED FIELDS=<@{$rfields}>\n";
19632 print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
19633 print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
19636 # handle cases of leading colon on this line
19637 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
19639 my ( $colon, $therest ) = ( $1, $2 );
19641 # Handle sub-case of first field with leading colon plus additional code
19642 # This is the usual situation as at the '1' below:
19644 # : $year % 400 ? 0
19648 # Split the first field after the leading colon and insert padding.
19649 # Note that this padding will remain even if the terminal value goes
19650 # out on a separate line. This does not seem to look too bad, so no
19651 # mechanism has been included to undo it.
19652 my $field1 = shift @fields;
19653 unshift @fields, ( $colon, $pad . $therest );
19655 # change the leading pattern from : to ?
19656 return unless ( $patterns[0] =~ s/^\:/?/ );
19658 # install leading tokens and patterns of existing line
19659 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
19660 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
19662 # insert appropriate number of empty fields
19663 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
19666 # handle sub-case of first field just equal to leading colon.
19667 # This can happen for example in the example below where
19668 # the leading '(' would create a new alignment token
19669 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
19670 # : ( $mname = $name . '->' );
19673 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
19675 # prepend a leading ? onto the second pattern
19676 $patterns[1] = "?b" . $patterns[1];
19678 # pad the second field
19679 $fields[1] = $pad . $fields[1];
19681 # install leading tokens and patterns of existing line, replacing
19682 # leading token and inserting appropriate number of empty fields
19683 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
19684 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
19685 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
19689 # Handle case of no leading colon on this line. This will
19690 # be the case when -wba=':' is used. For example,
19691 # $year % 400 ? 0 :
19695 # install leading tokens and patterns of existing line
19696 $patterns[0] = '?' . 'b' . $patterns[0];
19697 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
19698 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
19700 # insert appropriate number of empty fields
19701 $jadd = $jquestion + 1;
19702 $fields[0] = $pad . $fields[0];
19703 splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
19706 VALIGN_DEBUG_FLAG_TERNARY && do {
19708 print "MODIFIED TOKENS=<@tokens>\n";
19709 print "MODIFIED PATTERNS=<@patterns>\n";
19710 print "MODIFIED FIELDS=<@fields>\n";
19713 # all ok .. update the arrays
19714 @{$rfields} = @fields;
19715 @{$rtokens} = @tokens;
19716 @{$rpatterns} = @patterns;
19718 # force a flush after this line
19722 sub fix_terminal_else {
19724 # Add empty fields as necessary to align a balanced terminal
19725 # else block to a previous if/elsif/unless block,
19728 # if ( 1 || $x ) { print "ok 13\n"; }
19729 # else { print "not ok 13\n"; }
19731 # returns 1 if the else block should be indented
# NOTE(review): sub append stores this sub's return value in
# $j_terminal_match and passes it to get_column() as a field index --
# confirm that "returns 1" above still describes the return value.
#
# Parameters: references to the fields, tokens and patterns arrays of the
# else line; they are modified in place by the splices at the end.
19733 my ( $rfields, $rtokens, $rpatterns ) = @_;
19734 my $jmax = @{$rfields} - 1;
19735 return unless ( $jmax > 0 );
19737 # check for balanced else block following if/elsif/unless
19738 my $rfields_old = $current_line->get_rfields();
19740 # TBD: add handling for 'case'
19741 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
19743 # look for the opening brace after the else, and extract the depth
19744 my $tok_brace = $rtokens->[0];
19746 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
19748 # probably: "else # side_comment"
19751 my $rpatterns_old = $current_line->get_rpatterns();
19752 my $rtokens_old = $current_line->get_rtokens();
19753 my $maximum_field_index = $current_line->get_jmax();
19755 # be sure the previous if/elsif is followed by an opening paren
# (the expected token is '(' followed by the same depth as the brace)
19757 my $tok_paren = '(' . $depth_brace;
19758 my $tok_test = $rtokens_old->[$jparen];
19759 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
19761 # Now find the opening block brace
19763 for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
19764 my $tok = $rtokens_old->[$j];
19765 if ( $tok eq $tok_brace ) {
19770 return unless ( defined($jbrace) ); # shouldn't happen
19772 # Now splice the tokens and patterns of the previous line
19773 # into the else line to ensure a match. Add empty fields
19775 my $jadd = $jbrace - $jparen;
19776 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
19777 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
19778 splice( @{$rfields}, 1, 0, ('') x $jadd );
19780 # force a flush after this line if it does not follow a case
19782 unless ( $rfields_old->[0] =~ /^case\s*$/ );
19785 { # sub check_match
19786 my %is_good_alignment;
19790 # Vertically aligning on certain "good" tokens is usually okay
19791 # so we can be less restrictive in marginal cases.
19792 @_ = qw( { ? => = );
19794 @is_good_alignment{@_} = (1) x scalar(@_);
19799 # See if the current line matches the current vertical alignment group.
19800 # If not, flush the current group.
19801 my $new_line = shift;
19802 my $old_line = shift;
19804 # uses global variables:
19805 # $previous_minimum_jmax_seen
19806 # $maximum_jmax_seen
19807 # $maximum_line_index
19809 my $jmax = $new_line->get_jmax();
19810 my $maximum_field_index = $old_line->get_jmax();
19812 # flush if this line has too many fields
19813 if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
19815 # flush if adding this line would make a non-monotonic field count
19817 ( $maximum_field_index > $jmax ) # this has too few fields
19819 ( $previous_minimum_jmax_seen <
19820 $jmax ) # and wouldn't be monotonic
19821 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
19828 # otherwise see if this line matches the current group
19829 my $jmax_original_line = $new_line->get_jmax_original_line();
19830 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19831 my $rtokens = $new_line->get_rtokens();
19832 my $rfields = $new_line->get_rfields();
19833 my $rpatterns = $new_line->get_rpatterns();
19834 my $list_type = $new_line->get_list_type();
19836 my $group_list_type = $old_line->get_list_type();
19837 my $old_rpatterns = $old_line->get_rpatterns();
19838 my $old_rtokens = $old_line->get_rtokens();
19840 my $jlimit = $jmax - 1;
19841 if ( $maximum_field_index > $jmax ) {
19842 $jlimit = $jmax_original_line;
19843 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
19846 # handle comma-separated lists ..
19847 if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
19848 for my $j ( 0 .. $jlimit ) {
19849 my $old_tok = $$old_rtokens[$j];
19850 next unless $old_tok;
19851 my $new_tok = $$rtokens[$j];
19852 next unless $new_tok;
19854 # lists always match ...
19855 # unless they would align any '=>'s with ','s
19857 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
19858 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
19862 # do detailed check for everything else except hanging side comments
19863 elsif ( !$is_hanging_side_comment ) {
19865 my $leading_space_count = $new_line->get_leading_space_count();
19869 my $saw_good_alignment;
19871 for my $j ( 0 .. $jlimit ) {
19873 my $old_tok = $$old_rtokens[$j];
19874 my $new_tok = $$rtokens[$j];
19876 # Note on encoding used for alignment tokens:
19877 # -------------------------------------------
19878 # Tokens are "decorated" with information which can help
19879 # prevent unwanted alignments. Consider for example the
19880 # following two lines:
19881 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
19882 # local ( $i, $f ) = &'bdiv( $xn, $xd );
19883 # There are three alignment tokens in each line, a comma,
19884 # an =, and a comma. In the first line these three tokens
19886 # ,4+local-18 =3 ,4+split-7
19887 # and in the second line they are encoded as
19888 # ,4+local-18 =3 ,4+&'bdiv-8
19889 # Tokens always at least have token name and nesting
19890 # depth. So in this example the ='s are at depth 3 and
19891 # the ,'s are at depth 4. This prevents aligning tokens
19892 # of different depths. Commas contain additional
19893 # information, as follows:
19894 # , {depth} + {container name} - {spaces to opening paren}
19895 # This allows us to reject matching the rightmost commas
19896 # in the above two lines, since they are for different
19897 # function calls. This encoding is done in
19898 # 'sub send_lines_to_vertical_aligner'.
19900 # Pick off actual token.
19901 # Everything up to the first digit is the actual token.
19902 my $alignment_token = $new_tok;
19903 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
19905 # see if the decorated tokens match
19906 my $tokens_match = $new_tok eq $old_tok
19908 # Exception for matching terminal : of ternary statement..
19909 # consider containers prefixed by ? and : a match
19910 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
19912 # No match if the alignment tokens differ...
19913 if ( !$tokens_match ) {
19915 # ...Unless this is a side comment
19919 # and there is either at least one alignment token
19920 # or this is a single item following a list. This
19921 # latter rule is required for 'December' to join
19922 # the following list:
19924 # '', 'January', 'February', 'March',
19925 # 'April', 'May', 'June', 'July',
19926 # 'August', 'September', 'October', 'November',
19929 # If it doesn't then the -lp formatting will fail.
19930 && ( $j > 0 || $old_tok =~ /^,/ )
19933 $marginal_match = 1
19934 if ( $marginal_match == 0
19935 && $maximum_line_index == 0 );
19942 # Calculate amount of padding required to fit this in.
19943 # $pad is the number of spaces by which we must increase
19944 # the current field to squeeze in this field.
19946 length( $$rfields[$j] ) - $old_line->current_field_width($j);
19947 if ( $j == 0 ) { $pad += $leading_space_count; }
19949 # remember max pads to limit marginal cases
19950 if ( $alignment_token ne '#' ) {
19951 if ( $pad > $max_pad ) { $max_pad = $pad }
19952 if ( $pad < $min_pad ) { $min_pad = $pad }
19954 if ( $is_good_alignment{$alignment_token} ) {
19955 $saw_good_alignment = 1;
19958 # If patterns don't match, we have to be careful...
19959 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
19961 # flag this as a marginal match since patterns differ
19962 $marginal_match = 1
19963 if ( $marginal_match == 0 && $maximum_line_index == 0 );
19965 # We have to be very careful about aligning commas
19966 # when the patterns don't match, because it can be
19967 # worse to create an alignment where none is needed
19968 # than to omit one. Here's an example where the ','s
19969 # are not in named containers. The first line below
19970 # should not match the next two:
19971 # ( $a, $b ) = ( $b, $r );
19972 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
19973 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
19974 if ( $alignment_token eq ',' ) {
19976 # do not align commas unless they are in named containers
19977 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
19980 # do not align parens unless patterns match;
19981 # large ugly spaces can occur in math expressions.
19982 elsif ( $alignment_token eq '(' ) {
19984 # But we can allow a match if the parens don't
19985 # require any padding.
19986 if ( $pad != 0 ) { goto NO_MATCH }
19989 # Handle an '=' alignment with different patterns to
19991 elsif ( $alignment_token eq '=' ) {
19993 # It is best to be a little restrictive when
19994 # aligning '=' tokens. Here is an example of
19995 # two lines that we will not align:
19998 # The problem is that one is a 'my' declaration,
19999 # and the other isn't, so they're not very similar.
20000 # We will filter these out by comparing the first
20001 # letter of the pattern. This is crude, but works
20004 substr( $$old_rpatterns[$j], 0, 1 ) ne
20005 substr( $$rpatterns[$j], 0, 1 ) )
20010 # If we pass that test, we'll call it a marginal match.
20011 # Here is an example of a marginal match:
20013 # $op = compile_bblock($op);
20014 # The left tokens are both identifiers, but
20015 # one accesses a hash and the other doesn't.
20016 # We'll let this be a tentative match and undo
20017 # it later if we don't find more than 2 lines
20019 elsif ( $maximum_line_index == 0 ) {
20021 2; # =2 prevents being undone below
20026 # Don't let line with fewer fields increase column widths
20028 if ( $maximum_field_index > $jmax ) {
20030 # Exception: suspend this rule to allow last lines to join
20031 if ( $pad > 0 ) { goto NO_MATCH; }
20033 } ## end for my $j ( 0 .. $jlimit)
20035 # Turn off the "marginal match" flag in some cases...
20036 # A "marginal match" occurs when the alignment tokens agree
20037 # but there are differences in the other tokens (patterns).
20038 # If we leave the marginal match flag set, then the rule is that we
20039 # will align only if there are more than two lines in the group.
20040 # We will turn off the flag if we almost have a match
20041 # and either we have seen a good alignment token or we
20042 # just need a small pad (2 spaces) to fit. These rules are
20043 # the result of experimentation. Tokens which are misaligned by just
20044 # one or two characters are annoying. On the other hand,
20045 # large gaps to less important alignment tokens are also annoying.
20046 if ( $marginal_match == 1
20047 && $jmax == $maximum_field_index
20048 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
20051 $marginal_match = 0;
20053 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
20056 # We have a match (even if marginal).
20057 # If the current line has fewer fields than the current group
20058 # but otherwise matches, copy the remaining group fields to
20059 # make it a perfect match.
20060 if ( $maximum_field_index > $jmax ) {
20061 my $comment = $$rfields[$jmax];
20062 for $jmax ( $jlimit .. $maximum_field_index ) {
20063 $$rtokens[$jmax] = $$old_rtokens[$jmax];
20064 $$rfields[ ++$jmax ] = '';
20065 $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
20067 $$rfields[$jmax] = $comment;
20068 $new_line->set_jmax($jmax);
20073 ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
20081 return unless ( $maximum_line_index >= 0 );
20082 my $new_line = shift;
20083 my $old_line = shift;
20085 my $jmax = $new_line->get_jmax();
20086 my $leading_space_count = $new_line->get_leading_space_count();
20087 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
20088 my $rtokens = $new_line->get_rtokens();
20089 my $rfields = $new_line->get_rfields();
20090 my $rpatterns = $new_line->get_rpatterns();
20092 my $group_list_type = $group_lines[0]->get_list_type();
20094 my $padding_so_far = 0;
20095 my $padding_available = $old_line->get_available_space_on_right();
20097 # save current columns in case this doesn't work
20098 save_alignment_columns();
20100 my ( $j, $pad, $eight );
20101 my $maximum_field_index = $old_line->get_jmax();
20102 for $j ( 0 .. $jmax ) {
20104 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
20107 $pad += $leading_space_count;
20110 # remember largest gap of the group, excluding gap to side comment
20112 && $group_maximum_gap < -$pad
20114 && $j < $jmax - 1 )
20116 $group_maximum_gap = -$pad;
20121 ## This patch helps sometimes, but it doesn't check to see if
20122 ## the line is too long even without the side comment. It needs
20124 ##don't let a long token with no trailing side comment push
20125 ##side comments out, or end a group. (sidecmt1.t)
20126 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
20128 # This line will need space; let's see if we want to accept it..
20131 # not if this won't fit
20132 ( $pad > $padding_available )
20134 # previously, there were upper bounds placed on padding here
20135 # (maximum_whitespace_columns), but they were not really helpful
20140 # revert to starting state then flush; things didn't work out
20141 restore_alignment_columns();
20146 # patch to avoid excessive gaps in previous lines,
20147 # due to a line of fewer fields.
20148 # return join( ".",
20149 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
20150 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
20151 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
20153 # looks ok, squeeze this field in
20154 $old_line->increase_field_width( $j, $pad );
20155 $padding_available -= $pad;
20157 # remember largest gap of the group, excluding gap to side comment
20158 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
20159 $group_maximum_gap = $pad;
20166 # The current line either starts a new alignment group or is
20167 # accepted into the current alignment group.
20168 my $new_line = shift;
20169 $group_lines[ ++$maximum_line_index ] = $new_line;
20171 # initialize field lengths if starting new group
20172 if ( $maximum_line_index == 0 ) {
20174 my $jmax = $new_line->get_jmax();
20175 my $rfields = $new_line->get_rfields();
20176 my $rtokens = $new_line->get_rtokens();
20178 my $col = $new_line->get_leading_space_count();
20180 for $j ( 0 .. $jmax ) {
20181 $col += length( $$rfields[$j] );
20183 # create initial alignments for the new group
20185 if ( $j < $jmax ) { $token = $$rtokens[$j] }
20186 my $alignment = make_alignment( $col, $token );
20187 $new_line->set_alignment( $j, $alignment );
20190 $maximum_jmax_seen = $jmax;
20191 $minimum_jmax_seen = $jmax;
20194 # use previous alignments otherwise
20196 my @new_alignments =
20197 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
20198 $new_line->set_alignments(@new_alignments);
20201 # remember group jmax extremes for next call to append_line
20202 $previous_minimum_jmax_seen = $minimum_jmax_seen;
20203 $previous_maximum_jmax_seen = $maximum_jmax_seen;
20208 # debug routine to dump array contents
20213 # flush() sends the current Perl::Tidy::VerticalAligner group down the
20214 # pipeline to Perl::Tidy::FileWriter.
20216 # This is the external flush, which also empties the cache
20219 if ( $maximum_line_index < 0 ) {
20220 if ($cached_line_type) {
20221 $seqno_string = $cached_seqno_string;
20222 entab_and_output( $cached_line_text,
20223 $cached_line_leading_space_count,
20224 $last_group_level_written );
20225 $cached_line_type = 0;
20226 $cached_line_text = "";
20227 $cached_seqno_string = "";
20235 # This is the internal flush, which leaves the cache intact
20238 return if ( $maximum_line_index < 0 );
20240 # handle a group of comment lines
20241 if ( $group_type eq 'COMMENT' ) {
20243 VALIGN_DEBUG_FLAG_APPEND0 && do {
20244 my ( $a, $b, $c ) = caller();
20246 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
20249 my $leading_space_count = $comment_leading_space_count;
20250 my $leading_string = get_leading_string($leading_space_count);
20252 # zero leading space count if any lines are too long
20253 my $max_excess = 0;
20254 for my $i ( 0 .. $maximum_line_index ) {
20255 my $str = $group_lines[$i];
20257 length($str) + $leading_space_count - $rOpts_maximum_line_length;
20258 if ( $excess > $max_excess ) {
20259 $max_excess = $excess;
20263 if ( $max_excess > 0 ) {
20264 $leading_space_count -= $max_excess;
20265 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
20266 $last_outdented_line_at =
20267 $file_writer_object->get_output_line_number();
20268 unless ($outdented_line_count) {
20269 $first_outdented_line_at = $last_outdented_line_at;
20271 $outdented_line_count += ( $maximum_line_index + 1 );
20274 # write the group of lines
20275 my $outdent_long_lines = 0;
20276 for my $i ( 0 .. $maximum_line_index ) {
20277 write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
20278 $outdent_long_lines, "" );
20282 # handle a group of code lines
20285 VALIGN_DEBUG_FLAG_APPEND0 && do {
20286 my $group_list_type = $group_lines[0]->get_list_type();
20287 my ( $a, $b, $c ) = caller();
20288 my $maximum_field_index = $group_lines[0]->get_jmax();
20290 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
20294 # some small groups are best left unaligned
20295 my $do_not_align = decide_if_aligned();
20297 # optimize side comment location
20298 $do_not_align = adjust_side_comment($do_not_align);
20300 # recover spaces for -lp option if possible
20301 my $extra_leading_spaces = get_extra_leading_spaces();
20303 # all lines of this group have the same basic leading spacing
20304 my $group_leader_length = $group_lines[0]->get_leading_space_count();
20306 # add extra leading spaces if helpful
20307 my $min_ci_gap = improve_continuation_indentation( $do_not_align,
20308 $group_leader_length );
20310 # loop to output all lines
20311 for my $i ( 0 .. $maximum_line_index ) {
20312 my $line = $group_lines[$i];
20313 write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
20314 $group_leader_length, $extra_leading_spaces );
20317 initialize_for_new_group();
20320 sub decide_if_aligned {
# Decide whether the current group should be written without vertical
# alignment.  Takes no arguments; reads file-scoped group state
# ($maximum_line_index, @group_lines, $group_maximum_gap and the
# previous jmax extremes).
# Returns the $do_not_align flag.  Returns early with no value when the
# last line index is not 1 (i.e. the group is not exactly two lines) or
# when $is_matching_terminal_line is set.
20322 # Do not try to align two lines which are not really similar
20323 return unless $maximum_line_index == 1;
20324 return if ($is_matching_terminal_line);
20326 my $group_list_type = $group_lines[0]->get_list_type();
20328 my $do_not_align = (
20330 # always align lists
20335 # don't align if it was just a marginal match
20338 # don't align two lines with big gap
20339 || $group_maximum_gap > 12
20341 # or lines with differing number of alignment tokens
20342 # TODO: this could be improved. It occasionally rejects
20344 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
20348 # But try to convert them into a simple comment group if the first line
20349 # has a side comment
20350 my $rfields = $group_lines[0]->get_rfields();
20351 my $maximum_field_index = $group_lines[0]->get_jmax();
# the last field of a line holds its side comment, so a non-zero length
# here means the first line of the group has one
20353 && ( $maximum_line_index > 0 )
20354 && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
20359 return $do_not_align;
20362 sub adjust_side_comment {
# Try to place the side comment field of the current group at a readable
# column, preferring the column remembered in $last_comment_column.
# Parameter:
#   $do_not_align - flag from the caller saying the group is being left
#                   unaligned
# Returns: $do_not_align.
# Side effects visible here: may widen the next-to-last field of the
# first group line, updates $last_comment_column, and may call
# forget_side_comment().
20364 my $do_not_align = shift;
20366 # let's see if we can move the side comment field out a little
20367 # to improve readability (the last field is always a side comment field)
20368 my $have_side_comment = 0;
20369 my $first_side_comment_line = -1;
20370 my $maximum_field_index = $group_lines[0]->get_jmax();
20371 for my $i ( 0 .. $maximum_line_index ) {
20372 my $line = $group_lines[$i];
20374 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
20375 $have_side_comment = 1;
20376 $first_side_comment_line = $i;
# $kmax is the number of fields, so $kmax - 2 used below is the same index
# as $maximum_field_index - 1: the field just before the side comment
20381 my $kmax = $maximum_field_index + 1;
20383 if ($have_side_comment) {
20385 my $line = $group_lines[0];
20387 # the maximum space without exceeding the line length:
20388 my $avail = $line->get_available_space_on_right();
20390 # try to use the previous comment column
20391 my $side_comment_column = $line->get_column( $kmax - 2 );
# $move is how far the side comment would have to shift right to land on
# the previously used column (negative if it is already further right)
20392 my $move = $last_comment_column - $side_comment_column;
20394 ## my $sc_line0 = $side_comment_history[0]->[0];
20395 ## my $sc_col0 = $side_comment_history[0]->[1];
20396 ## my $sc_line1 = $side_comment_history[1]->[0];
20397 ## my $sc_col1 = $side_comment_history[1]->[1];
20398 ## my $sc_line2 = $side_comment_history[2]->[0];
20399 ## my $sc_col2 = $side_comment_history[2]->[1];
20401 ## # FUTURE UPDATES:
20402 ## # Be sure to ignore 'do not align' and '} # end comments'
20403 ## # Find first $move > 0 and $move <= $avail as follows:
20404 ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
20405 ## # 2. try sc_col2 if (line-sc_line2) < 12
20406 ## # 3. try min possible space, plus up to 8,
20407 ## # 4. try min possible space
20409 if ( $kmax > 0 && !$do_not_align ) {
20411 # but if this doesn't work, give up and use the minimum space
20412 if ( $move > $avail ) {
20413 $move = $rOpts_minimum_space_to_comment - 1;
20416 # but we want some minimum space to the comment
20417 my $min_move = $rOpts_minimum_space_to_comment - 1;
20419 && $last_side_comment_length > 0
20420 && ( $first_side_comment_line == 0 )
20421 && $group_level == $last_group_level_written )
20426 if ( $move < $min_move ) {
20430 # previously, an upper bound was placed on $move here,
20431 # (maximum_space_to_comment), but it was not helpful
20433 # don't exceed the available space
20434 if ( $move > $avail ) { $move = $avail }
20436 # we can only increase space, never decrease
20438 $line->increase_field_width( $maximum_field_index - 1, $move );
20441 # remember this column for the next group
20442 $last_comment_column = $line->get_column( $kmax - 2 );
20446 # try to at least line up the existing side comment location
20447 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
20448 $line->increase_field_width( $maximum_field_index - 1, $move );
20452 # reset side comment column if we can't align
20454 forget_side_comment();
20458 return $do_not_align;
20461 sub improve_continuation_indentation {
# Compute $min_ci_gap, the extra indentation that may be applied to the
# continuation lines of the current group.
# Parameters:
#   $do_not_align        - true if the group is being left unaligned
#   $group_leader_length - leading space count of the first group line
# Returns: $min_ci_gap.
# NOTE(review): the "Deactivated" banner below suggests the computation
# may be bypassed in practice - confirm before relying on it.
20462 my ( $do_not_align, $group_leader_length ) = @_;
20464 # See if we can increase the continuation indentation
20465 # to move all continuation lines closer to the next field
20466 # (unless it is a comment).
20468 # '$min_ci_gap' is the extra indentation that we may need to introduce.
20469 # We will only introduce this to fields which already have some ci.
20470 # Without this variable, we would occasionally get something like this
20473 # use overload '+' => \&plus,
20475 # '*' => \&multiply,
20478 # 'atan2' => \&atan2,
20480 # Whereas with this variable, we can shift variables over to get this:
20482 # use overload '+' => \&plus,
20484 # '*' => \&multiply,
20487 # 'atan2' => \&atan2,
20489 ## BUB: Deactivated####################
20490 # The trouble with this patch is that it may, for example,
20491 # move in some 'or's or ':'s, and leave some out, so that the
20492 # left edge alignment suffers.
20494 ###########################################
20496 my $maximum_field_index = $group_lines[0]->get_jmax();
# the maximum line length serves as a "nothing found yet" starting value;
# it is tested again with >= near the end of the sub
20498 my $min_ci_gap = $rOpts_maximum_line_length;
20499 if ( $maximum_field_index > 1 && !$do_not_align ) {
20501 for my $i ( 0 .. $maximum_line_index ) {
20502 my $line = $group_lines[$i];
20503 my $leading_space_count = $line->get_leading_space_count();
20504 my $rfields = $line->get_rfields();
# column of the first alignment minus the space already used by the
# indentation and the first field (presumably the value given to $gap)
20507 $line->get_column(0) -
20508 $leading_space_count -
20509 length( $$rfields[0] );
# only lines indented more than the group leader are considered here
20511 if ( $leading_space_count > $group_leader_length ) {
20512 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
20516 if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
20523 return $min_ci_gap;
20526 sub write_vertically_aligned_line {
# Join the fields of one group line into a single string, inserting the
# padding needed to reach each alignment column, and pass the result to
# write_leader_and_string.
# Parameters:
#   $line                 - line object of the current group
#   $min_ci_gap           - extra spaces added to lines indented more than
#                           the group leader
#   $do_not_align         - true to skip column alignment for this group
#   $group_leader_length  - leading space count of the first group line
#   $extra_leading_spaces - extra indentation added when the line is written
20528 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
20529 $extra_leading_spaces )
20531 my $rfields = $line->get_rfields();
20532 my $leading_space_count = $line->get_leading_space_count();
20533 my $outdent_long_lines = $line->get_outdent_long_lines();
20534 my $maximum_field_index = $line->get_jmax();
20535 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
20537 # add any extra spaces
20538 if ( $leading_space_count > $group_leader_length ) {
20539 $leading_space_count += $min_ci_gap;
# $str accumulates the line text, starting with the first field
20542 my $str = $$rfields[0];
20544 # loop to concatenate all fields of this line and needed padding
20545 my $total_pad_count = 0;
20547 for $j ( 1 .. $maximum_field_index ) {
20549 # skip zero-length side comments
20551 if ( ( $j == $maximum_field_index )
20552 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
20555 # compute spaces of padding before this field
20556 my $col = $line->get_column( $j - 1 );
20557 $pad = $col - ( length($str) + $leading_space_count );
# when not aligning, $pad is replaced by a fixed value; for the side
# comment field that value is $rOpts_minimum_space_to_comment - 1
20559 if ($do_not_align) {
20561 ( $j < $maximum_field_index )
20563 : $rOpts_minimum_space_to_comment - 1;
20566 # if the -fpsc flag is set, move the side comment to the selected
20567 # column if and only if it is possible, ignoring constraints on
20568 # line length and minimum space to comment
20569 if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
20571 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
20572 if ( $newpad >= 0 ) { $pad = $newpad; }
20575 # accumulate the padding
20576 if ( $pad > 0 ) { $total_pad_count += $pad; }
20579 if ( !defined $$rfields[$j] ) {
20580 write_diagnostics("UNDEFined field at j=$j\n");
20583 # only add padding when we have a finite field;
20584 # this avoids extra terminal spaces if we have empty fields
20585 if ( length( $$rfields[$j] ) > 0 ) {
20586 $str .= ' ' x $total_pad_count;
20587 $total_pad_count = 0;
20588 $str .= $$rfields[$j];
20591 $total_pad_count = 0;
20594 # update side comment history buffer
20595 if ( $j == $maximum_field_index ) {
20596 my $lineno = $file_writer_object->get_output_line_number();
# drop the oldest entry and append [ output line number, column ]
20597 shift @side_comment_history;
20598 push @side_comment_history, [ $lineno, $col ];
20602 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
20604 # ship this line off
20605 write_leader_and_string( $leading_space_count + $extra_leading_spaces,
20606 $str, $side_comment_length, $outdent_long_lines,
20607 $rvertical_tightness_flags );
20610 sub get_extra_leading_spaces {
# Takes no arguments; reads $extra_indent_ok, @group_lines and
# $maximum_line_index.  Returns the number of extra leading spaces to add
# to every line of the current group (0 when none can be recovered).
20612 #----------------------------------------------------------
20613 # Define any extra indentation space (for the -lp option).
20615 # If a list has side comments, sub scan_list must dump the
20616 # list before it sees everything. When this happens, it sets
20617 # the indentation to the standard scheme, but notes how
20618 # many spaces it would have liked to use. We may be able
20619 # to recover that space here in the event that all of the
20620 # lines of a list are back together again.
20621 #----------------------------------------------------------
20623 my $extra_leading_spaces = 0;
20624 if ($extra_indent_ok) {
20625 my $object = $group_lines[0]->get_indentation();
# only a reference (an indentation object) can carry recoverable spaces
20626 if ( ref($object) ) {
20627 my $extra_indentation_spaces_wanted =
20628 get_RECOVERABLE_SPACES($object);
20630 # all indentation objects must be the same
20632 for $i ( 1 .. $maximum_line_index ) {
# numeric != on references compares identity (assuming '!=' is not
# overloaded for these objects)
20633 if ( $object != $group_lines[$i]->get_indentation() ) {
20634 $extra_indentation_spaces_wanted = 0;
20639 if ($extra_indentation_spaces_wanted) {
20641 # the maximum space without exceeding the line length:
20642 my $avail = $group_lines[0]->get_available_space_on_right();
20643 $extra_leading_spaces =
20644 ( $avail > $extra_indentation_spaces_wanted )
20645 ? $extra_indentation_spaces_wanted
20648 # update the indentation object because with -icp the terminal
20649 # ');' will use the same adjustment.
# note the negated argument passed to the "decrease" method
20650 $object->permanently_decrease_AVAILABLE_SPACES(
20651 -$extra_leading_spaces );
20655 return $extra_leading_spaces;
20658 sub combine_fields {
# Collapse every line of the current group to two fields: field 0 is the
# concatenation of all former non-comment fields and field 1 is the
# former last (side comment) field.  Field widths are then grown so each
# line fits.  Takes no arguments.
20660 # combine all fields except for the comment field ( sidecmt.t )
20661 # Uses global variables:
20663 # $maximum_line_index
20665 my $maximum_field_index = $group_lines[0]->get_jmax();
# first pass: merge the text fields and reset both columns to 0
20666 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
20667 my $line = $group_lines[$j];
20668 my $rfields = $line->get_rfields();
20669 foreach ( 1 .. $maximum_field_index - 1 ) {
20670 $$rfields[0] .= $$rfields[$_];
20672 $$rfields[1] = $$rfields[$maximum_field_index];
20674 $line->set_jmax(1);
20675 $line->set_column( 0, 0 );
20676 $line->set_column( 1, 0 );
20679 $maximum_field_index = 1;
# second pass: widen each field by whatever its text still needs
20681 for $j ( 0 .. $maximum_line_index ) {
20682 my $line = $group_lines[$j];
20683 my $rfields = $line->get_rfields();
20684 for $k ( 0 .. $maximum_field_index ) {
20685 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
20687 $pad += $group_lines[$j]->get_leading_space_count();
20690 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
20696 sub get_output_line_number {
20698 # the output line number reported to a caller is the number of items
20699 # written plus the number of items in the buffer
# $maximum_line_index is the index of the last buffered group line, so
# 1 + $maximum_line_index is the count of lines still held in the group
20701 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
20704 sub write_leader_and_string {
# Write one finished output line, or hold it in the one-line cache so
# that a following line can be joined to it.
# Parameters:
#   $leading_space_count       - number of leading spaces for the line
#   $str                       - line text without its leading whitespace
#   $side_comment_length       - length of the side comment (0 if none)
#   $outdent_long_lines        - true to drop the indentation when the
#                                line would exceed the maximum length
#   $rvertical_tightness_flags - optional array ref of recombination data
#                                (layout described in the body)
# Side effects visible here: may call entab_and_output, updates the
# cached_* variables, the outdent counters, $seqno_string,
# $last_group_level_written, $last_side_comment_length and
# $extra_indent_ok.
20706 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
20707 $rvertical_tightness_flags )
20710 # handle outdenting of long lines:
20711 if ($outdent_long_lines) {
20714 $side_comment_length +
20715 $leading_space_count -
20716 $rOpts_maximum_line_length;
20717 if ( $excess > 0 ) {
20718 $leading_space_count = 0;
20719 $last_outdented_line_at =
20720 $file_writer_object->get_output_line_number();
# remember where the first outdented line occurred
20722 unless ($outdented_line_count) {
20723 $first_outdented_line_at = $last_outdented_line_at;
20725 $outdented_line_count++;
20729 # Make preliminary leading whitespace. It could get changed
20730 # later by entabbing, so we have to keep track of any changes
20731 # to the leading_space_count from here on.
20732 my $leading_string =
20733 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
20735 # Unpack any recombination data; it was packed by
20736 # sub send_lines_to_vertical_aligner. Contents:
20738 # [0] type: 1=opening 2=closing 3=opening block brace
20739 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
20740 # if closing: spaces of padding to use
20741 # [2] sequence number of container
20742 # [3] valid flag: do not append if this flag is false
20744 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20746 if ($rvertical_tightness_flags) {
20748 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20750 ) = @{$rvertical_tightness_flags};
20753 $seqno_string = $seqno_end;
20755 # handle any cached line ..
20756 # either append this line to it or write it out
20757 if ( length($cached_line_text) ) {
# a cached line that is not marked valid is simply written out
20759 if ( !$cached_line_valid ) {
20760 entab_and_output( $cached_line_text,
20761 $cached_line_leading_space_count,
20762 $last_group_level_written );
20765 # handle cached line with opening container token
20766 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
# spaces needed after the cached text to reach this line's indentation
20768 my $gap = $leading_space_count - length($cached_line_text);
20770 # handle option of just one tight opening per line:
20771 if ( $cached_line_flag == 1 ) {
20772 if ( defined($open_or_close) && $open_or_close == 1 ) {
# join: the cached text plus padding becomes this line's leader
20778 $leading_string = $cached_line_text . ' ' x $gap;
20779 $leading_space_count = $cached_line_leading_space_count;
20780 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
20783 entab_and_output( $cached_line_text,
20784 $cached_line_leading_space_count,
20785 $last_group_level_written );
20789 # handle cached line to place before this closing container token
# for a closing token $cached_line_flag is the number of padding spaces
20791 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
20793 if ( length($test_line) <= $rOpts_maximum_line_length ) {
20795 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
20797 # Patch to outdent closing tokens ending # in ');'
20798 # If we are joining a line like ');' to a previous stacked
20799 # set of closing tokens, then decide if we may outdent the
20800 # combined stack to the indentation of the ');'. Since we
20801 # should not normally outdent any of the other tokens more than
20802 # the indentation of the lines that contained them, we will
20803 # only do this if all of the corresponding opening
20804 # tokens were on the same line. This can happen with
20805 # -sot and -sct. For example, it is ok here:
20806 # __PACKAGE__->load_components( qw(
20811 # But, for example, we do not outdent in this example because
20812 # that would put the closing sub brace out farther than the
20813 # opening sub brace:
20815 # perltidy -sot -sct
20817 # '<Control-f>' => sub {
20819 # my $e = $c->XEvent;
20820 # itemsUnderArea $c;
20823 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
20825 # The way to tell this is if the stacked sequence numbers
20826 # of this output line are the reverse of the stacked
20827 # sequence numbers of the previous non-blank line of
20828 # sequence numbers. So we can join if the previous
20829 # nonblank string of tokens is the mirror image. For
20830 # example if stack )}] is 13:8:6 then we are looking for a
20831 # leading stack like [{( which is 6:8:13 We only need to
20832 # check the two ends, because the intermediate tokens must
20833 # fall in order. Note on speed: having to split on colons
20834 # and eliminate multiple colons might appear to be slow,
20835 # but it's not an issue because we almost never come
20836 # through here. In a typical file we don't.
# normalize both strings: strip leading colons, collapse colon runs
20837 $seqno_string =~ s/^:+//;
20838 $last_nonblank_seqno_string =~ s/^:+//;
20839 $seqno_string =~ s/:+/:/g;
20840 $last_nonblank_seqno_string =~ s/:+/:/g;
20842 # how many spaces can we outdent?
20844 $cached_line_leading_space_count - $leading_space_count;
20846 && length($seqno_string)
20847 && length($last_nonblank_seqno_string) ==
20848 length($seqno_string) )
20851 ( split ':', $last_nonblank_seqno_string );
20852 my @seqno_now = ( split ':', $seqno_string );
# mirror-image test: compare only the two ends of each list
20853 if ( $seqno_now[-1] == $seqno_last[0]
20854 && $seqno_now[0] == $seqno_last[-1] )
20858 # for absolute safety, be sure we only remove
20860 my $ws = substr( $test_line, 0, $diff );
20861 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
20863 $test_line = substr( $test_line, $diff );
20864 $cached_line_leading_space_count -= $diff;
20867 # shouldn't happen, but not critical:
20869 ## ERROR transferring indentation here
20876 $leading_string = "";
20877 $leading_space_count = $cached_line_leading_space_count;
20880 entab_and_output( $cached_line_text,
20881 $cached_line_leading_space_count,
20882 $last_group_level_written );
# the cache has been consumed or flushed above; mark it empty
20886 $cached_line_type = 0;
20887 $cached_line_text = "";
20889 # make the line to be written
20890 my $line = $leading_string . $str;
20892 # write or cache this line
# a line with no open/close type, or with a side comment, is written now;
# the assignments that follow store the line in the cache instead
# (presumably in the alternative branch of this test)
20893 if ( !$open_or_close || $side_comment_length > 0 ) {
20894 entab_and_output( $line, $leading_space_count, $group_level );
20897 $cached_line_text = $line;
20898 $cached_line_type = $open_or_close;
20899 $cached_line_flag = $tightness_flag;
20900 $cached_seqno = $seqno;
20901 $cached_line_valid = $valid;
20902 $cached_line_leading_space_count = $leading_space_count;
20903 $cached_seqno_string = $seqno_string;
20906 $last_group_level_written = $group_level;
20907 $last_side_comment_length = $side_comment_length;
20908 $extra_indent_ok = 0;
20911 sub entab_and_output {
# Optionally convert the leading spaces of $line to tabs, then send the
# line (with a newline appended) to $file_writer_object->write_code_line.
# Parameters:
#   $line                - complete output line, leading spaces included
#   $leading_space_count - number of leading whitespace characters in $line
#   $level               - indentation level, used for one tab per level
# Side effect: a non-empty $seqno_string is copied to
# $last_nonblank_seqno_string.
20912 my ( $line, $leading_space_count, $level ) = @_;
20914 # The line is currently correct if there is no tabbing (recommended!)
20915 # We may have to lop off some leading spaces and replace with tabs.
20916 if ( $leading_space_count > 0 ) {
20918 # Nothing to do if no tabs
20919 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
20920 || $rOpts_indent_columns <= 0 )
20926 # Handle entab option
20927 elsif ($rOpts_entab_leading_whitespace) {
# remainder and integer quotient of the space count by the entab width
# (presumably assigned to $space_count and $tab_count, which are used in
# the string built below)
20929 $leading_space_count % $rOpts_entab_leading_whitespace;
20931 int( $leading_space_count / $rOpts_entab_leading_whitespace );
20932 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
# replace the leading whitespace only if the line really starts with at
# least $leading_space_count whitespace characters
20933 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20934 substr( $line, 0, $leading_space_count ) = $leading_string;
20938 # shouldn't happen - program error counting whitespace
20939 # we'll skip entabbing
20941 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20946 # Handle option of one tab per level
20948 my $leading_string = ( "\t" x $level );
# spaces left over after $level tabs of $rOpts_indent_columns each
20950 $leading_space_count - $level * $rOpts_indent_columns;
20952 # shouldn't happen:
20953 if ( $space_count < 0 ) {
20955 "Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
# fall back to plain spaces for the whole leader
20957 $leading_string = ( ' ' x $leading_space_count );
20960 $leading_string .= ( ' ' x $space_count );
20962 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20963 substr( $line, 0, $leading_space_count ) = $leading_string;
20967 # shouldn't happen - program error counting whitespace
20968 # we'll skip entabbing
20970 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20975 $file_writer_object->write_code_line( $line . "\n" );
20976 if ($seqno_string) {
20977 $last_nonblank_seqno_string = $seqno_string;
{    # begin get_leading_string

    # previously computed strings, indexed by the whitespace count
    my @leading_string_cache;

    sub get_leading_string {

        # Return the leading whitespace string to use for a line which
        # is indented by the given number of columns.
        my ($leading_whitespace_count) = @_;

        # Zero whitespace (this includes multi-line quotes, which may have
        # a finite level) gets an empty string; this prevents tab problems
        return "" if ( $leading_whitespace_count <= 0 );

        # re-use an earlier result if we have one
        # NOTE(review): the cache is indexed by the count alone, although
        # the one-tab-per-level result below also depends on $group_level
        # -- confirm that a count always implies a single level.
        return $leading_string_cache[$leading_whitespace_count]
          if ( $leading_string_cache[$leading_whitespace_count] );

        # otherwise a new string has to be built
        my $leading_string;
        my $spaces_only = !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
          || $rOpts_indent_columns <= 0;

        # simple case: no tabs at all
        if ($spaces_only) {
            $leading_string = ' ' x $leading_whitespace_count;
        }

        # entab option: a tab for each full group of columns
        elsif ($rOpts_entab_leading_whitespace) {
            my $tab_count = int(
                $leading_whitespace_count / $rOpts_entab_leading_whitespace );
            my $space_count =
              $leading_whitespace_count % $rOpts_entab_leading_whitespace;
            $leading_string = "\t" x $tab_count . ' ' x $space_count;
        }

        # one tab per indentation level
        else {
            my $space_count =
              $leading_whitespace_count - $group_level * $rOpts_indent_columns;

            if ( $space_count >= 0 ) {
                $leading_string = ( "\t" x $group_level ) . ( ' ' x $space_count );
            }

            # shouldn't happen: use plain spaces instead
            else {
                warning(
"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
                );
                $leading_string = ( ' ' x $leading_whitespace_count );
            }
        }

        $leading_string_cache[$leading_whitespace_count] = $leading_string;
        return $leading_string;
    }
}    # end get_leading_string
sub report_anything_unusual {

    # Note in the log file how many long lines were outdented and where
    # the first (and, if more than one, the last) of them was written.
    # Nothing is logged if no lines were outdented.
    if ( $outdented_line_count > 0 ) {

        my @messages = (
            "$outdented_line_count long lines were outdented:\n",
            " First at output line $first_outdented_line_at\n",
        );
        if ( $outdented_line_count > 1 ) {
            push @messages, " Last at output line $last_outdented_line_at\n";
        }
        push @messages,
          " use -noll to prevent outdenting, -l=n to increase line length\n",
          "\n";

        foreach my $message (@messages) {
            write_logfile_entry($message);
        }
    }
}
21061 #####################################################################
21063 # the Perl::Tidy::FileWriter class writes the output file
21065 #####################################################################
21067 package Perl::Tidy::FileWriter;
21069 # Maximum number of little messages; probably need not be changed.
21070 use constant MAX_NAG_MESSAGES => 6;
sub write_logfile_entry {

    # Forward a message to the logger object, if there is one
    my $self   = shift;
    my $logger = $self->{_logger_object};
    $logger->write_logfile_entry(@_) if ($logger);
}
21082 my ( $line_sink_object, $rOpts, $logger_object ) = @_;
21085 _line_sink_object => $line_sink_object,
21086 _logger_object => $logger_object,
21088 _output_line_number => 1,
21089 _consecutive_blank_lines => 0,
21090 _consecutive_nonblank_lines => 0,
21091 _first_line_length_error => 0,
21092 _max_line_length_error => 0,
21093 _last_line_length_error => 0,
21094 _first_line_length_error_at => 0,
21095 _max_line_length_error_at => 0,
21096 _last_line_length_error_at => 0,
21097 _line_length_error_count => 0,
21098 _max_output_line_length => 0,
21099 _max_output_line_length_at => 0,
21105 $self->{_line_sink_object}->tee_on();
21110 $self->{_line_sink_object}->tee_off();
sub get_output_line_number {

    # Return the current value of the output line counter
    my ($self) = @_;
    return $self->{_output_line_number};
}
sub decrement_output_line_number {

    # Step the output line counter back by one
    my ($self) = @_;
    $self->{_output_line_number}--;
}
sub get_consecutive_nonblank_lines {

    # Return the count of non-blank lines written since the last blank one
    my ($self) = @_;
    return $self->{_consecutive_nonblank_lines};
}
sub reset_consecutive_blank_lines {

    # Forget any blank lines counted so far
    my ($self) = @_;
    $self->{_consecutive_blank_lines} = 0;
}
sub want_blank_line {

    # Write a blank line unless the previous output line was already blank
    my ($self) = @_;
    $self->write_blank_code_line() if ( !$self->{_consecutive_blank_lines} );
}
sub require_blank_code_lines {

    # Bring the number of consecutive blank lines up to $count.  The
    # blanks are written regardless of the value of -mbl unless -mbl=0,
    # which allows extra blank lines to be written for subs and packages
    # even with the default -mbl=1.
    my ( $self, $count ) = @_;

    # how many more blank lines are required
    my $need = $count - $self->{_consecutive_blank_lines};

    # force the output unless blank lines have been turned off completely
    my $forced = $self->{_rOpts}->{'maximum-consecutive-blank-lines'} > 0;

    foreach my $pass ( 1 .. $need ) {
        $self->write_blank_code_line($forced);
    }
}
sub write_blank_code_line {

    # Write one blank line.  Unless $forced is true the line is dropped
    # when the maximum number of consecutive blank lines has been reached.
    my ( $self, $forced ) = @_;
    my $rOpts = $self->{_rOpts};

    if ( !$forced ) {
        return
          if ( $self->{_consecutive_blank_lines} >=
            $rOpts->{'maximum-consecutive-blank-lines'} );
    }

    $self->{_consecutive_nonblank_lines} = 0;
    $self->{_consecutive_blank_lines}++;
    $self->write_line("\n");
}
sub write_code_line {

    # Write one line of code, keeping the counts of consecutive blank and
    # non-blank lines up to date.  A blank line is dropped if the maximum
    # number of consecutive blank lines has already been written.
    my ( $self, $text ) = @_;

    if ( $text !~ /^\s*$/ ) {
        $self->{_consecutive_blank_lines} = 0;
        $self->{_consecutive_nonblank_lines}++;
    }
    else {
        my $rOpts = $self->{_rOpts};
        return
          if ( $self->{_consecutive_blank_lines} >=
            $rOpts->{'maximum-consecutive-blank-lines'} );
        $self->{_consecutive_nonblank_lines} = 0;
        $self->{_consecutive_blank_lines}++;
    }
    $self->write_line($text);
}
21191 # TODO: go through and see if the test is necessary here
21192 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
21194 $self->{_line_sink_object}->write_line($a);
21196 # This calculation of excess line length ignores any internal tabs
21197 my $rOpts = $self->{_rOpts};
21198 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
21199 if ( $a =~ /^\t+/g ) {
21200 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
21203 # Note that we just incremented output line number to future value
21204 # so we must subtract 1 for current line number
21205 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
21206 $self->{_max_output_line_length} = length($a) - 1;
21207 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
21210 if ( $exceed > 0 ) {
21211 my $output_line_number = $self->{_output_line_number};
21212 $self->{_last_line_length_error} = $exceed;
21213 $self->{_last_line_length_error_at} = $output_line_number - 1;
21214 if ( $self->{_line_length_error_count} == 0 ) {
21215 $self->{_first_line_length_error} = $exceed;
21216 $self->{_first_line_length_error_at} = $output_line_number - 1;
21220 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
21222 $self->{_max_line_length_error} = $exceed;
21223 $self->{_max_line_length_error_at} = $output_line_number - 1;
21226 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
21227 $self->write_logfile_entry(
21228 "Line length exceeded by $exceed characters\n");
21230 $self->{_line_length_error_count}++;
sub report_line_length_errors {

    # Write a summary of the output line lengths to the log file: either
    # the longest line seen, if no line was too long, or else the number
    # of lines which were too long and where the first, worst, and last
    # of them are.
    my ($self) = @_;
    my $max_length  = $self->{_rOpts}->{'maximum-line-length'};
    my $error_count = $self->{_line_length_error_count};

    # No line was too long; just report the longest one
    if ( $error_count == 0 ) {
        $self->write_logfile_entry(
            "No lines exceeded $max_length characters\n");
        my $longest    = $self->{_max_output_line_length};
        my $longest_at = $self->{_max_output_line_length_at};
        $self->write_logfile_entry(
            " Maximum output line length was $longest at line $longest_at\n");
        return;
    }

    my $several = ( $error_count > 1 );

    my $plural = $several ? "s" : "";
    $self->write_logfile_entry(
        "$error_count output line$plural exceeded $max_length characters:\n");

    my $label    = $several ? "First" : "";
    my $first    = $self->{_first_line_length_error};
    my $first_at = $self->{_first_line_length_error_at};
    $self->write_logfile_entry(
        " $label at line $first_at by $first characters\n");

    # The worst and the last errors are only of interest if they can
    # differ from the first one
    if ($several) {
        my $worst    = $self->{_max_line_length_error};
        my $worst_at = $self->{_max_line_length_error_at};
        my $last     = $self->{_last_line_length_error};
        my $last_at  = $self->{_last_line_length_error_at};
        $self->write_logfile_entry(
            " Maximum at line $worst_at by $worst characters\n");
        $self->write_logfile_entry(
            " Last at line $last_at by $last characters\n");
    }
}
21278 #####################################################################
21280 # The Perl::Tidy::Debugger class shows line tokenization
21282 #####################################################################
21284 package Perl::Tidy::Debugger;
21288 my ( $class, $filename ) = @_;
21291 _debug_file => $filename,
21292 _debug_file_opened => 0,
sub really_open_debug_file {

    # Open the debug file named by $self->{_debug_file} for writing, save
    # the handle in $self->{_fh}, and write a one-line hint at the top.
    # If the file cannot be opened a warning is issued, $self->{_fh} is
    # left undefined, and nothing is written.
    my $self       = shift;
    my $debug_file = $self->{_debug_file};

    # The mode is passed separately from the name so that no character
    # in the file name can be taken as part of the open mode.
    my $fh = IO::File->new( $debug_file, '>' );
    unless ($fh) {
        warn("can't open $debug_file: $!\n");
    }

    # The file is marked as opened even after a failure so that the open
    # is attempted, and the warning issued, only once.
    $self->{_debug_file_opened} = 1;
    $self->{_fh}                = $fh;

    # Printing to an undefined handle would be a fatal error
    if ($fh) {
        print $fh
          "Use -dump-token-types (-dtt) to get a list of token type codes\n";
    }
}
sub close_debug_file {

    # Close the debug file if it was opened.  The close is wrapped in
    # an eval so that a failure here is ignored.
    my ($self) = @_;
    eval { $self->{_fh}->close() } if ( $self->{_debug_file_opened} );
}
sub write_debug_entry {

    # This is a debug dump routine which may be modified as necessary
    # to dump tokens on a line-by-line basis.  The output will be written
    # to the .DEBUG file when the -D flag is entered.
    #
    # Parameter: $line_of_tokens - reference to the hash describing one
    # tokenized line.  Two lines are written for it: the text rebuilt from
    # its tokens and, below it, the type of each token repeated to the
    # length of the token.  The debug file is opened on the first call.
    my $self           = shift;
    my $line_of_tokens = shift;

    my $input_line        = $line_of_tokens->{_line_text};
    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
    my $rtokens           = $line_of_tokens->{_rtokens};
    my $rlevels           = $line_of_tokens->{_rlevels};
    my $rslevels          = $line_of_tokens->{_rslevels};
    my $rblock_type       = $line_of_tokens->{_rblock_type};
    my $input_line_number = $line_of_tokens->{_line_number};
    my $line_type         = $line_of_tokens->{_line_type};

    my ( $j, $num );

    my $token_str              = "$input_line_number: ";
    my $reconstructed_original = "$input_line_number: ";
    my $block_str              = "$input_line_number: ";

    #$token_str .= "$line_type: ";
    #$reconstructed_original .= "$line_type: ";

    my $pattern   = "";
    my @next_char = ( '"', '"' );
    my $i_next    = 0;
    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
    my $fh = $self->{_fh};

    # Nothing can be written if the debug file could not be opened;
    # printing to an undefined handle would be a fatal error.
    return unless ($fh);

    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {

        # testing patterns
        if ( $$rtoken_type[$j] eq 'k' ) {
            $pattern .= $$rtokens[$j];
        }
        else {
            $pattern .= $$rtoken_type[$j];
        }
        $reconstructed_original .= $$rtokens[$j];
        $block_str .= "($$rblock_type[$j])";
        $num = length( $$rtokens[$j] );
        my $type_str = $$rtoken_type[$j];

        # be sure there are no blank tokens (shouldn't happen)
        # This can only happen if a programming error has been made
        # because all valid tokens are non-blank
        if ( $type_str eq ' ' ) {
            print $fh "BLANK TOKEN on the next line\n";
            $type_str = $next_char[$i_next];
            $i_next   = 1 - $i_next;
        }

        # a one-character type is repeated so that it lines up under
        # the whole of its token
        if ( length($type_str) == 1 ) {
            $type_str = $type_str x $num;
        }
        $token_str .= $type_str;
    }

    # Write what you want here ...
    # print $fh "$input_line\n";
    # print $fh "$pattern\n";
    print $fh "$reconstructed_original\n";
    print $fh "$token_str\n";

    #print $fh "$block_str\n";
}
21391 #####################################################################
21393 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
21394 # method for returning the next line to be parsed, as well as a
21395 # 'peek_ahead()' method
21397 # The input parameter is an object with a 'get_line()' method
21398 # which returns the next line to be parsed
21400 #####################################################################
21402 package Perl::Tidy::LineBuffer;
21407 my $line_source_object = shift;
21410 _line_source_object => $line_source_object,
21411 _rlookahead_buffer => [],
21417 my $buffer_index = shift;
21419 my $line_source_object = $self->{_line_source_object};
21420 my $rlookahead_buffer = $self->{_rlookahead_buffer};
21421 if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
21422 $line = $$rlookahead_buffer[$buffer_index];
21425 $line = $line_source_object->get_line();
21426 push( @$rlookahead_buffer, $line );
21434 my $line_source_object = $self->{_line_source_object};
21435 my $rlookahead_buffer = $self->{_rlookahead_buffer};
21437 if ( scalar(@$rlookahead_buffer) ) {
21438 $line = shift @$rlookahead_buffer;
21441 $line = $line_source_object->get_line();
21446 ########################################################################
21448 # the Perl::Tidy::Tokenizer package is essentially a filter which
21449 # reads lines of perl source code from a source object and provides
21450 # corresponding tokenized lines through its get_line() method. Lines
21451 # flow from the source_object to the caller like this:
21453 # source_object --> LineBuffer_object --> Tokenizer --> calling routine
21454 # get_line() get_line() get_line() line_of_tokens
21456 # The source object can be any object with a get_line() method which
# supplies one line (a character string) per call.
21458 # The LineBuffer object is created by the Tokenizer.
21459 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
21460 # containing one tokenized line for each call to its get_line() method.
# WARNING: This is not a real class yet.  Only one tokenizer may be used.
21464 ########################################################################
21466 package Perl::Tidy::Tokenizer;
21470 # Caution: these debug flags produce a lot of output
21471 # They should all be 0 except when debugging small scripts
21473 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
21474 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
21475 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
21476 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
21477 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
21479 my $debug_warning = sub {
21480 print "TOKENIZER_DEBUGGING with key $_[0]\n";
21483 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
21484 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
21485 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
21486 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
21487 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
# PACKAGE VARIABLES for processing an entire FILE.
21497 $last_nonblank_token
21498 $last_nonblank_type
21499 $last_nonblank_block_type
21507 %user_function_prototype
21509 %is_block_list_function
21510 %saw_function_definition
21514 $square_bracket_depth
21519 @nesting_sequence_number
21520 @current_sequence_number
21522 @paren_semicolon_count
21523 @paren_structural_type
21525 @brace_structural_type
21528 @square_bracket_type
21529 @square_bracket_structural_type
21531 @nested_ternary_flag
21532 @nested_statement_type
21533 @starting_line_of_current_depth
21536 # GLOBAL CONSTANTS for routines in this package
21538 %is_indirect_object_taker
21540 %expecting_operator_token
21541 %expecting_operator_types
21542 %expecting_term_types
21543 %expecting_term_token
21545 %is_file_test_operator
21547 %is_valid_token_type
21549 %is_code_block_token
21551 @opening_brace_names
21552 @closing_brace_names
21553 %is_keyword_taking_list
21554 %is_q_qq_qw_qx_qr_s_y_tr_m
21557 # possible values of operator_expected()
21558 use constant TERM => -1;
21559 use constant UNKNOWN => 0;
21560 use constant OPERATOR => 1;
21562 # possible values of context
21563 use constant SCALAR_CONTEXT => -1;
21564 use constant UNKNOWN_CONTEXT => 0;
21565 use constant LIST_CONTEXT => 1;
21567 # Maximum number of little messages; probably need not be changed.
21568 use constant MAX_NAG_MESSAGES => 6;
21572 # methods to count instances
# return the number of objects counted so far
sub get_count { return $_count }

# add one to the count and return the new value
sub _increment_count { return ++$_count }

# subtract one from the count and return the new value
sub _decrement_count { return --$_count }
21580 $_[0]->_decrement_count();
21587 # Note: 'tabs' and 'indent_columns' are temporary and should be
21590 source_object => undef,
21591 debugger_object => undef,
21592 diagnostics_object => undef,
21593 logger_object => undef,
21594 starting_level => undef,
21595 indent_columns => 4,
21597 entab_leading_space => undef,
21598 look_for_hash_bang => 0,
21600 look_for_autoloader => 1,
21601 look_for_selfloader => 1,
21602 starting_line_number => 1,
21604 my %args = ( %defaults, @_ );
21606 # we are given an object with a get_line() method to supply source lines
21607 my $source_object = $args{source_object};
21609 # we create another object with a get_line() and peek_ahead() method
21610 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
21612 # Tokenizer state data is as follows:
21613 # _rhere_target_list reference to list of here-doc targets
21614 # _here_doc_target the target string for a here document
21615 # _here_quote_character the type of here-doc quoting (" ' ` or none)
21616 # to determine if interpolation is done
21617 # _quote_target character we seek if chasing a quote
21618 # _line_start_quote line where we started looking for a long quote
21619 # _in_here_doc flag indicating if we are in a here-doc
21620 # _in_pod flag set if we are in pod documentation
21621 # _in_error flag set if we saw severe error (binary in script)
21622 # _in_data flag set if we are in __DATA__ section
21623 # _in_end flag set if we are in __END__ section
21624 # _in_format flag set if we are in a format description
21625 # _in_attribute_list flag telling if we are looking for attributes
21626 # _in_quote flag telling if we are chasing a quote
21627 # _starting_level indentation level of first line
21628 # _input_tabstr string denoting one indentation level of input file
21629 # _know_input_tabstr flag indicating if we know _input_tabstr
21630 # _line_buffer_object object with get_line() method to supply source code
21631 # _diagnostics_object place to write debugging information
21632 # _unexpected_error_count error count used to limit output
21633 # _lower_case_labels_at line numbers where lower case labels seen
21634 $tokenizer_self = {
21635 _rhere_target_list => [],
21637 _here_doc_target => "",
21638 _here_quote_character => "",
21644 _in_attribute_list => 0,
21646 _quote_target => "",
21647 _line_start_quote => -1,
21648 _starting_level => $args{starting_level},
21649 _know_starting_level => defined( $args{starting_level} ),
21650 _tabs => $args{tabs},
21651 _entab_leading_space => $args{entab_leading_space},
21652 _indent_columns => $args{indent_columns},
21653 _look_for_hash_bang => $args{look_for_hash_bang},
21654 _trim_qw => $args{trim_qw},
21655 _input_tabstr => "",
21656 _know_input_tabstr => -1,
21657 _last_line_number => $args{starting_line_number} - 1,
21658 _saw_perl_dash_P => 0,
21659 _saw_perl_dash_w => 0,
21660 _saw_use_strict => 0,
21661 _saw_v_string => 0,
21662 _look_for_autoloader => $args{look_for_autoloader},
21663 _look_for_selfloader => $args{look_for_selfloader},
21664 _saw_autoloader => 0,
21665 _saw_selfloader => 0,
21666 _saw_hash_bang => 0,
21669 _saw_negative_indentation => 0,
21670 _started_tokenizing => 0,
21671 _line_buffer_object => $line_buffer_object,
21672 _debugger_object => $args{debugger_object},
21673 _diagnostics_object => $args{diagnostics_object},
21674 _logger_object => $args{logger_object},
21675 _unexpected_error_count => 0,
21676 _started_looking_for_here_target_at => 0,
21677 _nearly_matched_here_target_at => undef,
21679 _rlower_case_labels_at => undef,
21682 prepare_for_a_new_file();
21683 find_starting_indentation_level();
21685 bless $tokenizer_self, $class;
21687 # This is not a full class yet, so die if an attempt is made to
21688 # create more than one object.
21690 if ( _increment_count() > 1 ) {
21692 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
21695 return $tokenizer_self;
21699 # interface to Perl::Tidy::Logger routines
21701 my $logger_object = $tokenizer_self->{_logger_object};
21702 if ($logger_object) {
21703 $logger_object->warning(@_);
21708 my $logger_object = $tokenizer_self->{_logger_object};
21709 if ($logger_object) {
21710 $logger_object->complain(@_);
sub write_logfile_entry {

    # pass a message on to the logger, if we have one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->write_logfile_entry(@_) if ($logger);
}
sub interrupt_logfile {

    # pass the request on to the logger, if we have one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->interrupt_logfile() if ($logger);
}
sub resume_logfile {

    # pass the request on to the logger, if we have one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->resume_logfile() if ($logger);
}
sub increment_brace_error {

    # pass the request on to the logger, if we have one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->increment_brace_error() if ($logger);
}
sub report_definite_bug {

    # pass the request on to the logger, if we have one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->report_definite_bug() if ($logger);
}
sub brace_warning {

    # pass a brace warning message on to the logger, if we have one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->brace_warning(@_) if ($logger);
}
sub get_saw_brace_error {

    # ask the logger, if we have one, for its brace error value; the
    # result of that call is the value of this sub
    my $logger = $tokenizer_self->{_logger_object};
    $logger->get_saw_brace_error() if ($logger);
}
21766 # interface to Perl::Tidy::Diagnostics routines
sub write_diagnostics {

    # pass a message on to the diagnostics object, if we have one
    my $diagnostics = $tokenizer_self->{_diagnostics_object};
    $diagnostics->write_diagnostics(@_) if ($diagnostics);
}
sub report_tokenization_errors {

    # Report anything which shows that the tokenizer did not finish in
    # a clean state (unbalanced indentation, an unfinished pod section,
    # here-document, format or quote) and then add a few suggestions
    # to the log file.
    my $self = shift;

    my $level = get_indentation_level();
    warning("final indentation level: $level\n")
      if ( $level != $tokenizer_self->{_starting_level} );

    check_final_nesting_depths();

    # the user asked us to wait for a hash-bang line which never came
    if ( $tokenizer_self->{_look_for_hash_bang} ) {
        warning(
            "hit EOF without seeing hash-bang line; maybe don't need -x?\n")
          unless ( $tokenizer_self->{_saw_hash_bang} );
    }

    warning("hit EOF while in format description\n")
      if ( $tokenizer_self->{_in_format} );

    if ( $tokenizer_self->{_in_pod} ) {
        my $message =
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n";

        # Just write a log entry if this is after __END__ or __DATA__
        # because this happens too often there to be worth a complaint
        if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
            write_logfile_entry($message);
        }
        else {
            complain($message);
        }
    }

    if ( $tokenizer_self->{_in_here_doc} ) {
        my $target     = $tokenizer_self->{_here_doc_target};
        my $started_at = $tokenizer_self->{_started_looking_for_here_target_at};
        my $message =
          ($target)
          ? "hit EOF in here document starting at line $started_at with target: $target\n"
          : "hit EOF in here document starting at line $started_at with empty target string\n";
        warning($message);

        # a line which differed from the target only by whitespace
        my $near_miss_at = $tokenizer_self->{_nearly_matched_here_target_at};
        if ($near_miss_at) {
            warning(
"NOTE: almost matched at input line $near_miss_at except for whitespace\n"
            );
        }
    }

    if ( $tokenizer_self->{_in_quote} ) {
        my $started_at = $tokenizer_self->{_line_start_quote};
        my $target     = $tokenizer_self->{_quote_target};
        my $what       = "quote/pattern";
        if ( $tokenizer_self->{_in_attribute_list} ) {
            $what = "attribute list";
        }
        warning(
"hit EOF seeking end of $what starting at line $started_at ending in $target\n"
        );
    }

    # the suggestion depends on the version of perl running this script
    if ( !$tokenizer_self->{_saw_perl_dash_w} ) {
        my $suggestion =
          ( $] < 5.006 )
          ? "Suggest including '-w parameter'\n"
          : "Suggest including 'use warnings;'\n";
        write_logfile_entry($suggestion);
    }

    write_logfile_entry("Use of -P parameter for defines is discouraged\n")
      if ( $tokenizer_self->{_saw_perl_dash_P} );

    write_logfile_entry("Suggest including 'use strict;'\n")
      unless ( $tokenizer_self->{_saw_use_strict} );

    # it is suggested that labels have at least one upper case character
    # for legibility and to avoid code breakage as new keywords are introduced
    if ( $tokenizer_self->{_rlower_case_labels_at} ) {
        my @lower_case_labels_at =
          @{ $tokenizer_self->{_rlower_case_labels_at} };
        write_logfile_entry(
            "Suggest using upper case characters in label(s)\n");

        # each line number is shown within its own pair of parens
        local $" = ')(';
        write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
    }
}
sub report_v_string {

    # Note the line of the first v-string seen, and warn if this version
    # of perl can't handle v-strings
    my ($tok) = @_;

    if ( !$tokenizer_self->{_saw_v_string} ) {
        $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
    }

    return unless ( $] < 5.006 );
    warning(
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
    );
}
sub get_input_line_number {

    # the number of the input line most recently read by the tokenizer
    my $line_number = $tokenizer_self->{_last_line_number};
    return $line_number;
}
21896 # returns the next tokenized line
21901 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
21902 # $square_bracket_depth, $paren_depth
21904 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
21905 $tokenizer_self->{_line_text} = $input_line;
21907 return undef unless ($input_line);
21909 my $input_line_number = ++$tokenizer_self->{_last_line_number};
21911 # Find and remove what characters terminate this line, including any
21913 my $input_line_separator = "";
21914 if ( chomp($input_line) ) { $input_line_separator = $/ }
21916 # TODO: what other characters should be included here?
21917 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
21918 $input_line_separator = $2 . $input_line_separator;
# for backwards compatibility we keep the line text terminated with
21922 # a newline character
21923 $input_line .= "\n";
21924 $tokenizer_self->{_line_text} = $input_line; # update
21926 # create a data structure describing this line which will be
21927 # returned to the caller.
21929 # _line_type codes are:
21930 # SYSTEM - system-specific code before hash-bang line
21931 # CODE - line of perl code (including comments)
21932 # POD_START - line starting pod, such as '=head'
21933 # POD - pod documentation text
21934 # POD_END - last line of pod section, '=cut'
21935 # HERE - text of here-document
21936 # HERE_END - last line of here-doc (target word)
21937 # FORMAT - format section
21938 # FORMAT_END - last line of format section, '.'
21939 # DATA_START - __DATA__ line
21940 # DATA - unidentified text following __DATA__
21941 # END_START - __END__ line
21942 # END - unidentified text following __END__
21943 # ERROR - we are in big trouble, probably not a perl script
21946 # _curly_brace_depth - depth of curly braces at start of line
21947 # _square_bracket_depth - depth of square brackets at start of line
21948 # _paren_depth - depth of parens at start of line
21949 # _starting_in_quote - this line continues a multi-line quote
21950 # (so don't trim leading blanks!)
21951 # _ending_in_quote - this line ends in a multi-line quote
21952 # (so don't trim trailing blanks!)
21953 my $line_of_tokens = {
21954 _line_type => 'EOF',
21955 _line_text => $input_line,
21956 _line_number => $input_line_number,
21957 _rtoken_type => undef,
21960 _rslevels => undef,
21961 _rblock_type => undef,
21962 _rcontainer_type => undef,
21963 _rcontainer_environment => undef,
21964 _rtype_sequence => undef,
21965 _rnesting_tokens => undef,
21966 _rci_levels => undef,
21967 _rnesting_blocks => undef,
21968 _python_indentation_level => -1, ## 0,
21969 _starting_in_quote => 0, # to be set by subroutine
21970 _ending_in_quote => 0,
21971 _curly_brace_depth => $brace_depth,
21972 _square_bracket_depth => $square_bracket_depth,
21973 _paren_depth => $paren_depth,
21974 _quote_character => '',
21977 # must print line unchanged if we are in a here document
21978 if ( $tokenizer_self->{_in_here_doc} ) {
21980 $line_of_tokens->{_line_type} = 'HERE';
21981 my $here_doc_target = $tokenizer_self->{_here_doc_target};
21982 my $here_quote_character = $tokenizer_self->{_here_quote_character};
21983 my $candidate_target = $input_line;
21984 chomp $candidate_target;
21985 if ( $candidate_target eq $here_doc_target ) {
21986 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21987 $line_of_tokens->{_line_type} = 'HERE_END';
21988 write_logfile_entry("Exiting HERE document $here_doc_target\n");
21990 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21991 if (@$rhere_target_list) { # there can be multiple here targets
21992 ( $here_doc_target, $here_quote_character ) =
21993 @{ shift @$rhere_target_list };
21994 $tokenizer_self->{_here_doc_target} = $here_doc_target;
21995 $tokenizer_self->{_here_quote_character} =
21996 $here_quote_character;
21997 write_logfile_entry(
21998 "Entering HERE document $here_doc_target\n");
21999 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
22000 $tokenizer_self->{_started_looking_for_here_target_at} =
22001 $input_line_number;
22004 $tokenizer_self->{_in_here_doc} = 0;
22005 $tokenizer_self->{_here_doc_target} = "";
22006 $tokenizer_self->{_here_quote_character} = "";
22010 # check for error of extra whitespace
22011 # note for PERL6: leading whitespace is allowed
22013 $candidate_target =~ s/\s*$//;
22014 $candidate_target =~ s/^\s*//;
22015 if ( $candidate_target eq $here_doc_target ) {
22016 $tokenizer_self->{_nearly_matched_here_target_at} =
22017 $input_line_number;
22020 return $line_of_tokens;
22023 # must print line unchanged if we are in a format section
22024 elsif ( $tokenizer_self->{_in_format} ) {
22026 if ( $input_line =~ /^\.[\s#]*$/ ) {
22027 write_logfile_entry("Exiting format section\n");
22028 $tokenizer_self->{_in_format} = 0;
22029 $line_of_tokens->{_line_type} = 'FORMAT_END';
22032 $line_of_tokens->{_line_type} = 'FORMAT';
22034 return $line_of_tokens;
22037 # must print line unchanged if we are in pod documentation
22038 elsif ( $tokenizer_self->{_in_pod} ) {
22040 $line_of_tokens->{_line_type} = 'POD';
22041 if ( $input_line =~ /^=cut/ ) {
22042 $line_of_tokens->{_line_type} = 'POD_END';
22043 write_logfile_entry("Exiting POD section\n");
22044 $tokenizer_self->{_in_pod} = 0;
22046 if ( $input_line =~ /^\#\!.*perl\b/ ) {
22048 "Hash-bang in pod can cause older versions of perl to fail! \n"
22052 return $line_of_tokens;
22055 # must print line unchanged if we have seen a severe error (i.e., we
# are seeing illegal tokens and cannot continue.  Syntax errors do
22057 # not pass this route). Calling routine can decide what to do, but
22058 # the default can be to just pass all lines as if they were after __END__
22059 elsif ( $tokenizer_self->{_in_error} ) {
22060 $line_of_tokens->{_line_type} = 'ERROR';
22061 return $line_of_tokens;
22064 # print line unchanged if we are __DATA__ section
22065 elsif ( $tokenizer_self->{_in_data} ) {
22067 # ...but look for POD
22068 # Note that the _in_data and _in_end flags remain set
22069 # so that we return to that state after seeing the
22070 # end of a pod section
22071 if ( $input_line =~ /^=(?!cut)/ ) {
22072 $line_of_tokens->{_line_type} = 'POD_START';
22073 write_logfile_entry("Entering POD section\n");
22074 $tokenizer_self->{_in_pod} = 1;
22075 return $line_of_tokens;
22078 $line_of_tokens->{_line_type} = 'DATA';
22079 return $line_of_tokens;
22083 # print line unchanged if we are in __END__ section
22084 elsif ( $tokenizer_self->{_in_end} ) {
22086 # ...but look for POD
22087 # Note that the _in_data and _in_end flags remain set
22088 # so that we return to that state after seeing the
22089 # end of a pod section
22090 if ( $input_line =~ /^=(?!cut)/ ) {
22091 $line_of_tokens->{_line_type} = 'POD_START';
22092 write_logfile_entry("Entering POD section\n");
22093 $tokenizer_self->{_in_pod} = 1;
22094 return $line_of_tokens;
22097 $line_of_tokens->{_line_type} = 'END';
22098 return $line_of_tokens;
22102 # check for a hash-bang line if we haven't seen one
22103 if ( !$tokenizer_self->{_saw_hash_bang} ) {
22104 if ( $input_line =~ /^\#\!.*perl\b/ ) {
22105 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
22107 # check for -w and -P flags
22108 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
22109 $tokenizer_self->{_saw_perl_dash_P} = 1;
22112 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
22113 $tokenizer_self->{_saw_perl_dash_w} = 1;
22116 if ( ( $input_line_number > 1 )
22117 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
22120 # this is helpful for VMS systems; we may have accidentally
22121 # tokenized some DCL commands
22122 if ( $tokenizer_self->{_started_tokenizing} ) {
22124 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
22128 complain("Useless hash-bang after line 1\n");
22132 # Report the leading hash-bang as a system line
22133 # This will prevent -dac from deleting it
22135 $line_of_tokens->{_line_type} = 'SYSTEM';
22136 return $line_of_tokens;
22141 # wait for a hash-bang before parsing if the user invoked us with -x
22142 if ( $tokenizer_self->{_look_for_hash_bang}
22143 && !$tokenizer_self->{_saw_hash_bang} )
22145 $line_of_tokens->{_line_type} = 'SYSTEM';
22146 return $line_of_tokens;
22149 # a first line of the form ': #' will be marked as SYSTEM
22150 # since lines of this form may be used by tcsh
22151 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
22152 $line_of_tokens->{_line_type} = 'SYSTEM';
22153 return $line_of_tokens;
22156 # now we know that it is ok to tokenize the line...
22157 # the line tokenizer will modify any of these private variables:
22158 # _rhere_target_list
22165 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
22166 tokenize_this_line($line_of_tokens);
22168 # Now finish defining the return structure and return it
22169 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
22171 # handle severe error (binary data in script)
22172 if ( $tokenizer_self->{_in_error} ) {
22173 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
22174 warning("Giving up after error\n");
22175 $line_of_tokens->{_line_type} = 'ERROR';
22176 reset_indentation_level(0); # avoid error messages
22177 return $line_of_tokens;
22180 # handle start of pod documentation
22181 if ( $tokenizer_self->{_in_pod} ) {
22183 # This gets tricky..above a __DATA__ or __END__ section, perl
22184 # accepts '=cut' as the start of pod section. But afterwards,
22185 # only pod utilities see it and they may ignore an =cut without
22186 # leading =head. In any case, this isn't good.
22187 if ( $input_line =~ /^=cut\b/ ) {
22188 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
22189 complain("=cut while not in pod ignored\n");
22190 $tokenizer_self->{_in_pod} = 0;
22191 $line_of_tokens->{_line_type} = 'POD_END';
22194 $line_of_tokens->{_line_type} = 'POD_START';
22196 "=cut starts a pod section .. this can fool pod utilities.\n"
22198 write_logfile_entry("Entering POD section\n");
22203 $line_of_tokens->{_line_type} = 'POD_START';
22204 write_logfile_entry("Entering POD section\n");
22207 return $line_of_tokens;
22210 # update indentation levels for log messages
22211 if ( $input_line !~ /^\s*$/ ) {
22212 my $rlevels = $line_of_tokens->{_rlevels};
22213 my $structural_indentation_level = $$rlevels[0];
22214 my ( $python_indentation_level, $msg ) =
22215 find_indentation_level( $input_line, $structural_indentation_level );
22216 if ($msg) { write_logfile_entry("$msg") }
22217 if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
22218 $line_of_tokens->{_python_indentation_level} =
22219 $python_indentation_level;
22223 # see if this line contains here doc targets
22224 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
22225 if (@$rhere_target_list) {
22227 my ( $here_doc_target, $here_quote_character ) =
22228 @{ shift @$rhere_target_list };
22229 $tokenizer_self->{_in_here_doc} = 1;
22230 $tokenizer_self->{_here_doc_target} = $here_doc_target;
22231 $tokenizer_self->{_here_quote_character} = $here_quote_character;
22232 write_logfile_entry("Entering HERE document $here_doc_target\n");
22233 $tokenizer_self->{_started_looking_for_here_target_at} =
22234 $input_line_number;
22237 # NOTE: __END__ and __DATA__ statements are written unformatted
22238 # because they can theoretically contain additional characters
22239 # which are not tokenized (and cannot be read with <DATA> either!).
22240 if ( $tokenizer_self->{_in_data} ) {
22241 $line_of_tokens->{_line_type} = 'DATA_START';
22242 write_logfile_entry("Starting __DATA__ section\n");
22243 $tokenizer_self->{_saw_data} = 1;
22245 # keep parsing after __DATA__ if use SelfLoader was seen
22246 if ( $tokenizer_self->{_saw_selfloader} ) {
22247 $tokenizer_self->{_in_data} = 0;
22248 write_logfile_entry(
22249 "SelfLoader seen, continuing; -nlsl deactivates\n");
22252 return $line_of_tokens;
22255 elsif ( $tokenizer_self->{_in_end} ) {
22256 $line_of_tokens->{_line_type} = 'END_START';
22257 write_logfile_entry("Starting __END__ section\n");
22258 $tokenizer_self->{_saw_end} = 1;
22260 # keep parsing after __END__ if use AutoLoader was seen
22261 if ( $tokenizer_self->{_saw_autoloader} ) {
22262 $tokenizer_self->{_in_end} = 0;
22263 write_logfile_entry(
22264 "AutoLoader seen, continuing; -nlal deactivates\n");
22266 return $line_of_tokens;
22269 # now, finally, we know that this line is type 'CODE'
22270 $line_of_tokens->{_line_type} = 'CODE';
22272 # remember if we have seen any real code
22273 if ( !$tokenizer_self->{_started_tokenizing}
22274 && $input_line !~ /^\s*$/
22275 && $input_line !~ /^\s*#/ )
22277 $tokenizer_self->{_started_tokenizing} = 1;
22280 if ( $tokenizer_self->{_debugger_object} ) {
22281 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
22284 # Note: if keyword 'format' occurs in this line code, it is still CODE
22285 # (keyword 'format' need not start a line)
22286 if ( $tokenizer_self->{_in_format} ) {
22287 write_logfile_entry("Entering format section\n");
22290 if ( $tokenizer_self->{_in_quote}
22291 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
22294 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
22296 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
22298 $tokenizer_self->{_line_start_quote} = $input_line_number;
22299 write_logfile_entry(
22300 "Start multi-line quote or pattern ending in $quote_target\n");
22303 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
22304 and !$tokenizer_self->{_in_quote} )
22306 $tokenizer_self->{_line_start_quote} = -1;
22307 write_logfile_entry("End of multi-line quote or pattern\n");
22310 # we are returning a line of CODE
22311 return $line_of_tokens;
# Work out the indentation level at which the input text starts, record it
# in $tokenizer_self->{_starting_level} and pass it to
# reset_indentation_level.
#
# Order of preference, as implemented below:
#   1. a level already marked as known (_know_starting_level);
#   2. zero, when a hash-bang line is being looked for (_look_for_hash_bang);
#   3. a guess made by peeking ahead in the line buffer and handing the
#      first line that is neither a comment nor blank to
#      find_indentation_level.
22314 sub find_starting_indentation_level {
22316 # USES GLOBAL VARIABLES: $tokenizer_self
22317 my $starting_level = 0;
22318 my $know_input_tabstr = -1; # flag for find_indentation_level
22320 # use value if given as parameter
22321 if ( $tokenizer_self->{_know_starting_level} ) {
22322 $starting_level = $tokenizer_self->{_starting_level};
22325 # if we know there is a hash_bang line, the level must be zero
22326 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
22327 $tokenizer_self->{_know_starting_level} = 1;
22330 # otherwise figure it out from the input file
# a negative structural level asks find_indentation_level to guess the
# level from the leading whitespace alone
22334 my $structural_indentation_level = -1; # flag for find_indentation_level
22336 # keep looking at lines until we find a hash bang or piece of code
22339 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
22342 # if first line is #! then assume starting level is zero
22343 if ( $i == 1 && $line =~ /^\#\!/ ) {
22344 $starting_level = 0;
22347 next if ( $line =~ /^\s*#/ ); # skip past comments
22348 next if ( $line =~ /^\s*$/ ); # skip past blank lines
22349 ( $starting_level, $msg ) =
22350 find_indentation_level( $line, $structural_indentation_level );
22351 if ($msg) { write_logfile_entry("$msg") }
# describe in the log file how the starting level was arrived at
22354 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
22356 if ( $starting_level > 0 ) {
22358 my $input_tabstr = $tokenizer_self->{_input_tabstr};
22359 if ( $input_tabstr eq "\t" ) {
22360 $msg .= "by guessing input tabbing uses 1 tab per level\n";
22363 my $cols = length($input_tabstr);
22365 "by guessing input tabbing uses $cols blanks per level\n";
22368 write_logfile_entry("$msg");
# publish the result for the rest of the tokenizer
22370 $tokenizer_self->{_starting_level} = $starting_level;
22371 reset_indentation_level($starting_level);
22374 # Find indentation level given an input line. At the same time, try to
22375 # figure out the input tabbing scheme.
22377 # There are two types of calls:
22379 # Type 1: $structural_indentation_level < 0
22380 # In this case we have to guess $input_tabstr to figure out the level.
22382 # Type 2: $structural_indentation_level >= 0
22383 # In this case the level of this line is known, and this routine can
22384 # update the tabbing string, if still unknown, to make the level correct.
22386 sub find_indentation_level {
22387 my ( $line, $structural_indentation_level ) = @_;
# Parameters:
#   $line                         - the raw input line to examine
#   $structural_indentation_level - known level of the line, or a negative
#                                   value when the level must be guessed
# Returns the list ( $level, $msg ); callers in this file write a
# non-empty $msg to the log file.
# Side effects: may update $tokenizer_self->{_know_input_tabstr} and
# $tokenizer_self->{_input_tabstr}.
22389 # USES GLOBAL VARIABLES: $tokenizer_self
22393 my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
22394 my $input_tabstr = $tokenizer_self->{_input_tabstr};
22396 # find leading whitespace
22397 my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
22399 # make first guess at input tabbing scheme if necessary
# (a negative $know_input_tabstr means no guess has been made yet)
22400 if ( $know_input_tabstr < 0 ) {
22402 $know_input_tabstr = 0;
22404 # When -et=n is used for the output formatting, we will assume that
22405 # tabs in the input formatting were also produced with -et=n. This may
22406 # not be true, but it is the best guess because it will keep leading
22407 # whitespace unchanged on repeated formatting on small pieces of code
22408 # when -et=n is used. Thanks to Sam Kington for this patch.
22409 if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) {
22410 $leading_whitespace =~ s{^ (\t*) }
22411 { " " x (length($1) * $tabsize) }xe;
22412 $input_tabstr = " " x $tokenizer_self->{_indent_columns};
22414 elsif ( $tokenizer_self->{_tabs} ) {
22415 $input_tabstr = "\t";
22416 if ( length($leading_whitespace) > 0 ) {
22417 if ( $leading_whitespace !~ /\t/ ) {
22419 my $cols = $tokenizer_self->{_indent_columns};
22421 if ( length($leading_whitespace) < $cols ) {
22422 $cols = length($leading_whitespace);
22424 $input_tabstr = " " x $cols;
22429 $input_tabstr = " " x $tokenizer_self->{_indent_columns};
22431 if ( length($leading_whitespace) > 0 ) {
22432 if ( $leading_whitespace =~ /^\t/ ) {
22433 $input_tabstr = "\t";
22437 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
22438 $tokenizer_self->{_input_tabstr} = $input_tabstr;
22441 # determine the input tabbing scheme if possible
# (only attempted when the line is indented and its structural level is
# known and positive, so the level can serve as a divisor below)
22442 if ( ( $know_input_tabstr == 0 )
22443 && ( length($leading_whitespace) > 0 )
22444 && ( $structural_indentation_level > 0 ) )
22446 my $saved_input_tabstr = $input_tabstr;
22448 # check for common case of one tab per indentation level
# NOTE(review): the same condition is tested twice in a row here; the
# inner test looks redundant -- confirm before simplifying.
22449 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
22450 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
22451 $input_tabstr = "\t";
22452 $msg = "Guessing old indentation was tab character\n";
22458 # detab any tabs based on 8 blanks per tab
# NOTE(review): the pattern ^\t+ replaces the whole leading run of tabs
# with one fixed string regardless of how many tabs there are -- confirm
# that this is intended.
22460 if ( $leading_whitespace =~ s/^\t+/ /g ) {
22461 $entabbed = "entabbed";
22464 # now compute tabbing from number of spaces
22466 length($leading_whitespace) / $structural_indentation_level;
22467 if ( $columns == int $columns ) {
22469 "Guessing old indentation was $columns $entabbed spaces\n";
22472 $columns = int $columns;
22474 "old indentation is unclear, using $columns $entabbed spaces\n";
22476 $input_tabstr = " " x $columns;
# the tabbing scheme is now considered known; remember it
22478 $know_input_tabstr = 1;
22479 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
22480 $tokenizer_self->{_input_tabstr} = $input_tabstr;
22482 # see if mistakes were made
# (a guessed, non-zero starting level combined with a changed tab string
# means the earlier guess was made with the wrong tabbing)
22483 if ( ( $tokenizer_self->{_starting_level} > 0 )
22484 && !$tokenizer_self->{_know_starting_level} )
22487 if ( $input_tabstr ne $saved_input_tabstr ) {
22489 "I made a bad starting level guess; rerun with a value for -sil \n"
22495 # use current guess at input tabbing to get input indentation level
22497 # Patch to handle a common case of entabbed leading whitespace
22498 # If the leading whitespace equals 4 spaces and we also have
22499 # tabs, detab the input whitespace assuming 8 spaces per tab.
22500 if ( length($input_tabstr) == 4 ) {
22501 $leading_whitespace =~ s/^\t+/ /g;
22504 if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
# step through the leading whitespace one tab string at a time
# (presumably counting matches into $level; the loop body is not visible
# in this listing -- confirm)
22507 while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
22513 return ( $level, $msg );
22516 # This is a currently unused debug routine
# It prints to the filehandle $fh, package by package, the sub names
# recorded in %is_user_function (each followed by a $msg tag) and then the
# names recorded in %is_constant.
# NOTE(review): the lines that set up $fh, $pkg, $sub and the default
# value of $msg are not visible in this listing -- confirm them.
22517 sub dump_functions {
22521 foreach $pkg ( keys %is_user_function ) {
22522 print $fh "\nnon-constant subs in package $pkg\n";
22524 foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
# tag subs that are also registered as block-list or block functions
22526 if ( $is_block_list_function{$pkg}{$sub} ) {
22527 $msg = 'block_list';
22530 if ( $is_block_function{$pkg}{$sub} ) {
22533 print $fh "$sub $msg\n";
# second pass: constants and constant subs
22537 foreach $pkg ( keys %is_constant ) {
22538 print $fh "\nconstants and constant subs in package $pkg\n";
22540 foreach $sub ( keys %{ $is_constant{$pkg} } ) {
22541 print $fh "$sub\n";
22548 # count number of 1's in a string of 1's and 0's
22549 # example: ones_count("010101010101") gives 6
22550 return ( my $cis = $_[0] ) =~ tr/1/0/;
# Reset the tokenizer's package-level state before a new file is scanned:
# previous-token memory, statement and package context, the tables of
# user-defined functions and constants, and the container depth/type
# stacks.  Finishes by calling initialize_tokenizer_state.
22553 sub prepare_for_a_new_file {
22555 # previous tokens needed to determine what to expect next
22556 $last_nonblank_token = ';'; # the only possible starting state which
22557 $last_nonblank_type = ';'; # will make a leading brace a code block
22558 $last_nonblank_block_type = '';
22560 # scalars for remembering statement types across multiple lines
22561 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
22562 $in_attribute_list = 0;
22564 # scalars for remembering where we are in the file
22565 $current_package = "main";
22566 $context = UNKNOWN_CONTEXT;
22568 # hashes used to remember function information
22569 %is_constant = (); # user-defined constants
22570 %is_user_function = (); # user-defined functions
22571 %user_function_prototype = (); # their prototypes
22572 %is_block_function = ();
22573 %is_block_list_function = ();
22574 %saw_function_definition = ();
22576 # variables used to track depths of various containers
22577 # and report nesting errors
22580 $square_bracket_depth = 0;
# one depth counter per container kind, all starting at zero
22581 @current_depth[ 0 .. $#closing_brace_names ] =
22582 (0) x scalar @closing_brace_names;
# sequence numbers start at the container kind's own index
22585 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
22586 ( 0 .. $#closing_brace_names );
22587 @current_sequence_number = ();
# seed the outermost slot of each per-container stack
22588 $paren_type[$paren_depth] = '';
22589 $paren_semicolon_count[$paren_depth] = 0;
# NOTE(review): @paren_structural_type is indexed with $brace_depth here,
# unlike the other @paren_* arrays, which use $paren_depth.  Looks like a
# copy/paste slip; it only matters if the two depths differ at this
# point -- confirm.
22590 $paren_structural_type[$brace_depth] = '';
22591 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
22592 $brace_structural_type[$brace_depth] = '';
22593 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
# NOTE(review): @brace_package is indexed with $paren_depth here, unlike
# the other @brace_* arrays, which use $brace_depth -- same caveat as
# above, confirm.
22594 $brace_package[$paren_depth] = $current_package;
22595 $square_bracket_type[$square_bracket_depth] = '';
22596 $square_bracket_structural_type[$square_bracket_depth] = '';
# reset the tokenizer's private (closure) state as well
22598 initialize_tokenizer_state();
22601 { # begin tokenize_this_line
22603 use constant BRACE => 0;
22604 use constant SQUARE_BRACKET => 1;
22605 use constant PAREN => 2;
22606 use constant QUESTION_COLON => 3;
22608 # TV1: scalars for processing one LINE.
22609 # Re-initialized on each entry to sub tokenize_this_line.
22611 $block_type, $container_type, $expecting,
22612 $i, $i_tok, $input_line,
22613 $input_line_number, $last_nonblank_i, $max_token_index,
22614 $next_tok, $next_type, $peeked_ahead,
22615 $prototype, $rhere_target_list, $rtoken_map,
22616 $rtoken_type, $rtokens, $tok,
22617 $type, $type_sequence, $indent_flag,
22620 # TV2: refs to ARRAYS for processing one LINE
22621 # Re-initialized on each call.
22622 my $routput_token_list = []; # stack of output token indexes
22623 my $routput_token_type = []; # token types
22624 my $routput_block_type = []; # types of code block
22625 my $routput_container_type = []; # paren types, such as if, elsif, ..
22626 my $routput_type_sequence = []; # nesting sequential number
22627 my $routput_indent_flag = []; #
22629 # TV3: SCALARS for quote variables. These are initialized with a
22630 # subroutine call and continually updated as lines are processed.
22631 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22632 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
22634 # TV4: SCALARS for multi-line identifiers and
22635 # statements. These are initialized with a subroutine call
22636 # and continually updated as lines are processed.
22637 my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
22639 # TV5: SCALARS for tracking indentation level.
22640 # Initialized once and continually updated as lines are
22643 $nesting_token_string, $nesting_type_string,
22644 $nesting_block_string, $nesting_block_flag,
22645 $nesting_list_string, $nesting_list_flag,
22646 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
22647 $in_statement_continuation, $level_in_tokenizer,
22648 $slevel_in_tokenizer, $rslevel_stack,
22651 # TV6: SCALARS for remembering several previous
22652 # tokens. Initialized once and continually updated as
22653 # lines are processed.
22655 $last_nonblank_container_type, $last_nonblank_type_sequence,
22656 $last_last_nonblank_token, $last_last_nonblank_type,
22657 $last_last_nonblank_block_type, $last_last_nonblank_container_type,
22658 $last_last_nonblank_type_sequence, $last_nonblank_prototype,
22661 # ----------------------------------------------------------------
22662 # beginning of tokenizer variable access and manipulation routines
22663 # ----------------------------------------------------------------
# Put the tokenizer's persistent closure variables back to their initial
# values: quote state (TV3), multi-line identifier/statement state (TV4),
# nesting and indentation trackers (TV5) and previous-token memory (TV6).
# The per-line groups TV1 and TV2 are not touched here because they are
# re-initialized on every call to the line tokenizer.
22665 sub initialize_tokenizer_state {
22667 # TV1: initialized on each call
22668 # TV2: initialized on each call
# TV3: quote variables
22672 $quote_character = "";
22675 $quoted_string_1 = "";
22676 $quoted_string_2 = "";
22677 $allowed_quote_modifiers = "";
# TV4: multi-line identifier and statement state
22680 $id_scan_state = '';
22683 $indented_if_level = 0;
# TV5: nesting and indentation tracking
22686 $nesting_token_string = "";
22687 $nesting_type_string = "";
22688 $nesting_block_string = '1'; # initially in a block
22689 $nesting_block_flag = 1;
22690 $nesting_list_string = '0'; # initially not in a list
22691 $nesting_list_flag = 0; # initially not in a list
22692 $ci_string_in_tokenizer = "";
22693 $continuation_string_in_tokenizer = "0";
22694 $in_statement_continuation = 0;
22695 $level_in_tokenizer = 0;
22696 $slevel_in_tokenizer = 0;
22697 $rslevel_stack = [];
# TV6: memory of the previous tokens; the "last last" token and type
# start out as ';'
22700 $last_nonblank_container_type = '';
22701 $last_nonblank_type_sequence = '';
22702 $last_last_nonblank_token = ';';
22703 $last_last_nonblank_type = ';';
22704 $last_last_nonblank_block_type = '';
22705 $last_last_nonblank_container_type = '';
22706 $last_last_nonblank_type_sequence = '';
22707 $last_nonblank_prototype = "";
# Take a snapshot of the tokenizer's closure variables.
# Returns a reference to an array of six array references
# [ $rTV1 .. $rTV6 ], one per variable group (TV1..TV6), in the order that
# restore_tokenizer_state expects.
# NOTE: the values are copied one level deep only; variables that hold
# references (for example the $routput_* lists and $rslevel_stack) are
# saved as the same references, not as copies of their contents.
22710 sub save_tokenizer_state {
# TV1: per-line scalars
22713 $block_type, $container_type, $expecting,
22714 $i, $i_tok, $input_line,
22715 $input_line_number, $last_nonblank_i, $max_token_index,
22716 $next_tok, $next_type, $peeked_ahead,
22717 $prototype, $rhere_target_list, $rtoken_map,
22718 $rtoken_type, $rtokens, $tok,
22719 $type, $type_sequence, $indent_flag,
# TV2: per-line output array references
22723 $routput_token_list, $routput_token_type,
22724 $routput_block_type, $routput_container_type,
22725 $routput_type_sequence, $routput_indent_flag,
# TV3: quote variables
22729 $in_quote, $quote_type,
22730 $quote_character, $quote_pos,
22731 $quote_depth, $quoted_string_1,
22732 $quoted_string_2, $allowed_quote_modifiers,
# TV4: multi-line identifier and statement state
22736 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
# TV5: nesting and indentation tracking
22739 $nesting_token_string, $nesting_type_string,
22740 $nesting_block_string, $nesting_block_flag,
22741 $nesting_list_string, $nesting_list_flag,
22742 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
22743 $in_statement_continuation, $level_in_tokenizer,
22744 $slevel_in_tokenizer, $rslevel_stack,
# TV6: memory of the previous tokens
22748 $last_nonblank_container_type,
22749 $last_nonblank_type_sequence,
22750 $last_last_nonblank_token,
22751 $last_last_nonblank_type,
22752 $last_last_nonblank_block_type,
22753 $last_last_nonblank_container_type,
22754 $last_last_nonblank_type_sequence,
22755 $last_nonblank_prototype,
22757 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
sub restore_tokenizer_state {

    # Restore the tokenizer's closure variables from a snapshot made by
    # save_tokenizer_state.
    #
    # Parameter:
    #   $rstate - reference to an array of six array references
    #             [ $rTV1 .. $rTV6 ], one per variable group, in the same
    #             order in which save_tokenizer_state stored them.
    #
    # Each group below must list its variables in exactly the order used
    # by save_tokenizer_state, because values are matched by position.
    my ($rstate) = @_;
    my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};

    # TV1: per-line scalars
    (
        $block_type,        $container_type,    $expecting,
        $i,                 $i_tok,             $input_line,
        $input_line_number, $last_nonblank_i,   $max_token_index,
        $next_tok,          $next_type,         $peeked_ahead,
        $prototype,         $rhere_target_list, $rtoken_map,
        $rtoken_type,       $rtokens,           $tok,
        $type,              $type_sequence,     $indent_flag,
    ) = @{$rTV1};

    # TV2: per-line output array references.
    # The sixth slot is $routput_indent_flag.  It was previously written
    # as a second $routput_type_sequence, which left $routput_indent_flag
    # unrestored and overwrote $routput_type_sequence with the saved
    # indent-flag reference.
    (
        $routput_token_list,    $routput_token_type,
        $routput_block_type,    $routput_container_type,
        $routput_type_sequence, $routput_indent_flag,
    ) = @{$rTV2};

    # TV3: quote variables
    (
        $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
        $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
    ) = @{$rTV3};

    # TV4: multi-line identifier and statement state
    ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
      @{$rTV4};

    # TV5: nesting and indentation tracking
    (
        $nesting_token_string,      $nesting_type_string,
        $nesting_block_string,      $nesting_block_flag,
        $nesting_list_string,       $nesting_list_flag,
        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
        $in_statement_continuation, $level_in_tokenizer,
        $slevel_in_tokenizer,       $rslevel_stack,
    ) = @{$rTV5};

    # TV6: memory of the previous tokens
    (
        $last_nonblank_container_type,
        $last_nonblank_type_sequence,
        $last_last_nonblank_token,
        $last_last_nonblank_type,
        $last_last_nonblank_block_type,
        $last_last_nonblank_container_type,
        $last_last_nonblank_type_sequence,
        $last_nonblank_prototype,
    ) = @{$rTV6};
}
sub get_indentation_level {

    # Report the tokenizer's current indentation level.  While an
    # "indented if" is pending ($indented_if_level is true) the level is
    # reported one lower; this patch avoids reporting an error when an
    # indented if is not terminated.
    return $indented_if_level
      ? $level_in_tokenizer - 1
      : $level_in_tokenizer;
}
sub reset_indentation_level {

    # Force both the level and the structural level of the tokenizer to
    # the value given as the first argument, and record that value on the
    # structural-level stack.
    my ($new_level) = @_;
    $level_in_tokenizer = $slevel_in_tokenizer = $new_level;
    push @{$rslevel_stack}, $new_level;
}
22822 $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
22825 # ------------------------------------------------------------
22826 # end of tokenizer variable access and manipulation routines
22827 # ------------------------------------------------------------
22829 # ------------------------------------------------------------
22830 # beginning of various scanner interface routines
22831 # ------------------------------------------------------------
22832 sub scan_replacement_text {
22834 # check for here-docs in replacement text invoked by
22835 # a substitution operator with executable modifier 'e'.
22838 # $replacement_text
22840 # $rht = reference to any here-doc targets
22841 my ($replacement_text) = @_;
# quick exit: text without '<<' cannot start a here-doc
22844 return undef unless ( $replacement_text =~ /<</ );
22846 write_logfile_entry("scanning replacement text for here-doc targets\n");
22848 # save the logger object for error messages
# (it has to be read before $tokenizer_self is localized below)
22849 my $logger_object = $tokenizer_self->{_logger_object};
22851 # localize all package variables
22853 $tokenizer_self, $last_nonblank_token,
22854 $last_nonblank_type, $last_nonblank_block_type,
22855 $statement_type, $in_attribute_list,
22856 $current_package, $context,
22857 %is_constant, %is_user_function,
22858 %user_function_prototype, %is_block_function,
22859 %is_block_list_function, %saw_function_definition,
22860 $brace_depth, $paren_depth,
22861 $square_bracket_depth, @current_depth,
22862 @total_depth, $total_depth,
22863 @nesting_sequence_number, @current_sequence_number,
22864 @paren_type, @paren_semicolon_count,
22865 @paren_structural_type, @brace_type,
22866 @brace_structural_type, @brace_context,
22867 @brace_package, @square_bracket_type,
22868 @square_bracket_structural_type, @depth_array,
22869 @starting_line_of_current_depth, @nested_ternary_flag,
22870 @nested_statement_type,
22873 # save all lexical variables
22874 my $rstate = save_tokenizer_state();
22875 _decrement_count(); # avoid error check for multiple tokenizers
22877 # make a new tokenizer
# the replacement text is supplied as an in-memory source (a reference to
# the string), and line numbering continues from the current input line
22879 my $rpending_logfile_message;
22880 my $source_object =
22881 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
22882 $rpending_logfile_message );
22883 my $tokenizer = Perl::Tidy::Tokenizer->new(
22884 source_object => $source_object,
22885 logger_object => $logger_object,
22886 starting_line_number => $input_line_number,
22889 # scan the replacement text
22890 1 while ( $tokenizer->get_line() );
22892 # remove any here doc targets
# (they are collected into $rht and cleared from the tokenizer state)
22894 if ( $tokenizer_self->{_in_here_doc} ) {
22898 $tokenizer_self->{_here_doc_target},
22899 $tokenizer_self->{_here_quote_character}
22901 if ( $tokenizer_self->{_rhere_target_list} ) {
22902 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
22903 $tokenizer_self->{_rhere_target_list} = undef;
22905 $tokenizer_self->{_in_here_doc} = undef;
22908 # now it's safe to report errors
22909 $tokenizer->report_tokenization_errors();
22911 # restore all tokenizer lexical variables
22912 restore_tokenizer_state($rstate);
22914 # return the here doc targets
sub scan_bare_identifier {

    # Interface to scan_bare_identifier_do: pass it the current line state
    # and copy the values it returns back into the tokenizer variables
    # $i, $tok, $type and $prototype.
    my @scan_result =
      scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
        $rtoken_map, $max_token_index );
    ( $i, $tok, $type, $prototype ) = @scan_result;
}
sub scan_identifier {

    # Interface to scan_identifier_do: pass it the current scan state and
    # copy the values it returns back into the tokenizer variables $i,
    # $tok, $type, $id_scan_state and $identifier.
    my @scan_result =
      scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
        $max_token_index, $expecting );
    ( $i, $tok, $type, $id_scan_state, $identifier ) = @scan_result;
}
22931 ( $i, $tok, $type, $id_scan_state ) =
22932 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
22933 $id_scan_state, $max_token_index );
22938 ( $i, $type, $number ) =
22939 scan_number_do( $input_line, $i, $rtoken_map, $type,
22940 $max_token_index );
22944 # a sub to warn if token found where term expected
# The report is issued through unexpected() only when the tokenizer is
# expecting a TERM and the previous nonblank token type is listed in
# %really_want_term.
# NOTE(review): the lines after the unexpected() call are not visible in
# this listing, so the return value is not documented here.
22945 sub error_if_expecting_TERM {
22946 if ( $expecting == TERM ) {
22947 if ( $really_want_term{$last_nonblank_type} ) {
22948 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
22949 $rtoken_type, $input_line );
22955 # a sub to warn if token found where operator expected
# Optional argument: a description of the thing that was found (for
# example "Scalar" or "String"); the current token $tok is used when no
# argument is given.
# NOTE(review): the tail of this sub is not visible in this listing, so
# the return value is not documented here.
22956 sub error_if_expecting_OPERATOR {
22957 if ( $expecting == OPERATOR ) {
22958 my $thing = defined $_[0] ? $_[0] : $tok;
22959 unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
22960 $rtoken_map, $rtoken_type, $input_line );
# an unexpected token at the very start of a line suggests that the
# previous line is missing its terminating semicolon
22961 if ( $i_tok == 0 ) {
22962 interrupt_logfile();
22963 warning("Missing ';' above?\n");
22970 # ------------------------------------------------------------
22971 # end scanner interfaces
22972 # ------------------------------------------------------------
22974 my %is_for_foreach;
22975 @_ = qw(for foreach);
22976 @is_for_foreach{@_} = (1) x scalar(@_);
22980 @is_my_our{@_} = (1) x scalar(@_);
22982 # These keywords may introduce blocks after parenthesized expressions,
22984 # keyword ( .... ) { BLOCK }
22985 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
22986 my %is_blocktype_with_paren;
22987 @_ = qw(if elsif unless while until for foreach switch case given when);
22988 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
22990 # ------------------------------------------------------------
22991 # begin hash of code for handling most token types
22992 # ------------------------------------------------------------
22993 my $tokenization_code = {
22995 # no special code for these types yet, but syntax checks
23030 error_if_expecting_TERM()
23031 if ( $expecting == TERM );
23034 error_if_expecting_TERM()
23035 if ( $expecting == TERM );
23039 # start looking for a scalar
23040 error_if_expecting_OPERATOR("Scalar")
23041 if ( $expecting == OPERATOR );
23044 if ( $identifier eq '$^W' ) {
23045 $tokenizer_self->{_saw_perl_dash_w} = 1;
23048 # Check for identifier in indirect object slot
23049 # (vorboard.pl, sort.t). Something like:
23050 # /^(print|printf|sort|exec|system)$/
23052 $is_indirect_object_taker{$last_nonblank_token}
23054 || ( ( $last_nonblank_token eq '(' )
23055 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
23056 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
23065 $paren_semicolon_count[$paren_depth] = 0;
23067 $container_type = $want_paren;
23071 $container_type = $last_nonblank_token;
23073 # We can check for a syntax error here of unexpected '(',
23074 # but this is going to get messy...
23076 $expecting == OPERATOR
23078 # be sure this is not a method call of the form
23079 # &method(...), $method->(..), &{method}(...),
23080 # $ref[2](list) is ok & short for $ref[2]->(list)
23081 # NOTE: at present, braces in something like &{ xxx }
23082 # are not marked as a block, we might have a method call
23083 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
23088 # ref: camel 3 p 703.
23089 if ( $last_last_nonblank_token eq 'do' ) {
23091 "do SUBROUTINE is deprecated; consider & or -> notation\n"
23096 # if this is an empty list, (), then it is not an
23097 # error; for example, we might have a constant pi and
23098 # invoke it with pi() or just pi;
23099 my ( $next_nonblank_token, $i_next ) =
23100 find_next_nonblank_token( $i, $rtokens,
23101 $max_token_index );
23102 if ( $next_nonblank_token ne ')' ) {
23104 error_if_expecting_OPERATOR('(');
23106 if ( $last_nonblank_type eq 'C' ) {
23108 "$last_nonblank_token has a void prototype\n";
23110 elsif ( $last_nonblank_type eq 'i' ) {
23112 && $last_nonblank_token =~ /^\$/ )
23115 "Do you mean '$last_nonblank_token->(' ?\n";
23119 interrupt_logfile();
23123 } ## end if ( $next_nonblank_token...
23124 } ## end else [ if ( $last_last_nonblank_token...
23125 } ## end if ( $expecting == OPERATOR...
23127 $paren_type[$paren_depth] = $container_type;
23128 ( $type_sequence, $indent_flag ) =
23129 increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
23131 # propagate types down through nested parens
23132 # for example: the second paren in 'if ((' would be structural
23133 # since the first is.
23135 if ( $last_nonblank_token eq '(' ) {
23136 $type = $last_nonblank_type;
23139 # We exclude parens as structural after a ',' because it
23140 # causes subtle problems with continuation indentation for
23141 # something like this, where the first 'or' will not get
23146 # ( not defined $check )
23148 # or $check eq "new"
23149 # or $check eq "old",
23152 # Likewise, we exclude parens where a statement can start
23153 # because of problems with continuation indentation, like
23156 # ($firstline =~ /^#\!.*perl/)
23157 # and (print $File::Find::name, "\n")
23160 # (ref($usage_fref) =~ /CODE/)
23162 # : (&blast_usage, &blast_params, &blast_general_params);
23168 if ( $last_nonblank_type eq ')' ) {
23170 "Syntax error? found token '$last_nonblank_type' then '('\n"
23173 $paren_structural_type[$paren_depth] = $type;
23177 ( $type_sequence, $indent_flag ) =
23178 decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
23180 if ( $paren_structural_type[$paren_depth] eq '{' ) {
23184 $container_type = $paren_type[$paren_depth];
23186 # /^(for|foreach)$/
23187 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
23188 my $num_sc = $paren_semicolon_count[$paren_depth];
23189 if ( $num_sc > 0 && $num_sc != 2 ) {
23190 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
23194 if ( $paren_depth > 0 ) { $paren_depth-- }
23197 if ( $last_nonblank_type eq ',' ) {
23198 complain("Repeated ','s \n");
23201 # patch for operator_expected: note if we are in the list (use.t)
23202 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
23203 ## FIXME: need to move this elsewhere, perhaps check after a '('
23204 ## elsif ($last_nonblank_token eq '(') {
23205 ## warning("Leading ','s illegal in some versions of perl\n");
23209 $context = UNKNOWN_CONTEXT;
23210 $statement_type = '';
23212 # /^(for|foreach)$/
23213 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
23214 { # mark ; in for loop
23216 # Be careful: we do not want a semicolon such as the
23217 # following to be included:
23219 # for (sort {strcoll($a,$b);} keys %investments) {
23221 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
23222 && $square_bracket_depth ==
23223 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
23227 $paren_semicolon_count[$paren_depth]++;
23233 error_if_expecting_OPERATOR("String")
23234 if ( $expecting == OPERATOR );
23237 $allowed_quote_modifiers = "";
23240 error_if_expecting_OPERATOR("String")
23241 if ( $expecting == OPERATOR );
23244 $allowed_quote_modifiers = "";
23247 error_if_expecting_OPERATOR("String")
23248 if ( $expecting == OPERATOR );
23251 $allowed_quote_modifiers = "";
23256 if ( $expecting == UNKNOWN ) { # indeterminte, must guess..
23258 ( $is_pattern, $msg ) =
23259 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
23260 $max_token_index );
23263 write_diagnostics("DIVIDE:$msg\n");
23264 write_logfile_entry($msg);
23267 else { $is_pattern = ( $expecting == TERM ) }
23272 $allowed_quote_modifiers = '[msixpodualgc]';
23274 else { # not a pattern; check for a /= token
23276 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
23282 #DEBUG - collecting info on what tokens follow a divide
23283 # for development of guessing algorithm
23284 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
23285 # #write_diagnostics( "DIVIDE? $input_line\n" );
23291 # if we just saw a ')', we will label this block with
23292 # its type. We need to do this to allow sub
23293 # code_block_type to determine if this brace starts a
23294 # code block or anonymous hash. (The type of a paren
23295 # pair is the preceding token, such as 'if', 'else',
23297 $container_type = "";
23299 # ATTRS: for a '{' following an attribute list, reset
23300 # things to look like we just saw the sub name
23301 if ( $statement_type =~ /^sub/ ) {
23302 $last_nonblank_token = $statement_type;
23303 $last_nonblank_type = 'i';
23304 $statement_type = "";
23307 # patch for SWITCH/CASE: hide these keywords from an immediately
23308 # following opening brace
23309 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
23310 && $statement_type eq $last_nonblank_token )
23312 $last_nonblank_token = ";";
23315 elsif ( $last_nonblank_token eq ')' ) {
23316 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
23318 # defensive move in case of a nesting error (pbug.t)
23319 # in which this ')' had no previous '('
23320 # this nesting error will have been caught
23321 if ( !defined($last_nonblank_token) ) {
23322 $last_nonblank_token = 'if';
23325 # check for syntax error here;
23326 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
23327 my $list = join( ' ', sort keys %is_blocktype_with_paren );
23329 "syntax error at ') {', didn't see one of: $list\n");
23333 # patch for paren-less for/foreach glitch, part 2.
23334 # see note below under 'qw'
23335 elsif ($last_nonblank_token eq 'qw'
23336 && $is_for_foreach{$want_paren} )
23338 $last_nonblank_token = $want_paren;
23339 if ( $last_last_nonblank_token eq $want_paren ) {
23341 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
23348 # now identify which of the three possible types of
23349 # curly braces we have: hash index container, anonymous
23350 # hash reference, or code block.
23352 # non-structural (hash index) curly brace pair
23353 # get marked 'L' and 'R'
23354 if ( is_non_structural_brace() ) {
23357 # patch for SWITCH/CASE:
23358 # allow paren-less identifier after 'when'
23359 # if the brace is preceded by a space
23360 if ( $statement_type eq 'when'
23361 && $last_nonblank_type eq 'i'
23362 && $last_last_nonblank_type eq 'k'
23363 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
23366 $block_type = $statement_type;
23370 # code and anonymous hash have the same type, '{', but are
23371 # distinguished by 'block_type',
23372 # which will be blank for an anonymous hash
23375 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
23376 $max_token_index );
23378 # patch to promote bareword type to function taking block
23380 && $last_nonblank_type eq 'w'
23381 && $last_nonblank_i >= 0 )
23383 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
23384 $routput_token_type->[$last_nonblank_i] = 'G';
23388 # patch for SWITCH/CASE: if we find a stray opening block brace
23389 # where we might accept a 'case' or 'when' block, then take it
23390 if ( $statement_type eq 'case'
23391 || $statement_type eq 'when' )
23393 if ( !$block_type || $block_type eq '}' ) {
23394 $block_type = $statement_type;
23398 $brace_type[ ++$brace_depth ] = $block_type;
23399 $brace_package[$brace_depth] = $current_package;
23400 $brace_structural_type[$brace_depth] = $type;
23401 $brace_context[$brace_depth] = $context;
23402 ( $type_sequence, $indent_flag ) =
23403 increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
23406 $block_type = $brace_type[$brace_depth];
23407 if ($block_type) { $statement_type = '' }
23408 if ( defined( $brace_package[$brace_depth] ) ) {
23409 $current_package = $brace_package[$brace_depth];
23412 # can happen on brace error (caught elsewhere)
23415 ( $type_sequence, $indent_flag ) =
23416 decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
23418 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
23422 # propagate type information for 'do' and 'eval' blocks.
23423 # This is necessary to enable us to know if an operator
23424 # or term is expected next
23425 if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
23426 $tok = $brace_type[$brace_depth];
23429 $context = $brace_context[$brace_depth];
23430 if ( $brace_depth > 0 ) { $brace_depth--; }
23432 '&' => sub { # maybe sub call? start looking
23434 # We have to check for sub call unless we are sure we
23435 # are expecting an operator. This example from s2p
23436 # got mistaken as a q operator in an early version:
23437 # print BODY &q(<<'EOT');
23438 if ( $expecting != OPERATOR ) {
23440 # But only look for a sub call if we are expecting a term or
23441 # if there is no existing space after the &.
23442 # For example we probably don't want & as sub call here:
23443 # Fcntl::S_IRUSR & $mode;
23444 if ( $expecting == TERM || $next_type ne 'b' ) {
23451 '<' => sub { # angle operator or less than?
23453 if ( $expecting != OPERATOR ) {
23455 find_angle_operator_termination( $input_line, $i, $rtoken_map,
23456 $expecting, $max_token_index );
23458 if ( $type eq '<' && $expecting == TERM ) {
23459 error_if_expecting_TERM();
23460 interrupt_logfile();
23461 warning("Unterminated <> operator?\n");
23468 '?' => sub { # ?: conditional or starting pattern?
23472 if ( $expecting == UNKNOWN ) {
23475 ( $is_pattern, $msg ) =
23476 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
23477 $max_token_index );
23479 if ($msg) { write_logfile_entry($msg) }
23481 else { $is_pattern = ( $expecting == TERM ) }
23486 $allowed_quote_modifiers = '[msixpodualgc]';
23489 ( $type_sequence, $indent_flag ) =
23490 increase_nesting_depth( QUESTION_COLON,
23491 $$rtoken_map[$i_tok] );
23494 '*' => sub { # typeglob, or multiply?
23496 if ( $expecting == TERM ) {
23501 if ( $$rtokens[ $i + 1 ] eq '=' ) {
23506 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
23510 if ( $$rtokens[ $i + 1 ] eq '=' ) {
23518 '.' => sub { # what kind of . ?
23520 if ( $expecting != OPERATOR ) {
23522 if ( $type eq '.' ) {
23523 error_if_expecting_TERM()
23524 if ( $expecting == TERM );
23532 # if this is the first nonblank character, call it a label
23533 # since perl seems to just swallow it
23534 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
23538 # ATTRS: check for a ':' which introduces an attribute list
23539 # (this might eventually get its own token type)
23540 elsif ( $statement_type =~ /^sub/ ) {
23542 $in_attribute_list = 1;
23545 # check for scalar attribute, such as
23546 # my $foo : shared = 1;
23547 elsif ($is_my_our{$statement_type}
23548 && $current_depth[QUESTION_COLON] == 0 )
23551 $in_attribute_list = 1;
23554 # otherwise, it should be part of a ?/: operator
23556 ( $type_sequence, $indent_flag ) =
23557 decrease_nesting_depth( QUESTION_COLON,
23558 $$rtoken_map[$i_tok] );
23559 if ( $last_nonblank_token eq '?' ) {
23560 warning("Syntax error near ? :\n");
23564 '+' => sub { # what kind of plus?
23566 if ( $expecting == TERM ) {
23567 my $number = scan_number();
23569 # unary plus is safest assumption if not a number
23570 if ( !defined($number) ) { $type = 'p'; }
23572 elsif ( $expecting == OPERATOR ) {
23575 if ( $next_type eq 'w' ) { $type = 'p' }
23580 error_if_expecting_OPERATOR("Array")
23581 if ( $expecting == OPERATOR );
23584 '%' => sub { # hash or modulo?
23586 # first guess is hash if no following blank
23587 if ( $expecting == UNKNOWN ) {
23588 if ( $next_type ne 'b' ) { $expecting = TERM }
23590 if ( $expecting == TERM ) {
23595 $square_bracket_type[ ++$square_bracket_depth ] =
23596 $last_nonblank_token;
23597 ( $type_sequence, $indent_flag ) =
23598 increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23600 # It may seem odd, but structural square brackets have
23601 # type '{' and '}'. This simplifies the indentation logic.
23602 if ( !is_non_structural_brace() ) {
23605 $square_bracket_structural_type[$square_bracket_depth] = $type;
23608 ( $type_sequence, $indent_flag ) =
23609 decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23611 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
23615 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
23617 '-' => sub { # what kind of minus?
23619 if ( ( $expecting != OPERATOR )
23620 && $is_file_test_operator{$next_tok} )
23622 my ( $next_nonblank_token, $i_next ) =
23623 find_next_nonblank_token( $i + 1, $rtokens,
23624 $max_token_index );
23626 # check for a quoted word like "-w=>xx";
23627 # it is sufficient to just check for a following '='
23628 if ( $next_nonblank_token eq '=' ) {
23637 elsif ( $expecting == TERM ) {
23638 my $number = scan_number();
23640 # maybe part of bareword token? unary is safest
23641 if ( !defined($number) ) { $type = 'm'; }
23644 elsif ( $expecting == OPERATOR ) {
23648 if ( $next_type eq 'w' ) {
23656 # check for special variables like ${^WARNING_BITS}
23657 if ( $expecting == TERM ) {
23659 # FIXME: this should work but will not catch errors
23660 # because we also have to be sure that previous token is
23661 # a type character ($,@,%).
23662 if ( $last_nonblank_token eq '{'
23663 && ( $next_tok =~ /^[A-Za-z_]/ ) )
23666 if ( $next_tok eq 'W' ) {
23667 $tokenizer_self->{_saw_perl_dash_w} = 1;
23669 $tok = $tok . $next_tok;
23675 unless ( error_if_expecting_TERM() ) {
23677 # Something like this is valid but strange:
23679 complain("The '^' seems unusual here\n");
23685 '::' => sub { # probably a sub call
23686 scan_bare_identifier();
23688 '<<' => sub { # maybe a here-doc?
23690 unless ( $i < $max_token_index )
23691 ; # here-doc not possible if end of line
23693 if ( $expecting != OPERATOR ) {
23694 my ( $found_target, $here_doc_target, $here_quote_character,
23697 $found_target, $here_doc_target, $here_quote_character, $i,
23700 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
23701 $max_token_index );
23703 if ($found_target) {
23704 push @{$rhere_target_list},
23705 [ $here_doc_target, $here_quote_character ];
23707 if ( length($here_doc_target) > 80 ) {
23708 my $truncated = substr( $here_doc_target, 0, 80 );
23709 complain("Long here-target: '$truncated' ...\n");
23711 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
23713 "Unconventional here-target: '$here_doc_target'\n"
23717 elsif ( $expecting == TERM ) {
23718 unless ($saw_error) {
23720 # shouldn't happen..
23721 warning("Program bug; didn't find here doc target\n");
23722 report_definite_bug();
23731 # if -> points to a bare word, we must scan for an identifier,
23732 # otherwise something like ->y would look like the y operator
23736 # type = 'pp' for pre-increment, '++' for post-increment
23738 if ( $expecting == TERM ) { $type = 'pp' }
23739 elsif ( $expecting == UNKNOWN ) {
23740 my ( $next_nonblank_token, $i_next ) =
23741 find_next_nonblank_token( $i, $rtokens, $max_token_index );
23742 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
23747 if ( $last_nonblank_type eq $tok ) {
23748 complain("Repeated '=>'s \n");
23751 # patch for operator_expected: note if we are in the list (use.t)
23752 # TODO: make version numbers a new token type
23753 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
23756 # type = 'mm' for pre-decrement, '--' for post-decrement
23759 if ( $expecting == TERM ) { $type = 'mm' }
23760 elsif ( $expecting == UNKNOWN ) {
23761 my ( $next_nonblank_token, $i_next ) =
23762 find_next_nonblank_token( $i, $rtokens, $max_token_index );
23763 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
23768 error_if_expecting_TERM()
23769 if ( $expecting == TERM );
23773 error_if_expecting_TERM()
23774 if ( $expecting == TERM );
23778 error_if_expecting_TERM()
23779 if ( $expecting == TERM );
23783 # ------------------------------------------------------------
23784 # end hash of code for handling individual token types
23785 # ------------------------------------------------------------
# ------------------------------------------------------------
# Static lookup tables.
#
# Every %is_... table below is filled with the same two-step idiom:
#   1. a word list is stored in @_
#   2. a hash-slice assignment, @table{@_} = (1) x scalar(@_),
#      gives each word in that list the value 1
# so that membership can later be tested with a plain hash lookup.
# Because @_ is reused from one table to the next, the order of these
# statements matters.
# ------------------------------------------------------------

# Maps each closing bracket character to its opening partner.
23787 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
23789 # These block types terminate statements and do not need a trailing
23791 # patched for SWITCH/CASE/
23792 my %is_zero_continuation_block_type;
# Note: the lone ';' at the end of the first line is an element of the
# qw list (the list runs on to the closing paren on the next line), not
# a statement terminator.
23793 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
23794 if elsif else unless while until for foreach switch case given when);
23795 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
# Set of the block-introducing words sort, grep, map, do and eval.
# NOTE(review): presumably the counterpart of the table above (blocks
# that do not end a statement); its use is not shown here -- confirm.
23797 my %is_not_zero_continuation_block_type;
23798 @_ = qw(sort grep map do eval);
23799 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
# Set of keywords and operators: conditional/loop keywords plus the
# logical operators in both word form and symbol form.
23801 my %is_logical_container;
23802 @_ = qw(if elsif unless while and or err not && ! || for foreach);
23803 @is_logical_container{@_} = (1) x scalar(@_);
# NOTE(review): no word list is assigned to @_ between the declaration
# and the slice below, so as written the keys are whatever @_ still
# holds from the %is_logical_container list above.  A dedicated
# '@_ = qw(...)' line looks to be expected here -- confirm.
23805 my %is_binary_type;
23807 @is_binary_type{@_} = (1) x scalar(@_);
# Set of the word-form binary operators.
23809 my %is_binary_keyword;
23810 @_ = qw(and or err eq ne cmp);
23811 @is_binary_keyword{@_} = (1) x scalar(@_);
23813 # 'L' is token for opening { at hash key
23814 my %is_opening_type;
# Double quotes are used as the qw delimiter because the list itself
# contains parens, brackets and braces.
23815 @_ = qw" L { ( [ ";
23816 @is_opening_type{@_} = (1) x scalar(@_);
23818 # 'R' is token for closing } at hash key
23819 my %is_closing_type;
23820 @_ = qw" R } ) ] ";
23821 @is_closing_type{@_} = (1) x scalar(@_);
# Set of the keywords redo, last, next and goto.
23823 my %is_redo_last_next_goto;
23824 @_ = qw(redo last next goto);
23825 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
# Set of the keywords use and require.
23827 my %is_use_require;
23828 @_ = qw(use require);
23829 @is_use_require{@_} = (1) x scalar(@_);
# Set of the keywords sub and package.
23831 my %is_sub_package;
23832 @_ = qw(sub package);
23833 @is_sub_package{@_} = (1) x scalar(@_);
23835 # This hash holds the hash key in $tokenizer_self for these keywords:
# (keyword => name of the corresponding state key)
# NOTE(review): no closing ');' follows the last entry of this list as
# the text stands -- confirm the list is terminated.
23836 my %is_format_END_DATA = (
23837 'format' => '_in_format',
23838 '__END__' => '_in_end',
23839 '__DATA__' => '_in_data',
23842 # ref: camel 3 p 147,
23843 # but perl may accept undocumented flags
23844 # perl 5.10 adds 'p' (preserve)
23845 # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these:
23846 # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc
23847 # s/PATTERN/REPLACEMENT/msixpodualgcer
23848 # y/SEARCHLIST/REPLACEMENTLIST/cdsr
23849 # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
23850 # qr/STRING/msixpodual
# Maps a quote-like operator name to a regex character class listing the
# modifier letters that may follow the quoted text.  A value from this
# table is what the tokenizer stores in $allowed_quote_modifiers, which
# is then matched against the trailing word one letter at a time.
# NOTE(review): only the 's', 'm' and 'qr' entries appear below; entries
# for the other operators listed above (y, tr, ...) and the closing ');'
# look to be expected -- confirm the table is complete.
23851 my %quote_modifiers = (
23852 's' => '[msixpodualgcer]',
23855 'm' => '[msixpodualgc]',
23856 'qr' => '[msixpodual]',
23863 # table showing how many quoted things to look for after quote operator..
23864 # s, y, tr have 2 (pattern and replacement)
23865 # others have 1 (pattern only)
# The count looked up here is what gets assigned to $in_quote when a
# quote operator is recognised (e.g. $in_quote = $quote_items{'q'}).
# NOTE(review): the entries and closing ');' of this list do not appear
# after the opening line -- confirm.
23866 my %quote_items = (
23878 sub tokenize_this_line {
23880 # This routine breaks a line of perl code into tokens which are of use in
23881 # indentation and reformatting. One of my goals has been to define tokens
23882 # such that a newline may be inserted between any pair of tokens without
23883 # changing or invalidating the program. This version comes close to this,
23884 # although there are necessarily a few exceptions which must be caught by
23885 # the formatter. Many of these involve the treatment of bare words.
23887 # The tokens and their types are returned in arrays. See previous
23888 # routine for their names.
23890 # See also the array "valid_token_types" in the BEGIN section for an
23893 # To simplify things, token types are either a single character, or they
23894 # are identical to the tokens themselves.
23896 # As a debugging aid, the -D flag creates a file containing a side-by-side
23897 # comparison of the input string and its tokenization for each line of a file.
23898 # This is an invaluable debugging aid.
23900 # In addition to tokens, and some associated quantities, the tokenizer
23901 # also returns flags indicating any special line types. These include
23902 # quotes, here_docs, formats.
23904 # -----------------------------------------------------------------------
23906 # How to add NEW_TOKENS:
23908 # New token types will undoubtedly be needed in the future both to keep up
23909 # with changes in perl and to help adapt the tokenizer to other applications.
23911 # Here are some notes on the minimal steps. I wrote these notes while
23912 # adding the 'v' token type for v-strings, which are things like version
23913 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
23914 # can use your editor to search for the string "NEW_TOKENS" to find the
23915 # appropriate sections to change):
23917 # *. Try to talk somebody else into doing it! If not, ..
23919 # *. Make a backup of your current version in case things don't work out!
23921 # *. Think of a new, unused character for the token type, and add to
23922 # the array @valid_token_types in the BEGIN section of this package.
23923 # For example, I used 'v' for v-strings.
23925 # *. Implement coding to recognize the $type of the token in this routine.
23926 # This is the hardest part, and is best done by imitating or modifying
23927 # some of the existing coding. For example, to recognize v-strings, I
23928 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
23929 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
23931 # *. Update sub operator_expected. This update is critically important but
23932 # the coding is trivial. Look at the comments in that routine for help.
23933 # For v-strings, which should behave like numbers, I just added 'v' to the
23934 # regex used to handle numbers and strings (types 'n' and 'Q').
23936 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
23937 # Perl::Tidy::Formatter for breaking lines around this token type. You can
23938 # skip this step and take the default at first, then adjust later to get
23939 # desired results. For adding type 'v', I looked at sub bond_strength and
23940 # saw that number type 'n' was using default strengths, so I didn't do
23941 # anything. I may tune it up someday if I don't like the way line
23942 # breaks with v-strings look.
23944 # *. Implement a 'whitespace' rule in sub set_white_space_flag in
23945 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
23946 # and saw that type 'n' used spaces on both sides, so I just added 'v'
23947 # to the array @spaces_both_sides.
23949 # *. Update HtmlWriter package so that users can colorize the token as
23950 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
23951 # that package. For v-strings, I initially chose to use a default color
23952 # equal to the default for numbers, but it might be nice to change that
23955 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
23957 # *. Run lots and lots of debug tests. Start with special files designed
23958 # to test the new token type. Run with the -D flag to create a .DEBUG
23959 # file which shows the tokenization. When these work ok, test as many old
23960 # scripts as possible. Start with all of the '.t' files in the 'test'
23961 # directory of the distribution file. Compare .tdy output with previous
23962 # version and updated version to see the differences. Then include as
23963 # many more files as possible. My own technique has been to collect a huge
23964 # number of perl scripts (thousands!) into one directory and run perltidy
23965 # *, then run diff between the output of the previous version and the
23968 # *. For another example, search for the smartmatch operator '~~'
23969 # with your editor to see where updates were made for it.
23971 # -----------------------------------------------------------------------
23973 my $line_of_tokens = shift;
23974 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
23976 # patch while coding change is underway
23977 # make callers private data to allow access
23978 # $tokenizer_self = $caller_tokenizer_self;
23980 # extract line number for use in error messages
23981 $input_line_number = $line_of_tokens->{_line_number};
23983 # reinitialize for multi-line quote
23984 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
23986 # check for pod documentation
23987 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
23989 # must not be in multi-line quote
23990 # and must not be in an eqn
23991 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
23993 $tokenizer_self->{_in_pod} = 1;
23998 $input_line = $untrimmed_input_line;
24002 # trim start of this line unless we are continuing a quoted line
24003 # do not trim end because we might end in a quote (test: deken4.pl)
24004 # Perl::Tidy::Formatter will delete needless trailing blanks
24005 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
24006 $input_line =~ s/^\s*//; # trim left end
24009 # update the copy of the line for use in error messages
24010 # This must be exactly what we give the pre_tokenizer
24011 $tokenizer_self->{_line_text} = $input_line;
24013 # re-initialize for the main loop
24014 $routput_token_list = []; # stack of output token indexes
24015 $routput_token_type = []; # token types
24016 $routput_block_type = []; # types of code block
24017 $routput_container_type = []; # paren types, such as if, elsif, ..
24018 $routput_type_sequence = []; # nesting sequential number
24020 $rhere_target_list = [];
24022 $tok = $last_nonblank_token;
24023 $type = $last_nonblank_type;
24024 $prototype = $last_nonblank_prototype;
24025 $last_nonblank_i = -1;
24026 $block_type = $last_nonblank_block_type;
24027 $container_type = $last_nonblank_container_type;
24028 $type_sequence = $last_nonblank_type_sequence;
24032 # tokenization is done in two stages..
24033 # stage 1 is a very simple pre-tokenization
24034 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
24036 # a little optimization for a full-line comment
24037 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
24038 $max_tokens_wanted = 1 # no use tokenizing a comment
24041 # start by breaking the line into pre-tokens
24042 ( $rtokens, $rtoken_map, $rtoken_type ) =
24043 pre_tokenize( $input_line, $max_tokens_wanted );
24045 $max_token_index = scalar(@$rtokens) - 1;
24046 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
24047 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
24048 push( @$rtoken_type, 'b', 'b', 'b' );
24050 # initialize for main loop
24051 for $i ( 0 .. $max_token_index + 3 ) {
24052 $routput_token_type->[$i] = "";
24053 $routput_block_type->[$i] = "";
24054 $routput_container_type->[$i] = "";
24055 $routput_type_sequence->[$i] = "";
24056 $routput_indent_flag->[$i] = 0;
24061 # ------------------------------------------------------------
24062 # begin main tokenization loop
24063 # ------------------------------------------------------------
24065 # we are looking at each pre-token of one line and combining them
24067 while ( ++$i <= $max_token_index ) {
24069 if ($in_quote) { # continue looking for end of a quote
24070 $type = $quote_type;
24072 unless ( @{$routput_token_list} )
24073 { # initialize if continuation line
24074 push( @{$routput_token_list}, $i );
24075 $routput_token_type->[$i] = $type;
24078 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
24080 # scan for the end of the quote or pattern
24082 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
24083 $quoted_string_1, $quoted_string_2
24086 $i, $in_quote, $quote_character,
24087 $quote_pos, $quote_depth, $quoted_string_1,
24088 $quoted_string_2, $rtokens, $rtoken_map,
24092 # all done if we didn't find it
24093 last if ($in_quote);
24095 # save pattern and replacement text for rescanning
24096 my $qs1 = $quoted_string_1;
24097 my $qs2 = $quoted_string_2;
24099 # re-initialize for next search
24100 $quote_character = '';
24103 $quoted_string_1 = "";
24104 $quoted_string_2 = "";
24105 last if ( ++$i > $max_token_index );
24107 # look for any modifiers
24108 if ($allowed_quote_modifiers) {
24110 # check for exact quote modifiers
24111 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
24112 my $str = $$rtokens[$i];
24113 my $saw_modifier_e;
24114 while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
24115 my $pos = pos($str);
24116 my $char = substr( $str, $pos - 1, 1 );
24117 $saw_modifier_e ||= ( $char eq 'e' );
24120 # For an 'e' quote modifier we must scan the replacement
24121 # text for here-doc targets.
24122 if ($saw_modifier_e) {
24124 my $rht = scan_replacement_text($qs1);
24126 # Change type from 'Q' to 'h' for quotes with
24127 # here-doc targets so that the formatter (see sub
24128 # print_line_of_tokens) will not make any line
24129 # breaks after this point.
24131 push @{$rhere_target_list}, @{$rht};
24133 if ( $i_tok < 0 ) {
24134 my $ilast = $routput_token_list->[-1];
24135 $routput_token_type->[$ilast] = $type;
24140 if ( defined( pos($str) ) ) {
24143 if ( pos($str) == length($str) ) {
24144 last if ( ++$i > $max_token_index );
24147 # Looks like a joined quote modifier
24148 # and keyword, maybe something like
24149 # s/xxx/yyy/gefor @k=...
24150 # Example is "galgen.pl". Would have to split
24151 # the word and insert a new token in the
24152 # pre-token list. This is so rare that I haven't
24153 # done it. Will just issue a warning citation.
24155 # This error might also be triggered if my quote
24156 # modifier characters are incomplete
24160 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
24161 Please put a space between quote modifiers and trailing keywords.
24164 # print "token $$rtokens[$i]\n";
24165 # my $num = length($str) - pos($str);
24166 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
24167 # print "continuing with new token $$rtokens[$i]\n";
24169 # skipping past this token does least damage
24170 last if ( ++$i > $max_token_index );
24175 # example file: rokicki4.pl
24176 # This error might also be triggered if my quote
24177 # modifier characters are incomplete
24178 write_logfile_entry(
24179 "Note: found word $str at quote modifier location\n"
24185 $allowed_quote_modifiers = "";
24189 unless ( $tok =~ /^\s*$/ ) {
24191 # try to catch some common errors
24192 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
24194 if ( $last_nonblank_token eq 'eq' ) {
24195 complain("Should 'eq' be '==' here ?\n");
24197 elsif ( $last_nonblank_token eq 'ne' ) {
24198 complain("Should 'ne' be '!=' here ?\n");
24202 $last_last_nonblank_token = $last_nonblank_token;
24203 $last_last_nonblank_type = $last_nonblank_type;
24204 $last_last_nonblank_block_type = $last_nonblank_block_type;
24205 $last_last_nonblank_container_type =
24206 $last_nonblank_container_type;
24207 $last_last_nonblank_type_sequence =
24208 $last_nonblank_type_sequence;
24209 $last_nonblank_token = $tok;
24210 $last_nonblank_type = $type;
24211 $last_nonblank_prototype = $prototype;
24212 $last_nonblank_block_type = $block_type;
24213 $last_nonblank_container_type = $container_type;
24214 $last_nonblank_type_sequence = $type_sequence;
24215 $last_nonblank_i = $i_tok;
24218 # store previous token type
24219 if ( $i_tok >= 0 ) {
24220 $routput_token_type->[$i_tok] = $type;
24221 $routput_block_type->[$i_tok] = $block_type;
24222 $routput_container_type->[$i_tok] = $container_type;
24223 $routput_type_sequence->[$i_tok] = $type_sequence;
24224 $routput_indent_flag->[$i_tok] = $indent_flag;
24226 my $pre_tok = $$rtokens[$i]; # get the next pre-token
24227 my $pre_type = $$rtoken_type[$i]; # and type
24229 $type = $pre_type; # to be modified as necessary
24230 $block_type = ""; # blank for all tokens except code block braces
24231 $container_type = ""; # blank for all tokens except some parens
24232 $type_sequence = ""; # blank for all tokens except ?/:
24234 $prototype = ""; # blank for all tokens except user defined subs
24237 # this pre-token will start an output token
24238 push( @{$routput_token_list}, $i_tok );
24240 # continue gathering identifier if necessary
24241 # but do not start on blanks and comments
24242 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
24244 if ( $id_scan_state =~ /^(sub|package)/ ) {
24251 last if ($id_scan_state);
24252 next if ( ( $i > 0 ) || $type );
24254 # didn't find any token; start over
24259 # handle whitespace tokens..
24260 next if ( $type eq 'b' );
24261 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
24262 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
24264 # Build larger tokens where possible, since we are not in a quote.
24266 # First try to assemble digraphs. The following tokens are
24267 # excluded and handled specially:
24268 # '/=' is excluded because the / might start a pattern.
24269 # 'x=' is excluded since it might be $x=, with $ on previous line
24270 # '**' and *= might be typeglobs of punctuation variables
24271 # I have allowed tokens starting with <, such as <=,
24272 # because I don't think these could be valid angle operators.
24273 # test file: storrs4.pl
24274 my $test_tok = $tok . $$rtokens[ $i + 1 ];
24275 my $combine_ok = $is_digraph{$test_tok};
24277 # check for special cases which cannot be combined
24280 # '//' must be defined_or operator if an operator is expected.
24281 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
24282 # could be migrated here for clarity
24283 if ( $test_tok eq '//' ) {
24284 my $next_type = $$rtokens[ $i + 1 ];
24286 operator_expected( $prev_type, $tok, $next_type );
24287 $combine_ok = 0 unless ( $expecting == OPERATOR );
24293 && ( $test_tok ne '/=' ) # might be pattern
24294 && ( $test_tok ne 'x=' ) # might be $x
24295 && ( $test_tok ne '**' ) # typeglob?
24296 && ( $test_tok ne '*=' ) # typeglob?
24302 # Now try to assemble trigraphs. Note that all possible
24303 # perl trigraphs can be constructed by appending a character
24305 $test_tok = $tok . $$rtokens[ $i + 1 ];
24307 if ( $is_trigraph{$test_tok} ) {
24314 $next_tok = $$rtokens[ $i + 1 ];
24315 $next_type = $$rtoken_type[ $i + 1 ];
24317 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
24320 $last_nonblank_token, $tok,
24321 $next_tok, $brace_depth,
24322 $brace_type[$brace_depth], $paren_depth,
24323 $paren_type[$paren_depth]
24325 print "TOKENIZE:(@debug_list)\n";
24328 # turn off attribute list on first non-blank, non-bareword
24329 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
24331 ###############################################################
24332 # We have the next token, $tok.
24333 # Now we have to examine this token and decide what it is
24334 # and define its $type
24336 # section 1: bare words
24337 ###############################################################
24339 if ( $pre_type eq 'w' ) {
24340 $expecting = operator_expected( $prev_type, $tok, $next_type );
24341 my ( $next_nonblank_token, $i_next ) =
24342 find_next_nonblank_token( $i, $rtokens, $max_token_index );
24344 # ATTRS: handle sub and variable attributes
24345 if ($in_attribute_list) {
24347 # treat bare word followed by open paren like qw(
24348 if ( $next_nonblank_token eq '(' ) {
24349 $in_quote = $quote_items{'q'};
24350 $allowed_quote_modifiers = $quote_modifiers{'q'};
24356 # handle bareword not followed by open paren
24363 # quote a word followed by => operator
24364 if ( $next_nonblank_token eq '=' ) {
24366 if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
24367 if ( $is_constant{$current_package}{$tok} ) {
24370 elsif ( $is_user_function{$current_package}{$tok} ) {
24373 $user_function_prototype{$current_package}{$tok};
24375 elsif ( $tok =~ /^v\d+$/ ) {
24377 report_v_string($tok);
24379 else { $type = 'w' }
24385 # quote a bare word within braces..like xxx->{s}; note that we
24386 # must be sure this is not a structural brace, to avoid
24387 # mistaking {s} in the following for a quoted bare word:
24388 # for(@[){s}bla}BLA}
24389 # Also treat q in something like var{-q} as a bare word, not quote operator
24391 $next_nonblank_token eq '}'
24393 $last_nonblank_type eq 'L'
24394 || ( $last_nonblank_type eq 'm'
24395 && $last_last_nonblank_type eq 'L' )
24403 # a bare word immediately followed by :: is not a keyword;
24404 # use $tok_kw when testing for keywords to avoid a mistake
24406 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
24411 # handle operator x (now we know it isn't $x=)
24412 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
24413 if ( $tok eq 'x' ) {
24415 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
24425 # FIXME: Patch: mark something like x4 as an integer for now
24426 # It gets fixed downstream. This is easier than
24427 # splitting the pretoken.
24433 elsif ( ( $tok eq 'strict' )
24434 and ( $last_nonblank_token eq 'use' ) )
24436 $tokenizer_self->{_saw_use_strict} = 1;
24437 scan_bare_identifier();
24440 elsif ( ( $tok eq 'warnings' )
24441 and ( $last_nonblank_token eq 'use' ) )
24443 $tokenizer_self->{_saw_perl_dash_w} = 1;
24445 # scan as identifier, so that we pick up something like:
24446 # use warnings::register
24447 scan_bare_identifier();
24451 $tok eq 'AutoLoader'
24452 && $tokenizer_self->{_look_for_autoloader}
24454 $last_nonblank_token eq 'use'
24456 # these regexes are from AutoSplit.pm, which we want
24458 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
24459 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
24463 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
24464 $tokenizer_self->{_saw_autoloader} = 1;
24465 $tokenizer_self->{_look_for_autoloader} = 0;
24466 scan_bare_identifier();
24470 $tok eq 'SelfLoader'
24471 && $tokenizer_self->{_look_for_selfloader}
24472 && ( $last_nonblank_token eq 'use'
24473 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
24474 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
24477 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
24478 $tokenizer_self->{_saw_selfloader} = 1;
24479 $tokenizer_self->{_look_for_selfloader} = 0;
24480 scan_bare_identifier();
24483 elsif ( ( $tok eq 'constant' )
24484 and ( $last_nonblank_token eq 'use' ) )
24486 scan_bare_identifier();
24487 my ( $next_nonblank_token, $i_next ) =
24488 find_next_nonblank_token( $i, $rtokens,
24489 $max_token_index );
24491 if ($next_nonblank_token) {
24493 if ( $is_keyword{$next_nonblank_token} ) {
24495 # Assume qw is used as a quote and okay, as in:
24496 # use constant qw{ DEBUG 0 };
24497 # Not worth trying to parse for just a warning
24498 if ( $next_nonblank_token ne 'qw' ) {
24500 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
24505 # FIXME: could check for error in which next token is
24506 # not a word (number, punctuation, ..)
24508 $is_constant{$current_package}
24509 {$next_nonblank_token} = 1;
24514 # various quote operators
24515 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
24516 if ( $expecting == OPERATOR ) {
24518 # patch for paren-less for/foreach glitch, part 1
24519 # perl will accept this construct as valid:
24521 # foreach my $key qw\Uno Due Tres Quadro\ {
24522 # print "Set $key\n";
24524 unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
24526 error_if_expecting_OPERATOR();
24529 $in_quote = $quote_items{$tok};
24530 $allowed_quote_modifiers = $quote_modifiers{$tok};
24532 # All quote types are 'Q' except possibly qw quotes.
24533 # qw quotes are special in that they may generally be trimmed
24534 # of leading and trailing whitespace. So they are given a
24535 # separate type, 'q', unless requested otherwise.
24537 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
24540 $quote_type = $type;
24543 # check for a statement label
24545 ( $next_nonblank_token eq ':' )
24546 && ( $$rtokens[ $i_next + 1 ] ne ':' )
24547 && ( $i_next <= $max_token_index ) # colon on same line
24551 if ( $tok !~ /[A-Z]/ ) {
24552 push @{ $tokenizer_self->{_rlower_case_labels_at} },
24553 $input_line_number;
24561 # 'sub' || 'package'
24562 elsif ( $is_sub_package{$tok_kw} ) {
24563 error_if_expecting_OPERATOR()
24564 if ( $expecting == OPERATOR );
24568 # Note on token types for format, __DATA__, __END__:
24569 # It simplifies things to give these type ';', so that when we
24570 # start rescanning we will be expecting a token of type TERM.
24571 # We will switch to type 'k' before outputting the tokens.
24572 elsif ( $is_format_END_DATA{$tok_kw} ) {
24573 $type = ';'; # make tokenizer look for TERM next
24574 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
24578 elsif ( $is_keyword{$tok_kw} ) {
24581 # Since for and foreach may not be followed immediately
24582 # by an opening paren, we have to remember which keyword
24583 # is associated with the next '('
24584 if ( $is_for_foreach{$tok} ) {
24585 if ( new_statement_ok() ) {
24586 $want_paren = $tok;
24590 # recognize 'use' statements, which are special
24591 elsif ( $is_use_require{$tok} ) {
24592 $statement_type = $tok;
24593 error_if_expecting_OPERATOR()
24594 if ( $expecting == OPERATOR );
24597 # remember my and our to check for trailing ": shared"
24598 elsif ( $is_my_our{$tok} ) {
24599 $statement_type = $tok;
24602 # Check for misplaced 'elsif' and 'else', but allow isolated
24603 # else or elsif blocks to be formatted. This is indicated
24604 # by a last nonblank token of ';'
24605 elsif ( $tok eq 'elsif' ) {
24606 if ( $last_nonblank_token ne ';'
24607 && $last_nonblank_block_type !~
24608 /^(if|elsif|unless)$/ )
24611 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
24615 elsif ( $tok eq 'else' ) {
24617 # patched for SWITCH/CASE
24618 if ( $last_nonblank_token ne ';'
24619 && $last_nonblank_block_type !~
24620 /^(if|elsif|unless|case|when)$/ )
24623 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
24627 elsif ( $tok eq 'continue' ) {
24628 if ( $last_nonblank_token ne ';'
24629 && $last_nonblank_block_type !~
24630 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
24633 # note: ';' '{' and '}' in list above
24634 # because continues can follow bare blocks;
24635 # ':' is labeled block
24637 ############################################
24638 # NOTE: This check has been deactivated because
24639 # continue has an alternative usage for given/when
24640 # blocks in perl 5.10
24641 ## warning("'$tok' should follow a block\n");
24642 ############################################
24646 # patch for SWITCH/CASE if 'case' and 'when' are
24647 # treated as keywords.
24648 elsif ( $tok eq 'when' || $tok eq 'case' ) {
24649 $statement_type = $tok; # next '{' is block
24653 # indent trailing if/unless/while/until
24654 # outdenting will be handled by later indentation loop
24655 ## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
24667 ## if ( $tok =~ /^(if|unless|while|until)$/
24668 ## && $next_nonblank_token ne '(' )
24670 ## $indent_flag = 1;
24674 # check for inline label following
24675 # /^(redo|last|next|goto)$/
24676 elsif (( $last_nonblank_type eq 'k' )
24677 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
24683 # something else --
24686 scan_bare_identifier();
24687 if ( $type eq 'w' ) {
24689 if ( $expecting == OPERATOR ) {
24691 # don't complain about possible indirect object
24695 # sub new($) { ... }
24696 # $b = new A::; # calls A::new
24697 # $c = new A; # same thing but suspicious
24698 # This will call A::new but we have a 'new' in
24699 # main:: which looks like a constant.
24701 if ( $last_nonblank_type eq 'C' ) {
24702 if ( $tok !~ /::$/ ) {
24704 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
24705 Maybe indirectet object notation?
24710 error_if_expecting_OPERATOR("bareword");
24714 # mark bare words immediately followed by a paren as
24716 $next_tok = $$rtokens[ $i + 1 ];
24717 if ( $next_tok eq '(' ) {
24721 # underscore after file test operator is file handle
24722 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
24726 # patch for SWITCH/CASE if 'case' and 'when' are
24727 # not treated as keywords:
24731 && $brace_type[$brace_depth] eq 'switch'
24733 || ( $tok eq 'when'
24734 && $brace_type[$brace_depth] eq 'given' )
24737 $statement_type = $tok; # next '{' is block
24738 $type = 'k'; # for keyword syntax coloring
24741 # patch for SWITCH/CASE if switch and given not keywords
24742 # Switch is not a perl 5 keyword, but we will gamble
24743 # and mark switch followed by paren as a keyword. This
24744 # is only necessary to get html syntax coloring nice,
24745 # and does not commit this as being a switch/case.
24746 if ( $next_nonblank_token eq '('
24747 && ( $tok eq 'switch' || $tok eq 'given' ) )
24749 $type = 'k'; # for keyword syntax coloring
24755 ###############################################################
24756 # section 2: strings of digits
24757 ###############################################################
24758 elsif ( $pre_type eq 'd' ) {
24759 $expecting = operator_expected( $prev_type, $tok, $next_type );
24760 error_if_expecting_OPERATOR("Number")
24761 if ( $expecting == OPERATOR );
24762 my $number = scan_number();
24763 if ( !defined($number) ) {
24765 # shouldn't happen - we should always get a number
24766 warning("non-number beginning with digit--program bug\n");
24767 report_definite_bug();
24771 ###############################################################
24772 # section 3: all other tokens
24773 ###############################################################
24776 last if ( $tok eq '#' );
24777 my $code = $tokenization_code->{$tok};
24780 operator_expected( $prev_type, $tok, $next_type );
24787 # -----------------------------
24788 # end of main tokenization loop
24789 # -----------------------------
24791 if ( $i_tok >= 0 ) {
24792 $routput_token_type->[$i_tok] = $type;
24793 $routput_block_type->[$i_tok] = $block_type;
24794 $routput_container_type->[$i_tok] = $container_type;
24795 $routput_type_sequence->[$i_tok] = $type_sequence;
24796 $routput_indent_flag->[$i_tok] = $indent_flag;
24799 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
24800 $last_last_nonblank_token = $last_nonblank_token;
24801 $last_last_nonblank_type = $last_nonblank_type;
24802 $last_last_nonblank_block_type = $last_nonblank_block_type;
24803 $last_last_nonblank_container_type = $last_nonblank_container_type;
24804 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
24805 $last_nonblank_token = $tok;
24806 $last_nonblank_type = $type;
24807 $last_nonblank_block_type = $block_type;
24808 $last_nonblank_container_type = $container_type;
24809 $last_nonblank_type_sequence = $type_sequence;
24810 $last_nonblank_prototype = $prototype;
24813 # reset indentation level if necessary at a sub or package
24814 # in an attempt to recover from a nesting error
24815 if ( $level_in_tokenizer < 0 ) {
24816 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
24817 reset_indentation_level(0);
24818 brace_warning("resetting level to 0 at $1 $2\n");
24822 # all done tokenizing this line ...
24823 # now prepare the final list of tokens and types
24825 my @token_type = (); # stack of output token types
24826 my @block_type = (); # stack of output code block types
24827 my @container_type = (); # stack of output code container types
24828 my @type_sequence = (); # stack of output type sequence numbers
24829 my @tokens = (); # output tokens
24830 my @levels = (); # structural brace levels of output tokens
24831 my @slevels = (); # secondary nesting levels of output tokens
24832 my @nesting_tokens = (); # string of tokens leading to this depth
24833 my @nesting_types = (); # string of token types leading to this depth
24834 my @nesting_blocks = (); # string of block types leading to this depth
24835 my @nesting_lists = (); # string of list types leading to this depth
24836 my @ci_string = (); # string needed to compute continuation indentation
24837 my @container_environment = (); # BLOCK or LIST
24838 my $container_environment = '';
24839 my $im = -1; # previous $i value
24841 my $ci_string_sum = ones_count($ci_string_in_tokenizer);
24843 # Computing Token Indentation
24845 # The final section of the tokenizer forms tokens and also computes
24846 # parameters needed to find indentation. It is much easier to do it
24847 # in the tokenizer than elsewhere. Here is a brief description of how
24848 # indentation is computed. Perl::Tidy computes indentation as the sum
24851 # (1) structural indentation, such as if/else/elsif blocks
24852 # (2) continuation indentation, such as long parameter call lists.
24854 # These are occasionally called primary and secondary indentation.
24856 # Structural indentation is introduced by tokens of type '{', although
24857 # the actual tokens might be '{', '(', or '['. Structural indentation
24858 # is of two types: BLOCK and non-BLOCK. Default structural indentation
24859 # is 4 characters if the standard indentation scheme is used.
24861 # Continuation indentation is introduced whenever a line at BLOCK level
24862 # is broken before its termination. Default continuation indentation
24863 # is 2 characters in the standard indentation scheme.
24865 # Both types of indentation may be nested arbitrarily deep and
24866 # interlaced. The distinction between the two is somewhat arbitrary.
24868 # For each token, we will define two variables which would apply if
24869 # the current statement were broken just before that token, so that
24870 # that token started a new line:
24872 # $level = the structural indentation level,
24873 # $ci_level = the continuation indentation level
24875 # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
24876 # assuming defaults. However, in some special cases it is customary
24877 # to modify $ci_level from this strict value.
24879 # The total structural indentation is easy to compute by adding and
24880 # subtracting 1 from a saved value as types '{' and '}' are seen. The
24881 # running value of this variable is $level_in_tokenizer.
24883 # The total continuation is much more difficult to compute, and requires
24884 # several variables. These variables are:
24886 # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
24887 # each indentation level, if there are intervening open secondary
24888 # structures just prior to that level.
24889 # $continuation_string_in_tokenizer = a string of 1's and 0's indicating
24890 # if the last token at that level is "continued", meaning that it
24891 # is not the first token of an expression.
24892 # $nesting_block_string = a string of 1's and 0's indicating, for each
24893 # indentation level, if the level is of type BLOCK or not.
24894 # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
24895 # $nesting_list_string = a string of 1's and 0's indicating, for each
24896 # indentation level, if it is appropriate for list formatting.
24897 # If so, continuation indentation is used to indent long list items.
24898 # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
24899 # @{$rslevel_stack} = a stack of total nesting depths at each
24900 # structural indentation level, where "total nesting depth" means
24901 # the nesting depth that would occur if every nesting token -- '{', '[',
24902 # and '(' -- , regardless of context, is used to compute a nesting
24905 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
24906 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
24908 my ( $ci_string_i, $level_i, $nesting_block_string_i,
24909 $nesting_list_string_i, $nesting_token_string_i,
24910 $nesting_type_string_i, );
24912 foreach $i ( @{$routput_token_list} )
24913 { # scan the list of pre-tokens indexes
24915 # self-checking for valid token types
24916 my $type = $routput_token_type->[$i];
24917 my $forced_indentation_flag = $routput_indent_flag->[$i];
24919 # See if we should undo the $forced_indentation_flag.
24920 # Forced indentation after 'if', 'unless', 'while' and 'until'
24921 # expressions without trailing parens is optional and doesn't
24922 # always look good. It is usually okay for a trailing logical
24923 # expression, but if the expression is a function call, code block,
24924 # or some kind of list it puts in an unwanted extra indentation
24925 # level which is hard to remove.
24927 # Example where extra indentation looks ok:
24929 # if $det_a < 0 and $det_b > 0
24930 # or $det_a > 0 and $det_b < 0;
24932 # Example where extra indentation is not needed because
24933 # the eval brace also provides indentation:
24934 # print "not " if defined eval {
24935 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
24938 # The following rule works fairly well:
24939 # Undo the flag if the end of this line, or start of the next
24940 # line, is an opening container token or a comma.
24941 # This almost always works, but if not after another pass it will
24943 if ( $forced_indentation_flag && $type eq 'k' ) {
24945 my $ilast = $routput_token_list->[$ixlast];
24946 my $toklast = $routput_token_type->[$ilast];
24947 if ( $toklast eq '#' ) {
24949 $ilast = $routput_token_list->[$ixlast];
24950 $toklast = $routput_token_type->[$ilast];
24952 if ( $toklast eq 'b' ) {
24954 $ilast = $routput_token_list->[$ixlast];
24955 $toklast = $routput_token_type->[$ilast];
24957 if ( $toklast =~ /^[\{,]$/ ) {
24958 $forced_indentation_flag = 0;
24961 ( $toklast, my $i_next ) =
24962 find_next_nonblank_token( $max_token_index, $rtokens,
24963 $max_token_index );
24964 if ( $toklast =~ /^[\{,]$/ ) {
24965 $forced_indentation_flag = 0;
24970 # if we are already in an indented if, see if we should outdent
24971 if ($indented_if_level) {
24973 # don't try to nest trailing if's - shouldn't happen
24974 if ( $type eq 'k' ) {
24975 $forced_indentation_flag = 0;
24978 # check for the normal case - outdenting at next ';'
24979 elsif ( $type eq ';' ) {
24980 if ( $level_in_tokenizer == $indented_if_level ) {
24981 $forced_indentation_flag = -1;
24982 $indented_if_level = 0;
24986 # handle case of missing semicolon
24987 elsif ( $type eq '}' ) {
24988 if ( $level_in_tokenizer == $indented_if_level ) {
24989 $indented_if_level = 0;
24991 # TBD: This could be a subroutine call
24992 $level_in_tokenizer--;
24993 if ( @{$rslevel_stack} > 1 ) {
24994 pop( @{$rslevel_stack} );
24996 if ( length($nesting_block_string) > 1 )
24997 { # true for valid script
24998 chop $nesting_block_string;
24999 chop $nesting_list_string;
25006 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
25007 $level_i = $level_in_tokenizer;
25009 # This can happen by running perltidy on non-scripts
25010 # although it could also be bug introduced by programming change.
25011 # Perl silently accepts a 032 (^Z) and takes it as the end
25012 if ( !$is_valid_token_type{$type} ) {
25013 my $val = ord($type);
25015 "unexpected character decimal $val ($type) in script\n");
25016 $tokenizer_self->{_in_error} = 1;
25019 # ----------------------------------------------------------------
25020 # TOKEN TYPE PATCHES
25021 # output __END__, __DATA__, and format as type 'k' instead of ';'
25022 # to make html colors correct, etc.
25023 my $fix_type = $type;
25024 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
25026 # output anonymous 'sub' as keyword
25027 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
25029 # -----------------------------------------------------------------
25031 $nesting_token_string_i = $nesting_token_string;
25032 $nesting_type_string_i = $nesting_type_string;
25033 $nesting_block_string_i = $nesting_block_string;
25034 $nesting_list_string_i = $nesting_list_string;
25036 # set primary indentation levels based on structural braces
25037 # Note: these are set so that the leading braces have a HIGHER
25038 # level than their CONTENTS, which is convenient for indentation
25039 # Also, define continuation indentation for each token.
25040 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
25043 # use environment before updating
25044 $container_environment =
25045 $nesting_block_flag ? 'BLOCK'
25046 : $nesting_list_flag ? 'LIST'
25049 # if the difference between total nesting levels is not 1,
25050 # there are intervening non-structural nesting types between
25051 # this '{' and the previous unclosed '{'
25052 my $intervening_secondary_structure = 0;
25053 if ( @{$rslevel_stack} ) {
25054 $intervening_secondary_structure =
25055 $slevel_in_tokenizer - $rslevel_stack->[-1];
25058 # Continuation Indentation
25060 # Having tried setting continuation indentation both in the formatter and
25061 # in the tokenizer, I can say that setting it in the tokenizer is much,
25062 # much easier. The formatter already has too much to do, and can't
25063 # make decisions on line breaks without knowing what 'ci' will be at
25064 # arbitrary locations.
25066 # But a problem with setting the continuation indentation (ci) here
25067 # in the tokenizer is that we do not know where line breaks will actually
25068 # be. As a result, we don't know if we should propagate continuation
25069 # indentation to higher levels of structure.
25071 # For nesting of only structural indentation, we never need to do this.
25072 # For example, in a long if statement, like this
25074 # if ( !$output_block_type[$i]
25075 # && ($in_statement_continuation) )
25080 # the second line has ci but we do not normally give the lines within the BLOCK
25081 # any ci. This would be true if we had blocks nested arbitrarily deeply.
25083 # But consider something like this, where we have created a break after
25084 # an opening paren on line 1, and the paren is not (currently) a
25085 # structural indentation token:
25087 # my $file = $menubar->Menubutton(
25088 # qw/-text File -underline 0 -menuitems/ => [
25090 # Cascade => '~View',
25094 # The second line has ci, so it would seem reasonable to propagate it
25095 # down, giving the third line 1 ci + 1 indentation. This suggests the
25096 # following rule, which is currently used to propagate ci down: if there
25097 # are any non-structural opening parens (or brackets, or braces), before
25098 # an opening structural brace, then ci is propagated down, and otherwise
25099 # not. The variable $intervening_secondary_structure contains this
25100 # information for the current token, and the string
25101 # "$ci_string_in_tokenizer" is a stack of previous values of this
25104 # save the current states
25105 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
25106 $level_in_tokenizer++;
25108 if ($forced_indentation_flag) {
25110 # break BEFORE '?' when there is forced indentation
25111 if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
25112 if ( $type eq 'k' ) {
25113 $indented_if_level = $level_in_tokenizer;
25116 # do not change container environment here if we are not
25117 # at a real list. Adding this check prevents "blinkers"
25118 # often near 'unless' clauses, such as in the following
25123 ## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
25126 $nesting_block_string .= "$nesting_block_flag";
25130 if ( $routput_block_type->[$i] ) {
25131 $nesting_block_flag = 1;
25132 $nesting_block_string .= '1';
25135 $nesting_block_flag = 0;
25136 $nesting_block_string .= '0';
25140 # we will use continuation indentation within containers
25141 # which are not blocks and not logical expressions
25143 if ( !$routput_block_type->[$i] ) {
25145 # propagate flag down at nested open parens
25146 if ( $routput_container_type->[$i] eq '(' ) {
25147 $bit = 1 if $nesting_list_flag;
25150 # use list continuation if not a logical grouping
25151 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
25155 $is_logical_container{ $routput_container_type->[$i]
25159 $nesting_list_string .= $bit;
25160 $nesting_list_flag = $bit;
25162 $ci_string_in_tokenizer .=
25163 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
25164 $ci_string_sum = ones_count($ci_string_in_tokenizer);
25165 $continuation_string_in_tokenizer .=
25166 ( $in_statement_continuation > 0 ) ? '1' : '0';
25168 # Sometimes we want to give an opening brace continuation indentation,
25169 # and sometimes not. For code blocks, we don't do it, so that the leading
25170 # '{' gets outdented, like this:
25172 # if ( !$output_block_type[$i]
25173 # && ($in_statement_continuation) )
25176 # For other types, we will give them continuation indentation. For example,
25177 # here is how a list looks with the opening paren indented:
25180 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
25181 # [ "homer", "marge", "bart" ], );
25183 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
25185 my $total_ci = $ci_string_sum;
25187 !$routput_block_type->[$i] # patch: skip for BLOCK
25188 && ($in_statement_continuation)
25189 && !( $forced_indentation_flag && $type eq ':' )
25192 $total_ci += $in_statement_continuation
25193 unless ( $ci_string_in_tokenizer =~ /1$/ );
25196 $ci_string_i = $total_ci;
25197 $in_statement_continuation = 0;
25200 elsif ($type eq '}'
25202 || $forced_indentation_flag < 0 )
25205 # only a nesting error in the script would prevent popping here
25206 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
25208 $level_i = --$level_in_tokenizer;
25210 # restore previous level values
25211 if ( length($nesting_block_string) > 1 )
25212 { # true for valid script
25213 chop $nesting_block_string;
25214 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
25215 chop $nesting_list_string;
25216 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
25218 chop $ci_string_in_tokenizer;
25219 $ci_string_sum = ones_count($ci_string_in_tokenizer);
25221 $in_statement_continuation =
25222 chop $continuation_string_in_tokenizer;
25224 # zero continuation flag at terminal BLOCK '}' which
25225 # ends a statement.
25226 if ( $routput_block_type->[$i] ) {
25228 # ...These include non-anonymous subs
25229 # note: could be sub ::abc { or sub 'abc
25230 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
25232 # note: older versions of perl require the /gc modifier
25233 # here or else the \G does not work.
25234 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
25236 $in_statement_continuation = 0;
25240 # ...and include all block types except user subs with
25241 # block prototypes and these: (sort|grep|map|do|eval)
25242 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
25244 $is_zero_continuation_block_type{
25245 $routput_block_type->[$i]
25248 $in_statement_continuation = 0;
25251 # ..but these are not terminal types:
25252 # /^(sort|grep|map|do|eval)$/ )
25254 $is_not_zero_continuation_block_type{
25255 $routput_block_type->[$i]
25260 # ..and a block introduced by a label
25261 # /^\w+\s*:$/gc ) {
25262 elsif ( $routput_block_type->[$i] =~ /:$/ ) {
25263 $in_statement_continuation = 0;
25266 # user function with block prototype
25268 $in_statement_continuation = 0;
25272 # If we are in a list, then
25273 # we must set continuation indentation at the closing
25274 # paren of something like this (paren after $check):
25277 # ( not defined $check )
25279 # or $check eq "new"
25280 # or $check eq "old",
25282 elsif ( $tok eq ')' ) {
25283 $in_statement_continuation = 1
25284 if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
25287 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
25290 # use environment after updating
25291 $container_environment =
25292 $nesting_block_flag ? 'BLOCK'
25293 : $nesting_list_flag ? 'LIST'
25295 $ci_string_i = $ci_string_sum + $in_statement_continuation;
25296 $nesting_block_string_i = $nesting_block_string;
25297 $nesting_list_string_i = $nesting_list_string;
25300 # not a structural indentation type..
25303 $container_environment =
25304 $nesting_block_flag ? 'BLOCK'
25305 : $nesting_list_flag ? 'LIST'
25308 # zero the continuation indentation at certain tokens so
25309 # that they will be at the same level as its container. For
25310 # commas, this simplifies the -lp indentation logic, which
25311 # counts commas. For ?: it makes them stand out.
25312 if ($nesting_list_flag) {
25313 if ( $type =~ /^[,\?\:]$/ ) {
25314 $in_statement_continuation = 0;
25318 # be sure binary operators get continuation indentation
25320 $container_environment
25321 && ( $type eq 'k' && $is_binary_keyword{$tok}
25322 || $is_binary_type{$type} )
25325 $in_statement_continuation = 1;
25328 # continuation indentation is sum of any open ci from previous
25329 # levels plus the current level
25330 $ci_string_i = $ci_string_sum + $in_statement_continuation;
25332 # update continuation flag ...
25333 # if this isn't a blank or comment..
25334 if ( $type ne 'b' && $type ne '#' ) {
25336 # and we are in a BLOCK
25337 if ($nesting_block_flag) {
25339 # the next token after a ';' and label starts a new stmt
25340 if ( $type eq ';' || $type eq 'J' ) {
25341 $in_statement_continuation = 0;
25344 # otherwise, we are continuing the current statement
25346 $in_statement_continuation = 1;
25350 # if we are not in a BLOCK..
25353 # do not use continuation indentation if not list
25354 # environment (could be within if/elsif clause)
25355 if ( !$nesting_list_flag ) {
25356 $in_statement_continuation = 0;
25359 # otherwise, the next token after a ',' starts a new term
25360 elsif ( $type eq ',' ) {
25361 $in_statement_continuation = 0;
25364 # otherwise, we are continuing the current term
25366 $in_statement_continuation = 1;
25372 if ( $level_in_tokenizer < 0 ) {
25373 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
25374 $tokenizer_self->{_saw_negative_indentation} = 1;
25375 warning("Starting negative indentation\n");
25379 # set secondary nesting levels based on all containment token types
25380 # Note: these are set so that the nesting depth is the depth
25381 # of the PREVIOUS TOKEN, which is convenient for setting
25382 # the strength of token bonds
25383 my $slevel_i = $slevel_in_tokenizer;
25386 if ( $is_opening_type{$type} ) {
25387 $slevel_in_tokenizer++;
25388 $nesting_token_string .= $tok;
25389 $nesting_type_string .= $type;
25393 elsif ( $is_closing_type{$type} ) {
25394 $slevel_in_tokenizer--;
25395 my $char = chop $nesting_token_string;
25397 if ( $char ne $matching_start_token{$tok} ) {
25398 $nesting_token_string .= $char . $tok;
25399 $nesting_type_string .= $type;
25402 chop $nesting_type_string;
25406 push( @block_type, $routput_block_type->[$i] );
25407 push( @ci_string, $ci_string_i );
25408 push( @container_environment, $container_environment );
25409 push( @container_type, $routput_container_type->[$i] );
25410 push( @levels, $level_i );
25411 push( @nesting_tokens, $nesting_token_string_i );
25412 push( @nesting_types, $nesting_type_string_i );
25413 push( @slevels, $slevel_i );
25414 push( @token_type, $fix_type );
25415 push( @type_sequence, $routput_type_sequence->[$i] );
25416 push( @nesting_blocks, $nesting_block_string );
25417 push( @nesting_lists, $nesting_list_string );
25419 # now form the previous token
25422 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
25426 substr( $input_line, $$rtoken_map[$im], $num ) );
25432 $num = length($input_line) - $$rtoken_map[$im]; # make the last token
25434 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
25437 $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
25438 $tokenizer_self->{_in_quote} = $in_quote;
25439 $tokenizer_self->{_quote_target} =
25440 $in_quote ? matching_end_token($quote_character) : "";
25441 $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
25443 $line_of_tokens->{_rtoken_type} = \@token_type;
25444 $line_of_tokens->{_rtokens} = \@tokens;
25445 $line_of_tokens->{_rblock_type} = \@block_type;
25446 $line_of_tokens->{_rcontainer_type} = \@container_type;
25447 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
25448 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
25449 $line_of_tokens->{_rlevels} = \@levels;
25450 $line_of_tokens->{_rslevels} = \@slevels;
25451 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
25452 $line_of_tokens->{_rci_levels} = \@ci_string;
25453 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
25457 } # end tokenize_this_line
25459 #########i#############################################################
25460 # Tokenizer routines which assist in identifying token types
25461 #######################################################################
25463 sub operator_expected {
25465 # Many perl symbols have two or more meanings. For example, '<<'
25466 # can be a shift operator or a here-doc operator. The
25467 # interpretation of these symbols depends on the current state of
25468 # the tokenizer, which may either be expecting a term or an
25469 # operator. For this example, a << would be a shift if an operator
25470 # is expected, and a here-doc if a term is expected. This routine
25471 # is called to make this decision for any current token. It returns
25472 # one of three possible values:
25474 # OPERATOR - operator expected (or at least, not a term)
25475 # UNKNOWN - can't tell
25476 # TERM - a term is expected (or at least, not an operator)
25478 # The decision is based on what has been seen so far. This
25479 # information is stored in the "$last_nonblank_type" and
25480 # "$last_nonblank_token" variables. For example, if the
25481 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
25482 # if $last_nonblank_type is 'n' (numeric), we are expecting an
25485 # If UNKNOWN is returned, the calling routine must guess. A major
25486 # goal of this tokenizer is to minimize the possibility of returning
25487 # UNKNOWN, because a wrong guess can spoil the formatting of a
25490 # adding NEW_TOKENS: it is critically important that this routine be
25491 # updated to allow it to determine if an operator or term is to be
25492 # expected after the new token. Doing this simply involves adding
25493 # the new token character to one of the regexes in this routine or
25494 # to one of the hash lists
25495 # that it uses, which are initialized in the BEGIN section.
25496 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
# Arguments (as used below):
#   $prev_type - only compared against 'b'; presumably the type of the
#                token just before $tok (TODO confirm against callers)
#   $tok       - text of the current token
#   $next_type - compared against 'b' and '/'; presumably the type of
#                the token following $tok (TODO confirm against callers)
# The branches below are order dependent: the first matching test on
# $last_nonblank_type / $last_nonblank_token decides the result.
25499 my ( $prev_type, $tok, $next_type ) = @_;
25501 my $op_expected = UNKNOWN;
25503 #print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
25505 # Note: function prototype is available for token type 'U' for future
25506 # program development. It contains the leading and trailing parens,
25507 # and no blanks. It might be used to eliminate token type 'C', for
25508 # example (prototype = '()'). Thus:
25509 # if ($last_nonblank_type eq 'U') {
25510 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
25513 # A possible filehandle (or object) requires some care...
25514 if ( $last_nonblank_type eq 'Z' ) {
# a 'Z' token starting with a letter or underscore: cannot decide
25517 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
25518 $op_expected = UNKNOWN;
25521 # For possible file handle like "$a", Perl uses weird parsing rules.
25523 # print $a/2,"/hi"; - division
25524 # print $a / 2,"/hi"; - division
25525 # print $a/ 2,"/hi"; - division
25526 # print $a /2,"/hi"; - pattern (and error)!
25527 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
25528 $op_expected = TERM;
25531 # Note when an operation is being done where a
25532 # filehandle might be expected, since a change in whitespace
25533 # could change the interpretation of the statement.
25535 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
25536 complain("operator in print statement not recommended\n");
25537 $op_expected = OPERATOR;
25542 # handle something after 'do' and 'eval'
25543 elsif ( $is_block_operator{$last_nonblank_token} ) {
25545 # something like $a = eval "expression";
25547 if ( $last_nonblank_type eq 'k' ) {
25548 $op_expected = TERM; # expression or list mode following keyword
25551 # something like $a = do { BLOCK } / 2;
25554 $op_expected = OPERATOR; # block mode following }
25558 # handle bare word..
25559 elsif ( $last_nonblank_type eq 'w' ) {
25561 # unfortunately, we can't tell what type of token to expect next
25562 # after most bare words
25563 $op_expected = UNKNOWN;
25566 # operator, but not term possible after these types
25567 # Note: moved ')' from type to token because parens in list context
25568 # get marked as '{' '}' now. This is a minor glitch in the following:
25569 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
# Note: the token pattern below is anchored only at its start, so it
# matches any token that begins with ')', '$' or '->'.
25571 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
25572 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
25574 $op_expected = OPERATOR;
25576 # in a 'use' statement, numbers and v-strings are not true
25577 # numbers, so to avoid incorrect error messages, we will
25578 # mark them as unknown for now (use.t)
25579 # TODO: it would be much nicer to create a new token V for VERSION
25580 # number in a use statement. Then this could be a check on type V
25581 # and related patches which change $statement_type for '=>'
25582 # and ',' could be removed. Further, it would clean things up to
25583 # scan the 'use' statement with a separate subroutine.
25584 if ( ( $statement_type eq 'use' )
25585 && ( $last_nonblank_type =~ /^[nv]$/ ) )
25587 $op_expected = UNKNOWN;
25591 # no operator after many keywords, such as "die", "warn", etc
25592 elsif ( $expecting_term_token{$last_nonblank_token} ) {
25594 # patch for dor.t (defined or).
25595 # perl functions which may be unary operators
25596 # TODO: This list is incomplete, and these should be put
# NOTE(review): in the pattern /^eof|undef|shift|pop$/ the alternation
# binds more loosely than the anchors, so '^' applies only to 'eof' and
# '$' only to 'pop'; 'undef' and 'shift' match anywhere in the token.
# If an exact keyword match is intended, /^(eof|undef|shift|pop)$/ would
# express that - confirm before changing.
25599 && $next_type eq '/'
25600 && $last_nonblank_type eq 'k'
25601 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
25603 $op_expected = OPERATOR;
25606 $op_expected = TERM;
25610 # no operator after things like + - ** (i.e., other operators)
25611 elsif ( $expecting_term_types{$last_nonblank_type} ) {
25612 $op_expected = TERM;
25615 # a few operators, like "time", have an empty prototype () and so
25616 # take no parameters but produce a value to operate on
25617 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
25618 $op_expected = OPERATOR;
25621 # post-increment and decrement produce values to be operated on
25622 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
25623 $op_expected = OPERATOR;
25626 # no value to operate on after sub block
25627 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
25629 # a right brace here indicates the end of a simple block.
25630 # all non-structural right braces have type 'R'
25631 # all braces associated with block operator keywords have been given those
25632 # keywords as "last_nonblank_token" and caught above.
25633 # (This statement is order dependent, and must come after checking
25634 # $last_nonblank_token).
25635 elsif ( $last_nonblank_type eq '}' ) {
25637 # patch for dor.t (defined or).
25639 && $next_type eq '/'
25640 && $last_nonblank_token eq ']' )
25642 $op_expected = OPERATOR;
25645 $op_expected = TERM;
25649 # something else..what did I forget?
25652 # collecting diagnostics on unknown operator types..see what was missed
25653 $op_expected = UNKNOWN;
25655 "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
# optional debug trace of the decision, enabled by a compile-time flag
25659 TOKENIZER_DEBUG_FLAG_EXPECT && do {
25661 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
25663 return $op_expected;
25666 sub new_statement_ok {
25668 # return true if the current token can start a new statement
25669 # USES GLOBAL VARIABLES: $last_nonblank_type
# Takes no arguments. The result is true when label_ok() is true or
# when the last nonblank token has type 'J' (a label, per the trailing
# comment below).
25671 return label_ok() # a label would be ok here
25673 || $last_nonblank_type eq 'J'; # or we follow a label
# NOTE(review): the 'sub' header line for this routine is not visible
# here; it is presumably the label_ok() called by new_statement_ok.
25679 # Decide if a bare word followed by a colon here is a label
25680 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
25681 # $brace_depth, @brace_type
25683 # if it follows an opening or closing code block curly brace..
# (the type must equal the token, i.e. a '{' or '}' whose type is also
# '{' or '}')
25684 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
25685 && $last_nonblank_type eq $last_nonblank_token )
25688 # it is a label if and only if the curly encloses a code block
# the value stored in @brace_type at the current depth is returned
# as-is and used as the truth value
25689 return $brace_type[$brace_depth];
25692 # otherwise, it is a label if and only if it follows a ';'
25695 return ( $last_nonblank_type eq ';' );
25699 sub code_block_type {
25701 # Decide if this is a block of code, and its type.
25702 # Must be called only when $type = $token = '{'
25703 # The problem is to distinguish between the start of a block of code
25704 # and the start of an anonymous hash reference
25705 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
25706 # to indicate the type of code block. (For example, 'last_nonblank_token'
25707 # might be 'if' for an if block, 'else' for an else block, etc).
25708 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
25709 # $last_nonblank_block_type, $brace_depth, @brace_type
# The four arguments are not examined here; they are passed unchanged
# to decide_if_code_block() in the ambiguous cases.
25711 # handle case of multiple '{'s
25713 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
25715 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25716 if ( $last_nonblank_token eq '{'
25717 && $last_nonblank_type eq $last_nonblank_token )
25720 # opening brace where a statement may appear is probably
25721 # a code block but might be an anonymous hash reference
25722 if ( $brace_type[$brace_depth] ) {
25723 return decide_if_code_block( $i, $rtokens, $rtoken_type,
25724 $max_token_index );
25727 # cannot start a code block within an anonymous hash
25733 elsif ( $last_nonblank_token eq ';' ) {
25735 # an opening brace where a statement may appear is probably
25736 # a code block but might be an anonymous hash reference
25737 return decide_if_code_block( $i, $rtokens, $rtoken_type,
25738 $max_token_index );
25741 # handle case of '}{'
25742 elsif ($last_nonblank_token eq '}'
25743 && $last_nonblank_type eq $last_nonblank_token )
25746 # a } { situation ...
25747 # could be hash reference after code block..(blktype1.t)
25748 if ($last_nonblank_block_type) {
25749 return decide_if_code_block( $i, $rtokens, $rtoken_type,
25750 $max_token_index );
25753 # must be a block if it follows a closing hash reference
25755 return $last_nonblank_token;
25759 # NOTE: braces after type characters start code blocks, but for
25760 # simplicity these are not identified as such. See also
25761 # sub is_non_structural_brace.
25762 # elsif ( $last_nonblank_type eq 't' ) {
25763 # return $last_nonblank_token;
25766 # brace after label:
25767 elsif ( $last_nonblank_type eq 'J' ) {
25768 return $last_nonblank_token;
25771 # otherwise, look at previous token. This must be a code block if
25772 # it follows any of these:
25773 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
25774 elsif ( $is_code_block_token{$last_nonblank_token} ) {
25776 # Bug Patch: Note that the opening brace after the 'if' in the following
25777 # snippet is an anonymous hash ref and not a code block!
25778 # print 'hi' if { x => 1, }->{x};
25779 # We can identify this situation because the last nonblank type
25780 # will be a keyword (instead of a closing paren)
25781 if ( $last_nonblank_token =~ /^(if|unless)$/
25782 && $last_nonblank_type eq 'k' )
25787 return $last_nonblank_token;
25791 # or a sub definition
# (the token need only begin with 'sub' or 'package' as a whole word)
25792 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
25793 && $last_nonblank_token =~ /^(sub|package)\b/ )
25795 return $last_nonblank_token;
25798 # user-defined subs with block parameters (like grep/map/eval)
25799 elsif ( $last_nonblank_type eq 'G' ) {
25800 return $last_nonblank_token;
# after a bareword (type 'w') the brace is ambiguous, so look ahead
25804 elsif ( $last_nonblank_type eq 'w' ) {
25805 return decide_if_code_block( $i, $rtokens, $rtoken_type,
25806 $max_token_index );
25809 # anything else must be anonymous hash reference
25815 sub decide_if_code_block {
25817 # USES GLOBAL VARIABLES: $last_nonblank_token
# Arguments: $i is the index of the '{' within @$rtokens / @$rtoken_type,
# and $max_token_index is the last index examined on the current line
# (both inferred from the slices [ $i + 1 .. $max_token_index ] below).
25818 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25819 my ( $next_nonblank_token, $i_next ) =
25820 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25822 # we are at a '{' where a statement may appear.
25823 # We must decide if this brace starts an anonymous hash or a code
25825 # return "" if anonymous hash, and $last_nonblank_token otherwise
25827 # initialize to be code BLOCK
25828 my $code_block_type = $last_nonblank_token;
25830 # Check for the common case of an empty anonymous hash reference:
25831 # Maybe something like sub { { } }
25832 if ( $next_nonblank_token eq '}' ) {
25833 $code_block_type = "";
25838 # To guess if this '{' is an anonymous hash reference, look ahead
25839 # and test as follows:
25841 # it is a hash reference if next come:
25842 # - a string or digit followed by a comma or =>
25843 # - bareword followed by =>
25844 # otherwise it is a code block
25846 # Examples of anonymous hash ref:
25850 # Examples of code blocks:
25851 # {1; print "hello\n", 1;}
25854 # We are only going to look ahead one more (nonblank/comment) line.
25855 # Strange formatting could cause a bad guess, but that's unlikely.
# start with the rest of the current line after the '{' ...
25856 my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
25857 my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
# ... then append pretokens peeked from the following input
25858 my ( $rpre_tokens, $rpre_types ) =
25859 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
25860 # generous, and prevents
25862 # time in mangled files
25863 if ( defined($rpre_types) && @$rpre_types ) {
25864 push @pre_types, @$rpre_types;
25865 push @pre_tokens, @$rpre_tokens;
25868 # put a sentinel token to simplify stopping the search
25869 push @pre_types, '}';
# skip a leading blank ('b') pretoken
25872 $jbeg = 1 if $pre_types[0] eq 'b';
25874 # first look for one of these
25876 # - bareword with leading -
25880 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
25882 # find the closing quote; don't worry about escapes
25883 my $quote_mark = $pre_types[$j];
25884 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
25885 if ( $pre_types[$k] eq $quote_mark ) {
# NOTE(review): $next does not appear to be read in the visible code -
# confirm whether it can be removed
25887 my $next = $pre_types[$j];
25892 elsif ( $pre_types[$j] eq 'd' ) {
25895 elsif ( $pre_types[$j] eq 'w' ) {
25896 unless ( $is_keyword{ $pre_tokens[$j] } ) {
25900 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
# $j has moved past $jbeg only if one of the candidate forms was found
25903 if ( $j > $jbeg ) {
25905 $j++ if $pre_types[$j] eq 'b';
25907 # it's a hash ref if a comma or => follow next
# ('=>' arrives as two pretokens, '=' then '>')
25908 if ( $pre_types[$j] eq ','
25909 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
25911 $code_block_type = "";
25916 return $code_block_type;
# NOTE(review): the 'sub' header line for this routine is not visible
# here; the comments below describe only the visible statements.
25921 # report unexpected token type and show where it is
25922 # USES GLOBAL VARIABLES: $tokenizer_self
# Arguments (as used below):
#   $found, $expecting - text inserted into the warning message
#   $i_tok             - index of the offending pretoken
#   $last_nonblank_i   - index of the previous nonblank pretoken
#                        (ignored unless >= 0)
#   $rpretoken_map     - ref to array of character positions, indexed
#                        by pretoken index
#   $rpretoken_type    - ref to array of pretoken types
#   $input_line        - text of the line being reported
25923 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
25924 $rpretoken_type, $input_line )
# count every call, but only report the first MAX_NAG_MESSAGES of them
25927 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
25928 my $msg = "found $found where $expecting expected";
25929 my $pos = $$rpretoken_map[$i_tok];
25930 interrupt_logfile();
25931 my $input_line_number = $tokenizer_self->{_last_line_number};
25932 my ( $offset, $numbered_line, $underline ) =
25933 make_numbered_line( $input_line_number, $input_line, $pos );
# mark the offending token with '^'
25934 $underline = write_on_underline( $underline, $pos - $offset, '^' );
25937 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
25938 my $pos_prev = $$rpretoken_map[$last_nonblank_i];
# if a blank immediately precedes the offending token, the span ends
# at the start of that blank; otherwise it ends at the offending token
25940 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
25941 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
25944 $num = $pos - $pos_prev;
# limit the '-' span to the 40 characters before the offending token
25946 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
25949 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
25950 $trailer = " (previous token underlined)";
25952 warning( $numbered_line . "\n" );
25953 warning( $underline . "\n" );
25954 warning( $msg . $trailer . "\n" );
25959 sub is_non_structural_brace {
25961 # Decide if a brace or bracket is structural or non-structural
25962 # by looking at the previous token and type
25963 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
# Takes no arguments. NOTE(review): the two-part test at the end of
# this routine is presumably the returned value; the line that opens
# the return expression is not visible here.
25965 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
25966 # Tentatively deactivated because it caused the wrong operator expectation
25968 # $user = @vars[1] / 100;
25969 # Must update sub operator_expected before re-implementing.
25970 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
25974 # NOTE: braces after type characters start code blocks, but for
25975 # simplicity these are not identified as such. See also
25976 # sub code_block_type
25977 # if ($last_nonblank_type eq 't') {return 0}
25979 # otherwise, it is non-structural if it is decorated
25980 # by type information.
25981 # For example, the '{' here is non-structural: ${xxx}
# (the pattern is anchored at the start only: the token must begin with
# one of $ @ * & % ) or with -> or ::)
25983 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
25985 # or if we follow a hash or array closing curly brace or bracket
25986 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
25987 # because the first '}' would have been given type 'R'
25988 || $last_nonblank_type =~ /^([R\]])$/
25992 #########i#############################################################
25993 # Tokenizer routines for tracking container nesting depths
25994 #######################################################################
25996 # The following routines keep track of nesting depths of the nesting
25997 # types, ( [ { and ?. This is necessary for determining the indentation
25998 # level, and also for debugging programs. Not only do they keep track of
25999 # nesting depths of the individual brace types, but they check that each
26000 # of the other brace types is balanced within matching pairs. For
26001 # example, if the program sees this sequence:
26005 # then it can determine that there is an extra left paren somewhere
26006 # between the { and the }. And so on with every other possible
26007 # combination of outer and inner brace types. For another
26012 # which has an extra ] within the parens.
26014 # The brace types have indexes 0 .. 3 which are indexes into
26017 # The pair ? : are treated as just another nesting type, with ? acting
26018 # as the opening brace and : acting as the closing brace.
26022 # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
26024 # saves the nesting depth of brace type $b (where $b is either of the other
26025 # nesting types) when brace type $a enters a new depth. When this depth
26026 # decreases, a check is made that the current depth of brace types $b is
26027 # unchanged, or otherwise there must have been an error. This can
26028 # be very useful for localizing errors, particularly when perl runs to
26029 # the end of a large file (such as this one) and announces that there
26030 # is a problem somewhere.
26032 # A numerical sequence number is maintained for every nesting type,
26033 # so that each matching pair can be uniquely identified in a simple
26036 sub increase_nesting_depth {
# Record entry into a new nesting level of container type $aa.
#   $aa  - index of the nesting type (compared with QUESTION_COLON below)
#   $pos - stored with the line number and line text as the starting
#          location of the new depth; presumably a character position
#          within the line (TODO confirm)
# Returns ( $seqno, $indent ): the sequence number assigned to the new
# container and a flag related to nested ternary indentation.
26037 my ( $aa, $pos ) = @_;
26039 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
26040 # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
26043 $current_depth[$aa]++;
# save the value of $total_depth at the moment this level was entered
26045 $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
26046 my $input_line_number = $tokenizer_self->{_last_line_number};
26047 my $input_line = $tokenizer_self->{_line_text};
26049 # Sequence numbers increment by number of items. This keeps
26050 # a unique set of numbers but still allows the relative location
26051 # of any type to be determined.
26052 $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
26053 my $seqno = $nesting_sequence_number[$aa];
26054 $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
# remember where this level started, for later error messages
26056 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
26057 [ $input_line_number, $input_line, $pos ];
# snapshot the current depth of every other nesting type so that
# decrease_nesting_depth can check they are balanced on exit
26059 for $bb ( 0 .. $#closing_brace_names ) {
26060 next if ( $bb == $aa );
26061 $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
26064 # set a flag for indenting a nested ternary statement
26066 if ( $aa == QUESTION_COLON ) {
26067 $nested_ternary_flag[ $current_depth[$aa] ] = 0;
26068 if ( $current_depth[$aa] > 1 ) {
26069 if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
# the enclosing '?' counts as the direct parent only if no other
# container was opened in between (total depths differ by exactly 1)
26070 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
26071 if ( $pdepth == $total_depth - 1 ) {
26073 $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
# save the statement type for restoration when this container closes,
# then clear it for the contents of the container
26078 $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
26079 $statement_type = "";
26080 return ( $seqno, $indent );
26083 sub decrease_nesting_depth {
# Record exit from a nesting level of container type $aa and check that
# the other nesting types are balanced within it.
#   $aa  - index of the nesting type being closed
#   $pos - used with the current line number and text when reporting
#          errors; presumably a character position in the line
#          (TODO confirm)
# Returns ( $seqno, $outdent ): the sequence number saved for this depth
# when it was opened, and (for '?' ':' pairs) the nested-ternary flag of
# the level being closed.
26085 my ( $aa, $pos ) = @_;
26087 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
26088 # @current_sequence_number, @depth_array, @starting_line_of_current_depth
26092 my $input_line_number = $tokenizer_self->{_last_line_number};
26093 my $input_line = $tokenizer_self->{_line_text};
26097 if ( $current_depth[$aa] > 0 ) {
26099 # set a flag for un-indenting after seeing a nested ternary statement
26100 $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
26101 if ( $aa == QUESTION_COLON ) {
26102 $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
# restore the statement type saved when this container was opened
26104 $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
26106 # check that any brace types $bb contained within are balanced
26107 for $bb ( 0 .. $#closing_brace_names ) {
26108 next if ( $bb == $aa );
26110 unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
26111 $current_depth[$bb] )
# difference between the depth of type $bb now and the depth saved
# when this level of type $aa was entered
26114 $current_depth[$bb] -
26115 $depth_array[$aa][$bb][ $current_depth[$aa] ];
26117 # don't whine too many times
26118 my $saw_brace_error = get_saw_brace_error();
26120 $saw_brace_error <= MAX_NAG_MESSAGES
26122 # if too many closing types have occurred, we probably
26123 # already caught this error
26124 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
26127 interrupt_logfile();
26129 $starting_line_of_current_depth[$aa]
26130 [ $current_depth[$aa] ];
26132 my $rel = [ $input_line_number, $input_line, $pos ];
26136 if ( $diff == 1 || $diff == -1 ) {
26144 ? $opening_brace_names[$bb]
26145 : $closing_brace_names[$bb];
26146 write_error_indicator_pair( @$rsl, '^' );
26148 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
26153 $starting_line_of_current_depth[$bb]
26154 [ $current_depth[$bb] ];
26157 " The most recent un-matched $bname is on line $ml\n";
26158 write_error_indicator_pair( @$rml, '^' );
26160 write_error_indicator_pair( @$rel, '^' );
26164 increment_brace_error();
26167 $current_depth[$aa]--;
# depth is already zero: a closing token with no matching opening token
26171 my $saw_brace_error = get_saw_brace_error();
26172 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
26174 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
26176 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
26178 increment_brace_error();
26180 return ( $seqno, $outdent );
26183 sub check_final_nesting_depths {
# Report an error for every nesting type whose depth is still nonzero,
# pointing at the saved starting location of its current depth.
26186 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
26188 for $aa ( 0 .. $#closing_brace_names ) {
26190 if ( $current_depth[$aa] ) {
26192 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
26195 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
26196 The most recent un-matched $opening_brace_names[$aa] is on line $sl
26198 indicate_error( $msg, @$rsl, '^' );
26199 increment_brace_error();
26204 #########i#############################################################
26205 # Tokenizer routines for looking ahead in input stream
26206 #######################################################################
26208 sub peek_ahead_for_n_nonblank_pre_tokens {
26210 # returns next n pretokens if they exist
26211 # returns undef's if hits eof without seeing any pretokens
26212 # USES GLOBAL VARIABLES: $tokenizer_self
# Argument: the maximum number of pretokens, passed on to pre_tokenize().
# Returns ( $rpre_tokens, $rpre_types ): references as obtained from
# pre_tokenize() for a non-blank, non-comment line found by peeking
# ahead; both stay undefined if no such line is found.
26213 my $max_pretokens = shift;
26216 my ( $rpre_tokens, $rmap, $rpre_types );
# peek at successive lines through the line buffer object
26218 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
26220 $line =~ s/^\s*//; # trim leading blanks
26221 next if ( length($line) <= 0 ); # skip blank
26222 next if ( $line =~ /^#/ ); # skip comment
26223 ( $rpre_tokens, $rmap, $rpre_types ) =
26224 pre_tokenize( $line, $max_pretokens );
26227 return ( $rpre_tokens, $rpre_types );
26230 # look ahead for next non-blank, non-comment line of code
26231 sub peek_ahead_for_nonblank_token {
26233 # USES GLOBAL VARIABLES: $tokenizer_self
# Arguments: $rtokens is a reference to the token array of the current
# line and $max_token_index is the index of its last token. Pretokens
# found on a following line are stored into @$rtokens, beginning two
# positions past $max_token_index ($j starts at $max_token_index + 1
# and is pre-incremented before each store).
26234 my ( $rtokens, $max_token_index ) = @_;
26238 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
26240 $line =~ s/^\s*//; # trim leading blanks
26241 next if ( length($line) <= 0 ); # skip blank
26242 next if ( $line =~ /^#/ ); # skip comment
26243 my ( $rtok, $rmap, $rtype ) =
26244 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
26245 my $j = $max_token_index + 1;
26248 foreach $tok (@$rtok) {
# stop at the first pretoken that contains a newline
26249 last if ( $tok =~ "\n" );
26250 $$rtokens[ ++$j ] = $tok;
26257 #########i#############################################################
26258 # Tokenizer guessing routines for ambiguous situations
26259 #######################################################################
26261 sub guess_if_pattern_or_conditional {
26263 # this routine is called when we have encountered a ? following an
26264 # unknown bareword, and we must decide if it starts a pattern or not
26265 # input parameters:
26266 # $i - token index of the ? starting possible pattern
26267 # output parameters:
26268 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
26269 # msg = a warning or diagnostic message
26270 # USES GLOBAL VARIABLES: $last_nonblank_token
# The remaining arguments are $rtokens and $max_token_index, which are
# passed on to follow_quoted_string() and pattern_expected(), and
# $rtoken_map, which is not used in the visible code.
26271 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
26272 my $is_pattern = 0;
26273 my $msg = "guessing that ? after $last_nonblank_token starts a ";
# nothing follows the ? on this line, so there is nothing to scan
26275 if ( $i >= $max_token_index ) {
26276 $msg .= "conditional (no end to pattern found on the line)\n";
26281 my $next_token = $$rtokens[$i]; # first token after ?
26283 # look for a possible ending ? on this line..
26285 my $quote_depth = 0;
26286 my $quote_character = '';
26290 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
26293 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
26294 $quote_pos, $quote_depth, $max_token_index );
26298 # we didn't find an ending ? on this line,
26299 # so we bias towards conditional
26301 $msg .= "conditional (no ending ? on this line)\n";
26303 # we found an ending ?, so we bias towards a pattern
26307 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
26309 $msg .= "pattern (found ending ? and pattern expected)\n";
26312 $msg .= "pattern (uncertain, but found ending ?)\n";
26316 return ( $is_pattern, $msg );
26319 sub guess_if_pattern_or_division {
26321 # this routine is called when we have encountered a / following an
26322 # unknown bareword, and we must decide if it starts a pattern or is a
26324 # input parameters:
26325 # $i - token index of the / starting possible pattern
26326 # output parameters:
26327 # $is_pattern = 0 if probably division, =1 if probably a pattern
26328 # msg = a warning or diagnostic message
26329 # USES GLOBAL VARIABLES: $last_nonblank_token
# The remaining arguments are $rtokens and $max_token_index, which are
# passed on to numerator_expected(), follow_quoted_string() and
# pattern_expected(), and $rtoken_map, which is not used in the visible
# code.
26330 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
26331 my $is_pattern = 0;
26332 my $msg = "guessing that / after $last_nonblank_token starts a ";
# nothing follows the / on this line, so there is nothing to scan
26334 if ( $i >= $max_token_index ) {
26335 "division (no end to pattern found on the line)\n";
# the tests below treat a result >= 0 as "division is possible here"
26339 my $divide_expected =
26340 numerator_expected( $i, $rtokens, $max_token_index );
26342 my $next_token = $$rtokens[$i]; # first token after slash
26344 # look for a possible ending / on this line..
26346 my $quote_depth = 0;
26347 my $quote_character = '';
26351 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
26354 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
26355 $quote_pos, $quote_depth, $max_token_index );
26359 # we didn't find an ending / on this line,
26360 # so we bias towards division
26361 if ( $divide_expected >= 0 ) {
26363 $msg .= "division (no ending / on this line)\n";
# NOTE(review): this branch assigns with '=' where every other branch
# appends with '.=', so the "guessing that / after ..." prefix is
# dropped from this message - confirm whether that is intended
26366 $msg = "multi-line pattern (division not possible)\n";
26372 # we found an ending /, so we bias towards a pattern
26375 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
26377 if ( $divide_expected >= 0 ) {
# both readings are possible: a closing / more than 60 tokens away
# is taken as evidence for division
26379 if ( $i - $ibeg > 60 ) {
26380 $msg .= "division (matching / too distant)\n";
26384 $msg .= "pattern (but division possible too)\n";
26390 $msg .= "pattern (division not possible)\n";
26395 if ( $divide_expected >= 0 ) {
26397 $msg .= "division (pattern not possible)\n";
26402 "pattern (uncertain, but division would not work here)\n";
26407 return ( $is_pattern, $msg );
26410 # try to resolve here-doc vs. shift by looking ahead for
26411 # non-code or the end token (currently only looks for end token)
26412 # returns 1 if it is probably a here doc, 0 if not
# (the visible code also sets the result to -1 when the end of the
# input is reached without seeing the target)
26413 sub guess_if_here_doc {
26415 # This is how many lines we will search for a target as part of the
26416 # guessing strategy. It is a constant because there is probably
26417 # little reason to change it.
26418 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
26420 use constant HERE_DOC_WINDOW => 40;
# Argument: the token following '<<', treated as the candidate here-doc
# target string.
26422 my $next_token = shift;
26423 my $here_doc_expected = 0;
26426 my $msg = "checking <<";
26428 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
# NOTE(review): $next_token is interpolated into the pattern without
# \Q...\E, so any regex metacharacters in it are treated as pattern
# syntax rather than literal text - confirm whether quoting is needed
26432 if ( $line =~ /^$next_token$/ ) {
26433 $msg .= " -- found target $next_token ahead $k lines\n";
26434 $here_doc_expected = 1; # got it
# give up after peeking at HERE_DOC_WINDOW lines
26437 last if ( $k >= HERE_DOC_WINDOW );
26440 unless ($here_doc_expected) {
# $line is undefined here only if peek_ahead ran out of input
26442 if ( !defined($line) ) {
26443 $here_doc_expected = -1; # hit eof without seeing target
26444 $msg .= " -- must be shift; target $next_token not in file\n";
26447 else { # still unsure..taking a wild guess
26449 if ( !$is_constant{$current_package}{$next_token} ) {
26450 $here_doc_expected = 1;
26452 " -- guessing it's a here-doc ($next_token not a constant)\n";
26456 " -- guessing it's a shift ($next_token is a constant)\n";
26460 write_logfile_entry($msg);
26461 return $here_doc_expected;
26464 #########i#############################################################
26465 # Tokenizer Routines for scanning identifiers and related items
26466 #######################################################################
26468 sub scan_bare_identifier_do {
26470 # this routine is called to scan a token starting with an alphanumeric
26471 # variable or package separator, :: or '.
26472 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
26473 # $last_nonblank_type,@paren_type, $paren_depth
# Returns the list ( $i, $tok, $type, $prototype ).
26475 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
26479 my $package = undef;
26483 # we have to back up one pretoken at a :: since each : is one pretoken
26484 if ( $tok eq '::' ) { $i_beg-- }
26485 if ( $tok eq '->' ) { $i_beg-- }
26486 my $pos_beg = $$rtoken_map[$i_beg]; # character position where the scan starts
26487 pos($input_line) = $pos_beg; # so that \G in the match below anchors here
26494 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
26496 my $pos = pos($input_line);
26497 my $numc = $pos - $pos_beg;
26498 $tok = substr( $input_line, $pos_beg, $numc );
26500 # type 'w' includes anything without leading type info
26501 # ($,%,@,*) including something like abc::def::ghi
26505 if ( defined($2) ) { $sub_name = $2; }
26506 if ( defined($1) ) {
26509 # patch: don't allow isolated package name which just ends
26510 # in the old style package separator (single quote). Example:
26512 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
# normalize the package name: old-style ' separators become ::, a leading
# :: is taken to mean package main, and a trailing :: is dropped
26516 $package =~ s/\'/::/g;
26517 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26518 $package =~ s/::$//;
26521 $package = $current_package; # NOTE(review): presumably the no-qualifier case; the enclosing else is not visible here
26523 if ( $is_keyword{$tok} ) {
26528 # if it is a bareword..
26529 if ( $type eq 'w' ) {
26531 # check for v-string with leading 'v' type character
26532 # (This seems to have precedence over filehandle, type 'Y')
26533 if ( $tok =~ /^v\d[_\d]*$/ ) {
26535 # we only have the first part - something like 'v101' -
26537 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
26538 $pos = pos($input_line);
26539 $numc = $pos - $pos_beg;
26540 $tok = substr( $input_line, $pos_beg, $numc );
26544 # warn if this version can't handle v-strings
26545 report_v_string($tok);
26548 elsif ( $is_constant{$package}{$sub_name} ) {
26552 # bareword after sort has implied empty prototype; for example:
26553 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
26554 # This has priority over whatever the user has specified.
26555 elsif ($last_nonblank_token eq 'sort'
26556 && $last_nonblank_type eq 'k' )
26561 # Note: strangely, perl does not seem to really let you create
26562 # functions which act like eval and do, in the sense that eval
26563 # and do may have operators following the final }, but any operators
26564 # that you create with prototype (&) apparently do not allow
26565 # trailing operators, only terms. This seems strange.
26566 # If this ever changes, here is the update
26567 # to make perltidy behave accordingly:
26569 # elsif ( $is_block_function{$package}{$tok} ) {
26570 # $tok='eval'; # patch to do braces like eval - doesn't work
26573 # FIXME: This could become a separate type to allow for different
26575 elsif ( $is_block_function{$package}{$sub_name} ) {
26579 elsif ( $is_block_list_function{$package}{$sub_name} ) {
26582 elsif ( $is_user_function{$package}{$sub_name} ) {
26584 $prototype = $user_function_prototype{$package}{$sub_name};
26587 # check for indirect object
26590 # added 2001-03-27: must not be followed immediately by '('
26592 ( $input_line !~ m/\G\(/gc )
26597 # preceded by keyword like 'print', 'printf' and friends
26598 $is_indirect_object_taker{$last_nonblank_token}
26600 # or preceded by something like 'print(' or 'printf('
26602 ( $last_nonblank_token eq '(' )
26603 && $is_indirect_object_taker{ $paren_type[$paren_depth]
26611 # may not be indirect object unless followed by a space
26612 if ( $input_line =~ m/\G\s+/gc ) {
26616 # Perl's indirect object notation is a very bad
26617 # thing and can cause subtle bugs, especially for
26618 # beginning programmers. And I haven't even been
26619 # able to figure out a sane warning scheme which
26620 # doesn't get in the way of good scripts.
26622 # Complain if a filehandle has any lower case
26623 # letters. This is suggested good practice.
26624 # Use 'sub_name' because something like
26625 # main::MYHANDLE is ok for filehandle
26626 if ( $sub_name =~ /[a-z]/ ) {
26628 # could be bug caused by older perltidy if
26630 if ( $input_line =~ m/\G\s*\(/gc ) {
26632 "Caution: unknown word '$tok' in indirect object slot\n"
26638 # bareword not followed by a space -- may not be filehandle
26639 # (may be function call defined in a 'use' statement)
26646 # Now we must convert back from character position
26647 # to pre_token index.
26648 # I don't think an error flag can occur here ..but who knows
26651 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26653 warning("scan_bare_identifier: Possibly invalid tokenization\n");
26657 # no match but line not blank - could be syntax error
26658 # perl will take '::' alone without complaint
26662 # change this warning to log message if it becomes annoying
26663 warning("didn't find identifier after leading ::\n");
26665 return ( $i, $tok, $type, $prototype );
26670 # This is the new scanner and will eventually replace scan_identifier.
26671 # Only type 'sub' and 'package' are implemented.
26672 # Token types $ * % @ & -> are not yet implemented.
26674 # Scan identifier following a type token.
26675 # The type of call depends on $id_scan_state: $id_scan_state = ''
26676 # for starting call, in which case $tok must be the token defining
26679 # If the type token is the last nonblank token on the line, a value
26680 # of $id_scan_state = $tok is returned, indicating that further
26681 # calls must be made to get the identifier. If the type token is
26682 # not the last nonblank token on the line, the identifier is
26683 # scanned and handled and a value of '' is returned.
26684 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
26685 # $statement_type, $tokenizer_self
# Returns the list ( $i, $tok, $type, $id_scan_state ).
26687 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
26691 my ( $i_beg, $pos_beg );
26693 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
26694 #my ($a,$b,$c) = caller;
26695 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
26697 # on re-entry, start scanning at first token on the line
26698 if ($id_scan_state) {
26703 # on initial entry, start scanning just after type token
26706 $id_scan_state = $tok; # the type token itself names the state
26710 # find $i_beg = index of next nonblank token,
26711 # and handle empty lines
26712 my $blank_line = 0;
26713 my $next_nonblank_token = $$rtokens[$i_beg];
26714 if ( $i_beg > $max_token_index ) {
26719 # only a '#' immediately after a '$' is not a comment
26720 if ( $next_nonblank_token eq '#' ) {
26721 unless ( $tok eq '$' ) {
26726 if ( $next_nonblank_token =~ /^\s/ ) {
26727 ( $next_nonblank_token, $i_beg ) =
26728 find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
26729 $max_token_index );
26730 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
26736 # handle non-blank line; identifier, if any, must follow
26737 unless ($blank_line) {
# dispatch on the state: only 'sub' and 'package' have scanners
26739 if ( $id_scan_state eq 'sub' ) {
26740 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
26741 $input_line, $i, $i_beg,
26742 $tok, $type, $rtokens,
26743 $rtoken_map, $id_scan_state, $max_token_index
26747 elsif ( $id_scan_state eq 'package' ) {
26748 ( $i, $tok, $type ) =
26749 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
26750 $rtoken_map, $max_token_index );
26751 $id_scan_state = ''; # do_scan_package returns no scan state, so clear it here
26755 warning("invalid token in scan_id: $tok\n");
26756 $id_scan_state = '';
# sanity check: a pending scan state must come with a defined, true $type
26760 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
26762 # shouldn't happen:
26764 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
26766 report_definite_bug();
26769 TOKENIZER_DEBUG_FLAG_NSCAN && do {
26771 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
26773 return ( $i, $tok, $type, $id_scan_state );
# check_prototype records, per package and sub name, what a sub declaration
# tells us about the sub: that it is a user function, its prototype in
# normalized "(...)" form, and whether it is a block function, a block list
# function, or a constant.  Does nothing unless both $package and $subname
# are defined.
26776 sub check_prototype {
26777 my ( $proto, $package, $subname ) = @_;
26778 return unless ( defined($package) && defined($subname) );
26779 if ( defined($proto) ) {
# strip the enclosing parens and the blanks around them
26780 $proto =~ s/^\s*\(\s*//;
26781 $proto =~ s/\s*\)$//;
26783 $is_user_function{$package}{$subname} = 1;
26784 $user_function_prototype{$package}{$subname} = "($proto)";
26786 # prototypes containing '&' must be treated specially..
26787 if ( $proto =~ /\&/ ) {
26789 # right curly braces of prototypes ending in
26790 # '&' may be followed by an operator
26791 if ( $proto =~ /\&$/ ) {
26792 $is_block_function{$package}{$subname} = 1;
26795 # right curly braces of prototypes NOT ending in
26796 # '&' may NOT be followed by an operator
# (this test is the exact negation of the 'if' test above)
26797 elsif ( $proto !~ /\&$/ ) {
26798 $is_block_list_function{$package}{$subname} = 1;
# NOTE(review): presumably the empty-prototype case; the enclosing
# condition is not visible here
26803 $is_constant{$package}{$subname} = 1;
# NOTE(review): presumably the no-prototype case (else of defined($proto))
26807 $is_user_function{$package}{$subname} = 1;
26811 sub do_scan_package {
26813 # do_scan_package parses a package name
26814 # it is called with $i_beg equal to the index of the first nonblank
26815 # token following a 'package' token.
26816 # USES GLOBAL VARIABLES: $current_package,
# Returns the list ( $i, $tok, $type ).
26818 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
26821 my $package = undef;
26822 my $pos_beg = $$rtoken_map[$i_beg]; # character position where the scan starts
26823 pos($input_line) = $pos_beg; # so that \G in the match below anchors here
26825 # handle non-blank line; package name, if any, must follow
26826 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
# the match requires a trailing \w+, so the fallback below is reached
# only when $1 is false
26828 $package = ( defined($1) && $1 ) ? $1 : 'main';
# normalize: old-style ' separators become ::, a leading :: is taken to
# mean package main, and a trailing :: is dropped
26829 $package =~ s/\'/::/g;
26830 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26831 $package =~ s/::$//;
26832 my $pos = pos($input_line);
26833 my $numc = $pos - $pos_beg;
# the returned token is the word 'package ' plus the matched text
26834 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
26837 # Now we must convert back from character position
26838 # to pre_token index.
26839 # I don't think an error flag can occur here ..but ?
26842 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26843 if ($error) { warning("Possibly invalid package\n") }
26844 $current_package = $package; # update the global current package
# only ; { or } is expected right after the package name
26847 my ( $next_nonblank_token, $i_next ) =
26848 find_next_nonblank_token( $i, $rtokens, $max_token_index );
26849 if ( $next_nonblank_token !~ /^[;\{\}]$/ ) {
26851 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
26856 # no match but line not blank --
26857 # could be a label with name package, like package: , for example.
26862 return ( $i, $tok, $type );
26865 sub scan_identifier_do {
26867 # This routine assembles tokens into identifiers. It maintains a
26868 # scan state, id_scan_state. It updates id_scan_state based upon
26869 # current id_scan_state and token, and returns an updated
26870 # id_scan_state and the next index after the identifier.
26871 # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
26872 # $last_nonblank_type
# Scan states handled in the loop below:
#   '$'  starting variable name
#   '&'  starting sub call
#   'A'  looking for alpha (after ::)
#   ':'  looking for :: after alpha
#   '('  looking for ( of prototype
#   ')'  looking for ) to end prototype
#   ''   done
# Returns the list ( $i, $tok, $type, $id_scan_state, $identifier ).
26874 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
26879 my $tok_begin = $$rtokens[$i_begin];
26880 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
# the *_begin copies are used only by the debug output at the end
26881 my $id_scan_state_begin = $id_scan_state;
26882 my $identifier_begin = $identifier;
26883 my $tok = $tok_begin;
26886 # these flags will be used to help figure out the type:
26887 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
26890 # allow old package separator (') except in 'use' statement
26891 my $allow_tick = ( $last_nonblank_token ne 'use' );
26893 # get started by defining a type and a state if necessary
26894 unless ($id_scan_state) {
26895 $context = UNKNOWN_CONTEXT;
26897 # fixup for digraph
26898 if ( $tok eq '>' ) {
26902 $identifier = $tok;
26904 if ( $tok eq '$' || $tok eq '*' ) {
26905 $id_scan_state = '$';
26906 $context = SCALAR_CONTEXT;
26908 elsif ( $tok eq '%' || $tok eq '@' ) {
26909 $id_scan_state = '$';
26910 $context = LIST_CONTEXT;
26912 elsif ( $tok eq '&' ) {
26913 $id_scan_state = '&';
26915 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
26916 $saw_alpha = 0; # 'sub' is considered type info here
26917 $id_scan_state = '$';
26918 $identifier .= ' '; # need a space to separate sub from sub name
26920 elsif ( $tok eq '::' ) {
26921 $id_scan_state = 'A';
26923 elsif ( $tok =~ /^[A-Za-z_]/ ) {
26924 $id_scan_state = ':';
26926 elsif ( $tok eq '->' ) {
26927 $id_scan_state = '$';
26932 my ( $a, $b, $c ) = caller;
26933 warning("Program Bug: scan_identifier given bad token = $tok \n");
26934 warning(" called from sub $a line: $c\n");
26935 report_definite_bug();
26937 $saw_type = !$saw_alpha;
26941 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
26944 # now loop to gather the identifier
26947 while ( $i < $max_token_index ) {
26948 $i_save = $i unless ( $tok =~ /^\s*$/ );
26949 $tok = $$rtokens[ ++$i ];
26951 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
26956 if ( $id_scan_state eq '$' ) { # starting variable name
26958 if ( $tok eq '$' ) {
26960 $identifier .= $tok;
26962 # we've got a punctuation variable if end of line (punct.t)
26963 if ( $i == $max_token_index ) {
26965 $id_scan_state = '';
26969 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
26971 $id_scan_state = ':'; # now need ::
26972 $identifier .= $tok;
26974 elsif ( $tok eq "'" && $allow_tick ) { # old-style package separator (tick)
26976 $id_scan_state = ':'; # now need ::
26977 $identifier .= $tok;
26979 # Perl will accept leading digits in identifiers,
26980 # although they may not always produce useful results.
26981 # Something like $main::0 is ok. But this also works:
26983 # sub howdy::123::bubba{ print "bubba $54321!\n" }
26984 # howdy::123::bubba();
26987 elsif ( $tok =~ /^[0-9]/ ) { # numeric
26989 $id_scan_state = ':'; # now need ::
26990 $identifier .= $tok;
26992 elsif ( $tok eq '::' ) {
26993 $id_scan_state = 'A';
26994 $identifier .= $tok;
26996 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
26997 $identifier .= $tok; # keep same state, a $ could follow
26999 elsif ( $tok eq '{' ) {
27001 # check for something like ${#} or ${©}
27002 if ( $identifier eq '$'
27003 && $i + 2 <= $max_token_index
27004 && $$rtokens[ $i + 2 ] eq '}'
27005 && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
27007 my $next2 = $$rtokens[ $i + 2 ];
27008 my $next1 = $$rtokens[ $i + 1 ];
27009 $identifier .= $tok . $next1 . $next2;
27011 $id_scan_state = '';
27015 # skip something like ${xxx} or ->{
27016 $id_scan_state = '';
27018 # if this is the first token of a line, any tokens for this
27019 # identifier have already been accumulated
27020 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
27025 # space ok after leading $ % * & @
27026 elsif ( $tok =~ /^\s*$/ ) {
27028 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
27030 if ( length($identifier) > 1 ) {
27031 $id_scan_state = '';
27033 $type = 'i'; # probably punctuation variable
27038 # spaces after $'s are common, and space after @
27039 # is harmless, so only complain about space
27040 # after other type characters. Space after $ and
27041 # @ will be removed in formatting. Report space
27042 # after % and * because they might indicate a
27043 # parsing error. In other words '% ' might be a
27044 # modulo operator. Delete this warning if it
27046 if ( $identifier !~ /^[\@\$]$/ ) {
27048 "Space in identifier, following $identifier\n";
27054 # space after '->' is ok
27056 elsif ( $tok eq '^' ) {
27058 # check for some special variables like $^W
27059 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
27060 $identifier .= $tok;
27061 $id_scan_state = 'A';
27063 # Perl accepts '$^]' or '@^]', but
27064 # there must not be a space before the ']'.
27065 my $next1 = $$rtokens[ $i + 1 ];
27066 if ( $next1 eq ']' ) {
27068 $identifier .= $next1;
27069 $id_scan_state = "";
27074 $id_scan_state = '';
27077 else { # something else
27079 # check for various punctuation variables
27080 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
27081 $identifier .= $tok;
27084 elsif ( $identifier eq '$#' ) {
27086 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
27088 # perl seems to allow just these: $#: $#- $#+
27089 elsif ( $tok =~ /^[\:\-\+]$/ ) {
27091 $identifier .= $tok;
27095 write_logfile_entry( 'Use of $# is deprecated' . "\n" );
27098 elsif ( $identifier eq '$$' ) {
27100 # perl does not allow references to punctuation
27101 # variables without braces. For example, this
27105 # You would have to use
27109 if ( $tok eq '{' ) { $type = 't' }
27110 else { $type = 'i' }
27112 elsif ( $identifier eq '->' ) {
27117 if ( length($identifier) == 1 ) { $identifier = ''; }
27119 $id_scan_state = '';
27123 elsif ( $id_scan_state eq '&' ) { # starting sub call?
27125 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
27126 $id_scan_state = ':'; # now need ::
27128 $identifier .= $tok;
27130 elsif ( $tok eq "'" && $allow_tick ) { # old-style package separator (tick)
27131 $id_scan_state = ':'; # now need ::
27133 $identifier .= $tok;
27135 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
27136 $id_scan_state = ':'; # now need ::
27138 $identifier .= $tok;
27140 elsif ( $tok =~ /^\s*$/ ) { # allow space
27142 elsif ( $tok eq '::' ) { # leading ::
27143 $id_scan_state = 'A'; # accept alpha next
27144 $identifier .= $tok;
27146 elsif ( $tok eq '{' ) {
27147 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
27149 $id_scan_state = '';
27154 # punctuation variable?
27155 # testfile: cunningham4.pl
27157 # We have to be careful here. If we are in an unknown state,
27158 # we will reject the punctuation variable. In the following
27159 # example the '&' is a binary operator but we are in an unknown
27160 # state because there is no sigil on 'Prima', so we don't
27161 # know what it is. But it is a bad guess that
27162 # '&~' is a punctuation variable.
27163 # $self->{text}->{colorMap}->[
27164 # Prima::PodView::COLOR_CODE_FOREGROUND
27165 # & ~tb::COLOR_INDEX ] =
27166 # $sec->{ColorCode}
27167 if ( $identifier eq '&' && $expecting ) {
27168 $identifier .= $tok;
27175 $id_scan_state = '';
27179 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
27181 if ( $tok =~ /^[A-Za-z_]/ ) { # found it
27182 $identifier .= $tok;
27183 $id_scan_state = ':'; # now need ::
27186 elsif ( $tok eq "'" && $allow_tick ) {
27187 $identifier .= $tok;
27188 $id_scan_state = ':'; # now need ::
27191 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
27192 $identifier .= $tok;
27193 $id_scan_state = ':'; # now need ::
27196 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
27197 $id_scan_state = '(';
27198 $identifier .= $tok;
27200 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
27201 $id_scan_state = ')';
27202 $identifier .= $tok;
27205 $id_scan_state = '';
27210 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
27212 if ( $tok eq '::' ) { # got it
27213 $identifier .= $tok;
27214 $id_scan_state = 'A'; # now require alpha
27216 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
27217 $identifier .= $tok;
27218 $id_scan_state = ':'; # now need ::
27221 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
27222 $identifier .= $tok;
27223 $id_scan_state = ':'; # now need ::
27226 elsif ( $tok eq "'" && $allow_tick ) { # tick
27228 if ( $is_keyword{$identifier} ) {
27229 $id_scan_state = ''; # that's all
27233 $identifier .= $tok;
27236 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
27237 $id_scan_state = '(';
27238 $identifier .= $tok;
27240 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
27241 $id_scan_state = ')';
27242 $identifier .= $tok;
27245 $id_scan_state = ''; # that's all
27250 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
27252 if ( $tok eq '(' ) { # got it
27253 $identifier .= $tok;
27254 $id_scan_state = ')'; # now find the end of it
27256 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
27257 $identifier .= $tok;
27260 $id_scan_state = ''; # that's all - no prototype
27265 elsif ( $id_scan_state eq ')' ) { # looking for ) to end
27267 if ( $tok eq ')' ) { # got it
27268 $identifier .= $tok;
27269 $id_scan_state = ''; # all done
27272 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
27273 $identifier .= $tok;
27275 else { # probable error in script, but keep going
27276 warning("Unexpected '$tok' while seeking end of prototype\n");
27277 $identifier .= $tok;
27280 else { # can get here due to error in initialization
27281 $id_scan_state = '';
27287 if ( $id_scan_state eq ')' ) {
27288 warning("Hit end of line while seeking ) to end prototype\n");
27291 # once we enter the actual identifier, it may not extend beyond
27292 # the end of the current line
27293 if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
27294 $id_scan_state = '';
27296 if ( $i < 0 ) { $i = 0 }
27303 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
27306 else { $type = 'i' }
27308 elsif ( $identifier eq '->' ) {
27312 ( length($identifier) > 1 )
27314 # In something like '@$=' we have an identifier '@$'
27315 # In something like '$${' we have type '$$' (and only
27316 # part of an identifier)
27317 && !( $identifier =~ /\$$/ && $tok eq '{' )
27318 && ( $identifier !~ /^(sub |package )$/ )
27323 else { $type = 't' }
27325 elsif ($saw_alpha) {
27327 # type 'w' includes anything without leading type info
27328 # ($,%,@,*) including something like abc::def::ghi
27333 } # this can happen on a restart
27337 $tok = $identifier;
27338 if ($message) { write_logfile_entry($message) }
27345 TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
27346 my ( $a, $b, $c ) = caller;
27348 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
27350 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
27352 return ( $i, $tok, $type, $id_scan_state, $identifier );
27357 # saved package and subnames in case prototype is on separate line
27358 my ( $package_saved, $subname_saved );
27362 # do_scan_sub parses a sub name and prototype
27363 # it is called with $i_beg equal to the index of the first nonblank
27364 # token following a 'sub' token.
27366 # TODO: add future error checks to be sure we have a valid
27367 # sub name. For example, 'sub &doit' is wrong. Also, be sure
27368 # a name is given if and only if a non-anonymous sub is
27370 # USES GLOBAL VARS: $current_package, $last_nonblank_token,
27371 # $in_attribute_list, %saw_function_definition,
# Returns the list ( $i, $tok, $type, $id_scan_state ); the returned
# $id_scan_state is 'sub' when a prototype is still expected on a later
# line, and is otherwise the empty string set at the start.
27375 $input_line, $i, $i_beg,
27376 $tok, $type, $rtokens,
27377 $rtoken_map, $id_scan_state, $max_token_index
27379 $id_scan_state = ""; # normally we get everything in one call
27380 my $subname = undef;
27381 my $package = undef;
27386 my $pos_beg = $$rtoken_map[$i_beg]; # character position where the scan starts
27387 pos($input_line) = $pos_beg; # so that \G in the matches below anchors here
27389 # sub NAME PROTO ATTRS
27391 $input_line =~ m/\G\s*
27392 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
27393 (\w+) # NAME - required
27394 (\s*\([^){]*\))? # PROTO - something in parens
27395 (\s*:)? # ATTRS - leading : of attribute list
# normalize the package name: old-style ' separators become ::, a leading
# :: is taken to mean package main, and a trailing :: is dropped
27404 $package = ( defined($1) && $1 ) ? $1 : $current_package;
27405 $package =~ s/\'/::/g;
27406 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
27407 $package =~ s/::$//;
27408 my $pos = pos($input_line);
27409 my $numc = $pos - $pos_beg;
27410 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
27414 # Look for prototype/attributes not preceded on this line by subname;
27415 # This might be an anonymous sub with attributes,
27416 # or a prototype on a separate line from its sub name
27418 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
27419 (\s*:)? # ATTRS leading ':'
27428 # Handle prototype on separate line from subname
27429 if ($subname_saved) {
27430 $package = $package_saved;
27431 $subname = $subname_saved;
27432 $tok = $last_nonblank_token;
27439 # ATTRS: if there are attributes, back up and let the ':' be
27440 # found later by the scanner.
27441 my $pos = pos($input_line);
27443 $pos -= length($attrs);
27446 my $next_nonblank_token = $tok;
27448 # catch case of line with leading ATTR ':' after anonymous sub
27449 if ( $pos == $pos_beg && $tok eq ':' ) {
27451 $in_attribute_list = 1;
27454 # We must convert back from character position
27455 # to pre_token index.
27458 # I don't think an error flag can occur here ..but ?
27460 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
27461 $max_token_index );
27462 if ($error) { warning("Possibly invalid sub\n") }
27464 # check for multiple definitions of a sub
27465 ( $next_nonblank_token, my $i_next ) =
27466 find_next_nonblank_token_on_this_line( $i, $rtokens,
27467 $max_token_index );
27470 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
27471 { # skip blank or side comment
27472 my ( $rpre_tokens, $rpre_types ) =
27473 peek_ahead_for_n_nonblank_pre_tokens(1);
27474 if ( defined($rpre_tokens) && @$rpre_tokens ) {
27475 $next_nonblank_token = $rpre_tokens->[0];
# NOTE(review): presumably the nothing-left-to-peek case; the enclosing
# else is not visible here
27478 $next_nonblank_token = '}';
# forget any saved names; they are set again below only when a prototype
# is expected on another line
27481 $package_saved = "";
27482 $subname_saved = "";
27483 if ( $next_nonblank_token eq '{' ) {
27486 # Check for multiple definitions of a sub, but
27487 # it is ok to have multiple sub BEGIN, etc,
27488 # so we do not complain if name is all caps
27489 if ( $saw_function_definition{$package}{$subname}
27490 && $subname !~ /^[A-Z]+$/ )
27492 my $lno = $saw_function_definition{$package}{$subname};
27494 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
# remember the line number of this definition
27497 $saw_function_definition{$package}{$subname} =
27498 $tokenizer_self->{_last_line_number};
27501 elsif ( $next_nonblank_token eq ';' ) {
27503 elsif ( $next_nonblank_token eq '}' ) {
27506 # ATTRS - if an attribute list follows, remember the name
27507 # of the sub so the next opening brace can be labeled.
27508 # Setting 'statement_type' causes any ':'s to introduce
27510 elsif ( $next_nonblank_token eq ':' ) {
27511 $statement_type = $tok;
27514 # see if PROTO follows on another line:
27515 elsif ( $next_nonblank_token eq '(' ) {
27516 if ( $attrs || $proto ) {
27518 "unexpected '(' after definition or declaration of sub '$subname'\n"
27522 $id_scan_state = 'sub'; # we must come back to get proto
27523 $statement_type = $tok;
27524 $package_saved = $package;
27525 $subname_saved = $subname;
27528 elsif ($next_nonblank_token) { # EOF technically ok
27530 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
27533 check_prototype( $proto, $package, $subname );
27536 # no match but line not blank
27539 return ( $i, $tok, $type, $id_scan_state );
27543 #########i###############################################################
27544 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
27545 #########################################################################
# find_next_nonblank_token returns the list ( $next_nonblank_token, $i ):
# the token after index $i in @$rtokens, or the one after that if the first
# is blank, together with its index.  At most one blank token is skipped.
27547 sub find_next_nonblank_token {
27548 my ( $i, $rtokens, $max_token_index ) = @_;
# at or past the last token of this line: look ahead, but only if that has
# not been done already
# NOTE(review): what the peek-ahead helpers do to @$rtokens is not visible here
27550 if ( $i >= $max_token_index ) {
27551 if ( !peeked_ahead() ) {
27554 peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
27557 my $next_nonblank_token = $$rtokens[ ++$i ];
27559 if ( $next_nonblank_token =~ /^\s*$/ ) {
27560 $next_nonblank_token = $$rtokens[ ++$i ];
27562 return ( $next_nonblank_token, $i );
27565 sub numerator_expected {
27567 # this is a filter for a possible numerator, in support of guessing
27568 # for the / pattern delimiter token.
27573 # Note: I am using the convention that variables ending in
27574 # _expected have these 3 possible values.
27575 my ( $i, $rtokens, $max_token_index ) = @_;
27576 my $next_token = $$rtokens[ $i + 1 ];
27577 if ( $next_token eq '=' ) { $i++; } # handle /=
27578 my ( $next_nonblank_token, $i_next ) =
27579 find_next_nonblank_token( $i, $rtokens, $max_token_index );
# the test below is unanchored: it succeeds if the token contains any of
# ( $ . @ or a word character
27581 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
# an empty or all-blank next token
27586 if ( $next_nonblank_token =~ /^\s*$/ ) {
27595 sub pattern_expected {
27597 # This is the start of a filter for a possible pattern.
27598 # It looks at the token after a possible pattern and tries to
27599 # determine if that token could end a pattern.
27604 my ( $i, $rtokens, $max_token_index ) = @_;
27605 my $next_token = $$rtokens[ $i + 1 ];
# only the first character of the next token is tested here
27606 if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
27607 my ( $next_nonblank_token, $i_next ) =
27608 find_next_nonblank_token( $i, $rtokens, $max_token_index );
27610 # list of tokens which may follow a pattern
27611 # (can probably be expanded)
# NOTE(review): the alternation is unanchored, so a token that merely
# contains one of these strings also matches
27612 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
# an empty or all-blank next token
27618 if ( $next_nonblank_token =~ /^\s*$/ ) {
# find_next_nonblank_token_on_this_line returns the list
# ( $next_nonblank_token, $i ) for the token after index $i, skipping at most
# one blank token and never indexing beyond $max_token_index.
27627 sub find_next_nonblank_token_on_this_line {
27628 my ( $i, $rtokens, $max_token_index ) = @_;
27629 my $next_nonblank_token;
27631 if ( $i < $max_token_index ) {
27632 $next_nonblank_token = $$rtokens[ ++$i ];
27634 if ( $next_nonblank_token =~ /^\s*$/ ) {
27636 if ( $i < $max_token_index ) {
27637 $next_nonblank_token = $$rtokens[ ++$i ];
# NOTE(review): presumably the no-token-left case; the enclosing else is
# not visible here
27642 $next_nonblank_token = "";
27644 return ( $next_nonblank_token, $i );
27647 sub find_angle_operator_termination {
27649 # We are looking at a '<' and want to know if it is an angle operator.
27650 # We are to return:
27651 # $i = pretoken index of ending '>' if found, current $i otherwise
27652 # $type = 'Q' if found, '>' otherwise
27653 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
# start searching at the character just after the pretoken at index $i
27656 pos($input_line) = 1 + $$rtoken_map[$i];
27660 # we just have to find the next '>' if a term is expected
27661 if ( $expecting == TERM ) { $filter = '[\>]' }
27663 # we have to guess if we don't know what is expected
27664 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
27666 # shouldn't happen - we shouldn't be here if operator is expected
27667 else { warning("Program Bug in find_angle_operator_termination\n") }
27669 # To illustrate what we might be looking at, in case we are
27670 # guessing, here are some examples of valid angle operators
27677 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
27678 # <${PREFIX}*img*.$IMAGE_TYPE>
27679 # <img*.$IMAGE_TYPE>
27680 # <Timg*.$IMAGE_TYPE>
27681 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
27683 # Here are some examples of lines which do not have angle operators:
27684 # return undef unless $self->[2]++ < $#{$self->[1]};
27687 # the following line from dlister.pl caused trouble:
27688 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
27690 # If the '<' starts an angle operator, it must end on this line and
27691 # it must not have certain characters like ';' and '=' in it. I use
27692 # this to limit the testing. This filter should be improved if
27695 if ( $input_line =~ /($filter)/g ) {
27699 # We MAY have found an angle operator termination if we get
27700 # here, but we need to do more to be sure we haven't been
27702 my $pos = pos($input_line);
27704 my $pos_beg = $$rtoken_map[$i];
# $str is the candidate text, from the starting pretoken through the
# matched character
27705 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
27707 # Reject if the closing '>' follows a '-' as in:
27708 # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
# NOTE(review): string 'eq' is used on the next line, while the other
# $expecting tests in this routine use numeric '=='
27709 if ( $expecting eq UNKNOWN ) {
27710 my $check = substr( $input_line, $pos - 2, 1 );
27711 if ( $check eq '-' ) {
27712 return ( $i, $type );
27716 ######################################debug#####
27717 #write_diagnostics( "ANGLE? :$str\n");
27718 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
27719 ######################################debug#####
27723 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27725 # It may be possible that a quote ends midway in a pretoken.
27726 # If this happens, it may be necessary to split the pretoken.
27729 "Possible tokinization error..please check this line\n");
27730 report_possible_bug();
27733 # Now let's see where we stand....
27734 # OK if math op not possible
27735 if ( $expecting == TERM ) {
27738 # OK if there are no more than 2 pre-tokens inside
27739 # (not possible to write 2 token math between < and >)
27740 # This catches most common cases
27741 elsif ( $i <= $i_beg + 3 ) {
27742 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
27748 # Let's try a Brace Test: any braces inside must balance
# each counter goes up on an opener and down on a closer, so a nonzero
# total means unbalanced (nesting order is not checked)
27750 while ( $str =~ /\{/g ) { $br++ }
27751 while ( $str =~ /\}/g ) { $br-- }
27753 while ( $str =~ /\[/g ) { $sb++ }
27754 while ( $str =~ /\]/g ) { $sb-- }
27756 while ( $str =~ /\(/g ) { $pr++ }
27757 while ( $str =~ /\)/g ) { $pr-- }
27759 # if braces do not balance - not angle operator
27760 if ( $br || $sb || $pr ) {
27764 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
27767 # we should keep doing more checks here...to be continued
27768 # Tentatively accepting this as a valid angle operator.
27769 # There are lots more things that can be checked.
27772 "ANGLE-Guessing yes: $str expecting=$expecting\n");
27773 write_logfile_entry("Guessing angle operator here: $str\n");
27778 # didn't find ending >
27780 if ( $expecting == TERM ) {
27781 warning("No ending > for angle operator\n");
27785 return ( $i, $type );
27788 sub scan_number_do {
27790 # scan a number in any of the formats that Perl accepts
27791 # Underbars (_) are allowed in decimal numbers.
27792 # input parameters -
27793 # $input_line - the string to scan
27794 # $i - pre_token index to start scanning
27795 # $rtoken_map - reference to the pre_token map giving starting
27796 # character position in $input_line of token $i
# $input_type - token type supplied by the caller; it is the initial
# value of the returned $type
# $max_token_index - last valid pre_token index; it is handed on to
# inverse_pretoken_map
27797 # output parameters -
27798 # $i - last pre_token index of the number just scanned
27799 # number - the number (characters); or undef if not a number
# The actual return list is ( $i, $type, $number ).
# NOTE(review): this listing omits some lines of the sub (the embedded
# line numbers skip), e.g. the declaration of $pos and any line that
# changes $type on success -- confirm against the complete source.
27801 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
27802 my $pos_beg = $$rtoken_map[$i];
27805 my $number = undef;
27806 my $type = $input_type;
27808 my $first_char = substr( $input_line, $pos_beg, 1 );
27810 # Look for bad starting characters; Shouldn't happen..
27811 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
27812 warning("Program bug - scan_number given character $first_char\n");
27813 report_definite_bug();
27814 return ( $i, $type, $number );
27817 # handle v-string without leading 'v' character ('Two Dot' rule)
27819 # TODO: v-strings may contain underscores
# Every pattern below is anchored with \G at $pos_beg, which is set
# through pos() immediately before the match.
27820 pos($input_line) = $pos_beg;
27821 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
27822 $pos = pos($input_line);
27823 my $numc = $pos - $pos_beg;
27824 $number = substr( $input_line, $pos_beg, $numc );
27826 report_v_string($number);
27829 # handle octal, hex, binary
27830 if ( !defined($number) ) {
27831 pos($input_line) = $pos_beg;
27832 if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
27834 $pos = pos($input_line);
27835 my $numc = $pos - $pos_beg;
27836 $number = substr( $input_line, $pos_beg, $numc );
# Still nothing: try the general decimal form (optional sign,
# digits/underscores, optional fraction, optional exponent).
27842 if ( !defined($number) ) {
27843 pos($input_line) = $pos_beg;
27845 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
27846 $pos = pos($input_line);
27848 # watch out for things like 0..40 which would give 0. by this;
27849 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
27850 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
27854 my $numc = $pos - $pos_beg;
27855 $number = substr( $input_line, $pos_beg, $numc );
27860 # filter out non-numbers like e + - . e2 .e3 +e6
27861 # the rule: at least one digit, and any 'e' must be preceded by a digit
27863 $number !~ /\d/ # no digits
27864 || ( $number =~ /^(.*)[eE]/
27865 && $1 !~ /\d/ ) # or no digits before the 'e'
# rejected: hand back the caller's type unchanged
27869 $type = $input_type;
27870 return ( $i, $type, $number );
27873 # Found a number; now we must convert back from character position
27874 # to pre_token index. An error here implies user syntax error.
27875 # An example would be an invalid octal number like '009'.
# inverse_pretoken_map returns the new index together with an error flag
# that is set when $pos is not on a pre_token boundary.
27878 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27879 if ($error) { warning("Possibly invalid number\n") }
27881 return ( $i, $type, $number );
27884 sub inverse_pretoken_map {
27886 # Starting with the current pre_token index $i, scan forward until
27887 # finding the index of the next pre_token whose position is $pos.
# input parameters:
# $i - pre_token index to start from (the scan begins at $i+1)
# $pos - character position being looked for
# $rtoken_map - reference to the array of pre_token start positions
# $max_token_index - last index examined by the scan
# output parameters:
# ( $i, $error ) - $error is set to 1 when the first pre_token that
# starts at or beyond $pos starts strictly beyond it, i.e. $pos is
# not on a pre_token boundary.
# NOTE(review): the initialization of $error and the statements that
# leave the loop are on lines not visible in this listing.
27888 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
27891 while ( ++$i <= $max_token_index ) {
27893 if ( $pos <= $$rtoken_map[$i] ) {
27895 # Let the calling routine handle errors in which we do not
27896 # land on a pre-token boundary. It can happen by running
27897 # perltidy on some non-perl scripts, for example.
27898 if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
27903 return ( $i, $error );
27906 sub find_here_doc {
27908 # find the target of a here document, if any
27909 # input parameters:
27910 # $i - token index of the second < of <<
27911 # ($i must be less than the last token index if this is called)
# $expecting - the caller's TERM / UNKNOWN state; several of the
# messages below are only written when it equals TERM
# $rtokens - reference to the array of tokens
# $rtoken_map, $max_token_index - as used by the other scanning subs
27912 # output parameters:
27913 # $found_target = 0 didn't find target; =1 found target
27914 # HERE_TARGET - the target string (may be empty string)
27915 # $i - unchanged if not here doc,
27916 # or index of the last token of the here target
27917 # $saw_error - flag noting unbalanced quote on here target
# NOTE(review): several lines of this sub (branch openers/closers and
# some assignments) are not visible in this listing.
27918 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27920 my $found_target = 0;
27921 my $here_doc_target = '';
27922 my $here_quote_character = '';
27924 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
27925 $next_token = $$rtokens[ $i + 1 ];
27927 # perl allows a backslash before the target string (heredoc.t)
27929 if ( $next_token eq '\\' ) {
27931 $next_token = $$rtokens[ $i + 2 ];
27934 ( $next_nonblank_token, $i_next_nonblank ) =
27935 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
# Case 1: the target is quoted with ' " or `
27937 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
27940 my $quote_depth = 0;
27945 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
27948 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
27949 $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
27951 if ($in_quote) { # didn't find end of quote, so no target found
27953 if ( $expecting == TERM ) {
27955 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
27960 else { # found ending quote
# rebuild the target text from the tokens strictly between the
# opening quote and the closing quote
27965 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
27966 $tokj = $$rtokens[$j];
27968 # we have to remove any backslash before the quote character
27969 # so that the here-doc-target exactly matches this string
27973 && $$rtokens[ $j + 1 ] eq $here_quote_character );
27974 $here_doc_target .= $tokj;
# Case 2: nothing but whitespace follows << while a term is expected
27979 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
27981 write_logfile_entry(
27982 "found blank here-target after <<; suggest using \"\"\n");
# Case 3: unquoted target
27985 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
27987 my $here_doc_expected;
# when it is unknown whether a term is expected, guess_if_here_doc is
# asked to decide
27988 if ( $expecting == UNKNOWN ) {
27989 $here_doc_expected = guess_if_here_doc($next_token);
27992 $here_doc_expected = 1;
27995 if ($here_doc_expected) {
27997 $here_doc_target = $next_token;
28004 if ( $expecting == TERM ) {
28006 write_logfile_entry("Note: bare here-doc operator <<\n");
28013 # patch to neglect any prepended backslash
28014 if ( $found_target && $backslash ) { $i++ }
28016 return ( $found_target, $here_doc_target, $here_quote_character, $i,
28022 # follow (or continue following) quoted string(s)
28023 # $in_quote return code:
28024 # 0 - ok, found end
28025 # 1 - still must find end of quote whose target is $quote_character
28026 # 2 - still looking for end of first of two quotes
28028 # Returns updated strings:
28029 # $quoted_string_1 = quoted string seen while in_quote=1
28030 # $quoted_string_2 = quoted string seen while in_quote=2
# This sub is a driver around follow_quoted_string: it calls it once
# while $in_quote == 2 and once more while $in_quote == 1, appending
# the text each call returns to $quoted_string_2 and $quoted_string_1
# respectively.
# NOTE(review): the 'sub' line, parts of the argument list and some
# branch lines are not visible in this listing.
28032 $i, $in_quote, $quote_character,
28033 $quote_pos, $quote_depth, $quoted_string_1,
28034 $quoted_string_2, $rtokens, $rtoken_map,
28038 my $in_quote_starting = $in_quote;
28041 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
28044 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
28047 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
28048 $quote_pos, $quote_depth, $max_token_index );
28049 $quoted_string_2 .= $quoted_string;
# first string finished: step past a bracket-type delimiter and clear
# $quote_character, which follow_quoted_string treats as "use the first
# non-blank character" when it is blank
28050 if ( $in_quote == 1 ) {
28051 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
28052 $quote_character = '';
28055 $quoted_string_2 .= "\n";
28059 if ( $in_quote == 1 ) { # one (more) quote to follow
28062 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
28065 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
28066 $quote_pos, $quote_depth, $max_token_index );
28067 $quoted_string_1 .= $quoted_string;
# still open after the call: add a newline to the accumulated text
28068 if ( $in_quote == 1 ) {
28069 $quoted_string_1 .= "\n";
28072 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
28073 $quoted_string_1, $quoted_string_2 );
28076 sub follow_quoted_string {
28078 # scan for a specific token, skipping escaped characters
28079 # if the quote character is blank, use the first non-blank character
28080 # input parameters:
28081 # $rtokens = reference to the array of tokens
28082 # $i = the token index of the first character to search
28083 # $in_quote = number of quoted strings being followed
28084 # $beginning_tok = the starting quote character
28085 # $quote_pos = index to check next for alphanumeric delimiter
28086 # output parameters:
28087 # $i = the token index of the ending quote character
28088 # $in_quote = decremented if found end, unchanged if not
28089 # $beginning_tok = the starting quote character
28090 # $quote_pos = index to check next for alphanumeric delimiter
28091 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
28092 # $quoted_string = the text of the quote (without quotation tokens)
# $max_token_index bounds every search loop below.
# NOTE(review): the rest of the argument list, the end of the return
# list and several branch bodies are on lines not visible in this
# listing.
28093 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
28096 my ( $tok, $end_tok );
# start one token early: the search loops advance with ++$i before
# reading a token
28097 my $i = $i_beg - 1;
28098 my $quoted_string = "";
28100 TOKENIZER_DEBUG_FLAG_QUOTE && do {
28102 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
28105 # get the corresponding end token
28106 if ( $beginning_tok !~ /^\s*$/ ) {
28107 $end_tok = matching_end_token($beginning_tok);
28110 # a blank token means we must find and use the first non-blank one
28112 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
28114 while ( $i < $max_token_index ) {
28115 $tok = $$rtokens[ ++$i ];
28117 if ( $tok !~ /^\s*$/ ) {
# a '#' met where quote comments are allowed: jump to the last token
28119 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
28120 $i = $max_token_index;
# a multi-character token: the delimiter is the single character at
# $quote_pos - 1 within it
28124 if ( length($tok) > 1 ) {
28125 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
28126 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
28129 $beginning_tok = $tok;
28132 $end_tok = matching_end_token($beginning_tok);
28138 $allow_quote_comments = 1;
28143 # There are two different loops which search for the ending quote
28144 # character. In the rare case of an alphanumeric quote delimiter, we
28145 # have to look through alphanumeric tokens character-by-character, since
28146 # the pre-tokenization process combines multiple alphanumeric
28147 # characters, whereas for a non-alphanumeric delimiter, only tokens of
28148 # length 1 can match.
28150 ###################################################################
28151 # Case 1 (rare): loop for case of alphanumeric quote delimiter..
28152 # "quote_pos" is the position in the current word to begin searching
28153 ###################################################################
28154 if ( $beginning_tok =~ /\w/ ) {
28156 # Note this because it is not recommended practice except
28157 # for obfuscated perl contests
28158 if ( $in_quote == 1 ) {
28159 write_logfile_entry(
28160 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
28163 while ( $i < $max_token_index ) {
28165 if ( $quote_pos == 0 || ( $i < 0 ) ) {
28166 $tok = $$rtokens[ ++$i ];
28168 if ( $tok eq '\\' ) {
28170 # retain backslash unless it hides the end token
28171 $quoted_string .= $tok
28172 unless $$rtokens[ $i + 1 ] eq $end_tok;
28174 last if ( $i >= $max_token_index );
28175 $tok = $$rtokens[ ++$i ];
28178 my $old_pos = $quote_pos;
28180 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
# index() gives -1 when $end_tok is absent, so $quote_pos becomes 0 in
# that case and a positive 1-based position otherwise
28184 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
28186 if ( $quote_pos > 0 ) {
28189 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
28193 if ( $quote_depth == 0 ) {
28199 $quoted_string .= substr( $tok, $old_pos );
28204 ########################################################################
28205 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
28206 ########################################################################
28209 while ( $i < $max_token_index ) {
28210 $tok = $$rtokens[ ++$i ];
28212 if ( $tok eq $end_tok ) {
28215 if ( $quote_depth == 0 ) {
28220 elsif ( $tok eq $beginning_tok ) {
28223 elsif ( $tok eq '\\' ) {
28225 # retain backslash unless it hides the beginning or end token
28226 $tok = $$rtokens[ ++$i ];
28227 $quoted_string .= '\\'
28228 unless ( $tok eq $end_tok || $tok eq $beginning_tok );
28230 $quoted_string .= $tok;
# never report an index beyond the last token
28233 if ( $i > $max_token_index ) { $i = $max_token_index }
28234 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
28238 sub indicate_error {
# Report an error located at character position $pos of $input_line
# (line $line_number): the logfile is interrupted and the numbered
# line plus an underline carrying the marker $carrat are written by
# write_error_indicator_pair.
# NOTE(review): the use of $msg is on a line not visible in this
# listing.
28239 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
28240 interrupt_logfile();
28242 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
28246 sub write_error_indicator_pair {
# Write two warning() lines: the numbered (possibly shortened) source
# line produced by make_numbered_line, and beneath it an underline on
# which $carrat has been written at the error position.
28247 my ( $line_number, $input_line, $pos, $carrat ) = @_;
28248 my ( $offset, $numbered_line, $underline ) =
28249 make_numbered_line( $line_number, $input_line, $pos );
# $pos - $offset is the error position within the displayed line
28250 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
28251 warning( $numbered_line . "\n" );
# drop trailing blanks from the underline before printing it
28252 $underline =~ s/\s*$//;
28253 warning( $underline . "\n" );
28256 sub make_numbered_line {
28258 # Given an input line, its line number, and a character position of
28259 # interest, create a string not longer than 80 characters of the form
28260 # $lineno: sub_string
28261 # such that the sub_string of $str contains the position of interest
28263 # Here is an example of what we want, in this case we add trailing
28264 # '...' because the line is long.
28266 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
28268 # Here is another example, this time in which we used leading '...'
28269 # because of excessive length:
28271 # 2: ... er of the World Wide Web Consortium's
28273 # input parameters are:
28274 # $lineno = line number
28275 # $str = the text of the line
28276 # $pos = position of interest (the error) : 0 = first character
28279 # - $offset = an offset which corrects the position in case we only
28280 # display part of a line, such that $pos-$offset is the effective
28281 # position from the start of the displayed line.
28282 # - $numbered_line = the numbered line as above,
28283 # - $underline = a blank 'underline' which is all spaces with the same
28284 # number of characters as the numbered line.
28286 my ( $lineno, $str, $pos ) = @_;
# display from the start of the line when $pos is within the first 60
# characters; otherwise begin 40 characters before $pos
28287 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
# 68 is the width allowed for the text; $numc stays undef when the
# text from $offset onward already fits
28288 my $excess = length($str) - $offset - 68;
28289 my $numc = ( $excess > 0 ) ? 68 : undef;
# too long: cut to $numc characters, with ' ...' / '... ' markers
# occupying 4 characters at each truncated end
28291 if ( defined($numc) ) {
28292 if ( $offset == 0 ) {
28293 $str = substr( $str, $offset, $numc - 4 ) . " ...";
28296 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
28301 if ( $offset == 0 ) {
28304 $str = "... " . substr( $str, $offset + 4 );
28308 my $numbered_line = sprintf( "%d: ", $lineno );
# account for the width of the "N: " prefix so that $pos - $offset
# indexes into the numbered line
28309 $offset -= length($numbered_line);
28310 $numbered_line .= $str;
28311 my $underline = " " x length($numbered_line);
28312 return ( $offset, $numbered_line, $underline );
28315 sub write_on_underline {
28317 # The "underline" is a string that shows where an error is; it starts
28318 # out as a string of blanks with the same length as the numbered line of
28319 # code above it, and we have to add marking to show where an error is.
28320 # In the example below, we want to write the string '--^' just below
28321 # the line of bad code:
28323 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
28325 # We are given the current underline string, plus a position and a
28326 # string to write on it.
28328 # In the above example, there will be 2 calls to do this:
28329 # First call: $pos=19, pos_chr=^
28330 # Second call: $pos=16, pos_chr=---
28332 # This is a trivial thing to do with substr, but there is some
# Parameters: $underline (current underline string), $pos (0-based
# position at which to write), $pos_chr (string to write).
# Returns the updated underline string.
28335 my ( $underline, $pos, $pos_chr ) = @_;
28337 # check for error..shouldn't happen
# NOTE(review): the body of this out-of-range branch is not visible in
# this listing.
28338 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
# truncate $pos_chr if it would run past the end of the underline
28341 my $excess = length($pos_chr) + $pos - length($underline);
28342 if ( $excess > 0 ) {
28343 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
# overwrite in place using substr as an lvalue
28345 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
28346 return ($underline);
28351 # Break a string, $str, into a sequence of preliminary tokens. We
28352 # are interested in these types of tokens:
28353 # words (type='w'), example: 'max_tokens_wanted'
28354 # digits (type = 'd'), example: '0755'
28355 # whitespace (type = 'b'), example: ' '
28356 # any other single character (i.e. punct; type = the character itself).
28357 # We cannot do better than this yet because we might be in a quoted
28358 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
# Returns three array references: ( \@tokens, \@token_map, \@type ).
# NOTE(review): the 'sub' line, the loop opener and the statement that
# stores each token are on lines not visible in this listing.
28360 my ( $str, $max_tokens_wanted ) = @_;
28362 # we return references to these 3 arrays:
28363 my @tokens = (); # array of the tokens themselves
28364 my @token_map = (0); # string position of start of each token
28365 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
# Each alternative continues from the previous match position (\G);
# the /c modifier keeps pos($str) intact when an alternative fails so
# the next one can be tried at the same place.
28370 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
28373 # note that this must come before words!
28374 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
28377 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
28379 # single-character punctuation
28380 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
28384 return ( \@tokens, \@token_map, \@type );
# the position after the token just matched is where the next one starts
28388 push @token_map, pos($str);
# The counter is decremented before the test, so a starting value of 0
# goes negative and never equals 0: the loop then runs until the
# return above is reached.
28390 } while ( --$max_tokens_wanted != 0 );
28392 return ( \@tokens, \@token_map, \@type );
28397 # this is an old debug routine
# Prints one line per token in the form
# index:length:start position from the map:token text:
# NOTE(review): the 'sub' line and the declaration of $i are not
# visible in this listing.
28398 my ( $rtokens, $rtoken_map ) = @_;
28399 my $num = scalar(@$rtokens);
28402 for ( $i = 0 ; $i < $num ; $i++ ) {
28403 my $len = length( $$rtokens[$i] );
28404 print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
28408 sub matching_end_token {
28410 # find closing character for a pattern
# The argument is the opening delimiter. The four bracket-type openers
# { [ < ( are tested one by one.
# NOTE(review): the value returned by each branch, and what is
# returned for any other delimiter, are on lines not visible in this
# listing.
28411 my $beginning_token = shift;
28413 if ( $beginning_token eq '{' ) {
28416 elsif ( $beginning_token eq '[' ) {
28419 elsif ( $beginning_token eq '<' ) {
28422 elsif ( $beginning_token eq '(' ) {
28430 sub dump_token_types {
# Print a fixed, human-readable table of the token type codes and the
# '_line_type' codes to the filehandle $fh. The text is a
# single-quoted here-document, so nothing in it is interpolated.
# NOTE(review): the lines that obtain $fh are not visible in this
# listing.
28434 # This should be the latest list of token types in use
28435 # adding NEW_TOKENS: add a comment here
28436 print $fh <<'END_OF_LIST';
28438 Here is a list of the token types currently used for lines of type 'CODE'.
28439 For the following tokens, the "type" of a token is just the token itself.
28441 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
28442 ( ) <= >= == =~ !~ != ++ -- /= x=
28443 ... **= <<= >>= &&= ||= //= <=>
28444 , + - / * | % ! x ~ = \ ? : . < > ^ &
28446 The following additional token types are defined:
28449 b blank (white space)
28450 { indent: opening structural curly brace or square bracket or paren
28451 (code block, anonymous hash reference, or anonymous array reference)
28452 } outdent: right structural curly brace or square bracket or paren
28453 [ left non-structural square bracket (enclosing an array index)
28454 ] right non-structural square bracket
28455 ( left non-structural paren (all but a list right of an =)
28456 ) right non-structural parena
28457 L left non-structural curly brace (enclosing a key)
28458 R right non-structural curly brace
28459 ; terminal semicolon
28460 f indicates a semicolon in a "for" statement
28461 h here_doc operator <<
28463 Q indicates a quote or pattern
28464 q indicates a qw quote block
28466 C user-defined constant or constant function (with void prototype = ())
28467 U user-defined function taking parameters
28468 G user-defined function taking block parameter (like grep/map/eval)
28469 M (unused, but reserved for subroutine definition name)
28470 P (unused, but -html uses it to label pod text)
28471 t type indicater such as %,$,@,*,&,sub
28472 w bare word (perhaps a subroutine call)
28473 i identifier of some type (with leading %, $, @, *, &, sub, -> )
28476 F a file test operator (like -e)
28478 Z identifier in indirect object slot: may be file handle, object
28479 J LABEL: code block label
28480 j LABEL after next, last, redo, goto
28483 pp pre-increment operator ++
28484 mm pre-decrement operator --
28485 A : used as attribute separator
28487 Here are the '_line_type' codes used internally:
28488 SYSTEM - system-specific code before hash-bang line
28489 CODE - line of perl code (including comments)
28490 POD_START - line starting pod, such as '=head'
28491 POD - pod documentation text
28492 POD_END - last line of pod section, '=cut'
28493 HERE - text of here-document
28494 HERE_END - last line of here-doc (target word)
28495 FORMAT - format section
28496 FORMAT_END - last line of format section, '.'
28497 DATA_START - __DATA__ line
28498 DATA - unidentified text following __DATA__
28499 END_START - __END__ line
28500 END - unidentified text following __END__
28501 ERROR - we are in big trouble, probably not a perl script
# Table initialization for the tokenizer. Each word list below is
# turned into a lookup hash with a hash-slice assignment of the form
# @hash{@list} = (1) x scalar(@list);
# so that every listed word becomes a key whose value is 1. In several
# places @_ is reused as a scratch list for this purpose.
# NOTE(review): the opener of this section and the contents of several
# qw() lists are on lines not visible in this listing.
28507 # These names are used in error messages
28508 @opening_brace_names = qw# '{' '[' '(' '?' #;
28509 @closing_brace_names = qw# '}' ']' ')' ':' #;
28512 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
28513 <= >= == =~ !~ != ++ -- /= x= ~~
28515 @is_digraph{@digraphs} = (1) x scalar(@digraphs);
28517 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
28518 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
28520 # make a hash of all valid token types for self-checking the tokenizer
28521 # (adding NEW_TOKENS : select a new character and add to this list)
28522 my @valid_token_types = qw#
28523 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
28524 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
28526 push( @valid_token_types, @digraphs );
28527 push( @valid_token_types, @trigraphs );
28528 push( @valid_token_types, '#' );
28529 push( @valid_token_types, ',' );
28530 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
28532 # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
28533 my @file_test_operators =
28534 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
28535 @is_file_test_operator{@file_test_operators} =
28536 (1) x scalar(@file_test_operators);
28538 # these functions have prototypes of the form (&), so when they are
28539 # followed by a block, that block MAY BE followed by an operator.
28540 @_ = qw( do eval );
28541 @is_block_operator{@_} = (1) x scalar(@_);
28543 # these functions allow an identifier in the indirect object slot
28544 @_ = qw( print printf sort exec system say);
28545 @is_indirect_object_taker{@_} = (1) x scalar(@_);
28547 # These tokens may precede a code block
28548 # patched for SWITCH/CASE
28550 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
28551 unless do while until eval for foreach map grep sort
28552 switch case given when);
28553 @is_code_block_token{@_} = (1) x scalar(@_);
28555 # I'll build the list of keywords incrementally
28558 # keywords and tokens after which a value or pattern is expected,
28559 # but not an operator. In other words, these should consume terms
28560 # to their right, or at least they are not expected to be followed
28561 # immediately by operators.
28562 my @value_requestor = qw(
28783 # patched above for SWITCH/CASE given/when err say
28784 # 'err' is a fairly safe addition.
28785 # TODO: 'default' still needed if appropriate
28786 # 'use feature' seen, but perltidy works ok without it.
28787 # Concerned that 'default' could break code.
28788 push( @Keywords, @value_requestor );
28790 # These are treated the same but are not keywords:
28795 push( @value_requestor, @extra_vr );
28797 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
28799 # this list contains keywords which do not look for arguments,
28800 # so that they might be followed by an operator, or at least
28802 my @operator_requestor = qw(
28826 push( @Keywords, @operator_requestor );
28828 # These are treated the same but are not considered keywords:
28835 push( @operator_requestor, @extra_or );
28837 @expecting_operator_token{@operator_requestor} =
28838 (1) x scalar(@operator_requestor);
28840 # these token TYPES expect trailing operator but not a term
28841 # note: ++ and -- are post-increment and decrement, 'C' = constant
28842 my @operator_requestor_types = qw( ++ -- C <> q );
28843 @expecting_operator_types{@operator_requestor_types} =
28844 (1) x scalar(@operator_requestor_types);
28846 # these token TYPES consume values (terms)
28847 # note: pp and mm are pre-increment and decrement
28848 # f=semicolon in for, F=file test operator
28849 my @value_requestor_type = qw#
28850 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
28851 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
28852 <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
28853 f F pp mm Y p m U J G j >> << ^ t
28855 push( @value_requestor_type, ',' )
28856 ; # (perl doesn't like a ',' in a qw block)
28857 @expecting_term_types{@value_requestor_type} =
28858 (1) x scalar(@value_requestor_type);
28860 # Note: the following valid token types are not assigned here to
28861 # hashes requesting to be followed by values or terms, but are
28862 # instead currently hard-coded into sub operator_expected:
28863 # ) -> :: Q R Z ] b h i k n v w } #
28865 # For simple syntax checking, it is nice to have a list of operators which
28866 # will really be unhappy if not followed by a term. This includes most
# start from a copy of the term-expecting types, then remove exceptions
28868 %really_want_term = %expecting_term_types;
28870 # with these exceptions...
28871 delete $really_want_term{'U'}; # user sub, depends on prototype
28872 delete $really_want_term{'F'}; # file test works on $_ if no following term
28873 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
28876 @_ = qw(q qq qw qx qr s y tr m);
28877 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
28879 # These keywords are handled specially in the tokenizer code:
28880 my @special_keywords = qw(
28896 push( @Keywords, @special_keywords );
28898 # Keywords after which list formatting may be used
28899 # WARNING: do not include |map|grep|eval or perl may die on
28900 # syntax errors (map1.t).
28901 my @keyword_taking_list = qw(
28975 @is_keyword_taking_list{@keyword_taking_list} =
28976 (1) x scalar(@keyword_taking_list);
28978 # These are not used in any way yet
28979 # my @unused_keywords = qw(
28986 # The list of keywords was originally extracted from function 'keyword' in
28987 # perl file toke.c version 5.005.03, using this utility, plus a
28988 # little editing: (file getkwd.pl):
28989 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
28990 # Add 'get' prefix where necessary, then split into the above lists.
28991 # This list should be updated as necessary.
28992 # The list should not contain these special variables:
28993 # ARGV DATA ENV SIG STDERR STDIN STDOUT
# @Keywords now holds every list pushed onto it above
28996 @is_keyword{@Keywords} = (1) x scalar(@Keywords);
29003 Perl::Tidy - Parses and beautifies perl source
29009 Perl::Tidy::perltidy(
29011 destination => $destination,
29014 perltidyrc => $perltidyrc,
29015 logfile => $logfile,
29016 errorfile => $errorfile,
29017 formatter => $formatter, # callback object (see below)
29018 dump_options => $dump_options,
29019 dump_options_type => $dump_options_type,
29020 prefilter => $prefilter_coderef,
29021 postfilter => $postfilter_coderef,
29026 This module makes the functionality of the perltidy utility available to perl
29027 scripts. Any or all of the input parameters may be omitted, in which case the
29028 @ARGV array will be used to provide input parameters as described
29029 in the perltidy(1) man page.
29031 For example, the perltidy script is basically just this:
29034 Perl::Tidy::perltidy();
29036 The module accepts input and output streams by a variety of methods.
29037 Each parameter in the following list may be any of the following: a
29038 filename, an ARRAY reference, a SCALAR reference, or an object with
29039 either a B<getline> or B<print> method, as appropriate.
29041 source - the source of the script to be formatted
29042 destination - the destination of the formatted output
29043 stderr - standard error output
29044 perltidyrc - the .perltidyrc file
29045 logfile - the .LOG file stream, if any
29046 errorfile - the .ERR file stream, if any
29047 dump_options - ref to a hash to receive parameters (see below),
29048 dump_options_type - controls contents of dump_options
29049 dump_getopt_flags - ref to a hash to receive Getopt flags
29050 dump_options_category - ref to a hash giving category of options
29051 dump_abbreviations - ref to a hash giving all abbreviations
29053 The following chart illustrates the logic used to decide how to
29056 ref($param) $param is assumed to be:
29057 ----------- ---------------------
29059 SCALAR ref to string
29061 (other) object with getline (if source) or print method
29063 If the parameter is an object, and the object has a B<close> method, that
29064 close method will be called at the end of the stream.
29070 If the B<source> parameter is given, it defines the source of the input stream.
29071 If an input stream is defined with the B<source> parameter then no other source
29072 filenames may be specified in the @ARGV array or B<argv> parameter.
29076 If the B<destination> parameter is given, it will be used to define the
29077 file or memory location to receive output of perltidy.
29081 The B<stderr> parameter allows the calling program to redirect to a file the
29082 output of what would otherwise go to the standard error output device. Unlike
29083 many other parameters, $stderr must be a file or file handle; it may not be a
29084 reference to a SCALAR or ARRAY.
29088 If the B<perltidyrc> file is given, it will be used instead of any
29089 F<.perltidyrc> configuration file that would otherwise be used.
29093 If the B<argv> parameter is given, it will be used instead of the
29094 B<@ARGV> array. The B<argv> parameter may be a string, a reference to a
29095 string, or a reference to an array. If it is a string or reference to a
29096 string, it will be parsed into an array of items just as if it were a
29097 command line string.
29101 If the B<dump_options> parameter is given, it must be the reference to a hash.
29102 In this case, the parameters contained in any perltidyrc configuration file
29103 will be placed in this hash and perltidy will return immediately. This is
29104 equivalent to running perltidy with --dump-options, except that the parameters
29105 are returned in a hash rather than dumped to standard output. Also, by default
29106 only the parameters in the perltidyrc file are returned, but this can be
29107 changed (see the next parameter). This parameter provides a convenient method
29108 for external programs to read a perltidyrc file. An example program using
29109 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
29111 Any combination of the B<dump_> parameters may be used together.
29113 =item dump_options_type
29115 This parameter is a string which can be used to control the parameters placed
29116 in the hash reference supplied by B<dump_options>. The possible values are
29117 'perltidyrc' (default) and 'full'. The 'full' parameter causes both the
29118 default options plus any options found in a perltidyrc file to be returned.
29120 =item dump_getopt_flags
29122 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
29123 hash. This hash will receive all of the parameters that perltidy understands
29124 and flags that are passed to Getopt::Long. This parameter may be
29125 used alone or with the B<dump_options> flag. Perltidy will
29126 exit immediately after filling this hash. See the demo program
29127 F<perltidyrc_dump.pl> for example usage.
29129 =item dump_options_category
29131 If the B<dump_options_category> parameter is given, it must be the reference to a
29132 hash. This hash will receive a hash with keys equal to all long parameter names
29133 and values equal to the title of the corresponding section of the perltidy manual.
29134 See the demo program F<perltidyrc_dump.pl> for example usage.
29136 =item dump_abbreviations
29138 If the B<dump_abbreviations> parameter is given, it must be the reference to a
29139 hash. This hash will receive all abbreviations used by Perl::Tidy. See the
29140 demo program F<perltidyrc_dump.pl> for example usage.
29144 A code reference that will be applied to the source before tidying. It is
29145 expected to take the full content as a string in its input, and output the
29146 transformed content.
29150 A code reference that will be applied to the tidied result before outputting.
29151 It is expected to take the full content as a string in its input, and output
29152 the transformed content.
29154 Note: A convenient way to check the function of your custom prefilter and
29155 postfilter code is to use the --notidy option, first with just the prefilter
29156 and then with both the prefilter and postfilter. See also the file
29157 B<filter_example.pl> in the perltidy distribution.
29161 =head1 NOTES ON FORMATTING PARAMETERS
29163 Parameters which control formatting may be passed in several ways: in a
29164 F<.perltidyrc> configuration file, in the B<perltidyrc> parameter, and in the
29167 The B<-syn> (B<--check-syntax>) flag may be used with all source and
29168 destination streams except for standard input and output. However,
29169 data streams which are not associated with a filename will
29170 be copied to a temporary file before being passed to Perl. This
29171 use of temporary files can cause somewhat confusing output from Perl.
29175 The perltidy script itself is a simple example, and several
29176 examples are given in the perltidy distribution.
29178 The following example passes perltidy a snippet as a reference
29179 to a string and receives the result back in a reference to
29184 # some messy source code to format
29185 my $source = <<'EOM';
29187 my @editors=('Emacs', 'Vi '); my $rand = rand();
29188 print "A poll of 10 random programmers gave these results:\n";
29190 my $i=int ($rand+rand());
29191 print " $editors[$i] users are from Venus" . ", " .
29192 "$editors[1-$i] users are from Mars" .
29197 # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
29199 perltidy( source => \$source, destination => \@dest );
29200 foreach (@dest) {print}
29202 =head1 Using the B<formatter> Callback Object
29204 The B<formatter> parameter is an optional callback object which allows
29205 the calling program to receive tokenized lines directly from perltidy for
29206 further specialized processing. When this parameter is used, the two
29207 formatting options which are built into perltidy (beautification or
29208 html) are ignored. The following diagram illustrates the logical flow:
29210 |-- (normal route) -> code beautification
29211 caller->perltidy->|-- (-html flag ) -> create html
29212 |-- (formatter given)-> callback to write_line
29214 This can be useful for processing perl scripts in some way. The
29215 parameter C<$formatter> in the perltidy call,
29217 formatter => $formatter,
29219 is an object created by the caller with a C<write_line> method which
29220 will accept and process tokenized lines, one line per call. Here is
29221 a simple example of a C<write_line> which merely prints the line number,
29222 the line type (as determined by perltidy), and the text of the line:
29226 # This is called from perltidy line-by-line
29228 my $line_of_tokens = shift;
29229 my $line_type = $line_of_tokens->{_line_type};
29230 my $input_line_number = $line_of_tokens->{_line_number};
29231 my $input_line = $line_of_tokens->{_line_text};
29232 print "$input_line_number:$line_type:$input_line";
29235 The complete program, B<perllinetype>, is contained in the examples section of
29236 the source distribution. As this example shows, the callback method
29237 receives a parameter B<$line_of_tokens>, which is a reference to a hash
29238 of other useful information. This example uses these hash entries:
29240 $line_of_tokens->{_line_number} - the line number (1,2,...)
29241 $line_of_tokens->{_line_text} - the text of the line
29242 $line_of_tokens->{_line_type} - the type of the line, one of:
29244 SYSTEM - system-specific code before hash-bang line
29245 CODE - line of perl code (including comments)
29246 POD_START - line starting pod, such as '=head'
29247 POD - pod documentation text
29248 POD_END - last line of pod section, '=cut'
29249 HERE - text of here-document
29250 HERE_END - last line of here-doc (target word)
29251 FORMAT - format section
29252 FORMAT_END - last line of format section, '.'
29253 DATA_START - __DATA__ line
29254 DATA - unidentified text following __DATA__
29255 END_START - __END__ line
29256 END - unidentified text following __END__
29257 ERROR - we are in big trouble, probably not a perl script
29259 Most applications will only be interested in lines of type B<CODE>. For
29260 another example, let's write a program which checks for one of the
29261 so-called I<naughty matching variables> C<$`>, C<$&>, and C<$'>, which
29262 can slow down processing. Here is a B<write_line>, from the example
29263 program B<find_naughty.pl>, which does that:
29267 # This is called back from perltidy line-by-line
29268 # We're looking for $`, $&, and $'
29269 my ( $self, $line_of_tokens ) = @_;
29271 # pull out some stuff we might need
29272 my $line_type = $line_of_tokens->{_line_type};
29273 my $input_line_number = $line_of_tokens->{_line_number};
29274 my $input_line = $line_of_tokens->{_line_text};
29275 my $rtoken_type = $line_of_tokens->{_rtoken_type};
29276 my $rtokens = $line_of_tokens->{_rtokens};
29279 # skip comments, pod, etc
29280 return if ( $line_type ne 'CODE' );
29282 # loop over tokens looking for $`, $&, and $'
29283 for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
29285 # we only want to examine token types 'i' (identifier)
29286 next unless $$rtoken_type[$j] eq 'i';
29288 # pull out the actual token text
29289 my $token = $$rtokens[$j];
29292 if ( $token =~ /^\$[\`\&\']$/ ) {
29294 "$input_line_number: $token\n";
29299 This example pulls out these tokenization variables from the $line_of_tokens
29302 $rtoken_type = $line_of_tokens->{_rtoken_type};
29303 $rtokens = $line_of_tokens->{_rtokens};
29305 The variable C<$rtoken_type> is a reference to an array of token type codes,
29306 and C<$rtokens> is a reference to a corresponding array of token text.
29307 These are obviously only defined for lines of type B<CODE>.
29308 Perltidy classifies tokens into types, and has a brief code for each type.
29309 You can get a complete list at any time by running perltidy from the
29312 perltidy --dump-token-types
29314 In the present example, we are only looking for tokens of type B<i>
29315 (identifiers), so the for loop skips past all other types. When an
29316 identifier is found, its actual text is checked to see if it is one
29317 being sought. If so, the above write_line prints the token and its
29320 The B<formatter> feature is relatively new in perltidy, and further
29321 documentation needs to be written to complete its description. However,
29322 several example programs have been written and can be found in the
29323 B<examples> section of the source distribution. Probably the best way
29324 to get started is to find one of the examples which most closely matches
29325 your application and start modifying it.
29327 For help with perltidy's peculiar way of breaking lines into tokens, you
29328 might run, from the command line,
29330 perltidy -D filename
29332 where F<filename> is a short script of interest. This will produce
29333 F<filename.DEBUG> with interleaved lines of text and their token types.
29334 The B<-D> flag has been in perltidy from the beginning for this purpose.
29335 If you want to see the code which creates this file, it is
29336 C<write_debug_entry> in Tidy.pm.
29344 Thanks to Hugh Myers who developed the initial modular interface
29349 This man page documents Perl::Tidy version 20120701.
29353 This package is free software; you can redistribute it and/or modify it
29354 under the terms of the "GNU General Public License".
29356 Please refer to the file "COPYING" for details.
29361 perltidy at users.sourceforge.net
29365 The perltidy(1) man page describes all of the features of perltidy. It
29366 can be found at http://perltidy.sourceforge.net.