2 ############################################################
4 # perltidy - a perl script indenter and formatter
6 # Copyright (c) 2000-2017 by Steve Hancock
7 # Distributed under the GPL license agreement; see file COPYING
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License along
20 # with this program; if not, write to the Free Software Foundation, Inc.,
21 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 # For brief instructions, try 'perltidy -h'.
24 # For more complete documentation, try 'man perltidy'
25 # or visit http://perltidy.sourceforge.net
27 # This script is an example of the default style. It was formatted with:
31 # Code Contributions: See ChangeLog.html for a complete history.
32 # Michael Cartmell supplied code for adaptation to VMS and helped with
34 # Hugh S. Myers supplied sub streamhandle and the supporting code to
35 # create a Perl::Tidy module which can operate on strings, arrays, etc.
36 # Yves Orton supplied coding to help detect Windows versions.
37 # Axel Rose supplied a patch for MacPerl.
38 # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
39 # Dan Tyrell contributed a patch for binary I/O.
40 # Ueli Hugenschmidt contributed a patch for -fpsc
41 # Sam Kington supplied a patch to identify the initial indentation of
43 # jonathan swartz supplied patches for:
44 # * .../ pattern, which looks upwards from directory
45 # * --notidy, to be used in directories where we want to avoid
46 # accidentally tidying
47 # * prefilter and postfilter
50 # Many others have supplied key ideas, suggestions, and bug reports;
51 # see the CHANGES file.
53 ############################################################
57 # Actually should use a version later than about 5.8.5 to use
59 use 5.004; # need IO::File from 5.004 or later
72 $rOpts_character_encoding
75 @ISA = qw( Exporter );
76 @EXPORT = qw( &perltidy );
83 use File::Temp qw(tempfile);
86 ( $VERSION = q($Id: Tidy.pm,v 1.74 2017/05/21 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
91 # given filename and mode (r or w), create an object which:
92 # has a 'getline' method if mode='r', and
93 # has a 'print' method if mode='w'.
94 # The objects also need a 'close' method.
96 # How the object is made:
98 # if $filename is: Make object using:
99 # ---------------- -----------------
100 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
102 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
103 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
105 # (check for 'print' method for 'w' mode)
106 # (check for 'getline' method for 'r' mode)
107 my $ref = ref( my $filename = shift );
114 if ( $ref eq 'ARRAY' ) {
115 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
117 elsif ( $ref eq 'SCALAR' ) {
118 $New = sub { Perl::Tidy::IOScalar->new(@_) };
122 # Accept an object with a getline method for reading. Note:
123 # IO::File is built-in and does not respond to the defined
124 # operator. If this causes trouble, the check can be
125 # skipped and we can just let it crash if there is no
127 if ( $mode =~ /[rR]/ ) {
129 # RT#97159; part 1 of 2: updated to use 'can'
130 ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
131 if ( $ref->can('getline') ) {
132 $New = sub { $filename };
135 $New = sub { undef };
137 ------------------------------------------------------------------------
138 No 'getline' method is defined for object of class $ref
139 Please check your call to Perl::Tidy::perltidy. Trace follows.
140 ------------------------------------------------------------------------
145 # Accept an object with a print method for writing.
146 # See note above about IO::File
147 if ( $mode =~ /[wW]/ ) {
149 # RT#97159; part 2 of 2: updated to use 'can'
150 ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
151 if ( $ref->can('print') ) {
152 $New = sub { $filename };
155 $New = sub { undef };
157 ------------------------------------------------------------------------
158 No 'print' method is defined for object of class $ref
159 Please check your call to Perl::Tidy::perltidy. Trace follows.
160 ------------------------------------------------------------------------
169 if ( $filename eq '-' ) {
170 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
173 $New = sub { IO::File->new(@_) };
176 $fh = $New->( $filename, $mode )
177 or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
179 return $fh, ( $ref or $filename );
182 sub find_input_line_ending {
184 # Peek at a file and return first line ending character.
185 # Quietly return undef in case of any trouble.
186 my ($input_file) = @_;
189 # silently ignore input from object or stdin
190 if ( ref($input_file) || $input_file eq '-' ) {
193 open( INFILE, $input_file ) || return $ending;
197 read( INFILE, $buf, 1024 );
199 if ( $buf && $buf =~ /([\012\015]+)/ ) {
203 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
206 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
209 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
223 # concatenate a path and file basename
224 # returns undef in case of error
226 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
228 # use File::Spec if we can
229 unless ($missing_file_spec) {
230 return File::Spec->catfile(@_);
233 # Perl 5.004 systems may not have File::Spec so we'll make
234 # a simple try. We assume File::Basename is available.
235 # return undef if not successful.
237 my $path = join '/', @_;
238 my $test_file = $path . $name;
239 my ( $test_name, $test_path ) = fileparse($test_file);
240 return $test_file if ( $test_name eq $name );
241 return undef if ( $^O eq 'VMS' );
243 # this should work at least for Windows and Unix:
244 $test_file = $path . '/' . $name;
245 ( $test_name, $test_path ) = fileparse($test_file);
246 return $test_file if ( $test_name eq $name );
250 # Here is a map of the flow of data from the input source to the output
253 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
254 # input groups output
255 # lines tokens lines of lines lines
258 # The names correspond to the package names responsible for the unit processes.
260 # The overall process is controlled by the "main" package.
262 # LineSource is the stream of input lines
264 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
265 # if necessary. A token is any section of the input line which should be
266 # manipulated as a single entity during formatting. For example, a single
267 # ',' character is a token, and so is an entire side comment. It handles
268 # the complexities of Perl syntax, such as distinguishing between '<<' as
269 # a shift operator and as a here-document, or distinguishing between '/'
270 # as a divide symbol and as a pattern delimiter.
272 # Formatter inserts and deletes whitespace between tokens, and breaks
273 # sequences of tokens at appropriate points as output lines. It bases its
274 # decisions on the default rules as modified by any command-line options.
276 # VerticalAligner collects groups of lines together and tries to line up
277 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
279 # FileWriter simply writes lines to the output stream.
281 # The Logger package, not shown, records significant events and warning
282 # messages. It writes a .LOG file, which may be saved with a
283 # '-log' or a '-g' flag.
289 destination => undef,
296 dump_options => undef,
297 dump_options_type => undef,
298 dump_getopt_flags => undef,
299 dump_options_category => undef,
300 dump_options_range => undef,
301 dump_abbreviations => undef,
306 # don't overwrite callers ARGV
308 local *STDERR = *STDERR;
312 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
314 my @good_keys = sort keys %defaults;
315 @bad_keys = sort @bad_keys;
317 ------------------------------------------------------------------------
318 Unknown perltidy parameter : (@bad_keys)
319 perltidy only understands : (@good_keys)
320 ------------------------------------------------------------------------
325 my $get_hash_ref = sub {
327 my $hash_ref = $input_hash{$key};
328 if ( defined($hash_ref) ) {
329 unless ( ref($hash_ref) eq 'HASH' ) {
330 my $what = ref($hash_ref);
332 $what ? "but is ref to $what" : "but is not a reference";
334 ------------------------------------------------------------------------
335 error in call to perltidy:
336 -$key must be reference to HASH $but_is
337 ------------------------------------------------------------------------
344 %input_hash = ( %defaults, %input_hash );
345 my $argv = $input_hash{'argv'};
346 my $destination_stream = $input_hash{'destination'};
347 my $errorfile_stream = $input_hash{'errorfile'};
348 my $logfile_stream = $input_hash{'logfile'};
349 my $perltidyrc_stream = $input_hash{'perltidyrc'};
350 my $source_stream = $input_hash{'source'};
351 my $stderr_stream = $input_hash{'stderr'};
352 my $user_formatter = $input_hash{'formatter'};
353 my $prefilter = $input_hash{'prefilter'};
354 my $postfilter = $input_hash{'postfilter'};
356 if ($stderr_stream) {
357 ( $fh_stderr, my $stderr_file ) =
358 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
361 ------------------------------------------------------------------------
362 Unable to redirect STDERR to $stderr_stream
363 Please check value of -stderr in call to perltidy
364 ------------------------------------------------------------------------
369 $fh_stderr = *STDERR;
372 sub Warn ($) { $fh_stderr->print( $_[0] ); }
375 if ( $_[0] ) { goto ERROR_EXIT }
376 else { goto NORMAL_EXIT }
379 sub Die ($) { Warn $_[0]; Exit(1); }
381 # extract various dump parameters
382 my $dump_options_type = $input_hash{'dump_options_type'};
383 my $dump_options = $get_hash_ref->('dump_options');
384 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
385 my $dump_options_category = $get_hash_ref->('dump_options_category');
386 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
387 my $dump_options_range = $get_hash_ref->('dump_options_range');
389 # validate dump_options_type
390 if ( defined($dump_options) ) {
391 unless ( defined($dump_options_type) ) {
392 $dump_options_type = 'perltidyrc';
394 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
396 ------------------------------------------------------------------------
397 Please check value of -dump_options_type in call to perltidy;
398 saw: '$dump_options_type'
399 expecting: 'perltidyrc' or 'full'
400 ------------------------------------------------------------------------
406 $dump_options_type = "";
409 if ($user_formatter) {
411 # if the user defines a formatter, there is no output stream,
412 # but we need a null stream to keep coding simple
413 $destination_stream = Perl::Tidy::DevNull->new();
416 # see if ARGV is overridden
417 if ( defined($argv) ) {
419 my $rargv = ref $argv;
420 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
424 if ( $rargv eq 'ARRAY' ) {
429 ------------------------------------------------------------------------
430 Please check value of -argv in call to perltidy;
431 it must be a string or ref to ARRAY but is: $rargv
432 ------------------------------------------------------------------------
439 my ( $rargv, $msg ) = parse_args($argv);
442 Error parsing this string passed to to perltidy with 'argv':
450 my $rpending_complaint;
451 $$rpending_complaint = "";
452 my $rpending_logfile_message;
453 $$rpending_logfile_message = "";
455 my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
457 # VMS file names are restricted to a 40.40 format, so we append _tdy
458 # instead of .tdy, etc. (but see also sub check_vms_filename)
461 if ( $^O eq 'VMS' ) {
467 $dot_pattern = '\.'; # must escape for use in regex
470 #---------------------------------------------------------------
471 # get command line options
472 #---------------------------------------------------------------
473 my ( $rOpts, $config_file, $rraw_options, $roption_string,
474 $rexpansion, $roption_category, $roption_range )
475 = process_command_line(
476 $perltidyrc_stream, $is_Windows, $Windows_type,
477 $rpending_complaint, $dump_options_type,
480 my $saw_extrude = ( grep m/^-extrude$/, @$rraw_options ) ? 1 : 0;
482 ( grep m/^-(pbp|perl-best-practices)$/, @$rraw_options ) ? 1 : 0;
484 #---------------------------------------------------------------
485 # Handle requests to dump information
486 #---------------------------------------------------------------
488 # return or exit immediately after all dumps
491 # Getopt parameters and their flags
492 if ( defined($dump_getopt_flags) ) {
494 foreach my $op ( @{$roption_string} ) {
503 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
507 $dump_getopt_flags->{$opt} = $flag;
511 if ( defined($dump_options_category) ) {
513 %{$dump_options_category} = %{$roption_category};
516 if ( defined($dump_options_range) ) {
518 %{$dump_options_range} = %{$roption_range};
521 if ( defined($dump_abbreviations) ) {
523 %{$dump_abbreviations} = %{$rexpansion};
526 if ( defined($dump_options) ) {
528 %{$dump_options} = %{$rOpts};
531 Exit 0 if ($quit_now);
533 # make printable string of options for this run as possible diagnostic
534 my $readable_options = readable_options( $rOpts, $roption_string );
536 # dump from command line
537 if ( $rOpts->{'dump-options'} ) {
538 print STDOUT $readable_options;
542 #---------------------------------------------------------------
543 # check parameters and their interactions
544 #---------------------------------------------------------------
546 check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
548 if ($user_formatter) {
549 $rOpts->{'format'} = 'user';
552 # there must be one entry here for every possible format
553 my %default_file_extension = (
559 $rOpts_character_encoding = $rOpts->{'character-encoding'};
561 # be sure we have a valid output format
562 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
563 my $formats = join ' ',
564 sort map { "'" . $_ . "'" } keys %default_file_extension;
565 my $fmt = $rOpts->{'format'};
566 Die "-format='$fmt' but must be one of: $formats\n";
569 my $output_extension = make_extension( $rOpts->{'output-file-extension'},
570 $default_file_extension{ $rOpts->{'format'} }, $dot );
572 # If the backup extension contains a / character then the backup should
573 # be deleted when the -b option is used. On older versions of
574 # perltidy this will generate an error message due to an illegal
577 # A backup file will still be generated but will be deleted
578 # at the end. If -bext='/' then this extension will be
579 # the default 'bak'. Otherwise it will be whatever characters
580 # remains after all '/' characters are removed. For example:
581 # -bext extension slashes
585 # '/dev/null' devnull 2 (Currently not allowed)
586 my $bext = $rOpts->{'backup-file-extension'};
587 my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
589 # At present only one forward slash is allowed. In the future multiple
590 # slashes may be allowed to allow for other options
591 if ( $delete_backup > 1 ) {
592 Die "-bext=$bext contains more than one '/'\n";
595 my $backup_extension =
596 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
598 my $html_toc_extension =
599 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
601 my $html_src_extension =
602 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
604 # check for -b option;
605 # silently ignore unless beautify mode
606 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
607 && $rOpts->{'format'} eq 'tidy';
609 # Turn off -b with warnings in case of conflicts with other options.
610 # NOTE: Do this silently, without warnings, if there is a source or
611 # destination stream, or standard output is used. This is because the -b
612 # flag may have been in a .perltidyrc file and warnings break
613 # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014.
614 if ($in_place_modify) {
615 if ( $rOpts->{'standard-output'} ) {
616 ## my $msg = "Ignoring -b; you may not use -b and -st together";
617 ## $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
619 $in_place_modify = 0;
621 if ($destination_stream) {
622 ##Warn "Ignoring -b; you may not specify a destination stream and -b together\n";
623 $in_place_modify = 0;
625 if ( ref($source_stream) ) {
626 ##Warn "Ignoring -b; you may not specify a source array and -b together\n";
627 $in_place_modify = 0;
629 if ( $rOpts->{'outfile'} ) {
630 ##Warn "Ignoring -b; you may not use -b and -o together\n";
631 $in_place_modify = 0;
633 if ( defined( $rOpts->{'output-path'} ) ) {
634 ##Warn "Ignoring -b; you may not use -b and -opath together\n";
635 $in_place_modify = 0;
639 Perl::Tidy::Formatter::check_options($rOpts);
640 if ( $rOpts->{'format'} eq 'html' ) {
641 Perl::Tidy::HtmlWriter->check_options($rOpts);
644 # make the pattern of file extensions that we shouldn't touch
645 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
646 if ($output_extension) {
647 my $ext = quotemeta($output_extension);
648 $forbidden_file_extensions .= "|$ext";
650 if ( $in_place_modify && $backup_extension ) {
651 my $ext = quotemeta($backup_extension);
652 $forbidden_file_extensions .= "|$ext";
654 $forbidden_file_extensions .= ')$';
656 # Create a diagnostics object if requested;
657 # This is only useful for code development
658 my $diagnostics_object = undef;
659 if ( $rOpts->{'DIAGNOSTICS'} ) {
660 $diagnostics_object = Perl::Tidy::Diagnostics->new();
663 # no filenames should be given if input is from an array
664 if ($source_stream) {
667 "You may not specify any filenames when a source array is given\n";
670 # we'll stuff the source array into ARGV
671 unshift( @ARGV, $source_stream );
673 # No special treatment for source stream which is a filename.
674 # This will enable checks for binary files and other bad stuff.
675 $source_stream = undef unless ref($source_stream);
678 # use stdin by default if no source array and no args
680 unshift( @ARGV, '-' ) unless @ARGV;
683 #---------------------------------------------------------------
685 # main loop to process all files in argument list
686 #---------------------------------------------------------------
687 my $number_of_files = @ARGV;
688 my $formatter = undef;
689 my $tokenizer = undef;
690 while ( my $input_file = shift @ARGV ) {
692 my $input_file_permissions;
694 #---------------------------------------------------------------
695 # prepare this input stream
696 #---------------------------------------------------------------
697 if ($source_stream) {
698 $fileroot = "perltidy";
700 # If the source is from an array or string, then .LOG output
701 # is only possible if a logfile stream is specified. This prevents
702 # unexpected perltidy.LOG files.
703 if ( !defined($logfile_stream) ) {
704 $logfile_stream = Perl::Tidy::DevNull->new();
707 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
708 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
709 $in_place_modify = 0;
712 $fileroot = $input_file;
713 unless ( -e $input_file ) {
715 # file doesn't exist - check for a file glob
716 if ( $input_file =~ /([\?\*\[\{])/ ) {
718 # Windows shell may not remove quotes, so do it
719 my $input_file = $input_file;
720 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
721 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
722 my $pattern = fileglob_to_re($input_file);
724 if ( !$@ && opendir( DIR, './' ) ) {
726 grep { /$pattern/ && !-d $_ } readdir(DIR);
729 unshift @ARGV, @files;
734 Warn "skipping file: '$input_file': no matches found\n";
738 unless ( -f $input_file ) {
739 Warn "skipping file: $input_file: not a regular file\n";
743 # As a safety precaution, skip zero length files.
744 # If for example a source file got clobbered somehow,
745 # the old .tdy or .bak files might still exist so we
746 # shouldn't overwrite them with zero length files.
747 unless ( -s $input_file ) {
748 Warn "skipping file: $input_file: Zero size\n";
752 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
754 "skipping file: $input_file: Non-text (override with -f)\n";
758 # we should have a valid filename now
759 $fileroot = $input_file;
760 $input_file_permissions = ( stat $input_file )[2] & 07777;
762 if ( $^O eq 'VMS' ) {
763 ( $fileroot, $dot ) = check_vms_filename($fileroot);
766 # add option to change path here
767 if ( defined( $rOpts->{'output-path'} ) ) {
769 my ( $base, $old_path ) = fileparse($fileroot);
770 my $new_path = $rOpts->{'output-path'};
771 unless ( -d $new_path ) {
772 unless ( mkdir $new_path, 0777 ) {
773 Die "unable to create directory $new_path: $!\n";
776 my $path = $new_path;
777 $fileroot = catfile( $path, $base );
780 ------------------------------------------------------------------------
781 Problem combining $new_path and $base to make a filename; check -opath
782 ------------------------------------------------------------------------
788 # Skip files with same extension as the output files because
789 # this can lead to a messy situation with files like
790 # script.tdy.tdy.tdy ... or worse problems ... when you
791 # rerun perltidy over and over with wildcard input.
794 && ( $input_file =~ /$forbidden_file_extensions/o
795 || $input_file eq 'DIAGNOSTICS' )
798 Warn "skipping file: $input_file: wrong extension\n";
802 # the 'source_object' supplies a method to read the input file
804 Perl::Tidy::LineSource->new( $input_file, $rOpts,
805 $rpending_logfile_message );
806 next unless ($source_object);
808 # Prefilters and postfilters: The prefilter is a code reference
809 # that will be applied to the source before tidying, and the
810 # postfilter is a code reference to the result before outputting.
813 || ( $rOpts_character_encoding
814 && $rOpts_character_encoding eq 'utf8' )
818 while ( my $line = $source_object->get_line() ) {
822 $buf = $prefilter->($buf) if $prefilter;
824 if ( $rOpts_character_encoding
825 && $rOpts_character_encoding eq 'utf8'
826 && !utf8::is_utf8($buf) )
829 $buf = Encode::decode( 'UTF-8', $buf,
830 Encode::FB_CROAK | Encode::LEAVE_SRC );
834 "skipping file: $input_file: Unable to decode source as UTF-8\n";
839 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
840 $rpending_logfile_message );
843 # register this file name with the Diagnostics package
844 $diagnostics_object->set_input_file($input_file)
845 if $diagnostics_object;
847 #---------------------------------------------------------------
848 # prepare the output stream
849 #---------------------------------------------------------------
850 my $output_file = undef;
851 my $actual_output_extension;
853 if ( $rOpts->{'outfile'} ) {
855 if ( $number_of_files <= 1 ) {
857 if ( $rOpts->{'standard-output'} ) {
858 my $msg = "You may not use -o and -st together";
859 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
862 elsif ($destination_stream) {
864 "You may not specify a destination array and -o together\n";
866 elsif ( defined( $rOpts->{'output-path'} ) ) {
867 Die "You may not specify -o and -opath together\n";
869 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
870 Die "You may not specify -o and -oext together\n";
872 $output_file = $rOpts->{outfile};
874 # make sure user gives a file name after -o
875 if ( $output_file =~ /^-/ ) {
876 Die "You must specify a valid filename after -o\n";
879 # do not overwrite input file with -o
880 if ( defined($input_file_permissions)
881 && ( $output_file eq $input_file ) )
883 Die "Use 'perltidy -b $input_file' to modify in-place\n";
887 Die "You may not use -o with more than one input file\n";
890 elsif ( $rOpts->{'standard-output'} ) {
891 if ($destination_stream) {
893 "You may not specify a destination array and -st together\n";
894 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
899 if ( $number_of_files <= 1 ) {
902 Die "You may not use -st with more than one input file\n";
905 elsif ($destination_stream) {
906 $output_file = $destination_stream;
908 elsif ($source_stream) { # source but no destination goes to stdout
911 elsif ( $input_file eq '-' ) {
915 if ($in_place_modify) {
916 $output_file = IO::File->new_tmpfile()
917 or Die "cannot open temp file for -b option: $!\n";
920 $actual_output_extension = $output_extension;
921 $output_file = $fileroot . $output_extension;
925 # the 'sink_object' knows how to write the output file
926 my $tee_file = $fileroot . $dot . "TEE";
928 my $line_separator = $rOpts->{'output-line-ending'};
929 if ( $rOpts->{'preserve-line-endings'} ) {
930 $line_separator = find_input_line_ending($input_file);
933 # Eventually all I/O may be done with binmode, but for now it is
934 # only done when a user requests a particular line separator
935 # through the -ple or -ole flags
936 my $binmode = defined($line_separator)
937 || defined($rOpts_character_encoding);
938 $line_separator = "\n" unless defined($line_separator);
940 my ( $sink_object, $postfilter_buffer );
943 Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
944 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
948 Perl::Tidy::LineSink->new( $output_file, $tee_file,
949 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
952 #---------------------------------------------------------------
953 # initialize the error logger for this file
954 #---------------------------------------------------------------
955 my $warning_file = $fileroot . $dot . "ERR";
956 if ($errorfile_stream) { $warning_file = $errorfile_stream }
957 my $log_file = $fileroot . $dot . "LOG";
958 if ($logfile_stream) { $log_file = $logfile_stream }
961 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
962 $fh_stderr, $saw_extrude );
963 write_logfile_header(
964 $rOpts, $logger_object, $config_file,
965 $rraw_options, $Windows_type, $readable_options,
967 if ($$rpending_logfile_message) {
968 $logger_object->write_logfile_entry($$rpending_logfile_message);
970 if ($$rpending_complaint) {
971 $logger_object->complain($$rpending_complaint);
974 #---------------------------------------------------------------
975 # initialize the debug object, if any
976 #---------------------------------------------------------------
977 my $debugger_object = undef;
978 if ( $rOpts->{DEBUG} ) {
980 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
983 #---------------------------------------------------------------
984 # loop over iterations for one source stream
985 #---------------------------------------------------------------
987 # We will do a convergence test if 3 or more iterations are allowed.
988 # It would be pointless for fewer because we have to make at least
989 # two passes before we can see if we are converged, and the test
990 # would just slow things down.
991 my $max_iterations = $rOpts->{'iterations'};
992 my $convergence_log_message;
994 my $do_convergence_test = $max_iterations > 2;
995 if ($do_convergence_test) {
996 eval "use Digest::MD5 qw(md5_hex)";
997 $do_convergence_test = !$@;
999 # Trying to avoid problems with ancient versions of perl because
1000 # I don't know in which version number utf8::encode was introduced.
1001 eval { my $string = "perltidy"; utf8::encode($string) };
1002 $do_convergence_test = $do_convergence_test && !$@;
1005 # save objects to allow redirecting output during iterations
1006 my $sink_object_final = $sink_object;
1007 my $debugger_object_final = $debugger_object;
1008 my $logger_object_final = $logger_object;
1010 for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
1012 # send output stream to temp buffers until last iteration
1014 if ( $iter < $max_iterations ) {
1016 Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
1017 $line_separator, $rOpts, $rpending_logfile_message,
1021 $sink_object = $sink_object_final;
1024 # Save logger, debugger output only on pass 1 because:
1025 # (1) line number references must be to the starting
1026 # source, not an intermediate result, and
1027 # (2) we need to know if there are errors so we can stop the
1028 # iterations early if necessary.
1030 $debugger_object = undef;
1031 $logger_object = undef;
1034 #------------------------------------------------------------
1035 # create a formatter for this file : html writer or
1037 #------------------------------------------------------------
1039 # we have to delete any old formatter because, for safety,
1040 # the formatter will check to see that there is only one.
1043 if ($user_formatter) {
1044 $formatter = $user_formatter;
1046 elsif ( $rOpts->{'format'} eq 'html' ) {
1048 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1049 $actual_output_extension, $html_toc_extension,
1050 $html_src_extension );
1052 elsif ( $rOpts->{'format'} eq 'tidy' ) {
1053 $formatter = Perl::Tidy::Formatter->new(
1054 logger_object => $logger_object,
1055 diagnostics_object => $diagnostics_object,
1056 sink_object => $sink_object,
1060 Die "I don't know how to do -format=$rOpts->{'format'}\n";
1063 unless ($formatter) {
1064 Die "Unable to continue with $rOpts->{'format'} formatting\n";
1067 #---------------------------------------------------------------
1068 # create the tokenizer for this file
1069 #---------------------------------------------------------------
1070 $tokenizer = undef; # must destroy old tokenizer
1071 $tokenizer = Perl::Tidy::Tokenizer->new(
1072 source_object => $source_object,
1073 logger_object => $logger_object,
1074 debugger_object => $debugger_object,
1075 diagnostics_object => $diagnostics_object,
1076 tabsize => $tabsize,
1078 starting_level => $rOpts->{'starting-indentation-level'},
1079 indent_columns => $rOpts->{'indent-columns'},
1080 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
1081 look_for_autoloader => $rOpts->{'look-for-autoloader'},
1082 look_for_selfloader => $rOpts->{'look-for-selfloader'},
1083 trim_qw => $rOpts->{'trim-qw'},
1084 extended_syntax => $rOpts->{'extended-syntax'},
1086 continuation_indentation =>
1087 $rOpts->{'continuation-indentation'},
1088 outdent_labels => $rOpts->{'outdent-labels'},
1091 #---------------------------------------------------------------
1093 #---------------------------------------------------------------
1094 process_this_file( $tokenizer, $formatter );
1096 #---------------------------------------------------------------
1097 # close the input source and report errors
1098 #---------------------------------------------------------------
1099 $source_object->close_input_file();
1101 # line source for next iteration (if any) comes from the current
1102 # temporary output buffer
1103 if ( $iter < $max_iterations ) {
1105 $sink_object->close_output_file();
1107 Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1108 $rpending_logfile_message );
1110 # stop iterations if errors or converged
1111 my $stop_now = $logger_object->{_warning_count};
1113 $convergence_log_message = <<EOM;
1114 Stopping iterations because of errors.
1117 elsif ($do_convergence_test) {
1119 # Patch for [rt.cpan.org #88020]
1120 # Use utf8::encode since md5_hex() only operates on bytes.
1121 my $digest = md5_hex( utf8::encode($sink_buffer) );
1122 if ( !$saw_md5{$digest} ) {
1123 $saw_md5{$digest} = $iter;
1127 # Deja vu, stop iterating
1129 my $iterm = $iter - 1;
1130 if ( $saw_md5{$digest} != $iterm ) {
1132 # Blinking (oscillating) between two stable
1133 # end states. This has happened in the past
1134 # but at present there are no known instances.
1135 $convergence_log_message = <<EOM;
1136 Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1138 $diagnostics_object->write_diagnostics(
1139 $convergence_log_message)
1140 if $diagnostics_object;
1143 $convergence_log_message = <<EOM;
1144 Converged. Output for iteration $iter same as for iter $iterm.
1146 $diagnostics_object->write_diagnostics(
1147 $convergence_log_message)
1148 if $diagnostics_object && $iterm > 2;
1151 } ## end if ($do_convergence_test)
1155 # we are stopping the iterations early;
1156 # copy the output stream to its final destination
1157 $sink_object = $sink_object_final;
1158 while ( my $line = $source_object->get_line() ) {
1159 $sink_object->write_line($line);
1161 $source_object->close_input_file();
1164 } ## end if ( $iter < $max_iterations)
1165 } # end loop over iterations for one source file
1167 # restore objects which have been temporarily undefined
1168 # for second and higher iterations
1169 $debugger_object = $debugger_object_final;
1170 $logger_object = $logger_object_final;
1172 $logger_object->write_logfile_entry($convergence_log_message)
1173 if $convergence_log_message;
1175 #---------------------------------------------------------------
1176 # Perform any postfilter operation
1177 #---------------------------------------------------------------
1179 $sink_object->close_output_file();
1181 Perl::Tidy::LineSink->new( $output_file, $tee_file,
1182 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
1183 my $buf = $postfilter->($postfilter_buffer);
1185 Perl::Tidy::LineSource->new( \$buf, $rOpts,
1186 $rpending_logfile_message );
1187 while ( my $line = $source_object->get_line() ) {
1188 $sink_object->write_line($line);
1190 $source_object->close_input_file();
1193 # Save names of the input and output files for syntax check
1194 my $ifname = $input_file;
1195 my $ofname = $output_file;
1197 #---------------------------------------------------------------
1198 # handle the -b option (backup and modify in-place)
1199 #---------------------------------------------------------------
1200 if ($in_place_modify) {
1201 unless ( -f $input_file ) {
1203 # oh, oh, no real file to backup ..
1204 # shouldn't happen because of numerous preliminary checks
1206 "problem with -b backing up input file '$input_file': not a file\n";
1208 my $backup_name = $input_file . $backup_extension;
1209 if ( -f $backup_name ) {
1210 unlink($backup_name)
1212 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1215 # backup the input file
1216 # we use copy for symlinks, move for regular files
1217 if ( -l $input_file ) {
1218 File::Copy::copy( $input_file, $backup_name )
1219 or Die "File::Copy failed trying to backup source: $!";
1222 rename( $input_file, $backup_name )
1224 "problem renaming $input_file to $backup_name for -b option: $!\n";
1226 $ifname = $backup_name;
1228 # copy the output to the original input file
1229 # NOTE: it would be nice to just close $output_file and use
1230 # File::Copy::copy here, but in this case $output_file is the
1231 # handle of an open nameless temporary file so we would lose
1232 # everything if we closed it.
1233 seek( $output_file, 0, 0 )
1234 or Die "unable to rewind a temporary file for -b option: $!\n";
1235 my $fout = IO::File->new("> $input_file")
1237 "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1239 if ( $rOpts->{'character-encoding'}
1240 && $rOpts->{'character-encoding'} eq 'utf8' )
1242 binmode $fout, ":encoding(UTF-8)";
1244 else { binmode $fout }
1247 while ( $line = $output_file->getline() ) {
1248 $fout->print($line);
1251 $output_file = $input_file;
1252 $ofname = $input_file;
1255 #---------------------------------------------------------------
1256 # clean up and report errors
1257 #---------------------------------------------------------------
1258 $sink_object->close_output_file() if $sink_object;
1259 $debugger_object->close_debug_file() if $debugger_object;
1261 # set output file permissions
1262 if ( $output_file && -f $output_file && !-l $output_file ) {
1263 if ($input_file_permissions) {
1265 # give output script same permissions as input script, but
1266 # make it user-writable or else we can't run perltidy again.
1267 # Thus we retain whatever executable flags were set.
1268 if ( $rOpts->{'format'} eq 'tidy' ) {
1269 chmod( $input_file_permissions | 0600, $output_file );
1272 # else use default permissions for html and any other format
1276 #---------------------------------------------------------------
1277 # Do syntax check if requested and possible
1278 #---------------------------------------------------------------
1279 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
1281 && $rOpts->{'check-syntax'}
1286 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1289 #---------------------------------------------------------------
1290 # remove the original file for in-place modify as follows:
1291 # $delete_backup=0 never
1292 # $delete_backup=1 only if no errors
1293 # $delete_backup>1 always : NOT ALLOWED, too risky, see above
1294 #---------------------------------------------------------------
1295 if ( $in_place_modify
1298 && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1301 # As an added safety precaution, do not delete the source file
1302 # if its size has dropped from positive to zero, since this
1303 # could indicate a disaster of some kind, including a hardware
1304 # failure. Actually, this could happen if you had a file of
1305 # all comments (or pod) and deleted everything with -dac (-dap)
1307 if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1309 "output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1315 "unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1319 $logger_object->finish( $infile_syntax_ok, $formatter )
1321 } # end of main loop to process all files
1328 } # end of main program perltidy
1330 sub get_stream_as_named_file {
1332 # Return the name of a file containing a stream of data, creating
1333 # a temporary file if necessary.
1335 # $stream - the name of a file or stream
1337 # $fname = name of file if possible, or undef
1338 # $if_tmpfile = true if temp file, undef if not temp file
1340 # This routine is needed for passing actual files to Perl for
1346 if ( ref($stream) ) {
1347 my ( $fh_stream, $fh_name ) =
1348 Perl::Tidy::streamhandle( $stream, 'r' );
1350 my ( $fout, $tmpnam ) = File::Temp::tempfile();
1355 while ( my $line = $fh_stream->getline() ) {
1356 $fout->print($line);
1360 $fh_stream->close();
1363 elsif ( $stream ne '-' && -f $stream ) {
1367 return ( $fname, $is_tmpfile );
1370 sub fileglob_to_re {
1372 # modified (corrected) from version in find2perl
1374 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
1375 $x =~ s#\*#.*#g; # '*' -> '.*'
1376 $x =~ s#\?#.#g; # '?' -> '.'
1377 "^$x\\z"; # match whole word
1380 sub make_extension {
1382 # Make a file extension, including any leading '.' if necessary
1383 # The '.' may actually be an '_' under VMS
1384 my ( $extension, $default, $dot ) = @_;
1386 # Use the default if none specified
1387 $extension = $default unless ($extension);
1389 # Only extensions with these leading characters get a '.'
1390 # This rule gives the user some freedom
1391 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1392 $extension = $dot . $extension;
1397 sub write_logfile_header {
1399 $rOpts, $logger_object, $config_file,
1400 $rraw_options, $Windows_type, $readable_options
1402 $logger_object->write_logfile_entry(
1403 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1405 if ($Windows_type) {
1406 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1408 my $options_string = join( ' ', @$rraw_options );
1411 $logger_object->write_logfile_entry(
1412 "Found Configuration File >>> $config_file \n");
1414 $logger_object->write_logfile_entry(
1415 "Configuration and command line parameters for this run:\n");
1416 $logger_object->write_logfile_entry("$options_string\n");
1418 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1419 $rOpts->{'logfile'} = 1; # force logfile to be saved
1420 $logger_object->write_logfile_entry(
1421 "Final parameter set for this run\n");
1422 $logger_object->write_logfile_entry(
1423 "------------------------------------\n");
1425 $logger_object->write_logfile_entry($readable_options);
1427 $logger_object->write_logfile_entry(
1428 "------------------------------------\n");
1430 $logger_object->write_logfile_entry(
1431 "To find error messages search for 'WARNING' with your editor\n");
1434 sub generate_options {
1436 ######################################################################
1437 # Generate and return references to:
1438 # @option_string - the list of options to be passed to Getopt::Long
1439 # @defaults - the list of default options
1440 # %expansion - a hash showing how all abbreviations are expanded
1441 # %category - a hash giving the general category of each option
1442 # %option_range - a hash giving the valid ranges of certain options
1444 # Note: a few options are not documented in the man page and usage
1445 # message. This is because these are experimental or debug options and
1446 # may or may not be retained in future versions.
1448 # Here are the undocumented flags as far as I know. Any of them
1449 # may disappear at any time. They are mainly for fine-tuning
1452 # fll --> fuzzy-line-length # a trivial parameter which gets
1453 # turned off for the extrude option
1454 # which is mainly for debugging
1455 # scl --> short-concatenation-item-length # helps break at '.'
1456 # recombine # for debugging line breaks
1457 # valign # for debugging vertical alignment
1458 # I --> DIAGNOSTICS # for debugging
1459 ######################################################################
1461 # here is a summary of the Getopt codes:
1462 # <none> does not take an argument
1463 # =s takes a mandatory string
1464 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1465 # =i takes a mandatory integer
1466 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1467 # ! does not take an argument and may be negated
1468 # i.e., -foo and -nofoo are allowed
1469 # a double dash signals the end of the options list
1471 #---------------------------------------------------------------
1472 # Define the option string passed to GetOptions.
1473 #---------------------------------------------------------------
1475 my @option_string = ();
1477 my %option_category = ();
1478 my %option_range = ();
1479 my $rexpansion = \%expansion;
1481 # names of categories in manual
1482 # leading integers will allow sorting
1483 my @category_name = (
1485 '1. Basic formatting options',
1486 '2. Code indentation control',
1487 '3. Whitespace control',
1488 '4. Comment controls',
1489 '5. Linebreak controls',
1490 '6. Controlling list formatting',
1491 '7. Retaining or ignoring existing line breaks',
1492 '8. Blank line control',
1493 '9. Other controls',
1495 '11. pod2html options',
1496 '12. Controlling HTML properties',
1500 # These options are parsed directly by perltidy:
1503 # However, they are included in the option set so that they will
1504 # be seen in the options dump.
1506 # These long option names have no abbreviations or are treated specially
1507 @option_string = qw(
1517 my $category = 13; # Debugging
1518 foreach (@option_string) {
1519 my $opt = $_; # must avoid changing the actual flag
1521 $option_category{$opt} = $category_name[$category];
1524 $category = 11; # HTML
1525 $option_category{html} = $category_name[$category];
1527 # routine to install and check options
1528 my $add_option = sub {
1529 my ( $long_name, $short_name, $flag ) = @_;
1530 push @option_string, $long_name . $flag;
1531 $option_category{$long_name} = $category_name[$category];
1533 if ( $expansion{$short_name} ) {
1534 my $existing_name = $expansion{$short_name}[0];
1536 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1538 $expansion{$short_name} = [$long_name];
1539 if ( $flag eq '!' ) {
1540 my $nshort_name = 'n' . $short_name;
1541 my $nolong_name = 'no' . $long_name;
1542 if ( $expansion{$nshort_name} ) {
1543 my $existing_name = $expansion{$nshort_name}[0];
1545 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1547 $expansion{$nshort_name} = [$nolong_name];
1552 # Install long option names which have a simple abbreviation.
1553 # Options with code '!' get standard negation ('no' for long names,
1554 # 'n' for abbreviations). Categories follow the manual.
1556 ###########################
1557 $category = 0; # I/O_Control
1558 ###########################
1559 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1560 $add_option->( 'backup-file-extension', 'bext', '=s' );
1561 $add_option->( 'force-read-binary', 'f', '!' );
1562 $add_option->( 'format', 'fmt', '=s' );
1563 $add_option->( 'iterations', 'it', '=i' );
1564 $add_option->( 'logfile', 'log', '!' );
1565 $add_option->( 'logfile-gap', 'g', ':i' );
1566 $add_option->( 'outfile', 'o', '=s' );
1567 $add_option->( 'output-file-extension', 'oext', '=s' );
1568 $add_option->( 'output-path', 'opath', '=s' );
1569 $add_option->( 'profile', 'pro', '=s' );
1570 $add_option->( 'quiet', 'q', '!' );
1571 $add_option->( 'standard-error-output', 'se', '!' );
1572 $add_option->( 'standard-output', 'st', '!' );
1573 $add_option->( 'warning-output', 'w', '!' );
1574 $add_option->( 'character-encoding', 'enc', '=s' );
1576 # options which are both toggle switches and values moved here
1577 # to hide from tidyview (which does not show category 0 flags):
1578 # -ole moved here from category 1
1579 # -sil moved here from category 2
1580 $add_option->( 'output-line-ending', 'ole', '=s' );
1581 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1583 ########################################
1584 $category = 1; # Basic formatting options
1585 ########################################
1586 $add_option->( 'check-syntax', 'syn', '!' );
1587 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1588 $add_option->( 'indent-columns', 'i', '=i' );
1589 $add_option->( 'maximum-line-length', 'l', '=i' );
1590 $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
1591 $add_option->( 'whitespace-cycle', 'wc', '=i' );
1592 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1593 $add_option->( 'preserve-line-endings', 'ple', '!' );
1594 $add_option->( 'tabs', 't', '!' );
1595 $add_option->( 'default-tabsize', 'dt', '=i' );
1596 $add_option->( 'extended-syntax', 'xs', '!' );
1598 ########################################
1599 $category = 2; # Code indentation control
1600 ########################################
1601 $add_option->( 'continuation-indentation', 'ci', '=i' );
1602 $add_option->( 'line-up-parentheses', 'lp', '!' );
1603 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1604 $add_option->( 'outdent-keywords', 'okw', '!' );
1605 $add_option->( 'outdent-labels', 'ola', '!' );
1606 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1607 $add_option->( 'indent-closing-brace', 'icb', '!' );
1608 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1609 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1610 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1611 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1612 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1613 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1615 ########################################
1616 $category = 3; # Whitespace control
1617 ########################################
1618 $add_option->( 'add-semicolons', 'asc', '!' );
1619 $add_option->( 'add-whitespace', 'aws', '!' );
1620 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1621 $add_option->( 'brace-tightness', 'bt', '=i' );
1622 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1623 $add_option->( 'delete-semicolons', 'dsm', '!' );
1624 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1625 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1626 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1627 $add_option->( 'paren-tightness', 'pt', '=i' );
1628 $add_option->( 'space-after-keyword', 'sak', '=s' );
1629 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1630 $add_option->( 'space-function-paren', 'sfp', '!' );
1631 $add_option->( 'space-keyword-paren', 'skp', '!' );
1632 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1633 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1634 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1635 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1636 $add_option->( 'tight-secret-operators', 'tso', '!' );
1637 $add_option->( 'trim-qw', 'tqw', '!' );
1638 $add_option->( 'trim-pod', 'trp', '!' );
1639 $add_option->( 'want-left-space', 'wls', '=s' );
1640 $add_option->( 'want-right-space', 'wrs', '=s' );
1642 ########################################
1643 $category = 4; # Comment controls
1644 ########################################
1645 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1646 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1647 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1648 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1649 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1650 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1651 $add_option->( 'closing-side-comments', 'csc', '!' );
1652 $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
1653 $add_option->( 'format-skipping', 'fs', '!' );
1654 $add_option->( 'format-skipping-begin', 'fsb', '=s' );
1655 $add_option->( 'format-skipping-end', 'fse', '=s' );
1656 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1657 $add_option->( 'indent-block-comments', 'ibc', '!' );
1658 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1659 $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
1660 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1661 $add_option->( 'outdent-long-comments', 'olc', '!' );
1662 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1663 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1664 $add_option->( 'static-block-comments', 'sbc', '!' );
1665 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1666 $add_option->( 'static-side-comments', 'ssc', '!' );
1667 $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' );
1669 ########################################
1670 $category = 5; # Linebreak controls
1671 ########################################
1672 $add_option->( 'add-newlines', 'anl', '!' );
1673 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1674 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1675 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1676 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1677 $add_option->( 'cuddled-else', 'ce', '!' );
1678 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1679 $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
1680 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1681 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1682 $add_option->( 'opening-paren-right', 'opr', '!' );
1683 $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1684 $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
1685 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1686 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1687 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1688 $add_option->( 'stack-closing-block-brace', 'scbb', '!' );
1689 $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1690 $add_option->( 'stack-closing-paren', 'scp', '!' );
1691 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1692 $add_option->( 'stack-opening-block-brace', 'sobb', '!' );
1693 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1694 $add_option->( 'stack-opening-paren', 'sop', '!' );
1695 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1696 $add_option->( 'vertical-tightness', 'vt', '=i' );
1697 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1698 $add_option->( 'want-break-after', 'wba', '=s' );
1699 $add_option->( 'want-break-before', 'wbb', '=s' );
1700 $add_option->( 'break-after-all-operators', 'baao', '!' );
1701 $add_option->( 'break-before-all-operators', 'bbao', '!' );
1702 $add_option->( 'keep-interior-semicolons', 'kis', '!' );
1704 ########################################
1705 $category = 6; # Controlling list formatting
1706 ########################################
1707 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1708 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1709 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1711 ########################################
1712 $category = 7; # Retaining or ignoring existing line breaks
1713 ########################################
1714 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1715 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1716 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1717 $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
1718 $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1720 ########################################
1721 $category = 8; # Blank line control
1722 ########################################
1723 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1724 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1725 $add_option->( 'blank-lines-before-subs', 'blbs', '=i' );
1726 $add_option->( 'blank-lines-before-packages', 'blbp', '=i' );
1727 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1728 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1729 $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
1731 $add_option->( 'blank-lines-after-opening-block', 'blao', '=i' );
1732 $add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' );
1733 $add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' );
1734 $add_option->( 'blank-lines-before-closing-block-list', 'blbcl', '=s' );
1736 ########################################
1737 $category = 9; # Other controls
1738 ########################################
1739 $add_option->( 'delete-block-comments', 'dbc', '!' );
1740 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1741 $add_option->( 'delete-pod', 'dp', '!' );
1742 $add_option->( 'delete-side-comments', 'dsc', '!' );
1743 $add_option->( 'tee-block-comments', 'tbc', '!' );
1744 $add_option->( 'tee-pod', 'tp', '!' );
1745 $add_option->( 'tee-side-comments', 'tsc', '!' );
1746 $add_option->( 'look-for-autoloader', 'lal', '!' );
1747 $add_option->( 'look-for-hash-bang', 'x', '!' );
1748 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1749 $add_option->( 'pass-version-line', 'pvl', '!' );
1751 ########################################
1752 $category = 13; # Debugging
1753 ########################################
1754 $add_option->( 'DEBUG', 'D', '!' );
1755 $add_option->( 'DIAGNOSTICS', 'I', '!' );
1756 $add_option->( 'dump-defaults', 'ddf', '!' );
1757 $add_option->( 'dump-long-names', 'dln', '!' );
1758 $add_option->( 'dump-options', 'dop', '!' );
1759 $add_option->( 'dump-profile', 'dpro', '!' );
1760 $add_option->( 'dump-short-names', 'dsn', '!' );
1761 $add_option->( 'dump-token-types', 'dtt', '!' );
1762 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1763 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1764 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1765 $add_option->( 'help', 'h', '' );
1766 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1767 $add_option->( 'show-options', 'opt', '!' );
1768 $add_option->( 'version', 'v', '' );
1769 $add_option->( 'memoize', 'mem', '!' );
1771 #---------------------------------------------------------------------
1773 # The Perl::Tidy::HtmlWriter will add its own options to the string
1774 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1776 ########################################
1777 # Set categories 10, 11, 12
1778 ########################################
1779 # Based on their known order
1780 $category = 12; # HTML properties
1781 foreach my $opt (@option_string) {
1782 my $long_name = $opt;
1783 $long_name =~ s/(!|=.*|:.*)$//;
1784 unless ( defined( $option_category{$long_name} ) ) {
1785 if ( $long_name =~ /^html-linked/ ) {
1786 $category = 10; # HTML options
1788 elsif ( $long_name =~ /^pod2html/ ) {
1789 $category = 11; # Pod2html
1791 $option_category{$long_name} = $category_name[$category];
1795 #---------------------------------------------------------------
1796 # Assign valid ranges to certain options
1797 #---------------------------------------------------------------
1798 # In the future, these may be used to make preliminary checks
1799 # hash keys are long names
1800 # If key or value is undefined:
1801 # strings may have any value
1802 # integer ranges are >=0
1803 # If value is defined:
1804 # value is [qw(any valid words)] for strings
1805 # value is [min, max] for integers
1806 # if min is undefined, there is no lower limit
1807 # if max is undefined, there is no upper limit
1808 # Parameters not listed here have defaults
1810 'format' => [ 'tidy', 'html', 'user' ],
1811 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
1812 'character-encoding' => [ 'none', 'utf8' ],
1814 'block-brace-tightness' => [ 0, 2 ],
1815 'brace-tightness' => [ 0, 2 ],
1816 'paren-tightness' => [ 0, 2 ],
1817 'square-bracket-tightness' => [ 0, 2 ],
1819 'block-brace-vertical-tightness' => [ 0, 2 ],
1820 'brace-vertical-tightness' => [ 0, 2 ],
1821 'brace-vertical-tightness-closing' => [ 0, 2 ],
1822 'paren-vertical-tightness' => [ 0, 2 ],
1823 'paren-vertical-tightness-closing' => [ 0, 2 ],
1824 'square-bracket-vertical-tightness' => [ 0, 2 ],
1825 'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1826 'vertical-tightness' => [ 0, 2 ],
1827 'vertical-tightness-closing' => [ 0, 2 ],
1829 'closing-brace-indentation' => [ 0, 3 ],
1830 'closing-paren-indentation' => [ 0, 3 ],
1831 'closing-square-bracket-indentation' => [ 0, 3 ],
1832 'closing-token-indentation' => [ 0, 3 ],
1834 'closing-side-comment-else-flag' => [ 0, 2 ],
1835 'comma-arrow-breakpoints' => [ 0, 5 ],
1838 # Note: we could actually allow negative ci if someone really wants it:
1839 # $option_range{'continuation-indentation'} = [ undef, undef ];
1841 #---------------------------------------------------------------
1842 # Assign default values to the above options here, except
1843 # for 'outfile' and 'help'.
1844 # These settings should approximate the perlstyle(1) suggestions.
1845 #---------------------------------------------------------------
1850 blanks-before-blocks
1851 blanks-before-comments
1852 blank-lines-before-subs=1
1853 blank-lines-before-packages=1
1854 block-brace-tightness=0
1855 block-brace-vertical-tightness=0
1857 brace-vertical-tightness-closing=0
1858 brace-vertical-tightness=0
1859 break-at-old-logical-breakpoints
1860 break-at-old-ternary-breakpoints
1861 break-at-old-attribute-breakpoints
1862 break-at-old-keyword-breakpoints
1863 comma-arrow-breakpoints=5
1865 closing-side-comment-interval=6
1866 closing-side-comment-maximum-text=20
1867 closing-side-comment-else-flag=0
1868 closing-side-comments-balanced
1869 closing-paren-indentation=0
1870 closing-brace-indentation=0
1871 closing-square-bracket-indentation=0
1872 continuation-indentation=2
1877 hanging-side-comments
1878 indent-block-comments
1881 keep-old-blank-lines=1
1882 long-block-line-count=8
1885 maximum-consecutive-blank-lines=1
1886 maximum-fields-per-table=0
1887 maximum-line-length=80
1889 minimum-space-to-comment=4
1890 nobrace-left-and-indent
1892 nodelete-old-whitespace
1897 nostatic-side-comments
1900 character-encoding=none
1903 outdent-long-comments
1905 paren-vertical-tightness-closing=0
1906 paren-vertical-tightness=0
1910 short-concatenation-item-length=8
1912 square-bracket-tightness=1
1913 square-bracket-vertical-tightness-closing=0
1914 square-bracket-vertical-tightness=0
1915 static-block-comments
1918 backup-file-extension=bak
1923 html-table-of-contents
1927 push @defaults, "perl-syntax-check-flags=-c -T";
1929 #---------------------------------------------------------------
1930 # Define abbreviations which will be expanded into the above primitives.
1931 # These may be defined recursively.
1932 #---------------------------------------------------------------
1935 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1936 'fnl' => [qw(freeze-newlines)],
1937 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1938 'fws' => [qw(freeze-whitespace)],
1939 'freeze-blank-lines' =>
1940 [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1941 'fbl' => [qw(freeze-blank-lines)],
1942 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1943 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1944 'nooutdent-long-lines' =>
1945 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1946 'noll' => [qw(nooutdent-long-lines)],
1947 'io' => [qw(indent-only)],
1948 'delete-all-comments' =>
1949 [qw(delete-block-comments delete-side-comments delete-pod)],
1950 'nodelete-all-comments' =>
1951 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1952 'dac' => [qw(delete-all-comments)],
1953 'ndac' => [qw(nodelete-all-comments)],
1954 'gnu' => [qw(gnu-style)],
1955 'pbp' => [qw(perl-best-practices)],
1956 'tee-all-comments' =>
1957 [qw(tee-block-comments tee-side-comments tee-pod)],
1958 'notee-all-comments' =>
1959 [qw(notee-block-comments notee-side-comments notee-pod)],
1960 'tac' => [qw(tee-all-comments)],
1961 'ntac' => [qw(notee-all-comments)],
1962 'html' => [qw(format=html)],
1963 'nhtml' => [qw(format=tidy)],
1964 'tidy' => [qw(format=tidy)],
1966 'utf8' => [qw(character-encoding=utf8)],
1967 'UTF8' => [qw(character-encoding=utf8)],
1969 'swallow-optional-blank-lines' => [qw(kbl=0)],
1970 'noswallow-optional-blank-lines' => [qw(kbl=1)],
1971 'sob' => [qw(kbl=0)],
1972 'nsob' => [qw(kbl=1)],
1974 'break-after-comma-arrows' => [qw(cab=0)],
1975 'nobreak-after-comma-arrows' => [qw(cab=1)],
1976 'baa' => [qw(cab=0)],
1977 'nbaa' => [qw(cab=1)],
1979 'blanks-before-subs' => [qw(blbs=1 blbp=1)],
1980 'bbs' => [qw(blbs=1 blbp=1)],
1981 'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
1982 'nbbs' => [qw(blbs=0 blbp=0)],
1984 'break-at-old-trinary-breakpoints' => [qw(bot)],
1986 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1987 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1988 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1989 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1990 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1992 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1993 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1994 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1995 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1996 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1998 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1999 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
2000 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
2002 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
2003 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
2004 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
2006 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
2007 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
2008 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
2010 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
2011 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
2012 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
2014 'otr' => [qw(opr ohbr osbr)],
2015 'opening-token-right' => [qw(opr ohbr osbr)],
2016 'notr' => [qw(nopr nohbr nosbr)],
2017 'noopening-token-right' => [qw(nopr nohbr nosbr)],
2019 'sot' => [qw(sop sohb sosb)],
2020 'nsot' => [qw(nsop nsohb nsosb)],
2021 'stack-opening-tokens' => [qw(sop sohb sosb)],
2022 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
2024 'sct' => [qw(scp schb scsb)],
2025 'stack-closing-tokens' => => [qw(scp schb scsb)],
2026 'nsct' => [qw(nscp nschb nscsb)],
2027 'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
2029 'sac' => [qw(sot sct)],
2030 'nsac' => [qw(nsot nsct)],
2031 'stack-all-containers' => [qw(sot sct)],
2032 'nostack-all-containers' => [qw(nsot nsct)],
2034 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2035 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2036 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2037 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2038 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2039 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2041 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)],
2042 'sobb' => [qw(bbvt=2 bbvtl=*)],
2043 'nostack-opening-block-brace' => [qw(bbvt=0)],
2044 'nsobb' => [qw(bbvt=0)],
2046 'converge' => [qw(it=4)],
2047 'noconverge' => [qw(it=1)],
2048 'conv' => [qw(it=4)],
2049 'nconv' => [qw(it=1)],
2051 # 'mangle' originally deleted pod and comments, but to keep it
2052 # reversible, it no longer does. But if you really want to
2053 # delete them, just use:
2056 # An interesting use for 'mangle' is to do this:
2057 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
2058 # which will form as many one-line blocks as possible
2063 keep-old-blank-lines=0
2065 delete-old-whitespace
2068 maximum-consecutive-blank-lines=0
2069 maximum-line-length=100000
2073 noblanks-before-blocks
2074 blank-lines-before-subs=0
2075 blank-lines-before-packages=0
2080 # 'extrude' originally deleted pod and comments, but to keep it
2081 # reversible, it no longer does. But if you really want to
2082 # delete them, just use
2085 # An interesting use for 'extrude' is to do this:
2086 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
2087 # which will break up all one-line blocks.
2094 delete-old-whitespace
2097 maximum-consecutive-blank-lines=0
2098 maximum-line-length=1
2101 noblanks-before-blocks
2102 blank-lines-before-subs=0
2103 blank-lines-before-packages=0
2110 # this style tries to follow the GNU Coding Standards (which do
2111 # not really apply to perl but which are followed by some perl
2115 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
2119 # Style suggested in Damian Conway's Perl Best Practices
2120 'perl-best-practices' => [
2121 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
2122 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
2125 # Additional styles can be added here
2128 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
2130 # Uncomment next line to dump all expansions for debugging:
2131 # dump_short_names(\%expansion);
2133 \@option_string, \@defaults, \%expansion,
2134 \%option_category, \%option_range
2137 } # end of generate_options
2139 # Memoize process_command_line. Given same @ARGV passed in, return same
2140 # values and same @ARGV back.
2141 # This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
2142 # up masontidy (https://metacpan.org/module/masontidy)
2144 my %process_command_line_cache;
2146 sub process_command_line {
2149 $perltidyrc_stream, $is_Windows, $Windows_type,
2150 $rpending_complaint, $dump_options_type
2153 my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
2155 my $cache_key = join( chr(28), @ARGV );
2156 if ( my $result = $process_command_line_cache{$cache_key} ) {
2157 my ( $argv, @retvals ) = @$result;
2162 my @retvals = _process_command_line(@_);
2163 $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
2164 if $retvals[0]->{'memoize'};
2169 return _process_command_line(@_);
2173 # (note the underscore here)
2174 sub _process_command_line {
2177 $perltidyrc_stream, $is_Windows, $Windows_type,
2178 $rpending_complaint, $dump_options_type
2183 # Save any current Getopt::Long configuration
2184 # and set to Getopt::Long defaults. Use eval to avoid
2185 # breaking old versions of Perl without these routines.
2186 # Previous configuration is reset at the exit of this routine.
2188 eval { $glc = Getopt::Long::Configure() };
2190 eval { Getopt::Long::ConfigDefaults() };
2192 else { $glc = undef }
2195 $roption_string, $rdefaults, $rexpansion,
2196 $roption_category, $roption_range
2197 ) = generate_options();
2199 #---------------------------------------------------------------
2200 # set the defaults by passing the above list through GetOptions
2201 #---------------------------------------------------------------
2207 # do not load the defaults if we are just dumping perltidyrc
2208 unless ( $dump_options_type eq 'perltidyrc' ) {
2209 for $i (@$rdefaults) { push @ARGV, "--" . $i }
2211 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2212 Die "Programming Bug: error in setting default options";
2217 my @raw_options = ();
2218 my $config_file = "";
2219 my $saw_ignore_profile = 0;
2220 my $saw_dump_profile = 0;
2223 #---------------------------------------------------------------
2224 # Take a first look at the command-line parameters. Do as many
2225 # immediate dumps as possible, which can avoid confusion if the
2226 # perltidyrc file has an error.
2227 #---------------------------------------------------------------
2228 foreach $i (@ARGV) {
2231 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
2232 $saw_ignore_profile = 1;
2235 # note: this must come before -pro and -profile, below:
2236 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
2237 $saw_dump_profile = 1;
2239 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
2242 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
2246 # resolve <dir>/.../<file>, meaning look upwards from directory
2247 if ( defined($config_file) ) {
2248 if ( my ( $start_dir, $search_file ) =
2249 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2251 $start_dir = '.' if !$start_dir;
2252 $start_dir = Cwd::realpath($start_dir);
2253 if ( my $found_file =
2254 find_file_upwards( $start_dir, $search_file ) )
2256 $config_file = $found_file;
2260 unless ( -e $config_file ) {
2261 Warn "cannot find file given with -pro=$config_file: $!\n";
2265 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
2266 Die "usage: -pro=filename or --profile=filename, no spaces\n";
2268 elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
2272 elsif ( $i =~ /^-(version|v)$/ ) {
2276 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
2277 dump_defaults(@$rdefaults);
2280 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
2281 dump_long_names(@$roption_string);
2284 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
2285 dump_short_names($rexpansion);
2288 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
2289 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
2294 if ( $saw_dump_profile && $saw_ignore_profile ) {
2295 Warn "No profile to dump because of -npro\n";
2299 #---------------------------------------------------------------
2300 # read any .perltidyrc configuration file
2301 #---------------------------------------------------------------
2302 unless ($saw_ignore_profile) {
2304 # resolve possible conflict between $perltidyrc_stream passed
2305 # as call parameter to perltidy and -pro=filename on command
2307 if ($perltidyrc_stream) {
2310 Conflict: a perltidyrc configuration file was specified both as this
2311 perltidy call parameter: $perltidyrc_stream
2312 and with this -profile=$config_file.
2313 Using -profile=$config_file.
2317 $config_file = $perltidyrc_stream;
2321 # look for a config file if we don't have one yet
2322 my $rconfig_file_chatter;
2323 $$rconfig_file_chatter = "";
2325 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2326 $rpending_complaint )
2327 unless $config_file;
2329 # open any config file
2332 ( $fh_config, $config_file ) =
2333 Perl::Tidy::streamhandle( $config_file, 'r' );
2334 unless ($fh_config) {
2335 $$rconfig_file_chatter .=
2336 "# $config_file exists but cannot be opened\n";
2340 if ($saw_dump_profile) {
2341 dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
2347 my ( $rconfig_list, $death_message ) =
2348 read_config_file( $fh_config, $config_file, $rexpansion );
2349 Die $death_message if ($death_message);
2351 # process any .perltidyrc parameters right now so we can
2353 if (@$rconfig_list) {
2354 local @ARGV = @$rconfig_list;
2356 expand_command_abbreviations( $rexpansion, \@raw_options,
2359 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2361 "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
2364 # Anything left in this local @ARGV is an error and must be
2365 # invalid bare words from the configuration file. We cannot
2366 # check this earlier because bare words may have been valid
2367 # values for parameters. We had to wait for GetOptions to have
2371 my $str = "\'" . pop(@ARGV) . "\'";
2372 while ( my $param = pop(@ARGV) ) {
2373 if ( length($str) < 70 ) {
2374 $str .= ", '$param'";
2382 There are $count unrecognized values in the configuration file '$config_file':
2384 Use leading dashes for parameters. Use -npro to ignore this file.
2388 # Undo any options which cause premature exit. They are not
2389 # appropriate for a config file, and it could be hard to
2390 # diagnose the cause of the premature exit.
2399 dump-want-left-space
2400 dump-want-right-space
2408 if ( defined( $Opts{$_} ) ) {
2410 Warn "ignoring --$_ in config file: $config_file\n";
2417 #---------------------------------------------------------------
2418 # now process the command line parameters
2419 #---------------------------------------------------------------
2420 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2422 local $SIG{'__WARN__'} = sub { Warn $_[0] };
2423 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2424 Die "Error on command line; for help try 'perltidy -h'\n";
2427 # reset Getopt::Long configuration back to its previous value
2428 eval { Getopt::Long::Configure($glc) } if defined $glc;
2430 return ( \%Opts, $config_file, \@raw_options, $roption_string,
2431 $rexpansion, $roption_category, $roption_range );
2432 } # end of _process_command_line
2436 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2438 #---------------------------------------------------------------
2439 # check and handle any interactions among the basic options..
2440 #---------------------------------------------------------------
2442 # Since -vt, -vtc, and -cti are abbreviations, but under
2443 # msdos, an unquoted input parameter like vtc=1 will be
2444 # seen as 2 parameters, vtc and 1, so the abbreviations
2445 # won't be seen. Therefore, we will catch them here if
2448 if ( defined $rOpts->{'vertical-tightness'} ) {
2449 my $vt = $rOpts->{'vertical-tightness'};
2450 $rOpts->{'paren-vertical-tightness'} = $vt;
2451 $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2452 $rOpts->{'brace-vertical-tightness'} = $vt;
2455 if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2456 my $vtc = $rOpts->{'vertical-tightness-closing'};
2457 $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
2458 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2459 $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
2462 if ( defined $rOpts->{'closing-token-indentation'} ) {
2463 my $cti = $rOpts->{'closing-token-indentation'};
2464 $rOpts->{'closing-square-bracket-indentation'} = $cti;
2465 $rOpts->{'closing-brace-indentation'} = $cti;
2466 $rOpts->{'closing-paren-indentation'} = $cti;
2469 # In quiet mode, there is no log file and hence no way to report
2470 # results of syntax check, so don't do it.
2471 if ( $rOpts->{'quiet'} ) {
2472 $rOpts->{'check-syntax'} = 0;
2475 # can't check syntax if no output
2476 if ( $rOpts->{'format'} ne 'tidy' ) {
2477 $rOpts->{'check-syntax'} = 0;
2480 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2481 # wide variety of nasty problems on these systems, because they cannot
2482 # reliably run backticks. Don't even think about changing this!
2483 if ( $rOpts->{'check-syntax'}
2485 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2487 $rOpts->{'check-syntax'} = 0;
2490 # It's really a bad idea to check syntax as root unless you wrote
2491 # the script yourself. FIXME: not sure if this works with VMS
2492 unless ($is_Windows) {
2494 if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2495 $rOpts->{'check-syntax'} = 0;
2496 $$rpending_complaint .=
2497 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2501 # check iteration count and quietly fix if necessary:
2502 # - iterations option only applies to code beautification mode
2503 # - the convergence check should stop most runs on iteration 2, and
2504 # virtually all on iteration 3. But we'll allow up to 6.
2505 if ( $rOpts->{'format'} ne 'tidy' ) {
2506 $rOpts->{'iterations'} = 1;
2508 elsif ( defined( $rOpts->{'iterations'} ) ) {
2509 if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2510 elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 }
2513 $rOpts->{'iterations'} = 1;
2516 my $check_blank_count = sub {
2517 my ( $key, $abbrev ) = @_;
2518 if ( $rOpts->{$key} ) {
2519 if ( $rOpts->{$key} < 0 ) {
2521 Warn "negative value of $abbrev, setting 0\n";
2523 if ( $rOpts->{$key} > 100 ) {
2524 Warn "unreasonably large value of $abbrev, reducing\n";
2525 $rOpts->{$key} = 100;
2530 # check for reasonable number of blank lines and fix to avoid problems
2531 $check_blank_count->( 'blank-lines-before-subs', '-blbs' );
2532 $check_blank_count->( 'blank-lines-before-packages', '-blbp' );
2533 $check_blank_count->( 'blank-lines-after-block-opening', '-blao' );
2534 $check_blank_count->( 'blank-lines-before-block-closing', '-blbc' );
2536 # setting a non-negative logfile gap causes logfile to be saved
2537 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2538 $rOpts->{'logfile'} = 1;
2541 # set short-cut flag when only indentation is to be done.
2542 # Note that the user may or may not have already set the
2544 if ( !$rOpts->{'add-whitespace'}
2545 && !$rOpts->{'delete-old-whitespace'}
2546 && !$rOpts->{'add-newlines'}
2547 && !$rOpts->{'delete-old-newlines'} )
2549 $rOpts->{'indent-only'} = 1;
2552 # -isbc implies -ibc
2553 if ( $rOpts->{'indent-spaced-block-comments'} ) {
2554 $rOpts->{'indent-block-comments'} = 1;
2557 # -bli flag implies -bl
2558 if ( $rOpts->{'brace-left-and-indent'} ) {
2559 $rOpts->{'opening-brace-on-new-line'} = 1;
2562 if ( $rOpts->{'opening-brace-always-on-right'}
2563 && $rOpts->{'opening-brace-on-new-line'} )
2566 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2567 'opening-brace-on-new-line' (-bl). Ignoring -bl.
2569 $rOpts->{'opening-brace-on-new-line'} = 0;
2572 # it simplifies things if -bl is 0 rather than undefined
2573 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2574 $rOpts->{'opening-brace-on-new-line'} = 0;
2577 # -sbl defaults to -bl if not defined
2578 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2579 $rOpts->{'opening-sub-brace-on-new-line'} =
2580 $rOpts->{'opening-brace-on-new-line'};
2583 if ( $rOpts->{'entab-leading-whitespace'} ) {
2584 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2585 Warn "-et=n must use a positive integer; ignoring -et\n";
2586 $rOpts->{'entab-leading-whitespace'} = undef;
2589 # entab leading whitespace has priority over the older 'tabs' option
2590 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2593 # set a default tabsize to be used in guessing the starting indentation
2594 # level if and only if this run does not use tabs and the old code does
2596 if ( $rOpts->{'default-tabsize'} ) {
2597 if ( $rOpts->{'default-tabsize'} < 0 ) {
2598 Warn "negative value of -dt, setting 0\n";
2599 $rOpts->{'default-tabsize'} = 0;
2601 if ( $rOpts->{'default-tabsize'} > 20 ) {
2602 Warn "unreasonably large value of -dt, reducing\n";
2603 $rOpts->{'default-tabsize'} = 20;
2607 $rOpts->{'default-tabsize'} = 8;
2610 # Define $tabsize, the number of spaces per tab for use in
2611 # guessing the indentation of source lines with leading tabs.
2612 # Assume same as for this run if tabs are used , otherwise assume
2613 # a default value, typically 8
2615 $rOpts->{'entab-leading-whitespace'}
2616 ? $rOpts->{'entab-leading-whitespace'}
2617 : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
2618 : $rOpts->{'default-tabsize'};
2622 sub find_file_upwards {
2623 my ( $search_dir, $search_file ) = @_;
2625 $search_dir =~ s{/+$}{};
2626 $search_file =~ s{^/+}{};
2629 my $try_path = "$search_dir/$search_file";
2630 if ( -f $try_path ) {
2633 elsif ( $search_dir eq '/' ) {
2637 $search_dir = dirname($search_dir);
2642 sub expand_command_abbreviations {
2644 # go through @ARGV and expand any abbreviations
2646 my ( $rexpansion, $rraw_options, $config_file ) = @_;
2649 # set a pass limit to prevent an infinite loop;
2650 # 10 should be plenty, but it may be increased to allow deeply
2651 # nested expansions.
2652 my $max_passes = 10;
2655 # keep looping until all expansions have been converted into actual
2657 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2659 my $abbrev_count = 0;
2661 # loop over each item in @ARGV..
2662 foreach $word (@ARGV) {
2664 # convert any leading 'no-' to just 'no'
2665 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2667 # if it is a dash flag (instead of a file name)..
2668 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2673 # save the raw input for debug output in case of circular refs
2674 if ( $pass_count == 0 ) {
2675 push( @$rraw_options, $word );
2678 # recombine abbreviation and flag, if necessary,
2679 # to allow abbreviations with arguments such as '-vt=1'
2680 if ( $rexpansion->{ $abr . $flags } ) {
2681 $abr = $abr . $flags;
2685 # if we see this dash item in the expansion hash..
2686 if ( $rexpansion->{$abr} ) {
2689 # stuff all of the words that it expands to into the
2690 # new arg list for the next pass
2691 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2692 next unless $abbrev; # for safety; shouldn't happen
2693 push( @new_argv, '--' . $abbrev . $flags );
2697 # not in expansion hash, must be actual long name
2699 push( @new_argv, $word );
2703 # not a dash item, so just save it for the next pass
2705 push( @new_argv, $word );
2707 } # end of this pass
2709 # update parameter list @ARGV to the new one
2711 last unless ( $abbrev_count > 0 );
2713 # make sure we are not in an infinite loop
2714 if ( $pass_count == $max_passes ) {
2717 I'm tired. We seem to be in an infinite loop trying to expand aliases.
2718 Here are the raw options;
2721 my $num = @new_argv;
2724 After $max_passes passes here is ARGV
2730 After $max_passes passes ARGV has $num entries
2736 Please check your configuration file $config_file for circular-references.
2737 To deactivate it, use -npro.
2742 Program bug - circular-references in the %expansion hash, probably due to
2743 a recent program change.
2746 } # end of check for circular references
2747 } # end of loop over all passes
2750 # Debug routine -- this will dump the expansion hash
2751 sub dump_short_names {
2752 my $rexpansion = shift;
2754 List of short names. This list shows how all abbreviations are
2755 translated into other abbreviations and, eventually, into long names.
2756 New abbreviations may be defined in a .perltidyrc file.
2757 For a list of all long names, use perltidy --dump-long-names (-dln).
2758 --------------------------------------------------------------------------
2760 foreach my $abbrev ( sort keys %$rexpansion ) {
2761 my @list = @{ $$rexpansion{$abbrev} };
2762 print STDOUT "$abbrev --> @list\n";
2766 sub check_vms_filename {
2768 # given a valid filename (the perltidy input file)
2769 # create a modified filename and separator character
2772 # Contributed by Michael Cartmell
2774 my ( $base, $path ) = fileparse( $_[0] );
2776 # remove explicit ; version
2777 $base =~ s/;-?\d*$//
2779 # remove explicit . version ie two dots in filename NB ^ escapes a dot
2780 or $base =~ s/( # begin capture $1
2781 (?:^|[^^])\. # match a dot not preceded by a caret
2782 (?: # followed by nothing
2784 .*[^^] # anything ending in a non caret
2787 \.-?\d*$ # match . version number
2790 # normalise filename, if there are no unescaped dots then append one
2791 $base .= '.' unless $base =~ /(?:^|[^^])\./;
2793 # if we don't already have an extension then we just append the extension
2794 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2795 return ( $path . $base, $separator );
2800 # TODO: are these more standard names?
2801 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2803 # Returns a string that determines what MS OS we are on.
2804 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2805 # Returns blank string if not an MS system.
2806 # Original code contributed by: Yves Orton
2807 # We need to know this to decide where to look for config files
2809 my $rpending_complaint = shift;
2811 return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2813 # Systems built from Perl source may not have Win32.pm
2814 # But probably have Win32::GetOSVersion() anyway so the
2815 # following line is not 'required':
2816 # return $os unless eval('require Win32');
2818 # Use the standard API call to determine the version
2819 my ( $undef, $major, $minor, $build, $id );
2820 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2823 # NAME ID MAJOR MINOR
2824 # Windows NT 4 2 4 0
2825 # Windows 2000 2 5 0
2827 # Windows Server 2003 2 5 2
2829 return "win32s" unless $id; # If id==0 then its a win32s box.
2830 $os = { # Magic numbers from MSDN
2831 # documentation of GetOSVersion
2838 0 => "2000", # or NT 4, see below
2845 # If $os is undefined, the above code is out of date. Suggested updates
2847 unless ( defined $os ) {
2849 $$rpending_complaint .= <<EOS;
2850 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2851 We won't be able to look for a system-wide config file.
2855 # Unfortunately the logic used for the various versions isn't so clever..
2856 # so we have to handle an outside case.
2857 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2862 ( $^O !~ /win32|dos/i )
2865 && ( $^O ne 'MacOS' );
2868 sub look_for_Windows {
2870 # determine Windows sub-type and location of
2871 # system-wide configuration files
2872 my $rpending_complaint = shift;
2873 my $is_Windows = ( $^O =~ /win32|dos/i );
2874 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
2875 return ( $is_Windows, $Windows_type );
2878 sub find_config_file {
2880 # look for a .perltidyrc configuration file
2881 # For Windows also look for a file named perltidy.ini
2882 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2883 $rpending_complaint ) = @_;
2885 $$rconfig_file_chatter .= "# Config file search...system reported as:";
2887 $$rconfig_file_chatter .= "Windows $Windows_type\n";
2890 $$rconfig_file_chatter .= " $^O\n";
2893 # sub to check file existence and record all tests
2894 my $exists_config_file = sub {
2895 my $config_file = shift;
2896 return 0 unless $config_file;
2897 $$rconfig_file_chatter .= "# Testing: $config_file\n";
2898 return -f $config_file;
2903 # look in current directory first
2904 $config_file = ".perltidyrc";
2905 return $config_file if $exists_config_file->($config_file);
2907 $config_file = "perltidy.ini";
2908 return $config_file if $exists_config_file->($config_file);
2911 # Default environment vars.
2912 my @envs = qw(PERLTIDY HOME);
2914 # Check the NT/2k/XP locations, first a local machine def, then a
2916 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2918 # Now go through the environment ...
2919 foreach my $var (@envs) {
2920 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2921 if ( defined( $ENV{$var} ) ) {
2922 $$rconfig_file_chatter .= " = $ENV{$var}\n";
2924 # test ENV{ PERLTIDY } as file:
2925 if ( $var eq 'PERLTIDY' ) {
2926 $config_file = "$ENV{$var}";
2927 return $config_file if $exists_config_file->($config_file);
2930 # test ENV as directory:
2931 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2932 return $config_file if $exists_config_file->($config_file);
2935 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2936 return $config_file if $exists_config_file->($config_file);
2940 $$rconfig_file_chatter .= "\n";
2944 # then look for a system-wide definition
2945 # where to look varies with OS
2948 if ($Windows_type) {
2949 my ( $os, $system, $allusers ) =
2950 Win_Config_Locs( $rpending_complaint, $Windows_type );
2952 # Check All Users directory, if there is one.
2953 # i.e. C:\Documents and Settings\User\perltidy.ini
2956 $config_file = catfile( $allusers, ".perltidyrc" );
2957 return $config_file if $exists_config_file->($config_file);
2959 $config_file = catfile( $allusers, "perltidy.ini" );
2960 return $config_file if $exists_config_file->($config_file);
2963 # Check system directory.
2964 # retain old code in case someone has been able to create
2965 # a file with a leading period.
2966 $config_file = catfile( $system, ".perltidyrc" );
2967 return $config_file if $exists_config_file->($config_file);
2969 $config_file = catfile( $system, "perltidy.ini" );
2970 return $config_file if $exists_config_file->($config_file);
2974 # Place to add customization code for other systems
2975 elsif ( $^O eq 'OS2' ) {
2977 elsif ( $^O eq 'MacOS' ) {
2979 elsif ( $^O eq 'VMS' ) {
2982 # Assume some kind of Unix
2985 $config_file = "/usr/local/etc/perltidyrc";
2986 return $config_file if $exists_config_file->($config_file);
2988 $config_file = "/etc/perltidyrc";
2989 return $config_file if $exists_config_file->($config_file);
2992 # Couldn't find a config file
2996 sub Win_Config_Locs {
2998 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2999 # or undef if its not a win32 OS. In list context returns OS, System
3000 # Directory, and All Users Directory. All Users will be empty on a
3001 # 9x/Me box. Contributed by: Yves Orton.
3003 my $rpending_complaint = shift;
3004 my $os = (@_) ? shift : Win_OS_Type();
3010 if ( $os =~ /9[58]|Me/ ) {
3011 $system = "C:/Windows";
3013 elsif ( $os =~ /NT|XP|200?/ ) {
3014 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
3017 ? "C:/WinNT/profiles/All Users/"
3018 : "C:/Documents and Settings/All Users/";
3022 # This currently would only happen on a win32s computer. I don't have
3023 # one to test, so I am unsure how to proceed. Suggestions welcome!
3024 $$rpending_complaint .=
3025 "I dont know a sensible place to look for config files on an $os system.\n";
3028 return wantarray ? ( $os, $system, $allusers ) : $os;
3031 sub dump_config_file {
3033 my $config_file = shift;
3034 my $rconfig_file_chatter = shift;
3035 print STDOUT "$$rconfig_file_chatter";
3037 print STDOUT "# Dump of file: '$config_file'\n";
3038 while ( my $line = $fh->getline() ) { print STDOUT $line }
3039 eval { $fh->close() };
3042 print STDOUT "# ...no config file found\n";
3046 sub read_config_file {
3048 my ( $fh, $config_file, $rexpansion ) = @_;
3049 my @config_list = ();
3051 # file is bad if non-empty $death_message is returned
3052 my $death_message = "";
3056 my $opening_brace_line;
3057 while ( my $line = $fh->getline() ) {
3060 ( $line, $death_message ) =
3061 strip_comment( $line, $config_file, $line_no );
3062 last if ($death_message);
3064 $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
3070 # Look for complete or partial abbreviation definition of the form
3071 # name { body } or name { or name { body
3072 # See rules in perltidy's perldoc page
3073 # Section: Other Controls - Creating a new abbreviation
3074 if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
3075 my $oldname = $name;
3076 ( $name, $body ) = ( $2, $3 );
3078 # Cannot start new abbreviation unless old abbreviation is complete
3079 last if ($opening_brace_line);
3081 $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
3083 # handle a new alias definition
3084 if ( ${$rexpansion}{$name} ) {
3086 my @names = sort keys %$rexpansion;
3088 "Here is a list of all installed aliases\n(@names)\n"
3089 . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
3092 ${$rexpansion}{$name} = [];
3095 # leading opening braces not allowed
3096 elsif ( $line =~ /^{/ ) {
3097 $opening_brace_line = undef;
3099 "Unexpected '{' at line $line_no in config file '$config_file'\n";
3103 # Look for abbreviation closing: body } or }
3104 elsif ( $line =~ /^(.*)?\}$/ ) {
3106 if ($opening_brace_line) {
3107 $opening_brace_line = undef;
3111 "Unexpected '}' at line $line_no in config file '$config_file'\n";
3116 # Now store any parameters
3119 my ( $rbody_parts, $msg ) = parse_args($body);
3121 $death_message = <<EOM;
3122 Error reading file '$config_file' at line number $line_no.
3124 Please fix this line or use -npro to avoid reading this file
3131 # remove leading dashes if this is an alias
3132 foreach (@$rbody_parts) { s/^\-+//; }
3133 push @{ ${$rexpansion}{$name} }, @$rbody_parts;
3136 push( @config_list, @$rbody_parts );
3141 if ($opening_brace_line) {
3143 "Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
3145 eval { $fh->close() };
3146 return ( \@config_list, $death_message );
3151 # Strip any comment from a command line
3152 my ( $instr, $config_file, $line_no ) = @_;
3155 # check for full-line comment
3156 if ( $instr =~ /^\s*#/ ) {
3157 return ( "", $msg );
3160 # nothing to do if no comments
3161 if ( $instr !~ /#/ ) {
3162 return ( $instr, $msg );
3165 # handle case of no quotes
3166 elsif ( $instr !~ /['"]/ ) {
3168 # We now require a space before the # of a side comment
3169 # this allows something like:
3171 # Otherwise, it would have to be quoted:
3173 $instr =~ s/\s+\#.*$//;
3174 return ( $instr, $msg );
3177 # handle comments and quotes
3179 my $quote_char = "";
3182 # looking for ending quote character
3184 if ( $instr =~ /\G($quote_char)/gc ) {
3188 elsif ( $instr =~ /\G(.)/gc ) {
3192 # error..we reached the end without seeing the ending quote char
3195 Error reading file $config_file at line number $line_no.
3196 Did not see ending quote character <$quote_char> in this text:
3198 Please fix this line or use -npro to avoid reading this file
3204 # accumulating characters and looking for start of a quoted string
3206 if ( $instr =~ /\G([\"\'])/gc ) {
3211 # Note: not yet enforcing the space-before-hash rule for side
3212 # comments if the parameter is quoted.
3213 elsif ( $instr =~ /\G#/gc ) {
3216 elsif ( $instr =~ /\G(.)/gc ) {
3224 return ( $outstr, $msg );
3229 # Parse a command string containing multiple string with possible
3230 # quotes, into individual commands. It might look like this, for example:
3232 # -wba=" + - " -some-thing -wbb='. && ||'
3234 # There is no need, at present, to handle escaped quote characters.
3235 # (They are not perltidy tokens, so needn't be in strings).
3238 my @body_parts = ();
3239 my $quote_char = "";
3244 # looking for ending quote character
3246 if ( $body =~ /\G($quote_char)/gc ) {
3249 elsif ( $body =~ /\G(.)/gc ) {
3253 # error..we reached the end without seeing the ending quote char
3255 if ( length($part) ) { push @body_parts, $part; }
3257 Did not see ending quote character <$quote_char> in this text:
3264 # accumulating characters and looking for start of a quoted string
3266 if ( $body =~ /\G([\"\'])/gc ) {
3269 elsif ( $body =~ /\G(\s+)/gc ) {
3270 if ( length($part) ) { push @body_parts, $part; }
3273 elsif ( $body =~ /\G(.)/gc ) {
3277 if ( length($part) ) { push @body_parts, $part; }
3282 return ( \@body_parts, $msg );
3285 sub dump_long_names {
3287 my @names = sort @_;
3289 # Command line long names (passed to GetOptions)
3290 #---------------------------------------------------------------
3291 # here is a summary of the Getopt codes:
3292 # <none> does not take an argument
3293 # =s takes a mandatory string
3294 # :s takes an optional string
3295 # =i takes a mandatory integer
3296 # :i takes an optional integer
3297 # ! does not take an argument and may be negated
3298 # i.e., -foo and -nofoo are allowed
3299 # a double dash signals the end of the options list
3301 #---------------------------------------------------------------
3304 foreach (@names) { print STDOUT "$_\n" }
3308 my @defaults = sort @_;
3309 print STDOUT "Default command line options:\n";
3310 foreach (@_) { print STDOUT "$_\n" }
3313 sub readable_options {
3315 # return options for this run as a string which could be
3316 # put in a perltidyrc file
3317 my ( $rOpts, $roption_string ) = @_;
3319 my $rGetopt_flags = \%Getopt_flags;
3320 my $readable_options = "# Final parameter set for this run.\n";
3321 $readable_options .=
3322 "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
3323 foreach my $opt ( @{$roption_string} ) {
3325 if ( $opt =~ /(.*)(!|=.*)$/ ) {
3329 if ( defined( $rOpts->{$opt} ) ) {
3330 $rGetopt_flags->{$opt} = $flag;
3333 foreach my $key ( sort keys %{$rOpts} ) {
3334 my $flag = $rGetopt_flags->{$key};
3335 my $value = $rOpts->{$key};
3339 if ( $flag =~ /^=/ ) {
3340 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
3341 $suffix = "=" . $value;
3343 elsif ( $flag =~ /^!/ ) {
3344 $prefix .= "no" unless ($value);
3349 $readable_options .=
3350 "# ERROR in dump_options: unrecognized flag $flag for $key\n";
3353 $readable_options .= $prefix . $key . $suffix . "\n";
3355 return $readable_options;
3359 print STDOUT <<"EOM";
3360 This is perltidy, v$VERSION
3362 Copyright 2000-2017, Steve Hancock
3364 Perltidy is free software and may be copied under the terms of the GNU
3365 General Public License, which is included in the distribution files.
3367 Complete documentation for perltidy can be found using 'man perltidy'
3368 or on the internet at http://perltidy.sourceforge.net.
3375 This is perltidy version $VERSION, a perl script indenter. Usage:
3377 perltidy [ options ] file1 file2 file3 ...
3378 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
3379 perltidy [ options ] file1 -o outfile
3380 perltidy [ options ] file1 -st >outfile
3381 perltidy [ options ] <infile >outfile
3383 Options have short and long forms. Short forms are shown; see
3384 man pages for long forms. Note: '=s' indicates a required string,
3385 and '=n' indicates a required integer.
3389 -o=file name of the output file (only if single input file)
3390 -oext=s change output extension from 'tdy' to s
3391 -opath=path change path to be 'path' for output files
3392 -b backup original to .bak and modify file in-place
3393 -bext=s change default backup extension from 'bak' to s
3394 -q deactivate error messages (for running under editor)
3395 -w include non-critical warning messages in the .ERR error output
3396 -syn run perl -c to check syntax (default under unix systems)
3397 -log save .LOG file, which has useful diagnostics
3398 -f force perltidy to read a binary file
3399 -g like -log but writes more detailed .LOG file, for debugging scripts
3400 -opt write the set of options actually used to a .LOG file
3401 -npro ignore .perltidyrc configuration command file
3402 -pro=file read configuration commands from file instead of .perltidyrc
3403 -st send output to standard output, STDOUT
3404 -se send all error output to standard error output, STDERR
3405 -v display version number to standard output and quit
3408 -i=n use n columns per indentation level (default n=4)
3409 -t tabs: use one tab character per indentation level, not recommeded
3410 -nt no tabs: use n spaces per indentation level (default)
3411 -et=n entab leading whitespace n spaces per tab; not recommended
3412 -io "indent only": just do indentation, no other formatting.
3413 -sil=n set starting indentation level to n; use if auto detection fails
3414 -ole=s specify output line ending (s=dos or win, mac, unix)
3415 -ple keep output line endings same as input (input must be filename)
3418 -fws freeze whitespace; this disables all whitespace changes
3419 and disables the following switches:
3420 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
3421 -bbt same as -bt but for code block braces; same as -bt if not given
3422 -bbvt block braces vertically tight; use with -bl or -bli
3423 -bbvtl=s make -bbvt to apply to selected list of block types
3424 -pt=n paren tightness (n=0, 1 or 2)
3425 -sbt=n square bracket tightness (n=0, 1, or 2)
3426 -bvt=n brace vertical tightness,
3427 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3428 -pvt=n paren vertical tightness (see -bvt for n)
3429 -sbvt=n square bracket vertical tightness (see -bvt for n)
3430 -bvtc=n closing brace vertical tightness:
3431 n=(0=open, 1=sometimes close, 2=always close)
3432 -pvtc=n closing paren vertical tightness, see -bvtc for n.
3433 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3434 -ci=n sets continuation indentation=n, default is n=2 spaces
3435 -lp line up parentheses, brackets, and non-BLOCK braces
3436 -sfs add space before semicolon in for( ; ; )
3437 -aws allow perltidy to add whitespace (default)
3438 -dws delete all old non-essential whitespace
3439 -icb indent closing brace of a code block
3440 -cti=n closing indentation of paren, square bracket, or non-block brace:
3441 n=0 none, =1 align with opening, =2 one full indentation level
3442 -icp equivalent to -cti=2
3443 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
3444 -wrs=s want space right of tokens in string;
3445 -sts put space before terminal semicolon of a statement
3446 -sak=s put space between keywords given in s and '(';
3447 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3450 -fnl freeze newlines; this disables all line break changes
3451 and disables the following switches:
3452 -anl add newlines; ok to introduce new line breaks
3453 -bbs add blank line before subs and packages
3454 -bbc add blank line before block comments
3455 -bbb add blank line between major blocks
3456 -kbl=n keep old blank lines? 0=no, 1=some, 2=all
3457 -mbl=n maximum consecutive blank lines to output (default=1)
3458 -ce cuddled else; use this style: '} else {'
3459 -dnl delete old newlines (default)
3460 -l=n maximum line length; default n=80
3461 -bl opening brace on new line
3462 -sbl opening sub brace on new line. value of -bl is used if not given.
3463 -bli opening brace on new line and indented
3464 -bar opening brace always on right, even for long clauses
3465 -vt=n vertical tightness (requires -lp); n controls break after opening
3466 token: 0=never 1=no break if next line balanced 2=no break
3467 -vtc=n vertical tightness of closing container; n controls if closing
3468 token starts new line: 0=always 1=not unless list 1=never
3469 -wba=s want break after tokens in string; i.e. wba=': .'
3470 -wbb=s want break before tokens in string
3472 Following Old Breakpoints
3473 -kis keep interior semicolons. Allows multiple statements per line.
3474 -boc break at old comma breaks: turns off all automatic list formatting
3475 -bol break at old logical breakpoints: or, and, ||, && (default)
3476 -bok break at old list keyword breakpoints such as map, sort (default)
3477 -bot break at old conditional (ternary ?:) operator breakpoints (default)
3478 -boa break at old attribute breakpoints
3479 -cab=n break at commas after a comma-arrow (=>):
3480 n=0 break at all commas after =>
3481 n=1 stable: break unless this breaks an existing one-line container
3482 n=2 break only if a one-line container cannot be formed
3483 n=3 do not treat commas after => specially at all
3486 -ibc indent block comments (default)
3487 -isbc indent spaced block comments; may indent unless no leading space
3488 -msc=n minimum desired spaces to side comment, default 4
3489 -fpsc=n fix position for side comments; default 0;
3490 -csc add or update closing side comments after closing BLOCK brace
3491 -dcsc delete closing side comments created by a -csc command
3492 -cscp=s change closing side comment prefix to be other than '## end'
3493 -cscl=s change closing side comment to apply to selected list of blocks
3494 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3495 -csct=n maximum number of columns of appended text, default n=20
3496 -cscw causes warning if old side comment is overwritten with -csc
3498 -sbc use 'static block comments' identified by leading '##' (default)
3499 -sbcp=s change static block comment identifier to be other than '##'
3500 -osbc outdent static block comments
3502 -ssc use 'static side comments' identified by leading '##' (default)
3503 -sscp=s change static side comment identifier to be other than '##'
3505 Delete selected text
3506 -dac delete all comments AND pod
3507 -dbc delete block comments
3508 -dsc delete side comments
3511 Send selected text to a '.TEE' file
3512 -tac tee all comments AND pod
3513 -tbc tee block comments
3514 -tsc tee side comments
3518 -olq outdent long quoted strings (default)
3519 -olc outdent a long block comment line
3520 -ola outdent statement labels
3521 -okw outdent control keywords (redo, next, last, goto, return)
3522 -okwl=s specify alternative keywords for -okw command
3525 -mft=n maximum fields per table; default n=40
3526 -x do not format lines before hash-bang line (i.e., for VMS)
3527 -asc allows perltidy to add a ';' when missing (default)
3528 -dsm allows perltidy to delete an unnecessary ';' (default)
3530 Combinations of other parameters
3531 -gnu attempt to follow GNU Coding Standards as applied to perl
3532 -mangle remove as many newlines as possible (but keep comments and pods)
3533 -extrude insert as many newlines as possible
3535 Dump and die, debugging
3536 -dop dump options used in this run to standard output and quit
3537 -ddf dump default options to standard output and quit
3538 -dsn dump all option short names to standard output and quit
3539 -dln dump option long names to standard output and quit
3540 -dpro dump whatever configuration file is in effect to standard output
3541 -dtt dump all token types to standard output and quit
3544 -html write an html file (see 'man perl2web' for many options)
3545 Note: when -html is used, no indentation or formatting are done.
3546 Hint: try perltidy -html -css=mystyle.css filename.pl
3547 and edit mystyle.css to change the appearance of filename.html.
3548 -nnn gives line numbers
3549 -pre only writes out <pre>..</pre> code section
3550 -toc places a table of contents to subs at the top (default)
3551 -pod passes pod text through pod2html (default)
3552 -frm write html as a frame (3 files)
3553 -text=s extra extension for table of contents if -frm, default='toc'
3554 -sext=s extra extension for file content if -frm, default='src'
3556 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3557 negates the long forms. For example, -nasc means don't add missing
3560 If you are unable to see this entire text, try "perltidy -h | more"
3561 For more detailed information, and additional options, try "man perltidy",
3562 or go to the perltidy home page at http://perltidy.sourceforge.net
3567 sub process_this_file {
3569 my ( $truth, $beauty ) = @_;
3571 # loop to process each line of this file
3572 while ( my $line_of_tokens = $truth->get_line() ) {
3573 $beauty->write_line($line_of_tokens);
3577 eval { $beauty->finish_formatting() };
3578 $truth->report_tokenization_errors();
3583 # Use 'perl -c' to make sure that we did not create bad syntax
3584 # This is a very good independent check for programming errors
3586 # Given names of the input and output files, ($istream, $ostream),
3587 # we do the following:
3588 # - check syntax of the input file
3589 # - if bad, all done (could be an incomplete code snippet)
3590 # - if infile syntax ok, then check syntax of the output file;
3591 # - if outfile syntax bad, issue warning; this implies a code bug!
3592 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3594 my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
3595 my $infile_syntax_ok = 0;
3596 my $line_of_dashes = '-' x 42 . "\n";
3598 my $flags = $rOpts->{'perl-syntax-check-flags'};
3600 # be sure we invoke perl with -c
3601 # note: perl will accept repeated flags like '-c -c'. It is safest
3602 # to append another -c than try to find an interior bundled c, as
3603 # in -Tc, because such a 'c' might be in a quoted string, for example.
3604 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3606 # be sure we invoke perl with -x if requested
3607 # same comments about repeated parameters applies
3608 if ( $rOpts->{'look-for-hash-bang'} ) {
3609 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3612 # this shouldn't happen unless a temporary file couldn't be made
3613 if ( $istream eq '-' ) {
3614 $logger_object->write_logfile_entry(
3615 "Cannot run perl -c on STDIN and STDOUT\n");
3616 return $infile_syntax_ok;
3619 $logger_object->write_logfile_entry(
3620 "checking input file syntax with perl $flags\n");
3622 # Not all operating systems/shells support redirection of the standard
3624 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3626 my ( $istream_filename, $perl_output ) =
3627 do_syntax_check( $istream, $flags, $error_redirection );
3628 $logger_object->write_logfile_entry(
3629 "Input stream passed to Perl as file $istream_filename\n");
3630 $logger_object->write_logfile_entry($line_of_dashes);
3631 $logger_object->write_logfile_entry("$perl_output\n");
3633 if ( $perl_output =~ /syntax\s*OK/ ) {
3634 $infile_syntax_ok = 1;
3635 $logger_object->write_logfile_entry($line_of_dashes);
3636 $logger_object->write_logfile_entry(
3637 "checking output file syntax with perl $flags ...\n");
3638 my ( $ostream_filename, $perl_output ) =
3639 do_syntax_check( $ostream, $flags, $error_redirection );
3640 $logger_object->write_logfile_entry(
3641 "Output stream passed to Perl as file $ostream_filename\n");
3642 $logger_object->write_logfile_entry($line_of_dashes);
3643 $logger_object->write_logfile_entry("$perl_output\n");
3645 unless ( $perl_output =~ /syntax\s*OK/ ) {
3646 $logger_object->write_logfile_entry($line_of_dashes);
3647 $logger_object->warning(
3648 "The output file has a syntax error when tested with perl $flags $ostream !\n"
3650 $logger_object->warning(
3651 "This implies an error in perltidy; the file $ostream is bad\n"
3653 $logger_object->report_definite_bug();
3655 # the perl version number will be helpful for diagnosing the problem
3656 $logger_object->write_logfile_entry(
3657 qx/perl -v $error_redirection/ . "\n" );
3662 # Only warn of perl -c syntax errors. Other messages,
3663 # such as missing modules, are too common. They can be
3664 # seen by running with perltidy -w
3665 $logger_object->complain("A syntax check using perl $flags\n");
3666 $logger_object->complain(
3667 "for the output in file $istream_filename gives:\n");
3668 $logger_object->complain($line_of_dashes);
3669 $logger_object->complain("$perl_output\n");
3670 $logger_object->complain($line_of_dashes);
3671 $infile_syntax_ok = -1;
3672 $logger_object->write_logfile_entry($line_of_dashes);
3673 $logger_object->write_logfile_entry(
3674 "The output file will not be checked because of input file problems\n"
3677 return $infile_syntax_ok;
3680 sub do_syntax_check {
3681 my ( $stream, $flags, $error_redirection ) = @_;
3683 # We need a named input file for executing perl
3684 my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
3686 # TODO: Need to add name of file to log somewhere
3687 # otherwise Perl output is hard to read
3688 if ( !$stream_filename ) { return $stream_filename, "" }
3690 # We have to quote the filename in case it has unusual characters
3691 # or spaces. Example: this filename #CM11.pm# gives trouble.
3692 my $quoted_stream_filename = '"' . $stream_filename . '"';
3694 # Under VMS something like -T will become -t (and an error) so we
3695 # will put quotes around the flags. Double quotes seem to work on
3696 # Unix/Windows/VMS, but this may not work on all systems. (Single
3697 # quotes do not work under Windows). It could become necessary to
3698 # put double quotes around each flag, such as: -"c" -"T"
3699 # We may eventually need some system-dependent coding here.
3700 $flags = '"' . $flags . '"';
3702 # now wish for luck...
3703 my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
3706 unlink $stream_filename
3707 or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n");
3709 return $stream_filename, $msg;
3712 #####################################################################
3714 # This is a stripped down version of IO::Scalar
3715 # Given a reference to a scalar, it supplies either:
3716 # a getline method which reads lines (mode='r'), or
3717 # a print method which reads lines (mode='w')
3719 #####################################################################
3720 package Perl::Tidy::IOScalar;
3724 my ( $package, $rscalar, $mode ) = @_;
3725 my $ref = ref $rscalar;
3726 if ( $ref ne 'SCALAR' ) {
3728 ------------------------------------------------------------------------
3729 expecting ref to SCALAR but got ref to ($ref); trace follows:
3730 ------------------------------------------------------------------------
3734 if ( $mode eq 'w' ) {
3736 return bless [ $rscalar, $mode ], $package;
3738 elsif ( $mode eq 'r' ) {
3740 # Convert a scalar to an array.
3741 # This avoids looking for "\n" on each call to getline
3743 # NOTES: The -1 count is needed to avoid loss of trailing blank lines
3744 # (which might be important in a DATA section).
3746 if ( $rscalar && ${$rscalar} ) {
3747 @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
3749 # remove possible extra blank line introduced with split
3750 if ( @array && $array[-1] eq "\n" ) { pop @array }
3753 return bless [ \@array, $mode, $i_next ], $package;
3757 ------------------------------------------------------------------------
3758 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3759 ------------------------------------------------------------------------
3766 my $mode = $self->[1];
3767 if ( $mode ne 'r' ) {
3769 ------------------------------------------------------------------------
3770 getline call requires mode = 'r' but mode = ($mode); trace follows:
3771 ------------------------------------------------------------------------
3774 my $i = $self->[2]++;
3775 return $self->[0]->[$i];
3780 my $mode = $self->[1];
3781 if ( $mode ne 'w' ) {
3783 ------------------------------------------------------------------------
3784 print call requires mode = 'w' but mode = ($mode); trace follows:
3785 ------------------------------------------------------------------------
3788 ${ $self->[0] } .= $_[0];
3790 sub close { return }
3792 #####################################################################
3794 # This is a stripped down version of IO::ScalarArray
3795 # Given a reference to an array, it supplies either:
3796 # a getline method which reads lines (mode='r'), or
3797 # a print method which reads lines (mode='w')
3799 # NOTE: this routine assumes that there aren't any embedded
3800 # newlines within any of the array elements. There are no checks
3803 #####################################################################
3804 package Perl::Tidy::IOScalarArray;
3808 my ( $package, $rarray, $mode ) = @_;
3809 my $ref = ref $rarray;
3810 if ( $ref ne 'ARRAY' ) {
3812 ------------------------------------------------------------------------
3813 expecting ref to ARRAY but got ref to ($ref); trace follows:
3814 ------------------------------------------------------------------------
3818 if ( $mode eq 'w' ) {
3820 return bless [ $rarray, $mode ], $package;
3822 elsif ( $mode eq 'r' ) {
3824 return bless [ $rarray, $mode, $i_next ], $package;
3828 ------------------------------------------------------------------------
3829 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3830 ------------------------------------------------------------------------
3837 my $mode = $self->[1];
3838 if ( $mode ne 'r' ) {
3840 ------------------------------------------------------------------------
3841 getline requires mode = 'r' but mode = ($mode); trace follows:
3842 ------------------------------------------------------------------------
3845 my $i = $self->[2]++;
3846 return $self->[0]->[$i];
3851 my $mode = $self->[1];
3852 if ( $mode ne 'w' ) {
3854 ------------------------------------------------------------------------
3855 print requires mode = 'w' but mode = ($mode); trace follows:
3856 ------------------------------------------------------------------------
3859 push @{ $self->[0] }, $_[0];
3861 sub close { return }
3863 #####################################################################
3865 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3866 # which returns the next line to be parsed
3868 #####################################################################
3870 package Perl::Tidy::LineSource;
3874 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3876 my $input_line_ending;
3877 if ( $rOpts->{'preserve-line-endings'} ) {
3878 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3881 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3882 return undef unless $fh;
3884 # in order to check output syntax when standard output is used,
3885 # or when it is an object, we have to make a copy of the file
3886 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3889 # Turning off syntax check when input output is used.
3890 # The reason is that temporary files cause problems on
3892 $rOpts->{'check-syntax'} = 0;
3894 $$rpending_logfile_message .= <<EOM;
3895 Note: --syntax check will be skipped because standard input is used
3902 _filename => $input_file,
3903 _input_line_ending => $input_line_ending,
3904 _rinput_buffer => [],
3909 sub close_input_file {
3912 # Only close physical files, not STDIN and other objects
3913 my $filename = $self->{_filename};
3914 if ( $filename ne '-' && !ref $filename ) {
3915 eval { $self->{_fh}->close() };
3922 my $fh = $self->{_fh};
3923 my $rinput_buffer = $self->{_rinput_buffer};
3925 if ( scalar(@$rinput_buffer) ) {
3926 $line = shift @$rinput_buffer;
3929 $line = $fh->getline();
3931 # patch to read raw mac files under unix, dos
3932 # see if the first line has embedded \r's
3933 if ( $line && !$self->{_started} ) {
3934 if ( $line =~ /[\015][^\015\012]/ ) {
3936 # found one -- break the line up and store in a buffer
3937 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3938 my $count = @$rinput_buffer;
3939 $line = shift @$rinput_buffer;
3941 $self->{_started}++;
3947 #####################################################################
3949 # the Perl::Tidy::LineSink class supplies a write_line method for
3950 # actual file writing
3952 #####################################################################
3954 package Perl::Tidy::LineSink;
3958 my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3959 $rpending_logfile_message, $binmode )
3964 my $output_file_open = 0;
3966 if ( $rOpts->{'format'} eq 'tidy' ) {
3967 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3968 unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
3969 $output_file_open = 1;
3971 if ( $rOpts->{'character-encoding'}
3972 && $rOpts->{'character-encoding'} eq 'utf8' )
3974 if ( ref($fh) eq 'IO::File' ) {
3975 $fh->binmode(":encoding(UTF-8)");
3977 elsif ( $output_file eq '-' ) {
3978 binmode STDOUT, ":encoding(UTF-8)";
3981 elsif ( $output_file eq '-' ) { binmode STDOUT }
3985 # in order to check output syntax when standard output is used,
3986 # or when it is an object, we have to make a copy of the file
3987 if ( $output_file eq '-' || ref $output_file ) {
3988 if ( $rOpts->{'check-syntax'} ) {
3990 # Turning off syntax check when standard output is used.
3991 # The reason is that temporary files cause problems on
3993 $rOpts->{'check-syntax'} = 0;
3994 $$rpending_logfile_message .= <<EOM;
3995 Note: --syntax check will be skipped because standard output is used
4004 _output_file => $output_file,
4005 _output_file_open => $output_file_open,
4007 _tee_file => $tee_file,
4008 _tee_file_opened => 0,
4009 _line_separator => $line_separator,
4010 _binmode => $binmode,
4017 my $fh = $self->{_fh};
4019 my $output_file_open = $self->{_output_file_open};
4021 $_[0] .= $self->{_line_separator};
4023 $fh->print( $_[0] ) if ( $self->{_output_file_open} );
4025 if ( $self->{_tee_flag} ) {
4026 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
4027 my $fh_tee = $self->{_fh_tee};
4028 print $fh_tee $_[0];
4034 $self->{_tee_flag} = 1;
4039 $self->{_tee_flag} = 0;
4042 sub really_open_tee_file {
4044 my $tee_file = $self->{_tee_file};
4046 $fh_tee = IO::File->new(">$tee_file")
4047 or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
4048 binmode $fh_tee if $self->{_binmode};
4049 $self->{_tee_file_opened} = 1;
4050 $self->{_fh_tee} = $fh_tee;
4053 sub close_output_file {
4056 # Only close physical files, not STDOUT and other objects
4057 my $output_file = $self->{_output_file};
4058 if ( $output_file ne '-' && !ref $output_file ) {
4059 eval { $self->{_fh}->close() } if $self->{_output_file_open};
4061 $self->close_tee_file();
4064 sub close_tee_file {
4067 # Only close physical files, not STDOUT and other objects
4068 if ( $self->{_tee_file_opened} ) {
4069 my $tee_file = $self->{_tee_file};
4070 if ( $tee_file ne '-' && !ref $tee_file ) {
4071 eval { $self->{_fh_tee}->close() };
4072 $self->{_tee_file_opened} = 0;
4077 #####################################################################
4079 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
4080 # useful for program development.
4082 # Only one such file is created regardless of the number of input
4083 # files processed. This allows the results of processing many files
4084 # to be summarized in a single file.
4086 #####################################################################
4088 package Perl::Tidy::Diagnostics;
4094 _write_diagnostics_count => 0,
4095 _last_diagnostic_file => "",
4101 sub set_input_file {
4103 $self->{_input_file} = $_[0];
4106 # This is a diagnostic routine which is useful for program development.
4107 # Output from debug messages go to a file named DIAGNOSTICS, where
4108 # they are labeled by file and line. This allows many files to be
4109 # scanned at once for some particular condition of interest.
4110 sub write_diagnostics {
4113 unless ( $self->{_write_diagnostics_count} ) {
4114 open DIAGNOSTICS, ">DIAGNOSTICS"
4115 or death("couldn't open DIAGNOSTICS: $!\n");
4118 my $last_diagnostic_file = $self->{_last_diagnostic_file};
4119 my $input_file = $self->{_input_file};
4120 if ( $last_diagnostic_file ne $input_file ) {
4121 print DIAGNOSTICS "\nFILE:$input_file\n";
4123 $self->{_last_diagnostic_file} = $input_file;
4124 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
4125 print DIAGNOSTICS "$input_line_number:\t@_";
4126 $self->{_write_diagnostics_count}++;
4129 #####################################################################
4131 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
4133 #####################################################################
4135 package Perl::Tidy::Logger;
4140 my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_;
4142 my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
4144 # remove any old error output file if we might write a new one
4145 unless ( $fh_warnings || ref($warning_file) ) {
4146 if ( -e $warning_file ) {
4147 unlink($warning_file)
4149 "couldn't unlink warning file $warning_file: $!\n");
4154 defined( $rOpts->{'logfile-gap'} )
4155 ? $rOpts->{'logfile-gap'}
4157 if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
4160 _log_file => $log_file,
4161 _logfile_gap => $logfile_gap,
4163 _fh_warnings => $fh_warnings,
4164 _last_input_line_written => 0,
4165 _at_end_of_file => 0,
4167 _block_log_output => 0,
4168 _line_of_tokens => undef,
4169 _output_line_number => undef,
4170 _wrote_line_information_string => 0,
4171 _wrote_column_headings => 0,
4172 _warning_file => $warning_file,
4173 _warning_count => 0,
4174 _complaint_count => 0,
4175 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
4176 _saw_brace_error => 0,
4177 _saw_extrude => $saw_extrude,
4178 _output_array => [],
4182 sub get_warning_count {
4184 return $self->{_warning_count};
4187 sub get_use_prefix {
4189 return $self->{_use_prefix};
4192 sub block_log_output {
4194 $self->{_block_log_output} = 1;
4197 sub unblock_log_output {
4199 $self->{_block_log_output} = 0;
4202 sub interrupt_logfile {
4204 $self->{_use_prefix} = 0;
4205 $self->warning("\n");
4206 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
4209 sub resume_logfile {
4211 $self->write_logfile_entry( '#' x 60 . "\n" );
4212 $self->{_use_prefix} = 1;
4215 sub we_are_at_the_last_line {
4217 unless ( $self->{_wrote_line_information_string} ) {
4218 $self->write_logfile_entry("Last line\n\n");
4220 $self->{_at_end_of_file} = 1;
4223 # record some stuff in case we go down in flames
4226 my ( $line_of_tokens, $output_line_number ) = @_;
4227 my $input_line = $line_of_tokens->{_line_text};
4228 my $input_line_number = $line_of_tokens->{_line_number};
4230 # save line information in case we have to write a logfile message
4231 $self->{_line_of_tokens} = $line_of_tokens;
4232 $self->{_output_line_number} = $output_line_number;
4233 $self->{_wrote_line_information_string} = 0;
4235 my $last_input_line_written = $self->{_last_input_line_written};
4236 my $rOpts = $self->{_rOpts};
4239 ( $input_line_number - $last_input_line_written ) >=
4240 $self->{_logfile_gap}
4242 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
4245 my $rlevels = $line_of_tokens->{_rlevels};
4246 my $structural_indentation_level = $$rlevels[0];
4247 $self->{_last_input_line_written} = $input_line_number;
4248 ( my $out_str = $input_line ) =~ s/^\s*//;
4251 $out_str = ( '.' x $structural_indentation_level ) . $out_str;
4253 if ( length($out_str) > 35 ) {
4254 $out_str = substr( $out_str, 0, 35 ) . " ....";
4256 $self->logfile_output( "", "$out_str\n" );
4260 sub write_logfile_entry {
4263 # add leading >>> to avoid confusing error messages and code
4264 $self->logfile_output( ">>>", "@_" );
4267 sub write_column_headings {
4270 $self->{_wrote_column_headings} = 1;
4271 my $routput_array = $self->{_output_array};
4272 push @{$routput_array}, <<EOM;
4273 The nesting depths in the table below are at the start of the lines.
4274 The indicated output line numbers are not always exact.
4275 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
4277 in:out indent c b nesting code + messages; (messages begin with >>>)
4278 lines levels i k (code begins with one '.' per indent level)
4279 ------ ----- - - -------- -------------------------------------------
4283 sub make_line_information_string {
4285 # make columns of information when a logfile message needs to go out
4287 my $line_of_tokens = $self->{_line_of_tokens};
4288 my $input_line_number = $line_of_tokens->{_line_number};
4289 my $line_information_string = "";
4290 if ($input_line_number) {
4292 my $output_line_number = $self->{_output_line_number};
4293 my $brace_depth = $line_of_tokens->{_curly_brace_depth};
4294 my $paren_depth = $line_of_tokens->{_paren_depth};
4295 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
4296 my $guessed_indentation_level =
4297 $line_of_tokens->{_guessed_indentation_level};
4298 my $rlevels = $line_of_tokens->{_rlevels};
4299 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
4300 my $rci_levels = $line_of_tokens->{_rci_levels};
4301 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
4303 my $structural_indentation_level = $$rlevels[0];
4305 $self->write_column_headings() unless $self->{_wrote_column_headings};
4307 # keep logfile columns aligned for scripts up to 999 lines;
4308 # for longer scripts it doesn't really matter
4309 my $extra_space = "";
4311 ( $input_line_number < 10 ) ? " "
4312 : ( $input_line_number < 100 ) ? " "
4315 ( $output_line_number < 10 ) ? " "
4316 : ( $output_line_number < 100 ) ? " "
4319 # there are 2 possible nesting strings:
4320 # the original which looks like this: (0 [1 {2
4321 # the new one, which looks like this: {{[
4322 # the new one is easier to read, and shows the order, but
4323 # could be arbitrarily long, so we use it unless it is too long
4324 my $nesting_string =
4325 "($paren_depth [$square_bracket_depth {$brace_depth";
4326 my $nesting_string_new = $$rnesting_tokens[0];
4328 my $ci_level = $$rci_levels[0];
4329 if ( $ci_level > 9 ) { $ci_level = '*' }
4330 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
4332 if ( length($nesting_string_new) <= 8 ) {
4334 $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
4336 $line_information_string =
4337 "L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
4339 return $line_information_string;
4342 sub logfile_output {
4344 my ( $prompt, $msg ) = @_;
4345 return if ( $self->{_block_log_output} );
4347 my $routput_array = $self->{_output_array};
4348 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
4349 push @{$routput_array}, "$msg";
4352 my $line_information_string = $self->make_line_information_string();
4353 $self->{_wrote_line_information_string} = 1;
4355 if ($line_information_string) {
4356 push @{$routput_array}, "$line_information_string $prompt$msg";
4359 push @{$routput_array}, "$msg";
4364 sub get_saw_brace_error {
4366 return $self->{_saw_brace_error};
4369 sub increment_brace_error {
4371 $self->{_saw_brace_error}++;
4376 use constant BRACE_WARNING_LIMIT => 10;
4377 my $saw_brace_error = $self->{_saw_brace_error};
4379 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
4383 $self->{_saw_brace_error} = $saw_brace_error;
4385 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
4386 $self->warning("No further warnings of this type will be given\n");
4392 # handle non-critical warning messages based on input flag
4394 my $rOpts = $self->{_rOpts};
4396 # these appear in .ERR output only if -w flag is used
4397 if ( $rOpts->{'warning-output'} ) {
4401 # otherwise, they go to the .LOG file
4403 $self->{_complaint_count}++;
4404 $self->write_logfile_entry(@_);
4410 # report errors to .ERR file (or stdout)
4412 use constant WARNING_LIMIT => 50;
4414 my $rOpts = $self->{_rOpts};
4415 unless ( $rOpts->{'quiet'} ) {
4417 my $warning_count = $self->{_warning_count};
4418 my $fh_warnings = $self->{_fh_warnings};
4419 if ( !$fh_warnings ) {
4420 my $warning_file = $self->{_warning_file};
4421 ( $fh_warnings, my $filename ) =
4422 Perl::Tidy::streamhandle( $warning_file, 'w' );
4423 $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
4424 Perl::Tidy::Warn "## Please see file $filename\n"
4425 unless ref($warning_file);
4426 $self->{_fh_warnings} = $fh_warnings;
4427 $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
4430 if ( $warning_count < WARNING_LIMIT ) {
4431 if ( $self->get_use_prefix() > 0 ) {
4432 my $input_line_number =
4433 Perl::Tidy::Tokenizer::get_input_line_number();
4434 if ( !defined($input_line_number) ) { $input_line_number = -1 }
4435 $fh_warnings->print("$input_line_number:\t@_");
4436 $self->write_logfile_entry("WARNING: @_");
4439 $fh_warnings->print(@_);
4440 $self->write_logfile_entry(@_);
4444 $self->{_warning_count} = $warning_count;
4446 if ( $warning_count == WARNING_LIMIT ) {
4447 $fh_warnings->print("No further warnings will be given\n");
4452 # programming bug codes:
4454 # 0 = maybe, not sure.
4456 sub report_possible_bug {
4458 my $saw_code_bug = $self->{_saw_code_bug};
4459 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4462 sub report_definite_bug {
4464 $self->{_saw_code_bug} = 1;
4467 sub ask_user_for_bug_report {
4470 my ( $infile_syntax_ok, $formatter ) = @_;
4471 my $saw_code_bug = $self->{_saw_code_bug};
4472 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4473 $self->warning(<<EOM);
4475 You may have encountered a code bug in perltidy. If you think so, and
4476 the problem is not listed in the BUGS file at
4477 http://perltidy.sourceforge.net, please report it so that it can be
4478 corrected. Include the smallest possible script which has the problem,
4479 along with the .LOG file. See the manual pages for contact information.
4484 elsif ( $saw_code_bug == 1 ) {
4485 if ( $self->{_saw_extrude} ) {
4486 $self->warning(<<EOM);
4488 You may have encountered a bug in perltidy. However, since you are using the
4489 -extrude option, the problem may be with perl or one of its modules, which have
4490 occasional problems with this type of file. If you believe that the
4491 problem is with perltidy, and the problem is not listed in the BUGS file at
4492 http://perltidy.sourceforge.net, please report it so that it can be corrected.
4493 Include the smallest possible script which has the problem, along with the .LOG
4494 file. See the manual pages for contact information.
4499 $self->warning(<<EOM);
4501 Oops, you seem to have encountered a bug in perltidy. Please check the
4502 BUGS file at http://perltidy.sourceforge.net. If the problem is not
4503 listed there, please report it so that it can be corrected. Include the
4504 smallest possible script which produces this message, along with the
4505 .LOG file if appropriate. See the manual pages for contact information.
4506 Your efforts are appreciated.
4509 my $added_semicolon_count = 0;
4511 $added_semicolon_count =
4512 $formatter->get_added_semicolon_count();
4514 if ( $added_semicolon_count > 0 ) {
4515 $self->warning(<<EOM);
4517 The log file shows that perltidy added $added_semicolon_count semicolons.
4518 Please rerun with -nasc to see if that is the cause of the syntax error. Even
4519 if that is the problem, please report it so that it can be fixed.
4529 # called after all formatting to summarize errors
4531 my ( $infile_syntax_ok, $formatter ) = @_;
4533 my $rOpts = $self->{_rOpts};
4534 my $warning_count = $self->{_warning_count};
4535 my $saw_code_bug = $self->{_saw_code_bug};
4538 ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4539 || $saw_code_bug == 1
4540 || $rOpts->{'logfile'};
4541 my $log_file = $self->{_log_file};
4542 if ($warning_count) {
4543 if ($save_logfile) {
4544 $self->block_log_output(); # avoid echoing this to the logfile
4546 "The logfile $log_file may contain useful information\n");
4547 $self->unblock_log_output();
4550 if ( $self->{_complaint_count} > 0 ) {
4552 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4556 if ( $self->{_saw_brace_error}
4557 && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
4559 $self->warning("To save a full .LOG file rerun with -g\n");
4562 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4564 if ($save_logfile) {
4565 my $log_file = $self->{_log_file};
4566 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4568 my $routput_array = $self->{_output_array};
4569 foreach ( @{$routput_array} ) { $fh->print($_) }
4570 if ( $log_file ne '-' && !ref $log_file ) {
4571 eval { $fh->close() };
4577 #####################################################################
4579 # The Perl::Tidy::DevNull class supplies a dummy print method
4581 #####################################################################
4583 package Perl::Tidy::DevNull;
4584 sub new { return bless {}, $_[0] }
4585 sub print { return }
4586 sub close { return }
4588 #####################################################################
4590 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4592 #####################################################################
4594 package Perl::Tidy::HtmlWriter;
4604 %short_to_long_names
4608 $missing_html_entities
4611 # replace unsafe characters with HTML entity representation if HTML::Entities
4613 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4617 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4618 $html_src_extension )
4621 my $html_file_opened = 0;
4623 ( $html_fh, my $html_filename ) =
4624 Perl::Tidy::streamhandle( $html_file, 'w' );
4626 Perl::Tidy::Warn("can't open $html_file: $!\n");
4629 $html_file_opened = 1;
4631 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4632 $input_file = "NONAME";
4635 # write the table of contents to a string
4637 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4640 my @pre_string_stack;
4641 if ( $rOpts->{'html-pre-only'} ) {
4643 # pre section goes directly to the output stream
4644 $html_pre_fh = $html_fh;
4645 $html_pre_fh->print( <<"PRE_END");
4651 # pre section go out to a temporary string
4653 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4654 push @pre_string_stack, \$pre_string;
4657 # pod text gets diverted if the 'pod2html' is used
4660 if ( $rOpts->{'pod2html'} ) {
4661 if ( $rOpts->{'html-pre-only'} ) {
4662 undef $rOpts->{'pod2html'};
4665 eval "use Pod::Html";
4668 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4669 undef $rOpts->{'pod2html'};
4672 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4679 if ( $rOpts->{'frames'} ) {
4680 unless ($extension) {
4682 "cannot use frames without a specified output extension; ignoring -frm\n";
4683 undef $rOpts->{'frames'};
4686 $toc_filename = $input_file . $html_toc_extension . $extension;
4687 $src_filename = $input_file . $html_src_extension . $extension;
4691 # ----------------------------------------------------------
4692 # Output is now directed as follows:
4693 # html_toc_fh <-- table of contents items
4694 # html_pre_fh <-- the <pre> section of formatted code, except:
4695 # html_pod_fh <-- pod goes here with the pod2html option
4696 # ----------------------------------------------------------
4698 my $title = $rOpts->{'title'};
4700 ( $title, my $path ) = fileparse($input_file);
4702 my $toc_item_count = 0;
4703 my $in_toc_package = "";
4706 _input_file => $input_file, # name of input file
4707 _title => $title, # title, unescaped
4708 _html_file => $html_file, # name of .html output file
4709 _toc_filename => $toc_filename, # for frames option
4710 _src_filename => $src_filename, # for frames option
4711 _html_file_opened => $html_file_opened, # a flag
4712 _html_fh => $html_fh, # the output stream
4713 _html_pre_fh => $html_pre_fh, # pre section goes here
4714 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
4715 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4716 _rpod_string => \$pod_string, # string holding pod
4717 _pod_cut_count => 0, # how many =cut's?
4718 _html_toc_fh => $html_toc_fh, # fh for table of contents
4719 _rtoc_string => \$toc_string, # string holding toc
4720 _rtoc_item_count => \$toc_item_count, # how many toc items
4721 _rin_toc_package => \$in_toc_package, # package name
4722 _rtoc_name_count => {}, # hash to track unique names
4723 _rpackage_stack => [], # stack to check for package
4725 _rlast_level => \$last_level, # brace indentation level
4731 # Add an item to the html table of contents.
4732 # This is called even if no table of contents is written,
4733 # because we still want to put the anchors in the <pre> text.
4734 # We are given an anchor name and its type; types are:
4735 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
4736 # There must be an 'EOF' call at the end to wrap things up.
4738 my ( $name, $type ) = @_;
4739 my $html_toc_fh = $self->{_html_toc_fh};
4740 my $html_pre_fh = $self->{_html_pre_fh};
4741 my $rtoc_name_count = $self->{_rtoc_name_count};
4742 my $rtoc_item_count = $self->{_rtoc_item_count};
4743 my $rlast_level = $self->{_rlast_level};
4744 my $rin_toc_package = $self->{_rin_toc_package};
4745 my $rpackage_stack = $self->{_rpackage_stack};
4747 # packages contain sublists of subs, so to avoid errors all package
4748 # items are written and finished with the following routines
4749 my $end_package_list = sub {
4750 if ($$rin_toc_package) {
4751 $html_toc_fh->print("</ul>\n</li>\n");
4752 $$rin_toc_package = "";
4756 my $start_package_list = sub {
4757 my ( $unique_name, $package ) = @_;
4758 if ($$rin_toc_package) { $end_package_list->() }
4759 $html_toc_fh->print(<<EOM);
4760 <li><a href=\"#$unique_name\">package $package</a>
4763 $$rin_toc_package = $package;
4766 # start the table of contents on the first item
4767 unless ($$rtoc_item_count) {
4769 # but just quit if we hit EOF without any other entries
4770 # in this case, there will be no toc
4771 return if ( $type eq 'EOF' );
4772 $html_toc_fh->print( <<"TOC_END");
4773 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4777 $$rtoc_item_count++;
4779 # make a unique anchor name for this location:
4780 # - packages get a 'package-' prefix
4781 # - subs use their names
4782 my $unique_name = $name;
4783 if ( $type eq 'package' ) { $unique_name = "package-$name" }
4785 # append '-1', '-2', etc if necessary to make unique; this will
4786 # be unique because subs and packages cannot have a '-'
4787 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4788 $unique_name .= "-$count";
4791 # - all names get terminal '-' if pod2html is used, to avoid
4792 # conflicts with anchor names created by pod2html
4793 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4795 # start/stop lists of subs
4796 if ( $type eq 'sub' ) {
4797 my $package = $rpackage_stack->[$$rlast_level];
4798 unless ($package) { $package = 'main' }
4800 # if we're already in a package/sub list, be sure its the right
4801 # package or else close it
4802 if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4803 $end_package_list->();
4806 # start a package/sub list if necessary
4807 unless ($$rin_toc_package) {
4808 $start_package_list->( $unique_name, $package );
4812 # now write an entry in the toc for this item
4813 if ( $type eq 'package' ) {
4814 $start_package_list->( $unique_name, $name );
4816 elsif ( $type eq 'sub' ) {
4817 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4820 $end_package_list->();
4821 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4824 # write the anchor in the <pre> section
4825 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4827 # end the table of contents, if any, on the end of file
4828 if ( $type eq 'EOF' ) {
4829 $html_toc_fh->print( <<"TOC_END");
4831 <!-- END CODE INDEX -->
4838 # This is the official list of tokens which may be identified by the
4839 # user. Long names are used as getopt keys. Short names are
4840 # convenient short abbreviations for specifying input. Short names
4841 # somewhat resemble token type characters, but are often different
4842 # because they may only be alphanumeric, to allow command line
4843 # input. Also, note that because of case insensitivity of html,
4844 # this table must be in a single case only (I've chosen to use all
4846 # When adding NEW_TOKENS: update this hash table
4847 # short names => long names
4848 %short_to_long_names = (
4858 'pu' => 'punctuation',
4859 'i' => 'identifier',
4861 'h' => 'here-doc-target',
4862 'hh' => 'here-doc-text',
4864 'sc' => 'semicolon',
4865 'm' => 'subroutine',
4869 # Now we have to map actual token types into one of the above short
4870 # names; any token types not mapped will get 'punctuation'
4873 # The values of this hash table correspond to the keys of the
4874 # previous hash table.
4875 # The keys of this hash table are token types and can be seen
4876 # by running with --dump-token-types (-dtt).
4878 # When adding NEW_TOKENS: update this hash table
4879 # $type => $short_name
4880 %token_short_names = (
4905 # These token types will all be called identifiers for now
4906 # FIXME: could separate user defined modules as separate type
4907 my @identifier = qw" i t U C Y Z G :: CORE::";
4908 @token_short_names{@identifier} = ('i') x scalar(@identifier);
4910 # These token types will be called 'structure'
4911 my @structure = qw" { } ";
4912 @token_short_names{@structure} = ('s') x scalar(@structure);
4914 # OLD NOTES: save for reference
4915 # Any of these could be added later if it would be useful.
4916 # For now, they will by default become punctuation
4917 # my @list = qw" L R [ ] ";
4918 # @token_long_names{@list} = ('non-structure') x scalar(@list);
4921 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4923 # @token_long_names{@list} = ('math') x scalar(@list);
4925 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
4926 # @token_long_names{@list} = ('bit') x scalar(@list);
4928 # my @list = qw" == != < > <= <=> ";
4929 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4931 # my @list = qw" && || ! &&= ||= //= ";
4932 # @token_long_names{@list} = ('logical') x scalar(@list);
4934 # my @list = qw" . .= =~ !~ x x= ";
4935 # @token_long_names{@list} = ('string-operators') x scalar(@list);
4938 # my @list = qw" .. -> <> ... \ ? ";
4939 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
4943 sub make_getopt_long_names {
4945 my ($rgetopt_names) = @_;
4946 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4947 push @$rgetopt_names, "html-color-$name=s";
4948 push @$rgetopt_names, "html-italic-$name!";
4949 push @$rgetopt_names, "html-bold-$name!";
4951 push @$rgetopt_names, "html-color-background=s";
4952 push @$rgetopt_names, "html-linked-style-sheet=s";
4953 push @$rgetopt_names, "nohtml-style-sheets";
4954 push @$rgetopt_names, "html-pre-only";
4955 push @$rgetopt_names, "html-line-numbers";
4956 push @$rgetopt_names, "html-entities!";
4957 push @$rgetopt_names, "stylesheet";
4958 push @$rgetopt_names, "html-table-of-contents!";
4959 push @$rgetopt_names, "pod2html!";
4960 push @$rgetopt_names, "frames!";
4961 push @$rgetopt_names, "html-toc-extension=s";
4962 push @$rgetopt_names, "html-src-extension=s";
4964 # Pod::Html parameters:
4965 push @$rgetopt_names, "backlink=s";
4966 push @$rgetopt_names, "cachedir=s";
4967 push @$rgetopt_names, "htmlroot=s";
4968 push @$rgetopt_names, "libpods=s";
4969 push @$rgetopt_names, "podpath=s";
4970 push @$rgetopt_names, "podroot=s";
4971 push @$rgetopt_names, "title=s";
4973 # Pod::Html parameters with leading 'pod' which will be removed
4974 # before the call to Pod::Html
4975 push @$rgetopt_names, "podquiet!";
4976 push @$rgetopt_names, "podverbose!";
4977 push @$rgetopt_names, "podrecurse!";
4978 push @$rgetopt_names, "podflush";
4979 push @$rgetopt_names, "podheader!";
4980 push @$rgetopt_names, "podindex!";
4983 sub make_abbreviated_names {
4985 # We're appending things like this to the expansion list:
4986 # 'hcc' => [qw(html-color-comment)],
4987 # 'hck' => [qw(html-color-keyword)],
4990 my ($rexpansion) = @_;
4992 # abbreviations for color/bold/italic properties
4993 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4994 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
4995 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
4996 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
4997 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4998 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
5001 # abbreviations for all other html options
5002 ${$rexpansion}{"hcbg"} = ["html-color-background"];
5003 ${$rexpansion}{"pre"} = ["html-pre-only"];
5004 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
5005 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
5006 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
5007 ${$rexpansion}{"hent"} = ["html-entities"];
5008 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
5009 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
5010 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
5011 ${$rexpansion}{"ss"} = ["stylesheet"];
5012 ${$rexpansion}{"pod"} = ["pod2html"];
5013 ${$rexpansion}{"npod"} = ["nopod2html"];
5014 ${$rexpansion}{"frm"} = ["frames"];
5015 ${$rexpansion}{"nfrm"} = ["noframes"];
5016 ${$rexpansion}{"text"} = ["html-toc-extension"];
5017 ${$rexpansion}{"sext"} = ["html-src-extension"];
5022 # This will be called once after options have been parsed
5026 # X11 color names for default settings that seemed to look ok
5027 # (these color names are only used for programming clarity; the hex
5028 # numbers are actually written)
5029 use constant ForestGreen => "#228B22";
5030 use constant SaddleBrown => "#8B4513";
5031 use constant magenta4 => "#8B008B";
5032 use constant IndianRed3 => "#CD5555";
5033 use constant DeepSkyBlue4 => "#00688B";
5034 use constant MediumOrchid3 => "#B452CD";
5035 use constant black => "#000000";
5036 use constant white => "#FFFFFF";
5037 use constant red => "#FF0000";
5039 # set default color, bold, italic properties
5040 # anything not listed here will be given the default (punctuation) color --
5041 # these types currently not listed and get default: ws pu s sc cm co p
5042 # When adding NEW_TOKENS: add an entry here if you don't want defaults
5044 # set_default_properties( $short_name, default_color, bold?, italic? );
5045 set_default_properties( 'c', ForestGreen, 0, 0 );
5046 set_default_properties( 'pd', ForestGreen, 0, 1 );
5047 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
5048 set_default_properties( 'q', IndianRed3, 0, 0 );
5049 set_default_properties( 'hh', IndianRed3, 0, 1 );
5050 set_default_properties( 'h', IndianRed3, 1, 0 );
5051 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
5052 set_default_properties( 'w', black, 0, 0 );
5053 set_default_properties( 'n', MediumOrchid3, 0, 0 );
5054 set_default_properties( 'v', MediumOrchid3, 0, 0 );
5055 set_default_properties( 'j', IndianRed3, 1, 0 );
5056 set_default_properties( 'm', red, 1, 0 );
5058 set_default_color( 'html-color-background', white );
5059 set_default_color( 'html-color-punctuation', black );
5061 # setup property lookup tables for tokens based on their short names
5062 # every token type has a short name, and will use these tables
5063 # to do the html markup
5064 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
5065 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
5066 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
5067 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
5070 # write style sheet to STDOUT and die if requested
5071 if ( defined( $rOpts->{'stylesheet'} ) ) {
5072 write_style_sheet_file('-');
5076 # make sure user gives a file name after -css
5077 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
5078 $css_linkname = $rOpts->{'html-linked-style-sheet'};
5079 if ( $css_linkname =~ /^-/ ) {
5080 Perl::Tidy::Die "You must specify a valid filename after -css\n";
5084 # check for conflict
5085 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
5086 $rOpts->{'nohtml-style-sheets'} = 0;
5087 warning("You can't specify both -css and -nss; -nss ignored\n");
5090 # write a style sheet file if necessary
5091 if ($css_linkname) {
5093 # if the selected filename exists, don't write, because user may
5094 # have done some work by hand to create it; use backup name instead
5095 # Also, this will avoid a potential disaster in which the user
5096 # forgets to specify the style sheet, like this:
5097 # perltidy -html -css myfile1.pl myfile2.pl
5098 # This would cause myfile1.pl to parsed as the style sheet by GetOpts
5099 my $css_filename = $css_linkname;
5100 unless ( -e $css_filename ) {
5101 write_style_sheet_file($css_filename);
5104 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
5107 sub write_style_sheet_file {
5109 my $css_filename = shift;
5111 unless ( $fh = IO::File->new("> $css_filename") ) {
5112 Perl::Tidy::Die "can't open $css_filename: $!\n";
5114 write_style_sheet_data($fh);
5115 eval { $fh->close };
5118 sub write_style_sheet_data {
5120 # write the style sheet data to an open file handle
5123 my $bg_color = $rOpts->{'html-color-background'};
5124 my $text_color = $rOpts->{'html-color-punctuation'};
5126 # pre-bgcolor is new, and may not be defined
5127 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
5128 $pre_bg_color = $bg_color unless $pre_bg_color;
5130 $fh->print(<<"EOM");
5131 /* default style sheet generated by perltidy */
5132 body {background: $bg_color; color: $text_color}
5133 pre { color: $text_color;
5134 background: $pre_bg_color;
5135 font-family: courier;
5140 foreach my $short_name ( sort keys %short_to_long_names ) {
5141 my $long_name = $short_to_long_names{$short_name};
5143 my $abbrev = '.' . $short_name;
5144 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
5145 my $color = $html_color{$short_name};
5146 if ( !defined($color) ) { $color = $text_color }
5147 $fh->print("$abbrev \{ color: $color;");
5149 if ( $html_bold{$short_name} ) {
5150 $fh->print(" font-weight:bold;");
5153 if ( $html_italic{$short_name} ) {
5154 $fh->print(" font-style:italic;");
5156 $fh->print("} /* $long_name */\n");
5160 sub set_default_color {
5162 # make sure that options hash $rOpts->{$key} contains a valid color
5163 my ( $key, $color ) = @_;
5164 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
5165 $rOpts->{$key} = check_RGB($color);
5170 # if color is a 6 digit hex RGB value, prepend a #, otherwise
5171 # assume that it is a valid ascii color name
5173 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
5177 sub set_default_properties {
5178 my ( $short_name, $color, $bold, $italic ) = @_;
5180 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
5182 $key = "html-bold-$short_to_long_names{$short_name}";
5183 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
5184 $key = "html-italic-$short_to_long_names{$short_name}";
5185 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
5190 # Use Pod::Html to process the pod and make the page
5191 # then merge the perltidy code sections into it.
5192 # return 1 if success, 0 otherwise
5194 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
5195 my $input_file = $self->{_input_file};
5196 my $title = $self->{_title};
5197 my $success_flag = 0;
5199 # don't try to use pod2html if no pod
5200 unless ($pod_string) {
5201 return $success_flag;
5204 # Pod::Html requires a real temporary filename
5205 my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
5208 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5209 return $success_flag;
5212 #------------------------------------------------------------------
5213 # Warning: a temporary file is open; we have to clean up if
5214 # things go bad. From here on all returns should be by going to
5215 # RETURN so that the temporary file gets unlinked.
5216 #------------------------------------------------------------------
5218 # write the pod text to the temporary file
5219 $fh_tmp->print($pod_string);
5222 # Hand off the pod to pod2html.
5223 # Note that we can use the same temporary filename for input and output
5224 # because of the way pod2html works.
5228 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
5231 # Flags with string args:
5232 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
5233 # "podpath=s", "podroot=s"
5234 # Note: -css=s is handled by perltidy itself
5235 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
5236 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
5239 # Toggle switches; these have extra leading 'pod'
5240 # "header!", "index!", "recurse!", "quiet!", "verbose!"
5241 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
5242 my $kwd = $kw; # allows us to strip 'pod'
5243 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
5244 elsif ( defined( $rOpts->{$kw} ) ) {
5246 push @args, "--no$kwd";
5252 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
5254 # Must clean up if pod2html dies (it can);
5255 # Be careful not to overwrite callers __DIE__ routine
5256 local $SIG{__DIE__} = sub {
5257 unlink $tmpfile if -e $tmpfile;
5258 Perl::Tidy::Die $_[0];
5263 $fh_tmp = IO::File->new( $tmpfile, 'r' );
5266 # this error shouldn't happen ... we just used this filename
5268 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5272 my $html_fh = $self->{_html_fh};
5278 # This routine will write the html selectively and store the toc
5279 my $html_print = sub {
5281 $html_fh->print($_) unless ($no_print);
5282 if ($in_toc) { push @toc, $_ }
5286 # loop over lines of html output from pod2html and merge in
5287 # the necessary perltidy html sections
5288 my ( $saw_body, $saw_index, $saw_body_end );
5289 while ( my $line = $fh_tmp->getline() ) {
5291 if ( $line =~ /^\s*<html>\s*$/i ) {
5292 my $date = localtime;
5293 $html_print->("<!-- Generated by perltidy on $date -->\n");
5294 $html_print->($line);
5297 # Copy the perltidy css, if any, after <body> tag
5298 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
5300 $html_print->($css_string) if $css_string;
5301 $html_print->($line);
5303 # add a top anchor and heading
5304 $html_print->("<a name=\"-top-\"></a>\n");
5305 $title = escape_html($title);
5306 $html_print->("<h1>$title</h1>\n");
5309 # check for start of index, old pod2html
5310 # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
5311 # <!-- INDEX BEGIN -->
5315 # <!-- INDEX END -->
5317 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
5320 # when frames are used, an extra table of contents in the
5321 # contents panel is confusing, so don't print it
5322 $no_print = $rOpts->{'frames'}
5323 || !$rOpts->{'html-table-of-contents'};
5324 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5325 $html_print->($line);
5328 # check for start of index, new pod2html
5329 # After Pod::Html VERSION 1.15_02 it is delimited as:
5333 elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
5337 # when frames are used, an extra table of contents in the
5338 # contents panel is confusing, so don't print it
5339 $no_print = $rOpts->{'frames'}
5340 || !$rOpts->{'html-table-of-contents'};
5341 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5342 $html_print->($line);
5345 # Check for end of index, old pod2html
5346 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
5348 $html_print->($line);
5350 # Copy the perltidy toc, if any, after the Pod::Html toc
5352 $html_print->("<hr />\n") if $rOpts->{'frames'};
5353 $html_print->("<h2>Code Index:</h2>\n");
5354 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5355 $html_print->(@toc);
5361 # must track <ul> depth level for new pod2html
5362 elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
5364 $html_print->($line);
5367 # Check for end of index, for new pod2html
5368 elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
5370 $html_print->($line);
5372 # Copy the perltidy toc, if any, after the Pod::Html toc
5373 if ( $ul_level <= 0 ) {
5376 $html_print->("<hr />\n") if $rOpts->{'frames'};
5377 $html_print->("<h2>Code Index:</h2>\n");
5378 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5379 $html_print->(@toc);
5387 # Copy one perltidy section after each marker
5388 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
5390 $html_print->($1) if $1;
5392 # Intermingle code and pod sections if we saw multiple =cut's.
5393 if ( $self->{_pod_cut_count} > 1 ) {
5394 my $rpre_string = shift(@$rpre_string_stack);
5395 if ($$rpre_string) {
5396 $html_print->('<pre>');
5397 $html_print->($$rpre_string);
5398 $html_print->('</pre>');
5402 # shouldn't happen: we stored a string before writing
5405 "Problem merging html stream with pod2html; order may be wrong\n";
5407 $html_print->($line);
5410 # If didn't see multiple =cut lines, we'll put the pod out first
5411 # and then the code, because it's less confusing.
5414 # since we are not intermixing code and pod, we don't need
5415 # or want any <hr> lines which separated pod and code
5416 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
5420 # Copy any remaining code section before the </body> tag
5421 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
5423 if (@$rpre_string_stack) {
5424 unless ( $self->{_pod_cut_count} > 1 ) {
5425 $html_print->('<hr />');
5427 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
5428 $html_print->('<pre>');
5429 $html_print->($$rpre_string);
5430 $html_print->('</pre>');
5433 $html_print->($line);
5436 $html_print->($line);
5441 unless ($saw_body) {
5442 Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
5445 unless ($saw_body_end) {
5446 Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
5449 unless ($saw_index) {
5450 Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
5455 eval { $html_fh->close() };
5457 # note that we have to unlink tmpfile before making frames
5458 # because the tmpfile may be one of the names used for frames
5459 if ( -e $tmpfile ) {
5460 unless ( unlink($tmpfile) ) {
5461 Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n");
5466 if ( $success_flag && $rOpts->{'frames'} ) {
5467 $self->make_frame( \@toc );
5469 return $success_flag;
5474 # Make a frame with table of contents in the left panel
5475 # and the text in the right panel.
5477 # $html_filename contains the no-frames html output
5478 # $rtoc is a reference to an array with the table of contents
5481 my $input_file = $self->{_input_file};
5482 my $html_filename = $self->{_html_file};
5483 my $toc_filename = $self->{_toc_filename};
5484 my $src_filename = $self->{_src_filename};
5485 my $title = $self->{_title};
5486 $title = escape_html($title);
5488 # FUTURE input parameter:
5489 my $top_basename = "";
5491 # We need to produce 3 html files:
5492 # 1. - the table of contents
5493 # 2. - the contents (source code) itself
5494 # 3. - the frame which contains them
5496 # get basenames for relative links
5497 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5498 my ( $src_basename, $src_path ) = fileparse($src_filename);
5500 # 1. Make the table of contents panel, with appropriate changes
5501 # to the anchor names
5502 my $src_frame_name = 'SRC';
5504 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5507 # 2. The current .html filename is renamed to be the contents panel
5508 rename( $html_filename, $src_filename )
5509 or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
5511 # 3. Then use the original html filename for the frame
5513 $title, $html_filename, $top_basename,
5514 $toc_basename, $src_basename, $src_frame_name
5518 sub write_toc_html {
5520 # write a separate html table of contents file for frames
5521 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5522 my $fh = IO::File->new( $toc_filename, 'w' )
5523 or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
5527 <title>$title</title>
5530 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5534 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5535 $fh->print( join "", @$rtoc );
5544 sub write_frame_html {
5546 # write an html file to be the table of contents frame
5548 $title, $frame_filename, $top_basename,
5549 $toc_basename, $src_basename, $src_frame_name
5552 my $fh = IO::File->new( $frame_filename, 'w' )
5553 or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
5556 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5557 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5558 <?xml version="1.0" encoding="iso-8859-1" ?>
5559 <html xmlns="http://www.w3.org/1999/xhtml">
5561 <title>$title</title>
5565 # two left panels, one right, if master index file
5566 if ($top_basename) {
5568 <frameset cols="20%,80%">
5569 <frameset rows="30%,70%">
5570 <frame src = "$top_basename" />
5571 <frame src = "$toc_basename" />
5576 # one left panels, one right, if no master index file
5579 <frameset cols="20%,*">
5580 <frame src = "$toc_basename" />
5584 <frame src = "$src_basename" name = "$src_frame_name" />
5587 <p>If you see this message, you are using a non-frame-capable web client.</p>
5588 <p>This document contains:</p>
5590 <li><a href="$toc_basename">A table of contents</a></li>
5591 <li><a href="$src_basename">The source code</a></li>
5600 sub change_anchor_names {
5602 # add a filename and target to anchors
5603 # also return the first anchor
5604 my ( $rlines, $filename, $target ) = @_;
5606 foreach my $line (@$rlines) {
5608 # We're looking for lines like this:
5609 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5610 # ---- - -------- -----------------
5612 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5616 my $href = "$filename#$name";
5617 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5618 unless ($first_anchor) { $first_anchor = $href }
5621 return $first_anchor;
5624 sub close_html_file {
5626 return unless $self->{_html_file_opened};
5628 my $html_fh = $self->{_html_fh};
5629 my $rtoc_string = $self->{_rtoc_string};
5631 # There are 3 basic paths to html output...
5633 # ---------------------------------
5634 # Path 1: finish up if in -pre mode
5635 # ---------------------------------
5636 if ( $rOpts->{'html-pre-only'} ) {
5637 $html_fh->print( <<"PRE_END");
5640 eval { $html_fh->close() };
5645 $self->add_toc_item( 'EOF', 'EOF' );
5647 my $rpre_string_stack = $self->{_rpre_string_stack};
5649 # Patch to darken the <pre> background color in case of pod2html and
5650 # interleaved code/documentation. Otherwise, the distinction
5651 # between code and documentation is blurred.
5652 if ( $rOpts->{pod2html}
5653 && $self->{_pod_cut_count} >= 1
5654 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5656 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5659 # put the css or its link into a string, if used
5661 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5663 # use css linked to another file
5664 if ( $rOpts->{'html-linked-style-sheet'} ) {
5666 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5670 # use css embedded in this file
5671 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5672 $fh_css->print( <<'ENDCSS');
5673 <style type="text/css">
5676 write_style_sheet_data($fh_css);
5677 $fh_css->print( <<"ENDCSS");
5683 # -----------------------------------------------------------
5684 # path 2: use pod2html if requested
5685 # If we fail for some reason, continue on to path 3
5686 # -----------------------------------------------------------
5687 if ( $rOpts->{'pod2html'} ) {
5688 my $rpod_string = $self->{_rpod_string};
5689 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5690 $rpre_string_stack )
5694 # --------------------------------------------------
5695 # path 3: write code in html, with pod only in italics
5696 # --------------------------------------------------
5697 my $input_file = $self->{_input_file};
5698 my $title = escape_html($input_file);
5699 my $date = localtime;
5700 $html_fh->print( <<"HTML_START");
5701 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5702 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5703 <!-- Generated by perltidy on $date -->
5704 <html xmlns="http://www.w3.org/1999/xhtml">
5706 <title>$title</title>
5709 # output the css, if used
5711 $html_fh->print($css_string);
5712 $html_fh->print( <<"ENDCSS");
5719 $html_fh->print( <<"HTML_START");
5721 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5725 $html_fh->print("<a name=\"-top-\"></a>\n");
5726 $html_fh->print( <<"EOM");
5730 # copy the table of contents
5732 && !$rOpts->{'frames'}
5733 && $rOpts->{'html-table-of-contents'} )
5735 $html_fh->print($$rtoc_string);
5738 # copy the pre section(s)
5739 my $fname_comment = $input_file;
5740 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
5741 $html_fh->print( <<"END_PRE");
5743 <!-- contents of filename: $fname_comment -->
5747 foreach my $rpre_string (@$rpre_string_stack) {
5748 $html_fh->print($$rpre_string);
5751 # and finish the html page
5752 $html_fh->print( <<"HTML_END");
5757 eval { $html_fh->close() }; # could be object without close method
5759 if ( $rOpts->{'frames'} ) {
5760 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5761 $self->make_frame( \@toc );
5767 my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5768 my ( @colored_tokens, $j, $string, $type, $token, $level );
5769 my $rlast_level = $self->{_rlast_level};
5770 my $rpackage_stack = $self->{_rpackage_stack};
5772 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5773 $type = $$rtoken_type[$j];
5774 $token = $$rtokens[$j];
5775 $level = $$rlevels[$j];
5776 $level = 0 if ( $level < 0 );
5778 #-------------------------------------------------------
5779 # Update the package stack. The package stack is needed to keep
5780 # the toc correct because some packages may be declared within
5781 # blocks and go out of scope when we leave the block.
5782 #-------------------------------------------------------
5783 if ( $level > $$rlast_level ) {
5784 unless ( $rpackage_stack->[ $level - 1 ] ) {
5785 $rpackage_stack->[ $level - 1 ] = 'main';
5787 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5789 elsif ( $level < $$rlast_level ) {
5790 my $package = $rpackage_stack->[$level];
5791 unless ($package) { $package = 'main' }
5793 # if we change packages due to a nesting change, we
5794 # have to make an entry in the toc
5795 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5796 $self->add_toc_item( $package, 'package' );
5799 $$rlast_level = $level;
5801 #-------------------------------------------------------
5802 # Intercept a sub name here; split it
5803 # into keyword 'sub' and sub name; and add an
5805 #-------------------------------------------------------
5806 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5807 $token = $self->markup_html_element( $1, 'k' );
5808 push @colored_tokens, $token;
5812 # but don't include sub declarations in the toc;
5813 # these wlll have leading token types 'i;'
5814 my $signature = join "", @$rtoken_type;
5815 unless ( $signature =~ /^i;/ ) {
5816 my $subname = $token;
5817 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5818 $self->add_toc_item( $subname, 'sub' );
5822 #-------------------------------------------------------
5823 # Intercept a package name here; split it
5824 # into keyword 'package' and name; add to the toc,
5825 # and update the package stack
5826 #-------------------------------------------------------
5827 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5828 $token = $self->markup_html_element( $1, 'k' );
5829 push @colored_tokens, $token;
5832 $self->add_toc_item( "$token", 'package' );
5833 $rpackage_stack->[$level] = $token;
5836 $token = $self->markup_html_element( $token, $type );
5837 push @colored_tokens, $token;
5839 return ( \@colored_tokens );
5842 sub markup_html_element {
5844 my ( $token, $type ) = @_;
5846 return $token if ( $type eq 'b' ); # skip a blank token
5847 return $token if ( $token =~ /^\s*$/ ); # skip a blank line
5848 $token = escape_html($token);
5850 # get the short abbreviation for this token type
5851 my $short_name = $token_short_names{$type};
5852 if ( !defined($short_name) ) {
5853 $short_name = "pu"; # punctuation is default
5856 # handle style sheets..
5857 if ( !$rOpts->{'nohtml-style-sheets'} ) {
5858 if ( $short_name ne 'pu' ) {
5859 $token = qq(<span class="$short_name">) . $token . "</span>";
5863 # handle no style sheets..
5865 my $color = $html_color{$short_name};
5867 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5868 $token = qq(<font color="$color">) . $token . "</font>";
5870 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5871 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
5879 if ($missing_html_entities) {
5880 $token =~ s/\&/&/g;
5881 $token =~ s/\</</g;
5882 $token =~ s/\>/>/g;
5883 $token =~ s/\"/"/g;
5886 HTML::Entities::encode_entities($token);
5891 sub finish_formatting {
5893 # called after last line
5895 $self->close_html_file();
5902 return unless $self->{_html_file_opened};
5903 my $html_pre_fh = $self->{_html_pre_fh};
5904 my ($line_of_tokens) = @_;
5905 my $line_type = $line_of_tokens->{_line_type};
5906 my $input_line = $line_of_tokens->{_line_text};
5907 my $line_number = $line_of_tokens->{_line_number};
5910 # markup line of code..
5912 if ( $line_type eq 'CODE' ) {
5913 my $rtoken_type = $line_of_tokens->{_rtoken_type};
5914 my $rtokens = $line_of_tokens->{_rtokens};
5915 my $rlevels = $line_of_tokens->{_rlevels};
5917 if ( $input_line =~ /(^\s*)/ ) {
5923 my ($rcolored_tokens) =
5924 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5925 $html_line .= join '', @$rcolored_tokens;
5928 # markup line of non-code..
5931 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
5932 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
5933 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
5934 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5935 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
5936 elsif ( $line_type eq 'END_START' ) {
5937 $line_character = 'k';
5938 $self->add_toc_item( '__END__', '__END__' );
5940 elsif ( $line_type eq 'DATA_START' ) {
5941 $line_character = 'k';
5942 $self->add_toc_item( '__DATA__', '__DATA__' );
5944 elsif ( $line_type =~ /^POD/ ) {
5945 $line_character = 'P';
5946 if ( $rOpts->{'pod2html'} ) {
5947 my $html_pod_fh = $self->{_html_pod_fh};
5948 if ( $line_type eq 'POD_START' ) {
5950 my $rpre_string_stack = $self->{_rpre_string_stack};
5951 my $rpre_string = $rpre_string_stack->[-1];
5953 # if we have written any non-blank lines to the
5954 # current pre section, start writing to a new output
5956 if ( $$rpre_string =~ /\S/ ) {
5959 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5960 $self->{_html_pre_fh} = $html_pre_fh;
5961 push @$rpre_string_stack, \$pre_string;
5963 # leave a marker in the pod stream so we know
5964 # where to put the pre section we just
5966 my $for_html = '=for html'; # don't confuse pod utils
5967 $html_pod_fh->print(<<EOM);
5970 <!-- pERLTIDY sECTION -->
5975 # otherwise, just clear the current string and start
5979 $html_pod_fh->print("\n");
5982 $html_pod_fh->print( $input_line . "\n" );
5983 if ( $line_type eq 'POD_END' ) {
5984 $self->{_pod_cut_count}++;
5985 $html_pod_fh->print("\n");
5990 else { $line_character = 'Q' }
5991 $html_line = $self->markup_html_element( $input_line, $line_character );
5994 # add the line number if requested
5995 if ( $rOpts->{'html-line-numbers'} ) {
5997 ( $line_number < 10 ) ? " "
5998 : ( $line_number < 100 ) ? " "
5999 : ( $line_number < 1000 ) ? " "
6001 $html_line = $extra_space . $line_number . " " . $html_line;
6005 $html_pre_fh->print("$html_line\n");
6008 #####################################################################
6010 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
6011 # line breaks to the token stream
6013 # WARNING: This is not a real class for speed reasons. Only one
6014 # Formatter may be used.
6016 #####################################################################
6018 package Perl::Tidy::Formatter;
6022 # Caution: these debug flags produce a lot of output
6023 # They should all be 0 except when debugging small scripts
6024 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
6025 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
6026 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
6027 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
6028 use constant FORMATTER_DEBUG_FLAG_CI => 0;
6029 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
6030 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
6031 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
6032 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
6033 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
6034 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
6035 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
6036 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
6037 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
6039 my $debug_warning = sub {
6040 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
6043 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
6044 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
6045 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
6046 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
6047 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
6048 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
6049 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
6050 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
6051 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
6052 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
6053 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
6054 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
6055 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
6056 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
6063 $max_gnu_stack_index
6064 $gnu_position_predictor
6065 $line_start_index_to_go
6066 $last_indentation_written
6067 $last_unadjusted_indentation
6069 $last_output_short_opening_token
6071 $saw_VERSION_in_this_file
6076 $gnu_sequence_number
6077 $last_output_indentation
6083 @type_sequence_to_go
6084 @container_environment_to_go
6085 @bond_strength_to_go
6086 @forced_breakpoint_to_go
6087 @token_lengths_to_go
6088 @summed_lengths_to_go
6090 @leading_spaces_to_go
6091 @reduced_spaces_to_go
6092 @matching_token_to_go
6094 @nesting_blocks_to_go
6096 @nesting_depth_to_go
6098 @old_breakpoint_to_go
6104 %saved_opening_indentation
6107 $comma_count_in_batch
6108 $old_line_count_in_batch
6109 $last_nonblank_index_to_go
6110 $last_nonblank_type_to_go
6111 $last_nonblank_token_to_go
6112 $last_last_nonblank_index_to_go
6113 $last_last_nonblank_type_to_go
6114 $last_last_nonblank_token_to_go
6115 @nonblank_lines_at_depth
6118 @whitespace_level_stack
6119 $whitespace_last_level
6121 $in_format_skipping_section
6122 $format_skipping_pattern_begin
6123 $format_skipping_pattern_end
6125 $forced_breakpoint_count
6126 $forced_breakpoint_undo_count
6127 @forced_breakpoint_undo_stack
6128 %postponed_breakpoint
6132 $first_embedded_tab_at
6133 $last_embedded_tab_at
6134 $deleted_semicolon_count
6135 $first_deleted_semicolon_at
6136 $last_deleted_semicolon_at
6137 $added_semicolon_count
6138 $first_added_semicolon_at
6139 $last_added_semicolon_at
6140 $first_tabbing_disagreement
6141 $last_tabbing_disagreement
6142 $in_tabbing_disagreement
6143 $tabbing_disagreement_count
6147 $last_line_leading_type
6148 $last_line_leading_level
6149 $last_last_line_leading_level
6152 %block_opening_line_number
6153 $csc_new_statement_ok
6156 $accumulating_text_for_block
6158 $rleading_block_if_elsif_text
6159 $leading_block_text_level
6160 $leading_block_text_length_exceeded
6161 $leading_block_text_line_length
6162 $leading_block_text_line_number
6163 $closing_side_comment_prefix_pattern
6164 $closing_side_comment_list_pattern
6166 $blank_lines_after_opening_block_pattern
6167 $blank_lines_before_closing_block_pattern
6169 $last_nonblank_token
6171 $last_last_nonblank_token
6172 $last_last_nonblank_type
6173 $last_nonblank_block_type
6176 %is_if_brace_follower
6177 %space_after_keyword
6180 %is_last_next_redo_return
6181 %is_other_brace_follower
6182 %is_else_brace_follower
6183 %is_anon_sub_brace_follower
6184 %is_anon_sub_1_brace_follower
6186 %is_sort_map_grep_eval
6187 %is_sort_map_grep_eval_do
6188 %is_block_without_semicolon
6193 %is_if_unless_and_or_last_next_redo_return
6194 %ok_to_add_semicolon_for_block_type
6200 $is_static_block_comment
6201 $index_start_one_line_block
6202 $semicolons_before_block_self_destruct
6203 $index_max_forced_break
6206 $vertical_aligner_object
6211 $last_line_had_side_comment
6214 $static_block_comment_pattern
6215 $static_side_comment_pattern
6216 %opening_vertical_tightness
6217 %closing_vertical_tightness
6218 %closing_token_indentation
6219 $some_closing_token_indentation
6221 %opening_token_right
6222 %stack_opening_token
6223 %stack_closing_token
6225 $block_brace_vertical_tightness_pattern
6228 $rOpts_add_whitespace
6229 $rOpts_block_brace_tightness
6230 $rOpts_block_brace_vertical_tightness
6231 $rOpts_brace_left_and_indent
6232 $rOpts_comma_arrow_breakpoints
6233 $rOpts_break_at_old_keyword_breakpoints
6234 $rOpts_break_at_old_comma_breakpoints
6235 $rOpts_break_at_old_logical_breakpoints
6236 $rOpts_break_at_old_ternary_breakpoints
6237 $rOpts_break_at_old_attribute_breakpoints
6238 $rOpts_closing_side_comment_else_flag
6239 $rOpts_closing_side_comment_maximum_text
6240 $rOpts_continuation_indentation
6242 $rOpts_delete_old_whitespace
6243 $rOpts_fuzzy_line_length
6244 $rOpts_indent_columns
6245 $rOpts_line_up_parentheses
6246 $rOpts_maximum_fields_per_table
6247 $rOpts_maximum_line_length
6248 $rOpts_variable_maximum_line_length
6249 $rOpts_short_concatenation_item_length
6250 $rOpts_keep_old_blank_lines
6251 $rOpts_ignore_old_breakpoints
6252 $rOpts_format_skipping
6253 $rOpts_space_function_paren
6254 $rOpts_space_keyword_paren
6255 $rOpts_keep_interior_semicolons
6256 $rOpts_ignore_side_comment_lengths
6257 $rOpts_stack_closing_block_brace
6258 $rOpts_whitespace_cycle
6259 $rOpts_tight_secret_operators
6263 %is_keyword_returning_list
6267 %right_bond_strength
6287 # default list of block types for which -bli would apply
6288 $bli_list_string = 'if else elsif unless while for foreach do : sub';
6291 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
6292 <= >= == =~ !~ != ++ -- /= x=
6294 @is_digraph{@_} = (1) x scalar(@_);
6296 @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
6297 @is_trigraph{@_} = (1) x scalar(@_);
6300 = **= += *= &= <<= &&=
6301 -= /= |= >>= ||= //=
6305 @is_assignment{@_} = (1) x scalar(@_);
6315 @is_keyword_returning_list{@_} = (1) x scalar(@_);
6317 @_ = qw(is if unless and or err last next redo return);
6318 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
6320 @_ = qw(last next redo return);
6321 @is_last_next_redo_return{@_} = (1) x scalar(@_);
6323 @_ = qw(sort map grep);
6324 @is_sort_map_grep{@_} = (1) x scalar(@_);
6326 @_ = qw(sort map grep eval);
6327 @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
6329 @_ = qw(sort map grep eval do);
6330 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
6333 @is_if_unless{@_} = (1) x scalar(@_);
6335 @_ = qw(and or err);
6336 @is_and_or{@_} = (1) x scalar(@_);
6338 # Identify certain operators which often occur in chains.
6339 # Note: the minus (-) causes a side effect of padding of the first line in
6340 # something like this (by sub set_logical_padding):
6341 # Checkbutton => 'Transmission checked',
6342 # -variable => \$TRANS
6343 # This usually improves appearance so it seems ok.
6344 @_ = qw(&& || and or : ? . + - * /);
6345 @is_chain_operator{@_} = (1) x scalar(@_);
6347 # We can remove semicolons after blocks preceded by these keywords
6349 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6350 unless while until for foreach given when default);
6351 @is_block_without_semicolon{@_} = (1) x scalar(@_);
6353 # We will allow semicolons to be added within these block types
6354 # as well as sub and package blocks.
6356 # 1. Note that these keywords are omitted:
6357 # switch case given when default sort map grep
6358 # 2. It is also ok to add for sub and package blocks and a labeled block
6359 # 3. But not okay for other perltidy types including:
6361 # 4. Test files: blktype.t, blktype1.t, semicolon.t
6363 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6364 unless do while until eval for foreach );
6365 @ok_to_add_semicolon_for_block_type{@_} = (1) x scalar(@_);
6367 # 'L' is token for opening { at hash key
6369 @is_opening_type{@_} = (1) x scalar(@_);
6371 # 'R' is token for closing } at hash key
6373 @is_closing_type{@_} = (1) x scalar(@_);
6376 @is_opening_token{@_} = (1) x scalar(@_);
6379 @is_closing_token{@_} = (1) x scalar(@_);
6381 # Patterns for standardizing matches to block types for regular subs and
6382 # anonymous subs. Examples
6383 # 'sub process' is a named sub
6384 # 'sub ::m' is a named sub
6385 # 'sub' is an anonymous sub
6386 # 'sub:' is a label, not a sub
6387 # 'substr' is a keyword
6388 $SUB_PATTERN = '^sub\s+(::|\w)';
6389 $ASUB_PATTERN = '^sub$';
6393 use constant WS_YES => 1;
6394 use constant WS_OPTIONAL => 0;
6395 use constant WS_NO => -1;
6397 # Token bond strengths.
6398 use constant NO_BREAK => 10000;
6399 use constant VERY_STRONG => 100;
6400 use constant STRONG => 2.1;
6401 use constant NOMINAL => 1.1;
6402 use constant WEAK => 0.8;
6403 use constant VERY_WEAK => 0.55;
6405 # values for testing indexes in output array
6406 use constant UNDEFINED_INDEX => -1;
6408 # Maximum number of little messages; probably need not be changed.
6409 use constant MAX_NAG_MESSAGES => 6;
6411 # increment between sequence numbers for each type
6412 # For example, ?: pairs might have numbers 7,11,15,...
6413 use constant TYPE_SEQUENCE_INCREMENT => 4;
6417 # methods to count instances
6419 sub get_count { $_count; }
6420 sub _increment_count { ++$_count }
6421 sub _decrement_count { --$_count }
6426 # trim leading and trailing whitespace from a string
6435 $max = ( $max < $_ ) ? $_ : $max;
6443 $min = ( $min > $_ ) ? $_ : $min;
6450 # given a string containing words separated by whitespace,
6451 # return the list of words
6456 return split( /\s+/, $str );
6459 # interface to Perl::Tidy::Logger routines
6461 if ($logger_object) {
6462 $logger_object->warning(@_);
6467 if ($logger_object) {
6468 $logger_object->complain(@_);
6472 sub write_logfile_entry {
6473 if ($logger_object) {
6474 $logger_object->write_logfile_entry(@_);
6479 if ($logger_object) {
6480 $logger_object->black_box(@_);
6484 sub report_definite_bug {
6485 if ($logger_object) {
6486 $logger_object->report_definite_bug();
6490 sub get_saw_brace_error {
6491 if ($logger_object) {
6492 $logger_object->get_saw_brace_error();
6496 sub we_are_at_the_last_line {
6497 if ($logger_object) {
6498 $logger_object->we_are_at_the_last_line();
6502 # interface to Perl::Tidy::Diagnostics routine
6503 sub write_diagnostics {
6505 if ($diagnostics_object) {
6506 $diagnostics_object->write_diagnostics(@_);
6510 sub get_added_semicolon_count {
6512 return $added_semicolon_count;
6516 $_[0]->_decrement_count();
6523 # we are given an object with a write_line() method to take lines
6525 sink_object => undef,
6526 diagnostics_object => undef,
6527 logger_object => undef,
6529 my %args = ( %defaults, @_ );
6531 $logger_object = $args{logger_object};
6532 $diagnostics_object = $args{diagnostics_object};
6534 # we create another object with a get_line() and peek_ahead() method
6535 my $sink_object = $args{sink_object};
6536 $file_writer_object =
6537 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6539 # initialize the leading whitespace stack to negative levels
6540 # so that we can never run off the end of the stack
6541 $gnu_position_predictor = 0; # where the current token is predicted to be
6542 $max_gnu_stack_index = 0;
6543 $max_gnu_item_index = -1;
6544 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6545 @gnu_item_list = ();
6546 $last_output_indentation = 0;
6547 $last_indentation_written = 0;
6548 $last_unadjusted_indentation = 0;
6549 $last_leading_token = "";
6550 $last_output_short_opening_token = 0;
6552 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6553 $saw_END_or_DATA_ = 0;
6555 @block_type_to_go = ();
6556 @type_sequence_to_go = ();
6557 @container_environment_to_go = ();
6558 @bond_strength_to_go = ();
6559 @forced_breakpoint_to_go = ();
6560 @summed_lengths_to_go = (); # line length to start of ith token
6561 @token_lengths_to_go = ();
6563 @matching_token_to_go = ();
6564 @mate_index_to_go = ();
6565 @nesting_blocks_to_go = ();
6566 @ci_levels_to_go = ();
6567 @nesting_depth_to_go = (0);
6568 @nobreak_to_go = ();
6569 @old_breakpoint_to_go = ();
6572 @leading_spaces_to_go = ();
6573 @reduced_spaces_to_go = ();
6577 @whitespace_level_stack = ();
6578 $whitespace_last_level = -1;
6581 @has_broken_sublist = ();
6582 @want_comma_break = ();
6585 $first_tabbing_disagreement = 0;
6586 $last_tabbing_disagreement = 0;
6587 $tabbing_disagreement_count = 0;
6588 $in_tabbing_disagreement = 0;
6589 $input_line_tabbing = undef;
6591 $last_line_type = "";
6592 $last_last_line_leading_level = 0;
6593 $last_line_leading_level = 0;
6594 $last_line_leading_type = '#';
6596 $last_nonblank_token = ';';
6597 $last_nonblank_type = ';';
6598 $last_last_nonblank_token = ';';
6599 $last_last_nonblank_type = ';';
6600 $last_nonblank_block_type = "";
6601 $last_output_level = 0;
6602 $looking_for_else = 0;
6603 $embedded_tab_count = 0;
6604 $first_embedded_tab_at = 0;
6605 $last_embedded_tab_at = 0;
6606 $deleted_semicolon_count = 0;
6607 $first_deleted_semicolon_at = 0;
6608 $last_deleted_semicolon_at = 0;
6609 $added_semicolon_count = 0;
6610 $first_added_semicolon_at = 0;
6611 $last_added_semicolon_at = 0;
6612 $last_line_had_side_comment = 0;
6613 $is_static_block_comment = 0;
6614 %postponed_breakpoint = ();
6616 # variables for adding side comments
6617 %block_leading_text = ();
6618 %block_opening_line_number = ();
6619 $csc_new_statement_ok = 1;
6620 %csc_block_label = ();
6622 %saved_opening_indentation = ();
6623 $in_format_skipping_section = 0;
6625 reset_block_text_accumulator();
6627 prepare_for_new_input_lines();
6629 $vertical_aligner_object =
6630 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6631 $logger_object, $diagnostics_object );
6633 if ( $rOpts->{'entab-leading-whitespace'} ) {
6634 write_logfile_entry(
6635 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6638 elsif ( $rOpts->{'tabs'} ) {
6639 write_logfile_entry("Indentation will be with a tab character\n");
6642 write_logfile_entry(
6643 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6646 # This was the start of a formatter referent, but object-oriented
6647 # coding has turned out to be too slow here.
6648 $formatter_self = {};
6650 bless $formatter_self, $class;
6652 # Safety check..this is not a class yet
6653 if ( _increment_count() > 1 ) {
6655 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6657 return $formatter_self;
6660 sub prepare_for_new_input_lines {
6662 $gnu_sequence_number++; # increment output batch counter
6663 %last_gnu_equals = ();
6664 %gnu_comma_count = ();
6665 %gnu_arrow_count = ();
6666 $line_start_index_to_go = 0;
6667 $max_gnu_item_index = UNDEFINED_INDEX;
6668 $index_max_forced_break = UNDEFINED_INDEX;
6669 $max_index_to_go = UNDEFINED_INDEX;
6670 $last_nonblank_index_to_go = UNDEFINED_INDEX;
6671 $last_nonblank_type_to_go = '';
6672 $last_nonblank_token_to_go = '';
6673 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6674 $last_last_nonblank_type_to_go = '';
6675 $last_last_nonblank_token_to_go = '';
6676 $forced_breakpoint_count = 0;
6677 $forced_breakpoint_undo_count = 0;
6678 $rbrace_follower = undef;
6679 $summed_lengths_to_go[0] = 0;
6680 $old_line_count_in_batch = 1;
6681 $comma_count_in_batch = 0;
6682 $starting_in_quote = 0;
6684 destroy_one_line_block();
6690 my ($line_of_tokens) = @_;
6692 my $line_type = $line_of_tokens->{_line_type};
6693 my $input_line = $line_of_tokens->{_line_text};
6695 if ( $rOpts->{notidy} ) {
6696 write_unindented_line($input_line);
6697 $last_line_type = $line_type;
6701 # _line_type codes are:
6702 # SYSTEM - system-specific code before hash-bang line
6703 # CODE - line of perl code (including comments)
6704 # POD_START - line starting pod, such as '=head'
6705 # POD - pod documentation text
6706 # POD_END - last line of pod section, '=cut'
6707 # HERE - text of here-document
6708 # HERE_END - last line of here-doc (target word)
6709 # FORMAT - format section
6710 # FORMAT_END - last line of format section, '.'
6711 # DATA_START - __DATA__ line
6712 # DATA - unidentified text following __DATA__
6713 # END_START - __END__ line
6714 # END - unidentified text following __END__
6715 # ERROR - we are in big trouble, probably not a perl script
6717 # put a blank line after an =cut which comes before __END__ and __DATA__
6718 # (required by podchecker)
6719 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6720 $file_writer_object->reset_consecutive_blank_lines();
6721 if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6724 # handle line of code..
6725 if ( $line_type eq 'CODE' ) {
6727 # let logger see all non-blank lines of code
6728 if ( $input_line !~ /^\s*$/ ) {
6729 my $output_line_number =
6730 $vertical_aligner_object->get_output_line_number();
6731 black_box( $line_of_tokens, $output_line_number );
6733 print_line_of_tokens($line_of_tokens);
6736 # handle line of non-code..
6742 if ( $line_type =~ /^POD/ ) {
6744 # Pod docs should have a preceding blank line. But stay
6745 # out of __END__ and __DATA__ sections, because
6746 # the user may be using this section for any purpose whatsoever
6747 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6748 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
6749 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
6751 && $line_type eq 'POD_START'
6752 && !$saw_END_or_DATA_ )
6758 # leave the blank counters in a predictable state
6759 # after __END__ or __DATA__
6760 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6761 $file_writer_object->reset_consecutive_blank_lines();
6762 $saw_END_or_DATA_ = 1;
6765 # write unindented non-code line
6766 if ( !$skip_line ) {
6767 if ($tee_line) { $file_writer_object->tee_on() }
6768 write_unindented_line($input_line);
6769 if ($tee_line) { $file_writer_object->tee_off() }
6772 $last_line_type = $line_type;
6775 sub create_one_line_block {
6776 $index_start_one_line_block = $_[0];
6777 $semicolons_before_block_self_destruct = $_[1];
6780 sub destroy_one_line_block {
6781 $index_start_one_line_block = UNDEFINED_INDEX;
6782 $semicolons_before_block_self_destruct = 0;
6785 sub leading_spaces_to_go {
6787 # return the number of indentation spaces for a token in the output stream;
6788 # these were previously stored by 'set_leading_whitespace'.
6791 if ( $ii < 0 ) { $ii = 0 }
6792 return get_SPACES( $leading_spaces_to_go[$ii] );
6798 # return the number of leading spaces associated with an indentation
6799 # variable $indentation is either a constant number of spaces or an object
6800 # with a get_SPACES method.
6801 my $indentation = shift;
6802 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6805 sub get_RECOVERABLE_SPACES {
6807 # return the number of spaces (+ means shift right, - means shift left)
6808 # that we would like to shift a group of lines with the same indentation
6809 # to get them to line up with their opening parens
6810 my $indentation = shift;
6811 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6814 sub get_AVAILABLE_SPACES_to_go {
6816 my $item = $leading_spaces_to_go[ $_[0] ];
6818 # return the number of available leading spaces associated with an
6819 # indentation variable. $indentation is either a constant number of
6820 # spaces or an object with a get_AVAILABLE_SPACES method.
6821 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6824 sub new_lp_indentation_item {
6826 # this is an interface to the IndentationItem class
6827 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6829 # A negative level implies not to store the item in the item_list
6831 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6833 my $item = Perl::Tidy::IndentationItem->new(
6835 $ci_level, $available_spaces,
6836 $index, $gnu_sequence_number,
6837 $align_paren, $max_gnu_stack_index,
6838 $line_start_index_to_go,
6841 if ( $level >= 0 ) {
6842 $gnu_item_list[$max_gnu_item_index] = $item;
6848 sub set_leading_whitespace {
6850 # This routine defines leading whitespace
6851 # given: the level and continuation_level of a token,
6852 # define: space count of leading string which would apply if it
6853 # were the first token of a new line.
6855 my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
6857 # Adjust levels if necessary to recycle whitespace:
6858 # given $level_abs, the absolute level
6859 # define $level, a possibly reduced level for whitespace
6860 my $level = $level_abs;
6861 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
6862 if ( $level_abs < $whitespace_last_level ) {
6863 pop(@whitespace_level_stack);
6865 if ( !@whitespace_level_stack ) {
6866 push @whitespace_level_stack, $level_abs;
6868 elsif ( $level_abs > $whitespace_last_level ) {
6869 $level = $whitespace_level_stack[-1] +
6870 ( $level_abs - $whitespace_last_level );
6873 # 1 Try to break at a block brace
6875 $level > $rOpts_whitespace_cycle
6876 && $last_nonblank_type eq '{'
6877 && $last_nonblank_token eq '{'
6880 # 2 Then either a brace or bracket
6881 || ( $level > $rOpts_whitespace_cycle + 1
6882 && $last_nonblank_token =~ /^[\{\[]$/ )
6884 # 3 Then a paren too
6885 || $level > $rOpts_whitespace_cycle + 2
6890 push @whitespace_level_stack, $level;
6892 $level = $whitespace_level_stack[-1];
6894 $whitespace_last_level = $level_abs;
6896 # modify for -bli, which adds one continuation indentation for
6898 if ( $rOpts_brace_left_and_indent
6899 && $max_index_to_go == 0
6900 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6905 # patch to avoid trouble when input file has negative indentation.
6906 # other logic should catch this error.
6907 if ( $level < 0 ) { $level = 0 }
6909 #-------------------------------------------
6910 # handle the standard indentation scheme
6911 #-------------------------------------------
6912 unless ($rOpts_line_up_parentheses) {
6914 $ci_level * $rOpts_continuation_indentation +
6915 $level * $rOpts_indent_columns;
6917 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6919 if ($in_continued_quote) {
6923 $leading_spaces_to_go[$max_index_to_go] = $space_count;
6924 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6928 #-------------------------------------------------------------
6929 # handle case of -lp indentation..
6930 #-------------------------------------------------------------
6932 # The continued_quote flag means that this is the first token of a
6933 # line, and it is the continuation of some kind of multi-line quote
6934 # or pattern. It requires special treatment because it must have no
6935 # added leading whitespace. So we create a special indentation item
6936 # which is not in the stack.
6937 if ($in_continued_quote) {
6938 my $space_count = 0;
6939 my $available_space = 0;
6940 $level = -1; # flag to prevent storing in item_list
6941 $leading_spaces_to_go[$max_index_to_go] =
6942 $reduced_spaces_to_go[$max_index_to_go] =
6943 new_lp_indentation_item( $space_count, $level, $ci_level,
6944 $available_space, 0 );
6948 # get the top state from the stack
6949 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6950 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6951 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6953 my $type = $types_to_go[$max_index_to_go];
6954 my $token = $tokens_to_go[$max_index_to_go];
6955 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6957 if ( $type eq '{' || $type eq '(' ) {
6959 $gnu_comma_count{ $total_depth + 1 } = 0;
6960 $gnu_arrow_count{ $total_depth + 1 } = 0;
6962 # If we come to an opening token after an '=' token of some type,
6963 # see if it would be helpful to 'break' after the '=' to save space
6964 my $last_equals = $last_gnu_equals{$total_depth};
6965 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6967 # find the position if we break at the '='
6968 my $i_test = $last_equals;
6969 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6972 ##my $too_close = ($i_test==$max_index_to_go-1);
6974 my $test_position = total_line_length( $i_test, $max_index_to_go );
6975 my $mll = maximum_line_length($i_test);
6979 # the equals is not just before an open paren (testing)
6982 # if we are beyond the midpoint
6983 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
6985 # or we are beyond the 1/4 point and there was an old
6986 # break at the equals
6988 $gnu_position_predictor >
6989 $mll - $rOpts_maximum_line_length * 3 / 4
6991 $old_breakpoint_to_go[$last_equals]
6992 || ( $last_equals > 0
6993 && $old_breakpoint_to_go[ $last_equals - 1 ] )
6994 || ( $last_equals > 1
6995 && $types_to_go[ $last_equals - 1 ] eq 'b'
6996 && $old_breakpoint_to_go[ $last_equals - 2 ] )
7002 # then make the switch -- note that we do not set a real
7003 # breakpoint here because we may not really need one; sub
7004 # scan_list will do that if necessary
7005 $line_start_index_to_go = $i_test + 1;
7006 $gnu_position_predictor = $test_position;
7012 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
7014 # Check for decreasing depth ..
7015 # Note that one token may have both decreasing and then increasing
7016 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
7017 # in this example we would first go back to (1,0) then up to (2,0)
7019 if ( $level < $current_level || $ci_level < $current_ci_level ) {
7021 # loop to find the first entry at or completely below this level
7022 my ( $lev, $ci_lev );
7024 if ($max_gnu_stack_index) {
7026 # save index of token which closes this level
7027 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
7029 # Undo any extra indentation if we saw no commas
7030 my $available_spaces =
7031 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
7033 my $comma_count = 0;
7034 my $arrow_count = 0;
7035 if ( $type eq '}' || $type eq ')' ) {
7036 $comma_count = $gnu_comma_count{$total_depth};
7037 $arrow_count = $gnu_arrow_count{$total_depth};
7038 $comma_count = 0 unless $comma_count;
7039 $arrow_count = 0 unless $arrow_count;
7041 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
7042 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
7044 if ( $available_spaces > 0 ) {
7046 if ( $comma_count <= 0 || $arrow_count > 0 ) {
7048 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
7050 $gnu_stack[$max_gnu_stack_index]
7051 ->get_SEQUENCE_NUMBER();
7053 # Be sure this item was created in this batch. This
7054 # should be true because we delete any available
7055 # space from open items at the end of each batch.
7056 if ( $gnu_sequence_number != $seqno
7057 || $i > $max_gnu_item_index )
7060 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
7062 report_definite_bug();
7066 if ( $arrow_count == 0 ) {
7068 ->permanently_decrease_AVAILABLE_SPACES(
7073 ->tentatively_decrease_AVAILABLE_SPACES(
7080 $j <= $max_gnu_item_index ;
7085 ->decrease_SPACES($available_spaces);
7092 --$max_gnu_stack_index;
7093 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
7094 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
7096 # stop when we reach a level at or below the current level
7097 if ( $lev <= $level && $ci_lev <= $ci_level ) {
7099 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
7100 $current_level = $lev;
7101 $current_ci_level = $ci_lev;
7106 # reached bottom of stack .. should never happen because
7107 # only negative levels can get here, and $level was forced
7108 # to be positive above.
7111 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
7113 report_definite_bug();
7119 # handle increasing depth
7120 if ( $level > $current_level || $ci_level > $current_ci_level ) {
7122 # Compute the standard incremental whitespace. This will be
7123 # the minimum incremental whitespace that will be used. This
7124 # choice results in a smooth transition between the gnu-style
7125 # and the standard style.
7126 my $standard_increment =
7127 ( $level - $current_level ) * $rOpts_indent_columns +
7128 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
7130 # Now we have to define how much extra incremental space
7131 # ("$available_space") we want. This extra space will be
7132 # reduced as necessary when long lines are encountered or when
7133 # it becomes clear that we do not have a good list.
7134 my $available_space = 0;
7135 my $align_paren = 0;
7138 # initialization on empty stack..
7139 if ( $max_gnu_stack_index == 0 ) {
7140 $space_count = $level * $rOpts_indent_columns;
7143 # if this is a BLOCK, add the standard increment
7144 elsif ($last_nonblank_block_type) {
7145 $space_count += $standard_increment;
7148 # if last nonblank token was not structural indentation,
7149 # just use standard increment
7150 elsif ( $last_nonblank_type ne '{' ) {
7151 $space_count += $standard_increment;
7154 # otherwise use the space to the first non-blank level change token
7157 $space_count = $gnu_position_predictor;
7159 my $min_gnu_indentation =
7160 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
7162 $available_space = $space_count - $min_gnu_indentation;
7163 if ( $available_space >= $standard_increment ) {
7164 $min_gnu_indentation += $standard_increment;
7166 elsif ( $available_space > 1 ) {
7167 $min_gnu_indentation += $available_space + 1;
7169 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
7170 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
7171 $min_gnu_indentation += 2;
7174 $min_gnu_indentation += 1;
7178 $min_gnu_indentation += $standard_increment;
7180 $available_space = $space_count - $min_gnu_indentation;
7182 if ( $available_space < 0 ) {
7183 $space_count = $min_gnu_indentation;
7184 $available_space = 0;
7189 # update state, but not on a blank token
7190 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
7192 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
7194 ++$max_gnu_stack_index;
7195 $gnu_stack[$max_gnu_stack_index] =
7196 new_lp_indentation_item( $space_count, $level, $ci_level,
7197 $available_space, $align_paren );
7199 # If the opening paren is beyond the half-line length, then
7200 # we will use the minimum (standard) indentation. This will
7201 # help avoid problems associated with running out of space
7202 # near the end of a line. As a result, in deeply nested
7203 # lists, there will be some indentations which are limited
7204 # to this minimum standard indentation. But the most deeply
7205 # nested container will still probably be able to shift its
7206 # parameters to the right for proper alignment, so in most
7207 # cases this will not be noticeable.
7208 if ( $available_space > 0 && $space_count > $halfway ) {
7209 $gnu_stack[$max_gnu_stack_index]
7210 ->tentatively_decrease_AVAILABLE_SPACES($available_space);
7215 # Count commas and look for non-list characters. Once we see a
7216 # non-list character, we give up and don't look for any more commas.
7217 if ( $type eq '=>' ) {
7218 $gnu_arrow_count{$total_depth}++;
7220 # tentatively treating '=>' like '=' for estimating breaks
7221 # TODO: this could use some experimentation
7222 $last_gnu_equals{$total_depth} = $max_index_to_go;
7225 elsif ( $type eq ',' ) {
7226 $gnu_comma_count{$total_depth}++;
7229 elsif ( $is_assignment{$type} ) {
7230 $last_gnu_equals{$total_depth} = $max_index_to_go;
7233 # this token might start a new line
7234 # if this is a non-blank..
7235 if ( $type ne 'b' ) {
7240 # this is the first nonblank token of the line
7241 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
7243 # or previous character was one of these:
7244 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
7246 # or previous character was opening and this does not close it
7247 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
7248 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
7250 # or this token is one of these:
7251 || $type =~ /^([\.]|\|\||\&\&)$/
7253 # or this is a closing structure
7254 || ( $last_nonblank_type_to_go eq '}'
7255 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
7257 # or previous token was keyword 'return'
7258 || ( $last_nonblank_type_to_go eq 'k'
7259 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
7261 # or starting a new line at certain keywords is fine
7263 && $is_if_unless_and_or_last_next_redo_return{$token} )
7265 # or this is after an assignment after a closing structure
7267 $is_assignment{$last_nonblank_type_to_go}
7269 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
7271 # and it is significantly to the right
7272 || $gnu_position_predictor > $halfway
7277 check_for_long_gnu_style_lines();
7278 $line_start_index_to_go = $max_index_to_go;
7280 # back up 1 token if we want to break before that type
7281 # otherwise, we may strand tokens like '?' or ':' on a line
7282 if ( $line_start_index_to_go > 0 ) {
7283 if ( $last_nonblank_type_to_go eq 'k' ) {
7285 if ( $want_break_before{$last_nonblank_token_to_go} ) {
7286 $line_start_index_to_go--;
7289 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
7290 $line_start_index_to_go--;
7296 # remember the predicted position of this token on the output line
7297 if ( $max_index_to_go > $line_start_index_to_go ) {
7298 $gnu_position_predictor =
7299 total_line_length( $line_start_index_to_go, $max_index_to_go );
7302 $gnu_position_predictor =
7303 $space_count + $token_lengths_to_go[$max_index_to_go];
7306 # store the indentation object for this token
7307 # this allows us to manipulate the leading whitespace
7308 # (in case we have to reduce indentation to fit a line) without
7309 # having to change any token values
7310 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
7311 $reduced_spaces_to_go[$max_index_to_go] =
7312 ( $max_gnu_stack_index > 0 && $ci_level )
7313 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
7314 : $gnu_stack[$max_gnu_stack_index];
7318 sub check_for_long_gnu_style_lines {
7320 # look at the current estimated maximum line length, and
7321 # remove some whitespace if it exceeds the desired maximum
7323 # this is only for the '-lp' style
7324 return unless ($rOpts_line_up_parentheses);
7326 # nothing can be done if no stack items defined for this line
7327 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7329 # see if we have exceeded the maximum desired line length
7330 # keep 2 extra free because they are needed in some cases
7331 # (result of trial-and-error testing)
7333 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
7335 return if ( $spaces_needed <= 0 );
7337 # We are over the limit, so try to remove a requested number of
7338 # spaces from leading whitespace. We are only allowed to remove
7339 # from whitespace items created on this batch, since others have
7340 # already been used and cannot be undone.
7341 my @candidates = ();
7344 # loop over all whitespace items created for the current batch
7345 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7346 my $item = $gnu_item_list[$i];
7348 # item must still be open to be a candidate (otherwise it
7349 # cannot influence the current token)
7350 next if ( $item->get_CLOSED() >= 0 );
7352 my $available_spaces = $item->get_AVAILABLE_SPACES();
7354 if ( $available_spaces > 0 ) {
7355 push( @candidates, [ $i, $available_spaces ] );
7359 return unless (@candidates);
7361 # sort by available whitespace so that we can remove whitespace
7362 # from the maximum available first
7363 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
7365 # keep removing whitespace until we are done or have no more
7367 foreach $candidate (@candidates) {
7368 my ( $i, $available_spaces ) = @{$candidate};
7369 my $deleted_spaces =
7370 ( $available_spaces > $spaces_needed )
7372 : $available_spaces;
7374 # remove the incremental space from this item
7375 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
7379 # update the leading whitespace of this item and all items
7380 # that came after it
7381 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
7383 my $old_spaces = $gnu_item_list[$i]->get_SPACES();
7384 if ( $old_spaces >= $deleted_spaces ) {
7385 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
7388 # shouldn't happen except for code bug:
7390 my $level = $gnu_item_list[$i_debug]->get_LEVEL();
7391 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
7392 my $old_level = $gnu_item_list[$i]->get_LEVEL();
7393 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
7395 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
7397 report_definite_bug();
7400 $gnu_position_predictor -= $deleted_spaces;
7401 $spaces_needed -= $deleted_spaces;
7402 last unless ( $spaces_needed > 0 );
7406 sub finish_lp_batch {
7408 # This routine is called once after each output stream batch is
7409 # finished to undo indentation for all incomplete -lp
7410 # indentation levels. It is too risky to leave a level open,
7411 # because then we can't backtrack in case of a long line to follow.
7412 # This means that comments and blank lines will disrupt this
7413 # indentation style. But the vertical aligner may be able to
7414 # get the space back if there are side comments.
7416 # this is only for the 'lp' style
7417 return unless ($rOpts_line_up_parentheses);
7419 # nothing can be done if no stack items defined for this line
7420 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7422 # loop over all whitespace items created for the current batch
7424 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7425 my $item = $gnu_item_list[$i];
7427 # only look for open items
7428 next if ( $item->get_CLOSED() >= 0 );
7430 # Tentatively remove all of the available space
7431 # (The vertical aligner will try to get it back later)
7432 my $available_spaces = $item->get_AVAILABLE_SPACES();
7433 if ( $available_spaces > 0 ) {
7435 # delete incremental space for this item
7437 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
7439 # Reduce the total indentation space of any nodes that follow
7440 # Note that any such nodes must necessarily be dependents
7442 foreach ( $i + 1 .. $max_gnu_item_index ) {
7443 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
7450 sub reduce_lp_indentation {
7452 # reduce the leading whitespace at token $i if possible by $spaces_needed
7453 # (a large value of $spaces_needed will remove all excess space)
7454 # NOTE: to be called from scan_list only for a sequence of tokens
7455 # contained between opening and closing parens/braces/brackets
7457 my ( $i, $spaces_wanted ) = @_;
7458 my $deleted_spaces = 0;
7460 my $item = $leading_spaces_to_go[$i];
7461 my $available_spaces = $item->get_AVAILABLE_SPACES();
7464 $available_spaces > 0
7465 && ( ( $spaces_wanted <= $available_spaces )
7466 || !$item->get_HAVE_CHILD() )
7470 # we'll remove these spaces, but mark them as recoverable
7472 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
7475 return $deleted_spaces;
7478 sub token_sequence_length {
7480 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
7481 # returns 0 if $ibeg > $iend (shouldn't happen)
7482 my ( $ibeg, $iend ) = @_;
7483 return 0 if ( $iend < 0 || $ibeg > $iend );
7484 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
7485 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
7488 sub total_line_length {
7490 # return length of a line of tokens ($ibeg .. $iend)
7491 my ( $ibeg, $iend ) = @_;
7492 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
7495 sub maximum_line_length_for_level {
7497 # return maximum line length for line starting with a given level
7498 my $maximum_line_length = $rOpts_maximum_line_length;
7500 # Modify if -vmll option is selected
7501 if ($rOpts_variable_maximum_line_length) {
7503 if ( $level < 0 ) { $level = 0 }
7504 $maximum_line_length += $level * $rOpts_indent_columns;
7506 return $maximum_line_length;
7509 sub maximum_line_length {
7511 # return maximum line length for line starting with the token at given index
7512 return maximum_line_length_for_level( $levels_to_go[ $_[0] ] );
7516 sub excess_line_length {
7518 # return number of characters by which a line of tokens ($ibeg..$iend)
7519 # exceeds the allowable line length.
7520 my ( $ibeg, $iend ) = @_;
7521 return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg);
7524 sub finish_formatting {
7526 # flush buffer and write any informative messages
7530 $file_writer_object->decrement_output_line_number()
7531 ; # fix up line number since it was incremented
7532 we_are_at_the_last_line();
7533 if ( $added_semicolon_count > 0 ) {
7534 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
7536 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
7537 write_logfile_entry("$added_semicolon_count $what added:\n");
7538 write_logfile_entry(
7539 " $first at input line $first_added_semicolon_at\n");
7541 if ( $added_semicolon_count > 1 ) {
7542 write_logfile_entry(
7543 " Last at input line $last_added_semicolon_at\n");
7545 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
7546 write_logfile_entry("\n");
7549 if ( $deleted_semicolon_count > 0 ) {
7550 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
7552 ( $deleted_semicolon_count > 1 )
7555 write_logfile_entry(
7556 "$deleted_semicolon_count unnecessary $what deleted:\n");
7557 write_logfile_entry(
7558 " $first at input line $first_deleted_semicolon_at\n");
7560 if ( $deleted_semicolon_count > 1 ) {
7561 write_logfile_entry(
7562 " Last at input line $last_deleted_semicolon_at\n");
7564 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
7565 write_logfile_entry("\n");
7568 if ( $embedded_tab_count > 0 ) {
7569 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
7571 ( $embedded_tab_count > 1 )
7572 ? "quotes or patterns"
7573 : "quote or pattern";
7574 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
7575 write_logfile_entry(
7576 "This means the display of this script could vary with device or software\n"
7578 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
7580 if ( $embedded_tab_count > 1 ) {
7581 write_logfile_entry(
7582 " Last at input line $last_embedded_tab_at\n");
7584 write_logfile_entry("\n");
7587 if ($first_tabbing_disagreement) {
7588 write_logfile_entry(
7589 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
7593 if ($in_tabbing_disagreement) {
7594 write_logfile_entry(
7595 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
7600 if ($last_tabbing_disagreement) {
7602 write_logfile_entry(
7603 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
7607 write_logfile_entry("No indentation disagreement seen\n");
7610 if ($first_tabbing_disagreement) {
7611 write_logfile_entry(
7612 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
7615 write_logfile_entry("\n");
7617 $vertical_aligner_object->report_anything_unusual();
7619 $file_writer_object->report_line_length_errors();
7624 # This routine is called to check the Opts hash after it is defined
7628 make_static_block_comment_pattern();
7629 make_static_side_comment_pattern();
7630 make_closing_side_comment_prefix();
7631 make_closing_side_comment_list_pattern();
7632 $format_skipping_pattern_begin =
7633 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7634 $format_skipping_pattern_end =
7635 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7637 # If closing side comments ARE selected, then we can safely
7638 # delete old closing side comments unless closing side comment
7639 # warnings are requested. This is a good idea because it will
7640 # eliminate any old csc's which fall below the line count threshold.
7641 # We cannot do this if warnings are turned on, though, because we
7642 # might delete some text which has been added. So that must
7643 # be handled when comments are created.
7644 if ( $rOpts->{'closing-side-comments'} ) {
7645 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7646 $rOpts->{'delete-closing-side-comments'} = 1;
7650 # If closing side comments ARE NOT selected, but warnings ARE
7651 # selected and we ARE DELETING csc's, then we will pretend to be
7652 # adding with a huge interval. This will force the comments to be
7653 # generated for comparison with the old comments, but not added.
7654 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7655 if ( $rOpts->{'delete-closing-side-comments'} ) {
7656 $rOpts->{'delete-closing-side-comments'} = 0;
7657 $rOpts->{'closing-side-comments'} = 1;
7658 $rOpts->{'closing-side-comment-interval'} = 100000000;
7663 make_block_brace_vertical_tightness_pattern();
7664 make_blank_line_pattern();
7666 if ( $rOpts->{'line-up-parentheses'} ) {
7668 if ( $rOpts->{'indent-only'}
7669 || !$rOpts->{'add-newlines'}
7670 || !$rOpts->{'delete-old-newlines'} )
7672 Perl::Tidy::Warn <<EOM;
7673 -----------------------------------------------------------------------
7674 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7676 The -lp indentation logic requires that perltidy be able to coordinate
7677 arbitrarily large numbers of line breakpoints. This isn't possible
7678 with these flags. Sometimes an acceptable workaround is to use -wocb=3
7679 -----------------------------------------------------------------------
7681 $rOpts->{'line-up-parentheses'} = 0;
7685 # At present, tabs are not compatible with the line-up-parentheses style
7686 # (it would be possible to entab the total leading whitespace
7687 # just prior to writing the line, if desired).
7688 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7689 Perl::Tidy::Warn <<EOM;
7690 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
7692 $rOpts->{'tabs'} = 0;
7695 # Likewise, tabs are not compatible with outdenting..
7696 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7697 Perl::Tidy::Warn <<EOM;
7698 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7700 $rOpts->{'tabs'} = 0;
7703 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7704 Perl::Tidy::Warn <<EOM;
7705 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
7707 $rOpts->{'tabs'} = 0;
7710 if ( !$rOpts->{'space-for-semicolon'} ) {
7711 $want_left_space{'f'} = -1;
7714 if ( $rOpts->{'space-terminal-semicolon'} ) {
7715 $want_left_space{';'} = 1;
7718 # implement outdenting preferences for keywords
7719 %outdent_keyword = ();
7720 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7721 @_ = qw(next last redo goto return); # defaults
7724 # FUTURE: if not a keyword, assume that it is an identifier
7726 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7727 $outdent_keyword{$_} = 1;
7730 Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
7734 # implement user whitespace preferences
7735 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7736 @want_left_space{@_} = (1) x scalar(@_);
7739 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7740 @want_right_space{@_} = (1) x scalar(@_);
7743 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7744 @want_left_space{@_} = (-1) x scalar(@_);
7747 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7748 @want_right_space{@_} = (-1) x scalar(@_);
7750 if ( $rOpts->{'dump-want-left-space'} ) {
7751 dump_want_left_space(*STDOUT);
7755 if ( $rOpts->{'dump-want-right-space'} ) {
7756 dump_want_right_space(*STDOUT);
7760 # default keywords for which space is introduced before an opening paren
7761 # (at present, including them messes up vertical alignment)
7762 @_ = qw(my local our and or err eq ne if else elsif until
7763 unless while for foreach return switch case given when catch);
7764 @space_after_keyword{@_} = (1) x scalar(@_);
7766 # first remove any or all of these if desired
7767 if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7769 # -nsak='*' selects all the above keywords
7770 if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
7771 @space_after_keyword{@_} = (0) x scalar(@_);
7774 # then allow user to add to these defaults
7775 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7776 @space_after_keyword{@_} = (1) x scalar(@_);
7779 # implement user break preferences
7780 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7781 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7782 . : ? && || and or err xor
7785 my $break_after = sub {
7786 foreach my $tok (@_) {
7787 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
7788 my $lbs = $left_bond_strength{$tok};
7789 my $rbs = $right_bond_strength{$tok};
7790 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7791 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7797 my $break_before = sub {
7798 foreach my $tok (@_) {
7799 my $lbs = $left_bond_strength{$tok};
7800 my $rbs = $right_bond_strength{$tok};
7801 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7802 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7808 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7809 $break_before->(@all_operators)
7810 if ( $rOpts->{'break-before-all-operators'} );
7812 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7813 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7815 # make note if breaks are before certain key types
7816 %want_break_before = ();
7817 foreach my $tok ( @all_operators, ',' ) {
7818 $want_break_before{$tok} =
7819 $left_bond_strength{$tok} < $right_bond_strength{$tok};
7822 # Coordinate ?/: breaks, which must be similar
7823 if ( !$want_break_before{':'} ) {
7824 $want_break_before{'?'} = $want_break_before{':'};
7825 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7826 $left_bond_strength{'?'} = NO_BREAK;
7829 # Define here tokens which may follow the closing brace of a do statement
7830 # on the same line, as in:
7831 # } while ( $something);
7832 @_ = qw(until while unless if ; : );
7834 @is_do_follower{@_} = (1) x scalar(@_);
7836 # These tokens may follow the closing brace of an if or elsif block.
7837 # In other words, for cuddled else we want code to look like:
7838 # } elsif ( $something) {
7840 if ( $rOpts->{'cuddled-else'} ) {
7841 @_ = qw(else elsif);
7842 @is_if_brace_follower{@_} = (1) x scalar(@_);
7845 %is_if_brace_follower = ();
7848 # nothing can follow the closing curly of an else { } block:
7849 %is_else_brace_follower = ();
7851 # what can follow a multi-line anonymous sub definition closing curly:
7852 @_ = qw# ; : => or and && || ~~ !~~ ) #;
7854 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7856 # what can follow a one-line anonymous sub closing curly:
7857 # one-line anonymous subs also have ']' here...
7858 # see tk3.t and PP.pm
7859 @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
7861 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7863 # What can follow a closing curly of a block
7864 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7865 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7866 @_ = qw# ; : => or and && || ) #;
7869 # allow cuddled continue if cuddled else is specified
7870 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7872 @is_other_brace_follower{@_} = (1) x scalar(@_);
7874 $right_bond_strength{'{'} = WEAK;
7875 $left_bond_strength{'{'} = VERY_STRONG;
7877 # make -l=0 equal to -l=infinite
7878 if ( !$rOpts->{'maximum-line-length'} ) {
7879 $rOpts->{'maximum-line-length'} = 1000000;
7882 # make -lbl=0 equal to -lbl=infinite
7883 if ( !$rOpts->{'long-block-line-count'} ) {
7884 $rOpts->{'long-block-line-count'} = 1000000;
7887 my $enc = $rOpts->{'character-encoding'};
7888 if ( $enc && $enc !~ /^(none|utf8)$/i ) {
7889 Perl::Tidy::Die <<EOM;
7890 Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
7894 my $ole = $rOpts->{'output-line-ending'};
7903 # Patch for RT #99514, a memoization issue.
7904 # Normally, the user enters one of 'dos', 'win', etc, and we change the
7905 # value in the options parameter to be the corresponding line ending
7906 # character. But, if we are using memoization, on later passes through
7907 # here the option parameter will already have the desired ending
7908 # character rather than the keyword 'dos', 'win', etc. So
7909 # we must check to see if conversion has already been done and, if so,
7910 # bypass the conversion step.
7911 my %endings_inverted = (
7912 "\015\012" => 'dos',
7913 "\015\012" => 'win',
7918 if ( defined( $endings_inverted{$ole} ) ) {
7920 # we already have valid line ending, nothing more to do
7924 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7925 my $str = join " ", keys %endings;
7926 Perl::Tidy::Die <<EOM;
7927 Unrecognized line ending '$ole'; expecting one of: $str
7930 if ( $rOpts->{'preserve-line-endings'} ) {
7931 Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
7932 $rOpts->{'preserve-line-endings'} = undef;
7937 # hashes used to simplify setting whitespace
7939 '{' => $rOpts->{'brace-tightness'},
7940 '}' => $rOpts->{'brace-tightness'},
7941 '(' => $rOpts->{'paren-tightness'},
7942 ')' => $rOpts->{'paren-tightness'},
7943 '[' => $rOpts->{'square-bracket-tightness'},
7944 ']' => $rOpts->{'square-bracket-tightness'},
7953 # frequently used parameters
7954 $rOpts_add_newlines = $rOpts->{'add-newlines'};
7955 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
7956 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7957 $rOpts_block_brace_vertical_tightness =
7958 $rOpts->{'block-brace-vertical-tightness'};
7959 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
7960 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7961 $rOpts_break_at_old_ternary_breakpoints =
7962 $rOpts->{'break-at-old-ternary-breakpoints'};
7963 $rOpts_break_at_old_attribute_breakpoints =
7964 $rOpts->{'break-at-old-attribute-breakpoints'};
7965 $rOpts_break_at_old_comma_breakpoints =
7966 $rOpts->{'break-at-old-comma-breakpoints'};
7967 $rOpts_break_at_old_keyword_breakpoints =
7968 $rOpts->{'break-at-old-keyword-breakpoints'};
7969 $rOpts_break_at_old_logical_breakpoints =
7970 $rOpts->{'break-at-old-logical-breakpoints'};
7971 $rOpts_closing_side_comment_else_flag =
7972 $rOpts->{'closing-side-comment-else-flag'};
7973 $rOpts_closing_side_comment_maximum_text =
7974 $rOpts->{'closing-side-comment-maximum-text'};
7975 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7976 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
7977 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
7978 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
7979 $rOpts_indent_columns = $rOpts->{'indent-columns'};
7980 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
7981 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7982 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
7983 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
7985 $rOpts_variable_maximum_line_length =
7986 $rOpts->{'variable-maximum-line-length'};
7987 $rOpts_short_concatenation_item_length =
7988 $rOpts->{'short-concatenation-item-length'};
7990 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
7991 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
7992 $rOpts_format_skipping = $rOpts->{'format-skipping'};
7993 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
7994 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
7995 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7996 $rOpts_ignore_side_comment_lengths =
7997 $rOpts->{'ignore-side-comment-lengths'};
7999 # Note that both opening and closing tokens can access the opening
8000 # and closing flags of their container types.
8001 %opening_vertical_tightness = (
8002 '(' => $rOpts->{'paren-vertical-tightness'},
8003 '{' => $rOpts->{'brace-vertical-tightness'},
8004 '[' => $rOpts->{'square-bracket-vertical-tightness'},
8005 ')' => $rOpts->{'paren-vertical-tightness'},
8006 '}' => $rOpts->{'brace-vertical-tightness'},
8007 ']' => $rOpts->{'square-bracket-vertical-tightness'},
8010 %closing_vertical_tightness = (
8011 '(' => $rOpts->{'paren-vertical-tightness-closing'},
8012 '{' => $rOpts->{'brace-vertical-tightness-closing'},
8013 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
8014 ')' => $rOpts->{'paren-vertical-tightness-closing'},
8015 '}' => $rOpts->{'brace-vertical-tightness-closing'},
8016 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
8019 $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'};
8021 # assume flag for '>' same as ')' for closing qw quotes
8022 %closing_token_indentation = (
8023 ')' => $rOpts->{'closing-paren-indentation'},
8024 '}' => $rOpts->{'closing-brace-indentation'},
8025 ']' => $rOpts->{'closing-square-bracket-indentation'},
8026 '>' => $rOpts->{'closing-paren-indentation'},
8029 # flag indicating if any closing tokens are indented
8030 $some_closing_token_indentation =
8031 $rOpts->{'closing-paren-indentation'}
8032 || $rOpts->{'closing-brace-indentation'}
8033 || $rOpts->{'closing-square-bracket-indentation'}
8034 || $rOpts->{'indent-closing-brace'};
8036 %opening_token_right = (
8037 '(' => $rOpts->{'opening-paren-right'},
8038 '{' => $rOpts->{'opening-hash-brace-right'},
8039 '[' => $rOpts->{'opening-square-bracket-right'},
8042 %stack_opening_token = (
8043 '(' => $rOpts->{'stack-opening-paren'},
8044 '{' => $rOpts->{'stack-opening-hash-brace'},
8045 '[' => $rOpts->{'stack-opening-square-bracket'},
8048 %stack_closing_token = (
8049 ')' => $rOpts->{'stack-closing-paren'},
8050 '}' => $rOpts->{'stack-closing-hash-brace'},
8051 ']' => $rOpts->{'stack-closing-square-bracket'},
8053 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
8056 sub make_static_block_comment_pattern {
8058 # create the pattern used to identify static block comments
8059 $static_block_comment_pattern = '^\s*##';
8061 # allow the user to change it
8062 if ( $rOpts->{'static-block-comment-prefix'} ) {
8063 my $prefix = $rOpts->{'static-block-comment-prefix'};
8064 $prefix =~ s/^\s*//;
8065 my $pattern = $prefix;
8067 # user may give leading caret to force matching left comments only
8068 if ( $prefix !~ /^\^#/ ) {
8069 if ( $prefix !~ /^#/ ) {
8071 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
8073 $pattern = '^\s*' . $prefix;
8075 eval "'##'=~/$pattern/";
8078 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
8080 $static_block_comment_pattern = $pattern;
8084 sub make_format_skipping_pattern {
8085 my ( $opt_name, $default ) = @_;
8086 my $param = $rOpts->{$opt_name};
8087 unless ($param) { $param = $default }
8089 if ( $param !~ /^#/ ) {
8091 "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
8093 my $pattern = '^' . $param . '\s';
8094 eval "'#'=~/$pattern/";
8097 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
8102 sub make_closing_side_comment_list_pattern {
8104 # turn any input list into a regex for recognizing selected block types
8105 $closing_side_comment_list_pattern = '^\w+';
8106 if ( defined( $rOpts->{'closing-side-comment-list'} )
8107 && $rOpts->{'closing-side-comment-list'} )
8109 $closing_side_comment_list_pattern =
8110 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
8114 sub make_bli_pattern {
8116 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
8117 && $rOpts->{'brace-left-and-indent-list'} )
8119 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
8122 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
8125 sub make_block_brace_vertical_tightness_pattern {
8127 # turn any input list into a regex for recognizing selected block types
8128 $block_brace_vertical_tightness_pattern =
8129 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
8130 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
8131 && $rOpts->{'block-brace-vertical-tightness-list'} )
8133 $block_brace_vertical_tightness_pattern =
8134 make_block_pattern( '-bbvtl',
8135 $rOpts->{'block-brace-vertical-tightness-list'} );
8139 sub make_blank_line_pattern {
8141 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
8142 my $key = 'blank-lines-before-closing-block-list';
8143 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
8144 $blank_lines_before_closing_block_pattern =
8145 make_block_pattern( '-blbcl', $rOpts->{$key} );
8148 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
8149 $key = 'blank-lines-after-opening-block-list';
8150 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
8151 $blank_lines_after_opening_block_pattern =
8152 make_block_pattern( '-blaol', $rOpts->{$key} );
8156 sub make_block_pattern {
8158 # given a string of block-type keywords, return a regex to match them
8159 # The only tricky part is that labels are indicated with a single ':'
8160 # and the 'sub' token text may have additional text after it (name of
8165 # input string: "if else elsif unless while for foreach do : sub";
8166 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
8170 # To distinguish between anonymous subs and named subs, use 'sub' to
8171 # indicate a named sub, and 'asub' to indicate an anonymous sub
8173 my ( $abbrev, $string ) = @_;
8174 my @list = split_words($string);
8178 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
8181 if ( $i eq 'sub' ) {
8183 elsif ( $i eq 'asub' ) {
8185 elsif ( $i eq ';' ) {
8188 elsif ( $i eq '{' ) {
8191 elsif ( $i eq ':' ) {
8192 push @words, '\w+:';
8194 elsif ( $i =~ /^\w/ ) {
8199 "unrecognized block type $i after $abbrev, ignoring\n";
8202 my $pattern = '(' . join( '|', @words ) . ')$';
8203 my $sub_patterns = "";
8204 if ( $seen{'sub'} ) {
8205 $sub_patterns .= '|' . $SUB_PATTERN;
8207 if ( $seen{'asub'} ) {
8208 $sub_patterns .= '|' . $ASUB_PATTERN;
8210 if ($sub_patterns) {
8211 $pattern = '(' . $pattern . $sub_patterns . ')';
8213 $pattern = '^' . $pattern;
8217 sub make_static_side_comment_pattern {
8219 # create the pattern used to identify static side comments
8220 $static_side_comment_pattern = '^##';
8222 # allow the user to change it
8223 if ( $rOpts->{'static-side-comment-prefix'} ) {
8224 my $prefix = $rOpts->{'static-side-comment-prefix'};
8225 $prefix =~ s/^\s*//;
8226 my $pattern = '^' . $prefix;
8227 eval "'##'=~/$pattern/";
8230 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
8232 $static_side_comment_pattern = $pattern;
8236 sub make_closing_side_comment_prefix {
8238 # Be sure we have a valid closing side comment prefix
8239 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
8240 my $csc_prefix_pattern;
8241 if ( !defined($csc_prefix) ) {
8242 $csc_prefix = '## end';
8243 $csc_prefix_pattern = '^##\s+end';
8246 my $test_csc_prefix = $csc_prefix;
8247 if ( $test_csc_prefix !~ /^#/ ) {
8248 $test_csc_prefix = '#' . $test_csc_prefix;
8251 # make a regex to recognize the prefix
8252 my $test_csc_prefix_pattern = $test_csc_prefix;
8254 # escape any special characters
8255 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
8257 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
8259 # allow exact number of intermediate spaces to vary
8260 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
8262 # make sure we have a good pattern
8263 # if we fail this we probably have an error in escaping
8265 eval "'##'=~/$test_csc_prefix_pattern/";
8268 # shouldn't happen..must have screwed up escaping, above
8269 report_definite_bug();
8271 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
8273 # just warn and keep going with defaults
8274 Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
8276 "Using default -cscp instead; please check output\n";
8279 $csc_prefix = $test_csc_prefix;
8280 $csc_prefix_pattern = $test_csc_prefix_pattern;
8283 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
8284 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
8287 sub dump_want_left_space {
8291 These values are the main control of whitespace to the left of a token type;
8292 They may be altered with the -wls parameter.
8293 For a list of token types, use perltidy --dump-token-types (-dtt)
8294 1 means the token wants a space to its left
8295 -1 means the token does not want a space to its left
8296 ------------------------------------------------------------------------
8298 foreach ( sort keys %want_left_space ) {
8299 print $fh "$_\t$want_left_space{$_}\n";
8303 sub dump_want_right_space {
8307 These values are the main control of whitespace to the right of a token type;
8308 They may be altered with the -wrs parameter.
8309 For a list of token types, use perltidy --dump-token-types (-dtt)
8310 1 means the token wants a space to its right
8311 -1 means the token does not want a space to its right
8312 ------------------------------------------------------------------------
8314 foreach ( sort keys %want_right_space ) {
8315 print $fh "$_\t$want_right_space{$_}\n";
8319 { # begin is_essential_whitespace
8321 my %is_sort_grep_map;
8326 @_ = qw(sort grep map);
8327 @is_sort_grep_map{@_} = (1) x scalar(@_);
8329 @_ = qw(for foreach);
8330 @is_for_foreach{@_} = (1) x scalar(@_);
8334 sub is_essential_whitespace {
8336 # Essential whitespace means whitespace which cannot be safely deleted
8337 # without risking the introduction of a syntax error.
8338 # We are given three tokens and their types:
8339 # ($tokenl, $typel) is the token to the left of the space in question
8340 # ($tokenr, $typer) is the token to the right of the space in question
8341 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
8343 # This is a slow routine but is not needed too often except when -mangle
8346 # Note: This routine should almost never need to be changed. It is
8347 # for avoiding syntax problems rather than for formatting.
8348 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
8352 # never combine two bare words or numbers
8353 # examples: and ::ok(1)
8355 # for bla::bla:: abc
8356 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8357 # $input eq"quit" to make $inputeq"quit"
8358 # my $size=-s::SINK if $file; <==OK but we won't do it
8359 # don't join something like: for bla::bla:: abc
8360 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8361 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
8362 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
8364 # do not combine a number with a concatenation dot
8365 # example: pom.caputo:
8366 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
8367 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
8368 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
8370 # do not join a minus with a bare word, because you might form
8371 # a file test operator. Example from Complex.pm:
8372 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
8373 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
8375 # and something like this could become ambiguous without space
8377 # use constant III=>1;
8381 || ( ( $tokenl eq '-' )
8382 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
8384 # '= -' should not become =- or you will get a warning
8386 # || ($tokenr eq '-')
8388 # keep a space between a quote and a bareword to prevent the
8389 # bareword from becoming a quote modifier.
8390 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8392 # keep a space between a token ending in '$' and any word;
8393 # this caused trouble: "die @$ if $@"
8394 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
8395 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8397 # perl is very fussy about spaces before <<
8398 || ( $tokenr =~ /^\<\</ )
8400 # avoid combining tokens to create new meanings. Example:
8401 # $a+ +$b must not become $a++$b
8402 || ( $is_digraph{ $tokenl . $tokenr } )
8403 || ( $is_trigraph{ $tokenl . $tokenr } )
8405 # another example: do not combine these two &'s:
8406 # allow_options & &OPT_EXECCGI
8407 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
8409 # don't combine $$ or $# with any alphanumeric
8410 # (testfile mangle.t with --mangle)
8411 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
8413 # retain any space after possible filehandle
8414 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
8415 || ( $typel eq 'Z' )
8417 # Perl is sensitive to whitespace after the + here:
8418 # $b = xvals $a + 0.1 * yvals $a;
8419 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
8421 # keep paren separate in 'use Foo::Bar ()'
8425 && $tokenll eq 'use' )
8427 # keep any space between filehandle and paren:
8428 # file mangle.t with --mangle:
8429 || ( $typel eq 'Y' && $tokenr eq '(' )
8431 # retain any space after here doc operator ( hereerr.t)
8432 || ( $typel eq 'h' )
8434 # be careful with a space around ++ and --, to avoid ambiguity as to
8435 # which token it applies
8436 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
8437 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
8439 # need space after foreach my; for example, this will fail in
8440 # older versions of Perl:
8441 # foreach my$ft(@filetypes)...
8446 && $is_for_foreach{$tokenll}
8450 # must have space between grep and left paren; "grep(" will fail
8451 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
8453 # don't stick numbers next to left parens, as in:
8454 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
8455 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
8457 # We must be sure that a space between a ? and a quoted string
8458 # remains if the space before the ? remains. [Loca.pm, lockarea]
8460 # $b=join $comma ? ',' : ':', @_; # ok
8461 # $b=join $comma?',' : ':', @_; # ok!
8462 # $b=join $comma ?',' : ':', @_; # error!
8463 # Not really required:
8464 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
8466 # do not remove space between an '&' and a bare word because
8467 # it may turn into a function evaluation, like here
8468 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
8469 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
8470 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8472 # space stacked labels (TODO: check if really necessary)
8473 || ( $typel eq 'J' && $typer eq 'J' )
8475 ; # the value of this long logic sequence is the result we want
8481 my %secret_operators;
8482 my %is_leading_secret_token;
8486 # token lists for perl secret operators as compiled by Philippe Bruhat
8487 # at: https://metacpan.org/module/perlsecret
8488 %secret_operators = (
8489 'Goatse' => [qw#= ( ) =#], #=( )=
8490 'Venus1' => [qw#0 +#], # 0+
8491 'Venus2' => [qw#+ 0#], # +0
8492 'Enterprise' => [qw#) x ! !#], # ()x!!
8493 'Kite1' => [qw#~ ~ <>#], # ~~<>
8494 'Kite2' => [qw#~~ <>#], # ~~<>
8495 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
8498 # The following operators and constants are not included because they
8499 # are normally kept tight by perltidy:
8503 # Make a lookup table indexed by the first token of each operator:
8504 # first token => [list, list, ...]
8505 foreach my $value ( values(%secret_operators) ) {
8506 my $tok = $value->[0];
8507 push @{ $is_leading_secret_token{$tok} }, $value;
8511 sub secret_operator_whitespace {
8513 my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_;
8515 # Loop over all tokens in this line
8516 my ( $j, $token, $type );
8517 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8519 $token = $$rtokens[$j];
8520 $type = $$rtoken_type[$j];
8522 # Skip unless this token might start a secret operator
8523 next if ( $type eq 'b' );
8524 next unless ( $is_leading_secret_token{$token} );
8526 # Loop over all secret operators with this leading token
8527 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
8529 foreach my $tok ( @{$rpattern} ) {
8533 if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' );
8534 if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) {
8542 # set flags to prevent spaces within this operator
8543 for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) {
8544 $rwhite_space_flag->[$jj] = WS_NO;
8549 } ## End Loop over all operators
8550 } ## End loop over all tokens
8554 sub set_white_space_flag {
8556 # This routine examines each pair of nonblank tokens and
8557 # sets values for array @white_space_flag.
8559 # $white_space_flag[$j] is a flag indicating whether a white space
8560 # BEFORE token $j is needed, with the following values:
8562 # WS_NO = -1 do not want a space before token $j
8563 # WS_OPTIONAL= 0 optional space or $j is a whitespace
8564 # WS_YES = 1 want a space before token $j
8567 # The values for the first token will be defined based
8568 # upon the contents of the "to_go" output array.
8570 # Note: retain debug print statements because they are usually
8571 # required after adding new token types.
8575 # initialize these global hashes, which control the use of
8576 # whitespace around tokens:
8581 # %space_after_keyword
8583 # Many token types are identical to the tokens themselves.
8584 # See the tokenizer for a complete list. Here are some special types:
8586 # f = semicolon in for statement
8589 # Note that :: is excluded since it should be contained in an identifier
8590 # Note that '->' is excluded because it never gets space
8591 # parentheses and brackets are excluded since they are handled specially
8592 # curly braces are included but may be overridden by logic, such as
8595 # NEW_TOKENS: create a whitespace rule here. This can be as
8596 # simple as adding your new letter to @spaces_both_sides, for
8600 @is_opening_type{@_} = (1) x scalar(@_);
8603 @is_closing_type{@_} = (1) x scalar(@_);
8605 my @spaces_both_sides = qw"
8606 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
8607 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
8608 &&= ||= //= <=> A k f w F n C Y U G v
8611 my @spaces_left_side = qw"
8612 t ! ~ m p { \ h pp mm Z j
8614 push( @spaces_left_side, '#' ); # avoids warning message
8616 my @spaces_right_side = qw"
8617 ; } ) ] R J ++ -- **=
8619 push( @spaces_right_side, ',' ); # avoids warning message
8621 # Note that we are in a BEGIN block here. Later in processing
8622 # the values of %want_left_space and %want_right_space
8623 # may be overridden by any user settings specified by the
8624 # -wls and -wrs parameters. However the binary_whitespace_rules
8625 # are hardwired and have priority.
8626 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
8627 @want_right_space{@spaces_both_sides} =
8628 (1) x scalar(@spaces_both_sides);
8629 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
8630 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
8631 @want_left_space{@spaces_right_side} =
8632 (-1) x scalar(@spaces_right_side);
8633 @want_right_space{@spaces_right_side} =
8634 (1) x scalar(@spaces_right_side);
8635 $want_left_space{'->'} = WS_NO;
8636 $want_right_space{'->'} = WS_NO;
8637 $want_left_space{'**'} = WS_NO;
8638 $want_right_space{'**'} = WS_NO;
8639 $want_right_space{'CORE::'} = WS_NO;
8641 # These binary_ws_rules are hardwired and have priority over the above
8642 # settings. It would be nice to allow adjustment by the user,
8643 # but it would be complicated to specify.
8645 # hash type information must stay tightly bound
8647 $binary_ws_rules{'i'}{'L'} = WS_NO;
8648 $binary_ws_rules{'i'}{'{'} = WS_YES;
8649 $binary_ws_rules{'k'}{'{'} = WS_YES;
8650 $binary_ws_rules{'U'}{'{'} = WS_YES;
8651 $binary_ws_rules{'i'}{'['} = WS_NO;
8652 $binary_ws_rules{'R'}{'L'} = WS_NO;
8653 $binary_ws_rules{'R'}{'{'} = WS_NO;
8654 $binary_ws_rules{'t'}{'L'} = WS_NO;
8655 $binary_ws_rules{'t'}{'{'} = WS_NO;
8656 $binary_ws_rules{'}'}{'L'} = WS_NO;
8657 $binary_ws_rules{'}'}{'{'} = WS_NO;
8658 $binary_ws_rules{'$'}{'L'} = WS_NO;
8659 $binary_ws_rules{'$'}{'{'} = WS_NO;
8660 $binary_ws_rules{'@'}{'L'} = WS_NO;
8661 $binary_ws_rules{'@'}{'{'} = WS_NO;
8662 $binary_ws_rules{'='}{'L'} = WS_YES;
8663 $binary_ws_rules{'J'}{'J'} = WS_YES;
8665 # the following includes ') {'
8666 # as in : if ( xxx ) { yyy }
8667 $binary_ws_rules{']'}{'L'} = WS_NO;
8668 $binary_ws_rules{']'}{'{'} = WS_NO;
8669 $binary_ws_rules{')'}{'{'} = WS_YES;
8670 $binary_ws_rules{')'}{'['} = WS_NO;
8671 $binary_ws_rules{']'}{'['} = WS_NO;
8672 $binary_ws_rules{']'}{'{'} = WS_NO;
8673 $binary_ws_rules{'}'}{'['} = WS_NO;
8674 $binary_ws_rules{'R'}{'['} = WS_NO;
8676 $binary_ws_rules{']'}{'++'} = WS_NO;
8677 $binary_ws_rules{']'}{'--'} = WS_NO;
8678 $binary_ws_rules{')'}{'++'} = WS_NO;
8679 $binary_ws_rules{')'}{'--'} = WS_NO;
8681 $binary_ws_rules{'R'}{'++'} = WS_NO;
8682 $binary_ws_rules{'R'}{'--'} = WS_NO;
8684 $binary_ws_rules{'i'}{'Q'} = WS_YES;
8685 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
8687 # FIXME: we could to split 'i' into variables and functions
8688 # and have no space for functions but space for variables. For now,
8689 # I have a special patch in the special rules below
8690 $binary_ws_rules{'i'}{'('} = WS_NO;
8692 $binary_ws_rules{'w'}{'('} = WS_NO;
8693 $binary_ws_rules{'w'}{'{'} = WS_YES;
8694 } ## end BEGIN block
8696 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
8697 my ( $last_token, $last_type, $last_block_type, $token, $type,
8699 my (@white_space_flag);
8700 my $j_tight_closing_paren = -1;
8702 if ( $max_index_to_go >= 0 ) {
8703 $token = $tokens_to_go[$max_index_to_go];
8704 $type = $types_to_go[$max_index_to_go];
8705 $block_type = $block_type_to_go[$max_index_to_go];
8707 #---------------------------------------------------------------
8708 # Patch due to splitting of tokens with leading ->
8709 #---------------------------------------------------------------
8711 # This routine is dealing with the raw tokens from the tokenizer,
8712 # but to get started it needs the previous token, which will
8713 # have been stored in the '_to_go' arrays.
8715 # This patch avoids requiring two iterations to
8716 # converge for cases such as the following, where a paren
8717 # comes in on a line following a variable with leading arrow:
8718 # $self->{main}->add_content_defer_opening
8719 # ($name, $wmkf, $self->{attrs}, $self);
8720 # In this case when we see the opening paren on line 2 we need
8721 # to know if the last token on the previous line had an arrow,
8722 # but it has already been split off so we have to add it back
8723 # in to avoid getting an unwanted space before the paren.
8724 if ( $type =~ /^[wi]$/ ) {
8725 my $im = $iprev_to_go[$max_index_to_go];
8726 my $tm = ( $im >= 0 ) ? $types_to_go[$im] : "";
8727 if ( $tm eq '->' ) { $token = $tm . $token }
8730 #---------------------------------------------------------------
8731 # End patch due to splitting of tokens with leading ->
8732 #---------------------------------------------------------------
8742 # main loop over all tokens to define the whitespace flags
8743 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8745 if ( $$rtoken_type[$j] eq 'b' ) {
8746 $white_space_flag[$j] = WS_OPTIONAL;
8750 # set a default value, to be changed as needed
8752 $last_token = $token;
8754 $last_block_type = $block_type;
8755 $token = $$rtokens[$j];
8756 $type = $$rtoken_type[$j];
8757 $block_type = $$rblock_type[$j];
8759 #---------------------------------------------------------------
8760 # Whitespace Rules Section 1:
8761 # Handle space on the inside of opening braces.
8762 #---------------------------------------------------------------
8765 if ( $is_opening_type{$last_type} ) {
8767 $j_tight_closing_paren = -1;
8769 # let's keep empty matched braces together: () {} []
8771 if ( $token eq $matching_token{$last_token} ) {
8781 # we're considering the right of an opening brace
8782 # tightness = 0 means always pad inside with space
8783 # tightness = 1 means pad inside if "complex"
8784 # tightness = 2 means never pad inside with space
8787 if ( $last_type eq '{'
8788 && $last_token eq '{'
8789 && $last_block_type )
8791 $tightness = $rOpts_block_brace_tightness;
8793 else { $tightness = $tightness{$last_token} }
8795 #=============================================================
8796 # Patch for test problem fabrice_bug.pl
8797 # We must always avoid spaces around a bare word beginning
8799 # my $before = ${^PREMATCH};
8800 # Because all of the following cause an error in perl:
8801 # my $before = ${ ^PREMATCH };
8802 # my $before = ${ ^PREMATCH};
8803 # my $before = ${^PREMATCH };
8804 # So if brace tightness flag is -bt=0 we must temporarily reset
8805 # to bt=1. Note that here we must set tightness=1 and not 2 so
8806 # that the closing space
8807 # is also avoided (via the $j_tight_closing_paren flag in coding)
8808 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8810 #=============================================================
8812 if ( $tightness <= 0 ) {
8815 elsif ( $tightness > 1 ) {
8820 # Patch to count '-foo' as single token so that
8821 # each of $a{-foo} and $a{foo} and $a{'foo'} do
8822 # not get spaces with default formatting.
8826 && $last_token eq '{'
8827 && $$rtoken_type[ $j + 1 ] eq 'w' );
8829 # $j_next is where a closing token should be if
8830 # the container has a single token
8832 ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8835 my $tok_next = $$rtokens[$j_next];
8836 my $type_next = $$rtoken_type[$j_next];
8838 # for tightness = 1, if there is just one token
8839 # within the matching pair, we will keep it tight
8841 $tok_next eq $matching_token{$last_token}
8843 # but watch out for this: [ [ ] (misc.t)
8844 && $last_token ne $token
8846 # double diamond is usually spaced
8852 # remember where to put the space for the closing paren
8853 $j_tight_closing_paren = $j_next;
8861 } # end setting space flag inside opening tokens
8863 if FORMATTER_DEBUG_FLAG_WHITE;
8865 #---------------------------------------------------------------
8866 # Whitespace Rules Section 2:
8867 # Handle space on inside of closing brace pairs.
8868 #---------------------------------------------------------------
8871 if ( $is_closing_type{$type} ) {
8873 if ( $j == $j_tight_closing_paren ) {
8875 $j_tight_closing_paren = -1;
8880 if ( !defined($ws) ) {
8883 if ( $type eq '}' && $token eq '}' && $block_type ) {
8884 $tightness = $rOpts_block_brace_tightness;
8886 else { $tightness = $tightness{$token} }
8888 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8891 } # end setting space flag inside closing tokens
8894 if FORMATTER_DEBUG_FLAG_WHITE;
8896 #---------------------------------------------------------------
8897 # Whitespace Rules Section 3:
8898 # Use the binary rule table.
8899 #---------------------------------------------------------------
8900 if ( !defined($ws) ) {
8901 $ws = $binary_ws_rules{$last_type}{$type};
8904 if FORMATTER_DEBUG_FLAG_WHITE;
8906 #---------------------------------------------------------------
8907 # Whitespace Rules Section 4:
8908 # Handle some special cases.
8909 #---------------------------------------------------------------
8910 if ( $token eq '(' ) {
8912 # This will have to be tweaked as tokenization changes.
8913 # We usually want a space at '} (', for example:
8914 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8917 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8918 # At present, the above & block is marked as type L/R so this case
8919 # won't go through here.
8920 if ( $last_type eq '}' ) { $ws = WS_YES }
8922 # NOTE: some older versions of Perl had occasional problems if
8923 # spaces are introduced between keywords or functions and opening
8924 # parens. So the default is not to do this except is certain
8925 # cases. The current Perl seems to tolerate spaces.
8927 # Space between keyword and '('
8928 elsif ( $last_type eq 'k' ) {
8930 unless ( $rOpts_space_keyword_paren
8931 || $space_after_keyword{$last_token} );
8934 # Space between function and '('
8935 # -----------------------------------------------------
8936 # 'w' and 'i' checks for something like:
8937 # myfun( &myfun( ->myfun(
8938 # -----------------------------------------------------
8939 elsif (( $last_type =~ /^[wUG]$/ )
8940 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8942 $ws = WS_NO unless ($rOpts_space_function_paren);
8945 # space between something like $i and ( in
8946 # for $i ( 0 .. 20 ) {
8947 # FIXME: eventually, type 'i' needs to be split into multiple
8948 # token types so this can be a hardwired rule.
8949 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8953 # allow constant function followed by '()' to retain no space
8954 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8959 # patch for SWITCH/CASE: make space at ']{' optional
8960 # since the '{' might begin a case or when block
8961 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8965 # keep space between 'sub' and '{' for anonymous sub definition
8966 if ( $type eq '{' ) {
8967 if ( $last_token eq 'sub' ) {
8971 # this is needed to avoid no space in '){'
8972 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8974 # avoid any space before the brace or bracket in something like
8975 # @opts{'a','b',...}
8976 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8981 elsif ( $type eq 'i' ) {
8983 # never a space before ->
8984 if ( $token =~ /^\-\>/ ) {
8989 # retain any space between '-' and bare word
8990 elsif ( $type eq 'w' || $type eq 'C' ) {
8991 $ws = WS_OPTIONAL if $last_type eq '-';
8993 # never a space before ->
8994 if ( $token =~ /^\-\>/ ) {
8999 # retain any space between '-' and bare word
9000 # example: avoid space between 'USER' and '-' here:
9001 # $myhash{USER-NAME}='steve';
9002 elsif ( $type eq 'm' || $type eq '-' ) {
9003 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
9006 # always space before side comment
9007 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
9009 # always preserver whatever space was used after a possible
9010 # filehandle (except _) or here doc operator
9013 && ( ( $last_type eq 'Z' && $last_token ne '_' )
9014 || $last_type eq 'h' )
9021 if FORMATTER_DEBUG_FLAG_WHITE;
9023 #---------------------------------------------------------------
9024 # Whitespace Rules Section 5:
9025 # Apply default rules not covered above.
9026 #---------------------------------------------------------------
9028 # If we fall through to here, look at the pre-defined hash tables for
9029 # the two tokens, and:
9030 # if (they are equal) use the common value
9031 # if (either is zero or undef) use the other
9032 # if (either is -1) use it
9046 if ( !defined($ws) ) {
9047 my $wl = $want_left_space{$type};
9048 my $wr = $want_right_space{$last_type};
9049 if ( !defined($wl) ) { $wl = 0 }
9050 if ( !defined($wr) ) { $wr = 0 }
9051 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
9054 if ( !defined($ws) ) {
9057 "WS flag is undefined for tokens $last_token $token\n");
9060 # Treat newline as a whitespace. Otherwise, we might combine
9061 # 'Send' and '-recipients' here according to the above rules:
9062 # my $msg = new Fax::Send
9063 # -recipients => $to,
9065 if ( $ws == 0 && $j == 0 ) { $ws = 1 }
9070 && ( $last_type !~ /^[Zh]$/ ) )
9073 # If this happens, we have a non-fatal but undesirable
9074 # hole in the above rules which should be patched.
9076 "WS flag is zero for tokens $last_token $token\n");
9078 $white_space_flag[$j] = $ws;
9080 FORMATTER_DEBUG_FLAG_WHITE && do {
9081 my $str = substr( $last_token, 0, 15 );
9082 $str .= ' ' x ( 16 - length($str) );
9083 if ( !defined($ws_1) ) { $ws_1 = "*" }
9084 if ( !defined($ws_2) ) { $ws_2 = "*" }
9085 if ( !defined($ws_3) ) { $ws_3 = "*" }
9086 if ( !defined($ws_4) ) { $ws_4 = "*" }
9088 "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
9092 if ($rOpts_tight_secret_operators) {
9093 secret_operator_whitespace( $jmax, $rtokens, $rtoken_type,
9094 \@white_space_flag );
9097 return \@white_space_flag;
9098 } ## end sub set_white_space_flag
9100 { # begin print_line_of_tokens
9107 my $rcontainer_type;
9108 my $rcontainer_environment;
9111 my $rnesting_tokens;
9113 my $rnesting_blocks;
9116 my $guessed_indentation_level;
9118 # These local token variables are stored by store_token_to_go:
9121 my $container_environment;
9123 my $in_continued_quote;
9126 my $no_internal_newlines;
9132 # routine to pull the jth token from the line of tokens
9135 $token = $$rtokens[$j];
9136 $type = $$rtoken_type[$j];
9137 $block_type = $$rblock_type[$j];
9138 $container_type = $$rcontainer_type[$j];
9139 $container_environment = $$rcontainer_environment[$j];
9140 $type_sequence = $$rtype_sequence[$j];
9141 $level = $$rlevels[$j];
9142 $slevel = $$rslevels[$j];
9143 $nesting_blocks = $$rnesting_blocks[$j];
9144 $ci_level = $$rci_levels[$j];
9150 sub save_current_token {
9153 $block_type, $ci_level,
9154 $container_environment, $container_type,
9155 $in_continued_quote, $level,
9156 $nesting_blocks, $no_internal_newlines,
9158 $type, $type_sequence,
9162 sub restore_current_token {
9164 $block_type, $ci_level,
9165 $container_environment, $container_type,
9166 $in_continued_quote, $level,
9167 $nesting_blocks, $no_internal_newlines,
9169 $type, $type_sequence,
9176 # Returns the length of a token, given:
9177 # $token=text of the token
9179 # $not_first_token = should be TRUE if this is not the first token of
9180 # the line. It might the index of this token in an array. It is
9181 # used to test for a side comment vs a block comment.
9182 # Note: Eventually this should be the only routine determining the
9183 # length of a token in this package.
9184 my ( $token, $type, $not_first_token ) = @_;
9185 my $token_length = length($token);
9187 # We mark lengths of side comments as just 1 if we are
9188 # ignoring their lengths when setting line breaks.
9190 if ( $rOpts_ignore_side_comment_lengths
9193 return $token_length;
9198 # return length of ith token in @{$rtokens}
9200 return token_length( $$rtokens[$i], $$rtoken_type[$i], $i );
9203 # Routine to place the current token into the output stream.
9204 # Called once per output token.
9205 sub store_token_to_go {
9207 my $flag = $no_internal_newlines;
9208 if ( $_[0] ) { $flag = 1 }
9210 $tokens_to_go[ ++$max_index_to_go ] = $token;
9211 $types_to_go[$max_index_to_go] = $type;
9212 $nobreak_to_go[$max_index_to_go] = $flag;
9213 $old_breakpoint_to_go[$max_index_to_go] = 0;
9214 $forced_breakpoint_to_go[$max_index_to_go] = 0;
9215 $block_type_to_go[$max_index_to_go] = $block_type;
9216 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
9217 $container_environment_to_go[$max_index_to_go] = $container_environment;
9218 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
9219 $ci_levels_to_go[$max_index_to_go] = $ci_level;
9220 $mate_index_to_go[$max_index_to_go] = -1;
9221 $matching_token_to_go[$max_index_to_go] = '';
9222 $bond_strength_to_go[$max_index_to_go] = 0;
9224 # Note: negative levels are currently retained as a diagnostic so that
9225 # the 'final indentation level' is correctly reported for bad scripts.
9226 # But this means that every use of $level as an index must be checked.
9227 # If this becomes too much of a problem, we might give up and just clip
9229 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
9230 $levels_to_go[$max_index_to_go] = $level;
9231 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
9233 # link the non-blank tokens
9234 my $iprev = $max_index_to_go - 1;
9235 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
9236 $iprev_to_go[$max_index_to_go] = $iprev;
9237 $inext_to_go[$iprev] = $max_index_to_go
9238 if ( $iprev >= 0 && $type ne 'b' );
9239 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
9241 $token_lengths_to_go[$max_index_to_go] =
9242 token_length( $token, $type, $max_index_to_go );
9244 # We keep a running sum of token lengths from the start of this batch:
9245 # summed_lengths_to_go[$i] = total length to just before token $i
9246 # summed_lengths_to_go[$i+1] = total length to just after token $i
9247 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
9248 $summed_lengths_to_go[$max_index_to_go] +
9249 $token_lengths_to_go[$max_index_to_go];
9251 # Define the indentation that this token would have if it started
9252 # a new line. We have to do this now because we need to know this
9253 # when considering one-line blocks.
9254 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
9256 # remember previous nonblank tokens seen
9257 if ( $type ne 'b' ) {
9258 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
9259 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
9260 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
9261 $last_nonblank_index_to_go = $max_index_to_go;
9262 $last_nonblank_type_to_go = $type;
9263 $last_nonblank_token_to_go = $token;
9264 if ( $type eq ',' ) {
9265 $comma_count_in_batch++;
9269 FORMATTER_DEBUG_FLAG_STORE && do {
9270 my ( $a, $b, $c ) = caller();
9272 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
9276 sub insert_new_token_to_go {
9278 # insert a new token into the output stream. use same level as
9279 # previous token; assumes a character at max_index_to_go.
9280 save_current_token();
9281 ( $token, $type, $slevel, $no_internal_newlines ) = @_;
9283 if ( $max_index_to_go == UNDEFINED_INDEX ) {
9284 warning("code bug: bad call to insert_new_token_to_go\n");
9286 $level = $levels_to_go[$max_index_to_go];
9288 # FIXME: it seems to be necessary to use the next, rather than
9289 # previous, value of this variable when creating a new blank (align.t)
9290 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
9291 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
9292 $ci_level = $ci_levels_to_go[$max_index_to_go];
9293 $container_environment = $container_environment_to_go[$max_index_to_go];
9294 $in_continued_quote = 0;
9296 $type_sequence = "";
9297 store_token_to_go();
9298 restore_current_token();
9302 sub print_line_of_tokens {
9304 my $line_of_tokens = shift;
9306 # This routine is called once per input line to process all of
9307 # the tokens on that line. This is the first stage of
9310 # Full-line comments and blank lines may be processed immediately.
9312 # For normal lines of code, the tokens are stored one-by-one,
9313 # via calls to 'sub store_token_to_go', until a known line break
9314 # point is reached. Then, the batch of collected tokens is
9315 # passed along to 'sub output_line_to_go' for further
9316 # processing. This routine decides if there should be
9317 # whitespace between each pair of non-white tokens, so later
9318 # routines only need to decide on any additional line breaks.
9319 # Any whitespace is initially a single space character. Later,
9320 # the vertical aligner may expand that to be multiple space
9321 # characters if necessary for alignment.
9323 # extract input line number for error messages
9324 $input_line_number = $line_of_tokens->{_line_number};
9326 $rtoken_type = $line_of_tokens->{_rtoken_type};
9327 $rtokens = $line_of_tokens->{_rtokens};
9328 $rlevels = $line_of_tokens->{_rlevels};
9329 $rslevels = $line_of_tokens->{_rslevels};
9330 $rblock_type = $line_of_tokens->{_rblock_type};
9331 $rcontainer_type = $line_of_tokens->{_rcontainer_type};
9332 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
9333 $rtype_sequence = $line_of_tokens->{_rtype_sequence};
9334 $input_line = $line_of_tokens->{_line_text};
9335 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
9336 $rci_levels = $line_of_tokens->{_rci_levels};
9337 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
9339 $in_continued_quote = $starting_in_quote =
9340 $line_of_tokens->{_starting_in_quote};
9341 $in_quote = $line_of_tokens->{_ending_in_quote};
9342 $ending_in_quote = $in_quote;
9343 $guessed_indentation_level =
9344 $line_of_tokens->{_guessed_indentation_level};
9349 my $next_nonblank_token;
9350 my $next_nonblank_token_type;
9351 my $rwhite_space_flag;
9353 $jmax = @$rtokens - 1;
9355 $container_type = "";
9356 $container_environment = "";
9357 $type_sequence = "";
9358 $no_internal_newlines = 1 - $rOpts_add_newlines;
9359 $is_static_block_comment = 0;
9361 # Handle a continued quote..
9362 if ($in_continued_quote) {
9364 # A line which is entirely a quote or pattern must go out
9365 # verbatim. Note: the \n is contained in $input_line.
9367 if ( ( $input_line =~ "\t" ) ) {
9368 note_embedded_tab();
9370 write_unindented_line("$input_line");
9371 $last_line_had_side_comment = 0;
9376 # Write line verbatim if we are in a formatting skip section
9377 if ($in_format_skipping_section) {
9378 write_unindented_line("$input_line");
9379 $last_line_had_side_comment = 0;
9381 # Note: extra space appended to comment simplifies pattern matching
9383 && $$rtoken_type[0] eq '#'
9384 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
9386 $in_format_skipping_section = 0;
9387 write_logfile_entry("Exiting formatting skip section\n");
9388 $file_writer_object->reset_consecutive_blank_lines();
9393 # See if we are entering a formatting skip section
9394 if ( $rOpts_format_skipping
9396 && $$rtoken_type[0] eq '#'
9397 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
9400 $in_format_skipping_section = 1;
9401 write_logfile_entry("Entering formatting skip section\n");
9402 write_unindented_line("$input_line");
9403 $last_line_had_side_comment = 0;
9407 # delete trailing blank tokens
9408 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
9410 # Handle a blank line..
9413 # If keep-old-blank-lines is zero, we delete all
9414 # old blank lines and let the blank line rules generate any
9416 if ($rOpts_keep_old_blank_lines) {
9418 $file_writer_object->write_blank_code_line(
9419 $rOpts_keep_old_blank_lines == 2 );
9420 $last_line_leading_type = 'b';
9422 $last_line_had_side_comment = 0;
9426 # see if this is a static block comment (starts with ## by default)
9427 my $is_static_block_comment_without_leading_space = 0;
9429 && $$rtoken_type[0] eq '#'
9430 && $rOpts->{'static-block-comments'}
9431 && $input_line =~ /$static_block_comment_pattern/o )
9433 $is_static_block_comment = 1;
9434 $is_static_block_comment_without_leading_space =
9435 substr( $input_line, 0, 1 ) eq '#';
9438 # Check for comments which are line directives
9439 # Treat exactly as static block comments without leading space
9440 # reference: perlsyn, near end, section Plain Old Comments (Not!)
9441 # example: '# line 42 "new_filename.plx"'
9444 && $$rtoken_type[0] eq '#'
9445 && $input_line =~ /^\# \s*
9447 (?:\s("?)([^"]+)\2)? \s*
9451 $is_static_block_comment = 1;
9452 $is_static_block_comment_without_leading_space = 1;
9455 # create a hanging side comment if appropriate
9456 my $is_hanging_side_comment;
9459 && $$rtoken_type[0] eq '#' # only token is a comment
9460 && $last_line_had_side_comment # last line had side comment
9461 && $input_line =~ /^\s/ # there is some leading space
9462 && !$is_static_block_comment # do not make static comment hanging
9463 && $rOpts->{'hanging-side-comments'} # user is allowing
9464 # hanging side comments
9469 # We will insert an empty qw string at the start of the token list
9470 # to force this comment to be a side comment. The vertical aligner
9471 # should then line it up with the previous side comment.
9472 $is_hanging_side_comment = 1;
9473 unshift @$rtoken_type, 'q';
9474 unshift @$rtokens, '';
9475 unshift @$rlevels, $$rlevels[0];
9476 unshift @$rslevels, $$rslevels[0];
9477 unshift @$rblock_type, '';
9478 unshift @$rcontainer_type, '';
9479 unshift @$rcontainer_environment, '';
9480 unshift @$rtype_sequence, '';
9481 unshift @$rnesting_tokens, $$rnesting_tokens[0];
9482 unshift @$rci_levels, $$rci_levels[0];
9483 unshift @$rnesting_blocks, $$rnesting_blocks[0];
9487 # remember if this line has a side comment
9488 $last_line_had_side_comment =
9489 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
9491 # Handle a block (full-line) comment..
9492 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
9494 if ( $rOpts->{'delete-block-comments'} ) { return }
9496 if ( $rOpts->{'tee-block-comments'} ) {
9497 $file_writer_object->tee_on();
9500 destroy_one_line_block();
9501 output_line_to_go();
9503 # output a blank line before block comments
9505 # unless we follow a blank or comment line
9506 $last_line_leading_type !~ /^[#b]$/
9509 && $rOpts->{'blanks-before-comments'}
9511 # not if this is an empty comment line
9512 && $$rtokens[0] ne '#'
9514 # not after a short line ending in an opening token
9515 # because we already have space above this comment.
9516 # Note that the first comment in this if block, after
9517 # the 'if (', does not get a blank line because of this.
9518 && !$last_output_short_opening_token
9520 # never before static block comments
9521 && !$is_static_block_comment
9524 flush(); # switching to new output stream
9525 $file_writer_object->write_blank_code_line();
9526 $last_line_leading_type = 'b';
9529 # TRIM COMMENTS -- This could be turned off as a option
9530 $$rtokens[0] =~ s/\s*$//; # trim right end
9533 $rOpts->{'indent-block-comments'}
9534 && ( !$rOpts->{'indent-spaced-block-comments'}
9535 || $input_line =~ /^\s+/ )
9536 && !$is_static_block_comment_without_leading_space
9540 store_token_to_go();
9541 output_line_to_go();
9544 flush(); # switching to new output stream
9545 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
9546 $last_line_leading_type = '#';
9548 if ( $rOpts->{'tee-block-comments'} ) {
9549 $file_writer_object->tee_off();
9554 # compare input/output indentation except for continuation lines
9555 # (because they have an unknown amount of initial blank space)
9556 # and lines which are quotes (because they may have been outdented)
9557 # Note: this test is placed here because we know the continuation flag
9558 # at this point, which allows us to avoid non-meaningful checks.
9559 my $structural_indentation_level = $$rlevels[0];
9560 compare_indentation_levels( $guessed_indentation_level,
9561 $structural_indentation_level )
9562 unless ( $is_hanging_side_comment
9563 || $$rci_levels[0] > 0
9564 || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' );
9566 # Patch needed for MakeMaker. Do not break a statement
9567 # in which $VERSION may be calculated. See MakeMaker.pm;
9568 # this is based on the coding in it.
9569 # The first line of a file that matches this will be eval'd:
9570 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9572 # *VERSION = \'1.01';
9573 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
9574 # We will pass such a line straight through without breaking
9575 # it unless -npvl is used.
9577 # Patch for problem reported in RT #81866, where files
9578 # had been flattened into a single line and couldn't be
9579 # tidied without -npvl. There are two parts to this patch:
9580 # First, it is not done for a really long line (80 tokens for now).
9581 # Second, we will only allow up to one semicolon
9582 # before the VERSION. We need to allow at least one semicolon
9583 # for statements like this:
9584 # require Exporter; our $VERSION = $Exporter::VERSION;
9585 # where both statements must be on a single line for MakeMaker
9587 my $is_VERSION_statement = 0;
9588 if ( !$saw_VERSION_in_this_file
9591 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
9593 $saw_VERSION_in_this_file = 1;
9594 $is_VERSION_statement = 1;
9595 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
9596 $no_internal_newlines = 1;
9599 # take care of indentation-only
9600 # NOTE: In previous versions we sent all qw lines out immediately here.
9601 # No longer doing this: also write a line which is entirely a 'qw' list
9602 # to allow stacking of opening and closing tokens. Note that interior
9603 # qw lines will still go out at the end of this routine.
9604 if ( $rOpts->{'indent-only'} ) {
9606 my $line = $input_line;
9608 # delete side comments if requested with -io, but
9609 # we will not allow deleting of closing side comments with -io
9610 # because the coding would be more complex
9611 if ( $rOpts->{'delete-side-comments'}
9612 && $rtoken_type->[$jmax] eq '#' )
9614 $line = join "", @{$rtokens}[ 0 .. $jmax - 1 ];
9622 $container_type = "";
9623 $container_environment = "";
9624 $type_sequence = "";
9625 store_token_to_go();
9626 output_line_to_go();
9630 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
9631 push( @$rtoken_type, 'b', 'b' );
9632 ($rwhite_space_flag) =
9633 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
9635 # if the buffer hasn't been flushed, add a leading space if
9636 # necessary to keep essential whitespace. This is really only
9637 # necessary if we are squeezing out all ws.
9638 if ( $max_index_to_go >= 0 ) {
9640 $old_line_count_in_batch++;
9643 is_essential_whitespace(
9644 $last_last_nonblank_token,
9645 $last_last_nonblank_type,
9646 $tokens_to_go[$max_index_to_go],
9647 $types_to_go[$max_index_to_go],
9653 my $slevel = $$rslevels[0];
9654 insert_new_token_to_go( ' ', 'b', $slevel,
9655 $no_internal_newlines );
9659 # If we just saw the end of an elsif block, write nag message
9660 # if we do not see another elseif or an else.
9661 if ($looking_for_else) {
9663 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
9664 write_logfile_entry("(No else block)\n");
9666 $looking_for_else = 0;
9669 # This is a good place to kill incomplete one-line blocks
9672 ( $semicolons_before_block_self_destruct == 0 )
9673 && ( $max_index_to_go >= 0 )
9674 && ( $types_to_go[$max_index_to_go] eq ';' )
9675 && ( $$rtokens[0] ne '}' )
9678 # Patch for RT #98902. Honor request to break at old commas.
9679 || ( $rOpts_break_at_old_comma_breakpoints
9680 && $max_index_to_go >= 0
9681 && $types_to_go[$max_index_to_go] eq ',' )
9684 $forced_breakpoint_to_go[$max_index_to_go] = 1
9685 if ($rOpts_break_at_old_comma_breakpoints);
9686 destroy_one_line_block();
9687 output_line_to_go();
9690 # loop to process the tokens one-by-one
9694 foreach $j ( 0 .. $jmax ) {
9696 # pull out the local values for this token
9699 if ( $type eq '#' ) {
9701 # trim trailing whitespace
9702 # (there is no option at present to prevent this)
9706 $rOpts->{'delete-side-comments'}
9708 # delete closing side comments if necessary
9709 || ( $rOpts->{'delete-closing-side-comments'}
9710 && $token =~ /$closing_side_comment_prefix_pattern/o
9711 && $last_nonblank_block_type =~
9712 /$closing_side_comment_list_pattern/o )
9715 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9716 unstore_token_to_go();
9722 # If we are continuing after seeing a right curly brace, flush
9723 # buffer unless we see what we are looking for, as in
9725 if ( $rbrace_follower && $type ne 'b' ) {
9727 unless ( $rbrace_follower->{$token} ) {
9728 output_line_to_go();
9730 $rbrace_follower = undef;
9733 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
9734 $next_nonblank_token = $$rtokens[$j_next];
9735 $next_nonblank_token_type = $$rtoken_type[$j_next];
9737 #--------------------------------------------------------
9738 # Start of section to patch token text
9739 #--------------------------------------------------------
9741 # Modify certain tokens here for whitespace
9742 # The following is not yet done, but could be:
9744 if ( $type =~ /^[wit]$/ ) {
9747 # change '$ var' to '$var' etc
9748 # '-> new' to '->new'
9749 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
9753 # Split identifiers with leading arrows, inserting blanks if
9754 # necessary. It is easier and safer here than in the
9755 # tokenizer. For example '->new' becomes two tokens, '->' and
9756 # 'new' with a possible blank between.
9758 # Note: there is a related patch in sub set_white_space_flag
9759 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
9760 my $token_save = $1;
9761 my $type_save = $type;
9763 # store a blank to left of arrow if necessary
9764 if ( $max_index_to_go >= 0
9765 && $types_to_go[$max_index_to_go] ne 'b'
9766 && $want_left_space{'->'} == WS_YES )
9768 insert_new_token_to_go( ' ', 'b', $slevel,
9769 $no_internal_newlines );
9772 # then store the arrow
9775 store_token_to_go();
9777 # then reset the current token to be the remainder,
9778 # and reset the whitespace flag according to the arrow
9779 $$rwhite_space_flag[$j] = $want_right_space{'->'};
9780 $token = $token_save;
9784 if ( $token =~ /$SUB_PATTERN/ ) { $token =~ s/\s+/ /g }
9786 # trim identifiers of trailing blanks which can occur
9787 # under some unusual circumstances, such as if the
9788 # identifier 'witch' has trailing blanks on input here:
9792 # () # prototype may be on new line ...
9794 if ( $type eq 'i' ) { $token =~ s/\s+$//g }
9797 # change 'LABEL :' to 'LABEL:'
9798 elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
9800 # patch to add space to something like "x10"
9801 # This avoids having to split this token in the pre-tokenizer
9802 elsif ( $type eq 'n' ) {
9803 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
9806 elsif ( $type eq 'Q' ) {
9807 note_embedded_tab() if ( $token =~ "\t" );
9809 # make note of something like '$var = s/xxx/yyy/;'
9810 # in case it should have been '$var =~ s/xxx/yyy/;'
9812 $token =~ /^(s|tr|y|m|\/)/
9813 && $last_nonblank_token =~ /^(=|==|!=)$/
9815 # preceded by simple scalar
9816 && $last_last_nonblank_type eq 'i'
9817 && $last_last_nonblank_token =~ /^\$/
9819 # followed by some kind of termination
9820 # (but give complaint if we can's see far enough ahead)
9821 && $next_nonblank_token =~ /^[; \)\}]$/
9823 # scalar is not declared
9825 $types_to_go[0] eq 'k'
9826 && $tokens_to_go[0] =~ /^(my|our|local)$/
9830 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
9832 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
9837 # trim blanks from right of qw quotes
9838 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
9839 elsif ( $type eq 'q' ) {
9841 note_embedded_tab() if ( $token =~ "\t" );
9844 #--------------------------------------------------------
9845 # End of section to patch token text
9846 #--------------------------------------------------------
9848 # insert any needed whitespace
9849 if ( ( $type ne 'b' )
9850 && ( $max_index_to_go >= 0 )
9851 && ( $types_to_go[$max_index_to_go] ne 'b' )
9852 && $rOpts_add_whitespace )
9854 my $ws = $$rwhite_space_flag[$j];
9857 insert_new_token_to_go( ' ', 'b', $slevel,
9858 $no_internal_newlines );
9862 # Do not allow breaks which would promote a side comment to a
9863 # block comment. In order to allow a break before an opening
9864 # or closing BLOCK, followed by a side comment, those sections
9865 # of code will handle this flag separately.
9866 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
9867 my $is_opening_BLOCK =
9871 && $block_type ne 't' );
9872 my $is_closing_BLOCK =
9876 && $block_type ne 't' );
9878 if ( $side_comment_follows
9879 && !$is_opening_BLOCK
9880 && !$is_closing_BLOCK )
9882 $no_internal_newlines = 1;
9885 # We're only going to handle breaking for code BLOCKS at this
9886 # (top) level. Other indentation breaks will be handled by
9887 # sub scan_list, which is better suited to dealing with them.
9888 if ($is_opening_BLOCK) {
9890 # Tentatively output this token. This is required before
9891 # calling starting_one_line_block. We may have to unstore
9892 # it, though, if we have to break before it.
9893 store_token_to_go($side_comment_follows);
9895 # Look ahead to see if we might form a one-line block
9897 starting_one_line_block( $j, $jmax, $level, $slevel,
9898 $ci_level, $rtokens, $rtoken_type, $rblock_type );
9899 clear_breakpoint_undo_stack();
9901 # to simplify the logic below, set a flag to indicate if
9902 # this opening brace is far from the keyword which introduces it
9903 my $keyword_on_same_line = 1;
9904 if ( ( $max_index_to_go >= 0 )
9905 && ( $last_nonblank_type eq ')' ) )
9907 if ( $block_type =~ /^(if|else|elsif)$/
9908 && ( $tokens_to_go[0] eq '}' )
9909 && $rOpts_cuddled_else )
9911 $keyword_on_same_line = 1;
9913 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
9915 $keyword_on_same_line = 0;
9919 # decide if user requested break before '{'
9922 # use -bl flag if not a sub block of any type
9923 #$block_type !~ /^sub/
9924 $block_type !~ /^sub\b/
9925 ? $rOpts->{'opening-brace-on-new-line'}
9927 # use -sbl flag for a named sub block
9928 : $block_type !~ /$ASUB_PATTERN/
9929 ? $rOpts->{'opening-sub-brace-on-new-line'}
9931 # use -asbl flag for an anonymous sub block
9932 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9934 # Break before an opening '{' ...
9940 # and we were unable to start looking for a block,
9941 && $index_start_one_line_block == UNDEFINED_INDEX
9943 # or if it will not be on same line as its keyword, so that
9944 # it will be outdented (eval.t, overload.t), and the user
9945 # has not insisted on keeping it on the right
9946 || ( !$keyword_on_same_line
9947 && !$rOpts->{'opening-brace-always-on-right'} )
9952 # but only if allowed
9953 unless ($no_internal_newlines) {
9955 # since we already stored this token, we must unstore it
9956 unstore_token_to_go();
9958 # then output the line
9959 output_line_to_go();
9961 # and now store this token at the start of a new line
9962 store_token_to_go($side_comment_follows);
9966 # Now update for side comment
9967 if ($side_comment_follows) { $no_internal_newlines = 1 }
9969 # now output this line
9970 unless ($no_internal_newlines) {
9971 output_line_to_go();
9975 elsif ($is_closing_BLOCK) {
9977 # If there is a pending one-line block ..
9978 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9980 # we have to terminate it if..
9983 # it is too long (final length may be different from
9984 # initial estimate). note: must allow 1 space for this token
9985 excess_line_length( $index_start_one_line_block,
9986 $max_index_to_go ) >= 0
9988 # or if it has too many semicolons
9989 || ( $semicolons_before_block_self_destruct == 0
9990 && $last_nonblank_type ne ';' )
9993 destroy_one_line_block();
9997 # put a break before this closing curly brace if appropriate
9998 unless ( $no_internal_newlines
9999 || $index_start_one_line_block != UNDEFINED_INDEX )
10002 # add missing semicolon if ...
10003 # there are some tokens
10005 ( $max_index_to_go > 0 )
10007 # and we don't have one
10008 && ( $last_nonblank_type ne ';' )
10010 # and we are allowed to do so.
10011 && $rOpts->{'add-semicolons'}
10013 # and we are allowed to for this block type
10014 && ( $ok_to_add_semicolon_for_block_type{$block_type}
10015 || $block_type =~ /^(sub|package)/
10016 || $block_type =~ /^\w+\:$/ )
10021 save_current_token();
10024 $level = $levels_to_go[$max_index_to_go];
10025 $slevel = $nesting_depth_to_go[$max_index_to_go];
10027 $nesting_blocks_to_go[$max_index_to_go];
10028 $ci_level = $ci_levels_to_go[$max_index_to_go];
10030 $container_type = "";
10031 $container_environment = "";
10032 $type_sequence = "";
10034 # Note - we remove any blank AFTER extracting its
10035 # parameters such as level, etc, above
10036 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
10037 unstore_token_to_go();
10039 store_token_to_go();
10041 note_added_semicolon();
10042 restore_current_token();
10045 # then write out everything before this closing curly brace
10046 output_line_to_go();
10050 # Now update for side comment
10051 if ($side_comment_follows) { $no_internal_newlines = 1 }
10053 # store the closing curly brace
10054 store_token_to_go();
10056 # ok, we just stored a closing curly brace. Often, but
10057 # not always, we want to end the line immediately.
10058 # So now we have to check for special cases.
10060 # if this '}' successfully ends a one-line block..
10061 my $is_one_line_block = 0;
10062 my $keep_going = 0;
10063 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10065 # Remember the type of token just before the
10066 # opening brace. It would be more general to use
10067 # a stack, but this will work for one-line blocks.
10068 $is_one_line_block =
10069 $types_to_go[$index_start_one_line_block];
10071 # we have to actually make it by removing tentative
10072 # breaks that were set within it
10073 undo_forced_breakpoint_stack(0);
10074 set_nobreaks( $index_start_one_line_block,
10075 $max_index_to_go - 1 );
10077 # then re-initialize for the next one-line block
10078 destroy_one_line_block();
10080 # then decide if we want to break after the '}' ..
10081 # We will keep going to allow certain brace followers as in:
10082 # do { $ifclosed = 1; last } unless $losing;
10084 # But make a line break if the curly ends a
10085 # significant block:
10088 $is_block_without_semicolon{$block_type}
10090 # Follow users break point for
10091 # one line block types U & G, such as a 'try' block
10092 || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
10095 # if needless semicolon follows we handle it later
10096 && $next_nonblank_token ne ';'
10099 output_line_to_go() unless ($no_internal_newlines);
10103 # set string indicating what we need to look for brace follower
10105 if ( $block_type eq 'do' ) {
10106 $rbrace_follower = \%is_do_follower;
10108 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
10109 $rbrace_follower = \%is_if_brace_follower;
10111 elsif ( $block_type eq 'else' ) {
10112 $rbrace_follower = \%is_else_brace_follower;
10115 # added eval for borris.t
10116 elsif ($is_sort_map_grep_eval{$block_type}
10117 || $is_one_line_block eq 'G' )
10119 $rbrace_follower = undef;
10124 elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
10126 if ($is_one_line_block) {
10127 $rbrace_follower = \%is_anon_sub_1_brace_follower;
10130 $rbrace_follower = \%is_anon_sub_brace_follower;
10134 # None of the above: specify what can follow a closing
10135 # brace of a block which is not an
10136 # if/elsif/else/do/sort/map/grep/eval
10138 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
10140 $rbrace_follower = \%is_other_brace_follower;
10143 # See if an elsif block is followed by another elsif or else;
10145 if ( $block_type eq 'elsif' ) {
10147 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
10148 $looking_for_else = 1; # ok, check on next line
10152 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
10153 write_logfile_entry("No else block :(\n");
10158 # keep going after certain block types (map,sort,grep,eval)
10159 # added eval for borris.t
10165 # if no more tokens, postpone decision until re-entring
10166 elsif ( ( $next_nonblank_token_type eq 'b' )
10167 && $rOpts_add_newlines )
10169 unless ($rbrace_follower) {
10170 output_line_to_go() unless ($no_internal_newlines);
10174 elsif ($rbrace_follower) {
10176 unless ( $rbrace_follower->{$next_nonblank_token} ) {
10177 output_line_to_go() unless ($no_internal_newlines);
10179 $rbrace_follower = undef;
10183 output_line_to_go() unless ($no_internal_newlines);
10186 } # end treatment of closing block token
10189 elsif ( $type eq ';' ) {
10191 # kill one-line blocks with too many semicolons
10192 $semicolons_before_block_self_destruct--;
10194 ( $semicolons_before_block_self_destruct < 0 )
10195 || ( $semicolons_before_block_self_destruct == 0
10196 && $next_nonblank_token_type !~ /^[b\}]$/ )
10199 destroy_one_line_block();
10202 # Remove unnecessary semicolons, but not after bare
10203 # blocks, where it could be unsafe if the brace is
10207 $last_nonblank_token eq '}'
10209 $is_block_without_semicolon{
10210 $last_nonblank_block_type}
10211 || $last_nonblank_block_type =~ /$SUB_PATTERN/
10212 || $last_nonblank_block_type =~ /^\w+:$/ )
10214 || $last_nonblank_type eq ';'
10219 $rOpts->{'delete-semicolons'}
10221 # don't delete ; before a # because it would promote it
10222 # to a block comment
10223 && ( $next_nonblank_token_type ne '#' )
10226 note_deleted_semicolon();
10227 output_line_to_go()
10228 unless ( $no_internal_newlines
10229 || $index_start_one_line_block != UNDEFINED_INDEX );
10233 write_logfile_entry("Extra ';'\n");
10236 store_token_to_go();
10238 output_line_to_go()
10239 unless ( $no_internal_newlines
10240 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
10241 || ( $next_nonblank_token eq '}' ) );
10245 # handle here_doc target string
10246 elsif ( $type eq 'h' ) {
10247 $no_internal_newlines =
10248 1; # no newlines after seeing here-target
10249 destroy_one_line_block();
10250 store_token_to_go();
10253 # handle all other token types
10256 # if this is a blank...
10257 if ( $type eq 'b' ) {
10259 # make it just one character
10260 $token = ' ' if $rOpts_add_whitespace;
10262 # delete it if unwanted by whitespace rules
10263 # or we are deleting all whitespace
10264 my $ws = $$rwhite_space_flag[ $j + 1 ];
10265 if ( ( defined($ws) && $ws == -1 )
10266 || $rOpts_delete_old_whitespace )
10269 # unless it might make a syntax error
10271 unless is_essential_whitespace(
10272 $last_last_nonblank_token,
10273 $last_last_nonblank_type,
10274 $tokens_to_go[$max_index_to_go],
10275 $types_to_go[$max_index_to_go],
10276 $$rtokens[ $j + 1 ],
10277 $$rtoken_type[ $j + 1 ]
10281 store_token_to_go();
10284 # remember two previous nonblank OUTPUT tokens
10285 if ( $type ne '#' && $type ne 'b' ) {
10286 $last_last_nonblank_token = $last_nonblank_token;
10287 $last_last_nonblank_type = $last_nonblank_type;
10288 $last_nonblank_token = $token;
10289 $last_nonblank_type = $type;
10290 $last_nonblank_block_type = $block_type;
10293 # unset the continued-quote flag since it only applies to the
10294 # first token, and we want to resume normal formatting if
10295 # there are additional tokens on the line
10296 $in_continued_quote = 0;
10298 } # end of loop over all tokens in this 'line_of_tokens'
10300 # we have to flush ..
10303 # if there is a side comment
10304 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
10306 # if this line ends in a quote
10307 # NOTE: This is critically important for insuring that quoted lines
10308 # do not get processed by things like -sot and -sct
10311 # if this is a VERSION statement
10312 || $is_VERSION_statement
10314 # to keep a label at the end of a line
10317 # if we are instructed to keep all old line breaks
10318 || !$rOpts->{'delete-old-newlines'}
10321 destroy_one_line_block();
10322 output_line_to_go();
10325 # mark old line breakpoints in current output stream
10326 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
10327 $old_breakpoint_to_go[$max_index_to_go] = 1;
10329 } ## end sub print_line_of_tokens
10330 } ## end block print_line_of_tokens
10332 # sub output_line_to_go sends one logical line of tokens on down the
10333 # pipeline to the VerticalAligner package, breaking the line into continuation
10334 # lines as necessary. The line of tokens is ready to go in the "to_go"
10336 sub output_line_to_go {
10338 # debug stuff; this routine can be called from many points
10339 FORMATTER_DEBUG_FLAG_OUTPUT && do {
10340 my ( $a, $b, $c ) = caller;
10342 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
10344 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
10345 write_diagnostics("$output_str\n");
10348 # just set a tentative breakpoint if we might be in a one-line block
10349 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10350 set_forced_breakpoint($max_index_to_go);
10354 my $cscw_block_comment;
10355 $cscw_block_comment = add_closing_side_comment()
10356 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
10358 my $comma_arrow_count_contained = match_opening_and_closing_tokens();
10360 # tell the -lp option we are outputting a batch so it can close
10361 # any unfinished items in its stack
10364 # If this line ends in a code block brace, set breaks at any
10365 # previous closing code block braces to breakup a chain of code
10366 # blocks on one line. This is very rare but can happen for
10367 # user-defined subs. For example we might be looking at this:
10368 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
10369 my $saw_good_break = 0; # flag to force breaks even if short line
10372 # looking for opening or closing block brace
10373 $block_type_to_go[$max_index_to_go]
10375 # but not one of these which are never duplicated on a line:
10376 # until|while|for|if|elsif|else
10377 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
10380 my $lev = $nesting_depth_to_go[$max_index_to_go];
10382 # Walk backwards from the end and
10383 # set break at any closing block braces at the same level.
10384 # But quit if we are not in a chain of blocks.
10385 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
10386 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
10387 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
10389 if ( $block_type_to_go[$i] ) {
10390 if ( $tokens_to_go[$i] eq '}' ) {
10391 set_forced_breakpoint($i);
10392 $saw_good_break = 1;
10396 # quit if we see anything besides words, function, blanks
10398 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
10403 my $imax = $max_index_to_go;
10405 # trim any blank tokens
10406 if ( $max_index_to_go >= 0 ) {
10407 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
10408 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
10411 # anything left to write?
10412 if ( $imin <= $imax ) {
10414 # add a blank line before certain key types but not after a comment
10415 if ( $last_line_leading_type !~ /^[#]/ ) {
10416 my $want_blank = 0;
10417 my $leading_token = $tokens_to_go[$imin];
10418 my $leading_type = $types_to_go[$imin];
10420 # blank lines before subs except declarations and one-liners
10421 # MCONVERSION LOCATION - for sub tokenization change
10422 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
10423 $want_blank = $rOpts->{'blank-lines-before-subs'}
10425 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10426 $imax ) !~ /^[\;\}]$/
10430 # break before all package declarations
10431 # MCONVERSION LOCATION - for tokenizaton change
10432 elsif ($leading_token =~ /^(package\s)/
10433 && $leading_type eq 'i' )
10435 $want_blank = $rOpts->{'blank-lines-before-packages'};
10438 # break before certain key blocks except one-liners
10439 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
10440 $want_blank = $rOpts->{'blank-lines-before-subs'}
10442 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10447 # Break before certain block types if we haven't had a
10448 # break at this level for a while. This is the
10449 # difficult decision..
10450 elsif ($leading_type eq 'k'
10451 && $last_line_leading_type ne 'b'
10452 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
10454 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
10455 if ( !defined($lc) ) { $lc = 0 }
10458 $rOpts->{'blanks-before-blocks'}
10459 && $lc >= $rOpts->{'long-block-line-count'}
10460 && $file_writer_object->get_consecutive_nonblank_lines() >=
10461 $rOpts->{'long-block-line-count'}
10463 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10468 # Check for blank lines wanted before a closing brace
10469 if ( $leading_token eq '}' ) {
10470 if ( $rOpts->{'blank-lines-before-closing-block'}
10471 && $block_type_to_go[$imin]
10472 && $block_type_to_go[$imin] =~
10473 /$blank_lines_before_closing_block_pattern/ )
10475 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
10476 if ( $nblanks > $want_blank ) {
10477 $want_blank = $nblanks;
10484 # future: send blank line down normal path to VerticalAligner
10485 Perl::Tidy::VerticalAligner::flush();
10486 $file_writer_object->require_blank_code_lines($want_blank);
10490 # update blank line variables and count number of consecutive
10491 # non-blank, non-comment lines at this level
10492 $last_last_line_leading_level = $last_line_leading_level;
10493 $last_line_leading_level = $levels_to_go[$imin];
10494 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
10495 $last_line_leading_type = $types_to_go[$imin];
10496 if ( $last_line_leading_level == $last_last_line_leading_level
10497 && $last_line_leading_type ne 'b'
10498 && $last_line_leading_type ne '#'
10499 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
10501 $nonblank_lines_at_depth[$last_line_leading_level]++;
10504 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
10507 FORMATTER_DEBUG_FLAG_FLUSH && do {
10508 my ( $package, $file, $line ) = caller;
10510 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
10513 # add a couple of extra terminal blank tokens
10516 # set all forced breakpoints for good list formatting
10517 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
10521 || $old_line_count_in_batch > 1
10523 # must always call scan_list() with unbalanced batches because it
10524 # is maintaining some stacks
10525 || is_unbalanced_batch()
10527 # call scan_list if we might want to break at commas
10529 $comma_count_in_batch
10530 && ( $rOpts_maximum_fields_per_table > 0
10531 || $rOpts_comma_arrow_breakpoints == 0 )
10534 # call scan_list if user may want to break open some one-line
10536 || ( $comma_arrow_count_contained
10537 && $rOpts_comma_arrow_breakpoints != 3 )
10540 ## This caused problems in one version of perl for unknown reasons:
10541 ## $saw_good_break ||= scan_list();
10542 my $sgb = scan_list();
10543 $saw_good_break ||= $sgb;
10546 # let $ri_first and $ri_last be references to lists of
10547 # first and last tokens of line fragments to output..
10548 my ( $ri_first, $ri_last );
10550 # write a single line if..
10553 # we aren't allowed to add any newlines
10554 !$rOpts_add_newlines
10556 # or, we don't already have an interior breakpoint
10557 # and we didn't see a good breakpoint
10559 !$forced_breakpoint_count
10560 && !$saw_good_break
10562 # and this line is 'short'
10567 @$ri_first = ($imin);
10568 @$ri_last = ($imax);
10571 # otherwise use multiple lines
10574 ( $ri_first, $ri_last, my $colon_count ) =
10575 set_continuation_breaks($saw_good_break);
10577 break_all_chain_tokens( $ri_first, $ri_last );
10579 break_equals( $ri_first, $ri_last );
10581 # now we do a correction step to clean this up a bit
10582 # (The only time we would not do this is for debugging)
10583 if ( $rOpts->{'recombine'} ) {
10584 ( $ri_first, $ri_last ) =
10585 recombine_breakpoints( $ri_first, $ri_last );
10588 insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
10591 # do corrector step if -lp option is used
10592 my $do_not_pad = 0;
10593 if ($rOpts_line_up_parentheses) {
10594 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
10596 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
10598 # Insert any requested blank lines after an opening brace. We have to
10599 # skip back before any side comment to find the terminal token
10601 for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
10602 next if $types_to_go[$iterm] eq '#';
10603 next if $types_to_go[$iterm] eq 'b';
10607 # write requested number of blank lines after an opening block brace
10608 if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
10609 if ( $rOpts->{'blank-lines-after-opening-block'}
10610 && $block_type_to_go[$iterm]
10611 && $block_type_to_go[$iterm] =~
10612 /$blank_lines_after_opening_block_pattern/ )
10614 my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
10615 Perl::Tidy::VerticalAligner::flush();
10616 $file_writer_object->require_blank_code_lines($nblanks);
10621 prepare_for_new_input_lines();
10623 # output any new -cscw block comment
10624 if ($cscw_block_comment) {
10626 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10630 sub note_added_semicolon {
10631 $last_added_semicolon_at = $input_line_number;
10632 if ( $added_semicolon_count == 0 ) {
10633 $first_added_semicolon_at = $last_added_semicolon_at;
10635 $added_semicolon_count++;
10636 write_logfile_entry("Added ';' here\n");
10639 sub note_deleted_semicolon {
10640 $last_deleted_semicolon_at = $input_line_number;
10641 if ( $deleted_semicolon_count == 0 ) {
10642 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
10644 $deleted_semicolon_count++;
10645 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
10648 sub note_embedded_tab {
10649 $embedded_tab_count++;
10650 $last_embedded_tab_at = $input_line_number;
10651 if ( !$first_embedded_tab_at ) {
10652 $first_embedded_tab_at = $last_embedded_tab_at;
10655 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
10656 write_logfile_entry("Embedded tabs in quote or pattern\n");
10660 sub starting_one_line_block {
10662 # after seeing an opening curly brace, look for the closing brace
10663 # and see if the entire block will fit on a line. This routine is
10664 # not always right because it uses the old whitespace, so a check
10665 # is made later (at the closing brace) to make sure we really
10666 # have a one-line block. We have to do this preliminary check,
10667 # though, because otherwise we would always break at a semicolon
10668 # within a one-line block if the block contains multiple statements.
10670 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
10674 # kill any current block - we can only go 1 deep
10675 destroy_one_line_block();
10678 # 1=distance from start of block to opening brace exceeds line length
10683 # shouldn't happen: there must have been a prior call to
10684 # store_token_to_go to put the opening brace in the output stream
10685 if ( $max_index_to_go < 0 ) {
10686 warning("program bug: store_token_to_go called incorrectly\n");
10687 report_definite_bug();
10691 # cannot use one-line blocks with cuddled else/elsif lines
10692 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
10697 my $block_type = $$rblock_type[$j];
10699 # find the starting keyword for this block (such as 'if', 'else', ...)
10701 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
10702 $i_start = $max_index_to_go;
10705 # the previous nonblank token should start these block types
10706 elsif (( $last_last_nonblank_token_to_go eq $block_type )
10707 || ( $block_type =~ /^sub\b/ )
10708 || $block_type =~ /\(\)/ )
10710 $i_start = $last_last_nonblank_index_to_go;
10712 # For signatures and extended syntax ...
10713 # If this brace follows a parenthesized list, we should look back to
10714 # find the keyword before the opening paren because otherwise we might
10715 # form a one line block which stays intack, and cause the parenthesized
10716 # expression to break open. That looks bad. However, actually
10717 # searching for the opening paren is slow and tedius.
10718 # The actual keyword is often at the start of a line, but might not be.
10719 # For example, we might have an anonymous sub with signature list
10720 # following a =>. It is safe to mark the start anywhere before the
10721 # opening paren, so we just go back to the prevoious break (or start of
10722 # the line) if that is before the opening paren. The minor downside is
10723 # that we may very occasionally break open a block unnecessarily.
10724 if ( $tokens_to_go[$i_start] eq ')' ) {
10725 $i_start = $index_max_forced_break + 1;
10726 if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
10727 my $lev = $levels_to_go[$i_start];
10728 if ( $lev > $level ) { return 0 }
10732 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
10734 # For something like "if (xxx) {", the keyword "if" will be
10735 # just after the most recent break. This will be 0 unless
10736 # we have just killed a one-line block and are starting another.
10738 # Note: cannot use inext_index_to_go[] here because that array
10739 # is still being constructed.
10740 $i_start = $index_max_forced_break + 1;
10741 if ( $types_to_go[$i_start] eq 'b' ) {
10745 # Patch to avoid breaking short blocks defined with extended_syntax:
10746 # Strip off any trailing () which was added in the parser to mark
10747 # the opening keyword. For example, in the following
10748 # create( TypeFoo $e) {$bubba}
10749 # the blocktype would be marked as create()
10750 my $stripped_block_type = $block_type;
10751 $stripped_block_type =~ s/\(\)$//;
10753 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
10758 # patch for SWITCH/CASE to retain one-line case/when blocks
10759 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
10761 # Note: cannot use inext_index_to_go[] here because that array
10762 # is still being constructed.
10763 $i_start = $index_max_forced_break + 1;
10764 if ( $types_to_go[$i_start] eq 'b' ) {
10767 unless ( $tokens_to_go[$i_start] eq $block_type ) {
10776 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
10780 # see if length is too long to even start
10781 if ( $pos > maximum_line_length($i_start) ) {
10785 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
10787 # old whitespace could be arbitrarily large, so don't use it
10788 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
10789 else { $pos += rtoken_length($i) }
10791 # Return false result if we exceed the maximum line length,
10792 if ( $pos > maximum_line_length($i_start) ) {
10796 # or encounter another opening brace before finding the closing brace.
10797 elsif ($$rtokens[$i] eq '{'
10798 && $$rtoken_type[$i] eq '{'
10799 && $$rblock_type[$i] )
10804 # if we find our closing brace..
10805 elsif ($$rtokens[$i] eq '}'
10806 && $$rtoken_type[$i] eq '}'
10807 && $$rblock_type[$i] )
10810 # be sure any trailing comment also fits on the line
10812 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
10814 # Patch for one-line sort/map/grep/eval blocks with side comments:
10815 # We will ignore the side comment length for sort/map/grep/eval
10816 # because this can lead to statements which change every time
10817 # perltidy is run. Here is an example from Denis Moskowitz which
10818 # oscillates between these two states without this patch:
10821 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10825 ## $_->foo ne 'bar'
10826 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10830 # When the first line is input it gets broken apart by the main
10831 # line break logic in sub print_line_of_tokens.
10832 # When the second line is input it gets recombined by
10833 # print_line_of_tokens and passed to the output routines. The
10834 # output routines (set_continuation_breaks) do not break it apart
10835 # because the bond strengths are set to the highest possible value
10836 # for grep/map/eval/sort blocks, so the first version gets output.
10837 # It would be possible to fix this by changing bond strengths,
10838 # but they are high to prevent errors in older versions of perl.
10840 if ( $$rtoken_type[$i_nonblank] eq '#'
10841 && !$is_sort_map_grep{$block_type} )
10844 $pos += rtoken_length($i_nonblank);
10846 if ( $i_nonblank > $i + 1 ) {
10848 # source whitespace could be anything, assume
10849 # at least one space before the hash on output
10850 if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 }
10851 else { $pos += rtoken_length( $i + 1 ) }
10854 if ( $pos >= maximum_line_length($i_start) ) {
10859 # ok, it's a one-line block
10860 create_one_line_block( $i_start, 20 );
10864 # just keep going for other characters
10869 # Allow certain types of new one-line blocks to form by joining
10870 # input lines. These can be safely done, but for other block types,
10871 # we keep old one-line blocks but do not form new ones. It is not
10872 # always a good idea to make as many one-line blocks as possible,
10873 # so other types are not done. The user can always use -mangle.
10874 if ( $is_sort_map_grep_eval{$block_type} ) {
10875 create_one_line_block( $i_start, 1 );
10881 sub unstore_token_to_go {
10883 # remove most recent token from output stream
10884 if ( $max_index_to_go > 0 ) {
10885 $max_index_to_go--;
10888 $max_index_to_go = UNDEFINED_INDEX;
10893 sub want_blank_line {
10895 $file_writer_object->want_blank_line() unless $in_format_skipping_section;
10898 sub write_unindented_line {
10900 $file_writer_object->write_line( $_[0] );
10905 # Undo continuation indentation in certain sequences
10906 # For example, we can undo continuation indentation in sort/map/grep chains
10907 # my $dat1 = pack( "n*",
10908 # map { $_, $lookup->{$_} }
10909 # sort { $a <=> $b }
10910 # grep { $lookup->{$_} ne $default } keys %$lookup );
10911 # To align the map/sort/grep keywords like this:
10912 # my $dat1 = pack( "n*",
10913 # map { $_, $lookup->{$_} }
10914 # sort { $a <=> $b }
10915 # grep { $lookup->{$_} ne $default } keys %$lookup );
10916 my ( $ri_first, $ri_last ) = @_;
10917 my ( $line_1, $line_2, $lev_last );
10918 my $this_line_is_semicolon_terminated;
10919 my $max_line = @$ri_first - 1;
10921 # looking at each line of this batch..
10922 # We are looking at leading tokens and looking for a sequence
10923 # all at the same level and higher level than enclosing lines.
10924 foreach my $line ( 0 .. $max_line ) {
10926 my $ibeg = $$ri_first[$line];
10927 my $lev = $levels_to_go[$ibeg];
10930 # if we have started a chain..
10933 # see if it continues..
10934 if ( $lev == $lev_last ) {
10935 if ( $types_to_go[$ibeg] eq 'k'
10936 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10939 # chain continues...
10940 # check for chain ending at end of a statement
10941 if ( $line == $max_line ) {
10943 # see of this line ends a statement
10944 my $iend = $$ri_last[$line];
10945 $this_line_is_semicolon_terminated =
10946 $types_to_go[$iend] eq ';'
10948 # with possible side comment
10949 || ( $types_to_go[$iend] eq '#'
10950 && $iend - $ibeg >= 2
10951 && $types_to_go[ $iend - 2 ] eq ';'
10952 && $types_to_go[ $iend - 1 ] eq 'b' );
10954 $line_2 = $line if ($this_line_is_semicolon_terminated);
10962 elsif ( $lev < $lev_last ) {
10964 # chain ends with previous line
10965 $line_2 = $line - 1;
10967 elsif ( $lev > $lev_last ) {
10973 # undo the continuation indentation if a chain ends
10974 if ( defined($line_2) && defined($line_1) ) {
10975 my $continuation_line_count = $line_2 - $line_1 + 1;
10976 @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10977 (0) x ($continuation_line_count);
10978 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10979 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
10984 # not in a chain yet..
10987 # look for start of a new sort/map/grep chain
10988 if ( $lev > $lev_last ) {
10989 if ( $types_to_go[$ibeg] eq 'k'
10990 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
11003 # If there is a single, long parameter within parens, like this:
11005 # $self->command( "/msg "
11006 # . $infoline->chan
11007 # . " You said $1, but did you know that it's square was "
11008 # . $1 * $1 . " ?" );
11010 # we can remove the continuation indentation of the 2nd and higher lines
11011 # to achieve this effect, which is more pleasing:
11013 # $self->command("/msg "
11014 # . $infoline->chan
11015 # . " You said $1, but did you know that it's square was "
11016 # . $1 * $1 . " ?");
11018 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
11019 my $max_line = @$ri_first - 1;
11021 # must be multiple lines
11022 return unless $max_line > $line_open;
11024 my $lev_start = $levels_to_go[$i_start];
11025 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
11027 # see if all additional lines in this container have continuation
11030 my $line_1 = 1 + $line_open;
11031 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
11032 my $ibeg = $$ri_first[$n];
11033 my $iend = $$ri_last[$n];
11034 if ( $ibeg eq $closing_index ) { $n--; last }
11035 return if ( $lev_start != $levels_to_go[$ibeg] );
11036 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
11037 last if ( $closing_index <= $iend );
11040 # we can reduce the indentation of all continuation lines
11041 my $continuation_line_count = $n - $line_open;
11042 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
11043 (0) x ($continuation_line_count);
11044 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
11045 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
11050 # insert $pad_spaces before token number $ipad
11051 my ( $ipad, $pad_spaces ) = @_;
11052 if ( $pad_spaces > 0 ) {
11053 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
11055 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
11056 $tokens_to_go[$ipad] = "";
11064 $token_lengths_to_go[$ipad] += $pad_spaces;
11065 for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) {
11066 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
11075 @_ = qw( + - * / );
11076 @is_math_op{@_} = (1) x scalar(@_);
11079 sub set_logical_padding {
11081 # Look at a batch of lines and see if extra padding can improve the
11082 # alignment when there are certain leading operators. Here is an
11083 # example, in which some extra space is introduced before
11084 # '( $year' to make it line up with the subsequent lines:
11086 # if ( ( $Year < 1601 )
11087 # || ( $Year > 2899 )
11088 # || ( $EndYear < 1601 )
11089 # || ( $EndYear > 2899 ) )
11091 # &Error_OutOfRange;
11094 my ( $ri_first, $ri_last ) = @_;
11095 my $max_line = @$ri_first - 1;
11097 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line,
11099 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
11101 # looking at each line of this batch..
11102 foreach $line ( 0 .. $max_line - 1 ) {
11104 # see if the next line begins with a logical operator
11105 $ibeg = $$ri_first[$line];
11106 $iend = $$ri_last[$line];
11107 $ibeg_next = $$ri_first[ $line + 1 ];
11108 $tok_next = $tokens_to_go[$ibeg_next];
11109 $type_next = $types_to_go[$ibeg_next];
11111 $has_leading_op_next = ( $tok_next =~ /^\w/ )
11112 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
11113 : $is_chain_operator{$type_next}; # and, or
11115 next unless ($has_leading_op_next);
11117 # next line must not be at lesser depth
11119 if ( $nesting_depth_to_go[$ibeg] >
11120 $nesting_depth_to_go[$ibeg_next] );
11122 # identify the token in this line to be padded on the left
11125 # handle lines at same depth...
11126 if ( $nesting_depth_to_go[$ibeg] ==
11127 $nesting_depth_to_go[$ibeg_next] )
11130 # if this is not first line of the batch ...
11133 # and we have leading operator..
11134 next if $has_leading_op;
11136 # Introduce padding if..
11137 # 1. the previous line is at lesser depth, or
11138 # 2. the previous line ends in an assignment
11139 # 3. the previous line ends in a 'return'
11140 # 4. the previous line ends in a comma
11141 # Example 1: previous line at lesser depth
11142 # if ( ( $Year < 1601 ) # <- we are here but
11143 # || ( $Year > 2899 ) # list has not yet
11144 # || ( $EndYear < 1601 ) # collapsed vertically
11145 # || ( $EndYear > 2899 ) )
11148 # Example 2: previous line ending in assignment:
11150 # $year % 4 ? 0 # <- We are here
11151 # : $year % 100 ? 1
11152 # : $year % 400 ? 0
11155 # Example 3: previous line ending in comma:
11162 # be sure levels agree (do not indent after an indented 'if')
11164 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
11166 # allow padding on first line after a comma but only if:
11167 # (1) this is line 2 and
11168 # (2) there are at more than three lines and
11169 # (3) lines 3 and 4 have the same leading operator
11170 # These rules try to prevent padding within a long
11171 # comma-separated list.
11173 if ( $types_to_go[$iendm] eq ','
11177 my $ibeg_next_next = $$ri_first[ $line + 2 ];
11178 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
11179 $ok_comma = $tok_next_next eq $tok_next;
11184 $is_assignment{ $types_to_go[$iendm] }
11186 || ( $nesting_depth_to_go[$ibegm] <
11187 $nesting_depth_to_go[$ibeg] )
11188 || ( $types_to_go[$iendm] eq 'k'
11189 && $tokens_to_go[$iendm] eq 'return' )
11192 # we will add padding before the first token
11196 # for first line of the batch..
11199 # WARNING: Never indent if first line is starting in a
11200 # continued quote, which would change the quote.
11201 next if $starting_in_quote;
11203 # if this is text after closing '}'
11204 # then look for an interior token to pad
11205 if ( $types_to_go[$ibeg] eq '}' ) {
11209 # otherwise, we might pad if it looks really good
11212 # we might pad token $ibeg, so be sure that it
11213 # is at the same depth as the next line.
11215 if ( $nesting_depth_to_go[$ibeg] !=
11216 $nesting_depth_to_go[$ibeg_next] );
11218 # We can pad on line 1 of a statement if at least 3
11219 # lines will be aligned. Otherwise, it
11220 # can look very confusing.
11222 # We have to be careful not to pad if there are too few
11223 # lines. The current rule is:
11224 # (1) in general we require at least 3 consecutive lines
11225 # with the same leading chain operator token,
11226 # (2) but an exception is that we only require two lines
11227 # with leading colons if there are no more lines. For example,
11228 # the first $i in the following snippet would get padding
11229 # by the second rule:
11231 # $i == 1 ? ( "First", "Color" )
11232 # : $i == 2 ? ( "Then", "Rarity" )
11233 # : ( "Then", "Name" );
11235 if ( $max_line > 1 ) {
11236 my $leading_token = $tokens_to_go[$ibeg_next];
11239 # never indent line 1 of a '.' series because
11240 # previous line is most likely at same level.
11241 # TODO: we should also look at the leasing_spaces
11242 # of the last output line and skip if it is same
11244 next if ( $leading_token eq '.' );
11247 foreach my $l ( 2 .. 3 ) {
11248 last if ( $line + $l > $max_line );
11249 my $ibeg_next_next = $$ri_first[ $line + $l ];
11250 if ( $tokens_to_go[$ibeg_next_next] ne
11253 $tokens_differ = 1;
11258 next if ($tokens_differ);
11259 next if ( $count < 3 && $leading_token ne ':' );
11269 # find interior token to pad if necessary
11270 if ( !defined($ipad) ) {
11272 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
11274 # find any unclosed container
11276 unless ( $type_sequence_to_go[$i]
11277 && $mate_index_to_go[$i] > $iend );
11279 # find next nonblank token to pad
11280 $ipad = $inext_to_go[$i];
11281 last if ( $ipad > $iend );
11286 # We cannot pad a leading token at the lowest level because
11287 # it could cause a bug in which the starting indentation
11288 # level is guessed incorrectly each time the code is run
11289 # though perltidy, thus causing the code to march off to
11290 # the right. For example, the following snippet would have
11293 ## ov_method mycan( $package, '(""' ), $package
11294 ## or ov_method mycan( $package, '(0+' ), $package
11295 ## or ov_method mycan( $package, '(bool' ), $package
11296 ## or ov_method mycan( $package, '(nomethod' ), $package;
11298 # If this snippet is within a block this won't happen
11299 # unless the user just processes the snippet alone within
11300 # an editor. In that case either the user will see and
11301 # fix the problem or it will be corrected next time the
11302 # entire file is processed with perltidy.
11303 next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
11305 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
11306 ## IT DID MORE HARM THAN GOOD
11308 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
11311 ##? # do not put leading padding for just 2 lines of math
11312 ##? if ( $ipad == $ibeg
11314 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
11315 ##? && $is_math_op{$type_next}
11316 ##? && $line + 2 <= $max_line )
11318 ##? my $ibeg_next_next = $$ri_first[ $line + 2 ];
11319 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
11320 ##? next if !$is_math_op{$type_next_next};
11323 # next line must not be at greater depth
11324 my $iend_next = $$ri_last[ $line + 1 ];
11326 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
11327 $nesting_depth_to_go[$ipad] );
11329 # lines must be somewhat similar to be padded..
11330 my $inext_next = $inext_to_go[$ibeg_next];
11331 my $type = $types_to_go[$ipad];
11332 my $type_next = $types_to_go[ $ipad + 1 ];
11334 # see if there are multiple continuation lines
11335 my $logical_continuation_lines = 1;
11336 if ( $line + 2 <= $max_line ) {
11337 my $leading_token = $tokens_to_go[$ibeg_next];
11338 my $ibeg_next_next = $$ri_first[ $line + 2 ];
11339 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
11340 && $nesting_depth_to_go[$ibeg_next] eq
11341 $nesting_depth_to_go[$ibeg_next_next] )
11343 $logical_continuation_lines++;
11347 # see if leading types match
11348 my $types_match = $types_to_go[$inext_next] eq $type;
11349 my $matches_without_bang;
11351 # if first line has leading ! then compare the following token
11352 if ( !$types_match && $type eq '!' ) {
11353 $types_match = $matches_without_bang =
11354 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
11359 # either we have multiple continuation lines to follow
11360 # and we are not padding the first token
11361 ( $logical_continuation_lines > 1 && $ipad > 0 )
11369 # and keywords must match if keyword
11372 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
11378 #----------------------begin special checks--------------
11381 # A check is needed before we can make the pad.
11382 # If we are in a list with some long items, we want each
11383 # item to stand out. So in the following example, the
11384 # first line beginning with '$casefold->' would look good
11385 # padded to align with the next line, but then it
11386 # would be indented more than the last line, so we
11390 # $casefold->{code} eq '0041'
11391 # && $casefold->{status} eq 'C'
11392 # && $casefold->{mapping} eq '0061',
11397 # It would be faster, and almost as good, to use a comma
11398 # count, and not pad if comma_count > 1 and the previous
11399 # line did not end with a comma.
11403 my $ibg = $$ri_first[ $line + 1 ];
11404 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
11406 # just use simplified formula for leading spaces to avoid
11407 # needless sub calls
11408 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
11410 # look at each line beyond the next ..
11412 foreach $l ( $line + 2 .. $max_line ) {
11413 my $ibg = $$ri_first[$l];
11415 # quit looking at the end of this container
11417 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
11418 || ( $nesting_depth_to_go[$ibg] < $depth );
11420 # cannot do the pad if a later line would be
11422 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
11428 # don't pad if we end in a broken list
11429 if ( $l == $max_line ) {
11430 my $i2 = $$ri_last[$l];
11431 if ( $types_to_go[$i2] eq '#' ) {
11432 my $i1 = $$ri_first[$l];
11435 terminal_type( \@types_to_go, \@block_type_to_go,
11442 # a minus may introduce a quoted variable, and we will
11443 # add the pad only if this line begins with a bare word,
11444 # such as for the word 'Button' here:
11446 # Button => "Print letter \"~$_\"",
11447 # -command => [ sub { print "$_[0]\n" }, $_ ],
11448 # -accelerator => "Meta+$_"
11451 # On the other hand, if 'Button' is quoted, it looks best
11454 # 'Button' => "Print letter \"~$_\"",
11455 # -command => [ sub { print "$_[0]\n" }, $_ ],
11456 # -accelerator => "Meta+$_"
11458 if ( $types_to_go[$ibeg_next] eq 'm' ) {
11459 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
11462 next unless $ok_to_pad;
11464 #----------------------end special check---------------
11466 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
11467 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
11468 $pad_spaces = $length_2 - $length_1;
11470 # If the first line has a leading ! and the second does
11471 # not, then remove one space to try to align the next
11472 # leading characters, which are often the same. For example:
11474 # || $ts == $self->Holder
11475 # || $self->Holder->Type eq "Arena" )
11477 # This usually helps readability, but if there are subsequent
11478 # ! operators things will still get messed up. For example:
11480 # if ( !exists $Net::DNS::typesbyname{$qtype}
11481 # && exists $Net::DNS::classesbyname{$qtype}
11482 # && !exists $Net::DNS::classesbyname{$qclass}
11483 # && exists $Net::DNS::typesbyname{$qclass} )
11484 # We can't fix that.
11485 if ($matches_without_bang) { $pad_spaces-- }
11487 # make sure this won't change if -lp is used
11488 my $indentation_1 = $leading_spaces_to_go[$ibeg];
11489 if ( ref($indentation_1) ) {
11490 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
11491 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
11492 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 )
11499 # we might be able to handle a pad of -1 by removing a blank
11501 if ( $pad_spaces < 0 ) {
11503 if ( $pad_spaces == -1 ) {
11504 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
11506 pad_token( $ipad - 1, $pad_spaces );
11512 # now apply any padding for alignment
11513 if ( $ipad >= 0 && $pad_spaces ) {
11515 my $length_t = total_line_length( $ibeg, $iend );
11516 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
11518 pad_token( $ipad, $pad_spaces );
11526 $has_leading_op = $has_leading_op_next;
11527 } # end of loop over lines
11532 sub correct_lp_indentation {
11534 # When the -lp option is used, we need to make a last pass through
11535 # each line to correct the indentation positions in case they differ
11536 # from the predictions. This is necessary because perltidy uses a
11537 # predictor/corrector method for aligning with opening parens. The
11538 # predictor is usually good, but sometimes stumbles. The corrector
11539 # tries to patch things up once the actual opening paren locations
11541 my ( $ri_first, $ri_last ) = @_;
11542 my $do_not_pad = 0;
11544 # Note on flag '$do_not_pad':
11545 # We want to avoid a situation like this, where the aligner inserts
11546 # whitespace before the '=' to align it with a previous '=', because
11547 # otherwise the parens might become mis-aligned in a situation like
11548 # this, where the '=' has become aligned with the previous line,
11549 # pushing the opening '(' forward beyond where we want it.
11551 # $mkFloor::currentRoom = '';
11552 # $mkFloor::c_entry = $c->Entry(
11554 # -relief => 'sunken',
11558 # We leave it to the aligner to decide how to do this.
11560 # first remove continuation indentation if appropriate
11561 my $max_line = @$ri_first - 1;
11563 # looking at each line of this batch..
11564 my ( $ibeg, $iend );
11566 foreach $line ( 0 .. $max_line ) {
11567 $ibeg = $$ri_first[$line];
11568 $iend = $$ri_last[$line];
11570 # looking at each token in this output line..
11572 foreach $i ( $ibeg .. $iend ) {
11574 # How many space characters to place before this token
11575 # for special alignment. Actual padding is done in the
11578 # looking for next unvisited indentation item
11579 my $indentation = $leading_spaces_to_go[$i];
11580 if ( !$indentation->get_MARKED() ) {
11581 $indentation->set_MARKED(1);
11583 # looking for indentation item for which we are aligning
11584 # with parens, braces, and brackets
11585 next unless ( $indentation->get_ALIGN_PAREN() );
11587 # skip closed container on this line
11588 if ( $i > $ibeg ) {
11589 my $im = max( $ibeg, $iprev_to_go[$i] );
11590 if ( $type_sequence_to_go[$im]
11591 && $mate_index_to_go[$im] <= $iend )
11597 if ( $line == 1 && $i == $ibeg ) {
11601 # Ok, let's see what the error is and try to fix it
11603 my $predicted_pos = $indentation->get_SPACES();
11604 if ( $i > $ibeg ) {
11606 # token is mid-line - use length to previous token
11607 $actual_pos = total_line_length( $ibeg, $i - 1 );
11609 # for mid-line token, we must check to see if all
11610 # additional lines have continuation indentation,
11611 # and remove it if so. Otherwise, we do not get
11613 my $closing_index = $indentation->get_CLOSED();
11614 if ( $closing_index > $iend ) {
11615 my $ibeg_next = $$ri_first[ $line + 1 ];
11616 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
11617 undo_lp_ci( $line, $i, $closing_index, $ri_first,
11622 elsif ( $line > 0 ) {
11624 # handle case where token starts a new line;
11625 # use length of previous line
11626 my $ibegm = $$ri_first[ $line - 1 ];
11627 my $iendm = $$ri_last[ $line - 1 ];
11628 $actual_pos = total_line_length( $ibegm, $iendm );
11632 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
11636 # token is first character of first line of batch
11637 $actual_pos = $predicted_pos;
11640 my $move_right = $actual_pos - $predicted_pos;
11642 # done if no error to correct (gnu2.t)
11643 if ( $move_right == 0 ) {
11644 $indentation->set_RECOVERABLE_SPACES($move_right);
11648 # if we have not seen closure for this indentation in
11649 # this batch, we can only pass on a request to the
11651 my $closing_index = $indentation->get_CLOSED();
11653 if ( $closing_index < 0 ) {
11654 $indentation->set_RECOVERABLE_SPACES($move_right);
11658 # If necessary, look ahead to see if there is really any
11659 # leading whitespace dependent on this whitespace, and
11660 # also find the longest line using this whitespace.
11661 # Since it is always safe to move left if there are no
11662 # dependents, we only need to do this if we may have
11663 # dependent nodes or need to move right.
11665 my $right_margin = 0;
11666 my $have_child = $indentation->get_HAVE_CHILD();
11668 my %saw_indentation;
11669 my $line_count = 1;
11670 $saw_indentation{$indentation} = $indentation;
11672 if ( $have_child || $move_right > 0 ) {
11674 my $max_length = 0;
11675 if ( $i == $ibeg ) {
11676 $max_length = total_line_length( $ibeg, $iend );
11679 # look ahead at the rest of the lines of this batch..
11681 foreach $line_t ( $line + 1 .. $max_line ) {
11682 my $ibeg_t = $$ri_first[$line_t];
11683 my $iend_t = $$ri_last[$line_t];
11684 last if ( $closing_index <= $ibeg_t );
11686 # remember all different indentation objects
11687 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
11688 $saw_indentation{$indentation_t} = $indentation_t;
11691 # remember longest line in the group
11692 my $length_t = total_line_length( $ibeg_t, $iend_t );
11693 if ( $length_t > $max_length ) {
11694 $max_length = $length_t;
11697 $right_margin = maximum_line_length($ibeg) - $max_length;
11698 if ( $right_margin < 0 ) { $right_margin = 0 }
11701 my $first_line_comma_count =
11702 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
11703 my $comma_count = $indentation->get_COMMA_COUNT();
11704 my $arrow_count = $indentation->get_ARROW_COUNT();
11706 # This is a simple approximate test for vertical alignment:
11707 # if we broke just after an opening paren, brace, bracket,
11708 # and there are 2 or more commas in the first line,
11709 # and there are no '=>'s,
11710 # then we are probably vertically aligned. We could set
11711 # an exact flag in sub scan_list, but this is good
11713 my $indentation_count = keys %saw_indentation;
11714 my $is_vertically_aligned =
11716 && $first_line_comma_count > 1
11717 && $indentation_count == 1
11718 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
11720 # Make the move if possible ..
11723 # we can always move left
11726 # but we should only move right if we are sure it will
11727 # not spoil vertical alignment
11728 || ( $comma_count == 0 )
11729 || ( $comma_count > 0 && !$is_vertically_aligned )
11733 ( $move_right <= $right_margin )
11737 foreach ( keys %saw_indentation ) {
11738 $saw_indentation{$_}
11739 ->permanently_decrease_AVAILABLE_SPACES( -$move );
11743 # Otherwise, record what we want and the vertical aligner
11744 # will try to recover it.
11746 $indentation->set_RECOVERABLE_SPACES($move_right);
11751 return $do_not_pad;
11754 # flush is called to output any tokens in the pipeline, so that
11755 # an alternate source of lines can be written in the correct order
11758 destroy_one_line_block();
11759 output_line_to_go();
11760 Perl::Tidy::VerticalAligner::flush();
11763 sub reset_block_text_accumulator {
11765 # save text after 'if' and 'elsif' to append after 'else'
11766 if ($accumulating_text_for_block) {
11768 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
11769 push @{$rleading_block_if_elsif_text}, $leading_block_text;
11772 $accumulating_text_for_block = "";
11773 $leading_block_text = "";
11774 $leading_block_text_level = 0;
11775 $leading_block_text_length_exceeded = 0;
11776 $leading_block_text_line_number = 0;
11777 $leading_block_text_line_length = 0;
11780 sub set_block_text_accumulator {
11782 $accumulating_text_for_block = $tokens_to_go[$i];
11783 if ( $accumulating_text_for_block !~ /^els/ ) {
11784 $rleading_block_if_elsif_text = [];
11786 $leading_block_text = "";
11787 $leading_block_text_level = $levels_to_go[$i];
11788 $leading_block_text_line_number =
11789 $vertical_aligner_object->get_output_line_number();
11790 $leading_block_text_length_exceeded = 0;
11792 # this will contain the column number of the last character
11793 # of the closing side comment
11794 $leading_block_text_line_length =
11795 length($csc_last_label) +
11796 length($accumulating_text_for_block) +
11797 length( $rOpts->{'closing-side-comment-prefix'} ) +
11798 $leading_block_text_level * $rOpts_indent_columns + 3;
11801 sub accumulate_block_text {
11804 # accumulate leading text for -csc, ignoring any side comments
11805 if ( $accumulating_text_for_block
11806 && !$leading_block_text_length_exceeded
11807 && $types_to_go[$i] ne '#' )
11810 my $added_length = $token_lengths_to_go[$i];
11811 $added_length += 1 if $i == 0;
11812 my $new_line_length = $leading_block_text_line_length + $added_length;
11814 # we can add this text if we don't exceed some limits..
11817 # we must not have already exceeded the text length limit
11818 length($leading_block_text) <
11819 $rOpts_closing_side_comment_maximum_text
11822 # the new total line length must be below the line length limit
11823 # or the new length must be below the text length limit
11824 # (ie, we may allow one token to exceed the text length limit)
11827 maximum_line_length_for_level($leading_block_text_level)
11829 || length($leading_block_text) + $added_length <
11830 $rOpts_closing_side_comment_maximum_text
11833 # UNLESS: we are adding a closing paren before the brace we seek.
11834 # This is an attempt to avoid situations where the ... to be
11835 # added are longer than the omitted right paren, as in:
11837 # foreach my $item (@a_rather_long_variable_name_here) {
11839 # } ## end foreach my $item (@a_rather_long_variable_name_here...
11842 $tokens_to_go[$i] eq ')'
11845 $i + 1 <= $max_index_to_go
11846 && $block_type_to_go[ $i + 1 ] eq
11847 $accumulating_text_for_block
11849 || ( $i + 2 <= $max_index_to_go
11850 && $block_type_to_go[ $i + 2 ] eq
11851 $accumulating_text_for_block )
11857 # add an extra space at each newline
11858 if ( $i == 0 ) { $leading_block_text .= ' ' }
11860 # add the token text
11861 $leading_block_text .= $tokens_to_go[$i];
11862 $leading_block_text_line_length = $new_line_length;
11865 # show that text was truncated if necessary
11866 elsif ( $types_to_go[$i] ne 'b' ) {
11867 $leading_block_text_length_exceeded = 1;
11868 ## Please see file perltidy.ERR
11869 $leading_block_text .= '...';
11875 my %is_if_elsif_else_unless_while_until_for_foreach;
11879 # These block types may have text between the keyword and opening
11880 # curly. Note: 'else' does not, but must be included to allow trailing
11881 # if/elsif text to be appended.
11882 # patch for SWITCH/CASE: added 'case' and 'when'
11883 @_ = qw(if elsif else unless while until for foreach case when catch);
11884 @is_if_elsif_else_unless_while_until_for_foreach{@_} =
11888 sub accumulate_csc_text {
11890 # called once per output buffer when -csc is used. Accumulates
11891 # the text placed after certain closing block braces.
11892 # Defines and returns the following for this buffer:
11894 my $block_leading_text = ""; # the leading text of the last '}'
11895 my $rblock_leading_if_elsif_text;
11896 my $i_block_leading_text =
11897 -1; # index of token owning block_leading_text
11898 my $block_line_count = 100; # how many lines the block spans
11899 my $terminal_type = 'b'; # type of last nonblank token
11900 my $i_terminal = 0; # index of last nonblank token
11901 my $terminal_block_type = "";
11903 # update most recent statement label
11904 $csc_last_label = "" unless ($csc_last_label);
11905 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
11906 my $block_label = $csc_last_label;
11908 # Loop over all tokens of this batch
11909 for my $i ( 0 .. $max_index_to_go ) {
11910 my $type = $types_to_go[$i];
11911 my $block_type = $block_type_to_go[$i];
11912 my $token = $tokens_to_go[$i];
11914 # remember last nonblank token type
11915 if ( $type ne '#' && $type ne 'b' ) {
11916 $terminal_type = $type;
11917 $terminal_block_type = $block_type;
11921 my $type_sequence = $type_sequence_to_go[$i];
11922 if ( $block_type && $type_sequence ) {
11924 if ( $token eq '}' ) {
11926 # restore any leading text saved when we entered this block
11927 if ( defined( $block_leading_text{$type_sequence} ) ) {
11928 ( $block_leading_text, $rblock_leading_if_elsif_text )
11929 = @{ $block_leading_text{$type_sequence} };
11930 $i_block_leading_text = $i;
11931 delete $block_leading_text{$type_sequence};
11932 $rleading_block_if_elsif_text =
11933 $rblock_leading_if_elsif_text;
11936 if ( defined( $csc_block_label{$type_sequence} ) ) {
11937 $block_label = $csc_block_label{$type_sequence};
11938 delete $csc_block_label{$type_sequence};
11941 # if we run into a '}' then we probably started accumulating
11942 # at something like a trailing 'if' clause..no harm done.
11943 if ( $accumulating_text_for_block
11944 && $levels_to_go[$i] <= $leading_block_text_level )
11946 my $lev = $levels_to_go[$i];
11947 reset_block_text_accumulator();
11950 if ( defined( $block_opening_line_number{$type_sequence} ) )
11952 my $output_line_number =
11953 $vertical_aligner_object->get_output_line_number();
11954 $block_line_count =
11955 $output_line_number -
11956 $block_opening_line_number{$type_sequence} + 1;
11957 delete $block_opening_line_number{$type_sequence};
11961 # Error: block opening line undefined for this line..
11962 # This shouldn't be possible, but it is not a
11963 # significant problem.
11967 elsif ( $token eq '{' ) {
11970 $vertical_aligner_object->get_output_line_number();
11971 $block_opening_line_number{$type_sequence} = $line_number;
11973 # set a label for this block, except for
11974 # a bare block which already has the label
11975 # A label can only be used on the next {
11976 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
11977 $csc_block_label{$type_sequence} = $csc_last_label;
11978 $csc_last_label = "";
11980 if ( $accumulating_text_for_block
11981 && $levels_to_go[$i] == $leading_block_text_level )
11984 if ( $accumulating_text_for_block eq $block_type ) {
11986 # save any leading text before we enter this block
11987 $block_leading_text{$type_sequence} = [
11988 $leading_block_text,
11989 $rleading_block_if_elsif_text
11991 $block_opening_line_number{$type_sequence} =
11992 $leading_block_text_line_number;
11993 reset_block_text_accumulator();
11997 # shouldn't happen, but not a serious error.
11998 # We were accumulating -csc text for block type
11999 # $accumulating_text_for_block and unexpectedly
12000 # encountered a '{' for block type $block_type.
12007 && $csc_new_statement_ok
12008 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
12009 && $token =~ /$closing_side_comment_list_pattern/o )
12011 set_block_text_accumulator($i);
12015 # note: ignoring type 'q' because of tricks being played
12016 # with 'q' for hanging side comments
12017 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
12018 $csc_new_statement_ok =
12019 ( $block_type || $type eq 'J' || $type eq ';' );
12022 && $accumulating_text_for_block
12023 && $levels_to_go[$i] == $leading_block_text_level )
12025 reset_block_text_accumulator();
12028 accumulate_block_text($i);
12033 # Treat an 'else' block specially by adding preceding 'if' and
12034 # 'elsif' text. Otherwise, the 'end else' is not helpful,
12035 # especially for cuddled-else formatting.
12036 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
12037 $block_leading_text =
12038 make_else_csc_text( $i_terminal, $terminal_block_type,
12039 $block_leading_text, $rblock_leading_if_elsif_text );
12042 # if this line ends in a label then remember it for the next pass
12043 $csc_last_label = "";
12044 if ( $terminal_type eq 'J' ) {
12045 $csc_last_label = $tokens_to_go[$i_terminal];
12048 return ( $terminal_type, $i_terminal, $i_block_leading_text,
12049 $block_leading_text, $block_line_count, $block_label );
12053 sub make_else_csc_text {
12055 # create additional -csc text for an 'else' and optionally 'elsif',
12056 # depending on the value of switch
12057 # $rOpts_closing_side_comment_else_flag:
12059 # = 0 add 'if' text to trailing else
12060 # = 1 same as 0 plus:
12061 # add 'if' to 'elsif's if can fit in line length
12062 # add last 'elsif' to trailing else if can fit in one line
12063 # = 2 same as 1 but do not check if exceed line length
12065 # $rif_elsif_text = a reference to a list of all previous closing
12066 # side comments created for this if block
12068 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
12069 my $csc_text = $block_leading_text;
12071 if ( $block_type eq 'elsif'
12072 && $rOpts_closing_side_comment_else_flag == 0 )
12077 my $count = @{$rif_elsif_text};
12078 return $csc_text unless ($count);
12080 my $if_text = '[ if' . $rif_elsif_text->[0];
12082 # always show the leading 'if' text on 'else'
12083 if ( $block_type eq 'else' ) {
12084 $csc_text .= $if_text;
12087 # see if that's all
12088 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
12092 my $last_elsif_text = "";
12093 if ( $count > 1 ) {
12094 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
12095 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
12098 # tentatively append one more item
12099 my $saved_text = $csc_text;
12100 if ( $block_type eq 'else' ) {
12101 $csc_text .= $last_elsif_text;
12104 $csc_text .= ' ' . $if_text;
12107 # all done if no length checks requested
12108 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
12112 # undo it if line length exceeded
12114 length($csc_text) +
12115 length($block_type) +
12116 length( $rOpts->{'closing-side-comment-prefix'} ) +
12117 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
12118 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
12119 $csc_text = $saved_text;
12124 { # sub balance_csc_text
12139 sub balance_csc_text {
12141 # Append characters to balance a closing side comment so that editors
12142 # such as vim can correctly jump through code.
12144 # input = ## end foreach my $foo ( sort { $b ...
12145 # output = ## end foreach my $foo ( sort { $b ...})
12147 # NOTE: This routine does not currently filter out structures within
12148 # quoted text because the bounce algorithms in text editors do not
12149 # necessarily do this either (a version of vim was checked and
12150 # did not do this).
12152 # Some complex examples which will cause trouble for some editors:
12153 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
12154 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
12155 # if ( $1 eq '{' ) {
12156 # test file test1/braces.pl has many such examples.
12160 # loop to examine characters one-by-one, RIGHT to LEFT and
12161 # build a balancing ending, LEFT to RIGHT.
12162 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
12164 my $char = substr( $csc, $pos, 1 );
12166 # ignore everything except structural characters
12167 next unless ( $matching_char{$char} );
12169 # pop most recently appended character
12170 my $top = chop($csc);
12172 # push it back plus the mate to the newest character
12173 # unless they balance each other.
12174 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
12177 # return the balanced string
12182 sub add_closing_side_comment {
12184 # add closing side comments after closing block braces if -csc used
12185 my $cscw_block_comment;
12187 #---------------------------------------------------------------
12188 # Step 1: loop through all tokens of this line to accumulate
12189 # the text needed to create the closing side comments. Also see
12190 # how the line ends.
12191 #---------------------------------------------------------------
12193 my ( $terminal_type, $i_terminal, $i_block_leading_text,
12194 $block_leading_text, $block_line_count, $block_label )
12195 = accumulate_csc_text();
12197 #---------------------------------------------------------------
12198 # Step 2: make the closing side comment if this ends a block
12199 #---------------------------------------------------------------
12200 my $have_side_comment = $i_terminal != $max_index_to_go;
12202 # if this line might end in a block closure..
12204 $terminal_type eq '}'
12209 # the block is long enough
12210 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
12212 # or there is an existing comment to check
12213 || ( $have_side_comment
12214 && $rOpts->{'closing-side-comment-warnings'} )
12217 # .. and if this is one of the types of interest
12218 && $block_type_to_go[$i_terminal] =~
12219 /$closing_side_comment_list_pattern/o
12221 # .. but not an anonymous sub
12222 # These are not normally of interest, and their closing braces are
12223 # often followed by commas or semicolons anyway. This also avoids
12224 # possible erratic output due to line numbering inconsistencies
12225 # in the cases where their closing braces terminate a line.
12226 && $block_type_to_go[$i_terminal] ne 'sub'
12228 # ..and the corresponding opening brace must is not in this batch
12229 # (because we do not need to tag one-line blocks, although this
12230 # should also be caught with a positive -csci value)
12231 && $mate_index_to_go[$i_terminal] < 0
12236 # this is the last token (line doesn't have a side comment)
12237 !$have_side_comment
12239 # or the old side comment is a closing side comment
12240 || $tokens_to_go[$max_index_to_go] =~
12241 /$closing_side_comment_prefix_pattern/o
12246 # then make the closing side comment text
12247 if ($block_label) { $block_label .= " " }
12249 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
12251 # append any extra descriptive text collected above
12252 if ( $i_block_leading_text == $i_terminal ) {
12253 $token .= $block_leading_text;
12256 $token = balance_csc_text($token)
12257 if $rOpts->{'closing-side-comments-balanced'};
12259 $token =~ s/\s*$//; # trim any trailing whitespace
12261 # handle case of existing closing side comment
12262 if ($have_side_comment) {
12264 # warn if requested and tokens differ significantly
12265 if ( $rOpts->{'closing-side-comment-warnings'} ) {
12266 my $old_csc = $tokens_to_go[$max_index_to_go];
12267 my $new_csc = $token;
12268 $new_csc =~ s/\s+//g; # trim all whitespace
12269 $old_csc =~ s/\s+//g; # trim all whitespace
12270 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
12271 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
12272 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
12273 my $new_trailing_dots = $1;
12274 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
12276 # Patch to handle multiple closing side comments at
12277 # else and elsif's. These have become too complicated
12278 # to check, so if we see an indication of
12279 # '[ if' or '[ # elsif', then assume they were made
12281 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
12282 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
12284 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
12285 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
12288 # if old comment is contained in new comment,
12289 # only compare the common part.
12290 if ( length($new_csc) > length($old_csc) ) {
12291 $new_csc = substr( $new_csc, 0, length($old_csc) );
12294 # if the new comment is shorter and has been limited,
12295 # only compare the common part.
12296 if ( length($new_csc) < length($old_csc)
12297 && $new_trailing_dots )
12299 $old_csc = substr( $old_csc, 0, length($new_csc) );
12302 # any remaining difference?
12303 if ( $new_csc ne $old_csc ) {
12305 # just leave the old comment if we are below the threshold
12306 # for creating side comments
12307 if ( $block_line_count <
12308 $rOpts->{'closing-side-comment-interval'} )
12313 # otherwise we'll make a note of it
12317 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
12320 # save the old side comment in a new trailing block comment
12321 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
12324 $cscw_block_comment =
12325 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
12330 # No differences.. we can safely delete old comment if we
12331 # are below the threshold
12332 if ( $block_line_count <
12333 $rOpts->{'closing-side-comment-interval'} )
12336 unstore_token_to_go()
12337 if ( $types_to_go[$max_index_to_go] eq '#' );
12338 unstore_token_to_go()
12339 if ( $types_to_go[$max_index_to_go] eq 'b' );
12344 # switch to the new csc (unless we deleted it!)
12345 $tokens_to_go[$max_index_to_go] = $token if $token;
12348 # handle case of NO existing closing side comment
12351 # insert the new side comment into the output token stream
12353 my $block_type = '';
12354 my $type_sequence = '';
12355 my $container_environment =
12356 $container_environment_to_go[$max_index_to_go];
12357 my $level = $levels_to_go[$max_index_to_go];
12358 my $slevel = $nesting_depth_to_go[$max_index_to_go];
12359 my $no_internal_newlines = 0;
12361 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
12362 my $ci_level = $ci_levels_to_go[$max_index_to_go];
12363 my $in_continued_quote = 0;
12365 # first insert a blank token
12366 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
12368 # then the side comment
12369 insert_new_token_to_go( $token, $type, $slevel,
12370 $no_internal_newlines );
12373 return $cscw_block_comment;
12376 sub previous_nonblank_token {
12380 return "" if ( $im < 0 );
12381 if ( $types_to_go[$im] eq 'b' ) { $im--; }
12382 return "" if ( $im < 0 );
12383 $name = $tokens_to_go[$im];
12385 # prepend any sub name to an isolated -> to avoid unwanted alignments
12386 # [test case is test8/penco.pl]
12387 if ( $name eq '->' ) {
12389 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
12390 $name = $tokens_to_go[$im] . $name;
12396 sub send_lines_to_vertical_aligner {
12398 my ( $ri_first, $ri_last, $do_not_pad ) = @_;
12400 my $rindentation_list = [0]; # ref to indentations for each line
12402 # define the array @matching_token_to_go for the output tokens
12403 # which will be non-blank for each special token (such as =>)
12404 # for which alignment is required.
12405 set_vertical_alignment_markers( $ri_first, $ri_last );
12407 # flush if necessary to avoid unwanted alignment
12408 my $must_flush = 0;
12409 if ( @$ri_first > 1 ) {
12411 # flush before a long if statement
12412 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
12417 Perl::Tidy::VerticalAligner::flush();
12420 undo_ci( $ri_first, $ri_last );
12422 set_logical_padding( $ri_first, $ri_last );
12424 # loop to prepare each line for shipment
12425 my $n_last_line = @$ri_first - 1;
12427 for my $n ( 0 .. $n_last_line ) {
12428 my $ibeg = $$ri_first[$n];
12429 my $iend = $$ri_last[$n];
12431 my ( $rtokens, $rfields, $rpatterns ) =
12432 make_alignment_patterns( $ibeg, $iend );
12434 # Set flag to show how much level changes between this line
12435 # and the next line, if we have it.
12437 if ( $n < $n_last_line ) {
12438 my $ibegp = $$ri_first[ $n + 1 ];
12439 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
12442 my ( $indentation, $lev, $level_end, $terminal_type,
12443 $is_semicolon_terminated, $is_outdented_line )
12444 = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
12445 $ri_first, $ri_last, $rindentation_list, $ljump );
12447 # we will allow outdenting of long lines..
12448 my $outdent_long_lines = (
12450 # which are long quotes, if allowed
12451 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
12453 # which are long block comments, if allowed
12455 $types_to_go[$ibeg] eq '#'
12456 && $rOpts->{'outdent-long-comments'}
12458 # but not if this is a static block comment
12459 && !$is_static_block_comment
12464 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
12466 my $rvertical_tightness_flags =
12467 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
12468 $ri_first, $ri_last );
12470 # flush an outdented line to avoid any unwanted vertical alignment
12471 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12473 # Set a flag at the final ':' of a ternary chain to request
12474 # vertical alignment of the final term. Here is a
12475 # slightly complex example:
12477 # $self->{_text} = (
12479 # : $type eq 'item' ? "the $section entry"
12480 # : "the section on $section"
12484 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
12485 # : ' elsewhere in this document'
12488 my $is_terminal_ternary = 0;
12489 if ( $tokens_to_go[$ibeg] eq ':'
12490 || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
12492 my $last_leading_type = ":";
12494 my $iprev = $$ri_first[ $n - 1 ];
12495 $last_leading_type = $types_to_go[$iprev];
12497 if ( $terminal_type ne ';'
12498 && $n_last_line > $n
12499 && $level_end == $lev )
12501 my $inext = $$ri_first[ $n + 1 ];
12502 $level_end = $levels_to_go[$inext];
12503 $terminal_type = $types_to_go[$inext];
12506 $is_terminal_ternary = $last_leading_type eq ':'
12507 && ( ( $terminal_type eq ';' && $level_end <= $lev )
12508 || ( $terminal_type ne ':' && $level_end < $lev ) )
12510 # the terminal term must not contain any ternary terms, as in
12512 # $Is_MSWin32 ? ".\\echo$$"
12513 # : $Is_MacOS ? ":echo$$"
12514 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
12516 && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
12519 # send this new line down the pipe
12520 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
12521 Perl::Tidy::VerticalAligner::valign_input(
12528 $forced_breakpoint_to_go[$iend] || $in_comma_list,
12529 $outdent_long_lines,
12530 $is_terminal_ternary,
12531 $is_semicolon_terminated,
12533 $rvertical_tightness_flags,
12537 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
12539 # flush an outdented line to avoid any unwanted vertical alignment
12540 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12544 # Set flag indicating if this line ends in an opening
12545 # token and is very short, so that a blank line is not
12546 # needed if the subsequent line is a comment.
12547 # Examples of what we are looking for:
12553 $last_output_short_opening_token
12555 # line ends in opening token
12556 = $types_to_go[$iend] =~ /^[\{\(\[L]$/
12560 # line has either single opening token
12563 # or is a single token followed by opening token.
12564 # Note that sub identifiers have blanks like 'sub doit'
12565 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
12568 # and limit total to 10 character widths
12569 && token_sequence_length( $ibeg, $iend ) <= 10;
12571 } # end of loop to output each line
12573 # remember indentation of lines containing opening containers for
12574 # later use by sub set_adjusted_indentation
12575 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
12578 { # begin make_alignment_patterns
12580 my %block_type_map;
12585 # map related block names into a common name to
12587 %block_type_map = (
12598 # map certain keywords to the same 'if' class to align
12599 # long if/elsif sequences. [elsif.pl]
12605 'default' => 'given',
12606 'case' => 'switch',
12608 # treat an 'undef' similar to numbers and quotes
12613 sub make_alignment_patterns {
12615 # Here we do some important preliminary work for the
12616 # vertical aligner. We create three arrays for one
12617 # output line. These arrays contain strings that can
12618 # be tested by the vertical aligner to see if
12619 # consecutive lines can be aligned vertically.
12621 # The three arrays are indexed on the vertical
12622 # alignment fields and are:
12623 # @tokens - a list of any vertical alignment tokens for this line.
12624 # These are tokens, such as '=' '&&' '#' etc which
12625 # we want to might align vertically. These are
12626 # decorated with various information such as
12627 # nesting depth to prevent unwanted vertical
12628 # alignment matches.
12629 # @fields - the actual text of the line between the vertical alignment
12631 # @patterns - a modified list of token types, one for each alignment
12632 # field. These should normally each match before alignment is
12633 # allowed, even when the alignment tokens match.
12634 my ( $ibeg, $iend ) = @_;
12638 my $i_start = $ibeg;
12642 my @container_name = ("");
12643 my @multiple_comma_arrows = (undef);
12645 my $j = 0; # field index
12648 for $i ( $ibeg .. $iend ) {
12650 # Keep track of containers balanced on this line only.
12651 # These are used below to prevent unwanted cross-line alignments.
12652 # Unbalanced containers already avoid aligning across
12653 # container boundaries.
12654 if ( $tokens_to_go[$i] eq '(' ) {
12656 # if container is balanced on this line...
12657 my $i_mate = $mate_index_to_go[$i];
12658 if ( $i_mate > $i && $i_mate <= $iend ) {
12660 my $seqno = $type_sequence_to_go[$i];
12661 my $count = comma_arrow_count($seqno);
12662 $multiple_comma_arrows[$depth] = $count && $count > 1;
12664 # Append the previous token name to make the container name
12665 # more unique. This name will also be given to any commas
12666 # within this container, and it helps avoid undesirable
12667 # alignments of different types of containers.
12668 my $name = previous_nonblank_token($i);
12670 $container_name[$depth] = "+" . $name;
12672 # Make the container name even more unique if necessary.
12673 # If we are not vertically aligning this opening paren,
12674 # append a character count to avoid bad alignment because
12675 # it usually looks bad to align commas within containers
12676 # for which the opening parens do not align. Here
12677 # is an example very BAD alignment of commas (because
12678 # the atan2 functions are not all aligned):
12680 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
12681 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
12682 # $X * atan2( $X, 1 ) -
12683 # $Y * atan2( $Y, 1 );
12685 # On the other hand, it is usually okay to align commas if
12686 # opening parens align, such as:
12687 # glVertex3d( $cx + $s * $xs, $cy, $z );
12688 # glVertex3d( $cx, $cy + $s * $ys, $z );
12689 # glVertex3d( $cx - $s * $xs, $cy, $z );
12690 # glVertex3d( $cx, $cy - $s * $ys, $z );
12692 # To distinguish between these situations, we will
12693 # append the length of the line from the previous matching
12694 # token, or beginning of line, to the function name. This
12695 # will allow the vertical aligner to reject undesirable
12698 # if we are not aligning on this paren...
12699 if ( $matching_token_to_go[$i] eq '' ) {
12701 # Sum length from previous alignment, or start of line.
12703 ( $i_start == $ibeg )
12704 ? total_line_length( $i_start, $i - 1 )
12705 : token_sequence_length( $i_start, $i - 1 );
12707 # tack length onto the container name to make unique
12708 $container_name[$depth] .= "-" . $len;
12712 elsif ( $tokens_to_go[$i] eq ')' ) {
12713 $depth-- if $depth > 0;
12716 # if we find a new synchronization token, we are done with
12718 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
12720 my $tok = my $raw_tok = $matching_token_to_go[$i];
12722 # make separators in different nesting depths unique
12723 # by appending the nesting depth digit.
12724 if ( $raw_tok ne '#' ) {
12725 $tok .= "$nesting_depth_to_go[$i]";
12728 # also decorate commas with any container name to avoid
12729 # unwanted cross-line alignments.
12730 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
12731 if ( $container_name[$depth] ) {
12732 $tok .= $container_name[$depth];
12736 # Patch to avoid aligning leading and trailing if, unless.
12737 # Mark trailing if, unless statements with container names.
12738 # This makes them different from leading if, unless which
12739 # are not so marked at present. If we ever need to name
12740 # them too, we could use ci to distinguish them.
12741 # Example problem to avoid:
12742 # return ( 2, "DBERROR" )
12743 # if ( $retval == 2 );
12744 # if ( scalar @_ ) {
12745 # my ( $a, $b, $c, $d, $e, $f ) = @_;
12747 if ( $raw_tok eq '(' ) {
12748 my $ci = $ci_levels_to_go[$ibeg];
12749 if ( $container_name[$depth] =~ /^\+(if|unless)/
12752 $tok .= $container_name[$depth];
12756 # Decorate block braces with block types to avoid
12757 # unwanted alignments such as the following:
12758 # foreach ( @{$routput_array} ) { $fh->print($_) }
12759 # eval { $fh->close() };
12760 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
12761 my $block_type = $block_type_to_go[$i];
12763 # map certain related block types to allow
12764 # else blocks to align
12765 $block_type = $block_type_map{$block_type}
12766 if ( defined( $block_type_map{$block_type} ) );
12768 # remove sub names to allow one-line sub braces to align
12769 # regardless of name
12770 #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
12771 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
12773 # allow all control-type blocks to align
12774 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
12776 $tok .= $block_type;
12779 # concatenate the text of the consecutive tokens to form
12782 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
12784 # store the alignment token for this field
12785 push( @tokens, $tok );
12787 # get ready for the next batch
12790 $patterns[$j] = "";
12793 # continue accumulating tokens
12794 # handle non-keywords..
12795 if ( $types_to_go[$i] ne 'k' ) {
12796 my $type = $types_to_go[$i];
12798 # Mark most things before arrows as a quote to
12799 # get them to line up. Testfile: mixed.pl.
12800 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
12801 my $next_type = $types_to_go[ $i + 1 ];
12802 my $i_next_nonblank =
12803 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12805 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
12808 # Patch to ignore leading minus before words,
12809 # by changing pattern 'mQ' into just 'Q',
12810 # so that we can align things like this:
12811 # Button => "Print letter \"~$_\"",
12812 # -command => [ sub { print "$_[0]\n" }, $_ ],
12813 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
12817 # patch to make numbers and quotes align
12818 if ( $type eq 'n' ) { $type = 'Q' }
12820 # patch to ignore any ! in patterns
12821 if ( $type eq '!' ) { $type = '' }
12823 $patterns[$j] .= $type;
12826 # for keywords we have to use the actual text
12829 my $tok = $tokens_to_go[$i];
12831 # but map certain keywords to a common string to allow
12833 $tok = $keyword_map{$tok}
12834 if ( defined( $keyword_map{$tok} ) );
12835 $patterns[$j] .= $tok;
12839 # done with this line .. join text of tokens to make the last field
12840 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
12841 return ( \@tokens, \@fields, \@patterns );
12844 } # end make_alignment_patterns
12846 { # begin unmatched_indexes
12848 # closure to keep track of unbalanced containers.
12849 # arrays shared by the routines in this block:
12850 my @unmatched_opening_indexes_in_this_batch;
12851 my @unmatched_closing_indexes_in_this_batch;
12852 my %comma_arrow_count;
12854 sub is_unbalanced_batch {
12855 @unmatched_opening_indexes_in_this_batch +
12856 @unmatched_closing_indexes_in_this_batch;
12859 sub comma_arrow_count {
12861 return $comma_arrow_count{$seqno};
12864 sub match_opening_and_closing_tokens {
12866 # Match up indexes of opening and closing braces, etc, in this batch.
12867 # This has to be done after all tokens are stored because unstoring
12868 # of tokens would otherwise cause trouble.
12870 @unmatched_opening_indexes_in_this_batch = ();
12871 @unmatched_closing_indexes_in_this_batch = ();
12872 %comma_arrow_count = ();
12873 my $comma_arrow_count_contained = 0;
12875 my ( $i, $i_mate, $token );
12876 foreach $i ( 0 .. $max_index_to_go ) {
12877 if ( $type_sequence_to_go[$i] ) {
12878 $token = $tokens_to_go[$i];
12879 if ( $token =~ /^[\(\[\{\?]$/ ) {
12880 push @unmatched_opening_indexes_in_this_batch, $i;
12882 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
12884 $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12885 if ( defined($i_mate) && $i_mate >= 0 ) {
12886 if ( $type_sequence_to_go[$i_mate] ==
12887 $type_sequence_to_go[$i] )
12889 $mate_index_to_go[$i] = $i_mate;
12890 $mate_index_to_go[$i_mate] = $i;
12891 my $seqno = $type_sequence_to_go[$i];
12892 if ( $comma_arrow_count{$seqno} ) {
12893 $comma_arrow_count_contained +=
12894 $comma_arrow_count{$seqno};
12898 push @unmatched_opening_indexes_in_this_batch,
12900 push @unmatched_closing_indexes_in_this_batch, $i;
12904 push @unmatched_closing_indexes_in_this_batch, $i;
12908 elsif ( $tokens_to_go[$i] eq '=>' ) {
12909 if (@unmatched_opening_indexes_in_this_batch) {
12910 my $j = $unmatched_opening_indexes_in_this_batch[-1];
12911 my $seqno = $type_sequence_to_go[$j];
12912 $comma_arrow_count{$seqno}++;
12916 return $comma_arrow_count_contained;
12919 sub save_opening_indentation {
12921 # This should be called after each batch of tokens is output. It
12922 # saves indentations of lines of all unmatched opening tokens.
12923 # These will be used by sub get_opening_indentation.
12925 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
12927 # we no longer need indentations of any saved indentations which
12928 # are unmatched closing tokens in this batch, because we will
12929 # never encounter them again. So we can delete them to keep
12930 # the hash size down.
12931 foreach (@unmatched_closing_indexes_in_this_batch) {
12932 my $seqno = $type_sequence_to_go[$_];
12933 delete $saved_opening_indentation{$seqno};
12936 # we need to save indentations of any unmatched opening tokens
12937 # in this batch because we may need them in a subsequent batch.
12938 foreach (@unmatched_opening_indexes_in_this_batch) {
12939 my $seqno = $type_sequence_to_go[$_];
12940 $saved_opening_indentation{$seqno} = [
12941 lookup_opening_indentation(
12942 $_, $ri_first, $ri_last, $rindentation_list
12947 } # end unmatched_indexes
12949 sub get_opening_indentation {
12951 # get the indentation of the line which output the opening token
12952 # corresponding to a given closing token in the current output batch.
12955 # $i_closing - index in this line of a closing token ')' '}' or ']'
12957 # $ri_first - reference to list of the first index $i for each output
12958 # line in this batch
12959 # $ri_last - reference to list of the last index $i for each output line
12961 # $rindentation_list - reference to a list containing the indentation
12962 # used for each line.
12965 # -the indentation of the line which contained the opening token
12966 # which matches the token at index $i_opening
12967 # -and its offset (number of columns) from the start of the line
12969 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
12971 # first, see if the opening token is in the current batch
12972 my $i_opening = $mate_index_to_go[$i_closing];
12973 my ( $indent, $offset, $is_leading, $exists );
12975 if ( $i_opening >= 0 ) {
12977 # it is..look up the indentation
12978 ( $indent, $offset, $is_leading ) =
12979 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
12980 $rindentation_list );
12983 # if not, it should have been stored in the hash by a previous batch
12985 my $seqno = $type_sequence_to_go[$i_closing];
12987 if ( $saved_opening_indentation{$seqno} ) {
12988 ( $indent, $offset, $is_leading ) =
12989 @{ $saved_opening_indentation{$seqno} };
12992 # some kind of serious error
12993 # (example is badfile.t)
13002 # if no sequence number it must be an unbalanced container
13010 return ( $indent, $offset, $is_leading, $exists );
13013 sub lookup_opening_indentation {
13015 # get the indentation of the line in the current output batch
13016 # which output a selected opening token
13019 # $i_opening - index of an opening token in the current output batch
13020 # whose line indentation we need
13021 # $ri_first - reference to list of the first index $i for each output
13022 # line in this batch
13023 # $ri_last - reference to list of the last index $i for each output line
13025 # $rindentation_list - reference to a list containing the indentation
13026 # used for each line. (NOTE: the first slot in
13027 # this list is the last returned line number, and this is
13028 # followed by the list of indentations).
13031 # -the indentation of the line which contained token $i_opening
13032 # -and its offset (number of columns) from the start of the line
13034 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
13036 my $nline = $rindentation_list->[0]; # line number of previous lookup
13038 # reset line location if necessary
13039 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
13041 # find the correct line
13042 unless ( $i_opening > $ri_last->[-1] ) {
13043 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
13046 # error - token index is out of bounds - shouldn't happen
13049 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
13051 report_definite_bug();
13052 $nline = $#{$ri_last};
13055 $rindentation_list->[0] =
13056 $nline; # save line number to start looking next call
13057 my $ibeg = $ri_start->[$nline];
13058 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
13059 my $is_leading = ( $ibeg == $i_opening );
13060 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
13064 my %is_if_elsif_else_unless_while_until_for_foreach;
13068 # These block types may have text between the keyword and opening
13069 # curly. Note: 'else' does not, but must be included to allow trailing
13070 # if/elsif text to be appended.
13071 # patch for SWITCH/CASE: added 'case' and 'when'
13072 @_ = qw(if elsif else unless while until for foreach case when);
13073 @is_if_elsif_else_unless_while_until_for_foreach{@_} =
13077 sub set_adjusted_indentation {
13079 # This routine has the final say regarding the actual indentation of
13080 # a line. It starts with the basic indentation which has been
13081 # defined for the leading token, and then takes into account any
13082 # options that the user has set regarding special indenting and
13085 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
13086 $rindentation_list, $level_jump )
13089 # we need to know the last token of this line
13090 my ( $terminal_type, $i_terminal ) =
13091 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
13093 my $is_outdented_line = 0;
13095 my $is_semicolon_terminated = $terminal_type eq ';'
13096 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
13098 ##########################################################
13099 # Section 1: set a flag and a default indentation
13101 # Most lines are indented according to the initial token.
13102 # But it is common to outdent to the level just after the
13103 # terminal token in certain cases...
13104 # adjust_indentation flag:
13105 # 0 - do not adjust
13107 # 2 - vertically align with opening token
13109 ##########################################################
13110 my $adjust_indentation = 0;
13111 my $default_adjust_indentation = $adjust_indentation;
13114 $opening_indentation, $opening_offset,
13115 $is_leading, $opening_exists
13118 # if we are at a closing token of some type..
13119 if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
13121 # get the indentation of the line containing the corresponding
13124 $opening_indentation, $opening_offset,
13125 $is_leading, $opening_exists
13127 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13128 $rindentation_list );
13130 # First set the default behavior:
13133 # default behavior is to outdent closing lines
13134 # of the form: "); }; ]; )->xxx;"
13135 $is_semicolon_terminated
13137 # and 'cuddled parens' of the form: ")->pack("
13139 $terminal_type eq '('
13140 && $types_to_go[$ibeg] eq ')'
13141 && ( $nesting_depth_to_go[$iend] + 1 ==
13142 $nesting_depth_to_go[$ibeg] )
13145 # and when the next line is at a lower indentation level
13146 # PATCH: and only if the style allows undoing continuation
13147 # for all closing token types. We should really wait until
13148 # the indentation of the next line is known and then make
13149 # a decision, but that would require another pass.
13150 || ( $level_jump < 0 && !$some_closing_token_indentation )
13153 $adjust_indentation = 1;
13156 # outdent something like '),'
13158 $terminal_type eq ','
13160 # allow just one character before the comma
13161 && $i_terminal == $ibeg + 1
13163 # require LIST environment; otherwise, we may outdent too much -
13164 # this can happen in calls without parentheses (overload.t);
13165 && $container_environment_to_go[$i_terminal] eq 'LIST'
13168 $adjust_indentation = 1;
13171 # undo continuation indentation of a terminal closing token if
13172 # it is the last token before a level decrease. This will allow
13173 # a closing token to line up with its opening counterpart, and
13174 # avoids a indentation jump larger than 1 level.
13175 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
13176 && $i_terminal == $ibeg )
13178 my $ci = $ci_levels_to_go[$ibeg];
13179 my $lev = $levels_to_go[$ibeg];
13180 my $next_type = $types_to_go[ $ibeg + 1 ];
13181 my $i_next_nonblank =
13182 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
13183 if ( $i_next_nonblank <= $max_index_to_go
13184 && $levels_to_go[$i_next_nonblank] < $lev )
13186 $adjust_indentation = 1;
13189 # Patch for RT #96101, in which closing brace of anonymous subs
13190 # was not outdented. We should look ahead and see if there is
13191 # a level decrease at the next token (i.e., a closing token),
13192 # but right now we do not have that information. For now
13193 # we see if we are in a list, and this works well.
13194 # See test files 'sub*.t' for good test cases.
13195 if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
13196 && $container_environment_to_go[$i_terminal] eq 'LIST'
13197 && !$rOpts->{'indent-closing-brace'} )
13200 $opening_indentation, $opening_offset,
13201 $is_leading, $opening_exists
13203 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13204 $rindentation_list );
13205 my $indentation = $leading_spaces_to_go[$ibeg];
13206 if ( defined($opening_indentation)
13207 && get_SPACES($indentation) >
13208 get_SPACES($opening_indentation) )
13210 $adjust_indentation = 1;
13215 # YVES patch 1 of 2:
13216 # Undo ci of line with leading closing eval brace,
13217 # but not beyond the indention of the line with
13218 # the opening brace.
13219 if ( $block_type_to_go[$ibeg] eq 'eval'
13220 && !$rOpts->{'line-up-parentheses'}
13221 && !$rOpts->{'indent-closing-brace'} )
13224 $opening_indentation, $opening_offset,
13225 $is_leading, $opening_exists
13227 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13228 $rindentation_list );
13229 my $indentation = $leading_spaces_to_go[$ibeg];
13230 if ( defined($opening_indentation)
13231 && get_SPACES($indentation) >
13232 get_SPACES($opening_indentation) )
13234 $adjust_indentation = 1;
13238 $default_adjust_indentation = $adjust_indentation;
13240 # Now modify default behavior according to user request:
13241 # handle option to indent non-blocks of the form ); }; ];
13242 # But don't do special indentation to something like ')->pack('
13243 if ( !$block_type_to_go[$ibeg] ) {
13244 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
13246 if ( $i_terminal <= $ibeg + 1
13247 || $is_semicolon_terminated )
13249 $adjust_indentation = 2;
13252 $adjust_indentation = 0;
13255 elsif ( $cti == 2 ) {
13256 if ($is_semicolon_terminated) {
13257 $adjust_indentation = 3;
13260 $adjust_indentation = 0;
13263 elsif ( $cti == 3 ) {
13264 $adjust_indentation = 3;
13268 # handle option to indent blocks
13271 $rOpts->{'indent-closing-brace'}
13273 $i_terminal == $ibeg # isolated terminal '}'
13274 || $is_semicolon_terminated
13278 $adjust_indentation = 3;
13283 # if at ');', '};', '>;', and '];' of a terminal qw quote
13284 elsif ($$rpatterns[0] =~ /^qb*;$/
13285 && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
13287 if ( $closing_token_indentation{$1} == 0 ) {
13288 $adjust_indentation = 1;
13291 $adjust_indentation = 3;
13295 # if line begins with a ':', align it with any
13296 # previous line leading with corresponding ?
13297 elsif ( $types_to_go[$ibeg] eq ':' ) {
13299 $opening_indentation, $opening_offset,
13300 $is_leading, $opening_exists
13302 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13303 $rindentation_list );
13304 if ($is_leading) { $adjust_indentation = 2; }
13307 ##########################################################
13308 # Section 2: set indentation according to flag set above
13310 # Select the indentation object to define leading
13311 # whitespace. If we are outdenting something like '} } );'
13312 # then we want to use one level below the last token
13313 # ($i_terminal) in order to get it to fully outdent through
13315 ##########################################################
13318 my $level_end = $levels_to_go[$iend];
13320 if ( $adjust_indentation == 0 ) {
13321 $indentation = $leading_spaces_to_go[$ibeg];
13322 $lev = $levels_to_go[$ibeg];
13324 elsif ( $adjust_indentation == 1 ) {
13325 $indentation = $reduced_spaces_to_go[$i_terminal];
13326 $lev = $levels_to_go[$i_terminal];
13329 # handle indented closing token which aligns with opening token
13330 elsif ( $adjust_indentation == 2 ) {
13332 # handle option to align closing token with opening token
13333 $lev = $levels_to_go[$ibeg];
13335 # calculate spaces needed to align with opening token
13337 get_SPACES($opening_indentation) + $opening_offset;
13339 # Indent less than the previous line.
13341 # Problem: For -lp we don't exactly know what it was if there
13342 # were recoverable spaces sent to the aligner. A good solution
13343 # would be to force a flush of the vertical alignment buffer, so
13344 # that we would know. For now, this rule is used for -lp:
13346 # When the last line did not start with a closing token we will
13347 # be optimistic that the aligner will recover everything wanted.
13349 # This rule will prevent us from breaking a hierarchy of closing
13350 # tokens, and in a worst case will leave a closing paren too far
13351 # indented, but this is better than frequently leaving it not
13353 my $last_spaces = get_SPACES($last_indentation_written);
13354 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
13356 get_RECOVERABLE_SPACES($last_indentation_written);
13359 # reset the indentation to the new space count if it works
13360 # only options are all or none: nothing in-between looks good
13361 $lev = $levels_to_go[$ibeg];
13362 if ( $space_count < $last_spaces ) {
13363 if ($rOpts_line_up_parentheses) {
13364 my $lev = $levels_to_go[$ibeg];
13366 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13369 $indentation = $space_count;
13373 # revert to default if it doesn't work
13375 $space_count = leading_spaces_to_go($ibeg);
13376 if ( $default_adjust_indentation == 0 ) {
13377 $indentation = $leading_spaces_to_go[$ibeg];
13379 elsif ( $default_adjust_indentation == 1 ) {
13380 $indentation = $reduced_spaces_to_go[$i_terminal];
13381 $lev = $levels_to_go[$i_terminal];
13386 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
13389 # handle -icb (indented closing code block braces)
13390 # Updated method for indented block braces: indent one full level if
13391 # there is no continuation indentation. This will occur for major
13392 # structures such as sub, if, else, but not for things like map
13395 # Note: only code blocks without continuation indentation are
13396 # handled here (if, else, unless, ..). In the following snippet,
13397 # the terminal brace of the sort block will have continuation
13398 # indentation as shown so it will not be handled by the coding
13399 # here. We would have to undo the continuation indentation to do
13400 # this, but it probably looks ok as is. This is a possible future
13401 # update for semicolon terminated lines.
13403 # if ($sortby eq 'date' or $sortby eq 'size') {
13405 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
13410 if ( $block_type_to_go[$ibeg]
13411 && $ci_levels_to_go[$i_terminal] == 0 )
13413 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
13414 $indentation = $spaces + $rOpts_indent_columns;
13416 # NOTE: for -lp we could create a new indentation object, but
13417 # there is probably no need to do it
13420 # handle -icp and any -icb block braces which fall through above
13421 # test such as the 'sort' block mentioned above.
13424 # There are currently two ways to handle -icp...
13425 # One way is to use the indentation of the previous line:
13426 # $indentation = $last_indentation_written;
13428 # The other way is to use the indentation that the previous line
13429 # would have had if it hadn't been adjusted:
13430 $indentation = $last_unadjusted_indentation;
13432 # Current method: use the minimum of the two. This avoids
13433 # inconsistent indentation.
13434 if ( get_SPACES($last_indentation_written) <
13435 get_SPACES($indentation) )
13437 $indentation = $last_indentation_written;
13441 # use previous indentation but use own level
13442 # to cause list to be flushed properly
13443 $lev = $levels_to_go[$ibeg];
13446 # remember indentation except for multi-line quotes, which get
13448 unless ( $ibeg == 0 && $starting_in_quote ) {
13449 $last_indentation_written = $indentation;
13450 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
13451 $last_leading_token = $tokens_to_go[$ibeg];
13454 # be sure lines with leading closing tokens are not outdented more
13455 # than the line which contained the corresponding opening token.
13457 #############################################################
13458 # updated per bug report in alex_bug.pl: we must not
13459 # mess with the indentation of closing logical braces so
13460 # we must treat something like '} else {' as if it were
13461 # an isolated brace my $is_isolated_block_brace = (
13462 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
13463 #############################################################
13464 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
13465 && ( $iend == $ibeg
13466 || $is_if_elsif_else_unless_while_until_for_foreach{
13467 $block_type_to_go[$ibeg]
13470 # only do this for a ':; which is aligned with its leading '?'
13471 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
13472 if ( defined($opening_indentation)
13473 && !$is_isolated_block_brace
13474 && !$is_unaligned_colon )
13476 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
13477 $indentation = $opening_indentation;
13481 # remember the indentation of each line of this batch
13482 push @{$rindentation_list}, $indentation;
13484 # outdent lines with certain leading tokens...
13487 # must be first word of this batch
13493 # certain leading keywords if requested
13495 $rOpts->{'outdent-keywords'}
13496 && $types_to_go[$ibeg] eq 'k'
13497 && $outdent_keyword{ $tokens_to_go[$ibeg] }
13500 # or labels if requested
13501 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
13503 # or static block comments if requested
13504 || ( $types_to_go[$ibeg] eq '#'
13505 && $rOpts->{'outdent-static-block-comments'}
13506 && $is_static_block_comment )
13511 my $space_count = leading_spaces_to_go($ibeg);
13512 if ( $space_count > 0 ) {
13513 $space_count -= $rOpts_continuation_indentation;
13514 $is_outdented_line = 1;
13515 if ( $space_count < 0 ) { $space_count = 0 }
13517 # do not promote a spaced static block comment to non-spaced;
13518 # this is not normally necessary but could be for some
13519 # unusual user inputs (such as -ci = -i)
13520 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
13524 if ($rOpts_line_up_parentheses) {
13526 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13529 $indentation = $space_count;
13534 return ( $indentation, $lev, $level_end, $terminal_type,
13535 $is_semicolon_terminated, $is_outdented_line );
13539 sub set_vertical_tightness_flags {
13541 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
13543 # Define vertical tightness controls for the nth line of a batch.
13544 # We create an array of parameters which tell the vertical aligner
13545 # if we should combine this line with the next line to achieve the
13546 # desired vertical tightness. The array of parameters contains:
13548 # [0] type: 1=opening non-block 2=closing non-block
13549 # 3=opening block brace 4=closing block brace
13551 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
13552 # if closing: spaces of padding to use
13553 # [2] sequence number of container
13554 # [3] valid flag: do not append if this flag is false. Will be
13555 # true if appropriate -vt flag is set. Otherwise, Will be
13556 # made true only for 2 line container in parens with -lp
13558 # These flags are used by sub set_leading_whitespace in
13559 # the vertical aligner
13561 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
13563 #--------------------------------------------------------------
13564 # Vertical Tightness Flags Section 1:
13565 # Handle Lines 1 .. n-1 but not the last line
13566 # For non-BLOCK tokens, we will need to examine the next line
13567 # too, so we won't consider the last line.
13568 #--------------------------------------------------------------
13569 if ( $n < $n_last_line ) {
13571 #--------------------------------------------------------------
13572 # Vertical Tightness Flags Section 1a:
13573 # Look for Type 1, last token of this line is a non-block opening token
13574 #--------------------------------------------------------------
13575 my $ibeg_next = $$ri_first[ $n + 1 ];
13576 my $token_end = $tokens_to_go[$iend];
13577 my $iend_next = $$ri_last[ $n + 1 ];
13579 $type_sequence_to_go[$iend]
13580 && !$block_type_to_go[$iend]
13581 && $is_opening_token{$token_end}
13583 $opening_vertical_tightness{$token_end} > 0
13585 # allow 2-line method call to be closed up
13586 || ( $rOpts_line_up_parentheses
13587 && $token_end eq '('
13589 && $types_to_go[ $iend - 1 ] ne 'b' )
13594 # avoid multiple jumps in nesting depth in one line if
13596 my $ovt = $opening_vertical_tightness{$token_end};
13597 my $iend_next = $$ri_last[ $n + 1 ];
13600 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
13601 $nesting_depth_to_go[$ibeg_next] )
13605 # If -vt flag has not been set, mark this as invalid
13606 # and aligner will validate it if it sees the closing paren
13608 my $valid_flag = $ovt;
13609 @{$rvertical_tightness_flags} =
13610 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
13614 #--------------------------------------------------------------
13615 # Vertical Tightness Flags Section 1b:
13616 # Look for Type 2, first token of next line is a non-block closing
13617 # token .. and be sure this line does not have a side comment
13618 #--------------------------------------------------------------
13619 my $token_next = $tokens_to_go[$ibeg_next];
13620 if ( $type_sequence_to_go[$ibeg_next]
13621 && !$block_type_to_go[$ibeg_next]
13622 && $is_closing_token{$token_next}
13623 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
13625 my $ovt = $opening_vertical_tightness{$token_next};
13626 my $cvt = $closing_vertical_tightness{$token_next};
13629 # never append a trailing line like )->pack(
13630 # because it will throw off later alignment
13632 $nesting_depth_to_go[$ibeg_next] ==
13633 $nesting_depth_to_go[ $iend_next + 1 ] + 1
13638 $container_environment_to_go[$ibeg_next] ne 'LIST'
13642 # allow closing up 2-line method calls
13643 || ( $rOpts_line_up_parentheses
13644 && $token_next eq ')' )
13651 # decide which trailing closing tokens to append..
13653 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
13655 my $str = join( '',
13656 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
13658 # append closing token if followed by comment or ';'
13659 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
13663 my $valid_flag = $cvt;
13664 @{$rvertical_tightness_flags} = (
13666 $tightness{$token_next} == 2 ? 0 : 1,
13667 $type_sequence_to_go[$ibeg_next], $valid_flag,
13673 #--------------------------------------------------------------
13674 # Vertical Tightness Flags Section 1c:
13675 # Implement the Opening Token Right flag (Type 2)..
13676 # If requested, move an isolated trailing opening token to the end of
13677 # the previous line which ended in a comma. We could do this
13678 # in sub recombine_breakpoints but that would cause problems
13679 # with -lp formatting. The problem is that indentation will
13680 # quickly move far to the right in nested expressions. By
13681 # doing it after indentation has been set, we avoid changes
13682 # to the indentation. Actual movement of the token takes place
13683 # in sub valign_output_step_B.
13684 #--------------------------------------------------------------
13686 $opening_token_right{ $tokens_to_go[$ibeg_next] }
13688 # previous line is not opening
13689 # (use -sot to combine with it)
13690 && !$is_opening_token{$token_end}
13692 # previous line ended in one of these
13693 # (add other cases if necessary; '=>' and '.' are not necessary
13694 && !$block_type_to_go[$ibeg_next]
13696 # this is a line with just an opening token
13697 && ( $iend_next == $ibeg_next
13698 || $iend_next == $ibeg_next + 2
13699 && $types_to_go[$iend_next] eq '#' )
13701 # looks bad if we align vertically with the wrong container
13702 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
13705 my $valid_flag = 1;
13706 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13707 @{$rvertical_tightness_flags} =
13708 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
13711 #--------------------------------------------------------------
13712 # Vertical Tightness Flags Section 1d:
13713 # Stacking of opening and closing tokens (Type 2)
13714 #--------------------------------------------------------------
13716 my $token_beg_next = $tokens_to_go[$ibeg_next];
13718 # patch to make something like 'qw(' behave like an opening paren
13720 if ( $types_to_go[$ibeg_next] eq 'q' ) {
13721 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
13722 $token_beg_next = $1;
13726 if ( $is_closing_token{$token_end}
13727 && $is_closing_token{$token_beg_next} )
13729 $stackable = $stack_closing_token{$token_beg_next}
13730 unless ( $block_type_to_go[$ibeg_next] )
13731 ; # shouldn't happen; just checking
13733 elsif ($is_opening_token{$token_end}
13734 && $is_opening_token{$token_beg_next} )
13736 $stackable = $stack_opening_token{$token_beg_next}
13737 unless ( $block_type_to_go[$ibeg_next] )
13738 ; # shouldn't happen; just checking
13743 my $is_semicolon_terminated;
13744 if ( $n + 1 == $n_last_line ) {
13745 my ( $terminal_type, $i_terminal ) = terminal_type(
13746 \@types_to_go, \@block_type_to_go,
13747 $ibeg_next, $iend_next
13749 $is_semicolon_terminated = $terminal_type eq ';'
13750 && $nesting_depth_to_go[$iend_next] <
13751 $nesting_depth_to_go[$ibeg_next];
13754 # this must be a line with just an opening token
13755 # or end in a semicolon
13757 $is_semicolon_terminated
13758 || ( $iend_next == $ibeg_next
13759 || $iend_next == $ibeg_next + 2
13760 && $types_to_go[$iend_next] eq '#' )
13763 my $valid_flag = 1;
13764 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13765 @{$rvertical_tightness_flags} =
13766 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
13772 #--------------------------------------------------------------
13773 # Vertical Tightness Flags Section 2:
13774 # Handle type 3, opening block braces on last line of the batch
13775 # Check for a last line with isolated opening BLOCK curly
13776 #--------------------------------------------------------------
13777 elsif ($rOpts_block_brace_vertical_tightness
13779 && $types_to_go[$iend] eq '{'
13780 && $block_type_to_go[$iend] =~
13781 /$block_brace_vertical_tightness_pattern/o )
13783 @{$rvertical_tightness_flags} =
13784 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
13787 #--------------------------------------------------------------
13788 # Vertical Tightness Flags Section 3:
13789 # Handle type 4, a closing block brace on the last line of the batch Check
13790 # for a last line with isolated closing BLOCK curly
13791 #--------------------------------------------------------------
13792 elsif ($rOpts_stack_closing_block_brace
13794 && $block_type_to_go[$iend]
13795 && $types_to_go[$iend] eq '}' )
13797 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
13798 @{$rvertical_tightness_flags} =
13799 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
13802 # pack in the sequence numbers of the ends of this line
13803 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
13804 $rvertical_tightness_flags->[5] = get_seqno($iend);
13805 return $rvertical_tightness_flags;
13810 # get opening and closing sequence numbers of a token for the vertical
13811 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
13812 # to be treated somewhat like opening and closing tokens for stacking
13813 # tokens by the vertical aligner.
13815 my $seqno = $type_sequence_to_go[$ii];
13816 if ( $types_to_go[$ii] eq 'q' ) {
13819 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
13822 if ( !$ending_in_quote ) {
13823 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
13831 my %is_vertical_alignment_type;
13832 my %is_vertical_alignment_keyword;
13833 my %is_terminal_alignment_type;
13837 # Removed =~ from list to improve chances of alignment
13839 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
13840 { ? : => && || // ~~ !~~
13842 @is_vertical_alignment_type{@_} = (1) x scalar(@_);
13844 # only align these at end of line
13846 @is_terminal_alignment_type{@_} = (1) x scalar(@_);
13848 # eq and ne were removed from this list to improve alignment chances
13849 @_ = qw(if unless and or err for foreach while until);
13850 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
13853 sub set_vertical_alignment_markers {
13855 # This routine takes the first step toward vertical alignment of the
13856 # lines of output text. It looks for certain tokens which can serve as
13857 # vertical alignment markers (such as an '=').
13859 # Method: We look at each token $i in this output batch and set
13860 # $matching_token_to_go[$i] equal to those tokens at which we would
13861 # accept vertical alignment.
13863 # nothing to do if we aren't allowed to change whitespace
13864 if ( !$rOpts_add_whitespace ) {
13865 for my $i ( 0 .. $max_index_to_go ) {
13866 $matching_token_to_go[$i] = '';
13871 my ( $ri_first, $ri_last ) = @_;
13873 # remember the index of last nonblank token before any sidecomment
13874 my $i_terminal = $max_index_to_go;
13875 if ( $types_to_go[$i_terminal] eq '#' ) {
13876 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
13877 if ( $i_terminal > 0 ) { --$i_terminal }
13881 # look at each line of this batch..
13882 my $last_vertical_alignment_before_index;
13883 my $vert_last_nonblank_type;
13884 my $vert_last_nonblank_token;
13885 my $vert_last_nonblank_block_type;
13886 my $max_line = @$ri_first - 1;
13887 my ( $i, $type, $token, $block_type, $alignment_type );
13888 my ( $ibeg, $iend, $line );
13890 foreach $line ( 0 .. $max_line ) {
13891 $ibeg = $$ri_first[$line];
13892 $iend = $$ri_last[$line];
13893 $last_vertical_alignment_before_index = -1;
13894 $vert_last_nonblank_type = '';
13895 $vert_last_nonblank_token = '';
13896 $vert_last_nonblank_block_type = '';
13898 # look at each token in this output line..
13899 foreach $i ( $ibeg .. $iend ) {
13900 $alignment_type = '';
13901 $type = $types_to_go[$i];
13902 $block_type = $block_type_to_go[$i];
13903 $token = $tokens_to_go[$i];
13905 # check for flag indicating that we should not align
13907 if ( $matching_token_to_go[$i] ) {
13908 $matching_token_to_go[$i] = '';
13912 #--------------------------------------------------------
13913 # First see if we want to align BEFORE this token
13914 #--------------------------------------------------------
13916 # The first possible token that we can align before
13917 # is index 2 because: 1) it doesn't normally make sense to
13918 # align before the first token and 2) the second
13919 # token must be a blank if we are to align before
13921 if ( $i < $ibeg + 2 ) { }
13923 # must follow a blank token
13924 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
13926 # align a side comment --
13927 elsif ( $type eq '#' ) {
13931 # it is a static side comment
13933 $rOpts->{'static-side-comments'}
13934 && $token =~ /$static_side_comment_pattern/o
13937 # or a closing side comment
13938 || ( $vert_last_nonblank_block_type
13940 /$closing_side_comment_prefix_pattern/o )
13943 $alignment_type = $type;
13944 } ## Example of a static side comment
13947 # otherwise, do not align two in a row to create a
13949 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
13951 # align before one of these keywords
13952 # (within a line, since $i>1)
13953 elsif ( $type eq 'k' ) {
13955 # /^(if|unless|and|or|eq|ne)$/
13956 if ( $is_vertical_alignment_keyword{$token} ) {
13957 $alignment_type = $token;
13961 # align before one of these types..
13962 # Note: add '.' after new vertical aligner is operational
13963 elsif ( $is_vertical_alignment_type{$type} ) {
13964 $alignment_type = $token;
13966 # Do not align a terminal token. Although it might
13967 # occasionally look ok to do this, this has been found to be
13968 # a good general rule. The main problems are:
13969 # (1) that the terminal token (such as an = or :) might get
13970 # moved far to the right where it is hard to see because
13971 # nothing follows it, and
13972 # (2) doing so may prevent other good alignments.
13973 # Current exceptions are && and ||
13974 if ( $i == $iend || $i >= $i_terminal ) {
13975 $alignment_type = ""
13976 unless ( $is_terminal_alignment_type{$type} );
13979 # Do not align leading ': (' or '. ('. This would prevent
13980 # alignment in something like the following:
13982 # ( $input_line_number < 10 ) ? " "
13983 # : ( $input_line_number < 100 ) ? " "
13987 # ( $case_matters ? $accessor : " lc($accessor) " )
13988 # . ( $yesno ? " eq " : " ne " )
13989 if ( $i == $ibeg + 2
13990 && $types_to_go[$ibeg] =~ /^[\.\:]$/
13991 && $types_to_go[ $i - 1 ] eq 'b' )
13993 $alignment_type = "";
13996 # For a paren after keyword, only align something like this:
13998 # elsif ( $b ) { &b }
13999 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
14000 $alignment_type = ""
14001 unless $vert_last_nonblank_token =~
14002 /^(if|unless|elsif)$/;
14005 # be sure the alignment tokens are unique
14006 # This didn't work well: reason not determined
14007 # if ($token ne $type) {$alignment_type .= $type}
14010 # NOTE: This is deactivated because it causes the previous
14011 # if/elsif alignment to fail
14012 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
14013 #{ $alignment_type = $type; }
14015 if ($alignment_type) {
14016 $last_vertical_alignment_before_index = $i;
14019 #--------------------------------------------------------
14020 # Next see if we want to align AFTER the previous nonblank
14021 #--------------------------------------------------------
14023 # We want to line up ',' and interior ';' tokens, with the added
14024 # space AFTER these tokens. (Note: interior ';' is included
14025 # because it may occur in short blocks).
14028 # we haven't already set it
14031 # and its not the first token of the line
14034 # and it follows a blank
14035 && $types_to_go[ $i - 1 ] eq 'b'
14037 # and previous token IS one of these:
14038 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
14040 # and it's NOT one of these
14041 && ( $type !~ /^[b\#\)\]\}]$/ )
14043 # then go ahead and align
14047 $alignment_type = $vert_last_nonblank_type;
14050 #--------------------------------------------------------
14051 # then store the value
14052 #--------------------------------------------------------
14053 $matching_token_to_go[$i] = $alignment_type;
14054 if ( $type ne 'b' ) {
14055 $vert_last_nonblank_type = $type;
14056 $vert_last_nonblank_token = $token;
14057 $vert_last_nonblank_block_type = $block_type;
14064 sub terminal_type {
14066 # returns type of last token on this line (terminal token), as follows:
14067 # returns # for a full-line comment
14068 # returns ' ' for a blank line
14069 # otherwise returns final token type
14071 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
14073 # check for full-line comment..
14074 if ( $$rtype[$ibeg] eq '#' ) {
14075 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
14079 # start at end and walk backwards..
14080 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
14082 # skip past any side comment and blanks
14083 next if ( $$rtype[$i] eq 'b' );
14084 next if ( $$rtype[$i] eq '#' );
14086 # found it..make sure it is a BLOCK termination,
14087 # but hide a terminal } after sort/grep/map because it is not
14088 # necessarily the end of the line. (terminal.t)
14089 my $terminal_type = $$rtype[$i];
14091 $terminal_type eq '}'
14092 && ( !$$rblock_type[$i]
14093 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
14096 $terminal_type = 'b';
14098 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
14102 return wantarray ? ( ' ', $ibeg ) : ' ';
14106 { # set_bond_strengths
14108 my %is_good_keyword_breakpoint;
14109 my %is_lt_gt_le_ge;
14111 my %binary_bond_strength;
14118 sub bias_table_key {
14119 my ( $type, $token ) = @_;
14120 my $bias_table_key = $type;
14121 if ( $type eq 'k' ) {
14122 $bias_table_key = $token;
14123 if ( $token eq 'err' ) { $bias_table_key = 'or' }
14125 return $bias_table_key;
14128 sub set_bond_strengths {
14132 @_ = qw(if unless while until for foreach);
14133 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
14135 @_ = qw(lt gt le ge);
14136 @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
14138 # The decision about where to break a line depends upon a "bond
14139 # strength" between tokens. The LOWER the bond strength, the MORE
14140 # likely a break. A bond strength may be any value but to simplify
14141 # things there are several pre-defined strength levels:
14143 # NO_BREAK => 10000;
14144 # VERY_STRONG => 100;
14148 # VERY_WEAK => 0.55;
14150 # The strength values are based on trial-and-error, and need to be
14151 # tweaked occasionally to get desired results. Some comments:
14153 # 1. Only relative strengths are important. small differences
14154 # in strengths can make big formatting differences.
14155 # 2. Each indentation level adds one unit of bond strength.
14156 # 3. A value of NO_BREAK makes an unbreakable bond
14157 # 4. A value of VERY_WEAK is the strength of a ','
14158 # 5. Values below NOMINAL are considered ok break points.
14159 # 6. Values above NOMINAL are considered poor break points.
14161 # The bond strengths should roughly follow precedence order where
14162 # possible. If you make changes, please check the results very
14163 # carefully on a variety of scripts. Testing with the -extrude
14164 # options is particularly helpful in exercising all of the rules.
14166 # Wherever possible, bond strengths are defined in the following
14167 # tables. There are two main stages to setting bond strengths and
14168 # two types of tables:
14170 # The first stage involves looking at each token individually and
14171 # defining left and right bond strengths, according to if we want
14172 # to break to the left or right side, and how good a break point it
14173 # is. For example tokens like =, ||, && make good break points and
14174 # will have low strengths, but one might want to break on either
14175 # side to put them at the end of one line or beginning of the next.
14177 # The second stage involves looking at certain pairs of tokens and
14178 # defining a bond strength for that particular pair. This second
14179 # stage has priority.
14181 #---------------------------------------------------------------
14182 # Bond Strength BEGIN Section 1.
14183 # Set left and right bond strengths of individual tokens.
14184 #---------------------------------------------------------------
14186 # NOTE: NO_BREAK's set in this section first are HINTS which will
14187 # probably not be honored. Essential NO_BREAKS's should be set in
14188 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
14189 # of this subroutine.
14191 # Note that we are setting defaults in this section. The user
14192 # cannot change bond strengths but can cause the left and right
14193 # bond strengths of any token type to be swapped through the use of
14194 # the -wba and -wbb flags. In this way the user can determine if a
14195 # breakpoint token should appear at the end of one line or the
14196 # beginning of the next line.
14198 # The hash keys in this section are token types, plus the text of
14199 # certain keywords like 'or', 'and'.
14201 # no break around possible filehandle
14202 $left_bond_strength{'Z'} = NO_BREAK;
14203 $right_bond_strength{'Z'} = NO_BREAK;
14205 # never put a bare word on a new line:
14206 # example print (STDERR, "bla"); will fail with break after (
14207 $left_bond_strength{'w'} = NO_BREAK;
14209 # blanks always have infinite strength to force breaks after
14211 $right_bond_strength{'b'} = NO_BREAK;
14213 # try not to break on exponentation
14214 @_ = qw" ** .. ... <=> ";
14215 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14216 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14218 # The comma-arrow has very low precedence but not a good break point
14219 $left_bond_strength{'=>'} = NO_BREAK;
14220 $right_bond_strength{'=>'} = NOMINAL;
14222 # ok to break after label
14223 $left_bond_strength{'J'} = NO_BREAK;
14224 $right_bond_strength{'J'} = NOMINAL;
14225 $left_bond_strength{'j'} = STRONG;
14226 $right_bond_strength{'j'} = STRONG;
14227 $left_bond_strength{'A'} = STRONG;
14228 $right_bond_strength{'A'} = STRONG;
14230 $left_bond_strength{'->'} = STRONG;
14231 $right_bond_strength{'->'} = VERY_STRONG;
14233 $left_bond_strength{'CORE::'} = NOMINAL;
14234 $right_bond_strength{'CORE::'} = NO_BREAK;
14236 # breaking AFTER modulus operator is ok:
14238 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14239 @right_bond_strength{@_} =
14240 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
14242 # Break AFTER math operators * and /
14244 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14245 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
14247 # Break AFTER weakest math operators + and -
14248 # Make them weaker than * but a bit stronger than '.'
14250 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14251 @right_bond_strength{@_} =
14252 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
14254 # breaking BEFORE these is just ok:
14256 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14257 @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
14259 # breaking before the string concatenation operator seems best
14260 # because it can be hard to see at the end of a line
14261 $right_bond_strength{'.'} = STRONG;
14262 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
14265 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14266 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
14268 # make these a little weaker than nominal so that they get
14269 # favored for end-of-line characters
14270 @_ = qw"!= == =~ !~ ~~ !~~";
14271 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14272 @right_bond_strength{@_} =
14273 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
14275 # break AFTER these
14276 @_ = qw" < > | & >= <=";
14277 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
14278 @right_bond_strength{@_} =
14279 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
14281 # breaking either before or after a quote is ok
14282 # but bias for breaking before a quote
14283 $left_bond_strength{'Q'} = NOMINAL;
14284 $right_bond_strength{'Q'} = NOMINAL + 0.02;
14285 $left_bond_strength{'q'} = NOMINAL;
14286 $right_bond_strength{'q'} = NOMINAL;
14288 # starting a line with a keyword is usually ok
14289 $left_bond_strength{'k'} = NOMINAL;
14291 # we usually want to bond a keyword strongly to what immediately
14292 # follows, rather than leaving it stranded at the end of a line
14293 $right_bond_strength{'k'} = STRONG;
14295 $left_bond_strength{'G'} = NOMINAL;
14296 $right_bond_strength{'G'} = STRONG;
14298 # assignment operators
14300 = **= += *= &= <<= &&=
14301 -= /= |= >>= ||= //=
14306 # Default is to break AFTER various assignment operators
14307 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14308 @right_bond_strength{@_} =
14309 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
14311 # Default is to break BEFORE '&&' and '||' and '//'
14312 # set strength of '||' to same as '=' so that chains like
14313 # $a = $b || $c || $d will break before the first '||'
14314 $right_bond_strength{'||'} = NOMINAL;
14315 $left_bond_strength{'||'} = $right_bond_strength{'='};
14317 # same thing for '//'
14318 $right_bond_strength{'//'} = NOMINAL;
14319 $left_bond_strength{'//'} = $right_bond_strength{'='};
14321 # set strength of && a little higher than ||
14322 $right_bond_strength{'&&'} = NOMINAL;
14323 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
14325 $left_bond_strength{';'} = VERY_STRONG;
14326 $right_bond_strength{';'} = VERY_WEAK;
14327 $left_bond_strength{'f'} = VERY_STRONG;
14329 # make right strength of for ';' a little less than '='
14330 # to make for contents break after the ';' to avoid this:
14331 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
14332 # $number_of_fields )
14333 # and make it weaker than ',' and 'and' too
14334 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
14336 # The strengths of ?/: should be somewhere between
14337 # an '=' and a quote (NOMINAL),
14338 # make strength of ':' slightly less than '?' to help
14339 # break long chains of ? : after the colons
14340 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
14341 $right_bond_strength{':'} = NO_BREAK;
14342 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
14343 $right_bond_strength{'?'} = NO_BREAK;
14345 $left_bond_strength{','} = VERY_STRONG;
14346 $right_bond_strength{','} = VERY_WEAK;
14348 # remaining digraphs and trigraphs not defined above
14349 @_ = qw( :: <> ++ --);
14350 @left_bond_strength{@_} = (WEAK) x scalar(@_);
14351 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14353 # Set bond strengths of certain keywords
14354 # make 'or', 'err', 'and' slightly weaker than a ','
14355 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
14356 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
14357 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
14358 $left_bond_strength{'xor'} = NOMINAL;
14359 $right_bond_strength{'and'} = NOMINAL;
14360 $right_bond_strength{'or'} = NOMINAL;
14361 $right_bond_strength{'err'} = NOMINAL;
14362 $right_bond_strength{'xor'} = STRONG;
14364 #---------------------------------------------------------------
14365 # Bond Strength BEGIN Section 2.
14366 # Set binary rules for bond strengths between certain token types.
14367 #---------------------------------------------------------------
14369 # We have a little problem making tables which apply to the
14370 # container tokens. Here is a list of container tokens and
14373 # type tokens // meaning
14374 # { {, [, ( // indent
14375 # } }, ], ) // outdent
14376 # [ [ // left non-structural [ (enclosing an array index)
14377 # ] ] // right non-structural square bracket
14378 # ( ( // left non-structural paren
14379 # ) ) // right non-structural paren
14380 # L { // left non-structural curly brace (enclosing a key)
14381 # R } // right non-structural curly brace
14383 # Some rules apply to token types and some to just the token
14384 # itself. We solve the problem by combining type and token into a
14385 # new hash key for the container types.
14387 # If a rule applies to a token 'type' then we need to make rules
14388 # for each of these 'type.token' combinations:
14399 # If a rule applies to a token then we need to make rules for
14400 # these 'type.token' combinations:
14409 # allow long lines before final { in an if statement, as in:
14414 # Otherwise, the line before the { tends to be too short.
14416 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
14417 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
14419 # break on something like '} (', but keep this stronger than a ','
14420 # example is in 'howe.pl'
14421 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14422 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14424 # keep matrix and hash indices together
14425 # but make them a little below STRONG to allow breaking open
14426 # something like {'some-word'}{'some-very-long-word'} at the }{
14428 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14429 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14430 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14431 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14433 # increase strength to the point where a break in the following
14434 # will be after the opening paren rather than at the arrow:
14436 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
14438 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14439 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14440 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14441 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14442 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14443 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14445 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14446 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14447 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14448 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14450 #---------------------------------------------------------------
14451 # Binary NO_BREAK rules
14452 #---------------------------------------------------------------
14454 # use strict requires that bare word and => not be separated
14455 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
14456 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
14458 # Never break between a bareword and a following paren because
14459 # perl may give an error. For example, if a break is placed
14460 # between 'to_filehandle' and its '(' the following line will
14461 # give a syntax error [Carp.pm]: my( $no) =fileno(
14462 # to_filehandle( $in)) ;
14463 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
14464 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
14465 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
14466 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
14468 # use strict requires that bare word within braces not start new
14470 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
14472 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
14474 # use strict requires that bare word and => not be separated
14475 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
14477 # use strict does not allow separating type info from trailing { }
14478 # testfile is readmail.pl
14479 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
14480 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
14482 # As a defensive measure, do not break between a '(' and a
14483 # filehandle. In some cases, this can cause an error. For
14484 # example, the following program works:
14491 # But this program fails:
14499 # This is normally only a problem with the 'extrude' option
14500 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
14501 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
14503 # never break between sub name and opening paren
14504 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
14505 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
14507 # keep '}' together with ';'
14508 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
14510 # Breaking before a ++ can cause perl to guess wrong. For
14511 # example the following line will cause a syntax error
14512 # with -extrude if we break between '$i' and '++' [fixstyle2]
14513 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
14514 $nobreak_lhs{'++'} = NO_BREAK;
14516 # Do not break before a possible file handle
14517 $nobreak_lhs{'Z'} = NO_BREAK;
14519 # use strict hates bare words on any new line. For
14520 # example, a break before the underscore here provokes the
14521 # wrath of use strict:
14522 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
14523 $nobreak_rhs{'F'} = NO_BREAK;
14524 $nobreak_rhs{'CORE::'} = NO_BREAK;
14526 #---------------------------------------------------------------
14527 # Bond Strength BEGIN Section 3.
14528 # Define tables and values for applying a small bias to the above
14530 #---------------------------------------------------------------
14531 # Adding a small 'bias' to strengths is a simple way to make a line
14532 # break at the first of a sequence of identical terms. For
14533 # example, to force long string of conditional operators to break
14534 # with each line ending in a ':', we can add a small number to the
14535 # bond strength of each ':' (colon.t)
14536 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
14537 $delta_bias = 0.0001; # a very small strength level
14541 # patch-its always ok to break at end of line
14542 $nobreak_to_go[$max_index_to_go] = 0;
14544 # we start a new set of bias values for each line
14546 @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
14547 my $code_bias = -.01; # bias for closing block braces
14552 my $last_nonblank_type = $type;
14553 my $last_nonblank_token = $token;
14554 my $list_str = $left_bond_strength{'?'};
14556 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
14557 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
14560 # main loop to compute bond strengths between each pair of tokens
14561 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
14562 $last_type = $type;
14563 if ( $type ne 'b' ) {
14564 $last_nonblank_type = $type;
14565 $last_nonblank_token = $token;
14567 $type = $types_to_go[$i];
14569 # strength on both sides of a blank is the same
14570 if ( $type eq 'b' && $last_type ne 'b' ) {
14571 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
14575 $token = $tokens_to_go[$i];
14576 $block_type = $block_type_to_go[$i];
14578 $next_type = $types_to_go[$i_next];
14579 $next_token = $tokens_to_go[$i_next];
14580 $total_nesting_depth = $nesting_depth_to_go[$i_next];
14581 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
14582 $next_nonblank_type = $types_to_go[$i_next_nonblank];
14583 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14585 # We are computing the strength of the bond between the current
14586 # token and the NEXT token.
14588 #---------------------------------------------------------------
14589 # Bond Strength Section 1:
14590 # First Approximation.
14591 # Use minimum of individual left and right tabulated bond
14593 #---------------------------------------------------------------
14594 my $bsr = $right_bond_strength{$type};
14595 my $bsl = $left_bond_strength{$next_nonblank_type};
14597 # define right bond strengths of certain keywords
14598 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
14599 $bsr = $right_bond_strength{$token};
14601 elsif ( $token eq 'ne' or $token eq 'eq' ) {
14605 # set terminal bond strength to the nominal value
14606 # this will cause good preceding breaks to be retained
14607 if ( $i_next_nonblank > $max_index_to_go ) {
14611 # define right bond strengths of certain keywords
14612 if ( $next_nonblank_type eq 'k'
14613 && defined( $left_bond_strength{$next_nonblank_token} ) )
14615 $bsl = $left_bond_strength{$next_nonblank_token};
14617 elsif ($next_nonblank_token eq 'ne'
14618 or $next_nonblank_token eq 'eq' )
14622 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
14623 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
14626 # Use the minimum of the left and right strengths. Note: it might
14627 # seem that we would want to keep a NO_BREAK if either token has
14628 # this value. This didn't work, for example because in an arrow
14629 # list, it prevents the comma from separating from the following
14630 # bare word (which is probably quoted by its arrow). So necessary
14631 # NO_BREAK's have to be handled as special cases in the final
14633 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
14634 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
14635 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
14636 my $bond_str_1 = $bond_str;
14638 #---------------------------------------------------------------
14639 # Bond Strength Section 2:
14640 # Apply hardwired rules..
14641 #---------------------------------------------------------------
14643 # Patch to put terminal or clauses on a new line: Weaken the bond
14644 # at an || followed by die or similar keyword to make the terminal
14645 # or clause fall on a new line, like this:
14647 # my $class = shift
14648 # || die "Cannot add broadcast: No class identifier found";
14650 # Otherwise the break will be at the previous '=' since the || and
14651 # = have the same starting strength and the or is biased, like
14655 # shift || die "Cannot add broadcast: No class identifier found";
14657 # In any case if the user places a break at either the = or the ||
14658 # it should remain there.
14659 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
14660 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
14661 if ( $want_break_before{$token} && $i > 0 ) {
14662 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
14665 $bond_str -= $delta_bias;
14670 # good to break after end of code blocks
14671 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
14673 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
14674 $code_bias += $delta_bias;
14677 if ( $type eq 'k' ) {
14679 # allow certain control keywords to stand out
14680 if ( $next_nonblank_type eq 'k'
14681 && $is_last_next_redo_return{$token} )
14683 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
14686 # Don't break after keyword my. This is a quick fix for a
14687 # rare problem with perl. An example is this line from file
14690 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
14691 # $this->{'question'} ) )
14693 if ( $token eq 'my' ) {
14694 $bond_str = NO_BREAK;
14699 # good to break before 'if', 'unless', etc
14700 if ( $is_if_brace_follower{$next_nonblank_token} ) {
14701 $bond_str = VERY_WEAK;
14704 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
14706 # FIXME: needs more testing
14707 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
14708 $bond_str = $list_str if ( $bond_str > $list_str );
14711 # keywords like 'unless', 'if', etc, within statements
14713 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
14714 $bond_str = VERY_WEAK / 1.05;
14718 # try not to break before a comma-arrow
14719 elsif ( $next_nonblank_type eq '=>' ) {
14720 if ( $bond_str < STRONG ) { $bond_str = STRONG }
14723 #---------------------------------------------------------------
14724 # Additional hardwired NOBREAK rules
14725 #---------------------------------------------------------------
14727 # map1.t -- correct for a quirk in perl
14729 && $next_nonblank_type eq 'i'
14730 && $last_nonblank_type eq 'k'
14731 && $is_sort_map_grep{$last_nonblank_token} )
14733 # /^(sort|map|grep)$/ )
14735 $bond_str = NO_BREAK;
14738 # extrude.t: do not break before paren at:
14740 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
14741 $bond_str = NO_BREAK;
14744 # in older version of perl, use strict can cause problems with
14745 # breaks before bare words following opening parens. For example,
14746 # this will fail under older versions if a break is made between
14747 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
14748 # command"); close MAIL;
14749 if ( $type eq '{' ) {
14751 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
14753 # but it's fine to break if the word is followed by a '=>'
14754 # or if it is obviously a sub call
14755 my $i_next_next_nonblank = $i_next_nonblank + 1;
14756 my $next_next_type = $types_to_go[$i_next_next_nonblank];
14757 if ( $next_next_type eq 'b'
14758 && $i_next_nonblank < $max_index_to_go )
14760 $i_next_next_nonblank++;
14761 $next_next_type = $types_to_go[$i_next_next_nonblank];
14764 # We'll check for an old breakpoint and keep a leading
14765 # bareword if it was that way in the input file.
14766 # Presumably it was ok that way. For example, the
14767 # following would remain unchanged:
14770 # January, February, March, April,
14771 # May, June, July, August,
14772 # September, October, November, December,
14775 # This should be sufficient:
14777 !$old_breakpoint_to_go[$i]
14778 && ( $next_next_type eq ','
14779 || $next_next_type eq '}' )
14782 $bond_str = NO_BREAK;
14787 # Do not break between a possible filehandle and a ? or / and do
14788 # not introduce a break after it if there is no blank
14790 elsif ( $type eq 'Z' ) {
14795 # if there is no blank and we do not want one. Examples:
14796 # print $x++ # do not break after $x
14797 # print HTML"HELLO" # break ok after HTML
14800 && defined( $want_left_space{$next_type} )
14801 && $want_left_space{$next_type} == WS_NO
14804 # or we might be followed by the start of a quote
14805 || $next_nonblank_type =~ /^[\/\?]$/
14808 $bond_str = NO_BREAK;
14812 # Breaking before a ? before a quote can cause trouble if
14813 # they are not separated by a blank.
14814 # Example: a syntax error occurs if you break before the ? here
14815 # my$logic=join$all?' && ':' || ',@regexps;
14816 # From: Professional_Perl_Programming_Code/multifind.pl
14817 if ( $next_nonblank_type eq '?' ) {
14818 $bond_str = NO_BREAK
14819 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
14822 # Breaking before a . followed by a number
14823 # can cause trouble if there is no intervening space
14824 # Example: a syntax error occurs if you break before the .2 here
14825 # $str .= pack($endian.2, ensurrogate($ord));
14826 # From: perl58/Unicode.pm
14827 elsif ( $next_nonblank_type eq '.' ) {
14828 $bond_str = NO_BREAK
14829 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
14832 # patch to put cuddled elses back together when on multiple
14833 # lines, as in: } \n else \n { \n
14834 if ($rOpts_cuddled_else) {
14836 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
14837 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
14839 $bond_str = NO_BREAK;
14842 my $bond_str_2 = $bond_str;
14844 #---------------------------------------------------------------
14845 # End of hardwired rules
14846 #---------------------------------------------------------------
14848 #---------------------------------------------------------------
14849 # Bond Strength Section 3:
14850 # Apply table rules. These have priority over the above
14852 #---------------------------------------------------------------
14854 my $tabulated_bond_str;
14856 my $rtype = $next_nonblank_type;
14857 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
14858 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
14859 $rtype = $next_nonblank_type . $next_nonblank_token;
14862 if ( $binary_bond_strength{$ltype}{$rtype} ) {
14863 $bond_str = $binary_bond_strength{$ltype}{$rtype};
14864 $tabulated_bond_str = $bond_str;
14867 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
14868 $bond_str = NO_BREAK;
14869 $tabulated_bond_str = $bond_str;
14871 my $bond_str_3 = $bond_str;
14873 # If the hardwired rules conflict with the tabulated bond
14874 # strength then there is an inconsistency that should be fixed
14875 FORMATTER_DEBUG_FLAG_BOND_TABLES
14876 && $tabulated_bond_str
14878 && $bond_str_1 != $bond_str_2
14879 && $bond_str_2 != $tabulated_bond_str
14882 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
14885 #-----------------------------------------------------------------
14886 # Bond Strength Section 4:
14887 # Modify strengths of certain tokens which often occur in sequence
14888 # by adding a small bias to each one in turn so that the breaks
14889 # occur from left to right.
14891 # Note that we only changing strengths by small amounts here,
14892 # and usually increasing, so we should not be altering any NO_BREAKs.
14893 # Other routines which check for NO_BREAKs will use a tolerance
14894 # of one to avoid any problem.
14895 #-----------------------------------------------------------------
14897 # The bias tables use special keys
14898 my $left_key = bias_table_key( $type, $token );
14900 bias_table_key( $next_nonblank_type, $next_nonblank_token );
14902 # add any bias set by sub scan_list at old comma break points.
14903 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
14906 elsif ( defined( $bias{$left_key} ) ) {
14907 if ( !$want_break_before{$left_key} ) {
14908 $bias{$left_key} += $delta_bias;
14909 $bond_str += $bias{$left_key};
14914 if ( defined( $bias{$right_key} ) ) {
14915 if ( $want_break_before{$right_key} ) {
14917 # for leading '.' align all but 'short' quotes; the idea
14918 # is to not place something like "\n" on a single line.
14919 if ( $right_key eq '.' ) {
14921 $last_nonblank_type eq '.'
14924 $rOpts_short_concatenation_item_length )
14925 && ( $token !~ /^[\)\]\}]$/ )
14928 $bias{$right_key} += $delta_bias;
14932 $bias{$right_key} += $delta_bias;
14934 $bond_str += $bias{$right_key};
14937 my $bond_str_4 = $bond_str;
14939 #---------------------------------------------------------------
14940 # Bond Strength Section 5:
14941 # Fifth Approximation.
14942 # Take nesting depth into account by adding the nesting depth
14943 # to the bond strength.
14944 #---------------------------------------------------------------
14947 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
14948 if ( $total_nesting_depth > 0 ) {
14949 $strength = $bond_str + $total_nesting_depth;
14952 $strength = $bond_str;
14956 $strength = NO_BREAK;
14959 # always break after side comment
14960 if ( $type eq '#' ) { $strength = 0 }
14962 $bond_strength_to_go[$i] = $strength;
14964 FORMATTER_DEBUG_FLAG_BOND && do {
14965 my $str = substr( $token, 0, 15 );
14966 $str .= ' ' x ( 16 - length($str) );
14968 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
14971 } ## end sub set_bond_strengths
14974 sub pad_array_to_go {
14976 # to simplify coding in scan_list and set_bond_strengths, it helps
14977 # to create some extra blank tokens at the end of the arrays
14978 $tokens_to_go[ $max_index_to_go + 1 ] = '';
14979 $tokens_to_go[ $max_index_to_go + 2 ] = '';
14980 $types_to_go[ $max_index_to_go + 1 ] = 'b';
14981 $types_to_go[ $max_index_to_go + 2 ] = 'b';
14982 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
14983 $nesting_depth_to_go[$max_index_to_go];
14986 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
14987 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
14989 # shouldn't happen:
14990 unless ( get_saw_brace_error() ) {
14992 "Program bug in scan_list: hit nesting error which should have been caught\n"
14994 report_definite_bug();
14998 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
15003 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
15004 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
15008 { # begin scan_list
15011 $block_type, $current_depth,
15013 $i_last_nonblank_token, $last_colon_sequence_number,
15014 $last_nonblank_token, $last_nonblank_type,
15015 $last_nonblank_block_type, $last_old_breakpoint_count,
15016 $minimum_depth, $next_nonblank_block_type,
15017 $next_nonblank_token, $next_nonblank_type,
15018 $old_breakpoint_count, $starting_breakpoint_count,
15019 $starting_depth, $token,
15020 $type, $type_sequence,
15024 @breakpoint_stack, @breakpoint_undo_stack,
15025 @comma_index, @container_type,
15026 @identifier_count_stack, @index_before_arrow,
15027 @interrupted_list, @item_count_stack,
15028 @last_comma_index, @last_dot_index,
15029 @last_nonblank_type, @old_breakpoint_count_stack,
15030 @opening_structure_index_stack, @rfor_semicolon_list,
15031 @has_old_logical_breakpoints, @rand_or_list,
15035 # routine to define essential variables when we go 'up' to
15037 sub check_for_new_minimum_depth {
15039 if ( $depth < $minimum_depth ) {
15041 $minimum_depth = $depth;
15043 # these arrays need not retain values between calls
15044 $breakpoint_stack[$depth] = $starting_breakpoint_count;
15045 $container_type[$depth] = "";
15046 $identifier_count_stack[$depth] = 0;
15047 $index_before_arrow[$depth] = -1;
15048 $interrupted_list[$depth] = 1;
15049 $item_count_stack[$depth] = 0;
15050 $last_nonblank_type[$depth] = "";
15051 $opening_structure_index_stack[$depth] = -1;
15053 $breakpoint_undo_stack[$depth] = undef;
15054 $comma_index[$depth] = undef;
15055 $last_comma_index[$depth] = undef;
15056 $last_dot_index[$depth] = undef;
15057 $old_breakpoint_count_stack[$depth] = undef;
15058 $has_old_logical_breakpoints[$depth] = 0;
15059 $rand_or_list[$depth] = [];
15060 $rfor_semicolon_list[$depth] = [];
15061 $i_equals[$depth] = -1;
15063 # these arrays must retain values between calls
15064 if ( !defined( $has_broken_sublist[$depth] ) ) {
15065 $dont_align[$depth] = 0;
15066 $has_broken_sublist[$depth] = 0;
15067 $want_comma_break[$depth] = 0;
15072 # routine to decide which commas to break at within a container;
15074 # $bp_count = number of comma breakpoints set
15075 # $do_not_break_apart = a flag indicating if container need not
15077 sub set_comma_breakpoints {
15081 my $do_not_break_apart = 0;
15084 if ( $item_count_stack[$dd] ) {
15086 # handle commas not in containers...
15087 if ( $dont_align[$dd] ) {
15088 do_uncontained_comma_breaks($dd);
15091 # handle commas within containers...
15093 my $fbc = $forced_breakpoint_count;
15095 # always open comma lists not preceded by keywords,
15096 # barewords, identifiers (that is, anything that doesn't
15097 # look like a function call)
15098 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
15100 set_comma_breakpoints_do(
15102 $opening_structure_index_stack[$dd],
15104 $item_count_stack[$dd],
15105 $identifier_count_stack[$dd],
15107 $next_nonblank_type,
15108 $container_type[$dd],
15109 $interrupted_list[$dd],
15110 \$do_not_break_apart,
15113 $bp_count = $forced_breakpoint_count - $fbc;
15114 $do_not_break_apart = 0 if $must_break_open;
15117 return ( $bp_count, $do_not_break_apart );
15120 sub do_uncontained_comma_breaks {
15122 # Handle commas not in containers...
15123 # This is a catch-all routine for commas that we
15124 # don't know what to do with because the don't fall
15125 # within containers. We will bias the bond strength
15126 # to break at commas which ended lines in the input
15127 # file. This usually works better than just trying
15128 # to put as many items on a line as possible. A
15129 # downside is that if the input file is garbage it
15130 # won't work very well. However, the user can always
15131 # prevent following the old breakpoints with the
15135 my $old_comma_break_count = 0;
15136 foreach my $ii ( @{ $comma_index[$dd] } ) {
15137 if ( $old_breakpoint_to_go[$ii] ) {
15138 $old_comma_break_count++;
15139 $bond_strength_to_go[$ii] = $bias;
15141 # reduce bias magnitude to force breaks in order
15146 # Also put a break before the first comma if
15147 # (1) there was a break there in the input, and
15148 # (2) there was exactly one old break before the first comma break
15149 # (3) OLD: there are multiple old comma breaks
15150 # (3) NEW: there are one or more old comma breaks (see return example)
15152 # For example, we will follow the user and break after
15153 # 'print' in this snippet:
15155 # "conformability (Not the same dimension)\n",
15156 # "\t", $have, " is ", text_unit($hu), "\n",
15157 # "\t", $want, " is ", text_unit($wu), "\n",
15160 # Another example, just one comma, where we will break after
15163 # $x * cos($a) - $y * sin($a),
15164 # $x * sin($a) + $y * cos($a);
15166 # Breaking a print statement:
15168 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
15169 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
15171 # But we will not force a break after the opening paren here
15172 # (causes a blinker):
15173 # $heap->{stream}->set_output_filter(
15174 # poe::filter::reference->new('myotherfreezer') ),
15177 my $i_first_comma = $comma_index[$dd]->[0];
15178 if ( $old_breakpoint_to_go[$i_first_comma] ) {
15179 my $level_comma = $levels_to_go[$i_first_comma];
15182 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
15183 if ( $old_breakpoint_to_go[$ii] ) {
15185 last if ( $obp_count > 1 );
15187 if ( $levels_to_go[$ii] == $level_comma );
15191 # Changed rule from multiple old commas to just one here:
15192 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
15194 # Do not to break before an opening token because
15195 # it can lead to "blinkers".
15196 my $ibreakm = $ibreak;
15197 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
15198 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
15200 set_forced_breakpoint($ibreak);
15206 my %is_logical_container;
15209 @_ = qw# if elsif unless while and or err not && | || ? : ! #;
15210 @is_logical_container{@_} = (1) x scalar(@_);
15213 sub set_for_semicolon_breakpoints {
15215 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
15216 set_forced_breakpoint($_);
15220 sub set_logical_breakpoints {
15223 $item_count_stack[$dd] == 0
15224 && $is_logical_container{ $container_type[$dd] }
15226 || $has_old_logical_breakpoints[$dd]
15230 # Look for breaks in this order:
15233 foreach my $i ( 0 .. 3 ) {
15234 if ( $rand_or_list[$dd][$i] ) {
15235 foreach ( @{ $rand_or_list[$dd][$i] } ) {
15236 set_forced_breakpoint($_);
15239 # break at any 'if' and 'unless' too
15240 foreach ( @{ $rand_or_list[$dd][4] } ) {
15241 set_forced_breakpoint($_);
15243 $rand_or_list[$dd] = [];
15250 sub is_unbreakable_container {
15252 # never break a container of one of these types
15253 # because bad things can happen (map1.t)
15255 $is_sort_map_grep{ $container_type[$dd] };
15260 # This routine is responsible for setting line breaks for all lists,
15261 # so that hierarchical structure can be displayed and so that list
15262 # items can be vertically aligned. The output of this routine is
15263 # stored in the array @forced_breakpoint_to_go, which is used to set
15264 # final breakpoints.
15266 $starting_depth = $nesting_depth_to_go[0];
15269 $current_depth = $starting_depth;
15271 $last_colon_sequence_number = -1;
15272 $last_nonblank_token = ';';
15273 $last_nonblank_type = ';';
15274 $last_nonblank_block_type = ' ';
15275 $last_old_breakpoint_count = 0;
15276 $minimum_depth = $current_depth + 1; # forces update in check below
15277 $old_breakpoint_count = 0;
15278 $starting_breakpoint_count = $forced_breakpoint_count;
15281 $type_sequence = '';
15283 my $total_depth_variation = 0;
15284 my $i_old_assignment_break;
15285 my $depth_last = $starting_depth;
15287 check_for_new_minimum_depth($current_depth);
15289 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
15290 my $want_previous_breakpoint = -1;
15292 my $saw_good_breakpoint;
15293 my $i_line_end = -1;
15294 my $i_line_start = -1;
15296 # loop over all tokens in this batch
15297 while ( ++$i <= $max_index_to_go ) {
15298 if ( $type ne 'b' ) {
15299 $i_last_nonblank_token = $i - 1;
15300 $last_nonblank_type = $type;
15301 $last_nonblank_token = $token;
15302 $last_nonblank_block_type = $block_type;
15303 } ## end if ( $type ne 'b' )
15304 $type = $types_to_go[$i];
15305 $block_type = $block_type_to_go[$i];
15306 $token = $tokens_to_go[$i];
15307 $type_sequence = $type_sequence_to_go[$i];
15308 my $next_type = $types_to_go[ $i + 1 ];
15309 my $next_token = $tokens_to_go[ $i + 1 ];
15310 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
15311 $next_nonblank_type = $types_to_go[$i_next_nonblank];
15312 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15313 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15315 # set break if flag was set
15316 if ( $want_previous_breakpoint >= 0 ) {
15317 set_forced_breakpoint($want_previous_breakpoint);
15318 $want_previous_breakpoint = -1;
15321 $last_old_breakpoint_count = $old_breakpoint_count;
15322 if ( $old_breakpoint_to_go[$i] ) {
15324 $i_line_start = $i_next_nonblank;
15326 $old_breakpoint_count++;
15328 # Break before certain keywords if user broke there and
15329 # this is a 'safe' break point. The idea is to retain
15330 # any preferred breaks for sequential list operations,
15331 # like a schwartzian transform.
15332 if ($rOpts_break_at_old_keyword_breakpoints) {
15334 $next_nonblank_type eq 'k'
15335 && $is_keyword_returning_list{$next_nonblank_token}
15336 && ( $type =~ /^[=\)\]\}Riw]$/
15338 && $is_keyword_returning_list{$token} )
15342 # we actually have to set this break next time through
15343 # the loop because if we are at a closing token (such
15344 # as '}') which forms a one-line block, this break might
15346 $want_previous_breakpoint = $i;
15347 } ## end if ( $next_nonblank_type...)
15348 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
15350 # Break before attributes if user broke there
15351 if ($rOpts_break_at_old_attribute_breakpoints) {
15352 if ( $next_nonblank_type eq 'A' ) {
15353 $want_previous_breakpoint = $i;
15357 # remember an = break as possible good break point
15358 if ( $is_assignment{$type} ) {
15359 $i_old_assignment_break = $i;
15361 elsif ( $is_assignment{$next_nonblank_type} ) {
15362 $i_old_assignment_break = $i_next_nonblank;
15364 } ## end if ( $old_breakpoint_to_go...)
15365 next if ( $type eq 'b' );
15366 $depth = $nesting_depth_to_go[ $i + 1 ];
15368 $total_depth_variation += abs( $depth - $depth_last );
15369 $depth_last = $depth;
15371 # safety check - be sure we always break after a comment
15372 # Shouldn't happen .. an error here probably means that the
15373 # nobreak flag did not get turned off correctly during
15375 if ( $type eq '#' ) {
15376 if ( $i != $max_index_to_go ) {
15378 "Non-fatal program bug: backup logic needed to break after a comment\n"
15380 report_definite_bug();
15381 $nobreak_to_go[$i] = 0;
15382 set_forced_breakpoint($i);
15383 } ## end if ( $i != $max_index_to_go)
15384 } ## end if ( $type eq '#' )
15386 # Force breakpoints at certain tokens in long lines.
15387 # Note that such breakpoints will be undone later if these tokens
15388 # are fully contained within parens on a line.
15391 # break before a keyword within a line
15395 # if one of these keywords:
15396 && $token =~ /^(if|unless|while|until|for)$/
15398 # but do not break at something like '1 while'
15399 && ( $last_nonblank_type ne 'n' || $i > 2 )
15401 # and let keywords follow a closing 'do' brace
15402 && $last_nonblank_block_type ne 'do'
15407 # or container is broken (by side-comment, etc)
15408 || ( $next_nonblank_token eq '('
15409 && $mate_index_to_go[$i_next_nonblank] < $i )
15413 set_forced_breakpoint( $i - 1 );
15414 } ## end if ( $type eq 'k' && $i...)
15416 # remember locations of '||' and '&&' for possible breaks if we
15417 # decide this is a long logical expression.
15418 if ( $type eq '||' ) {
15419 push @{ $rand_or_list[$depth][2] }, $i;
15420 ++$has_old_logical_breakpoints[$depth]
15421 if ( ( $i == $i_line_start || $i == $i_line_end )
15422 && $rOpts_break_at_old_logical_breakpoints );
15423 } ## end if ( $type eq '||' )
15424 elsif ( $type eq '&&' ) {
15425 push @{ $rand_or_list[$depth][3] }, $i;
15426 ++$has_old_logical_breakpoints[$depth]
15427 if ( ( $i == $i_line_start || $i == $i_line_end )
15428 && $rOpts_break_at_old_logical_breakpoints );
15429 } ## end elsif ( $type eq '&&' )
15430 elsif ( $type eq 'f' ) {
15431 push @{ $rfor_semicolon_list[$depth] }, $i;
15433 elsif ( $type eq 'k' ) {
15434 if ( $token eq 'and' ) {
15435 push @{ $rand_or_list[$depth][1] }, $i;
15436 ++$has_old_logical_breakpoints[$depth]
15437 if ( ( $i == $i_line_start || $i == $i_line_end )
15438 && $rOpts_break_at_old_logical_breakpoints );
15439 } ## end if ( $token eq 'and' )
15441 # break immediately at 'or's which are probably not in a logical
15442 # block -- but we will break in logical breaks below so that
15443 # they do not add to the forced_breakpoint_count
15444 elsif ( $token eq 'or' ) {
15445 push @{ $rand_or_list[$depth][0] }, $i;
15446 ++$has_old_logical_breakpoints[$depth]
15447 if ( ( $i == $i_line_start || $i == $i_line_end )
15448 && $rOpts_break_at_old_logical_breakpoints );
15449 if ( $is_logical_container{ $container_type[$depth] } ) {
15452 if ($is_long_line) { set_forced_breakpoint($i) }
15453 elsif ( ( $i == $i_line_start || $i == $i_line_end )
15454 && $rOpts_break_at_old_logical_breakpoints )
15456 $saw_good_breakpoint = 1;
15458 } ## end else [ if ( $is_logical_container...)]
15459 } ## end elsif ( $token eq 'or' )
15460 elsif ( $token eq 'if' || $token eq 'unless' ) {
15461 push @{ $rand_or_list[$depth][4] }, $i;
15462 if ( ( $i == $i_line_start || $i == $i_line_end )
15463 && $rOpts_break_at_old_logical_breakpoints )
15465 set_forced_breakpoint($i);
15467 } ## end elsif ( $token eq 'if' ||...)
15468 } ## end elsif ( $type eq 'k' )
15469 elsif ( $is_assignment{$type} ) {
15470 $i_equals[$depth] = $i;
15473 if ($type_sequence) {
15475 # handle any postponed closing breakpoints
15476 if ( $token =~ /^[\)\]\}\:]$/ ) {
15477 if ( $type eq ':' ) {
15478 $last_colon_sequence_number = $type_sequence;
15480 # retain break at a ':' line break
15481 if ( ( $i == $i_line_start || $i == $i_line_end )
15482 && $rOpts_break_at_old_ternary_breakpoints )
15485 set_forced_breakpoint($i);
15487 # break at previous '='
15488 if ( $i_equals[$depth] > 0 ) {
15489 set_forced_breakpoint( $i_equals[$depth] );
15490 $i_equals[$depth] = -1;
15492 } ## end if ( ( $i == $i_line_start...))
15493 } ## end if ( $type eq ':' )
15494 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
15495 my $inc = ( $type eq ':' ) ? 0 : 1;
15496 set_forced_breakpoint( $i - $inc );
15497 delete $postponed_breakpoint{$type_sequence};
15499 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
15501 # set breaks at ?/: if they will get separated (and are
15502 # not a ?/: chain), or if the '?' is at the end of the
15504 elsif ( $token eq '?' ) {
15505 my $i_colon = $mate_index_to_go[$i];
15507 $i_colon <= 0 # the ':' is not in this batch
15508 || $i == 0 # this '?' is the first token of the line
15510 $max_index_to_go # or this '?' is the last token
15514 # don't break at a '?' if preceded by ':' on
15515 # this line of previous ?/: pair on this line.
15516 # This is an attempt to preserve a chain of ?/:
15517 # expressions (elsif2.t). And don't break if
15518 # this has a side comment.
15519 set_forced_breakpoint($i)
15521 $type_sequence == (
15522 $last_colon_sequence_number +
15523 TYPE_SEQUENCE_INCREMENT
15525 || $tokens_to_go[$max_index_to_go] eq '#'
15527 set_closing_breakpoint($i);
15528 } ## end if ( $i_colon <= 0 ||...)
15529 } ## end elsif ( $token eq '?' )
15530 } ## end if ($type_sequence)
15532 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
15534 #------------------------------------------------------------
15535 # Handle Increasing Depth..
15537 # prepare for a new list when depth increases
15538 # token $i is a '(','{', or '['
15539 #------------------------------------------------------------
15540 if ( $depth > $current_depth ) {
15542 $breakpoint_stack[$depth] = $forced_breakpoint_count;
15543 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
15544 $has_broken_sublist[$depth] = 0;
15545 $identifier_count_stack[$depth] = 0;
15546 $index_before_arrow[$depth] = -1;
15547 $interrupted_list[$depth] = 0;
15548 $item_count_stack[$depth] = 0;
15549 $last_comma_index[$depth] = undef;
15550 $last_dot_index[$depth] = undef;
15551 $last_nonblank_type[$depth] = $last_nonblank_type;
15552 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
15553 $opening_structure_index_stack[$depth] = $i;
15554 $rand_or_list[$depth] = [];
15555 $rfor_semicolon_list[$depth] = [];
15556 $i_equals[$depth] = -1;
15557 $want_comma_break[$depth] = 0;
15558 $container_type[$depth] =
15559 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
15560 ? $last_nonblank_token
15562 $has_old_logical_breakpoints[$depth] = 0;
15564 # if line ends here then signal closing token to break
15565 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
15567 set_closing_breakpoint($i);
15570 # Not all lists of values should be vertically aligned..
15571 $dont_align[$depth] =
15573 # code BLOCKS are handled at a higher level
15574 ( $block_type ne "" )
15576 # certain paren lists
15577 || ( $type eq '(' ) && (
15579 # it does not usually look good to align a list of
15580 # identifiers in a parameter list, as in:
15581 # my($var1, $var2, ...)
15582 # (This test should probably be refined, for now I'm just
15583 # testing for any keyword)
15584 ( $last_nonblank_type eq 'k' )
15586 # a trailing '(' usually indicates a non-list
15587 || ( $next_nonblank_type eq '(' )
15590 # patch to outdent opening brace of long if/for/..
15591 # statements (like this one). See similar coding in
15592 # set_continuation breaks. We have also catch it here for
15593 # short line fragments which otherwise will not go through
15594 # set_continuation_breaks.
15598 # if we have the ')' but not its '(' in this batch..
15599 && ( $last_nonblank_token eq ')' )
15600 && $mate_index_to_go[$i_last_nonblank_token] < 0
15602 # and user wants brace to left
15603 && !$rOpts->{'opening-brace-always-on-right'}
15605 && ( $type eq '{' ) # should be true
15606 && ( $token eq '{' ) # should be true
15609 set_forced_breakpoint( $i - 1 );
15610 } ## end if ( $block_type && ( ...))
15611 } ## end if ( $depth > $current_depth)
15613 #------------------------------------------------------------
15614 # Handle Decreasing Depth..
15616 # finish off any old list when depth decreases
15617 # token $i is a ')','}', or ']'
15618 #------------------------------------------------------------
15619 elsif ( $depth < $current_depth ) {
15621 check_for_new_minimum_depth($depth);
15623 # force all outer logical containers to break after we see on
15625 $has_old_logical_breakpoints[$depth] ||=
15626 $has_old_logical_breakpoints[$current_depth];
15628 # Patch to break between ') {' if the paren list is broken.
15629 # There is similar logic in set_continuation_breaks for
15630 # non-broken lists.
15632 && $next_nonblank_block_type
15633 && $interrupted_list[$current_depth]
15634 && $next_nonblank_type eq '{'
15635 && !$rOpts->{'opening-brace-always-on-right'} )
15637 set_forced_breakpoint($i);
15638 } ## end if ( $token eq ')' && ...
15640 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
15642 # set breaks at commas if necessary
15643 my ( $bp_count, $do_not_break_apart ) =
15644 set_comma_breakpoints($current_depth);
15646 my $i_opening = $opening_structure_index_stack[$current_depth];
15647 my $saw_opening_structure = ( $i_opening >= 0 );
15649 # this term is long if we had to break at interior commas..
15650 my $is_long_term = $bp_count > 0;
15652 # If this is a short container with one or more comma arrows,
15653 # then we will mark it as a long term to open it if requested.
15654 # $rOpts_comma_arrow_breakpoints =
15655 # 0 - open only if comma precedes closing brace
15656 # 1 - stable: except for one line blocks
15657 # 2 - try to form 1 line blocks
15659 # 4 - always open up if vt=0
15660 # 5 - stable: even for one line blocks if vt=0
15661 if ( !$is_long_term
15662 && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
15663 && $index_before_arrow[ $depth + 1 ] > 0
15664 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
15667 $is_long_term = $rOpts_comma_arrow_breakpoints == 4
15668 || ( $rOpts_comma_arrow_breakpoints == 0
15669 && $last_nonblank_token eq ',' )
15670 || ( $rOpts_comma_arrow_breakpoints == 5
15671 && $old_breakpoint_to_go[$i_opening] );
15672 } ## end if ( !$is_long_term &&...)
15674 # mark term as long if the length between opening and closing
15675 # parens exceeds allowed line length
15676 if ( !$is_long_term && $saw_opening_structure ) {
15677 my $i_opening_minus = find_token_starting_list($i_opening);
15679 # Note: we have to allow for one extra space after a
15680 # closing token so that we do not strand a comma or
15681 # semicolon, hence the '>=' here (oneline.t)
15683 excess_line_length( $i_opening_minus, $i ) >= 0;
15684 } ## end if ( !$is_long_term &&...)
15686 # We've set breaks after all comma-arrows. Now we have to
15687 # undo them if this can be a one-line block
15688 # (the only breakpoints set will be due to comma-arrows)
15691 # user doesn't require breaking after all comma-arrows
15692 ( $rOpts_comma_arrow_breakpoints != 0 )
15693 && ( $rOpts_comma_arrow_breakpoints != 4 )
15695 # and if the opening structure is in this batch
15696 && $saw_opening_structure
15698 # and either on the same old line
15700 $old_breakpoint_count_stack[$current_depth] ==
15701 $last_old_breakpoint_count
15703 # or user wants to form long blocks with arrows
15704 || $rOpts_comma_arrow_breakpoints == 2
15707 # and we made some breakpoints between the opening and closing
15708 && ( $breakpoint_undo_stack[$current_depth] <
15709 $forced_breakpoint_undo_count )
15711 # and this block is short enough to fit on one line
15712 # Note: use < because need 1 more space for possible comma
15717 undo_forced_breakpoint_stack(
15718 $breakpoint_undo_stack[$current_depth] );
15719 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
15721 # now see if we have any comma breakpoints left
15722 my $has_comma_breakpoints =
15723 ( $breakpoint_stack[$current_depth] !=
15724 $forced_breakpoint_count );
15726 # update broken-sublist flag of the outer container
15727 $has_broken_sublist[$depth] =
15728 $has_broken_sublist[$depth]
15729 || $has_broken_sublist[$current_depth]
15731 || $has_comma_breakpoints;
15733 # Having come to the closing ')', '}', or ']', now we have to decide if we
15734 # should 'open up' the structure by placing breaks at the opening and
15735 # closing containers. This is a tricky decision. Here are some of the
15736 # basic considerations:
15738 # -If this is a BLOCK container, then any breakpoints will have already
15739 # been set (and according to user preferences), so we need do nothing here.
15741 # -If we have a comma-separated list for which we can align the list items,
15742 # then we need to do so because otherwise the vertical aligner cannot
15743 # currently do the alignment.
15745 # -If this container does itself contain a container which has been broken
15746 # open, then it should be broken open to properly show the structure.
15748 # -If there is nothing to align, and no other reason to break apart,
15749 # then do not do it.
15751 # We will not break open the parens of a long but 'simple' logical expression.
15754 # This is an example of a simple logical expression and its formatting:
15756 # if ( $bigwasteofspace1 && $bigwasteofspace2
15757 # || $bigwasteofspace3 && $bigwasteofspace4 )
15759 # Most people would prefer this than the 'spacey' version:
15762 # $bigwasteofspace1 && $bigwasteofspace2
15763 # || $bigwasteofspace3 && $bigwasteofspace4
15766 # To illustrate the rules for breaking logical expressions, consider:
15770 # and ( exists $ids_excl_uc{$id_uc}
15771 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
15773 # This is on the verge of being difficult to read. The current default is to
15774 # open it up like this:
15779 # and ( exists $ids_excl_uc{$id_uc}
15780 # or grep $id_uc =~ /$_/, @ids_excl_uc )
15783 # This is a compromise which tries to avoid being too dense and to spacey.
15784 # A more spaced version would be:
15790 # exists $ids_excl_uc{$id_uc}
15791 # or grep $id_uc =~ /$_/, @ids_excl_uc
15795 # Some people might prefer the spacey version -- an option could be added. The
15796 # innermost expression contains a long block '( exists $ids_... ')'.
15798 # Here is how the logic goes: We will force a break at the 'or' that the
15799 # innermost expression contains, but we will not break apart its opening and
15800 # closing containers because (1) it contains no multi-line sub-containers itself,
15801 # and (2) there is no alignment to be gained by breaking it open like this
15804 # exists $ids_excl_uc{$id_uc}
15805 # or grep $id_uc =~ /$_/, @ids_excl_uc
15808 # (although this looks perfectly ok and might be good for long expressions). The
15809 # outer 'if' container, though, contains a broken sub-container, so it will be
15810 # broken open to avoid too much density. Also, since it contains no 'or's, there
15811 # will be a forced break at its 'and'.
15813 # set some flags telling something about this container..
15814 my $is_simple_logical_expression = 0;
15815 if ( $item_count_stack[$current_depth] == 0
15816 && $saw_opening_structure
15817 && $tokens_to_go[$i_opening] eq '('
15818 && $is_logical_container{ $container_type[$current_depth] }
15822 # This seems to be a simple logical expression with
15823 # no existing breakpoints. Set a flag to prevent
15825 if ( !$has_comma_breakpoints ) {
15826 $is_simple_logical_expression = 1;
15829 # This seems to be a simple logical expression with
15830 # breakpoints (broken sublists, for example). Break
15831 # at all 'or's and '||'s.
15833 set_logical_breakpoints($current_depth);
15835 } ## end if ( $item_count_stack...)
15838 && @{ $rfor_semicolon_list[$current_depth] } )
15840 set_for_semicolon_breakpoints($current_depth);
15842 # open up a long 'for' or 'foreach' container to allow
15843 # leading term alignment unless -lp is used.
15844 $has_comma_breakpoints = 1
15845 unless $rOpts_line_up_parentheses;
15846 } ## end if ( $is_long_term && ...)
15850 # breaks for code BLOCKS are handled at a higher level
15853 # we do not need to break at the top level of an 'if'
15855 && !$is_simple_logical_expression
15857 ## modification to keep ': (' containers vertically tight;
15858 ## but probably better to let user set -vt=1 to avoid
15859 ## inconsistency with other paren types
15860 ## && ($container_type[$current_depth] ne ':')
15862 # otherwise, we require one of these reasons for breaking:
15865 # - this term has forced line breaks
15866 $has_comma_breakpoints
15868 # - the opening container is separated from this batch
15869 # for some reason (comment, blank line, code block)
15870 # - this is a non-paren container spanning multiple lines
15871 || !$saw_opening_structure
15873 # - this is a long block contained in another breakable
15876 && $container_environment_to_go[$i_opening] ne
15882 # For -lp option, we must put a breakpoint before
15883 # the token which has been identified as starting
15884 # this indentation level. This is necessary for
15885 # proper alignment.
15886 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
15888 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
15889 if ( $i_opening + 1 < $max_index_to_go
15890 && $types_to_go[ $i_opening + 1 ] eq 'b' )
15892 $item = $leading_spaces_to_go[ $i_opening + 2 ];
15894 if ( defined($item) ) {
15895 my $i_start_2 = $item->get_STARTING_INDEX();
15897 defined($i_start_2)
15899 # we are breaking after an opening brace, paren,
15900 # so don't break before it too
15901 && $i_start_2 ne $i_opening
15905 # Only break for breakpoints at the same
15906 # indentation level as the opening paren
15907 my $test1 = $nesting_depth_to_go[$i_opening];
15908 my $test2 = $nesting_depth_to_go[$i_start_2];
15909 if ( $test2 == $test1 ) {
15910 set_forced_breakpoint( $i_start_2 - 1 );
15912 } ## end if ( defined($i_start_2...))
15913 } ## end if ( defined($item) )
15914 } ## end if ( $rOpts_line_up_parentheses...)
15916 # break after opening structure.
15917 # note: break before closing structure will be automatic
15918 if ( $minimum_depth <= $current_depth ) {
15920 set_forced_breakpoint($i_opening)
15921 unless ( $do_not_break_apart
15922 || is_unbreakable_container($current_depth) );
15924 # break at ',' of lower depth level before opening token
15925 if ( $last_comma_index[$depth] ) {
15926 set_forced_breakpoint( $last_comma_index[$depth] );
15929 # break at '.' of lower depth level before opening token
15930 if ( $last_dot_index[$depth] ) {
15931 set_forced_breakpoint( $last_dot_index[$depth] );
15934 # break before opening structure if preceded by another
15935 # closing structure and a comma. This is normally
15936 # done by the previous closing brace, but not
15937 # if it was a one-line block.
15938 if ( $i_opening > 2 ) {
15940 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
15944 if ( $types_to_go[$i_prev] eq ','
15945 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
15947 set_forced_breakpoint($i_prev);
15950 # also break before something like ':(' or '?('
15953 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
15955 my $token_prev = $tokens_to_go[$i_prev];
15956 if ( $want_break_before{$token_prev} ) {
15957 set_forced_breakpoint($i_prev);
15959 } ## end elsif ( $types_to_go[$i_prev...])
15960 } ## end if ( $i_opening > 2 )
15961 } ## end if ( $minimum_depth <=...)
15963 # break after comma following closing structure
15964 if ( $next_type eq ',' ) {
15965 set_forced_breakpoint( $i + 1 );
15968 # break before an '=' following closing structure
15970 $is_assignment{$next_nonblank_type}
15971 && ( $breakpoint_stack[$current_depth] !=
15972 $forced_breakpoint_count )
15975 set_forced_breakpoint($i);
15976 } ## end if ( $is_assignment{$next_nonblank_type...})
15978 # break at any comma before the opening structure Added
15979 # for -lp, but seems to be good in general. It isn't
15980 # obvious how far back to look; the '5' below seems to
15981 # work well and will catch the comma in something like
15982 # push @list, myfunc( $param, $param, ..
15984 my $icomma = $last_comma_index[$depth];
15985 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
15986 unless ( $forced_breakpoint_to_go[$icomma] ) {
15987 set_forced_breakpoint($icomma);
15990 } # end logic to open up a container
15992 # Break open a logical container open if it was already open
15993 elsif ($is_simple_logical_expression
15994 && $has_old_logical_breakpoints[$current_depth] )
15996 set_logical_breakpoints($current_depth);
15999 # Handle long container which does not get opened up
16000 elsif ($is_long_term) {
16002 # must set fake breakpoint to alert outer containers that
16004 set_fake_breakpoint();
16005 } ## end elsif ($is_long_term)
16007 } ## end elsif ( $depth < $current_depth)
16009 #------------------------------------------------------------
16010 # Handle this token
16011 #------------------------------------------------------------
16013 $current_depth = $depth;
16015 # handle comma-arrow
16016 if ( $type eq '=>' ) {
16017 next if ( $last_nonblank_type eq '=>' );
16018 next if $rOpts_break_at_old_comma_breakpoints;
16019 next if $rOpts_comma_arrow_breakpoints == 3;
16020 $want_comma_break[$depth] = 1;
16021 $index_before_arrow[$depth] = $i_last_nonblank_token;
16023 } ## end if ( $type eq '=>' )
16025 elsif ( $type eq '.' ) {
16026 $last_dot_index[$depth] = $i;
16029 # Turn off alignment if we are sure that this is not a list
16030 # environment. To be safe, we will do this if we see certain
16031 # non-list tokens, such as ';', and also the environment is
16032 # not a list. Note that '=' could be in any of the = operators
16033 # (lextest.t). We can't just use the reported environment
16034 # because it can be incorrect in some cases.
16035 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
16036 && $container_environment_to_go[$i] ne 'LIST' )
16038 $dont_align[$depth] = 1;
16039 $want_comma_break[$depth] = 0;
16040 $index_before_arrow[$depth] = -1;
16041 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
16043 # now just handle any commas
16044 next unless ( $type eq ',' );
16046 $last_dot_index[$depth] = undef;
16047 $last_comma_index[$depth] = $i;
16049 # break here if this comma follows a '=>'
16050 # but not if there is a side comment after the comma
16051 if ( $want_comma_break[$depth] ) {
16053 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
16054 if ($rOpts_comma_arrow_breakpoints) {
16055 $want_comma_break[$depth] = 0;
16056 ##$index_before_arrow[$depth] = -1;
16061 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
16063 # break before the previous token if it looks safe
16064 # Example of something that we will not try to break before:
16065 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
16066 # Also we don't want to break at a binary operator (like +):
16070 # $y - $R, -fill => 'black',
16072 my $ibreak = $index_before_arrow[$depth] - 1;
16074 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
16076 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
16077 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
16078 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
16080 # don't break pointer calls, such as the following:
16081 # File::Spec->curdir => 1,
16082 # (This is tokenized as adjacent 'w' tokens)
16083 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
16085 # And don't break before a comma, as in the following:
16086 # ( LONGER_THAN,=> 1,
16087 # EIGHTY_CHARACTERS,=> 2,
16088 # CAUSES_FORMATTING,=> 3,
16091 # This example is for -tso but should be general rule
16092 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
16093 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
16095 set_forced_breakpoint($ibreak);
16097 } ## end if ( $types_to_go[$ibreak...])
16098 } ## end if ( $ibreak > 0 && $tokens_to_go...)
16100 $want_comma_break[$depth] = 0;
16101 $index_before_arrow[$depth] = -1;
16103 # handle list which mixes '=>'s and ','s:
16104 # treat any list items so far as an interrupted list
16105 $interrupted_list[$depth] = 1;
16107 } ## end if ( $want_comma_break...)
16109 # break after all commas above starting depth
16110 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
16111 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
16115 # add this comma to the list..
16116 my $item_count = $item_count_stack[$depth];
16117 if ( $item_count == 0 ) {
16119 # but do not form a list with no opening structure
16122 # open INFILE_COPY, ">$input_file_copy"
16123 # or die ("very long message");
16125 if ( ( $opening_structure_index_stack[$depth] < 0 )
16126 && $container_environment_to_go[$i] eq 'BLOCK' )
16128 $dont_align[$depth] = 1;
16130 } ## end if ( $item_count == 0 )
16132 $comma_index[$depth][$item_count] = $i;
16133 ++$item_count_stack[$depth];
16134 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
16135 $identifier_count_stack[$depth]++;
16137 } ## end while ( ++$i <= $max_index_to_go)
16139 #-------------------------------------------
16140 # end of loop over all tokens in this batch
16141 #-------------------------------------------
16143 # set breaks for any unfinished lists ..
16144 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
16146 $interrupted_list[$dd] = 1;
16147 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
16148 set_comma_breakpoints($dd);
16149 set_logical_breakpoints($dd)
16150 if ( $has_old_logical_breakpoints[$dd] );
16151 set_for_semicolon_breakpoints($dd);
16153 # break open container...
16154 my $i_opening = $opening_structure_index_stack[$dd];
16155 set_forced_breakpoint($i_opening)
16157 is_unbreakable_container($dd)
16159 # Avoid a break which would place an isolated ' or "
16162 && $i_opening >= $max_index_to_go - 2
16163 && $token =~ /^['"]$/ )
16165 } ## end for ( my $dd = $current_depth...)
16167 # Return a flag indicating if the input file had some good breakpoints.
16168 # This flag will be used to force a break in a line shorter than the
16169 # allowed line length.
16170 if ( $has_old_logical_breakpoints[$current_depth] ) {
16171 $saw_good_breakpoint = 1;
16174 # A complex line with one break at an = has a good breakpoint.
16175 # This is not complex ($total_depth_variation=0):
16179 # This is complex ($total_depth_variation=6):
16181 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
16182 elsif ($i_old_assignment_break
16183 && $total_depth_variation > 4
16184 && $old_breakpoint_count == 1 )
16186 $saw_good_breakpoint = 1;
16187 } ## end elsif ( $i_old_assignment_break...)
16189 return $saw_good_breakpoint;
16190 } ## end sub scan_list
16193 sub find_token_starting_list {
16195 # When testing to see if a block will fit on one line, some
16196 # previous token(s) may also need to be on the line; particularly
16197 # if this is a sub call. So we will look back at least one
16198 # token. NOTE: This isn't perfect, but not critical, because
16199 # if we mis-identify a block, it will be wrapped and therefore
16200 # fixed the next time it is formatted.
16201 my $i_opening_paren = shift;
16202 my $i_opening_minus = $i_opening_paren;
16203 my $im1 = $i_opening_paren - 1;
16204 my $im2 = $i_opening_paren - 2;
16205 my $im3 = $i_opening_paren - 3;
16206 my $typem1 = $types_to_go[$im1];
16207 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
16208 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
16209 $i_opening_minus = $i_opening_paren;
16211 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
16212 $i_opening_minus = $im1 if $im1 >= 0;
16214 # walk back to improve length estimate
16215 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
16216 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
16217 $i_opening_minus = $j;
16219 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
16221 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
16222 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
16223 $i_opening_minus = $im2;
16225 return $i_opening_minus;
16228 { # begin set_comma_breakpoints_do
16230 my %is_keyword_with_special_leading_term;
16234 # These keywords have prototypes which allow a special leading item
16235 # followed by a list
16237 qw(formline grep kill map printf sprintf push chmod join pack unshift);
16238 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
16241 sub set_comma_breakpoints_do {
16243 # Given a list with some commas, set breakpoints at some of the
16244 # commas, if necessary, to make it easy to read. This list is
16247 $depth, $i_opening_paren, $i_closing_paren,
16248 $item_count, $identifier_count, $rcomma_index,
16249 $next_nonblank_type, $list_type, $interrupted,
16250 $rdo_not_break_apart, $must_break_open,
16253 # nothing to do if no commas seen
16254 return if ( $item_count < 1 );
16255 my $i_first_comma = $$rcomma_index[0];
16256 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
16257 my $i_last_comma = $i_true_last_comma;
16258 if ( $i_last_comma >= $max_index_to_go ) {
16259 $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
16260 return if ( $item_count < 1 );
16263 #---------------------------------------------------------------
16264 # find lengths of all items in the list to calculate page layout
16265 #---------------------------------------------------------------
16266 my $comma_count = $item_count;
16272 my @max_length = ( 0, 0 );
16273 my $first_term_length;
16274 my $i = $i_opening_paren;
16277 for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
16278 $is_odd = 1 - $is_odd;
16279 $i_prev_plus = $i + 1;
16280 $i = $$rcomma_index[$j];
16283 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
16285 ( $types_to_go[$i_prev_plus] eq 'b' )
16288 push @i_term_begin, $i_term_begin;
16289 push @i_term_end, $i_term_end;
16290 push @i_term_comma, $i;
16292 # note: currently adding 2 to all lengths (for comma and space)
16294 2 + token_sequence_length( $i_term_begin, $i_term_end );
16295 push @item_lengths, $length;
16298 $first_term_length = $length;
16302 if ( $length > $max_length[$is_odd] ) {
16303 $max_length[$is_odd] = $length;
16308 # now we have to make a distinction between the comma count and item
16309 # count, because the item count will be one greater than the comma
16310 # count if the last item is not terminated with a comma
16312 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
16313 ? $i_last_comma + 1
16316 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
16317 ? $i_closing_paren - 2
16318 : $i_closing_paren - 1;
16319 my $i_effective_last_comma = $i_last_comma;
16321 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
16323 if ( $last_item_length > 0 ) {
16325 # add 2 to length because other lengths include a comma and a blank
16326 $last_item_length += 2;
16327 push @item_lengths, $last_item_length;
16328 push @i_term_begin, $i_b + 1;
16329 push @i_term_end, $i_e;
16330 push @i_term_comma, undef;
16332 my $i_odd = $item_count % 2;
16334 if ( $last_item_length > $max_length[$i_odd] ) {
16335 $max_length[$i_odd] = $last_item_length;
16339 $i_effective_last_comma = $i_e + 1;
16341 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
16342 $identifier_count++;
16346 #---------------------------------------------------------------
16347 # End of length calculations
16348 #---------------------------------------------------------------
16350 #---------------------------------------------------------------
16351 # Compound List Rule 1:
16352 # Break at (almost) every comma for a list containing a broken
16353 # sublist. This has higher priority than the Interrupted List
16355 #---------------------------------------------------------------
16356 if ( $has_broken_sublist[$depth] ) {
16358 # Break at every comma except for a comma between two
16359 # simple, small terms. This prevents long vertical
16360 # columns of, say, just 0's.
16361 my $small_length = 10; # 2 + actual maximum length wanted
16363 # We'll insert a break in long runs of small terms to
16364 # allow alignment in uniform tables.
16365 my $skipped_count = 0;
16366 my $columns = table_columns_available($i_first_comma);
16367 my $fields = int( $columns / $small_length );
16368 if ( $rOpts_maximum_fields_per_table
16369 && $fields > $rOpts_maximum_fields_per_table )
16371 $fields = $rOpts_maximum_fields_per_table;
16373 my $max_skipped_count = $fields - 1;
16375 my $is_simple_last_term = 0;
16376 my $is_simple_next_term = 0;
16377 foreach my $j ( 0 .. $item_count ) {
16378 $is_simple_last_term = $is_simple_next_term;
16379 $is_simple_next_term = 0;
16380 if ( $j < $item_count
16381 && $i_term_end[$j] == $i_term_begin[$j]
16382 && $item_lengths[$j] <= $small_length )
16384 $is_simple_next_term = 1;
16387 if ( $is_simple_last_term
16388 && $is_simple_next_term
16389 && $skipped_count < $max_skipped_count )
16394 $skipped_count = 0;
16395 my $i = $i_term_comma[ $j - 1 ];
16396 last unless defined $i;
16397 set_forced_breakpoint($i);
16401 # always break at the last comma if this list is
16402 # interrupted; we wouldn't want to leave a terminal '{', for
16404 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
16408 #my ( $a, $b, $c ) = caller();
16409 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
16410 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
16411 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
16413 #---------------------------------------------------------------
16414 # Interrupted List Rule:
16415 # A list is forced to use old breakpoints if it was interrupted
16416 # by side comments or blank lines, or requested by user.
16417 #---------------------------------------------------------------
16418 if ( $rOpts_break_at_old_comma_breakpoints
16420 || $i_opening_paren < 0 )
16422 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
16426 #---------------------------------------------------------------
16427 # Looks like a list of items. We have to look at it and size it up.
16428 #---------------------------------------------------------------
16430 my $opening_token = $tokens_to_go[$i_opening_paren];
16431 my $opening_environment =
16432 $container_environment_to_go[$i_opening_paren];
16434 #-------------------------------------------------------------------
16435 # Return if this will fit on one line
16436 #-------------------------------------------------------------------
16438 my $i_opening_minus = find_token_starting_list($i_opening_paren);
16440 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
16442 #-------------------------------------------------------------------
16443 # Now we know that this block spans multiple lines; we have to set
16444 # at least one breakpoint -- real or fake -- as a signal to break
16445 # open any outer containers.
16446 #-------------------------------------------------------------------
16447 set_fake_breakpoint();
16449 # be sure we do not extend beyond the current list length
16450 if ( $i_effective_last_comma >= $max_index_to_go ) {
16451 $i_effective_last_comma = $max_index_to_go - 1;
16454 # Set a flag indicating if we need to break open to keep -lp
16455 # items aligned. This is necessary if any of the list terms
16456 # exceeds the available space after the '('.
16457 my $need_lp_break_open = $must_break_open;
16458 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
16459 my $columns_if_unbroken =
16460 maximum_line_length($i_opening_minus) -
16461 total_line_length( $i_opening_minus, $i_opening_paren );
16462 $need_lp_break_open =
16463 ( $max_length[0] > $columns_if_unbroken )
16464 || ( $max_length[1] > $columns_if_unbroken )
16465 || ( $first_term_length > $columns_if_unbroken );
16468 # Specify if the list must have an even number of fields or not.
16469 # It is generally safest to assume an even number, because the
16470 # list items might be a hash list. But if we can be sure that
16471 # it is not a hash, then we can allow an odd number for more
16473 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
16475 if ( $identifier_count >= $item_count - 1
16476 || $is_assignment{$next_nonblank_type}
16477 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
16483 # do we have a long first term which should be
16484 # left on a line by itself?
16485 my $use_separate_first_term = (
16486 $odd_or_even == 1 # only if we can use 1 field/line
16487 && $item_count > 3 # need several items
16488 && $first_term_length >
16489 2 * $max_length[0] - 2 # need long first term
16490 && $first_term_length >
16491 2 * $max_length[1] - 2 # need long first term
16494 # or do we know from the type of list that the first term should
16496 if ( !$use_separate_first_term ) {
16497 if ( $is_keyword_with_special_leading_term{$list_type} ) {
16498 $use_separate_first_term = 1;
16500 # should the container be broken open?
16501 if ( $item_count < 3 ) {
16502 if ( $i_first_comma - $i_opening_paren < 4 ) {
16503 $$rdo_not_break_apart = 1;
16506 elsif ($first_term_length < 20
16507 && $i_first_comma - $i_opening_paren < 4 )
16509 my $columns = table_columns_available($i_first_comma);
16510 if ( $first_term_length < $columns ) {
16511 $$rdo_not_break_apart = 1;
16518 if ($use_separate_first_term) {
16520 # ..set a break and update starting values
16521 $use_separate_first_term = 1;
16522 set_forced_breakpoint($i_first_comma);
16523 $i_opening_paren = $i_first_comma;
16524 $i_first_comma = $$rcomma_index[1];
16526 return if $comma_count == 1;
16527 shift @item_lengths;
16528 shift @i_term_begin;
16530 shift @i_term_comma;
16533 # if not, update the metrics to include the first term
16535 if ( $first_term_length > $max_length[0] ) {
16536 $max_length[0] = $first_term_length;
16540 # Field width parameters
16541 my $pair_width = ( $max_length[0] + $max_length[1] );
16543 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
16545 # Number of free columns across the page width for laying out tables
16546 my $columns = table_columns_available($i_first_comma);
16548 # Estimated maximum number of fields which fit this space
16549 # This will be our first guess
16550 my $number_of_fields_max =
16551 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
16553 my $number_of_fields = $number_of_fields_max;
16555 # Find the best-looking number of fields
16556 # and make this our second guess if possible
16557 my ( $number_of_fields_best, $ri_ragged_break_list,
16558 $new_identifier_count )
16559 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
16562 if ( $number_of_fields_best != 0
16563 && $number_of_fields_best < $number_of_fields_max )
16565 $number_of_fields = $number_of_fields_best;
16568 # ----------------------------------------------------------------------
16569 # If we are crowded and the -lp option is being used, try to
16570 # undo some indentation
16571 # ----------------------------------------------------------------------
16573 $rOpts_line_up_parentheses
16575 $number_of_fields == 0
16576 || ( $number_of_fields == 1
16577 && $number_of_fields != $number_of_fields_best )
16581 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
16582 if ( $available_spaces > 0 ) {
16584 my $spaces_wanted = $max_width - $columns; # for 1 field
16586 if ( $number_of_fields_best == 0 ) {
16587 $number_of_fields_best =
16588 get_maximum_fields_wanted( \@item_lengths );
16591 if ( $number_of_fields_best != 1 ) {
16592 my $spaces_wanted_2 =
16593 1 + $pair_width - $columns; # for 2 fields
16594 if ( $available_spaces > $spaces_wanted_2 ) {
16595 $spaces_wanted = $spaces_wanted_2;
16599 if ( $spaces_wanted > 0 ) {
16600 my $deleted_spaces =
16601 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
16604 if ( $deleted_spaces > 0 ) {
16605 $columns = table_columns_available($i_first_comma);
16606 $number_of_fields_max =
16607 maximum_number_of_fields( $columns, $odd_or_even,
16608 $max_width, $pair_width );
16609 $number_of_fields = $number_of_fields_max;
16611 if ( $number_of_fields_best == 1
16612 && $number_of_fields >= 1 )
16614 $number_of_fields = $number_of_fields_best;
16621 # try for one column if two won't work
16622 if ( $number_of_fields <= 0 ) {
16623 $number_of_fields = int( $columns / $max_width );
16626 # The user can place an upper bound on the number of fields,
16627 # which can be useful for doing maintenance on tables
16628 if ( $rOpts_maximum_fields_per_table
16629 && $number_of_fields > $rOpts_maximum_fields_per_table )
16631 $number_of_fields = $rOpts_maximum_fields_per_table;
16634 # How many columns (characters) and lines would this container take
16635 # if no additional whitespace were added?
16636 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
16637 $i_effective_last_comma + 1 );
16638 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
16639 my $packed_lines = 1 + int( $packed_columns / $columns );
16641 # are we an item contained in an outer list?
16642 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
16644 if ( $number_of_fields <= 0 ) {
16646 # #---------------------------------------------------------------
16647 # # We're in trouble. We can't find a single field width that works.
16648 # # There is no simple answer here; we may have a single long list
16650 # #---------------------------------------------------------------
16652 # In many cases, it may be best to not force a break if there is just one
16653 # comma, because the standard continuation break logic will do a better
16656 # In the common case that all but one of the terms can fit
16657 # on a single line, it may look better not to break open the
16658 # containing parens. Consider, for example
16662 # sort { $color_value{$::a} <=> $color_value{$::b}; }
16665 # which will look like this with the container broken:
16669 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
16672 # Here is an example of this rule for a long last term:
16674 # log_message( 0, 256, 128,
16675 # "Number of routes in adj-RIB-in to be considered: $peercount" );
16677 # And here is an example with a long first term:
16680 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
16681 # $r, $pu, $ps, $cu, $cs, $tt
16683 # if $style eq 'all';
16685 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
16686 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
16687 my $long_first_term =
16688 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
16690 # break at every comma ...
16693 # if requested by user or is best looking
16694 $number_of_fields_best == 1
16696 # or if this is a sublist of a larger list
16697 || $in_hierarchical_list
16699 # or if multiple commas and we don't have a long first or last
16701 || ( $comma_count > 1
16702 && !( $long_last_term || $long_first_term ) )
16705 foreach ( 0 .. $comma_count - 1 ) {
16706 set_forced_breakpoint( $$rcomma_index[$_] );
16709 elsif ($long_last_term) {
16711 set_forced_breakpoint($i_last_comma);
16712 $$rdo_not_break_apart = 1 unless $must_break_open;
16714 elsif ($long_first_term) {
16716 set_forced_breakpoint($i_first_comma);
16720 # let breaks be defined by default bond strength logic
16725 # --------------------------------------------------------
16726 # We have a tentative field count that seems to work.
16727 # How many lines will this require?
16728 # --------------------------------------------------------
16729 my $formatted_lines = $item_count / ($number_of_fields);
16730 if ( $formatted_lines != int $formatted_lines ) {
16731 $formatted_lines = 1 + int $formatted_lines;
16734 # So far we've been trying to fill out to the right margin. But
16735 # compact tables are easier to read, so let's see if we can use fewer
16736 # fields without increasing the number of lines.
16737 $number_of_fields =
16738 compactify_table( $item_count, $number_of_fields, $formatted_lines,
16741 # How many spaces across the page will we fill?
16742 my $columns_per_line =
16743 ( int $number_of_fields / 2 ) * $pair_width +
16744 ( $number_of_fields % 2 ) * $max_width;
16746 my $formatted_columns;
16748 if ( $number_of_fields > 1 ) {
16749 $formatted_columns =
16750 ( $pair_width * ( int( $item_count / 2 ) ) +
16751 ( $item_count % 2 ) * $max_width );
16754 $formatted_columns = $max_width * $item_count;
16756 if ( $formatted_columns < $packed_columns ) {
16757 $formatted_columns = $packed_columns;
16760 my $unused_columns = $formatted_columns - $packed_columns;
16762 # set some empirical parameters to help decide if we should try to
16763 # align; high sparsity does not look good, especially with few lines
16764 my $sparsity = ($unused_columns) / ($formatted_columns);
16765 my $max_allowed_sparsity =
16766 ( $item_count < 3 ) ? 0.1
16767 : ( $packed_lines == 1 ) ? 0.15
16768 : ( $packed_lines == 2 ) ? 0.4
16771 # Begin check for shortcut methods, which avoid treating a list
16772 # as a table for relatively small parenthesized lists. These
16773 # are usually easier to read if not formatted as tables.
16775 $packed_lines <= 2 # probably can fit in 2 lines
16776 && $item_count < 9 # doesn't have too many items
16777 && $opening_environment eq 'BLOCK' # not a sub-container
16778 && $opening_token eq '(' # is paren list
16782 # Shortcut method 1: for -lp and just one comma:
16783 # This is a no-brainer, just break at the comma.
16785 $rOpts_line_up_parentheses # -lp
16786 && $item_count == 2 # two items, one comma
16787 && !$must_break_open
16790 my $i_break = $$rcomma_index[0];
16791 set_forced_breakpoint($i_break);
16792 $$rdo_not_break_apart = 1;
16793 set_non_alignment_flags( $comma_count, $rcomma_index );
16798 # method 2 is for most small ragged lists which might look
16799 # best if not displayed as a table.
16801 ( $number_of_fields == 2 && $item_count == 3 )
16803 $new_identifier_count > 0 # isn't all quotes
16804 && $sparsity > 0.15
16805 ) # would be fairly spaced gaps if aligned
16809 my $break_count = set_ragged_breakpoints( \@i_term_comma,
16810 $ri_ragged_break_list );
16811 ++$break_count if ($use_separate_first_term);
16813 # NOTE: we should really use the true break count here,
16814 # which can be greater if there are large terms and
16815 # little space, but usually this will work well enough.
16816 unless ($must_break_open) {
16818 if ( $break_count <= 1 ) {
16819 $$rdo_not_break_apart = 1;
16821 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16823 $$rdo_not_break_apart = 1;
16826 set_non_alignment_flags( $comma_count, $rcomma_index );
16830 } # end shortcut methods
16834 FORMATTER_DEBUG_FLAG_SPARSE && do {
16836 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
16840 #---------------------------------------------------------------
16841 # Compound List Rule 2:
16842 # If this list is too long for one line, and it is an item of a
16843 # larger list, then we must format it, regardless of sparsity
16844 # (ian.t). One reason that we have to do this is to trigger
16845 # Compound List Rule 1, above, which causes breaks at all commas of
16846 # all outer lists. In this way, the structure will be properly
16848 #---------------------------------------------------------------
16850 # Decide if this list is too long for one line unless broken
16851 my $total_columns = table_columns_available($i_opening_paren);
16852 my $too_long = $packed_columns > $total_columns;
16854 # For a paren list, include the length of the token just before the
16855 # '(' because this is likely a sub call, and we would have to
16856 # include the sub name on the same line as the list. This is still
16857 # imprecise, but not too bad. (steve.t)
16858 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
16860 $too_long = excess_line_length( $i_opening_minus,
16861 $i_effective_last_comma + 1 ) > 0;
16864 # FIXME: For an item after a '=>', try to include the length of the
16865 # thing before the '=>'. This is crude and should be improved by
16866 # actually looking back token by token.
16867 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
16868 my $i_opening_minus = $i_opening_paren - 4;
16869 if ( $i_opening_minus >= 0 ) {
16870 $too_long = excess_line_length( $i_opening_minus,
16871 $i_effective_last_comma + 1 ) > 0;
16875 # Always break lists contained in '[' and '{' if too long for 1 line,
16876 # and always break lists which are too long and part of a more complex
16878 my $must_break_open_container = $must_break_open
16880 && ( $in_hierarchical_list || $opening_token ne '(' ) );
16882 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
16884 #---------------------------------------------------------------
16885 # The main decision:
16886 # Now decide if we will align the data into aligned columns. Do not
16887 # attempt to align columns if this is a tiny table or it would be
16888 # too spaced. It seems that the more packed lines we have, the
16889 # sparser the list that can be allowed and still look ok.
16890 #---------------------------------------------------------------
16892 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
16893 || ( $formatted_lines < 2 )
16894 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
16898 #---------------------------------------------------------------
16899 # too sparse: would look ugly if aligned in a table;
16900 #---------------------------------------------------------------
16902 # use old breakpoints if this is a 'big' list
16903 # FIXME: goal is to improve set_ragged_breakpoints so that
16904 # this is not necessary.
16905 if ( $packed_lines > 2 && $item_count > 10 ) {
16906 write_logfile_entry("List sparse: using old breakpoints\n");
16907 copy_old_breakpoints( $i_first_comma, $i_last_comma );
16910 # let the continuation logic handle it if 2 lines
16913 my $break_count = set_ragged_breakpoints( \@i_term_comma,
16914 $ri_ragged_break_list );
16915 ++$break_count if ($use_separate_first_term);
16917 unless ($must_break_open_container) {
16918 if ( $break_count <= 1 ) {
16919 $$rdo_not_break_apart = 1;
16921 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16923 $$rdo_not_break_apart = 1;
16926 set_non_alignment_flags( $comma_count, $rcomma_index );
16931 #---------------------------------------------------------------
16932 # go ahead and format as a table
16933 #---------------------------------------------------------------
16934 write_logfile_entry(
16935 "List: auto formatting with $number_of_fields fields/row\n");
16937 my $j_first_break =
16938 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
16941 my $j = $j_first_break ;
16942 $j < $comma_count ;
16943 $j += $number_of_fields
16946 my $i = $$rcomma_index[$j];
16947 set_forced_breakpoint($i);
16953 sub set_non_alignment_flags {
16955 # set flag which indicates that these commas should not be
16957 my ( $comma_count, $rcomma_index ) = @_;
16958 foreach ( 0 .. $comma_count - 1 ) {
16959 $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
16963 sub study_list_complexity {
16965 # Look for complex tables which should be formatted with one term per line.
16966 # Returns the following:
16968 # \@i_ragged_break_list = list of good breakpoints to avoid lines
16969 # which are hard to read
16970 # $number_of_fields_best = suggested number of fields based on
16971 # complexity; = 0 if any number may be used.
16973 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
16974 my $item_count = @{$ri_term_begin};
16975 my $complex_item_count = 0;
16976 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
16977 my $i_max = @{$ritem_lengths} - 1;
16978 ##my @item_complexity;
16980 my $i_last_last_break = -3;
16981 my $i_last_break = -2;
16982 my @i_ragged_break_list;
16984 my $definitely_complex = 30;
16985 my $definitely_simple = 12;
16986 my $quote_count = 0;
16988 for my $i ( 0 .. $i_max ) {
16989 my $ib = $ri_term_begin->[$i];
16990 my $ie = $ri_term_end->[$i];
16992 # define complexity: start with the actual term length
16993 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
16995 ##TBD: join types here and check for variations
16996 ##my $str=join "", @tokens_to_go[$ib..$ie];
16999 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
17003 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
17007 if ( $ib eq $ie ) {
17008 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
17009 $complex_item_count++;
17010 $weighted_length *= 2;
17016 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
17017 $complex_item_count++;
17018 $weighted_length *= 2;
17020 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
17021 $weighted_length += 4;
17025 # add weight for extra tokens.
17026 $weighted_length += 2 * ( $ie - $ib );
17028 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
17029 ## print "# COMPLEXITY:$weighted_length $BUB\n";
17031 ##push @item_complexity, $weighted_length;
17033 # now mark a ragged break after this item it if it is 'long and
17035 if ( $weighted_length >= $definitely_complex ) {
17037 # if we broke after the previous term
17038 # then break before it too
17039 if ( $i_last_break == $i - 1
17041 && $i_last_last_break != $i - 2 )
17044 ## FIXME: don't strand a small term
17045 pop @i_ragged_break_list;
17046 push @i_ragged_break_list, $i - 2;
17047 push @i_ragged_break_list, $i - 1;
17050 push @i_ragged_break_list, $i;
17051 $i_last_last_break = $i_last_break;
17052 $i_last_break = $i;
17055 # don't break before a small last term -- it will
17056 # not look good on a line by itself.
17057 elsif ($i == $i_max
17058 && $i_last_break == $i - 1
17059 && $weighted_length <= $definitely_simple )
17061 pop @i_ragged_break_list;
17065 my $identifier_count = $i_max + 1 - $quote_count;
17067 # Need more tuning here..
17068 if ( $max_width > 12
17069 && $complex_item_count > $item_count / 2
17070 && $number_of_fields_best != 2 )
17072 $number_of_fields_best = 1;
17075 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
17078 sub get_maximum_fields_wanted {
17080 # Not all tables look good with more than one field of items.
17081 # This routine looks at a table and decides if it should be
17082 # formatted with just one field or not.
17083 # This coding is still under development.
17084 my ($ritem_lengths) = @_;
17086 my $number_of_fields_best = 0;
17088 # For just a few items, we tentatively assume just 1 field.
17089 my $item_count = @{$ritem_lengths};
17090 if ( $item_count <= 5 ) {
17091 $number_of_fields_best = 1;
17094 # For larger tables, look at it both ways and see what looks best
17098 my @max_length = ( 0, 0 );
17099 my @last_length_2 = ( undef, undef );
17100 my @first_length_2 = ( undef, undef );
17101 my $last_length = undef;
17102 my $total_variation_1 = 0;
17103 my $total_variation_2 = 0;
17104 my @total_variation_2 = ( 0, 0 );
17105 for ( my $j = 0 ; $j < $item_count ; $j++ ) {
17107 $is_odd = 1 - $is_odd;
17108 my $length = $ritem_lengths->[$j];
17109 if ( $length > $max_length[$is_odd] ) {
17110 $max_length[$is_odd] = $length;
17113 if ( defined($last_length) ) {
17114 my $dl = abs( $length - $last_length );
17115 $total_variation_1 += $dl;
17117 $last_length = $length;
17119 my $ll = $last_length_2[$is_odd];
17120 if ( defined($ll) ) {
17121 my $dl = abs( $length - $ll );
17122 $total_variation_2[$is_odd] += $dl;
17125 $first_length_2[$is_odd] = $length;
17127 $last_length_2[$is_odd] = $length;
17129 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
17131 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
17132 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
17133 $number_of_fields_best = 1;
17136 return ($number_of_fields_best);
17139 sub table_columns_available {
17140 my $i_first_comma = shift;
17142 maximum_line_length($i_first_comma) -
17143 leading_spaces_to_go($i_first_comma);
17145 # Patch: the vertical formatter does not line up lines whose lengths
17146 # exactly equal the available line length because of allowances
17147 # that must be made for side comments. Therefore, the number of
17148 # available columns is reduced by 1 character.
17153 sub maximum_number_of_fields {
17155 # how many fields will fit in the available space?
17156 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
17157 my $max_pairs = int( $columns / $pair_width );
17158 my $number_of_fields = $max_pairs * 2;
17159 if ( $odd_or_even == 1
17160 && $max_pairs * $pair_width + $max_width <= $columns )
17162 $number_of_fields++;
17164 return $number_of_fields;
17167 sub compactify_table {
17169 # given a table with a certain number of fields and a certain number
17170 # of lines, see if reducing the number of fields will make it look
17172 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
17173 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
17177 $min_fields = $number_of_fields ;
17178 $min_fields >= $odd_or_even
17179 && $min_fields * $formatted_lines >= $item_count ;
17180 $min_fields -= $odd_or_even
17183 $number_of_fields = $min_fields;
17186 return $number_of_fields;
17189 sub set_ragged_breakpoints {
17191 # Set breakpoints in a list that cannot be formatted nicely as a
17193 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
17195 my $break_count = 0;
17196 foreach (@$ri_ragged_break_list) {
17197 my $j = $ri_term_comma->[$_];
17199 set_forced_breakpoint($j);
17203 return $break_count;
17206 sub copy_old_breakpoints {
17207 my ( $i_first_comma, $i_last_comma ) = @_;
17208 for my $i ( $i_first_comma .. $i_last_comma ) {
17209 if ( $old_breakpoint_to_go[$i] ) {
17210 set_forced_breakpoint($i);
17216 my ( $i, $j ) = @_;
17217 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
17219 FORMATTER_DEBUG_FLAG_NOBREAK && do {
17220 my ( $a, $b, $c ) = caller();
17222 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
17225 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
17228 # shouldn't happen; non-critical error
17230 FORMATTER_DEBUG_FLAG_NOBREAK && do {
17231 my ( $a, $b, $c ) = caller();
17233 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
17238 sub set_fake_breakpoint {
17240 # Just bump up the breakpoint count as a signal that there are breaks.
17241 # This is useful if we have breaks but may want to postpone deciding where
17243 $forced_breakpoint_count++;
17246 sub set_forced_breakpoint {
17249 return unless defined $i && $i >= 0;
17251 # when called with certain tokens, use bond strengths to decide
17252 # if we break before or after it
17253 my $token = $tokens_to_go[$i];
17255 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
17256 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
17259 # breaks are forced before 'if' and 'unless'
17260 elsif ( $is_if_unless{$token} ) { $i-- }
17262 if ( $i >= 0 && $i <= $max_index_to_go ) {
17263 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
17265 FORMATTER_DEBUG_FLAG_FORCE && do {
17266 my ( $a, $b, $c ) = caller();
17268 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
17271 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
17272 $forced_breakpoint_to_go[$i_nonblank] = 1;
17274 if ( $i_nonblank > $index_max_forced_break ) {
17275 $index_max_forced_break = $i_nonblank;
17277 $forced_breakpoint_count++;
17278 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
17281 # if we break at an opening container..break at the closing
17282 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
17283 set_closing_breakpoint($i_nonblank);
17289 sub clear_breakpoint_undo_stack {
17290 $forced_breakpoint_undo_count = 0;
17293 sub undo_forced_breakpoint_stack {
17295 my $i_start = shift;
17296 if ( $i_start < 0 ) {
17298 my ( $a, $b, $c ) = caller();
17300 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
17304 while ( $forced_breakpoint_undo_count > $i_start ) {
17306 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
17307 if ( $i >= 0 && $i <= $max_index_to_go ) {
17308 $forced_breakpoint_to_go[$i] = 0;
17309 $forced_breakpoint_count--;
17311 FORMATTER_DEBUG_FLAG_UNDOBP && do {
17312 my ( $a, $b, $c ) = caller();
17314 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
17318 # shouldn't happen, but not a critical error
17320 FORMATTER_DEBUG_FLAG_UNDOBP && do {
17321 my ( $a, $b, $c ) = caller();
17323 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
17329 { # begin recombine_breakpoints
17340 @is_amp_amp{@_} = (1) x scalar(@_);
17343 @is_ternary{@_} = (1) x scalar(@_);
17345 @_ = qw( + - * / );
17346 @is_math_op{@_} = (1) x scalar(@_);
17349 @is_plus_minus{@_} = (1) x scalar(@_);
17352 @is_mult_div{@_} = (1) x scalar(@_);
17355 sub DUMP_BREAKPOINTS {
17357 # Debug routine to dump current breakpoints...not normally called
17358 # We are given indexes to the current lines:
17359 # $ri_beg = ref to array of BEGinning indexes of each line
17360 # $ri_end = ref to array of ENDing indexes of each line
17361 my ( $ri_beg, $ri_end, $msg ) = @_;
17362 print STDERR "----Dumping breakpoints from: $msg----\n";
17363 for my $n ( 0 .. @{$ri_end} - 1 ) {
17364 my $ibeg = $$ri_beg[$n];
17365 my $iend = $$ri_end[$n];
17367 foreach my $i ( $ibeg .. $iend ) {
17368 $text .= $tokens_to_go[$i];
17370 print STDERR "$n ($ibeg:$iend) $text\n";
17372 print STDERR "----\n";
17375 sub recombine_breakpoints {
17377 # sub set_continuation_breaks is very liberal in setting line breaks
17378 # for long lines, always setting breaks at good breakpoints, even
17379 # when that creates small lines. Sometimes small line fragments
17380 # are produced which would look better if they were combined.
17381 # That's the task of this routine.
17383 # We are given indexes to the current lines:
17384 # $ri_beg = ref to array of BEGinning indexes of each line
17385 # $ri_end = ref to array of ENDing indexes of each line
17386 my ( $ri_beg, $ri_end ) = @_;
17388 # Make a list of all good joining tokens between the lines
17391 my $nmax = @$ri_end - 1;
17392 for my $n ( 1 .. $nmax ) {
17393 my $ibeg_1 = $$ri_beg[ $n - 1 ];
17394 my $iend_1 = $$ri_end[ $n - 1 ];
17395 my $iend_2 = $$ri_end[$n];
17396 my $ibeg_2 = $$ri_beg[$n];
17398 my ( $itok, $itokp, $itokm );
17400 foreach my $itest ( $iend_1, $ibeg_2 ) {
17401 my $type = $types_to_go[$itest];
17402 if ( $is_math_op{$type}
17403 || $is_amp_amp{$type}
17404 || $is_assignment{$type}
17410 $joint[$n] = [$itok];
17413 my $more_to_do = 1;
17415 # We keep looping over all of the lines of this batch
17416 # until there are no more possible recombinations
17417 my $nmax_last = @$ri_end;
17418 while ($more_to_do) {
17422 my $nmax = @$ri_end - 1;
17424 # Safety check for infinite loop
17425 unless ( $nmax < $nmax_last ) {
17427 # Shouldn't happen because splice below decreases nmax on each
17430 "Program bug-infinite loop in recombine breakpoints\n";
17432 $nmax_last = $nmax;
17434 my $previous_outdentable_closing_paren;
17435 my $leading_amp_count = 0;
17436 my $this_line_is_semicolon_terminated;
17438 # loop over all remaining lines in this batch
17439 for $n ( 1 .. $nmax ) {
17441 #----------------------------------------------------------
17442 # If we join the current pair of lines,
17443 # line $n-1 will become the left part of the joined line
17444 # line $n will become the right part of the joined line
17446 # Here are Indexes of the endpoint tokens of the two lines:
17448 # -----line $n-1--- | -----line $n-----
17449 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
17452 # We want to decide if we should remove the line break
17453 # between the tokens at $iend_1 and $ibeg_2
17455 # We will apply a number of ad-hoc tests to see if joining
17456 # here will look ok. The code will just issue a 'next'
17457 # command if the join doesn't look good. If we get through
17458 # the gauntlet of tests, the lines will be recombined.
17459 #----------------------------------------------------------
17461 # beginning and ending tokens of the lines we are working on
17462 my $ibeg_1 = $$ri_beg[ $n - 1 ];
17463 my $iend_1 = $$ri_end[ $n - 1 ];
17464 my $iend_2 = $$ri_end[$n];
17465 my $ibeg_2 = $$ri_beg[$n];
17466 my $ibeg_nmax = $$ri_beg[$nmax];
17468 my $type_iend_1 = $types_to_go[$iend_1];
17469 my $type_iend_2 = $types_to_go[$iend_2];
17470 my $type_ibeg_1 = $types_to_go[$ibeg_1];
17471 my $type_ibeg_2 = $types_to_go[$ibeg_2];
17473 # some beginning indexes of other lines, which may not exist
17474 my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
17475 my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
17476 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
17480 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
17481 # $nesting_depth_to_go[$ibeg_1] );
17483 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
17485 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
17488 # If line $n is the last line, we set some flags and
17489 # do any special checks for it
17490 if ( $n == $nmax ) {
17492 # a terminal '{' should stay where it is
17493 next if $type_ibeg_2 eq '{';
17495 # set flag if statement $n ends in ';'
17496 $this_line_is_semicolon_terminated = $type_iend_2 eq ';'
17498 # with possible side comment
17499 || ( $type_iend_2 eq '#'
17500 && $iend_2 - $ibeg_2 >= 2
17501 && $types_to_go[ $iend_2 - 2 ] eq ';'
17502 && $types_to_go[ $iend_2 - 1 ] eq 'b' );
17505 #----------------------------------------------------------
17506 # Recombine Section 1:
17507 # Examine the special token joining this line pair, if any.
17508 # Put as many tests in this section to avoid duplicate code and
17509 # to make formatting independent of whether breaks are to the
17510 # left or right of an operator.
17511 #----------------------------------------------------------
17513 my ($itok) = @{ $joint[$n] };
17516 # FIXME: Patch - may not be necessary
17518 $type_iend_1 eq 'b'
17523 $type_iend_2 eq 'b'
17528 my $type = $types_to_go[$itok];
17530 if ( $type eq ':' ) {
17532 # do not join at a colon unless it disobeys the break request
17533 if ( $itok eq $iend_1 ) {
17534 next unless $want_break_before{$type};
17537 $leading_amp_count++;
17538 next if $want_break_before{$type};
17542 # handle math operators + - * /
17543 elsif ( $is_math_op{$type} ) {
17545 # Combine these lines if this line is a single
17546 # number, or if it is a short term with same
17547 # operator as the previous line. For example, in
17548 # the following code we will combine all of the
17549 # short terms $A, $B, $C, $D, $E, $F, together
17550 # instead of leaving them one per line:
17552 # $A * $B * $C * $D * $E * $F *
17553 # ( 2. * $eps * $sigma * $area ) *
17554 # ( 1. / $tcold**3 - 1. / $thot**3 );
17556 # This can be important in math-intensive code.
17560 my $itokp = min( $inext_to_go[$itok], $iend_2 );
17561 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
17562 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
17563 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
17565 # check for a number on the right
17566 if ( $types_to_go[$itokp] eq 'n' ) {
17568 # ok if nothing else on right
17569 if ( $itokp == $iend_2 ) {
17574 # look one more token to right..
17575 # okay if math operator or some termination
17577 ( ( $itokpp == $iend_2 )
17578 && $is_math_op{ $types_to_go[$itokpp] } )
17579 || $types_to_go[$itokpp] =~ /^[#,;]$/;
17583 # check for a number on the left
17584 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
17586 # okay if nothing else to left
17587 if ( $itokm == $ibeg_1 ) {
17591 # otherwise look one more token to left
17594 # okay if math operator, comma, or assignment
17595 $good_combo = ( $itokmm == $ibeg_1 )
17596 && ( $is_math_op{ $types_to_go[$itokmm] }
17597 || $types_to_go[$itokmm] =~ /^[,]$/
17598 || $is_assignment{ $types_to_go[$itokmm] }
17603 # look for a single short token either side of the
17605 if ( !$good_combo ) {
17607 # Slight adjustment factor to make results
17608 # independent of break before or after operator in
17609 # long summed lists. (An operator and a space make
17611 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
17615 # numbers or id's on both sides of this joint
17616 $types_to_go[$itokp] =~ /^[in]$/
17617 && $types_to_go[$itokm] =~ /^[in]$/
17619 # one of the two lines must be short:
17622 # no more than 2 nonblank tokens right of
17627 && token_sequence_length( $itokp, $iend_2 )
17629 $rOpts_short_concatenation_item_length
17632 # no more than 2 nonblank tokens left of
17637 && token_sequence_length( $ibeg_1, $itokm )
17639 $rOpts_short_concatenation_item_length
17644 # keep pure terms; don't mix +- with */
17646 $is_plus_minus{$type}
17647 && ( $is_mult_div{ $types_to_go[$itokmm] }
17648 || $is_mult_div{ $types_to_go[$itokpp] } )
17651 $is_mult_div{$type}
17652 && ( $is_plus_minus{ $types_to_go[$itokmm] }
17653 || $is_plus_minus{ $types_to_go[$itokpp] } )
17659 # it is also good to combine if we can reduce to 2 lines
17660 if ( !$good_combo ) {
17662 # index on other line where same token would be in a
17665 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
17670 && $types_to_go[$iother] ne $type;
17673 next unless ($good_combo);
17677 elsif ( $is_amp_amp{$type} ) {
17681 elsif ( $is_assignment{$type} ) {
17683 } ## end assignment
17686 #----------------------------------------------------------
17687 # Recombine Section 2:
17688 # Examine token at $iend_1 (right end of first line of pair)
17689 #----------------------------------------------------------
17691 # an isolated '}' may join with a ';' terminated segment
17692 if ( $type_iend_1 eq '}' ) {
17694 # Check for cases where combining a semicolon terminated
17695 # statement with a previous isolated closing paren will
17696 # allow the combined line to be outdented. This is
17697 # generally a good move. For example, we can join up
17698 # the last two lines here:
17700 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17701 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17707 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17708 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17711 # which makes the parens line up.
17713 # Another example, from Joe Matarazzo, probably looks best
17714 # with the 'or' clause appended to the trailing paren:
17715 # $self->some_method(
17718 # ) or die "Some_method didn't work";
17720 # But we do not want to do this for something like the -lp
17721 # option where the paren is not outdentable because the
17722 # trailing clause will be far to the right.
17724 # The logic here is synchronized with the logic in sub
17725 # sub set_adjusted_indentation, which actually does
17728 $previous_outdentable_closing_paren =
17729 $this_line_is_semicolon_terminated
17731 # only one token on last line
17732 && $ibeg_1 == $iend_1
17734 # must be structural paren
17735 && $tokens_to_go[$iend_1] eq ')'
17737 # style must allow outdenting,
17738 && !$closing_token_indentation{')'}
17740 # only leading '&&', '||', and ':' if no others seen
17741 # (but note: our count made below could be wrong
17742 # due to intervening comments)
17743 && ( $leading_amp_count == 0
17744 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
17746 # but leading colons probably line up with a
17747 # previous colon or question (count could be wrong).
17748 && $type_ibeg_2 ne ':'
17750 # only one step in depth allowed. this line must not
17751 # begin with a ')' itself.
17752 && ( $nesting_depth_to_go[$iend_1] ==
17753 $nesting_depth_to_go[$iend_2] + 1 );
17755 # YVES patch 2 of 2:
17756 # Allow cuddled eval chains, like this:
17763 # This patch works together with a patch in
17764 # setting adjusted indentation (where the closing eval
17765 # brace is outdented if possible).
17766 # The problem is that an 'eval' block has continuation
17767 # indentation and it looks better to undo it in some
17768 # cases. If we do not use this patch we would get:
17776 # The alternative, for uncuddled style, is to create
17777 # a patch in set_adjusted_indentation which undoes
17778 # the indentation of a leading line like 'or do {'.
17779 # This doesn't work well with -icb through
17781 $block_type_to_go[$iend_1] eq 'eval'
17782 && !$rOpts->{'line-up-parentheses'}
17783 && !$rOpts->{'indent-closing-brace'}
17784 && $tokens_to_go[$iend_2] eq '{'
17786 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
17787 || ( $type_ibeg_2 eq 'k'
17788 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
17789 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
17793 $previous_outdentable_closing_paren ||= 1;
17798 $previous_outdentable_closing_paren
17800 # handle '.' and '?' specially below
17801 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
17806 # honor breaks at opening brace
17807 # Added to prevent recombining something like this:
17808 # } || eval { package main;
17809 elsif ( $type_iend_1 eq '{' ) {
17810 next if $forced_breakpoint_to_go[$iend_1];
17813 # do not recombine lines with ending &&, ||,
17814 elsif ( $is_amp_amp{$type_iend_1} ) {
17815 next unless $want_break_before{$type_iend_1};
17818 # Identify and recombine a broken ?/: chain
17819 elsif ( $type_iend_1 eq '?' ) {
17821 # Do not recombine different levels
17823 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
17825 # do not recombine unless next line ends in :
17826 next unless $type_iend_2 eq ':';
17829 # for lines ending in a comma...
17830 elsif ( $type_iend_1 eq ',' ) {
17832 # Do not recombine at comma which is following the
17834 # TODO: might be best to make a special flag
17835 next if ( $old_breakpoint_to_go[$iend_1] );
17837 # an isolated '},' may join with an identifier + ';'
17838 # this is useful for the class of a 'bless' statement (bless.t)
17839 if ( $type_ibeg_1 eq '}'
17840 && $type_ibeg_2 eq 'i' )
17843 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
17844 && ( $iend_2 == ( $ibeg_2 + 1 ) )
17845 && $this_line_is_semicolon_terminated );
17847 # override breakpoint
17848 $forced_breakpoint_to_go[$iend_1] = 0;
17854 # do not recombine after a comma unless this will leave
17856 next unless ( $n + 1 >= $nmax );
17858 # do not recombine if there is a change in indentation depth
17861 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
17863 # do not recombine a "complex expression" after a
17864 # comma. "complex" means no parens.
17866 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
17867 if ( $tokens_to_go[$ii] eq '(' ) {
17872 next if $saw_paren;
17877 elsif ( $type_iend_1 eq '(' ) {
17879 # No longer doing this
17882 elsif ( $type_iend_1 eq ')' ) {
17884 # No longer doing this
17887 # keep a terminal for-semicolon
17888 elsif ( $type_iend_1 eq 'f' ) {
17892 # if '=' at end of line ...
17893 elsif ( $is_assignment{$type_iend_1} ) {
17895 # keep break after = if it was in input stream
17896 # this helps prevent 'blinkers'
17897 next if $old_breakpoint_to_go[$iend_1]
17899 # don't strand an isolated '='
17900 && $iend_1 != $ibeg_1;
17902 my $is_short_quote =
17903 ( $type_ibeg_2 eq 'Q'
17904 && $ibeg_2 == $iend_2
17905 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
17906 $rOpts_short_concatenation_item_length );
17908 ( $type_ibeg_1 eq '?'
17909 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
17911 # always join an isolated '=', a short quote, or if this
17912 # will put ?/: at start of adjacent lines
17913 if ( $ibeg_1 != $iend_1
17914 && !$is_short_quote
17921 # unless we can reduce this to two lines
17924 # or three lines, the last with a leading semicolon
17925 || ( $nmax == $n + 2
17926 && $types_to_go[$ibeg_nmax] eq ';' )
17928 # or the next line ends with a here doc
17929 || $type_iend_2 eq 'h'
17931 # or the next line ends in an open paren or brace
17932 # and the break hasn't been forced [dima.t]
17933 || ( !$forced_breakpoint_to_go[$iend_1]
17934 && $type_iend_2 eq '{' )
17937 # do not recombine if the two lines might align well
17938 # this is a very approximate test for this
17940 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
17945 # Recombine if we can make two lines
17948 # -lp users often prefer this:
17949 # my $title = function($env, $env, $sysarea,
17950 # "bubba Borrower Entry");
17951 # so we will recombine if -lp is used we have
17953 && ( !$rOpts_line_up_parentheses
17954 || $type_iend_2 ne ',' )
17958 # otherwise, scan the rhs line up to last token for
17959 # complexity. Note that we are not counting the last
17960 # token in case it is an opening paren.
17962 my $depth = $nesting_depth_to_go[$ibeg_2];
17963 for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
17964 if ( $nesting_depth_to_go[$i] != $depth ) {
17966 last if ( $tv > 1 );
17968 $depth = $nesting_depth_to_go[$i];
17971 # ok to recombine if no level changes before last token
17974 # otherwise, do not recombine if more than two
17976 next if ( $tv > 1 );
17978 # check total complexity of the two adjacent lines
17979 # that will occur if we do this join
17981 ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
17982 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
17983 if ( $nesting_depth_to_go[$i] != $depth ) {
17985 last if ( $tv > 2 );
17987 $depth = $nesting_depth_to_go[$i];
17990 # do not recombine if total is more than 2 level changes
17991 next if ( $tv > 2 );
17996 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
17997 $forced_breakpoint_to_go[$iend_1] = 0;
18002 elsif ( $type_iend_1 eq 'k' ) {
18004 # make major control keywords stand out
18009 #/^(last|next|redo|return)$/
18010 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
18012 # but only if followed by multiple lines
18016 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
18018 unless $want_break_before{ $tokens_to_go[$iend_1] };
18022 #----------------------------------------------------------
18023 # Recombine Section 3:
18024 # Examine token at $ibeg_2 (left end of second line of pair)
18025 #----------------------------------------------------------
18027 # join lines identified above as capable of
18028 # causing an outdented line with leading closing paren
18029 # Note that we are skipping the rest of this section
18030 if ($previous_outdentable_closing_paren) {
18031 $forced_breakpoint_to_go[$iend_1] = 0;
18034 # handle lines with leading &&, ||
18035 elsif ( $is_amp_amp{$type_ibeg_2} ) {
18037 $leading_amp_count++;
18039 # ok to recombine if it follows a ? or :
18040 # and is followed by an open paren..
18042 ( $is_ternary{$type_ibeg_1}
18043 && $tokens_to_go[$iend_2] eq '(' )
18045 # or is followed by a ? or : at same depth
18047 # We are looking for something like this. We can
18048 # recombine the && line with the line above to make the
18049 # structure more clear:
18051 # exists $G->{Attr}->{V}
18052 # && exists $G->{Attr}->{V}->{$u}
18053 # ? %{ $G->{Attr}->{V}->{$u} }
18056 # We should probably leave something like this alone:
18058 # exists $G->{Attr}->{E}
18059 # && exists $G->{Attr}->{E}->{$u}
18060 # && exists $G->{Attr}->{E}->{$u}->{$v}
18061 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
18063 # so that we either have all of the &&'s (or ||'s)
18064 # on one line, as in the first example, or break at
18065 # each one as in the second example. However, it
18066 # sometimes makes things worse to check for this because
18067 # it prevents multiple recombinations. So this is not done.
18069 && $is_ternary{ $types_to_go[$ibeg_3] }
18070 && $nesting_depth_to_go[$ibeg_3] ==
18071 $nesting_depth_to_go[$ibeg_2] );
18073 next if !$ok && $want_break_before{$type_ibeg_2};
18074 $forced_breakpoint_to_go[$iend_1] = 0;
18076 # tweak the bond strength to give this joint priority
18081 # Identify and recombine a broken ?/: chain
18082 elsif ( $type_ibeg_2 eq '?' ) {
18084 # Do not recombine different levels
18085 my $lev = $levels_to_go[$ibeg_2];
18086 next if ( $lev ne $levels_to_go[$ibeg_1] );
18088 # Do not recombine a '?' if either next line or
18089 # previous line does not start with a ':'. The reasons
18090 # are that (1) no alignment of the ? will be possible
18091 # and (2) the expression is somewhat complex, so the
18092 # '?' is harder to see in the interior of the line.
18093 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
18094 my $precedes_colon =
18095 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
18096 next unless ( $follows_colon || $precedes_colon );
18098 # we will always combining a ? line following a : line
18099 if ( !$follows_colon ) {
18101 # ...otherwise recombine only if it looks like a chain.
18102 # we will just look at a few nearby lines to see if
18103 # this looks like a chain.
18104 my $local_count = 0;
18105 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
18108 && $types_to_go[$ii] eq ':'
18109 && $levels_to_go[$ii] == $lev;
18111 next unless ( $local_count > 1 );
18113 $forced_breakpoint_to_go[$iend_1] = 0;
18116 # do not recombine lines with leading '.'
18117 elsif ( $type_ibeg_2 eq '.' ) {
18118 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
18122 # ... unless there is just one and we can reduce
18123 # this to two lines if we do. For example, this
18127 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
18129 # looks better than this:
18130 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
18131 # . '$args .= $pat;'
18136 && $type_ibeg_1 ne $type_ibeg_2
18139 # ... or this would strand a short quote , like this
18140 # . "some long quote"
18143 || ( $types_to_go[$i_next_nonblank] eq 'Q'
18144 && $i_next_nonblank >= $iend_2 - 1
18145 && $token_lengths_to_go[$i_next_nonblank] <
18146 $rOpts_short_concatenation_item_length )
18150 # handle leading keyword..
18151 elsif ( $type_ibeg_2 eq 'k' ) {
18153 # handle leading "or"
18154 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
18157 $this_line_is_semicolon_terminated
18160 # following 'if' or 'unless' or 'or'
18161 $type_ibeg_1 eq 'k'
18162 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
18164 # important: only combine a very simple or
18165 # statement because the step below may have
18166 # combined a trailing 'and' with this or,
18167 # and we do not want to then combine
18168 # everything together
18169 && ( $iend_2 - $ibeg_2 <= 7 )
18173 $forced_breakpoint_to_go[$iend_1] = 0
18174 unless $old_breakpoint_to_go[$iend_1];
18177 # handle leading 'and'
18178 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
18180 # Decide if we will combine a single terminal 'and'
18181 # after an 'if' or 'unless'.
18183 # This looks best with the 'and' on the same
18184 # line as the 'if':
18187 # if $seconds and $nu < 2;
18189 # But this looks better as shown:
18192 # if !$this->{Parents}{$_}
18193 # or $this->{Parents}{$_} eq $_;
18197 $this_line_is_semicolon_terminated
18200 # following 'if' or 'unless' or 'or'
18201 $type_ibeg_1 eq 'k'
18202 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
18203 || $tokens_to_go[$ibeg_1] eq 'or' )
18208 # handle leading "if" and "unless"
18209 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
18211 # FIXME: This is still experimental..may not be too useful
18214 $this_line_is_semicolon_terminated
18216 # previous line begins with 'and' or 'or'
18217 && $type_ibeg_1 eq 'k'
18218 && $is_and_or{ $tokens_to_go[$ibeg_1] }
18223 # handle all other leading keywords
18226 # keywords look best at start of lines,
18227 # but combine things like "1 while"
18228 unless ( $is_assignment{$type_iend_1} ) {
18230 if ( ( $type_iend_1 ne 'k' )
18231 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
18236 # similar treatment of && and || as above for 'and' and 'or':
18237 # NOTE: This block of code is currently bypassed because
18238 # of a previous block but is retained for possible future use.
18239 elsif ( $is_amp_amp{$type_ibeg_2} ) {
18241 # maybe looking at something like:
18242 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
18246 $this_line_is_semicolon_terminated
18248 # previous line begins with an 'if' or 'unless' keyword
18249 && $type_ibeg_1 eq 'k'
18250 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
18255 # handle line with leading = or similar
18256 elsif ( $is_assignment{$type_ibeg_2} ) {
18257 next unless ( $n == 1 || $n == $nmax );
18258 next if $old_breakpoint_to_go[$iend_1];
18262 # unless we can reduce this to two lines
18265 # or three lines, the last with a leading semicolon
18266 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
18268 # or the next line ends with a here doc
18269 || $type_iend_2 eq 'h'
18271 # or this is a short line ending in ;
18272 || ( $n == $nmax && $this_line_is_semicolon_terminated )
18274 $forced_breakpoint_to_go[$iend_1] = 0;
18277 #----------------------------------------------------------
18278 # Recombine Section 4:
18279 # Combine the lines if we arrive here and it is possible
18280 #----------------------------------------------------------
18282 # honor hard breakpoints
18283 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
18285 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
18287 # combined line cannot be too long
18288 my $excess = excess_line_length( $ibeg_1, $iend_2 );
18289 next if ( $excess > 0 );
18291 # Require a few extra spaces before recombining lines if we are
18292 # at an old breakpoint unless this is a simple list or terminal
18293 # line. The goal is to avoid oscillating between two
18294 # quasi-stable end states. For example this snippet caused
18298 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
18302 if ( $old_breakpoint_to_go[$iend_1]
18303 && !$this_line_is_semicolon_terminated
18306 && $type_iend_2 ne ',' );
18308 # do not recombine if we would skip in indentation levels
18309 if ( $n < $nmax ) {
18310 my $if_next = $$ri_beg[ $n + 1 ];
18313 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
18314 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
18316 # but an isolated 'if (' is undesirable
18319 && $iend_1 - $ibeg_1 <= 2
18320 && $type_ibeg_1 eq 'k'
18321 && $tokens_to_go[$ibeg_1] eq 'if'
18322 && $tokens_to_go[$iend_1] ne '('
18328 next if ( $bs >= NO_BREAK - 1 );
18330 # remember the pair with the greatest bond strength
18337 if ( $bs > $bs_best ) {
18344 # recombine the pair with the greatest bond strength
18346 splice @$ri_beg, $n_best, 1;
18347 splice @$ri_end, $n_best - 1, 1;
18348 splice @joint, $n_best, 1;
18350 # keep going if we are still making progress
18354 return ( $ri_beg, $ri_end );
18356 } # end recombine_breakpoints
18358 sub break_all_chain_tokens {
18360 # scan the current breakpoints looking for breaks at certain "chain
18361 # operators" (. : && || + etc) which often occur repeatedly in a long
18362 # statement. If we see a break at any one, break at all similar tokens
18363 # within the same container.
18365 my ( $ri_left, $ri_right ) = @_;
18367 my %saw_chain_type;
18368 my %left_chain_type;
18369 my %right_chain_type;
18370 my %interior_chain_type;
18371 my $nmax = @$ri_right - 1;
18373 # scan the left and right end tokens of all lines
18375 for my $n ( 0 .. $nmax ) {
18376 my $il = $$ri_left[$n];
18377 my $ir = $$ri_right[$n];
18378 my $typel = $types_to_go[$il];
18379 my $typer = $types_to_go[$ir];
18380 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
18381 $typer = '+' if ( $typer eq '-' );
18382 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
18383 $typer = '*' if ( $typer eq '/' );
18384 my $tokenl = $tokens_to_go[$il];
18385 my $tokenr = $tokens_to_go[$ir];
18387 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
18388 next if ( $typel eq '?' );
18389 push @{ $left_chain_type{$typel} }, $il;
18390 $saw_chain_type{$typel} = 1;
18393 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
18394 next if ( $typer eq '?' );
18395 push @{ $right_chain_type{$typer} }, $ir;
18396 $saw_chain_type{$typer} = 1;
18400 return unless $count;
18402 # now look for any interior tokens of the same types
18404 for my $n ( 0 .. $nmax ) {
18405 my $il = $$ri_left[$n];
18406 my $ir = $$ri_right[$n];
18407 for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
18408 my $type = $types_to_go[$i];
18409 $type = '+' if ( $type eq '-' );
18410 $type = '*' if ( $type eq '/' );
18411 if ( $saw_chain_type{$type} ) {
18412 push @{ $interior_chain_type{$type} }, $i;
18417 return unless $count;
18419 # now make a list of all new break points
18422 # loop over all chain types
18423 foreach my $type ( keys %saw_chain_type ) {
18425 # quit if just ONE continuation line with leading . For example--
18426 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
18428 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
18430 # loop over all interior chain tokens
18431 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
18433 # loop over all left end tokens of same type
18434 if ( $left_chain_type{$type} ) {
18435 next if $nobreak_to_go[ $itest - 1 ];
18436 foreach my $i ( @{ $left_chain_type{$type} } ) {
18437 next unless in_same_container( $i, $itest );
18438 push @insert_list, $itest - 1;
18440 # Break at matching ? if this : is at a different level.
18441 # For example, the ? before $THRf_DEAD in the following
18442 # should get a break if its : gets a break.
18445 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
18446 # : ( $_ & 4 ) ? $THRf_R_DETACHED
18447 # : $THRf_R_JOINABLE;
18449 && $levels_to_go[$i] != $levels_to_go[$itest] )
18451 my $i_question = $mate_index_to_go[$itest];
18452 if ( $i_question > 0 ) {
18453 push @insert_list, $i_question - 1;
18460 # loop over all right end tokens of same type
18461 if ( $right_chain_type{$type} ) {
18462 next if $nobreak_to_go[$itest];
18463 foreach my $i ( @{ $right_chain_type{$type} } ) {
18464 next unless in_same_container( $i, $itest );
18465 push @insert_list, $itest;
18467 # break at matching ? if this : is at a different level
18469 && $levels_to_go[$i] != $levels_to_go[$itest] )
18471 my $i_question = $mate_index_to_go[$itest];
18472 if ( $i_question >= 0 ) {
18473 push @insert_list, $i_question;
18482 # insert any new break points
18483 if (@insert_list) {
18484 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18490 # Look for assignment operators that could use a breakpoint.
18491 # For example, in the following snippet
18493 # $HOME = $ENV{HOME}
18496 # || die "no home directory for user $<";
18498 # we could break at the = to get this, which is a little nicer:
18503 # || die "no home directory for user $<";
18505 # The logic here follows the logic in set_logical_padding, which
18506 # will add the padding in the second line to improve alignment.
18508 my ( $ri_left, $ri_right ) = @_;
18509 my $nmax = @$ri_right - 1;
18510 return unless ( $nmax >= 2 );
18512 # scan the left ends of first two lines
18515 for my $n ( 1 .. 2 ) {
18516 my $il = $$ri_left[$n];
18517 my $typel = $types_to_go[$il];
18518 my $tokenl = $tokens_to_go[$il];
18520 my $has_leading_op = ( $tokenl =~ /^\w/ )
18521 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
18522 : $is_chain_operator{$typel}; # and, or
18523 return unless ($has_leading_op);
18526 unless ( $tokenl eq $tokbeg
18527 && $nesting_depth_to_go[$il] eq $depth_beg );
18530 $depth_beg = $nesting_depth_to_go[$il];
18533 # now look for any interior tokens of the same types
18534 my $il = $$ri_left[0];
18535 my $ir = $$ri_right[0];
18537 # now make a list of all new break points
18539 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
18540 my $type = $types_to_go[$i];
18541 if ( $is_assignment{$type}
18542 && $nesting_depth_to_go[$i] eq $depth_beg )
18544 if ( $want_break_before{$type} ) {
18545 push @insert_list, $i - 1;
18548 push @insert_list, $i;
18553 # Break after a 'return' followed by a chain of operators
18554 # return ( $^O !~ /win32|dos/i )
18555 # && ( $^O ne 'VMS' )
18556 # && ( $^O ne 'OS2' )
18557 # && ( $^O ne 'MacOS' );
18560 # ( $^O !~ /win32|dos/i )
18561 # && ( $^O ne 'VMS' )
18562 # && ( $^O ne 'OS2' )
18563 # && ( $^O ne 'MacOS' );
18565 if ( $types_to_go[$i] eq 'k'
18566 && $tokens_to_go[$i] eq 'return'
18568 && $nesting_depth_to_go[$i] eq $depth_beg )
18570 push @insert_list, $i;
18573 return unless (@insert_list);
18575 # One final check...
18576 # scan second and third lines and be sure there are no assignments
18577 # we want to avoid breaking at an = to make something like this:
18579 # $html_icons{"$type-$state"}
18580 # or $icon = $html_icons{$type}
18581 # or $icon = $html_icons{$state} )
18582 for my $n ( 1 .. 2 ) {
18583 my $il = $$ri_left[$n];
18584 my $ir = $$ri_right[$n];
18585 for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
18586 my $type = $types_to_go[$i];
18588 if ( $is_assignment{$type}
18589 && $nesting_depth_to_go[$i] eq $depth_beg );
18593 # ok, insert any new break point
18594 if (@insert_list) {
18595 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18599 sub insert_final_breaks {
18601 my ( $ri_left, $ri_right ) = @_;
18603 my $nmax = @$ri_right - 1;
18605 # scan the left and right end tokens of all lines
18607 my $i_first_colon = -1;
18608 for my $n ( 0 .. $nmax ) {
18609 my $il = $$ri_left[$n];
18610 my $ir = $$ri_right[$n];
18611 my $typel = $types_to_go[$il];
18612 my $typer = $types_to_go[$ir];
18613 return if ( $typel eq '?' );
18614 return if ( $typer eq '?' );
18615 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
18616 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
18619 # For long ternary chains,
18620 # if the first : we see has its # ? is in the interior
18621 # of a preceding line, then see if there are any good
18622 # breakpoints before the ?.
18623 if ( $i_first_colon > 0 ) {
18624 my $i_question = $mate_index_to_go[$i_first_colon];
18625 if ( $i_question > 0 ) {
18627 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
18628 my $token = $tokens_to_go[$ii];
18629 my $type = $types_to_go[$ii];
18631 # For now, a good break is either a comma or a 'return'.
18632 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
18633 && in_same_container( $ii, $i_question ) )
18635 push @insert_list, $ii;
18640 # insert any new break points
18641 if (@insert_list) {
18642 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18648 sub in_same_container {
18650 # check to see if tokens at i1 and i2 are in the
18651 # same container, and not separated by a comma, ? or :
18652 my ( $i1, $i2 ) = @_;
18653 my $type = $types_to_go[$i1];
18654 my $depth = $nesting_depth_to_go[$i1];
18655 return unless ( $nesting_depth_to_go[$i2] == $depth );
18656 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
18658 ###########################################################
18659 # This is potentially a very slow routine and not critical.
18660 # For safety just give up for large differences.
18661 # See test file 'infinite_loop.txt'
18662 # TODO: replace this loop with a data structure
18663 ###########################################################
18664 return if ( $i2 - $i1 > 200 );
18666 for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
18667 next if ( $nesting_depth_to_go[$i] > $depth );
18668 return if ( $nesting_depth_to_go[$i] < $depth );
18670 my $tok = $tokens_to_go[$i];
18671 $tok = ',' if $tok eq '=>'; # treat => same as ,
18673 # Example: we would not want to break at any of these .'s
18674 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
18675 if ( $type ne ':' ) {
18676 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
18679 return if ( $tok =~ /^[\,]$/ );
18685 sub set_continuation_breaks {
18687 # Define an array of indexes for inserting newline characters to
18688 # keep the line lengths below the maximum desired length. There is
18689 # an implied break after the last token, so it need not be included.
18692 # This routine is part of series of routines which adjust line
18693 # lengths. It is only called if a statement is longer than the
18694 # maximum line length, or if a preliminary scanning located
18695 # desirable break points. Sub scan_list has already looked at
18696 # these tokens and set breakpoints (in array
18697 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
18698 # after commas, after opening parens, and before closing parens).
18699 # This routine will honor these breakpoints and also add additional
18700 # breakpoints as necessary to keep the line length below the maximum
18701 # requested. It bases its decision on where the 'bond strength' is
18704 # Output: returns references to the arrays:
18707 # which contain the indexes $i of the first and last tokens on each
18710 # In addition, the array:
18711 # $forced_breakpoint_to_go[$i]
18712 # may be updated to be =1 for any index $i after which there must be
18713 # a break. This signals later routines not to undo the breakpoint.
18715 my $saw_good_break = shift;
18716 my @i_first = (); # the first index to output
18717 my @i_last = (); # the last index to output
18718 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
18719 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
18721 set_bond_strengths();
18724 my $imax = $max_index_to_go;
18725 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
18726 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
18727 my $i_begin = $imin; # index for starting next iteration
18729 my $leading_spaces = leading_spaces_to_go($imin);
18730 my $line_count = 0;
18731 my $last_break_strength = NO_BREAK;
18732 my $i_last_break = -1;
18733 my $max_bias = 0.001;
18734 my $tiny_bias = 0.0001;
18735 my $leading_alignment_token = "";
18736 my $leading_alignment_type = "";
18738 # see if any ?/:'s are in order
18739 my $colons_in_order = 1;
18741 my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
18742 my $colon_count = @colon_list;
18743 foreach (@colon_list) {
18744 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
18748 # This is a sufficient but not necessary condition for colon chain
18749 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
18751 #-------------------------------------------------------
18752 # BEGINNING of main loop to set continuation breakpoints
18753 # Keep iterating until we reach the end
18754 #-------------------------------------------------------
18755 while ( $i_begin <= $imax ) {
18756 my $lowest_strength = NO_BREAK;
18757 my $starting_sum = $summed_lengths_to_go[$i_begin];
18760 my $lowest_next_token = '';
18761 my $lowest_next_type = 'b';
18762 my $i_lowest_next_nonblank = -1;
18764 #-------------------------------------------------------
18765 # BEGINNING of inner loop to find the best next breakpoint
18766 #-------------------------------------------------------
18767 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
18768 my $type = $types_to_go[$i_test];
18769 my $token = $tokens_to_go[$i_test];
18770 my $next_type = $types_to_go[ $i_test + 1 ];
18771 my $next_token = $tokens_to_go[ $i_test + 1 ];
18772 my $i_next_nonblank = $inext_to_go[$i_test];
18773 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
18774 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18775 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18776 my $strength = $bond_strength_to_go[$i_test];
18777 my $maximum_line_length = maximum_line_length($i_begin);
18779 # use old breaks as a tie-breaker. For example to
18780 # prevent blinkers with -pbp in this code:
18783 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
18786 # At the same time try to prevent a leading * in this code
18787 # with the default formatting:
18790 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
18791 ## * ( $x**( $a - 1 ) )
18792 ## * ( ( 1 - $x )**( $b - 1 ) );
18794 # reduce strength a bit to break ties at an old breakpoint ...
18796 $old_breakpoint_to_go[$i_test]
18798 # which is a 'good' breakpoint, meaning ...
18799 # we don't want to break before it
18800 && !$want_break_before{$type}
18802 # and either we want to break before the next token
18803 # or the next token is not short (i.e. not a '*', '/' etc.)
18804 && $i_next_nonblank <= $imax
18805 && ( $want_break_before{$next_nonblank_type}
18806 || $token_lengths_to_go[$i_next_nonblank] > 2
18807 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
18810 $strength -= $tiny_bias;
18813 # otherwise increase strength a bit if this token would be at the
18814 # maximum line length. This is necessary to avoid blinking
18815 # in the above example when the -iob flag is added.
18819 $summed_lengths_to_go[ $i_test + 1 ] -
18821 if ( $len >= $maximum_line_length ) {
18822 $strength += $tiny_bias;
18826 my $must_break = 0;
18828 # Force an immediate break at certain operators
18829 # with lower level than the start of the line,
18830 # unless we've already seen a better break.
18832 ##############################################
18833 # Note on an issue with a preceding ?
18834 ##############################################
18835 # We don't include a ? in the above list, but there may
18836 # be a break at a previous ? if the line is long.
18837 # Because of this we do not want to force a break if
18838 # there is a previous ? on this line. For now the best way
18839 # to do this is to not break if we have seen a lower strength
18840 # point, which is probably a ?.
18842 # Example of unwanted breaks we are avoiding at a '.' following a ?
18843 # from pod2html using perltidy -gnu:
18845 # ? "\n<A NAME=\""
18847 # . "\">\n$text</A>\n"
18848 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
18851 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
18852 || ( $next_nonblank_type eq 'k'
18853 && $next_nonblank_token =~ /^(and|or)$/ )
18855 && ( $nesting_depth_to_go[$i_begin] >
18856 $nesting_depth_to_go[$i_next_nonblank] )
18857 && ( $strength <= $lowest_strength )
18860 set_forced_breakpoint($i_next_nonblank);
18865 # Try to put a break where requested by scan_list
18866 $forced_breakpoint_to_go[$i_test]
18868 # break between ) { in a continued line so that the '{' can
18870 # See similar logic in scan_list which catches instances
18871 # where a line is just something like ') {'. We have to
18872 # be careful because the corresponding block keyword might
18873 # not be on the first line, such as 'for' here:
18877 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
18883 && ( $token eq ')' )
18884 && ( $next_nonblank_type eq '{' )
18885 && ($next_nonblank_block_type)
18886 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
18888 # RT #104427: Dont break before opening sub brace because
18889 # sub block breaks handled at higher level, unless
18890 # it looks like the preceeding list is long and broken
18892 $next_nonblank_block_type =~ /^sub\b/
18893 && ( $nesting_depth_to_go[$i_begin] ==
18894 $nesting_depth_to_go[$i_next_nonblank] )
18897 && !$rOpts->{'opening-brace-always-on-right'}
18900 # There is an implied forced break at a terminal opening brace
18901 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
18905 # Forced breakpoints must sometimes be overridden, for example
18906 # because of a side comment causing a NO_BREAK. It is easier
18907 # to catch this here than when they are set.
18908 if ( $strength < NO_BREAK - 1 ) {
18909 $strength = $lowest_strength - $tiny_bias;
18914 # quit if a break here would put a good terminal token on
18915 # the next line and we already have a possible break
18918 && ( $next_nonblank_type =~ /^[\;\,]$/ )
18922 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
18924 ) > $maximum_line_length
18928 last if ( $i_lowest >= 0 );
18931 # Avoid a break which would strand a single punctuation
18932 # token. For example, we do not want to strand a leading
18933 # '.' which is followed by a long quoted string.
18934 # But note that we do want to do this with -extrude (l=1)
18935 # so please test any changes to this code on -extrude.
18938 && ( $i_test == $i_begin )
18939 && ( $i_test < $imax )
18940 && ( $token eq $type )
18944 $summed_lengths_to_go[ $i_test + 1 ] -
18946 ) < $maximum_line_length
18950 $i_test = min( $imax, $inext_to_go[$i_test] );
18954 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
18957 # break at previous best break if it would have produced
18958 # a leading alignment of certain common tokens, and it
18959 # is different from the latest candidate break
18961 if ($leading_alignment_type);
18963 # Force at least one breakpoint if old code had good
18964 # break It is only called if a breakpoint is required or
18965 # desired. This will probably need some adjustments
18966 # over time. A goal is to try to be sure that, if a new
18967 # side comment is introduced into formatted text, then
18968 # the same breakpoints will occur. scbreak.t
18971 $i_test == $imax # we are at the end
18972 && !$forced_breakpoint_count #
18973 && $saw_good_break # old line had good break
18974 && $type =~ /^[#;\{]$/ # and this line ends in
18975 # ';' or side comment
18976 && $i_last_break < 0 # and we haven't made a break
18977 && $i_lowest >= 0 # and we saw a possible break
18978 && $i_lowest < $imax - 1 # (but not just before this ;)
18979 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
18982 # Do not skip past an important break point in a short final
18983 # segment. For example, without this check we would miss the
18984 # break at the final / in the following code:
18987 # ( $tau * $mass_pellet * $q_0 *
18988 # ( 1. - exp( -$t_stop / $tau ) ) -
18989 # 4. * $pi * $factor * $k_ice *
18990 # ( $t_melt - $t_ice ) *
18993 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
18995 if ( $line_count > 2
18996 && $i_lowest < $i_test
18997 && $i_test > $imax - 2
18998 && $nesting_depth_to_go[$i_begin] >
18999 $nesting_depth_to_go[$i_lowest]
19000 && $lowest_strength < $last_break_strength - .5 * WEAK )
19002 # Make this break for math operators for now
19003 my $ir = $inext_to_go[$i_lowest];
19004 my $il = $iprev_to_go[$ir];
19006 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
19007 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
19010 # Update the minimum bond strength location
19011 $lowest_strength = $strength;
19012 $i_lowest = $i_test;
19013 $lowest_next_token = $next_nonblank_token;
19014 $lowest_next_type = $next_nonblank_type;
19015 $i_lowest_next_nonblank = $i_next_nonblank;
19016 last if $must_break;
19018 # set flags to remember if a break here will produce a
19019 # leading alignment of certain common tokens
19020 if ( $line_count > 0
19022 && ( $lowest_strength - $last_break_strength <= $max_bias )
19025 my $i_last_end = $iprev_to_go[$i_begin];
19026 my $tok_beg = $tokens_to_go[$i_begin];
19027 my $type_beg = $types_to_go[$i_begin];
19030 # check for leading alignment of certain tokens
19032 $tok_beg eq $next_nonblank_token
19033 && $is_chain_operator{$tok_beg}
19034 && ( $type_beg eq 'k'
19035 || $type_beg eq $tok_beg )
19036 && $nesting_depth_to_go[$i_begin] >=
19037 $nesting_depth_to_go[$i_next_nonblank]
19040 || ( $tokens_to_go[$i_last_end] eq $token
19041 && $is_chain_operator{$token}
19042 && ( $type eq 'k' || $type eq $token )
19043 && $nesting_depth_to_go[$i_last_end] >=
19044 $nesting_depth_to_go[$i_test] )
19047 $leading_alignment_token = $next_nonblank_token;
19048 $leading_alignment_type = $next_nonblank_type;
19053 my $too_long = ( $i_test >= $imax );
19054 if ( !$too_long ) {
19057 $summed_lengths_to_go[ $i_test + 2 ] -
19059 $too_long = $next_length > $maximum_line_length;
19061 # To prevent blinkers we will avoid leaving a token exactly at
19062 # the line length limit unless it is the last token or one of
19063 # several "good" types.
19065 # The following code was a blinker with -pbp before this
19067 ## $last_nonblank_token eq '('
19068 ## && $is_indirect_object_taker{ $paren_type
19069 ## [$paren_depth] }
19070 # The issue causing the problem is that if the
19071 # term [$paren_depth] gets broken across a line then
19072 # the whitespace routine doesn't see both opening and closing
19073 # brackets and will format like '[ $paren_depth ]'. This
19074 # leads to an oscillation in length depending if we break
19075 # before the closing bracket or not.
19077 && $i_test + 1 < $imax
19078 && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
19080 $too_long = $next_length >= $maximum_line_length;
19084 FORMATTER_DEBUG_FLAG_BREAK
19087 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
19088 my $i_testp2 = $i_test + 2;
19089 if ( $i_testp2 > $max_index_to_go + 1 ) {
19090 $i_testp2 = $max_index_to_go + 1;
19092 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
19093 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
19095 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
19098 # allow one extra terminal token after exceeding line length
19099 # if it would strand this token.
19100 if ( $rOpts_fuzzy_line_length
19102 && $i_lowest == $i_test
19103 && $token_lengths_to_go[$i_test] > 1
19104 && $next_nonblank_type =~ /^[\;\,]$/ )
19111 ( $i_test == $imax ) # we're done if no more tokens,
19113 ( $i_lowest >= 0 ) # or no more space and we have a break
19119 #-------------------------------------------------------
19120 # END of inner loop to find the best next breakpoint
19121 # Now decide exactly where to put the breakpoint
19122 #-------------------------------------------------------
19124 # it's always ok to break at imax if no other break was found
19125 if ( $i_lowest < 0 ) { $i_lowest = $imax }
19127 # semi-final index calculation
19128 my $i_next_nonblank = $inext_to_go[$i_lowest];
19129 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
19130 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
19132 #-------------------------------------------------------
19133 # ?/: rule 1 : if a break here will separate a '?' on this
19134 # line from its closing ':', then break at the '?' instead.
19135 #-------------------------------------------------------
19137 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
19138 next unless ( $tokens_to_go[$i] eq '?' );
19140 # do not break if probable sequence of ?/: statements
19141 next if ($is_colon_chain);
19143 # do not break if statement is broken by side comment
19146 $tokens_to_go[$max_index_to_go] eq '#'
19147 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
19148 $max_index_to_go ) !~ /^[\;\}]$/
19151 # no break needed if matching : is also on the line
19153 if ( $mate_index_to_go[$i] >= 0
19154 && $mate_index_to_go[$i] <= $i_next_nonblank );
19157 if ( $want_break_before{'?'} ) { $i_lowest-- }
19161 #-------------------------------------------------------
19162 # END of inner loop to find the best next breakpoint:
19163 # Break the line after the token with index i=$i_lowest
19164 #-------------------------------------------------------
19166 # final index calculation
19167 $i_next_nonblank = $inext_to_go[$i_lowest];
19168 $next_nonblank_type = $types_to_go[$i_next_nonblank];
19169 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
19171 FORMATTER_DEBUG_FLAG_BREAK
19173 "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
19175 #-------------------------------------------------------
19176 # ?/: rule 2 : if we break at a '?', then break at its ':'
19178 # Note: this rule is also in sub scan_list to handle a break
19179 # at the start and end of a line (in case breaks are dictated
19180 # by side comments).
19181 #-------------------------------------------------------
19182 if ( $next_nonblank_type eq '?' ) {
19183 set_closing_breakpoint($i_next_nonblank);
19185 elsif ( $types_to_go[$i_lowest] eq '?' ) {
19186 set_closing_breakpoint($i_lowest);
19189 #-------------------------------------------------------
19190 # ?/: rule 3 : if we break at a ':' then we save
19191 # its location for further work below. We may need to go
19192 # back and break at its '?'.
19193 #-------------------------------------------------------
19194 if ( $next_nonblank_type eq ':' ) {
19195 push @i_colon_breaks, $i_next_nonblank;
19197 elsif ( $types_to_go[$i_lowest] eq ':' ) {
19198 push @i_colon_breaks, $i_lowest;
19201 # here we should set breaks for all '?'/':' pairs which are
19202 # separated by this line
19206 # save this line segment, after trimming blanks at the ends
19208 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
19210 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
19212 # set a forced breakpoint at a container opening, if necessary, to
19213 # signal a break at a closing container. Excepting '(' for now.
19214 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
19215 && !$forced_breakpoint_to_go[$i_lowest] )
19217 set_closing_breakpoint($i_lowest);
19220 # get ready to go again
19221 $i_begin = $i_lowest + 1;
19222 $last_break_strength = $lowest_strength;
19223 $i_last_break = $i_lowest;
19224 $leading_alignment_token = "";
19225 $leading_alignment_type = "";
19226 $lowest_next_token = '';
19227 $lowest_next_type = 'b';
19229 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
19233 # update indentation size
19234 if ( $i_begin <= $imax ) {
19235 $leading_spaces = leading_spaces_to_go($i_begin);
19239 #-------------------------------------------------------
19240 # END of main loop to set continuation breakpoints
19241 # Now go back and make any necessary corrections
19242 #-------------------------------------------------------
19244 #-------------------------------------------------------
19245 # ?/: rule 4 -- if we broke at a ':', then break at
19246 # corresponding '?' unless this is a chain of ?: expressions
19247 #-------------------------------------------------------
19248 if (@i_colon_breaks) {
19250 # using a simple method for deciding if we are in a ?/: chain --
19251 # this is a chain if it has multiple ?/: pairs all in order;
19253 # Note that if line starts in a ':' we count that above as a break
19254 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
19256 unless ($is_chain) {
19257 my @insert_list = ();
19258 foreach (@i_colon_breaks) {
19259 my $i_question = $mate_index_to_go[$_];
19260 if ( $i_question >= 0 ) {
19261 if ( $want_break_before{'?'} ) {
19262 $i_question = $iprev_to_go[$i_question];
19265 if ( $i_question >= 0 ) {
19266 push @insert_list, $i_question;
19269 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
19273 return ( \@i_first, \@i_last, $colon_count );
19276 sub insert_additional_breaks {
19278 # this routine will add line breaks at requested locations after
19279 # sub set_continuation_breaks has made preliminary breaks.
19281 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
19284 my $line_number = 0;
19286 foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
19288 $i_f = $$ri_first[$line_number];
19289 $i_l = $$ri_last[$line_number];
19290 while ( $i_break_left >= $i_l ) {
19293 # shouldn't happen unless caller passes bad indexes
19294 if ( $line_number >= @$ri_last ) {
19296 "Non-fatal program bug: couldn't set break at $i_break_left\n"
19298 report_definite_bug();
19301 $i_f = $$ri_first[$line_number];
19302 $i_l = $$ri_last[$line_number];
19305 # Do not leave a blank at the end of a line; back up if necessary
19306 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
19308 my $i_break_right = $inext_to_go[$i_break_left];
19309 if ( $i_break_left >= $i_f
19310 && $i_break_left < $i_l
19311 && $i_break_right > $i_f
19312 && $i_break_right <= $i_l )
19314 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
19315 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
19320 sub set_closing_breakpoint {
19322 # set a breakpoint at a matching closing token
19323 # at present, this is only used to break at a ':' which matches a '?'
19324 my $i_break = shift;
19326 if ( $mate_index_to_go[$i_break] >= 0 ) {
19328 # CAUTION: infinite recursion possible here:
19329 # set_closing_breakpoint calls set_forced_breakpoint, and
19330 # set_forced_breakpoint call set_closing_breakpoint
19331 # ( test files attrib.t, BasicLyx.pm.html).
19332 # Don't reduce the '2' in the statement below
19333 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
19335 # break before } ] and ), but sub set_forced_breakpoint will decide
19336 # to break before or after a ? and :
19337 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
19338 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
19342 my $type_sequence = $type_sequence_to_go[$i_break];
19343 if ($type_sequence) {
19344 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
19345 $postponed_breakpoint{$type_sequence} = 1;
19350 sub compare_indentation_levels {
19352 # check to see if output line tabbing agrees with input line
19353 # this can be very useful for debugging a script which has an extra
19355 my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
19356 if ( $guessed_indentation_level ne $structural_indentation_level ) {
19357 $last_tabbing_disagreement = $input_line_number;
19359 if ($in_tabbing_disagreement) {
19362 $tabbing_disagreement_count++;
19364 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19365 write_logfile_entry(
19366 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
19369 $in_tabbing_disagreement = $input_line_number;
19370 $first_tabbing_disagreement = $in_tabbing_disagreement
19371 unless ($first_tabbing_disagreement);
19376 if ($in_tabbing_disagreement) {
19378 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19379 write_logfile_entry(
19380 "End indentation disagreement from input line $in_tabbing_disagreement\n"
19383 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
19384 write_logfile_entry(
19385 "No further tabbing disagreements will be noted\n");
19388 $in_tabbing_disagreement = 0;
19393 #####################################################################
19395 # the Perl::Tidy::IndentationItem class supplies items which contain
19396 # how much whitespace should be used at the start of a line
19398 #####################################################################
19400 package Perl::Tidy::IndentationItem;
19402 # Indexes for indentation items
19403 use constant SPACES => 0; # total leading white spaces
19404 use constant LEVEL => 1; # the indentation 'level'
19405 use constant CI_LEVEL => 2; # the 'continuation level'
19406 use constant AVAILABLE_SPACES => 3; # how many left spaces available
19408 use constant CLOSED => 4; # index where we saw closing '}'
19409 use constant COMMA_COUNT => 5; # how many commas at this level?
19410 use constant SEQUENCE_NUMBER => 6; # output batch number
19411 use constant INDEX => 7; # index in output batch list
19412 use constant HAVE_CHILD => 8; # any dependents?
19413 use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
19414 # we would like to move to get
19415 # alignment (negative if left)
19416 use constant ALIGN_PAREN => 10; # do we want to try to align
19417 # with an opening structure?
19418 use constant MARKED => 11; # if visited by corrector logic
19419 use constant STACK_DEPTH => 12; # indentation nesting depth
19420 use constant STARTING_INDEX => 13; # first token index of this level
19421 use constant ARROW_COUNT => 14; # how many =>'s
19425 # Create an 'indentation_item' which describes one level of leading
19426 # whitespace when the '-lp' indentation is used. We return
19427 # a reference to an anonymous array of associated variables.
19428 # See above constants for storage scheme.
19430 $class, $spaces, $level,
19431 $ci_level, $available_spaces, $index,
19432 $gnu_sequence_number, $align_paren, $stack_depth,
19436 my $arrow_count = 0;
19437 my $comma_count = 0;
19438 my $have_child = 0;
19439 my $want_right_spaces = 0;
19442 $spaces, $level, $ci_level,
19443 $available_spaces, $closed, $comma_count,
19444 $gnu_sequence_number, $index, $have_child,
19445 $want_right_spaces, $align_paren, $marked,
19446 $stack_depth, $starting_index, $arrow_count,
19450 sub permanently_decrease_AVAILABLE_SPACES {
19452 # make a permanent reduction in the available indentation spaces
19453 # at one indentation item. NOTE: if there are child nodes, their
19454 # total SPACES must be reduced by the caller.
19456 my ( $item, $spaces_needed ) = @_;
19457 my $available_spaces = $item->get_AVAILABLE_SPACES();
19458 my $deleted_spaces =
19459 ( $available_spaces > $spaces_needed )
19461 : $available_spaces;
19462 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19463 $item->decrease_SPACES($deleted_spaces);
19464 $item->set_RECOVERABLE_SPACES(0);
19466 return $deleted_spaces;
19469 sub tentatively_decrease_AVAILABLE_SPACES {
19471 # We are asked to tentatively delete $spaces_needed of indentation
19472 # for a indentation item. We may want to undo this later. NOTE: if
19473 # there are child nodes, their total SPACES must be reduced by the
19475 my ( $item, $spaces_needed ) = @_;
19476 my $available_spaces = $item->get_AVAILABLE_SPACES();
19477 my $deleted_spaces =
19478 ( $available_spaces > $spaces_needed )
19480 : $available_spaces;
19481 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19482 $item->decrease_SPACES($deleted_spaces);
19483 $item->increase_RECOVERABLE_SPACES($deleted_spaces);
19484 return $deleted_spaces;
19487 sub get_STACK_DEPTH {
19489 return $self->[STACK_DEPTH];
19494 return $self->[SPACES];
19499 return $self->[MARKED];
19503 my ( $self, $value ) = @_;
19504 if ( defined($value) ) {
19505 $self->[MARKED] = $value;
19507 return $self->[MARKED];
19510 sub get_AVAILABLE_SPACES {
19512 return $self->[AVAILABLE_SPACES];
19515 sub decrease_SPACES {
19516 my ( $self, $value ) = @_;
19517 if ( defined($value) ) {
19518 $self->[SPACES] -= $value;
19520 return $self->[SPACES];
19523 sub decrease_AVAILABLE_SPACES {
19524 my ( $self, $value ) = @_;
19525 if ( defined($value) ) {
19526 $self->[AVAILABLE_SPACES] -= $value;
19528 return $self->[AVAILABLE_SPACES];
19531 sub get_ALIGN_PAREN {
19533 return $self->[ALIGN_PAREN];
19536 sub get_RECOVERABLE_SPACES {
19538 return $self->[RECOVERABLE_SPACES];
19541 sub set_RECOVERABLE_SPACES {
19542 my ( $self, $value ) = @_;
19543 if ( defined($value) ) {
19544 $self->[RECOVERABLE_SPACES] = $value;
19546 return $self->[RECOVERABLE_SPACES];
19549 sub increase_RECOVERABLE_SPACES {
19550 my ( $self, $value ) = @_;
19551 if ( defined($value) ) {
19552 $self->[RECOVERABLE_SPACES] += $value;
19554 return $self->[RECOVERABLE_SPACES];
19559 return $self->[CI_LEVEL];
19564 return $self->[LEVEL];
19567 sub get_SEQUENCE_NUMBER {
19569 return $self->[SEQUENCE_NUMBER];
19574 return $self->[INDEX];
19577 sub get_STARTING_INDEX {
19579 return $self->[STARTING_INDEX];
19582 sub set_HAVE_CHILD {
19583 my ( $self, $value ) = @_;
19584 if ( defined($value) ) {
19585 $self->[HAVE_CHILD] = $value;
19587 return $self->[HAVE_CHILD];
19590 sub get_HAVE_CHILD {
19592 return $self->[HAVE_CHILD];
19595 sub set_ARROW_COUNT {
19596 my ( $self, $value ) = @_;
19597 if ( defined($value) ) {
19598 $self->[ARROW_COUNT] = $value;
19600 return $self->[ARROW_COUNT];
19603 sub get_ARROW_COUNT {
19605 return $self->[ARROW_COUNT];
19608 sub set_COMMA_COUNT {
19609 my ( $self, $value ) = @_;
19610 if ( defined($value) ) {
19611 $self->[COMMA_COUNT] = $value;
19613 return $self->[COMMA_COUNT];
19616 sub get_COMMA_COUNT {
19618 return $self->[COMMA_COUNT];
19622 my ( $self, $value ) = @_;
19623 if ( defined($value) ) {
19624 $self->[CLOSED] = $value;
19626 return $self->[CLOSED];
19631 return $self->[CLOSED];
19634 #####################################################################
19636 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
19637 # contain a single output line
19639 #####################################################################
19641 package Perl::Tidy::VerticalAligner::Line;
19648 use constant JMAX => 0;
19649 use constant JMAX_ORIGINAL_LINE => 1;
19650 use constant RTOKENS => 2;
19651 use constant RFIELDS => 3;
19652 use constant RPATTERNS => 4;
19653 use constant INDENTATION => 5;
19654 use constant LEADING_SPACE_COUNT => 6;
19655 use constant OUTDENT_LONG_LINES => 7;
19656 use constant LIST_TYPE => 8;
19657 use constant IS_HANGING_SIDE_COMMENT => 9;
19658 use constant RALIGNMENTS => 10;
19659 use constant MAXIMUM_LINE_LENGTH => 11;
19660 use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
19663 $_index_map{jmax} = JMAX;
19664 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
19665 $_index_map{rtokens} = RTOKENS;
19666 $_index_map{rfields} = RFIELDS;
19667 $_index_map{rpatterns} = RPATTERNS;
19668 $_index_map{indentation} = INDENTATION;
19669 $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
19670 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
19671 $_index_map{list_type} = LIST_TYPE;
19672 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
19673 $_index_map{ralignments} = RALIGNMENTS;
19674 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
19675 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
19677 my @_default_data = ();
19678 $_default_data[JMAX] = undef;
19679 $_default_data[JMAX_ORIGINAL_LINE] = undef;
19680 $_default_data[RTOKENS] = undef;
19681 $_default_data[RFIELDS] = undef;
19682 $_default_data[RPATTERNS] = undef;
19683 $_default_data[INDENTATION] = undef;
19684 $_default_data[LEADING_SPACE_COUNT] = undef;
19685 $_default_data[OUTDENT_LONG_LINES] = undef;
19686 $_default_data[LIST_TYPE] = undef;
19687 $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
19688 $_default_data[RALIGNMENTS] = [];
19689 $_default_data[MAXIMUM_LINE_LENGTH] = undef;
19690 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
19694 # methods to count object population
19696 sub get_count { $_count; }
19697 sub _increment_count { ++$_count }
19698 sub _decrement_count { --$_count }
19701 # Constructor may be called as a class method
19703 my ( $caller, %arg ) = @_;
19704 my $caller_is_obj = ref($caller);
19705 my $class = $caller_is_obj || $caller;
19707 my $self = bless [], $class;
19709 $self->[RALIGNMENTS] = [];
19712 foreach ( keys %_index_map ) {
19713 $index = $_index_map{$_};
19714 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19715 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
19716 else { $self->[$index] = $_default_data[$index] }
19719 $self->_increment_count();
19724 $_[0]->_decrement_count();
19727 sub get_jmax { $_[0]->[JMAX] }
19728 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
19729 sub get_rtokens { $_[0]->[RTOKENS] }
19730 sub get_rfields { $_[0]->[RFIELDS] }
19731 sub get_rpatterns { $_[0]->[RPATTERNS] }
19732 sub get_indentation { $_[0]->[INDENTATION] }
19733 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
19734 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
19735 sub get_list_type { $_[0]->[LIST_TYPE] }
19736 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
19737 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
19739 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
19740 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
19741 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
19742 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
19744 sub get_starting_column {
19745 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
19748 sub increment_column {
19749 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
19751 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
19753 sub current_field_width {
19757 return $self->get_column($j);
19760 return $self->get_column($j) - $self->get_column( $j - 1 );
19764 sub field_width_growth {
19767 return $self->get_column($j) - $self->get_starting_column($j);
19770 sub starting_field_width {
19774 return $self->get_starting_column($j);
19777 return $self->get_starting_column($j) -
19778 $self->get_starting_column( $j - 1 );
19782 sub increase_field_width {
19785 my ( $j, $pad ) = @_;
19786 my $jmax = $self->get_jmax();
19787 for my $k ( $j .. $jmax ) {
19788 $self->increment_column( $k, $pad );
19792 sub get_available_space_on_right {
19794 my $jmax = $self->get_jmax();
19795 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
19798 sub set_jmax { $_[0]->[JMAX] = $_[1] }
19799 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
19800 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
19801 sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
19802 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
19803 sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
19804 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
19805 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
19806 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
19807 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
19808 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
19812 #####################################################################
19814 # the Perl::Tidy::VerticalAligner::Alignment class holds information
19815 # on a single column being aligned
19817 #####################################################################
19818 package Perl::Tidy::VerticalAligner::Alignment;
19826 # Symbolic array indexes
19827 use constant COLUMN => 0; # the current column number
19828 use constant STARTING_COLUMN => 1; # column number when created
19829 use constant MATCHING_TOKEN => 2; # what token we are matching
19830 use constant STARTING_LINE => 3; # the line index of creation
19831 use constant ENDING_LINE => 4; # the most recent line to use it
19832 use constant SAVED_COLUMN => 5; # the most recent line to use it
19833 use constant SERIAL_NUMBER => 6; # unique number for this alignment
19834 # (just its index in an array)
19836 # Correspondence between variables and array indexes
19838 $_index_map{column} = COLUMN;
19839 $_index_map{starting_column} = STARTING_COLUMN;
19840 $_index_map{matching_token} = MATCHING_TOKEN;
19841 $_index_map{starting_line} = STARTING_LINE;
19842 $_index_map{ending_line} = ENDING_LINE;
19843 $_index_map{saved_column} = SAVED_COLUMN;
19844 $_index_map{serial_number} = SERIAL_NUMBER;
19846 my @_default_data = ();
19847 $_default_data[COLUMN] = undef;
19848 $_default_data[STARTING_COLUMN] = undef;
19849 $_default_data[MATCHING_TOKEN] = undef;
19850 $_default_data[STARTING_LINE] = undef;
19851 $_default_data[ENDING_LINE] = undef;
19852 $_default_data[SAVED_COLUMN] = undef;
19853 $_default_data[SERIAL_NUMBER] = undef;
19855 # class population count
19858 sub get_count { $_count; }
19859 sub _increment_count { ++$_count }
19860 sub _decrement_count { --$_count }
19865 my ( $caller, %arg ) = @_;
19866 my $caller_is_obj = ref($caller);
19867 my $class = $caller_is_obj || $caller;
19869 my $self = bless [], $class;
19871 foreach ( keys %_index_map ) {
19872 my $index = $_index_map{$_};
19873 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19874 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
19875 else { $self->[$index] = $_default_data[$index] }
19877 $self->_increment_count();
19882 $_[0]->_decrement_count();
19885 sub get_column { return $_[0]->[COLUMN] }
19886 sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
19887 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
19888 sub get_starting_line { return $_[0]->[STARTING_LINE] }
19889 sub get_ending_line { return $_[0]->[ENDING_LINE] }
19890 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
19892 sub set_column { $_[0]->[COLUMN] = $_[1] }
19893 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
19894 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
19895 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
19896 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
19897 sub increment_column { $_[0]->[COLUMN] += $_[1] }
19899 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
19900 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
19904 package Perl::Tidy::VerticalAligner;
19906 # The Perl::Tidy::VerticalAligner package collects output lines and
19907 # attempts to line up certain common tokens, such as => and #, which are
19908 # identified by the calling routine.
19910 # There are two main routines: valign_input and flush. Append acts as a
19911 # storage buffer, collecting lines into a group which can be vertically
19912 # aligned. When alignment is no longer possible or desirable, it dumps
19913 # the group to flush.
19915 # valign_input -----> flush
19923 # Caution: these debug flags produce a lot of output
19924 # They should all be 0 except when debugging small scripts
19926 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
19927 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
19928 use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
19929 use constant VALIGN_DEBUG_FLAG_TABS => 0;
19931 my $debug_warning = sub {
19932 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
19935 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
19936 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
19937 VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
19938 VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
19943 $vertical_aligner_self
19945 $maximum_alignment_index
19949 $previous_minimum_jmax_seen
19950 $previous_maximum_jmax_seen
19951 $maximum_line_index
19956 $last_level_written
19957 $last_leading_space_count
19961 $last_comment_column
19962 $last_side_comment_line_number
19963 $last_side_comment_length
19964 $last_side_comment_level
19965 $outdented_line_count
19966 $first_outdented_line_at
19967 $last_outdented_line_at
19968 $diagnostics_object
19970 $file_writer_object
19971 @side_comment_history
19972 $comment_leading_space_count
19973 $is_matching_terminal_line
19974 $consecutive_block_comments
19981 $cached_line_leading_space_count
19982 $cached_seqno_string
19984 $valign_buffer_filling
19988 $last_nonblank_seqno_string
19992 $rOpts_maximum_line_length
19993 $rOpts_variable_maximum_line_length
19994 $rOpts_continuation_indentation
19995 $rOpts_indent_columns
19997 $rOpts_entab_leading_whitespace
20000 $rOpts_fixed_position_side_comment
20001 $rOpts_minimum_space_to_comment
20009 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
20012 # variables describing the entire space group:
20013 $ralignment_list = [];
20015 $last_level_written = -1;
20016 $extra_indent_ok = 0; # can we move all lines to the right?
20017 $last_side_comment_length = 0;
20018 $maximum_jmax_seen = 0;
20019 $minimum_jmax_seen = 0;
20020 $previous_minimum_jmax_seen = 0;
20021 $previous_maximum_jmax_seen = 0;
20023 # variables describing each line of the group
20024 @group_lines = (); # list of all lines in group
20026 $outdented_line_count = 0;
20027 $first_outdented_line_at = 0;
20028 $last_outdented_line_at = 0;
20029 $last_side_comment_line_number = 0;
20030 $last_side_comment_level = -1;
20031 $is_matching_terminal_line = 0;
20033 # most recent 3 side comments; [ line number, column ]
20034 $side_comment_history[0] = [ -300, 0 ];
20035 $side_comment_history[1] = [ -200, 0 ];
20036 $side_comment_history[2] = [ -100, 0 ];
20038 # valign_output_step_B cache:
20039 $cached_line_text = "";
20040 $cached_line_type = 0;
20041 $cached_line_flag = 0;
20043 $cached_line_valid = 0;
20044 $cached_line_leading_space_count = 0;
20045 $cached_seqno_string = "";
20047 # string of sequence numbers joined together
20048 $seqno_string = "";
20049 $last_nonblank_seqno_string = "";
20051 # frequently used parameters
20052 $rOpts_indent_columns = $rOpts->{'indent-columns'};
20053 $rOpts_tabs = $rOpts->{'tabs'};
20054 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
20055 $rOpts_fixed_position_side_comment =
20056 $rOpts->{'fixed-position-side-comment'};
20057 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
20058 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
20059 $rOpts_variable_maximum_line_length =
20060 $rOpts->{'variable-maximum-line-length'};
20061 $rOpts_valign = $rOpts->{'valign'};
20063 $consecutive_block_comments = 0;
20064 forget_side_comment();
20066 initialize_for_new_group();
20068 $vertical_aligner_self = {};
20069 bless $vertical_aligner_self, $class;
20070 return $vertical_aligner_self;
20073 sub initialize_for_new_group {
20074 $maximum_line_index = -1; # lines in the current group
20075 $maximum_alignment_index = -1; # alignments in current group
20076 $zero_count = 0; # count consecutive lines without tokens
20077 $current_line = undef; # line being matched for alignment
20078 $group_maximum_gap = 0; # largest gap introduced
20080 $marginal_match = 0;
20081 $comment_leading_space_count = 0;
20082 $last_leading_space_count = 0;
20085 # interface to Perl::Tidy::Diagnostics routines
20086 sub write_diagnostics {
20087 if ($diagnostics_object) {
20088 $diagnostics_object->write_diagnostics(@_);
20092 # interface to Perl::Tidy::Logger routines
20094 if ($logger_object) {
20095 $logger_object->warning(@_);
20099 sub write_logfile_entry {
20100 if ($logger_object) {
20101 $logger_object->write_logfile_entry(@_);
20105 sub report_definite_bug {
20106 if ($logger_object) {
20107 $logger_object->report_definite_bug();
20113 # return the number of leading spaces associated with an indentation
20114 # variable $indentation is either a constant number of spaces or an
20115 # object with a get_SPACES method.
20116 my $indentation = shift;
20117 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
20120 sub get_RECOVERABLE_SPACES {
20122 # return the number of spaces (+ means shift right, - means shift left)
20123 # that we would like to shift a group of lines with the same indentation
20124 # to get them to line up with their opening parens
20125 my $indentation = shift;
20126 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
20129 sub get_STACK_DEPTH {
20131 my $indentation = shift;
20132 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
20135 sub make_alignment {
20136 my ( $col, $token ) = @_;
20138 # make one new alignment at column $col which aligns token $token
20139 ++$maximum_alignment_index;
20140 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
20142 starting_column => $col,
20143 matching_token => $token,
20144 starting_line => $maximum_line_index,
20145 ending_line => $maximum_line_index,
20146 serial_number => $maximum_alignment_index,
20148 $ralignment_list->[$maximum_alignment_index] = $alignment;
20152 sub dump_alignments {
20154 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
20155 for my $i ( 0 .. $maximum_alignment_index ) {
20156 my $column = $ralignment_list->[$i]->get_column();
20157 my $starting_column = $ralignment_list->[$i]->get_starting_column();
20158 my $matching_token = $ralignment_list->[$i]->get_matching_token();
20159 my $starting_line = $ralignment_list->[$i]->get_starting_line();
20160 my $ending_line = $ralignment_list->[$i]->get_ending_line();
20162 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
20166 sub save_alignment_columns {
20167 for my $i ( 0 .. $maximum_alignment_index ) {
20168 $ralignment_list->[$i]->save_column();
20172 sub restore_alignment_columns {
20173 for my $i ( 0 .. $maximum_alignment_index ) {
20174 $ralignment_list->[$i]->restore_column();
20178 sub forget_side_comment {
20179 $last_comment_column = 0;
20182 sub maximum_line_length_for_level {
20184 # return maximum line length for line starting with a given level
20185 my $maximum_line_length = $rOpts_maximum_line_length;
20186 if ($rOpts_variable_maximum_line_length) {
20188 if ( $level < 0 ) { $level = 0 }
20189 $maximum_line_length += $level * $rOpts_indent_columns;
20191 return $maximum_line_length;
20196 # Place one line in the current vertical group.
20198 # The input parameters are:
20199 # $level = indentation level of this line
20200 # $rfields = reference to array of fields
20201 # $rpatterns = reference to array of patterns, one per field
20202 # $rtokens = reference to array of tokens starting fields 1,2,..
20204 # Here is an example of what this package does. In this example,
20205 # we are trying to line up both the '=>' and the '#'.
20207 # '18' => 'grave', # \`
20208 # '19' => 'acute', # `'
20209 # '20' => 'caron', # \v
20210 # <-tabs-><f1-><--field 2 ---><-f3->
20213 # col1 col2 col3 col4
20215 # The calling routine has already broken the entire line into 3 fields as
20216 # indicated. (So the work of identifying promising common tokens has
20217 # already been done).
20219 # In this example, there will be 2 tokens being matched: '=>' and '#'.
20220 # They are the leading parts of fields 2 and 3, but we do need to know
20221 # what they are so that we can dump a group of lines when these tokens
20224 # The fields contain the actual characters of each field. The patterns
20225 # are like the fields, but they contain mainly token types instead
20226 # of tokens, so they have fewer characters. They are used to be
20227 # sure we are matching fields of similar type.
20229 # In this example, there will be 4 column indexes being adjusted. The
20230 # first one is always at zero. The interior columns are at the start of
20231 # the matching tokens, and the last one tracks the maximum line length.
20233 # Each time a new line comes in, it joins the current vertical
20234 # group if possible. Otherwise it causes the current group to be dumped
20235 # and a new group is started.
20237 # For each new group member, the column locations are increased, as
20238 # necessary, to make room for the new fields. When the group is finally
20239 # output, these column numbers are used to compute the amount of spaces of
20240 # padding needed for each field.
20242 # Programming note: the fields are assumed not to have any tab characters.
20243 # Tabs have been previously removed except for tabs in quoted strings and
20244 # side comments. Tabs in these fields can mess up the column counting.
20245 # The log file warns the user if there are any such tabs.
20248 $level, $level_end,
20249 $indentation, $rfields,
20250 $rtokens, $rpatterns,
20251 $is_forced_break, $outdent_long_lines,
20252 $is_terminal_ternary, $is_terminal_statement,
20253 $do_not_pad, $rvertical_tightness_flags,
20257 # number of fields is $jmax
20258 # number of tokens between fields is $jmax-1
20259 my $jmax = $#{$rfields};
20261 my $leading_space_count = get_SPACES($indentation);
20263 # set outdented flag to be sure we either align within statements or
20264 # across statement boundaries, but not both.
20265 my $is_outdented = $last_leading_space_count > $leading_space_count;
20266 $last_leading_space_count = $leading_space_count;
20268 # Patch: undo for hanging side comment
20269 my $is_hanging_side_comment =
20270 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
20271 $is_outdented = 0 if $is_hanging_side_comment;
20273 # Forget side comment alignment after seeing 2 or more block comments
20274 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
20275 if ($is_block_comment) {
20276 $consecutive_block_comments++;
20279 if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
20280 $consecutive_block_comments = 0;
20283 VALIGN_DEBUG_FLAG_APPEND0 && do {
20285 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
20288 # Validate cached line if necessary: If we can produce a container
20289 # with just 2 lines total by combining an existing cached opening
20290 # token with the closing token to follow, then we will mark both
20291 # cached flags as valid.
20292 if ($rvertical_tightness_flags) {
20293 if ( $maximum_line_index <= 0
20294 && $cached_line_type
20296 && $rvertical_tightness_flags->[2]
20297 && $rvertical_tightness_flags->[2] == $cached_seqno )
20299 $rvertical_tightness_flags->[3] ||= 1;
20300 $cached_line_valid ||= 1;
20304 # do not join an opening block brace with an unbalanced line
20305 # unless requested with a flag value of 2
20306 if ( $cached_line_type == 3
20307 && $maximum_line_index < 0
20308 && $cached_line_flag < 2
20309 && $level_jump != 0 )
20311 $cached_line_valid = 0;
20314 # patch until new aligner is finished
20315 if ($do_not_pad) { my_flush() }
20317 # shouldn't happen:
20318 if ( $level < 0 ) { $level = 0 }
20320 # do not align code across indentation level changes
20321 # or if vertical alignment is turned off for debugging
20322 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
20324 # we are allowed to shift a group of lines to the right if its
20325 # level is greater than the previous and next group
20327 ( $level < $group_level && $last_level_written < $group_level );
20331 # If we know that this line will get flushed out by itself because
20332 # of level changes, we can leave the extra_indent_ok flag set.
20333 # That way, if we get an external flush call, we will still be
20334 # able to do some -lp alignment if necessary.
20335 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
20337 $group_level = $level;
20339 # wait until after the above flush to get the leading space
20340 # count because it may have been changed if the -icp flag is in
20342 $leading_space_count = get_SPACES($indentation);
20346 # --------------------------------------------------------------------
20347 # Patch to collect outdentable block COMMENTS
20348 # --------------------------------------------------------------------
20349 my $is_blank_line = "";
20350 if ( $group_type eq 'COMMENT' ) {
20354 && $outdent_long_lines
20355 && $leading_space_count == $comment_leading_space_count
20360 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20368 # --------------------------------------------------------------------
20369 # add dummy fields for terminal ternary
20370 # --------------------------------------------------------------------
20371 my $j_terminal_match;
20372 if ( $is_terminal_ternary && $current_line ) {
20373 $j_terminal_match =
20374 fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
20375 $jmax = @{$rfields} - 1;
20378 # --------------------------------------------------------------------
20379 # add dummy fields for else statement
20380 # --------------------------------------------------------------------
20381 if ( $rfields->[0] =~ /^else\s*$/
20383 && $level_jump == 0 )
20385 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
20386 $jmax = @{$rfields} - 1;
20389 # --------------------------------------------------------------------
20390 # Step 1. Handle simple line of code with no fields to match.
20391 # --------------------------------------------------------------------
20392 if ( $jmax <= 0 ) {
20395 if ( $maximum_line_index >= 0
20396 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
20399 # flush the current group if it has some aligned columns..
20400 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
20402 # flush current group if we are just collecting side comments..
20405 # ...and we haven't seen a comment lately
20406 ( $zero_count > 3 )
20408 # ..or if this new line doesn't fit to the left of the comments
20409 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
20410 $group_lines[0]->get_column(0) )
20417 # patch to start new COMMENT group if this comment may be outdented
20418 if ( $is_block_comment
20419 && $outdent_long_lines
20420 && $maximum_line_index < 0 )
20422 $group_type = 'COMMENT';
20423 $comment_leading_space_count = $leading_space_count;
20424 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20428 # just write this line directly if no current group, no side comment,
20429 # and no space recovery is needed.
20430 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
20432 valign_output_step_B( $leading_space_count, $$rfields[0], 0,
20433 $outdent_long_lines, $rvertical_tightness_flags, $level );
20441 # programming check: (shouldn't happen)
20442 # an error here implies an incorrect call was made
20443 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
20445 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
20447 report_definite_bug();
20450 # --------------------------------------------------------------------
20451 # create an object to hold this line
20452 # --------------------------------------------------------------------
20453 my $new_line = new Perl::Tidy::VerticalAligner::Line(
20455 jmax_original_line => $jmax,
20456 rtokens => $rtokens,
20457 rfields => $rfields,
20458 rpatterns => $rpatterns,
20459 indentation => $indentation,
20460 leading_space_count => $leading_space_count,
20461 outdent_long_lines => $outdent_long_lines,
20463 is_hanging_side_comment => $is_hanging_side_comment,
20464 maximum_line_length => maximum_line_length_for_level($level),
20465 rvertical_tightness_flags => $rvertical_tightness_flags,
20468 # Initialize a global flag saying if the last line of the group should
20469 # match end of group and also terminate the group. There should be no
20470 # returns between here and where the flag is handled at the bottom.
20471 my $col_matching_terminal = 0;
20472 if ( defined($j_terminal_match) ) {
20474 # remember the column of the terminal ? or { to match with
20475 $col_matching_terminal = $current_line->get_column($j_terminal_match);
20477 # set global flag for sub decide_if_aligned
20478 $is_matching_terminal_line = 1;
20481 # --------------------------------------------------------------------
20482 # It simplifies things to create a zero length side comment
20484 # --------------------------------------------------------------------
20485 make_side_comment( $new_line, $level_end );
20487 # --------------------------------------------------------------------
20488 # Decide if this is a simple list of items.
20489 # There are 3 list types: none, comma, comma-arrow.
20490 # We use this below to be less restrictive in deciding what to align.
20491 # --------------------------------------------------------------------
20492 if ($is_forced_break) {
20493 decide_if_list($new_line);
20496 if ($current_line) {
20498 # --------------------------------------------------------------------
20499 # Allow hanging side comment to join current group, if any
20500 # This will help keep side comments aligned, because otherwise we
20501 # will have to start a new group, making alignment less likely.
20502 # --------------------------------------------------------------------
20503 join_hanging_comment( $new_line, $current_line )
20504 if $is_hanging_side_comment;
20506 # --------------------------------------------------------------------
20507 # If there is just one previous line, and it has more fields
20508 # than the new line, try to join fields together to get a match with
20509 # the new line. At the present time, only a single leading '=' is
20510 # allowed to be compressed out. This is useful in rare cases where
20511 # a table is forced to use old breakpoints because of side comments,
20512 # and the table starts out something like this:
20513 # my %MonthChars = ('0', 'Jan', # side comment
20516 # Eliminating the '=' field will allow the remaining fields to line up.
20517 # This situation does not occur if there are no side comments
20518 # because scan_list would put a break after the opening '('.
20519 # --------------------------------------------------------------------
20520 eliminate_old_fields( $new_line, $current_line );
20522 # --------------------------------------------------------------------
20523 # If the new line has more fields than the current group,
20524 # see if we can match the first fields and combine the remaining
20525 # fields of the new line.
20526 # --------------------------------------------------------------------
20527 eliminate_new_fields( $new_line, $current_line );
20529 # --------------------------------------------------------------------
20530 # Flush previous group unless all common tokens and patterns match..
20531 # --------------------------------------------------------------------
20532 check_match( $new_line, $current_line );
20534 # --------------------------------------------------------------------
20535 # See if there is space for this line in the current group (if any)
20536 # --------------------------------------------------------------------
20537 if ($current_line) {
20538 check_fit( $new_line, $current_line );
20542 # --------------------------------------------------------------------
20543 # Append this line to the current group (or start new group)
20544 # --------------------------------------------------------------------
20545 add_to_group($new_line);
20547 # Future update to allow this to vary:
20548 $current_line = $new_line if ( $maximum_line_index == 0 );
20550 # output this group if it ends in a terminal else or ternary line
20551 if ( defined($j_terminal_match) ) {
20553 # if there is only one line in the group (maybe due to failure to match
20554 # perfectly with previous lines), then align the ? or { of this
20555 # terminal line with the previous one unless that would make the line
20557 if ( $maximum_line_index == 0 ) {
20558 my $col_now = $current_line->get_column($j_terminal_match);
20559 my $pad = $col_matching_terminal - $col_now;
20560 my $padding_available =
20561 $current_line->get_available_space_on_right();
20562 if ( $pad > 0 && $pad <= $padding_available ) {
20563 $current_line->increase_field_width( $j_terminal_match, $pad );
20567 $is_matching_terminal_line = 0;
20570 # --------------------------------------------------------------------
20571 # Step 8. Some old debugging stuff
20572 # --------------------------------------------------------------------
20573 VALIGN_DEBUG_FLAG_APPEND && do {
20574 print STDOUT "APPEND fields:";
20575 dump_array(@$rfields);
20576 print STDOUT "APPEND tokens:";
20577 dump_array(@$rtokens);
20578 print STDOUT "APPEND patterns:";
20579 dump_array(@$rpatterns);
20586 sub join_hanging_comment {
20589 my $jmax = $line->get_jmax();
20590 return 0 unless $jmax == 1; # must be 2 fields
20591 my $rtokens = $line->get_rtokens();
20592 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
20593 my $rfields = $line->get_rfields();
20594 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
20595 my $old_line = shift;
20596 my $maximum_field_index = $old_line->get_jmax();
20598 unless $maximum_field_index > $jmax; # the current line has more fields
20599 my $rpatterns = $line->get_rpatterns();
20601 $line->set_is_hanging_side_comment(1);
20602 $jmax = $maximum_field_index;
20603 $line->set_jmax($jmax);
20604 $$rfields[$jmax] = $$rfields[1];
20605 $$rtokens[ $jmax - 1 ] = $$rtokens[0];
20606 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
20607 for ( my $j = 1 ; $j < $jmax ; $j++ ) {
20608 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
20609 $$rtokens[ $j - 1 ] = "";
20610 $$rpatterns[ $j - 1 ] = "";
20615 sub eliminate_old_fields {
20617 my $new_line = shift;
20618 my $jmax = $new_line->get_jmax();
20619 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
20620 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
20622 # there must be one previous line
20623 return unless ( $maximum_line_index == 0 );
20625 my $old_line = shift;
20626 my $maximum_field_index = $old_line->get_jmax();
20628 ###############################################
20629 # this line must have fewer fields
20630 return unless $maximum_field_index > $jmax;
20631 ###############################################
20633 # Identify specific cases where field elimination is allowed:
20634 # case=1: both lines have comma-separated lists, and the first
20635 # line has an equals
20636 # case=2: both lines have leading equals
20638 # case 1 is the default
20641 # See if case 2: both lines have leading '='
20642 # We'll require similar leading patterns in this case
20643 my $old_rtokens = $old_line->get_rtokens();
20644 my $rtokens = $new_line->get_rtokens();
20645 my $rpatterns = $new_line->get_rpatterns();
20646 my $old_rpatterns = $old_line->get_rpatterns();
20647 if ( $rtokens->[0] =~ /^=\d*$/
20648 && $old_rtokens->[0] eq $rtokens->[0]
20649 && $old_rpatterns->[0] eq $rpatterns->[0] )
20654 # not too many fewer fields in new line for case 1
20655 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
20657 # case 1 must have side comment
20658 my $old_rfields = $old_line->get_rfields();
20661 && length( $$old_rfields[$maximum_field_index] ) == 0 );
20663 my $rfields = $new_line->get_rfields();
20665 my $hid_equals = 0;
20667 my @new_alignments = ();
20668 my @new_fields = ();
20669 my @new_matching_patterns = ();
20670 my @new_matching_tokens = ();
20674 my $current_field = '';
20675 my $current_pattern = '';
20677 # loop over all old tokens
20679 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
20680 $current_field .= $$old_rfields[$k];
20681 $current_pattern .= $$old_rpatterns[$k];
20682 last if ( $j > $jmax - 1 );
20684 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
20686 $new_fields[$j] = $current_field;
20687 $new_matching_patterns[$j] = $current_pattern;
20688 $current_field = '';
20689 $current_pattern = '';
20690 $new_matching_tokens[$j] = $$old_rtokens[$k];
20691 $new_alignments[$j] = $old_line->get_alignment($k);
20696 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
20697 last if ( $case == 2 ); # avoid problems with stuff
20698 # like: $a=$b=$c=$d;
20702 if ( $in_match && $case == 1 )
20703 ; # disallow gaps in matching field types in case 1
20707 # Modify the current state if we are successful.
20708 # We must exactly reach the ends of both lists for success.
20709 if ( ( $j == $jmax )
20710 && ( $current_field eq '' )
20711 && ( $case != 1 || $hid_equals ) )
20713 $k = $maximum_field_index;
20714 $current_field .= $$old_rfields[$k];
20715 $current_pattern .= $$old_rpatterns[$k];
20716 $new_fields[$j] = $current_field;
20717 $new_matching_patterns[$j] = $current_pattern;
20719 $new_alignments[$j] = $old_line->get_alignment($k);
20720 $maximum_field_index = $j;
20722 $old_line->set_alignments(@new_alignments);
20723 $old_line->set_jmax($jmax);
20724 $old_line->set_rtokens( \@new_matching_tokens );
20725 $old_line->set_rfields( \@new_fields );
20726 $old_line->set_rpatterns( \@$rpatterns );
20730 # create an empty side comment if none exists
20731 sub make_side_comment {
20732 my $new_line = shift;
20733 my $level_end = shift;
20734 my $jmax = $new_line->get_jmax();
20735 my $rtokens = $new_line->get_rtokens();
20737 # if line does not have a side comment...
20738 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
20739 my $rfields = $new_line->get_rfields();
20740 my $rpatterns = $new_line->get_rpatterns();
20741 $$rtokens[$jmax] = '#';
20742 $$rfields[ ++$jmax ] = '';
20743 $$rpatterns[$jmax] = '#';
20744 $new_line->set_jmax($jmax);
20745 $new_line->set_jmax_original_line($jmax);
20748 # line has a side comment..
20751 # don't remember old side comment location for very long
20752 my $line_number = $vertical_aligner_self->get_output_line_number();
20753 my $rfields = $new_line->get_rfields();
20755 $line_number - $last_side_comment_line_number > 12
20757 # and don't remember comment location across block level changes
20758 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
20761 forget_side_comment();
20763 $last_side_comment_line_number = $line_number;
20764 $last_side_comment_level = $level_end;
20768 sub decide_if_list {
20772 # A list will be taken to be a line with a forced break in which all
20773 # of the field separators are commas or comma-arrows (except for the
20776 # List separator tokens are things like ',3' or '=>2',
20777 # where the trailing digit is the nesting depth. Allow braces
20778 # to allow nested list items.
20779 my $rtokens = $line->get_rtokens();
20780 my $test_token = $$rtokens[0];
20781 if ( $test_token =~ /^(\,|=>)/ ) {
20782 my $list_type = $test_token;
20783 my $jmax = $line->get_jmax();
20785 foreach ( 1 .. $jmax - 2 ) {
20786 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
20791 $line->set_list_type($list_type);
20795 sub eliminate_new_fields {
20797 return unless ( $maximum_line_index >= 0 );
20798 my ( $new_line, $old_line ) = @_;
20799 my $jmax = $new_line->get_jmax();
20801 my $old_rtokens = $old_line->get_rtokens();
20802 my $rtokens = $new_line->get_rtokens();
20803 my $is_assignment =
20804 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
20806 # must be monotonic variation
20807 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
20809 # must be more fields in the new line
20810 my $maximum_field_index = $old_line->get_jmax();
20811 return unless ( $maximum_field_index < $jmax );
20813 unless ($is_assignment) {
20815 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
20816 ; # only if monotonic
20818 # never combine fields of a comma list
20820 unless ( $maximum_field_index > 1 )
20821 && ( $new_line->get_list_type() !~ /^,/ );
20824 my $rfields = $new_line->get_rfields();
20825 my $rpatterns = $new_line->get_rpatterns();
20826 my $old_rpatterns = $old_line->get_rpatterns();
20828 # loop over all OLD tokens except comment and check match
20831 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
20832 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
20833 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
20840 # first tokens agree, so combine extra new tokens
20842 for $k ( $maximum_field_index .. $jmax - 1 ) {
20844 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
20845 $$rfields[$k] = "";
20846 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
20847 $$rpatterns[$k] = "";
20850 $$rtokens[ $maximum_field_index - 1 ] = '#';
20851 $$rfields[$maximum_field_index] = $$rfields[$jmax];
20852 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
20853 $jmax = $maximum_field_index;
20855 $new_line->set_jmax($jmax);
20858 sub fix_terminal_ternary {
20860 # Add empty fields as necessary to align a ternary term
20865 # : $year % 100 ? 1
20866 # : $year % 400 ? 0
20869 # returns 1 if the terminal item should be indented
20871 my ( $rfields, $rtokens, $rpatterns ) = @_;
20873 my $jmax = @{$rfields} - 1;
20874 my $old_line = $group_lines[$maximum_line_index];
20875 my $rfields_old = $old_line->get_rfields();
20877 my $rpatterns_old = $old_line->get_rpatterns();
20878 my $rtokens_old = $old_line->get_rtokens();
20879 my $maximum_field_index = $old_line->get_jmax();
20881 # look for the question mark after the :
20883 my $depth_question;
20885 for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
20886 my $tok = $rtokens_old->[$j];
20887 if ( $tok =~ /^\?(\d+)$/ ) {
20888 $depth_question = $1;
20890 # depth must be correct
20891 next unless ( $depth_question eq $group_level );
20894 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
20895 $pad = " " x length($1);
20898 return; # shouldn't happen
20903 return unless ( defined($jquestion) ); # shouldn't happen
20905 # Now splice the tokens and patterns of the previous line
20906 # into the else line to insure a match. Add empty fields
20908 my $jadd = $jquestion;
20910 # Work on copies of the actual arrays in case we have
20911 # to return due to an error
20912 my @fields = @{$rfields};
20913 my @patterns = @{$rpatterns};
20914 my @tokens = @{$rtokens};
20916 VALIGN_DEBUG_FLAG_TERNARY && do {
20918 print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
20919 print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
20920 print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
20921 print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
20922 print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
20923 print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
20926 # handle cases of leading colon on this line
20927 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
20929 my ( $colon, $therest ) = ( $1, $2 );
20931 # Handle sub-case of first field with leading colon plus additional code
20932 # This is the usual situation as at the '1' below:
20934 # : $year % 400 ? 0
20938 # Split the first field after the leading colon and insert padding.
20939 # Note that this padding will remain even if the terminal value goes
20940 # out on a separate line. This does not seem to look to bad, so no
20941 # mechanism has been included to undo it.
20942 my $field1 = shift @fields;
20943 unshift @fields, ( $colon, $pad . $therest );
20945 # change the leading pattern from : to ?
20946 return unless ( $patterns[0] =~ s/^\:/?/ );
20948 # install leading tokens and patterns of existing line
20949 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
20950 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20952 # insert appropriate number of empty fields
20953 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20956 # handle sub-case of first field just equal to leading colon.
20957 # This can happen for example in the example below where
20958 # the leading '(' would create a new alignment token
20959 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
20960 # : ( $mname = $name . '->' );
20963 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
20965 # prepend a leading ? onto the second pattern
20966 $patterns[1] = "?b" . $patterns[1];
20968 # pad the second field
20969 $fields[1] = $pad . $fields[1];
20971 # install leading tokens and patterns of existing line, replacing
20972 # leading token and inserting appropriate number of empty fields
20973 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
20974 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
20975 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20979 # Handle case of no leading colon on this line. This will
20980 # be the case when -wba=':' is used. For example,
20981 # $year % 400 ? 0 :
20985 # install leading tokens and patterns of existing line
20986 $patterns[0] = '?' . 'b' . $patterns[0];
20987 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
20988 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20990 # insert appropriate number of empty fields
20991 $jadd = $jquestion + 1;
20992 $fields[0] = $pad . $fields[0];
20993 splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
20996 VALIGN_DEBUG_FLAG_TERNARY && do {
20998 print STDOUT "MODIFIED TOKENS=<@tokens>\n";
20999 print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
21000 print STDOUT "MODIFIED FIELDS=<@fields>\n";
21003 # all ok .. update the arrays
21004 @{$rfields} = @fields;
21005 @{$rtokens} = @tokens;
21006 @{$rpatterns} = @patterns;
21008 # force a flush after this line
21012 sub fix_terminal_else {
21014 # Add empty fields as necessary to align a balanced terminal
21015 # else block to a previous if/elsif/unless block,
21018 # if ( 1 || $x ) { print "ok 13\n"; }
21019 # else { print "not ok 13\n"; }
21021 # returns 1 if the else block should be indented
21023 my ( $rfields, $rtokens, $rpatterns ) = @_;
21024 my $jmax = @{$rfields} - 1;
21025 return unless ( $jmax > 0 );
21027 # check for balanced else block following if/elsif/unless
21028 my $rfields_old = $current_line->get_rfields();
21030 # TBD: add handling for 'case'
21031 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
21033 # look for the opening brace after the else, and extract the depth
21034 my $tok_brace = $rtokens->[0];
21036 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
21038 # probably: "else # side_comment"
21041 my $rpatterns_old = $current_line->get_rpatterns();
21042 my $rtokens_old = $current_line->get_rtokens();
21043 my $maximum_field_index = $current_line->get_jmax();
21045 # be sure the previous if/elsif is followed by an opening paren
21047 my $tok_paren = '(' . $depth_brace;
21048 my $tok_test = $rtokens_old->[$jparen];
21049 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
21051 # Now find the opening block brace
21053 for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
21054 my $tok = $rtokens_old->[$j];
21055 if ( $tok eq $tok_brace ) {
21060 return unless ( defined($jbrace) ); # shouldn't happen
21062 # Now splice the tokens and patterns of the previous line
21063 # into the else line to insure a match. Add empty fields
21065 my $jadd = $jbrace - $jparen;
21066 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
21067 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
21068 splice( @{$rfields}, 1, 0, ('') x $jadd );
21070 # force a flush after this line if it does not follow a case
21072 unless ( $rfields_old->[0] =~ /^case\s*$/ );
21075 { # sub check_match
21076 my %is_good_alignment;
21080 # Vertically aligning on certain "good" tokens is usually okay
21081 # so we can be less restrictive in marginal cases.
21082 @_ = qw( { ? => = );
21084 @is_good_alignment{@_} = (1) x scalar(@_);
21089 # See if the current line matches the current vertical alignment group.
21090 # If not, flush the current group.
21091 my $new_line = shift;
21092 my $old_line = shift;
21094 # uses global variables:
21095 # $previous_minimum_jmax_seen
21096 # $maximum_jmax_seen
21097 # $maximum_line_index
21099 my $jmax = $new_line->get_jmax();
21100 my $maximum_field_index = $old_line->get_jmax();
21102 # flush if this line has too many fields
21103 if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
21105 # flush if adding this line would make a non-monotonic field count
21107 ( $maximum_field_index > $jmax ) # this has too few fields
21109 ( $previous_minimum_jmax_seen <
21110 $jmax ) # and wouldn't be monotonic
21111 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
21118 # otherwise see if this line matches the current group
21119 my $jmax_original_line = $new_line->get_jmax_original_line();
21120 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
21121 my $rtokens = $new_line->get_rtokens();
21122 my $rfields = $new_line->get_rfields();
21123 my $rpatterns = $new_line->get_rpatterns();
21124 my $list_type = $new_line->get_list_type();
21126 my $group_list_type = $old_line->get_list_type();
21127 my $old_rpatterns = $old_line->get_rpatterns();
21128 my $old_rtokens = $old_line->get_rtokens();
21130 my $jlimit = $jmax - 1;
21131 if ( $maximum_field_index > $jmax ) {
21132 $jlimit = $jmax_original_line;
21133 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
21136 # handle comma-separated lists ..
21137 if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
21138 for my $j ( 0 .. $jlimit ) {
21139 my $old_tok = $$old_rtokens[$j];
21140 next unless $old_tok;
21141 my $new_tok = $$rtokens[$j];
21142 next unless $new_tok;
21144 # lists always match ...
21145 # unless they would align any '=>'s with ','s
21147 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
21148 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
21152 # do detailed check for everything else except hanging side comments
21153 elsif ( !$is_hanging_side_comment ) {
21155 my $leading_space_count = $new_line->get_leading_space_count();
21159 my $saw_good_alignment;
21161 for my $j ( 0 .. $jlimit ) {
21163 my $old_tok = $$old_rtokens[$j];
21164 my $new_tok = $$rtokens[$j];
21166 # Note on encoding used for alignment tokens:
21167 # -------------------------------------------
21168 # Tokens are "decorated" with information which can help
21169 # prevent unwanted alignments. Consider for example the
21170 # following two lines:
21171 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
21172 # local ( $i, $f ) = &'bdiv( $xn, $xd );
21173 # There are three alignment tokens in each line, a comma,
21174 # an =, and a comma. In the first line these three tokens
21176 # ,4+local-18 =3 ,4+split-7
21177 # and in the second line they are encoded as
21178 # ,4+local-18 =3 ,4+&'bdiv-8
21179 # Tokens always at least have token name and nesting
21180 # depth. So in this example the ='s are at depth 3 and
21181 # the ,'s are at depth 4. This prevents aligning tokens
21182 # of different depths. Commas contain additional
21183 # information, as follows:
21184 # , {depth} + {container name} - {spaces to opening paren}
21185 # This allows us to reject matching the rightmost commas
21186 # in the above two lines, since they are for different
21187 # function calls. This encoding is done in
21188 # 'sub send_lines_to_vertical_aligner'.
21190 # Pick off actual token.
21191 # Everything up to the first digit is the actual token.
21192 my $alignment_token = $new_tok;
21193 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
21195 # see if the decorated tokens match
21196 my $tokens_match = $new_tok eq $old_tok
21198 # Exception for matching terminal : of ternary statement..
21199 # consider containers prefixed by ? and : a match
21200 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
21202 # No match if the alignment tokens differ...
21203 if ( !$tokens_match ) {
21205 # ...Unless this is a side comment
21209 # and there is either at least one alignment token
21210 # or this is a single item following a list. This
21211 # latter rule is required for 'December' to join
21212 # the following list:
21214 # '', 'January', 'February', 'March',
21215 # 'April', 'May', 'June', 'July',
21216 # 'August', 'September', 'October', 'November',
21219 # If it doesn't then the -lp formatting will fail.
21220 && ( $j > 0 || $old_tok =~ /^,/ )
21223 $marginal_match = 1
21224 if ( $marginal_match == 0
21225 && $maximum_line_index == 0 );
21232 # Calculate amount of padding required to fit this in.
21233 # $pad is the number of spaces by which we must increase
21234 # the current field to squeeze in this field.
21236 length( $$rfields[$j] ) - $old_line->current_field_width($j);
21237 if ( $j == 0 ) { $pad += $leading_space_count; }
21239 # remember max pads to limit marginal cases
21240 if ( $alignment_token ne '#' ) {
21241 if ( $pad > $max_pad ) { $max_pad = $pad }
21242 if ( $pad < $min_pad ) { $min_pad = $pad }
21244 if ( $is_good_alignment{$alignment_token} ) {
21245 $saw_good_alignment = 1;
21248 # If patterns don't match, we have to be careful...
21249 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
21251 # flag this as a marginal match since patterns differ
21252 $marginal_match = 1
21253 if ( $marginal_match == 0 && $maximum_line_index == 0 );
21255 # We have to be very careful about aligning commas
21256 # when the pattern's don't match, because it can be
21257 # worse to create an alignment where none is needed
21258 # than to omit one. Here's an example where the ','s
21259 # are not in named containers. The first line below
21260 # should not match the next two:
21261 # ( $a, $b ) = ( $b, $r );
21262 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
21263 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
21264 if ( $alignment_token eq ',' ) {
21266 # do not align commas unless they are in named containers
21267 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
21270 # do not align parens unless patterns match;
21271 # large ugly spaces can occur in math expressions.
21272 elsif ( $alignment_token eq '(' ) {
21274 # But we can allow a match if the parens don't
21275 # require any padding.
21276 if ( $pad != 0 ) { goto NO_MATCH }
21279 # Handle an '=' alignment with different patterns to
21281 elsif ( $alignment_token eq '=' ) {
21283 # It is best to be a little restrictive when
21284 # aligning '=' tokens. Here is an example of
21285 # two lines that we will not align:
21288 # The problem is that one is a 'my' declaration,
21289 # and the other isn't, so they're not very similar.
21290 # We will filter these out by comparing the first
21291 # letter of the pattern. This is crude, but works
21294 substr( $$old_rpatterns[$j], 0, 1 ) ne
21295 substr( $$rpatterns[$j], 0, 1 ) )
21300 # If we pass that test, we'll call it a marginal match.
21301 # Here is an example of a marginal match:
21303 # $op = compile_bblock($op);
21304 # The left tokens are both identifiers, but
21305 # one accesses a hash and the other doesn't.
21306 # We'll let this be a tentative match and undo
21307 # it later if we don't find more than 2 lines
21309 elsif ( $maximum_line_index == 0 ) {
21311 2; # =2 prevents being undone below
21316 # Don't let line with fewer fields increase column widths
21318 if ( $maximum_field_index > $jmax ) {
21320 # Exception: suspend this rule to allow last lines to join
21321 if ( $pad > 0 ) { goto NO_MATCH; }
21323 } ## end for my $j ( 0 .. $jlimit)
21325 # Turn off the "marginal match" flag in some cases...
21326 # A "marginal match" occurs when the alignment tokens agree
21327 # but there are differences in the other tokens (patterns).
21328 # If we leave the marginal match flag set, then the rule is that we
21329 # will align only if there are more than two lines in the group.
21330 # We will turn of the flag if we almost have a match
21331 # and either we have seen a good alignment token or we
21332 # just need a small pad (2 spaces) to fit. These rules are
21333 # the result of experimentation. Tokens which misaligned by just
21334 # one or two characters are annoying. On the other hand,
21335 # large gaps to less important alignment tokens are also annoying.
21336 if ( $marginal_match == 1
21337 && $jmax == $maximum_field_index
21338 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
21341 $marginal_match = 0;
21343 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
21346 # We have a match (even if marginal).
21347 # If the current line has fewer fields than the current group
21348 # but otherwise matches, copy the remaining group fields to
21349 # make it a perfect match.
21350 if ( $maximum_field_index > $jmax ) {
21351 my $comment = $$rfields[$jmax];
21352 for $jmax ( $jlimit .. $maximum_field_index ) {
21353 $$rtokens[$jmax] = $$old_rtokens[$jmax];
21354 $$rfields[ ++$jmax ] = '';
21355 $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
21357 $$rfields[$jmax] = $comment;
21358 $new_line->set_jmax($jmax);
21363 ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
21371 return unless ( $maximum_line_index >= 0 );
21372 my $new_line = shift;
21373 my $old_line = shift;
21375 my $jmax = $new_line->get_jmax();
21376 my $leading_space_count = $new_line->get_leading_space_count();
21377 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
21378 my $rtokens = $new_line->get_rtokens();
21379 my $rfields = $new_line->get_rfields();
21380 my $rpatterns = $new_line->get_rpatterns();
21382 my $group_list_type = $group_lines[0]->get_list_type();
21384 my $padding_so_far = 0;
21385 my $padding_available = $old_line->get_available_space_on_right();
21387 # save current columns in case this doesn't work
21388 save_alignment_columns();
21390 my ( $j, $pad, $eight );
21391 my $maximum_field_index = $old_line->get_jmax();
21392 for $j ( 0 .. $jmax ) {
21394 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
21397 $pad += $leading_space_count;
21400 # remember largest gap of the group, excluding gap to side comment
21402 && $group_maximum_gap < -$pad
21404 && $j < $jmax - 1 )
21406 $group_maximum_gap = -$pad;
21411 ## This patch helps sometimes, but it doesn't check to see if
21412 ## the line is too long even without the side comment. It needs
21414 ##don't let a long token with no trailing side comment push
21415 ##side comments out, or end a group. (sidecmt1.t)
21416 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
21418 # This line will need space; lets see if we want to accept it..
21421 # not if this won't fit
21422 ( $pad > $padding_available )
21424 # previously, there were upper bounds placed on padding here
21425 # (maximum_whitespace_columns), but they were not really helpful
21430 # revert to starting state then flush; things didn't work out
21431 restore_alignment_columns();
21436 # patch to avoid excessive gaps in previous lines,
21437 # due to a line of fewer fields.
21438 # return join( ".",
21439 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
21440 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
21441 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
21443 # looks ok, squeeze this field in
21444 $old_line->increase_field_width( $j, $pad );
21445 $padding_available -= $pad;
21447 # remember largest gap of the group, excluding gap to side comment
21448 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
21449 $group_maximum_gap = $pad;
21456 # The current line either starts a new alignment group or is
21457 # accepted into the current alignment group.
21458 my $new_line = shift;
21459 $group_lines[ ++$maximum_line_index ] = $new_line;
21461 # initialize field lengths if starting new group
21462 if ( $maximum_line_index == 0 ) {
21464 my $jmax = $new_line->get_jmax();
21465 my $rfields = $new_line->get_rfields();
21466 my $rtokens = $new_line->get_rtokens();
21468 my $col = $new_line->get_leading_space_count();
21470 for $j ( 0 .. $jmax ) {
21471 $col += length( $$rfields[$j] );
21473 # create initial alignments for the new group
21475 if ( $j < $jmax ) { $token = $$rtokens[$j] }
21476 my $alignment = make_alignment( $col, $token );
21477 $new_line->set_alignment( $j, $alignment );
21480 $maximum_jmax_seen = $jmax;
21481 $minimum_jmax_seen = $jmax;
21484 # use previous alignments otherwise
21486 my @new_alignments =
21487 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
21488 $new_line->set_alignments(@new_alignments);
21491 # remember group jmax extremes for next call to valign_input
21492 $previous_minimum_jmax_seen = $minimum_jmax_seen;
21493 $previous_maximum_jmax_seen = $maximum_jmax_seen;
21498 # debug routine to dump array contents
21500 print STDOUT "(@_)\n";
21503 # flush() sends the current Perl::Tidy::VerticalAligner group down the
21504 # pipeline to Perl::Tidy::FileWriter.
21506 # This is the external flush, which also empties the buffer and cache
21509 # the buffer must be emptied first, then any cached text
21510 dump_valign_buffer();
21512 if ( $maximum_line_index < 0 ) {
21513 if ($cached_line_type) {
21514 $seqno_string = $cached_seqno_string;
21515 valign_output_step_C( $cached_line_text,
21516 $cached_line_leading_space_count,
21517 $last_level_written );
21518 $cached_line_type = 0;
21519 $cached_line_text = "";
21520 $cached_seqno_string = "";
21528 sub reduce_valign_buffer_indentation {
21531 if ( $valign_buffer_filling && $diff ) {
21532 my $max_valign_buffer = @valign_buffer;
21533 for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) {
21534 my ( $line, $leading_space_count, $level ) =
21535 @{ $valign_buffer[$i] };
21536 my $ws = substr( $line, 0, $diff );
21537 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
21538 $line = substr( $line, $diff );
21540 if ( $leading_space_count >= $diff ) {
21541 $leading_space_count -= $diff;
21542 $level = level_change( $leading_space_count, $diff, $level );
21544 $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
21551 # compute decrease in level when we remove $diff spaces from the
21553 my ( $leading_space_count, $diff, $level ) = @_;
21554 if ($rOpts_indent_columns) {
21556 int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
21557 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
21558 $level -= ( $olev - $nlev );
21559 if ( $level < 0 ) { $level = 0 }
21564 sub dump_valign_buffer {
21565 if (@valign_buffer) {
21566 foreach (@valign_buffer) {
21567 valign_output_step_D( @{$_} );
21569 @valign_buffer = ();
21571 $valign_buffer_filling = "";
21574 # This is the internal flush, which leaves the cache intact
21577 return if ( $maximum_line_index < 0 );
21579 # handle a group of comment lines
21580 if ( $group_type eq 'COMMENT' ) {
21582 VALIGN_DEBUG_FLAG_APPEND0 && do {
21583 my ( $a, $b, $c ) = caller();
21585 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
21588 my $leading_space_count = $comment_leading_space_count;
21589 my $leading_string = get_leading_string($leading_space_count);
21591 # zero leading space count if any lines are too long
21592 my $max_excess = 0;
21593 for my $i ( 0 .. $maximum_line_index ) {
21594 my $str = $group_lines[$i];
21597 $leading_space_count -
21598 maximum_line_length_for_level($group_level);
21599 if ( $excess > $max_excess ) {
21600 $max_excess = $excess;
21604 if ( $max_excess > 0 ) {
21605 $leading_space_count -= $max_excess;
21606 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
21607 $last_outdented_line_at =
21608 $file_writer_object->get_output_line_number();
21609 unless ($outdented_line_count) {
21610 $first_outdented_line_at = $last_outdented_line_at;
21612 $outdented_line_count += ( $maximum_line_index + 1 );
21615 # write the group of lines
21616 my $outdent_long_lines = 0;
21617 for my $i ( 0 .. $maximum_line_index ) {
21618 valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
21619 $outdent_long_lines, "", $group_level );
21623 # handle a group of code lines
21626 VALIGN_DEBUG_FLAG_APPEND0 && do {
21627 my $group_list_type = $group_lines[0]->get_list_type();
21628 my ( $a, $b, $c ) = caller();
21629 my $maximum_field_index = $group_lines[0]->get_jmax();
21631 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
21635 # some small groups are best left unaligned
21636 my $do_not_align = decide_if_aligned();
21638 # optimize side comment location
21639 $do_not_align = adjust_side_comment($do_not_align);
21641 # recover spaces for -lp option if possible
21642 my $extra_leading_spaces = get_extra_leading_spaces();
21644 # all lines of this group have the same basic leading spacing
21645 my $group_leader_length = $group_lines[0]->get_leading_space_count();
21647 # add extra leading spaces if helpful
21648 my $min_ci_gap = improve_continuation_indentation( $do_not_align,
21649 $group_leader_length );
21651 # loop to output all lines
21652 for my $i ( 0 .. $maximum_line_index ) {
21653 my $line = $group_lines[$i];
21654 valign_output_step_A( $line, $min_ci_gap, $do_not_align,
21655 $group_leader_length, $extra_leading_spaces );
21658 initialize_for_new_group();
21661 sub decide_if_aligned {
21663 # Do not try to align two lines which are not really similar
21664 return unless $maximum_line_index == 1;
21665 return if ($is_matching_terminal_line);
21667 my $group_list_type = $group_lines[0]->get_list_type();
21669 my $do_not_align = (
21671 # always align lists
21676 # don't align if it was just a marginal match
21679 # don't align two lines with big gap
21680 || $group_maximum_gap > 12
21682 # or lines with differing number of alignment tokens
21683 # TODO: this could be improved. It occasionally rejects
21685 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
21689 # But try to convert them into a simple comment group if the first line
21690 # a has side comment
21691 my $rfields = $group_lines[0]->get_rfields();
21692 my $maximum_field_index = $group_lines[0]->get_jmax();
21694 && ( $maximum_line_index > 0 )
21695 && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
21700 return $do_not_align;
21703 sub adjust_side_comment {
21705 my $do_not_align = shift;
21707 # let's see if we can move the side comment field out a little
21708 # to improve readability (the last field is always a side comment field)
21709 my $have_side_comment = 0;
21710 my $first_side_comment_line = -1;
21711 my $maximum_field_index = $group_lines[0]->get_jmax();
21712 for my $i ( 0 .. $maximum_line_index ) {
21713 my $line = $group_lines[$i];
21715 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
21716 $have_side_comment = 1;
21717 $first_side_comment_line = $i;
21722 my $kmax = $maximum_field_index + 1;
21724 if ($have_side_comment) {
21726 my $line = $group_lines[0];
21728 # the maximum space without exceeding the line length:
21729 my $avail = $line->get_available_space_on_right();
21731 # try to use the previous comment column
21732 my $side_comment_column = $line->get_column( $kmax - 2 );
21733 my $move = $last_comment_column - $side_comment_column;
21735 ## my $sc_line0 = $side_comment_history[0]->[0];
21736 ## my $sc_col0 = $side_comment_history[0]->[1];
21737 ## my $sc_line1 = $side_comment_history[1]->[0];
21738 ## my $sc_col1 = $side_comment_history[1]->[1];
21739 ## my $sc_line2 = $side_comment_history[2]->[0];
21740 ## my $sc_col2 = $side_comment_history[2]->[1];
21742 ## # FUTURE UPDATES:
21743 ## # Be sure to ignore 'do not align' and '} # end comments'
21744 ## # Find first $move > 0 and $move <= $avail as follows:
21745 ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
21746 ## # 2. try sc_col2 if (line-sc_line2) < 12
21747 ## # 3. try min possible space, plus up to 8,
21748 ## # 4. try min possible space
21750 if ( $kmax > 0 && !$do_not_align ) {
21752 # but if this doesn't work, give up and use the minimum space
21753 if ( $move > $avail ) {
21754 $move = $rOpts_minimum_space_to_comment - 1;
21757 # but we want some minimum space to the comment
21758 my $min_move = $rOpts_minimum_space_to_comment - 1;
21760 && $last_side_comment_length > 0
21761 && ( $first_side_comment_line == 0 )
21762 && $group_level == $last_level_written )
21767 if ( $move < $min_move ) {
21771 # previously, an upper bound was placed on $move here,
21772 # (maximum_space_to_comment), but it was not helpful
21774 # don't exceed the available space
21775 if ( $move > $avail ) { $move = $avail }
21777 # we can only increase space, never decrease
21779 $line->increase_field_width( $maximum_field_index - 1, $move );
21782 # remember this column for the next group
21783 $last_comment_column = $line->get_column( $kmax - 2 );
21787 # try to at least line up the existing side comment location
21788 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
21789 $line->increase_field_width( $maximum_field_index - 1, $move );
21793 # reset side comment column if we can't align
21795 forget_side_comment();
21799 return $do_not_align;
21802 sub improve_continuation_indentation {
21803 my ( $do_not_align, $group_leader_length ) = @_;
21805 # See if we can increase the continuation indentation
21806 # to move all continuation lines closer to the next field
21807 # (unless it is a comment).
21809 # '$min_ci_gap'is the extra indentation that we may need to introduce.
21810 # We will only introduce this to fields which already have some ci.
21811 # Without this variable, we would occasionally get something like this
21814 # use overload '+' => \&plus,
21816 # '*' => \&multiply,
21819 # 'atan2' => \&atan2,
21821 # Whereas with this variable, we can shift variables over to get this:
21823 # use overload '+' => \&plus,
21825 # '*' => \&multiply,
21828 # 'atan2' => \&atan2,
21830 ## Deactivated####################
21831 # The trouble with this patch is that it may, for example,
21832 # move in some 'or's or ':'s, and leave some out, so that the
21833 # left edge alignment suffers.
21835 ###########################################
21837 my $maximum_field_index = $group_lines[0]->get_jmax();
21839 my $min_ci_gap = maximum_line_length_for_level($group_level);
21840 if ( $maximum_field_index > 1 && !$do_not_align ) {
21842 for my $i ( 0 .. $maximum_line_index ) {
21843 my $line = $group_lines[$i];
21844 my $leading_space_count = $line->get_leading_space_count();
21845 my $rfields = $line->get_rfields();
21848 $line->get_column(0) -
21849 $leading_space_count -
21850 length( $$rfields[0] );
21852 if ( $leading_space_count > $group_leader_length ) {
21853 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
21857 if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) {
21864 return $min_ci_gap;
21867 sub valign_output_step_A {
21869 ###############################################################
21870 # This is Step A in writing vertically aligned lines.
21871 # The line is prepared according to the alignments which have
21872 # been found and shipped to the next step.
21873 ###############################################################
21875 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
21876 $extra_leading_spaces )
21878 my $rfields = $line->get_rfields();
21879 my $leading_space_count = $line->get_leading_space_count();
21880 my $outdent_long_lines = $line->get_outdent_long_lines();
21881 my $maximum_field_index = $line->get_jmax();
21882 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
21884 # add any extra spaces
21885 if ( $leading_space_count > $group_leader_length ) {
21886 $leading_space_count += $min_ci_gap;
21889 my $str = $$rfields[0];
21891 # loop to concatenate all fields of this line and needed padding
21892 my $total_pad_count = 0;
21894 for $j ( 1 .. $maximum_field_index ) {
21896 # skip zero-length side comments
21898 if ( ( $j == $maximum_field_index )
21899 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
21902 # compute spaces of padding before this field
21903 my $col = $line->get_column( $j - 1 );
21904 $pad = $col - ( length($str) + $leading_space_count );
21906 if ($do_not_align) {
21908 ( $j < $maximum_field_index )
21910 : $rOpts_minimum_space_to_comment - 1;
21913 # if the -fpsc flag is set, move the side comment to the selected
21914 # column if and only if it is possible, ignoring constraints on
21915 # line length and minimum space to comment
21916 if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
21918 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
21919 if ( $newpad >= 0 ) { $pad = $newpad; }
21922 # accumulate the padding
21923 if ( $pad > 0 ) { $total_pad_count += $pad; }
21926 if ( !defined $$rfields[$j] ) {
21927 write_diagnostics("UNDEFined field at j=$j\n");
21930 # only add padding when we have a finite field;
21931 # this avoids extra terminal spaces if we have empty fields
21932 if ( length( $$rfields[$j] ) > 0 ) {
21933 $str .= ' ' x $total_pad_count;
21934 $total_pad_count = 0;
21935 $str .= $$rfields[$j];
21938 $total_pad_count = 0;
21941 # update side comment history buffer
21942 if ( $j == $maximum_field_index ) {
21943 my $lineno = $file_writer_object->get_output_line_number();
21944 shift @side_comment_history;
21945 push @side_comment_history, [ $lineno, $col ];
21949 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
21951 # ship this line off
21952 valign_output_step_B( $leading_space_count + $extra_leading_spaces,
21953 $str, $side_comment_length, $outdent_long_lines,
21954 $rvertical_tightness_flags, $group_level );
21957 sub get_extra_leading_spaces {
21959 #----------------------------------------------------------
21960 # Define any extra indentation space (for the -lp option).
21962 # If a list has side comments, sub scan_list must dump the
21963 # list before it sees everything. When this happens, it sets
21964 # the indentation to the standard scheme, but notes how
21965 # many spaces it would have liked to use. We may be able
21966 # to recover that space here in the event that all of the
21967 # lines of a list are back together again.
21968 #----------------------------------------------------------
21970 my $extra_leading_spaces = 0;
21971 if ($extra_indent_ok) {
21972 my $object = $group_lines[0]->get_indentation();
21973 if ( ref($object) ) {
21974 my $extra_indentation_spaces_wanted =
21975 get_RECOVERABLE_SPACES($object);
21977 # all indentation objects must be the same
21979 for $i ( 1 .. $maximum_line_index ) {
21980 if ( $object != $group_lines[$i]->get_indentation() ) {
21981 $extra_indentation_spaces_wanted = 0;
21986 if ($extra_indentation_spaces_wanted) {
21988 # the maximum space without exceeding the line length:
21989 my $avail = $group_lines[0]->get_available_space_on_right();
21990 $extra_leading_spaces =
21991 ( $avail > $extra_indentation_spaces_wanted )
21992 ? $extra_indentation_spaces_wanted
21995 # update the indentation object because with -icp the terminal
21996 # ');' will use the same adjustment.
21997 $object->permanently_decrease_AVAILABLE_SPACES(
21998 -$extra_leading_spaces );
22002 return $extra_leading_spaces;
22005 sub combine_fields {
22007 # combine all fields except for the comment field ( sidecmt.t )
22008 # Uses global variables:
22010 # $maximum_line_index
22012 my $maximum_field_index = $group_lines[0]->get_jmax();
22013 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
22014 my $line = $group_lines[$j];
22015 my $rfields = $line->get_rfields();
22016 foreach ( 1 .. $maximum_field_index - 1 ) {
22017 $$rfields[0] .= $$rfields[$_];
22019 $$rfields[1] = $$rfields[$maximum_field_index];
22021 $line->set_jmax(1);
22022 $line->set_column( 0, 0 );
22023 $line->set_column( 1, 0 );
22026 $maximum_field_index = 1;
22028 for $j ( 0 .. $maximum_line_index ) {
22029 my $line = $group_lines[$j];
22030 my $rfields = $line->get_rfields();
22031 for $k ( 0 .. $maximum_field_index ) {
22032 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
22034 $pad += $group_lines[$j]->get_leading_space_count();
22037 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
22043 sub get_output_line_number {
22045 # the output line number reported to a caller is the number of items
22046 # written plus the number of items in the buffer
22048 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
22051 sub valign_output_step_B {
22053 ###############################################################
22054 # This is Step B in writing vertically aligned lines.
22055 # Vertical tightness is applied according to preset flags.
22056 # In particular this routine handles stacking of opening
22057 # and closing tokens.
22058 ###############################################################
22060 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
22061 $rvertical_tightness_flags, $level )
22064 # handle outdenting of long lines:
22065 if ($outdent_long_lines) {
22068 $side_comment_length +
22069 $leading_space_count -
22070 maximum_line_length_for_level($level);
22071 if ( $excess > 0 ) {
22072 $leading_space_count = 0;
22073 $last_outdented_line_at =
22074 $file_writer_object->get_output_line_number();
22076 unless ($outdented_line_count) {
22077 $first_outdented_line_at = $last_outdented_line_at;
22079 $outdented_line_count++;
22083 # Make preliminary leading whitespace. It could get changed
22084 # later by entabbing, so we have to keep track of any changes
22085 # to the leading_space_count from here on.
22086 my $leading_string =
22087 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
22089 # Unpack any recombination data; it was packed by
22090 # sub send_lines_to_vertical_aligner. Contents:
22092 # [0] type: 1=opening non-block 2=closing non-block
22093 # 3=opening block brace 4=closing block brace
22094 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
22095 # if closing: spaces of padding to use
22096 # [2] sequence number of container
22097 # [3] valid flag: do not append if this flag is false
22099 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
22101 if ($rvertical_tightness_flags) {
22103 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
22105 ) = @{$rvertical_tightness_flags};
22108 $seqno_string = $seqno_end;
22110 # handle any cached line ..
22111 # either append this line to it or write it out
22112 if ( length($cached_line_text) ) {
22114 # Dump an invalid cached line
22115 if ( !$cached_line_valid ) {
22116 valign_output_step_C( $cached_line_text,
22117 $cached_line_leading_space_count,
22118 $last_level_written );
22121 # Handle cached line ending in OPENING tokens
22122 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
22124 my $gap = $leading_space_count - length($cached_line_text);
22126 # handle option of just one tight opening per line:
22127 if ( $cached_line_flag == 1 ) {
22128 if ( defined($open_or_close) && $open_or_close == 1 ) {
22133 if ( $gap >= 0 && defined($seqno_beg) ) {
22134 $leading_string = $cached_line_text . ' ' x $gap;
22135 $leading_space_count = $cached_line_leading_space_count;
22136 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
22137 $level = $last_level_written;
22140 valign_output_step_C( $cached_line_text,
22141 $cached_line_leading_space_count,
22142 $last_level_written );
22146 # Handle cached line ending in CLOSING tokens
22148 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
22151 # The new line must start with container
22154 # The container combination must be okay..
22157 # okay to combine like types
22158 ( $open_or_close == $cached_line_type )
22160 # closing block brace may append to non-block
22161 || ( $cached_line_type == 2 && $open_or_close == 4 )
22163 # something like ');'
22164 || ( !$open_or_close && $cached_line_type == 2 )
22168 # The combined line must fit
22170 length($test_line) <=
22171 maximum_line_length_for_level($last_level_written) )
22175 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
22177 # Patch to outdent closing tokens ending # in ');'
22178 # If we are joining a line like ');' to a previous stacked
22179 # set of closing tokens, then decide if we may outdent the
22180 # combined stack to the indentation of the ');'. Since we
22181 # should not normally outdent any of the other tokens more than
22182 # the indentation of the lines that contained them, we will
22183 # only do this if all of the corresponding opening
22184 # tokens were on the same line. This can happen with
22185 # -sot and -sct. For example, it is ok here:
22186 # __PACKAGE__->load_components( qw(
22191 # But, for example, we do not outdent in this example because
22192 # that would put the closing sub brace out farther than the
22193 # opening sub brace:
22195 # perltidy -sot -sct
22197 # '<Control-f>' => sub {
22199 # my $e = $c->XEvent;
22200 # itemsUnderArea $c;
22203 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
22205 # The way to tell this is if the stacked sequence numbers
22206 # of this output line are the reverse of the stacked
22207 # sequence numbers of the previous non-blank line of
22208 # sequence numbers. So we can join if the previous
22209 # nonblank string of tokens is the mirror image. For
22210 # example if stack )}] is 13:8:6 then we are looking for a
22211 # leading stack like [{( which is 6:8:13 We only need to
22212 # check the two ends, because the intermediate tokens must
22213 # fall in order. Note on speed: having to split on colons
22214 # and eliminate multiple colons might appear to be slow,
22215 # but it's not an issue because we almost never come
22216 # through here. In a typical file we don't.
22217 $seqno_string =~ s/^:+//;
22218 $last_nonblank_seqno_string =~ s/^:+//;
22219 $seqno_string =~ s/:+/:/g;
22220 $last_nonblank_seqno_string =~ s/:+/:/g;
22222 # how many spaces can we outdent?
22224 $cached_line_leading_space_count - $leading_space_count;
22226 && length($seqno_string)
22227 && length($last_nonblank_seqno_string) ==
22228 length($seqno_string) )
22231 ( split ':', $last_nonblank_seqno_string );
22232 my @seqno_now = ( split ':', $seqno_string );
22233 if ( $seqno_now[-1] == $seqno_last[0]
22234 && $seqno_now[0] == $seqno_last[-1] )
22238 # for absolute safety, be sure we only remove
22240 my $ws = substr( $test_line, 0, $diff );
22241 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
22243 $test_line = substr( $test_line, $diff );
22244 $cached_line_leading_space_count -= $diff;
22245 $last_level_written =
22247 $cached_line_leading_space_count,
22248 $diff, $last_level_written );
22249 reduce_valign_buffer_indentation($diff);
22252 # shouldn't happen, but not critical:
22254 ## ERROR transferring indentation here
22261 $leading_string = "";
22262 $leading_space_count = $cached_line_leading_space_count;
22263 $level = $last_level_written;
22266 valign_output_step_C( $cached_line_text,
22267 $cached_line_leading_space_count,
22268 $last_level_written );
22272 $cached_line_type = 0;
22273 $cached_line_text = "";
22275 # make the line to be written
22276 my $line = $leading_string . $str;
22278 # write or cache this line
22279 if ( !$open_or_close || $side_comment_length > 0 ) {
22280 valign_output_step_C( $line, $leading_space_count, $level );
22283 $cached_line_text = $line;
22284 $cached_line_type = $open_or_close;
22285 $cached_line_flag = $tightness_flag;
22286 $cached_seqno = $seqno;
22287 $cached_line_valid = $valid;
22288 $cached_line_leading_space_count = $leading_space_count;
22289 $cached_seqno_string = $seqno_string;
22292 $last_level_written = $level;
22293 $last_side_comment_length = $side_comment_length;
22294 $extra_indent_ok = 0;
22297 sub valign_output_step_C {
22299 ###############################################################
22300 # This is Step C in writing vertically aligned lines.
22301 # Lines are either stored in a buffer or passed along to the next step.
22302 # The reason for storing lines is that we may later want to reduce their
22303 # indentation when -sot and -sct are both used.
22304 ###############################################################
22307 # Dump any saved lines if we see a line with an unbalanced opening or
22309 dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
22311 # Either store or write this line
22312 if ($valign_buffer_filling) {
22313 push @valign_buffer, [@args];
22316 valign_output_step_D(@args);
22319 # For lines starting or ending with opening or closing tokens..
22320 if ($seqno_string) {
22321 $last_nonblank_seqno_string = $seqno_string;
22323 # Start storing lines when we see a line with multiple stacked opening
22325 # patch for RT #94354, requested by Colin Williams
22326 if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
22329 # This test is efficient but a little subtle: The first test says
22330 # that we have multiple sequence numbers and hence multiple opening
22331 # or closing tokens in this line. The second part of the test
22332 # rejects stacked closing and ternary tokens. So if we get here
22333 # then we should have stacked unbalanced opening tokens.
22335 # Here is a complex example:
22337 # Foo($Bar[0], { # (side comment)
22341 # The first line has sequence 6::4. It does not begin with
22342 # a closing token or ternary, so it passes the test and must be
22343 # stacked opening tokens.
22345 # The last line has sequence 4:6 but is a stack of closing tokens,
22346 # so it gets rejected.
22348 # Note that the sequence number of an opening token for a qw quote
22349 # is a negative number and will be rejected.
22350 # For example, for the following line:
22351 # skip_symbols([qw(
22352 # $seqno_string='10:5:-1'. It would be okay to accept it but
22353 # I decided not to do this after testing.
22355 $valign_buffer_filling = $seqno_string;
22361 sub valign_output_step_D {
22363 ###############################################################
22364 # This is Step D in writing vertically aligned lines.
22365 # Write one vertically aligned line of code to the output object.
22366 ###############################################################
22368 my ( $line, $leading_space_count, $level ) = @_;
22370 # The line is currently correct if there is no tabbing (recommended!)
22371 # We may have to lop off some leading spaces and replace with tabs.
22372 if ( $leading_space_count > 0 ) {
22374 # Nothing to do if no tabs
22375 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22376 || $rOpts_indent_columns <= 0 )
22382 # Handle entab option
22383 elsif ($rOpts_entab_leading_whitespace) {
22385 $leading_space_count % $rOpts_entab_leading_whitespace;
22387 int( $leading_space_count / $rOpts_entab_leading_whitespace );
22388 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
22389 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22390 substr( $line, 0, $leading_space_count ) = $leading_string;
22394 # shouldn't happen - program error counting whitespace
22396 VALIGN_DEBUG_FLAG_TABS
22398 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22403 # Handle option of one tab per level
22405 my $leading_string = ( "\t" x $level );
22407 $leading_space_count - $level * $rOpts_indent_columns;
22409 # shouldn't happen:
22410 if ( $space_count < 0 ) {
22412 # But it could be an outdented comment
22413 if ( $line !~ /^\s*#/ ) {
22414 VALIGN_DEBUG_FLAG_TABS
22416 "Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
22419 $leading_string = ( ' ' x $leading_space_count );
22422 $leading_string .= ( ' ' x $space_count );
22424 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22425 substr( $line, 0, $leading_space_count ) = $leading_string;
22429 # shouldn't happen - program error counting whitespace
22430 # we'll skip entabbing
22431 VALIGN_DEBUG_FLAG_TABS
22433 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22438 $file_writer_object->write_code_line( $line . "\n" );
22441 { # begin get_leading_string
22443 my @leading_string_cache;
22445 sub get_leading_string {
22447 # define the leading whitespace string for this line..
22448 my $leading_whitespace_count = shift;
22450 # Handle case of zero whitespace, which includes multi-line quotes
22451 # (which may have a finite level; this prevents tab problems)
22452 if ( $leading_whitespace_count <= 0 ) {
22456 # look for previous result
22457 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
22458 return $leading_string_cache[$leading_whitespace_count];
22461 # must compute a string for this number of spaces
22462 my $leading_string;
22464 # Handle simple case of no tabs
22465 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22466 || $rOpts_indent_columns <= 0 )
22468 $leading_string = ( ' ' x $leading_whitespace_count );
22471 # Handle entab option
22472 elsif ($rOpts_entab_leading_whitespace) {
22474 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
22475 my $tab_count = int(
22476 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
22477 $leading_string = "\t" x $tab_count . ' ' x $space_count;
22480 # Handle option of one tab per level
22482 $leading_string = ( "\t" x $group_level );
22484 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
22486 # shouldn't happen:
22487 if ( $space_count < 0 ) {
22488 VALIGN_DEBUG_FLAG_TABS
22490 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
22493 # -- skip entabbing
22494 $leading_string = ( ' ' x $leading_whitespace_count );
22497 $leading_string .= ( ' ' x $space_count );
22500 $leading_string_cache[$leading_whitespace_count] = $leading_string;
22501 return $leading_string;
22503 } # end get_leading_string
22505 sub report_anything_unusual {
22507 if ( $outdented_line_count > 0 ) {
22508 write_logfile_entry(
22509 "$outdented_line_count long lines were outdented:\n");
22510 write_logfile_entry(
22511 " First at output line $first_outdented_line_at\n");
22513 if ( $outdented_line_count > 1 ) {
22514 write_logfile_entry(
22515 " Last at output line $last_outdented_line_at\n");
22517 write_logfile_entry(
22518 " use -noll to prevent outdenting, -l=n to increase line length\n"
22520 write_logfile_entry("\n");
22524 #####################################################################
22526 # the Perl::Tidy::FileWriter class writes the output file
22528 #####################################################################
22530 package Perl::Tidy::FileWriter;
22532 # Maximum number of little messages; probably need not be changed.
22533 use constant MAX_NAG_MESSAGES => 6;
22535 sub write_logfile_entry {
22537 my $logger_object = $self->{_logger_object};
22538 if ($logger_object) {
22539 $logger_object->write_logfile_entry(@_);
22545 my ( $line_sink_object, $rOpts, $logger_object ) = @_;
22548 _line_sink_object => $line_sink_object,
22549 _logger_object => $logger_object,
22551 _output_line_number => 1,
22552 _consecutive_blank_lines => 0,
22553 _consecutive_nonblank_lines => 0,
22554 _first_line_length_error => 0,
22555 _max_line_length_error => 0,
22556 _last_line_length_error => 0,
22557 _first_line_length_error_at => 0,
22558 _max_line_length_error_at => 0,
22559 _last_line_length_error_at => 0,
22560 _line_length_error_count => 0,
22561 _max_output_line_length => 0,
22562 _max_output_line_length_at => 0,
22568 $self->{_line_sink_object}->tee_on();
22573 $self->{_line_sink_object}->tee_off();
22576 sub get_output_line_number {
22578 return $self->{_output_line_number};
22581 sub decrement_output_line_number {
22583 $self->{_output_line_number}--;
22586 sub get_consecutive_nonblank_lines {
22588 return $self->{_consecutive_nonblank_lines};
22591 sub reset_consecutive_blank_lines {
22593 $self->{_consecutive_blank_lines} = 0;
22596 sub want_blank_line {
22598 unless ( $self->{_consecutive_blank_lines} ) {
22599 $self->write_blank_code_line();
22603 sub require_blank_code_lines {
22605 # write out the requested number of blanks regardless of the value of -mbl
22606 # unless -mbl=0. This allows extra blank lines to be written for subs and
22607 # packages even with the default -mbl=1
22610 my $need = $count - $self->{_consecutive_blank_lines};
22611 my $rOpts = $self->{_rOpts};
22612 my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
22613 for ( my $i = 0 ; $i < $need ; $i++ ) {
22614 $self->write_blank_code_line($forced);
22618 sub write_blank_code_line {
22620 my $forced = shift;
22621 my $rOpts = $self->{_rOpts};
22624 && $self->{_consecutive_blank_lines} >=
22625 $rOpts->{'maximum-consecutive-blank-lines'} );
22626 $self->{_consecutive_blank_lines}++;
22627 $self->{_consecutive_nonblank_lines} = 0;
22628 $self->write_line("\n");
22631 sub write_code_line {
22635 if ( $a =~ /^\s*$/ ) {
22636 my $rOpts = $self->{_rOpts};
22638 if ( $self->{_consecutive_blank_lines} >=
22639 $rOpts->{'maximum-consecutive-blank-lines'} );
22640 $self->{_consecutive_blank_lines}++;
22641 $self->{_consecutive_nonblank_lines} = 0;
22644 $self->{_consecutive_blank_lines} = 0;
22645 $self->{_consecutive_nonblank_lines}++;
22647 $self->write_line($a);
22654 # TODO: go through and see if the test is necessary here
22655 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
22657 $self->{_line_sink_object}->write_line($a);
22659 # This calculation of excess line length ignores any internal tabs
22660 my $rOpts = $self->{_rOpts};
22661 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
22662 if ( $a =~ /^\t+/g ) {
22663 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
22666 # Note that we just incremented output line number to future value
22667 # so we must subtract 1 for current line number
22668 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
22669 $self->{_max_output_line_length} = length($a) - 1;
22670 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
22673 if ( $exceed > 0 ) {
22674 my $output_line_number = $self->{_output_line_number};
22675 $self->{_last_line_length_error} = $exceed;
22676 $self->{_last_line_length_error_at} = $output_line_number - 1;
22677 if ( $self->{_line_length_error_count} == 0 ) {
22678 $self->{_first_line_length_error} = $exceed;
22679 $self->{_first_line_length_error_at} = $output_line_number - 1;
22683 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
22685 $self->{_max_line_length_error} = $exceed;
22686 $self->{_max_line_length_error_at} = $output_line_number - 1;
22689 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
22690 $self->write_logfile_entry(
22691 "Line length exceeded by $exceed characters\n");
22693 $self->{_line_length_error_count}++;
22698 sub report_line_length_errors {
22700 my $rOpts = $self->{_rOpts};
22701 my $line_length_error_count = $self->{_line_length_error_count};
22702 if ( $line_length_error_count == 0 ) {
22703 $self->write_logfile_entry(
22704 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
22705 my $max_output_line_length = $self->{_max_output_line_length};
22706 my $max_output_line_length_at = $self->{_max_output_line_length_at};
22707 $self->write_logfile_entry(
22708 " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
22714 my $word = ( $line_length_error_count > 1 ) ? "s" : "";
22715 $self->write_logfile_entry(
22716 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
22719 $word = ( $line_length_error_count > 1 ) ? "First" : "";
22720 my $first_line_length_error = $self->{_first_line_length_error};
22721 my $first_line_length_error_at = $self->{_first_line_length_error_at};
22722 $self->write_logfile_entry(
22723 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
22726 if ( $line_length_error_count > 1 ) {
22727 my $max_line_length_error = $self->{_max_line_length_error};
22728 my $max_line_length_error_at = $self->{_max_line_length_error_at};
22729 my $last_line_length_error = $self->{_last_line_length_error};
22730 my $last_line_length_error_at = $self->{_last_line_length_error_at};
22731 $self->write_logfile_entry(
22732 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
22734 $self->write_logfile_entry(
22735 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
22741 #####################################################################
22743 # The Perl::Tidy::Debugger class shows line tokenization
22745 #####################################################################
22747 package Perl::Tidy::Debugger;
22751 my ( $class, $filename ) = @_;
22754 _debug_file => $filename,
22755 _debug_file_opened => 0,
22760 sub really_open_debug_file {
22763 my $debug_file = $self->{_debug_file};
22765 unless ( $fh = IO::File->new("> $debug_file") ) {
22766 Perl::Tidy::Warn("can't open $debug_file: $!\n");
22768 $self->{_debug_file_opened} = 1;
22769 $self->{_fh} = $fh;
22771 "Use -dump-token-types (-dtt) to get a list of token type codes\n";
22774 sub close_debug_file {
22777 my $fh = $self->{_fh};
22778 if ( $self->{_debug_file_opened} ) {
22780 eval { $self->{_fh}->close() };
22784 sub write_debug_entry {
22786 # This is a debug dump routine which may be modified as necessary
22787 # to dump tokens on a line-by-line basis. The output will be written
22788 # to the .DEBUG file when the -D flag is entered.
22790 my $line_of_tokens = shift;
22792 my $input_line = $line_of_tokens->{_line_text};
22793 my $rtoken_type = $line_of_tokens->{_rtoken_type};
22794 my $rtokens = $line_of_tokens->{_rtokens};
22795 my $rlevels = $line_of_tokens->{_rlevels};
22796 my $rslevels = $line_of_tokens->{_rslevels};
22797 my $rblock_type = $line_of_tokens->{_rblock_type};
22798 my $input_line_number = $line_of_tokens->{_line_number};
22799 my $line_type = $line_of_tokens->{_line_type};
22803 my $token_str = "$input_line_number: ";
22804 my $reconstructed_original = "$input_line_number: ";
22805 my $block_str = "$input_line_number: ";
22807 #$token_str .= "$line_type: ";
22808 #$reconstructed_original .= "$line_type: ";
22811 my @next_char = ( '"', '"' );
22813 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
22814 my $fh = $self->{_fh};
22816 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
22819 if ( $$rtoken_type[$j] eq 'k' ) {
22820 $pattern .= $$rtokens[$j];
22823 $pattern .= $$rtoken_type[$j];
22825 $reconstructed_original .= $$rtokens[$j];
22826 $block_str .= "($$rblock_type[$j])";
22827 $num = length( $$rtokens[$j] );
22828 my $type_str = $$rtoken_type[$j];
22830 # be sure there are no blank tokens (shouldn't happen)
22831 # This can only happen if a programming error has been made
22832 # because all valid tokens are non-blank
22833 if ( $type_str eq ' ' ) {
22834 print $fh "BLANK TOKEN on the next line\n";
22835 $type_str = $next_char[$i_next];
22836 $i_next = 1 - $i_next;
22839 if ( length($type_str) == 1 ) {
22840 $type_str = $type_str x $num;
22842 $token_str .= $type_str;
22845 # Write what you want here ...
22846 # print $fh "$input_line\n";
22847 # print $fh "$pattern\n";
22848 print $fh "$reconstructed_original\n";
22849 print $fh "$token_str\n";
22851 #print $fh "$block_str\n";
22854 #####################################################################
22856 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
22857 # method for returning the next line to be parsed, as well as a
22858 # 'peek_ahead()' method
22860 # The input parameter is an object with a 'get_line()' method
22861 # which returns the next line to be parsed
22863 #####################################################################
22865 package Perl::Tidy::LineBuffer;
22870 my $line_source_object = shift;
22873 _line_source_object => $line_source_object,
22874 _rlookahead_buffer => [],
22880 my $buffer_index = shift;
22882 my $line_source_object = $self->{_line_source_object};
22883 my $rlookahead_buffer = $self->{_rlookahead_buffer};
22884 if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
22885 $line = $$rlookahead_buffer[$buffer_index];
22888 $line = $line_source_object->get_line();
22889 push( @$rlookahead_buffer, $line );
22897 my $line_source_object = $self->{_line_source_object};
22898 my $rlookahead_buffer = $self->{_rlookahead_buffer};
22900 if ( scalar(@$rlookahead_buffer) ) {
22901 $line = shift @$rlookahead_buffer;
22904 $line = $line_source_object->get_line();
22909 ########################################################################
22911 # the Perl::Tidy::Tokenizer package is essentially a filter which
22912 # reads lines of perl source code from a source object and provides
22913 # corresponding tokenized lines through its get_line() method. Lines
22914 # flow from the source_object to the caller like this:
22916 # source_object --> LineBuffer_object --> Tokenizer --> calling routine
22917 # get_line() get_line() get_line() line_of_tokens
22919 # The source object can be any object with a get_line() method which
22920 # supplies one line (a character string) perl call.
22921 # The LineBuffer object is created by the Tokenizer.
22922 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
22923 # containing one tokenized line for each call to its get_line() method.
22925 # WARNING: This is not a real class yet. Only one tokenizer my be used.
22927 ########################################################################
22929 package Perl::Tidy::Tokenizer;
22933 # Caution: these debug flags produce a lot of output
22934 # They should all be 0 except when debugging small scripts
22936 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
22937 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
22938 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
22939 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
22940 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
22942 my $debug_warning = sub {
22943 print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
22946 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
22947 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
22948 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
22949 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
22950 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
22956 # PACKAGE VARIABLES for processing an entire FILE.
22960 $last_nonblank_token
22961 $last_nonblank_type
22962 $last_nonblank_block_type
22970 %user_function_prototype
22972 %is_block_list_function
22973 %saw_function_definition
22977 $square_bracket_depth
22982 @nesting_sequence_number
22983 @current_sequence_number
22985 @paren_semicolon_count
22986 @paren_structural_type
22988 @brace_structural_type
22991 @square_bracket_type
22992 @square_bracket_structural_type
22994 @nested_ternary_flag
22995 @nested_statement_type
22996 @starting_line_of_current_depth
22999 # GLOBAL CONSTANTS for routines in this package
23001 %is_indirect_object_taker
23003 %expecting_operator_token
23004 %expecting_operator_types
23005 %expecting_term_types
23006 %expecting_term_token
23008 %is_file_test_operator
23011 %is_valid_token_type
23013 %is_code_block_token
23015 @opening_brace_names
23016 @closing_brace_names
23017 %is_keyword_taking_list
23018 %is_q_qq_qw_qx_qr_s_y_tr_m
23021 # possible values of operator_expected()
23022 use constant TERM => -1;
23023 use constant UNKNOWN => 0;
23024 use constant OPERATOR => 1;
23026 # possible values of context
23027 use constant SCALAR_CONTEXT => -1;
23028 use constant UNKNOWN_CONTEXT => 0;
23029 use constant LIST_CONTEXT => 1;
23031 # Maximum number of little messages; probably need not be changed.
23032 use constant MAX_NAG_MESSAGES => 6;
23036 # methods to count instances
23038 sub get_count { $_count; }
23039 sub _increment_count { ++$_count }
23040 sub _decrement_count { --$_count }
23044 $_[0]->_decrement_count();
23051 # Note: 'tabs' and 'indent_columns' are temporary and should be
23054 source_object => undef,
23055 debugger_object => undef,
23056 diagnostics_object => undef,
23057 logger_object => undef,
23058 starting_level => undef,
23059 indent_columns => 4,
23061 look_for_hash_bang => 0,
23063 look_for_autoloader => 1,
23064 look_for_selfloader => 1,
23065 starting_line_number => 1,
23066 extended_syntax => 0,
23068 my %args = ( %defaults, @_ );
23070 # we are given an object with a get_line() method to supply source lines
23071 my $source_object = $args{source_object};
23073 # we create another object with a get_line() and peek_ahead() method
23074 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
23076 # Tokenizer state data is as follows:
23077 # _rhere_target_list reference to list of here-doc targets
23078 # _here_doc_target the target string for a here document
23079 # _here_quote_character the type of here-doc quoting (" ' ` or none)
23080 # to determine if interpolation is done
23081 # _quote_target character we seek if chasing a quote
23082 # _line_start_quote line where we started looking for a long quote
23083 # _in_here_doc flag indicating if we are in a here-doc
23084 # _in_pod flag set if we are in pod documentation
23085 # _in_error flag set if we saw severe error (binary in script)
23086 # _in_data flag set if we are in __DATA__ section
23087 # _in_end flag set if we are in __END__ section
23088 # _in_format flag set if we are in a format description
23089 # _in_attribute_list flag telling if we are looking for attributes
23090 # _in_quote flag telling if we are chasing a quote
23091 # _starting_level indentation level of first line
23092 # _line_buffer_object object with get_line() method to supply source code
23093 # _diagnostics_object place to write debugging information
23094 # _unexpected_error_count error count used to limit output
23095 # _lower_case_labels_at line numbers where lower case labels seen
23096 $tokenizer_self = {
23097 _rhere_target_list => [],
23099 _here_doc_target => "",
23100 _here_quote_character => "",
23106 _in_attribute_list => 0,
23108 _quote_target => "",
23109 _line_start_quote => -1,
23110 _starting_level => $args{starting_level},
23111 _know_starting_level => defined( $args{starting_level} ),
23112 _tabsize => $args{tabsize},
23113 _indent_columns => $args{indent_columns},
23114 _look_for_hash_bang => $args{look_for_hash_bang},
23115 _trim_qw => $args{trim_qw},
23116 _continuation_indentation => $args{continuation_indentation},
23117 _outdent_labels => $args{outdent_labels},
23118 _last_line_number => $args{starting_line_number} - 1,
23119 _saw_perl_dash_P => 0,
23120 _saw_perl_dash_w => 0,
23121 _saw_use_strict => 0,
23122 _saw_v_string => 0,
23123 _look_for_autoloader => $args{look_for_autoloader},
23124 _look_for_selfloader => $args{look_for_selfloader},
23125 _saw_autoloader => 0,
23126 _saw_selfloader => 0,
23127 _saw_hash_bang => 0,
23130 _saw_negative_indentation => 0,
23131 _started_tokenizing => 0,
23132 _line_buffer_object => $line_buffer_object,
23133 _debugger_object => $args{debugger_object},
23134 _diagnostics_object => $args{diagnostics_object},
23135 _logger_object => $args{logger_object},
23136 _unexpected_error_count => 0,
23137 _started_looking_for_here_target_at => 0,
23138 _nearly_matched_here_target_at => undef,
23140 _rlower_case_labels_at => undef,
23141 _extended_syntax => $args{extended_syntax},
23144 prepare_for_a_new_file();
23145 find_starting_indentation_level();
23147 bless $tokenizer_self, $class;
23149 # This is not a full class yet, so die if an attempt is made to
23150 # create more than one object.
23152 if ( _increment_count() > 1 ) {
23154 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
23157 return $tokenizer_self;
23161 # interface to Perl::Tidy::Logger routines
23163 my $logger_object = $tokenizer_self->{_logger_object};
23164 if ($logger_object) {
23165 $logger_object->warning(@_);
23170 my $logger_object = $tokenizer_self->{_logger_object};
23171 if ($logger_object) {
23172 $logger_object->complain(@_);
23176 sub write_logfile_entry {
23177 my $logger_object = $tokenizer_self->{_logger_object};
23178 if ($logger_object) {
23179 $logger_object->write_logfile_entry(@_);
23183 sub interrupt_logfile {
23184 my $logger_object = $tokenizer_self->{_logger_object};
23185 if ($logger_object) {
23186 $logger_object->interrupt_logfile();
23190 sub resume_logfile {
23191 my $logger_object = $tokenizer_self->{_logger_object};
23192 if ($logger_object) {
23193 $logger_object->resume_logfile();
23197 sub increment_brace_error {
23198 my $logger_object = $tokenizer_self->{_logger_object};
23199 if ($logger_object) {
23200 $logger_object->increment_brace_error();
23204 sub report_definite_bug {
23205 my $logger_object = $tokenizer_self->{_logger_object};
23206 if ($logger_object) {
23207 $logger_object->report_definite_bug();
23211 sub brace_warning {
23212 my $logger_object = $tokenizer_self->{_logger_object};
23213 if ($logger_object) {
23214 $logger_object->brace_warning(@_);
23218 sub get_saw_brace_error {
23219 my $logger_object = $tokenizer_self->{_logger_object};
23220 if ($logger_object) {
23221 $logger_object->get_saw_brace_error();
23228 # interface to Perl::Tidy::Diagnostics routines
23229 sub write_diagnostics {
23230 if ( $tokenizer_self->{_diagnostics_object} ) {
23231 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
23235 sub report_tokenization_errors {
23239 my $level = get_indentation_level();
23240 if ( $level != $tokenizer_self->{_starting_level} ) {
23241 warning("final indentation level: $level\n");
23244 check_final_nesting_depths();
23246 if ( $tokenizer_self->{_look_for_hash_bang}
23247 && !$tokenizer_self->{_saw_hash_bang} )
23250 "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
23253 if ( $tokenizer_self->{_in_format} ) {
23254 warning("hit EOF while in format description\n");
23257 if ( $tokenizer_self->{_in_pod} ) {
23259 # Just write log entry if this is after __END__ or __DATA__
23260 # because this happens to often, and it is not likely to be
23262 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
23263 write_logfile_entry(
23264 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
23270 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
23276 if ( $tokenizer_self->{_in_here_doc} ) {
23277 my $here_doc_target = $tokenizer_self->{_here_doc_target};
23278 my $started_looking_for_here_target_at =
23279 $tokenizer_self->{_started_looking_for_here_target_at};
23280 if ($here_doc_target) {
23282 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
23287 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
23290 my $nearly_matched_here_target_at =
23291 $tokenizer_self->{_nearly_matched_here_target_at};
23292 if ($nearly_matched_here_target_at) {
23294 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
23299 if ( $tokenizer_self->{_in_quote} ) {
23300 my $line_start_quote = $tokenizer_self->{_line_start_quote};
23301 my $quote_target = $tokenizer_self->{_quote_target};
23303 ( $tokenizer_self->{_in_attribute_list} )
23307 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
23311 unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
23312 if ( $] < 5.006 ) {
23313 write_logfile_entry("Suggest including '-w parameter'\n");
23316 write_logfile_entry("Suggest including 'use warnings;'\n");
23320 if ( $tokenizer_self->{_saw_perl_dash_P} ) {
23321 write_logfile_entry("Use of -P parameter for defines is discouraged\n");
23324 unless ( $tokenizer_self->{_saw_use_strict} ) {
23325 write_logfile_entry("Suggest including 'use strict;'\n");
23328 # it is suggested that labels have at least one upper case character
23329 # for legibility and to avoid code breakage as new keywords are introduced
23330 if ( $tokenizer_self->{_rlower_case_labels_at} ) {
23331 my @lower_case_labels_at =
23332 @{ $tokenizer_self->{_rlower_case_labels_at} };
23333 write_logfile_entry(
23334 "Suggest using upper case characters in label(s)\n");
23336 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
23340 sub report_v_string {
23342 # warn if this version can't handle v-strings
23344 unless ( $tokenizer_self->{_saw_v_string} ) {
23345 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
23347 if ( $] < 5.006 ) {
23349 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
23354 sub get_input_line_number {
23355 return $tokenizer_self->{_last_line_number};
23358 # returns the next tokenized line
23363 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
23364 # $square_bracket_depth, $paren_depth
23366 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
23367 $tokenizer_self->{_line_text} = $input_line;
23369 return undef unless ($input_line);
23371 my $input_line_number = ++$tokenizer_self->{_last_line_number};
23373 # Find and remove what characters terminate this line, including any
23375 my $input_line_separator = "";
23376 if ( chomp($input_line) ) { $input_line_separator = $/ }
23378 # TODO: what other characters should be included here?
23379 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
23380 $input_line_separator = $2 . $input_line_separator;
23383 # for backwards compatibility we keep the line text terminated with
23384 # a newline character
23385 $input_line .= "\n";
23386 $tokenizer_self->{_line_text} = $input_line; # update
23388 # create a data structure describing this line which will be
23389 # returned to the caller.
23391 # _line_type codes are:
23392 # SYSTEM - system-specific code before hash-bang line
23393 # CODE - line of perl code (including comments)
23394 # POD_START - line starting pod, such as '=head'
23395 # POD - pod documentation text
23396 # POD_END - last line of pod section, '=cut'
23397 # HERE - text of here-document
23398 # HERE_END - last line of here-doc (target word)
23399 # FORMAT - format section
23400 # FORMAT_END - last line of format section, '.'
23401 # DATA_START - __DATA__ line
23402 # DATA - unidentified text following __DATA__
23403 # END_START - __END__ line
23404 # END - unidentified text following __END__
23405 # ERROR - we are in big trouble, probably not a perl script
23408 # _curly_brace_depth - depth of curly braces at start of line
23409 # _square_bracket_depth - depth of square brackets at start of line
23410 # _paren_depth - depth of parens at start of line
23411 # _starting_in_quote - this line continues a multi-line quote
23412 # (so don't trim leading blanks!)
23413 # _ending_in_quote - this line ends in a multi-line quote
23414 # (so don't trim trailing blanks!)
23415 my $line_of_tokens = {
23416 _line_type => 'EOF',
23417 _line_text => $input_line,
23418 _line_number => $input_line_number,
23419 _rtoken_type => undef,
23422 _rslevels => undef,
23423 _rblock_type => undef,
23424 _rcontainer_type => undef,
23425 _rcontainer_environment => undef,
23426 _rtype_sequence => undef,
23427 _rnesting_tokens => undef,
23428 _rci_levels => undef,
23429 _rnesting_blocks => undef,
23430 _guessed_indentation_level => 0,
23431 _starting_in_quote => 0, # to be set by subroutine
23432 _ending_in_quote => 0,
23433 _curly_brace_depth => $brace_depth,
23434 _square_bracket_depth => $square_bracket_depth,
23435 _paren_depth => $paren_depth,
23436 _quote_character => '',
23439 # must print line unchanged if we are in a here document
23440 if ( $tokenizer_self->{_in_here_doc} ) {
23442 $line_of_tokens->{_line_type} = 'HERE';
23443 my $here_doc_target = $tokenizer_self->{_here_doc_target};
23444 my $here_quote_character = $tokenizer_self->{_here_quote_character};
23445 my $candidate_target = $input_line;
23446 chomp $candidate_target;
23447 if ( $candidate_target eq $here_doc_target ) {
23448 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23449 $line_of_tokens->{_line_type} = 'HERE_END';
23450 write_logfile_entry("Exiting HERE document $here_doc_target\n");
23452 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23453 if (@$rhere_target_list) { # there can be multiple here targets
23454 ( $here_doc_target, $here_quote_character ) =
23455 @{ shift @$rhere_target_list };
23456 $tokenizer_self->{_here_doc_target} = $here_doc_target;
23457 $tokenizer_self->{_here_quote_character} =
23458 $here_quote_character;
23459 write_logfile_entry(
23460 "Entering HERE document $here_doc_target\n");
23461 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23462 $tokenizer_self->{_started_looking_for_here_target_at} =
23463 $input_line_number;
23466 $tokenizer_self->{_in_here_doc} = 0;
23467 $tokenizer_self->{_here_doc_target} = "";
23468 $tokenizer_self->{_here_quote_character} = "";
23472 # check for error of extra whitespace
23473 # note for PERL6: leading whitespace is allowed
23475 $candidate_target =~ s/\s*$//;
23476 $candidate_target =~ s/^\s*//;
23477 if ( $candidate_target eq $here_doc_target ) {
23478 $tokenizer_self->{_nearly_matched_here_target_at} =
23479 $input_line_number;
23482 return $line_of_tokens;
23485 # must print line unchanged if we are in a format section
23486 elsif ( $tokenizer_self->{_in_format} ) {
23488 if ( $input_line =~ /^\.[\s#]*$/ ) {
23489 write_logfile_entry("Exiting format section\n");
23490 $tokenizer_self->{_in_format} = 0;
23491 $line_of_tokens->{_line_type} = 'FORMAT_END';
23494 $line_of_tokens->{_line_type} = 'FORMAT';
23496 return $line_of_tokens;
23499 # must print line unchanged if we are in pod documentation
23500 elsif ( $tokenizer_self->{_in_pod} ) {
23502 $line_of_tokens->{_line_type} = 'POD';
23503 if ( $input_line =~ /^=cut/ ) {
23504 $line_of_tokens->{_line_type} = 'POD_END';
23505 write_logfile_entry("Exiting POD section\n");
23506 $tokenizer_self->{_in_pod} = 0;
23508 if ( $input_line =~ /^\#\!.*perl\b/ ) {
23510 "Hash-bang in pod can cause older versions of perl to fail! \n"
23514 return $line_of_tokens;
23517 # must print line unchanged if we have seen a severe error (i.e., we
23518 # are seeing illegal tokens and cannot continue. Syntax errors do
23519 # not pass this route). Calling routine can decide what to do, but
23520 # the default can be to just pass all lines as if they were after __END__
23521 elsif ( $tokenizer_self->{_in_error} ) {
23522 $line_of_tokens->{_line_type} = 'ERROR';
23523 return $line_of_tokens;
23526 # print line unchanged if we are __DATA__ section
23527 elsif ( $tokenizer_self->{_in_data} ) {
23529 # ...but look for POD
23530 # Note that the _in_data and _in_end flags remain set
23531 # so that we return to that state after seeing the
23532 # end of a pod section
23533 if ( $input_line =~ /^=(?!cut)/ ) {
23534 $line_of_tokens->{_line_type} = 'POD_START';
23535 write_logfile_entry("Entering POD section\n");
23536 $tokenizer_self->{_in_pod} = 1;
23537 return $line_of_tokens;
23540 $line_of_tokens->{_line_type} = 'DATA';
23541 return $line_of_tokens;
23545 # print line unchanged if we are in __END__ section
23546 elsif ( $tokenizer_self->{_in_end} ) {
23548 # ...but look for POD
23549 # Note that the _in_data and _in_end flags remain set
23550 # so that we return to that state after seeing the
23551 # end of a pod section
23552 if ( $input_line =~ /^=(?!cut)/ ) {
23553 $line_of_tokens->{_line_type} = 'POD_START';
23554 write_logfile_entry("Entering POD section\n");
23555 $tokenizer_self->{_in_pod} = 1;
23556 return $line_of_tokens;
23559 $line_of_tokens->{_line_type} = 'END';
23560 return $line_of_tokens;
23564 # check for a hash-bang line if we haven't seen one
23565 if ( !$tokenizer_self->{_saw_hash_bang} ) {
23566 if ( $input_line =~ /^\#\!.*perl\b/ ) {
23567 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
23569 # check for -w and -P flags
23570 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
23571 $tokenizer_self->{_saw_perl_dash_P} = 1;
23574 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
23575 $tokenizer_self->{_saw_perl_dash_w} = 1;
23578 if ( ( $input_line_number > 1 )
23579 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
23582 # this is helpful for VMS systems; we may have accidentally
23583 # tokenized some DCL commands
23584 if ( $tokenizer_self->{_started_tokenizing} ) {
23586 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
23590 complain("Useless hash-bang after line 1\n");
23594 # Report the leading hash-bang as a system line
23595 # This will prevent -dac from deleting it
23597 $line_of_tokens->{_line_type} = 'SYSTEM';
23598 return $line_of_tokens;
23603 # wait for a hash-bang before parsing if the user invoked us with -x
23604 if ( $tokenizer_self->{_look_for_hash_bang}
23605 && !$tokenizer_self->{_saw_hash_bang} )
23607 $line_of_tokens->{_line_type} = 'SYSTEM';
23608 return $line_of_tokens;
23611 # a first line of the form ': #' will be marked as SYSTEM
23612 # since lines of this form may be used by tcsh
23613 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
23614 $line_of_tokens->{_line_type} = 'SYSTEM';
23615 return $line_of_tokens;
23618 # now we know that it is ok to tokenize the line...
23619 # the line tokenizer will modify any of these private variables:
23620 # _rhere_target_list
23627 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
23628 tokenize_this_line($line_of_tokens);
23630 # Now finish defining the return structure and return it
23631 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
23633 # handle severe error (binary data in script)
23634 if ( $tokenizer_self->{_in_error} ) {
23635 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
23636 warning("Giving up after error\n");
23637 $line_of_tokens->{_line_type} = 'ERROR';
23638 reset_indentation_level(0); # avoid error messages
23639 return $line_of_tokens;
23642 # handle start of pod documentation
23643 if ( $tokenizer_self->{_in_pod} ) {
23645 # This gets tricky..above a __DATA__ or __END__ section, perl
23646 # accepts '=cut' as the start of pod section. But afterwards,
23647 # only pod utilities see it and they may ignore an =cut without
23648 # leading =head. In any case, this isn't good.
23649 if ( $input_line =~ /^=cut\b/ ) {
23650 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
23651 complain("=cut while not in pod ignored\n");
23652 $tokenizer_self->{_in_pod} = 0;
23653 $line_of_tokens->{_line_type} = 'POD_END';
23656 $line_of_tokens->{_line_type} = 'POD_START';
23658 "=cut starts a pod section .. this can fool pod utilities.\n"
23660 write_logfile_entry("Entering POD section\n");
23665 $line_of_tokens->{_line_type} = 'POD_START';
23666 write_logfile_entry("Entering POD section\n");
23669 return $line_of_tokens;
23672 # update indentation levels for log messages
23673 if ( $input_line !~ /^\s*$/ ) {
23674 my $rlevels = $line_of_tokens->{_rlevels};
23675 $line_of_tokens->{_guessed_indentation_level} =
23676 guess_old_indentation_level($input_line);
23679 # see if this line contains here doc targets
23680 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23681 if (@$rhere_target_list) {
23683 my ( $here_doc_target, $here_quote_character ) =
23684 @{ shift @$rhere_target_list };
23685 $tokenizer_self->{_in_here_doc} = 1;
23686 $tokenizer_self->{_here_doc_target} = $here_doc_target;
23687 $tokenizer_self->{_here_quote_character} = $here_quote_character;
23688 write_logfile_entry("Entering HERE document $here_doc_target\n");
23689 $tokenizer_self->{_started_looking_for_here_target_at} =
23690 $input_line_number;
23693 # NOTE: __END__ and __DATA__ statements are written unformatted
23694 # because they can theoretically contain additional characters
23695 # which are not tokenized (and cannot be read with <DATA> either!).
23696 if ( $tokenizer_self->{_in_data} ) {
23697 $line_of_tokens->{_line_type} = 'DATA_START';
23698 write_logfile_entry("Starting __DATA__ section\n");
23699 $tokenizer_self->{_saw_data} = 1;
23701 # keep parsing after __DATA__ if use SelfLoader was seen
23702 if ( $tokenizer_self->{_saw_selfloader} ) {
23703 $tokenizer_self->{_in_data} = 0;
23704 write_logfile_entry(
23705 "SelfLoader seen, continuing; -nlsl deactivates\n");
23708 return $line_of_tokens;
23711 elsif ( $tokenizer_self->{_in_end} ) {
23712 $line_of_tokens->{_line_type} = 'END_START';
23713 write_logfile_entry("Starting __END__ section\n");
23714 $tokenizer_self->{_saw_end} = 1;
23716 # keep parsing after __END__ if use AutoLoader was seen
23717 if ( $tokenizer_self->{_saw_autoloader} ) {
23718 $tokenizer_self->{_in_end} = 0;
23719 write_logfile_entry(
23720 "AutoLoader seen, continuing; -nlal deactivates\n");
23722 return $line_of_tokens;
23725 # now, finally, we know that this line is type 'CODE'
23726 $line_of_tokens->{_line_type} = 'CODE';
23728 # remember if we have seen any real code
23729 if ( !$tokenizer_self->{_started_tokenizing}
23730 && $input_line !~ /^\s*$/
23731 && $input_line !~ /^\s*#/ )
23733 $tokenizer_self->{_started_tokenizing} = 1;
23736 if ( $tokenizer_self->{_debugger_object} ) {
23737 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
23740 # Note: if keyword 'format' occurs in this line code, it is still CODE
23741 # (keyword 'format' need not start a line)
23742 if ( $tokenizer_self->{_in_format} ) {
23743 write_logfile_entry("Entering format section\n");
23746 if ( $tokenizer_self->{_in_quote}
23747 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
23750 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
23752 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
23754 $tokenizer_self->{_line_start_quote} = $input_line_number;
23755 write_logfile_entry(
23756 "Start multi-line quote or pattern ending in $quote_target\n");
23759 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
23760 and !$tokenizer_self->{_in_quote} )
23762 $tokenizer_self->{_line_start_quote} = -1;
23763 write_logfile_entry("End of multi-line quote or pattern\n");
23766 # we are returning a line of CODE
23767 return $line_of_tokens;
23770 sub find_starting_indentation_level {
23772 # We need to find the indentation level of the first line of the
23773 # script being formatted. Often it will be zero for an entire file,
23774 # but if we are formatting a local block of code (within an editor for
23775 # example) it may not be zero. The user may specify this with the
23776 # -sil=n parameter but normally doesn't so we have to guess.
23778 # USES GLOBAL VARIABLES: $tokenizer_self
23779 my $starting_level = 0;
23781 # use value if given as parameter
23782 if ( $tokenizer_self->{_know_starting_level} ) {
23783 $starting_level = $tokenizer_self->{_starting_level};
23786 # if we know there is a hash_bang line, the level must be zero
23787 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
23788 $tokenizer_self->{_know_starting_level} = 1;
23791 # otherwise figure it out from the input file
23796 # keep looking at lines until we find a hash bang or piece of code
23799 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23802 # if first line is #! then assume starting level is zero
23803 if ( $i == 1 && $line =~ /^\#\!/ ) {
23804 $starting_level = 0;
23807 next if ( $line =~ /^\s*#/ ); # skip past comments
23808 next if ( $line =~ /^\s*$/ ); # skip past blank lines
23809 $starting_level = guess_old_indentation_level($line);
23812 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
23813 write_logfile_entry("$msg");
23815 $tokenizer_self->{_starting_level} = $starting_level;
23816 reset_indentation_level($starting_level);
23819 sub guess_old_indentation_level {
23822 # Guess the indentation level of an input line.
23824 # For the first line of code this result will define the starting
23825 # indentation level. It will mainly be non-zero when perltidy is applied
23826 # within an editor to a local block of code.
23828 # This is an impossible task in general because we can't know what tabs
23829 # meant for the old script and how many spaces were used for one
23830 # indentation level in the given input script. For example it may have
23831 # been previously formatted with -i=7 -et=3. But we can at least try to
23832 # make sure that perltidy guesses correctly if it is applied repeatedly to
23833 # a block of code within an editor, so that the block stays at the same
23834 # level when perltidy is applied repeatedly.
23836 # USES GLOBAL VARIABLES: $tokenizer_self
23839 # find leading tabs, spaces, and any statement label
23841 if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
23843 # If there are leading tabs, we use the tab scheme for this run, if
23844 # any, so that the code will remain stable when editing.
23845 if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
23847 if ($2) { $spaces += length($2) }
23849 # correct for outdented labels
23850 if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
23851 $spaces += $tokenizer_self->{_continuation_indentation};
23855 # compute indentation using the value of -i for this run.
23856 # If -i=0 is used for this run (which is possible) it doesn't matter
23857 # what we do here but we'll guess that the old run used 4 spaces per level.
23858 my $indent_columns = $tokenizer_self->{_indent_columns};
23859 $indent_columns = 4 if ( !$indent_columns );
23860 $level = int( $spaces / $indent_columns );
23864 # This is a currently unused debug routine
23865 sub dump_functions {
23869 foreach $pkg ( keys %is_user_function ) {
23870 print $fh "\nnon-constant subs in package $pkg\n";
23872 foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
23874 if ( $is_block_list_function{$pkg}{$sub} ) {
23875 $msg = 'block_list';
23878 if ( $is_block_function{$pkg}{$sub} ) {
23881 print $fh "$sub $msg\n";
23885 foreach $pkg ( keys %is_constant ) {
23886 print $fh "\nconstants and constant subs in package $pkg\n";
23888 foreach $sub ( keys %{ $is_constant{$pkg} } ) {
23889 print $fh "$sub\n";
23896 # count number of 1's in a string of 1's and 0's
23897 # example: ones_count("010101010101") gives 6
23898 return ( my $cis = $_[0] ) =~ tr/1/0/;
23901 sub prepare_for_a_new_file {
23903 # previous tokens needed to determine what to expect next
23904 $last_nonblank_token = ';'; # the only possible starting state which
23905 $last_nonblank_type = ';'; # will make a leading brace a code block
23906 $last_nonblank_block_type = '';
23908 # scalars for remembering statement types across multiple lines
23909 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
23910 $in_attribute_list = 0;
23912 # scalars for remembering where we are in the file
23913 $current_package = "main";
23914 $context = UNKNOWN_CONTEXT;
23916 # hashes used to remember function information
23917 %is_constant = (); # user-defined constants
23918 %is_user_function = (); # user-defined functions
23919 %user_function_prototype = (); # their prototypes
23920 %is_block_function = ();
23921 %is_block_list_function = ();
23922 %saw_function_definition = ();
23924 # variables used to track depths of various containers
23925 # and report nesting errors
23928 $square_bracket_depth = 0;
23929 @current_depth[ 0 .. $#closing_brace_names ] =
23930 (0) x scalar @closing_brace_names;
23933 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
23934 ( 0 .. $#closing_brace_names );
23935 @current_sequence_number = ();
23936 $paren_type[$paren_depth] = '';
23937 $paren_semicolon_count[$paren_depth] = 0;
23938 $paren_structural_type[$brace_depth] = '';
23939 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
23940 $brace_structural_type[$brace_depth] = '';
23941 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
23942 $brace_package[$paren_depth] = $current_package;
23943 $square_bracket_type[$square_bracket_depth] = '';
23944 $square_bracket_structural_type[$square_bracket_depth] = '';
23946 initialize_tokenizer_state();
23949 { # begin tokenize_this_line
23951 use constant BRACE => 0;
23952 use constant SQUARE_BRACKET => 1;
23953 use constant PAREN => 2;
23954 use constant QUESTION_COLON => 3;
23956 # TV1: scalars for processing one LINE.
23957 # Re-initialized on each entry to sub tokenize_this_line.
23959 $block_type, $container_type, $expecting,
23960 $i, $i_tok, $input_line,
23961 $input_line_number, $last_nonblank_i, $max_token_index,
23962 $next_tok, $next_type, $peeked_ahead,
23963 $prototype, $rhere_target_list, $rtoken_map,
23964 $rtoken_type, $rtokens, $tok,
23965 $type, $type_sequence, $indent_flag,
23968 # TV2: refs to ARRAYS for processing one LINE
23969 # Re-initialized on each call.
23970 my $routput_token_list = []; # stack of output token indexes
23971 my $routput_token_type = []; # token types
23972 my $routput_block_type = []; # types of code block
23973 my $routput_container_type = []; # paren types, such as if, elsif, ..
23974 my $routput_type_sequence = []; # nesting sequential number
23975 my $routput_indent_flag = []; #
23977 # TV3: SCALARS for quote variables. These are initialized with a
23978 # subroutine call and continually updated as lines are processed.
23979 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
23980 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
23982 # TV4: SCALARS for multi-line identifiers and
23983 # statements. These are initialized with a subroutine call
23984 # and continually updated as lines are processed.
23985 my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
23987 # TV5: SCALARS for tracking indentation level.
23988 # Initialized once and continually updated as lines are
23991 $nesting_token_string, $nesting_type_string,
23992 $nesting_block_string, $nesting_block_flag,
23993 $nesting_list_string, $nesting_list_flag,
23994 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23995 $in_statement_continuation, $level_in_tokenizer,
23996 $slevel_in_tokenizer, $rslevel_stack,
23999 # TV6: SCALARS for remembering several previous
24000 # tokens. Initialized once and continually updated as
24001 # lines are processed.
24003 $last_nonblank_container_type, $last_nonblank_type_sequence,
24004 $last_last_nonblank_token, $last_last_nonblank_type,
24005 $last_last_nonblank_block_type, $last_last_nonblank_container_type,
24006 $last_last_nonblank_type_sequence, $last_nonblank_prototype,
24009 # ----------------------------------------------------------------
24010 # beginning of tokenizer variable access and manipulation routines
24011 # ----------------------------------------------------------------
24013 sub initialize_tokenizer_state {
24015 # TV1: initialized on each call
24016 # TV2: initialized on each call
24020 $quote_character = "";
24023 $quoted_string_1 = "";
24024 $quoted_string_2 = "";
24025 $allowed_quote_modifiers = "";
24028 $id_scan_state = '';
24031 $indented_if_level = 0;
24034 $nesting_token_string = "";
24035 $nesting_type_string = "";
24036 $nesting_block_string = '1'; # initially in a block
24037 $nesting_block_flag = 1;
24038 $nesting_list_string = '0'; # initially not in a list
24039 $nesting_list_flag = 0; # initially not in a list
24040 $ci_string_in_tokenizer = "";
24041 $continuation_string_in_tokenizer = "0";
24042 $in_statement_continuation = 0;
24043 $level_in_tokenizer = 0;
24044 $slevel_in_tokenizer = 0;
24045 $rslevel_stack = [];
24048 $last_nonblank_container_type = '';
24049 $last_nonblank_type_sequence = '';
24050 $last_last_nonblank_token = ';';
24051 $last_last_nonblank_type = ';';
24052 $last_last_nonblank_block_type = '';
24053 $last_last_nonblank_container_type = '';
24054 $last_last_nonblank_type_sequence = '';
24055 $last_nonblank_prototype = "";
24058 sub save_tokenizer_state {
24061 $block_type, $container_type, $expecting,
24062 $i, $i_tok, $input_line,
24063 $input_line_number, $last_nonblank_i, $max_token_index,
24064 $next_tok, $next_type, $peeked_ahead,
24065 $prototype, $rhere_target_list, $rtoken_map,
24066 $rtoken_type, $rtokens, $tok,
24067 $type, $type_sequence, $indent_flag,
24071 $routput_token_list, $routput_token_type,
24072 $routput_block_type, $routput_container_type,
24073 $routput_type_sequence, $routput_indent_flag,
24077 $in_quote, $quote_type,
24078 $quote_character, $quote_pos,
24079 $quote_depth, $quoted_string_1,
24080 $quoted_string_2, $allowed_quote_modifiers,
24084 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
24087 $nesting_token_string, $nesting_type_string,
24088 $nesting_block_string, $nesting_block_flag,
24089 $nesting_list_string, $nesting_list_flag,
24090 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
24091 $in_statement_continuation, $level_in_tokenizer,
24092 $slevel_in_tokenizer, $rslevel_stack,
24096 $last_nonblank_container_type,
24097 $last_nonblank_type_sequence,
24098 $last_last_nonblank_token,
24099 $last_last_nonblank_type,
24100 $last_last_nonblank_block_type,
24101 $last_last_nonblank_container_type,
24102 $last_last_nonblank_type_sequence,
24103 $last_nonblank_prototype,
24105 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
24108 sub restore_tokenizer_state {
24110 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
24112 $block_type, $container_type, $expecting,
24113 $i, $i_tok, $input_line,
24114 $input_line_number, $last_nonblank_i, $max_token_index,
24115 $next_tok, $next_type, $peeked_ahead,
24116 $prototype, $rhere_target_list, $rtoken_map,
24117 $rtoken_type, $rtokens, $tok,
24118 $type, $type_sequence, $indent_flag,
24122 $routput_token_list, $routput_token_type,
24123 $routput_block_type, $routput_container_type,
24124 $routput_type_sequence, $routput_type_sequence,
24128 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
24129 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
24132 ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
24136 $nesting_token_string, $nesting_type_string,
24137 $nesting_block_string, $nesting_block_flag,
24138 $nesting_list_string, $nesting_list_flag,
24139 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
24140 $in_statement_continuation, $level_in_tokenizer,
24141 $slevel_in_tokenizer, $rslevel_stack,
24145 $last_nonblank_container_type,
24146 $last_nonblank_type_sequence,
24147 $last_last_nonblank_token,
24148 $last_last_nonblank_type,
24149 $last_last_nonblank_block_type,
24150 $last_last_nonblank_container_type,
24151 $last_last_nonblank_type_sequence,
24152 $last_nonblank_prototype,
24156 sub get_indentation_level {
24158 # patch to avoid reporting error if indented if is not terminated
24159 if ($indented_if_level) { return $level_in_tokenizer - 1 }
24160 return $level_in_tokenizer;
24163 sub reset_indentation_level {
24164 $level_in_tokenizer = $_[0];
24165 $slevel_in_tokenizer = $_[0];
24166 push @{$rslevel_stack}, $slevel_in_tokenizer;
24170 $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
24173 # ------------------------------------------------------------
24174 # end of tokenizer variable access and manipulation routines
24175 # ------------------------------------------------------------
24177 # ------------------------------------------------------------
24178 # beginning of various scanner interface routines
24179 # ------------------------------------------------------------
24180 sub scan_replacement_text {
24182 # check for here-docs in replacement text invoked by
24183 # a substitution operator with executable modifier 'e'.
24186 # $replacement_text
24188 # $rht = reference to any here-doc targets
24189 my ($replacement_text) = @_;
24192 return undef unless ( $replacement_text =~ /<</ );
24194 write_logfile_entry("scanning replacement text for here-doc targets\n");
24196 # save the logger object for error messages
24197 my $logger_object = $tokenizer_self->{_logger_object};
24199 # localize all package variables
24201 $tokenizer_self, $last_nonblank_token,
24202 $last_nonblank_type, $last_nonblank_block_type,
24203 $statement_type, $in_attribute_list,
24204 $current_package, $context,
24205 %is_constant, %is_user_function,
24206 %user_function_prototype, %is_block_function,
24207 %is_block_list_function, %saw_function_definition,
24208 $brace_depth, $paren_depth,
24209 $square_bracket_depth, @current_depth,
24210 @total_depth, $total_depth,
24211 @nesting_sequence_number, @current_sequence_number,
24212 @paren_type, @paren_semicolon_count,
24213 @paren_structural_type, @brace_type,
24214 @brace_structural_type, @brace_context,
24215 @brace_package, @square_bracket_type,
24216 @square_bracket_structural_type, @depth_array,
24217 @starting_line_of_current_depth, @nested_ternary_flag,
24218 @nested_statement_type,
24221 # save all lexical variables
24222 my $rstate = save_tokenizer_state();
24223 _decrement_count(); # avoid error check for multiple tokenizers
24225 # make a new tokenizer
24227 my $rpending_logfile_message;
24228 my $source_object =
24229 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
24230 $rpending_logfile_message );
24231 my $tokenizer = Perl::Tidy::Tokenizer->new(
24232 source_object => $source_object,
24233 logger_object => $logger_object,
24234 starting_line_number => $input_line_number,
24237 # scan the replacement text
24238 1 while ( $tokenizer->get_line() );
24240 # remove any here doc targets
24242 if ( $tokenizer_self->{_in_here_doc} ) {
24246 $tokenizer_self->{_here_doc_target},
24247 $tokenizer_self->{_here_quote_character}
24249 if ( $tokenizer_self->{_rhere_target_list} ) {
24250 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
24251 $tokenizer_self->{_rhere_target_list} = undef;
24253 $tokenizer_self->{_in_here_doc} = undef;
24256 # now its safe to report errors
24257 $tokenizer->report_tokenization_errors();
24259 # restore all tokenizer lexical variables
24260 restore_tokenizer_state($rstate);
24262 # return the here doc targets
24266 sub scan_bare_identifier {
24267 ( $i, $tok, $type, $prototype ) =
24268 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
24269 $rtoken_map, $max_token_index );
24272 sub scan_identifier {
24273 ( $i, $tok, $type, $id_scan_state, $identifier ) =
24274 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
24275 $max_token_index, $expecting, $paren_type[$paren_depth] );
24279 ( $i, $tok, $type, $id_scan_state ) =
24280 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
24281 $id_scan_state, $max_token_index );
24286 ( $i, $type, $number ) =
24287 scan_number_do( $input_line, $i, $rtoken_map, $type,
24288 $max_token_index );
24292 # a sub to warn if token found where term expected
24293 sub error_if_expecting_TERM {
24294 if ( $expecting == TERM ) {
24295 if ( $really_want_term{$last_nonblank_type} ) {
24296 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
24297 $rtoken_type, $input_line );
24303 # a sub to warn if token found where operator expected
24304 sub error_if_expecting_OPERATOR {
24305 if ( $expecting == OPERATOR ) {
24306 my $thing = defined $_[0] ? $_[0] : $tok;
24307 unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
24308 $rtoken_map, $rtoken_type, $input_line );
24309 if ( $i_tok == 0 ) {
24310 interrupt_logfile();
24311 warning("Missing ';' above?\n");
24318 # ------------------------------------------------------------
24319 # end scanner interfaces
24320 # ------------------------------------------------------------
24322 my %is_for_foreach;
24323 @_ = qw(for foreach);
24324 @is_for_foreach{@_} = (1) x scalar(@_);
24328 @is_my_our{@_} = (1) x scalar(@_);
24330 # These keywords may introduce blocks after parenthesized expressions,
24332 # keyword ( .... ) { BLOCK }
24333 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
24334 my %is_blocktype_with_paren;
24336 qw(if elsif unless while until for foreach switch case given when catch);
24337 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
24339 # ------------------------------------------------------------
24340 # begin hash of code for handling most token types
24341 # ------------------------------------------------------------
24342 my $tokenization_code = {
24344 # no special code for these types yet, but syntax checks
24379 error_if_expecting_TERM()
24380 if ( $expecting == TERM );
24383 error_if_expecting_TERM()
24384 if ( $expecting == TERM );
24388 # start looking for a scalar
24389 error_if_expecting_OPERATOR("Scalar")
24390 if ( $expecting == OPERATOR );
24393 if ( $identifier eq '$^W' ) {
24394 $tokenizer_self->{_saw_perl_dash_w} = 1;
24397 # Check for identifier in indirect object slot
24398 # (vorboard.pl, sort.t). Something like:
24399 # /^(print|printf|sort|exec|system)$/
24401 $is_indirect_object_taker{$last_nonblank_token}
24403 || ( ( $last_nonblank_token eq '(' )
24404 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
24405 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
24414 $paren_semicolon_count[$paren_depth] = 0;
24416 $container_type = $want_paren;
24419 elsif ( $statement_type =~ /^sub\b/ ) {
24420 $container_type = $statement_type;
24423 $container_type = $last_nonblank_token;
24425 # We can check for a syntax error here of unexpected '(',
24426 # but this is going to get messy...
24428 $expecting == OPERATOR
24430 # be sure this is not a method call of the form
24431 # &method(...), $method->(..), &{method}(...),
24432 # $ref[2](list) is ok & short for $ref[2]->(list)
24433 # NOTE: at present, braces in something like &{ xxx }
24434 # are not marked as a block, we might have a method call
24435 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
24440 # ref: camel 3 p 703.
24441 if ( $last_last_nonblank_token eq 'do' ) {
24443 "do SUBROUTINE is deprecated; consider & or -> notation\n"
24448 # if this is an empty list, (), then it is not an
24449 # error; for example, we might have a constant pi and
24450 # invoke it with pi() or just pi;
24451 my ( $next_nonblank_token, $i_next ) =
24452 find_next_nonblank_token( $i, $rtokens,
24453 $max_token_index );
24454 if ( $next_nonblank_token ne ')' ) {
24456 error_if_expecting_OPERATOR('(');
24458 if ( $last_nonblank_type eq 'C' ) {
24460 "$last_nonblank_token has a void prototype\n";
24462 elsif ( $last_nonblank_type eq 'i' ) {
24464 && $last_nonblank_token =~ /^\$/ )
24467 "Do you mean '$last_nonblank_token->(' ?\n";
24471 interrupt_logfile();
24475 } ## end if ( $next_nonblank_token...
24476 } ## end else [ if ( $last_last_nonblank_token...
24477 } ## end if ( $expecting == OPERATOR...
24479 $paren_type[$paren_depth] = $container_type;
24480 ( $type_sequence, $indent_flag ) =
24481 increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24483 # propagate types down through nested parens
24484 # for example: the second paren in 'if ((' would be structural
24485 # since the first is.
24487 if ( $last_nonblank_token eq '(' ) {
24488 $type = $last_nonblank_type;
24491 # We exclude parens as structural after a ',' because it
24492 # causes subtle problems with continuation indentation for
24493 # something like this, where the first 'or' will not get
24498 # ( not defined $check )
24500 # or $check eq "new"
24501 # or $check eq "old",
24504 # Likewise, we exclude parens where a statement can start
24505 # because of problems with continuation indentation, like
24508 # ($firstline =~ /^#\!.*perl/)
24509 # and (print $File::Find::name, "\n")
24512 # (ref($usage_fref) =~ /CODE/)
24514 # : (&blast_usage, &blast_params, &blast_general_params);
24520 if ( $last_nonblank_type eq ')' ) {
24522 "Syntax error? found token '$last_nonblank_type' then '('\n"
24525 $paren_structural_type[$paren_depth] = $type;
24529 ( $type_sequence, $indent_flag ) =
24530 decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24532 if ( $paren_structural_type[$paren_depth] eq '{' ) {
24536 $container_type = $paren_type[$paren_depth];
24538 # restore statement type as 'sub' at closing paren of a signature
24539 # so that a subsequent ':' is identified as an attribute
24540 if ( $container_type =~ /^sub\b/ ) {
24541 $statement_type = $container_type;
24544 # /^(for|foreach)$/
24545 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
24546 my $num_sc = $paren_semicolon_count[$paren_depth];
24547 if ( $num_sc > 0 && $num_sc != 2 ) {
24548 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
24552 if ( $paren_depth > 0 ) { $paren_depth-- }
24555 if ( $last_nonblank_type eq ',' ) {
24556 complain("Repeated ','s \n");
24559 # patch for operator_expected: note if we are in the list (use.t)
24560 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
24561 ## FIXME: need to move this elsewhere, perhaps check after a '('
24562 ## elsif ($last_nonblank_token eq '(') {
24563 ## warning("Leading ','s illegal in some versions of perl\n");
24567 $context = UNKNOWN_CONTEXT;
24568 $statement_type = '';
24571 # /^(for|foreach)$/
24572 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
24573 { # mark ; in for loop
24575 # Be careful: we do not want a semicolon such as the
24576 # following to be included:
24578 # for (sort {strcoll($a,$b);} keys %investments) {
24580 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
24581 && $square_bracket_depth ==
24582 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
24586 $paren_semicolon_count[$paren_depth]++;
24592 error_if_expecting_OPERATOR("String")
24593 if ( $expecting == OPERATOR );
24596 $allowed_quote_modifiers = "";
24599 error_if_expecting_OPERATOR("String")
24600 if ( $expecting == OPERATOR );
24603 $allowed_quote_modifiers = "";
24606 error_if_expecting_OPERATOR("String")
24607 if ( $expecting == OPERATOR );
24610 $allowed_quote_modifiers = "";
24615 if ( $expecting == UNKNOWN ) { # indeterminate, must guess..
24617 ( $is_pattern, $msg ) =
24618 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
24619 $max_token_index );
24622 write_diagnostics("DIVIDE:$msg\n");
24623 write_logfile_entry($msg);
24626 else { $is_pattern = ( $expecting == TERM ) }
24631 $allowed_quote_modifiers = '[msixpodualngc]';
24633 else { # not a pattern; check for a /= token
24635 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
24641 #DEBUG - collecting info on what tokens follow a divide
24642 # for development of guessing algorithm
24643 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
24644 # #write_diagnostics( "DIVIDE? $input_line\n" );
24650 # if we just saw a ')', we will label this block with
24651 # its type. We need to do this to allow sub
24652 # code_block_type to determine if this brace starts a
24653 # code block or anonymous hash. (The type of a paren
24654 # pair is the preceding token, such as 'if', 'else',
24656 $container_type = "";
24658 # ATTRS: for a '{' following an attribute list, reset
24659 # things to look like we just saw the sub name
24660 if ( $statement_type =~ /^sub/ ) {
24661 $last_nonblank_token = $statement_type;
24662 $last_nonblank_type = 'i';
24663 $statement_type = "";
24666 # patch for SWITCH/CASE: hide these keywords from an immediately
24667 # following opening brace
24668 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
24669 && $statement_type eq $last_nonblank_token )
24671 $last_nonblank_token = ";";
24674 elsif ( $last_nonblank_token eq ')' ) {
24675 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
24677 # defensive move in case of a nesting error (pbug.t)
24678 # in which this ')' had no previous '('
24679 # this nesting error will have been caught
24680 if ( !defined($last_nonblank_token) ) {
24681 $last_nonblank_token = 'if';
24684 # check for syntax error here;
24685 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
24686 if ( $tokenizer_self->{'_extended_syntax'} ) {
24688 # we append a trailing () to mark this as an unknown
24689 # block type. This allows perltidy to format some
24690 # common extensions of perl syntax.
24691 # This is used by sub code_block_type
24692 $last_nonblank_token .= '()';
24696 join( ' ', sort keys %is_blocktype_with_paren );
24698 "syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
24704 # patch for paren-less for/foreach glitch, part 2.
24705 # see note below under 'qw'
24706 elsif ($last_nonblank_token eq 'qw'
24707 && $is_for_foreach{$want_paren} )
24709 $last_nonblank_token = $want_paren;
24710 if ( $last_last_nonblank_token eq $want_paren ) {
24712 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
24719 # now identify which of the three possible types of
24720 # curly braces we have: hash index container, anonymous
24721 # hash reference, or code block.
24723 # non-structural (hash index) curly brace pair
24724 # get marked 'L' and 'R'
24725 if ( is_non_structural_brace() ) {
24728 # patch for SWITCH/CASE:
24729 # allow paren-less identifier after 'when'
24730 # if the brace is preceded by a space
24731 if ( $statement_type eq 'when'
24732 && $last_nonblank_type eq 'i'
24733 && $last_last_nonblank_type eq 'k'
24734 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
24737 $block_type = $statement_type;
24741 # code and anonymous hash have the same type, '{', but are
24742 # distinguished by 'block_type',
24743 # which will be blank for an anonymous hash
24746 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
24747 $max_token_index );
24749 # patch to promote bareword type to function taking block
24751 && $last_nonblank_type eq 'w'
24752 && $last_nonblank_i >= 0 )
24754 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
24755 $routput_token_type->[$last_nonblank_i] = 'G';
24759 # patch for SWITCH/CASE: if we find a stray opening block brace
24760 # where we might accept a 'case' or 'when' block, then take it
24761 if ( $statement_type eq 'case'
24762 || $statement_type eq 'when' )
24764 if ( !$block_type || $block_type eq '}' ) {
24765 $block_type = $statement_type;
24770 $brace_type[ ++$brace_depth ] = $block_type;
24771 $brace_package[$brace_depth] = $current_package;
24772 $brace_structural_type[$brace_depth] = $type;
24773 $brace_context[$brace_depth] = $context;
24774 ( $type_sequence, $indent_flag ) =
24775 increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24778 $block_type = $brace_type[$brace_depth];
24779 if ($block_type) { $statement_type = '' }
24780 if ( defined( $brace_package[$brace_depth] ) ) {
24781 $current_package = $brace_package[$brace_depth];
24784 # can happen on brace error (caught elsewhere)
24787 ( $type_sequence, $indent_flag ) =
24788 decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24790 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
24794 # propagate type information for 'do' and 'eval' blocks, and also
24795 # for smartmatch operator. This is necessary to enable us to know
24796 # if an operator or term is expected next.
24797 if ( $is_block_operator{$block_type} ) {
24798 $tok = $block_type;
24801 $context = $brace_context[$brace_depth];
24802 if ( $brace_depth > 0 ) { $brace_depth--; }
24804 '&' => sub { # maybe sub call? start looking
24806 # We have to check for sub call unless we are sure we
24807 # are expecting an operator. This example from s2p
24808 # got mistaken as a q operator in an early version:
24809 # print BODY &q(<<'EOT');
24810 if ( $expecting != OPERATOR ) {
24812 # But only look for a sub call if we are expecting a term or
24813 # if there is no existing space after the &.
24814 # For example we probably don't want & as sub call here:
24815 # Fcntl::S_IRUSR & $mode;
24816 if ( $expecting == TERM || $next_type ne 'b' ) {
24823 '<' => sub { # angle operator or less than?
24825 if ( $expecting != OPERATOR ) {
24827 find_angle_operator_termination( $input_line, $i, $rtoken_map,
24828 $expecting, $max_token_index );
24830 if ( $type eq '<' && $expecting == TERM ) {
24831 error_if_expecting_TERM();
24832 interrupt_logfile();
24833 warning("Unterminated <> operator?\n");
24840 '?' => sub { # ?: conditional or starting pattern?
24844 if ( $expecting == UNKNOWN ) {
24847 ( $is_pattern, $msg ) =
24848 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
24849 $max_token_index );
24851 if ($msg) { write_logfile_entry($msg) }
24853 else { $is_pattern = ( $expecting == TERM ) }
24858 $allowed_quote_modifiers = '[msixpodualngc]';
24861 ( $type_sequence, $indent_flag ) =
24862 increase_nesting_depth( QUESTION_COLON,
24863 $$rtoken_map[$i_tok] );
24866 '*' => sub { # typeglob, or multiply?
24868 if ( $expecting == TERM ) {
24873 if ( $$rtokens[ $i + 1 ] eq '=' ) {
24878 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
24882 if ( $$rtokens[ $i + 1 ] eq '=' ) {
24890 '.' => sub { # what kind of . ?
24892 if ( $expecting != OPERATOR ) {
24894 if ( $type eq '.' ) {
24895 error_if_expecting_TERM()
24896 if ( $expecting == TERM );
24904 # if this is the first nonblank character, call it a label
24905 # since perl seems to just swallow it
24906 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
24910 # ATTRS: check for a ':' which introduces an attribute list
24911 # (this might eventually get its own token type)
24912 elsif ( $statement_type =~ /^sub\b/ ) {
24914 $in_attribute_list = 1;
24917 # check for scalar attribute, such as
24918 # my $foo : shared = 1;
24919 elsif ($is_my_our{$statement_type}
24920 && $current_depth[QUESTION_COLON] == 0 )
24923 $in_attribute_list = 1;
24926 # otherwise, it should be part of a ?/: operator
24928 ( $type_sequence, $indent_flag ) =
24929 decrease_nesting_depth( QUESTION_COLON,
24930 $$rtoken_map[$i_tok] );
24931 if ( $last_nonblank_token eq '?' ) {
24932 warning("Syntax error near ? :\n");
24936 '+' => sub { # what kind of plus?
24938 if ( $expecting == TERM ) {
24939 my $number = scan_number();
24941 # unary plus is safest assumption if not a number
24942 if ( !defined($number) ) { $type = 'p'; }
24944 elsif ( $expecting == OPERATOR ) {
24947 if ( $next_type eq 'w' ) { $type = 'p' }
24952 error_if_expecting_OPERATOR("Array")
24953 if ( $expecting == OPERATOR );
24956 '%' => sub { # hash or modulo?
24958 # first guess is hash if no following blank
24959 if ( $expecting == UNKNOWN ) {
24960 if ( $next_type ne 'b' ) { $expecting = TERM }
24962 if ( $expecting == TERM ) {
24967 $square_bracket_type[ ++$square_bracket_depth ] =
24968 $last_nonblank_token;
24969 ( $type_sequence, $indent_flag ) =
24970 increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24972 # It may seem odd, but structural square brackets have
24973 # type '{' and '}'. This simplifies the indentation logic.
24974 if ( !is_non_structural_brace() ) {
24977 $square_bracket_structural_type[$square_bracket_depth] = $type;
24980 ( $type_sequence, $indent_flag ) =
24981 decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24983 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
24988 # propagate type information for smartmatch operator. This is
24989 # necessary to enable us to know if an operator or term is expected
24991 if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
24992 $tok = $square_bracket_type[$square_bracket_depth];
24995 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
24997 '-' => sub { # what kind of minus?
24999 if ( ( $expecting != OPERATOR )
25000 && $is_file_test_operator{$next_tok} )
25002 my ( $next_nonblank_token, $i_next ) =
25003 find_next_nonblank_token( $i + 1, $rtokens,
25004 $max_token_index );
25006 # check for a quoted word like "-w=>xx";
25007 # it is sufficient to just check for a following '='
25008 if ( $next_nonblank_token eq '=' ) {
25017 elsif ( $expecting == TERM ) {
25018 my $number = scan_number();
25020 # maybe part of bareword token? unary is safest
25021 if ( !defined($number) ) { $type = 'm'; }
25024 elsif ( $expecting == OPERATOR ) {
25028 if ( $next_type eq 'w' ) {
25036 # check for special variables like ${^WARNING_BITS}
25037 if ( $expecting == TERM ) {
25039 # FIXME: this should work but will not catch errors
25040 # because we also have to be sure that previous token is
25041 # a type character ($,@,%).
25042 if ( $last_nonblank_token eq '{'
25043 && ( $next_tok =~ /^[A-Za-z_]/ ) )
25046 if ( $next_tok eq 'W' ) {
25047 $tokenizer_self->{_saw_perl_dash_w} = 1;
25049 $tok = $tok . $next_tok;
25055 unless ( error_if_expecting_TERM() ) {
25057 # Something like this is valid but strange:
25059 complain("The '^' seems unusual here\n");
25065 '::' => sub { # probably a sub call
25066 scan_bare_identifier();
25068 '<<' => sub { # maybe a here-doc?
25070 unless ( $i < $max_token_index )
25071 ; # here-doc not possible if end of line
25073 if ( $expecting != OPERATOR ) {
25074 my ( $found_target, $here_doc_target, $here_quote_character,
25077 $found_target, $here_doc_target, $here_quote_character, $i,
25080 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
25081 $max_token_index );
25083 if ($found_target) {
25084 push @{$rhere_target_list},
25085 [ $here_doc_target, $here_quote_character ];
25087 if ( length($here_doc_target) > 80 ) {
25088 my $truncated = substr( $here_doc_target, 0, 80 );
25089 complain("Long here-target: '$truncated' ...\n");
25091 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
25093 "Unconventional here-target: '$here_doc_target'\n"
25097 elsif ( $expecting == TERM ) {
25098 unless ($saw_error) {
25100 # shouldn't happen..
25101 warning("Program bug; didn't find here doc target\n");
25102 report_definite_bug();
25111 # if -> points to a bare word, we must scan for an identifier,
25112 # otherwise something like ->y would look like the y operator
25116 # type = 'pp' for pre-increment, '++' for post-increment
25118 if ( $expecting == TERM ) { $type = 'pp' }
25119 elsif ( $expecting == UNKNOWN ) {
25120 my ( $next_nonblank_token, $i_next ) =
25121 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25122 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
25127 if ( $last_nonblank_type eq $tok ) {
25128 complain("Repeated '=>'s \n");
25131 # patch for operator_expected: note if we are in the list (use.t)
25132 # TODO: make version numbers a new token type
25133 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
25136 # type = 'mm' for pre-decrement, '--' for post-decrement
25139 if ( $expecting == TERM ) { $type = 'mm' }
25140 elsif ( $expecting == UNKNOWN ) {
25141 my ( $next_nonblank_token, $i_next ) =
25142 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25143 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
25148 error_if_expecting_TERM()
25149 if ( $expecting == TERM );
25153 error_if_expecting_TERM()
25154 if ( $expecting == TERM );
25158 error_if_expecting_TERM()
25159 if ( $expecting == TERM );
25163 # ------------------------------------------------------------
25164 # end hash of code for handling individual token types
25165 # ------------------------------------------------------------
25167 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
25169 # These block types terminate statements and do not need a trailing
25171 # patched for SWITCH/CASE/
25172 my %is_zero_continuation_block_type;
25173 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
25174 if elsif else unless while until for foreach switch case given when);
25175 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
25177 my %is_not_zero_continuation_block_type;
25178 @_ = qw(sort grep map do eval);
25179 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
25181 my %is_logical_container;
25182 @_ = qw(if elsif unless while and or err not && ! || for foreach);
25183 @is_logical_container{@_} = (1) x scalar(@_);
25185 my %is_binary_type;
25187 @is_binary_type{@_} = (1) x scalar(@_);
25189 my %is_binary_keyword;
25190 @_ = qw(and or err eq ne cmp);
25191 @is_binary_keyword{@_} = (1) x scalar(@_);
25193 # 'L' is token for opening { at hash key
25194 my %is_opening_type;
25195 @_ = qw" L { ( [ ";
25196 @is_opening_type{@_} = (1) x scalar(@_);
25198 # 'R' is token for closing } at hash key
25199 my %is_closing_type;
25200 @_ = qw" R } ) ] ";
25201 @is_closing_type{@_} = (1) x scalar(@_);
25203 my %is_redo_last_next_goto;
25204 @_ = qw(redo last next goto);
25205 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
25207 my %is_use_require;
25208 @_ = qw(use require);
25209 @is_use_require{@_} = (1) x scalar(@_);
25211 my %is_sub_package;
25212 @_ = qw(sub package);
25213 @is_sub_package{@_} = (1) x scalar(@_);
25215 # This hash holds the hash key in $tokenizer_self for these keywords:
25216 my %is_format_END_DATA = (
25217 'format' => '_in_format',
25218 '__END__' => '_in_end',
25219 '__DATA__' => '_in_data',
25222 # original ref: camel 3 p 147,
25223 # but perl may accept undocumented flags
25224 # perl 5.10 adds 'p' (preserve)
25225 # Perl version 5.22 added 'n'
25226 # From http://perldoc.perl.org/perlop.html we have
25227 # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
25228 # s/PATTERN/REPLACEMENT/msixpodualngcer
25229 # y/SEARCHLIST/REPLACEMENTLIST/cdsr
25230 # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
25231 # qr/STRING/msixpodualn
25232 my %quote_modifiers = (
25233 's' => '[msixpodualngcer]',
25236 'm' => '[msixpodualngc]',
25237 'qr' => '[msixpodualn]',
25244 # table showing how many quoted things to look for after quote operator..
25245 # s, y, tr have 2 (pattern and replacement)
25246 # others have 1 (pattern only)
25247 my %quote_items = (
25259 sub tokenize_this_line {
25261 # This routine breaks a line of perl code into tokens which are of use in
25262 # indentation and reformatting. One of my goals has been to define tokens
25263 # such that a newline may be inserted between any pair of tokens without
25264 # changing or invalidating the program. This version comes close to this,
25265 # although there are necessarily a few exceptions which must be caught by
25266 # the formatter. Many of these involve the treatment of bare words.
25268 # The tokens and their types are returned in arrays. See previous
25269 # routine for their names.
25271 # See also the array "valid_token_types" in the BEGIN section for an
25274 # To simplify things, token types are either a single character, or they
25275 # are identical to the tokens themselves.
25277 # As a debugging aid, the -D flag creates a file containing a side-by-side
25278 # comparison of the input string and its tokenization for each line of a file.
25279 # This is an invaluable debugging aid.
25281 # In addition to tokens, and some associated quantities, the tokenizer
25282 # also returns flags indication any special line types. These include
25283 # quotes, here_docs, formats.
25285 # -----------------------------------------------------------------------
25287 # How to add NEW_TOKENS:
25289 # New token types will undoubtedly be needed in the future both to keep up
25290 # with changes in perl and to help adapt the tokenizer to other applications.
25292 # Here are some notes on the minimal steps. I wrote these notes while
25293 # adding the 'v' token type for v-strings, which are things like version
25294 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
25295 # can use your editor to search for the string "NEW_TOKENS" to find the
25296 # appropriate sections to change):
25298 # *. Try to talk somebody else into doing it! If not, ..
25300 # *. Make a backup of your current version in case things don't work out!
25302 # *. Think of a new, unused character for the token type, and add to
25303 # the array @valid_token_types in the BEGIN section of this package.
25304 # For example, I used 'v' for v-strings.
25306 # *. Implement coding to recognize the $type of the token in this routine.
25307 # This is the hardest part, and is best done by imitating or modifying
25308 # some of the existing coding. For example, to recognize v-strings, I
25309 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
25310 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
25312 # *. Update sub operator_expected. This update is critically important but
25313 # the coding is trivial. Look at the comments in that routine for help.
25314 # For v-strings, which should behave like numbers, I just added 'v' to the
25315 # regex used to handle numbers and strings (types 'n' and 'Q').
25317 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
25318 # Perl::Tidy::Formatter for breaking lines around this token type. You can
25319 # skip this step and take the default at first, then adjust later to get
25320 # desired results. For adding type 'v', I looked at sub bond_strength and
25321 # saw that number type 'n' was using default strengths, so I didn't do
25322 # anything. I may tune it up someday if I don't like the way line
25323 # breaks with v-strings look.
25325 # *. Implement a 'whitespace' rule in sub set_white_space_flag in
25326 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
25327 # and saw that type 'n' used spaces on both sides, so I just added 'v'
25328 # to the array @spaces_both_sides.
25330 # *. Update HtmlWriter package so that users can colorize the token as
25331 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
25332 # that package. For v-strings, I initially chose to use a default color
25333 # equal to the default for numbers, but it might be nice to change that
25336 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
25338 # *. Run lots and lots of debug tests. Start with special files designed
25339 # to test the new token type. Run with the -D flag to create a .DEBUG
25340 # file which shows the tokenization. When these work ok, test as many old
25341 # scripts as possible. Start with all of the '.t' files in the 'test'
25342 # directory of the distribution file. Compare .tdy output with previous
25343 # version and updated version to see the differences. Then include as
25344 # many more files as possible. My own technique has been to collect a huge
25345 # number of perl scripts (thousands!) into one directory and run perltidy
25346 # *, then run diff between the output of the previous version and the
25349 # *. For another example, search for the smartmatch operator '~~'
25350 # with your editor to see where updates were made for it.
25352 # -----------------------------------------------------------------------
25354 my $line_of_tokens = shift;
25355 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
25357 # patch while coding change is underway
25358 # make callers private data to allow access
25359 # $tokenizer_self = $caller_tokenizer_self;
25361 # extract line number for use in error messages
25362 $input_line_number = $line_of_tokens->{_line_number};
25364 # reinitialize for multi-line quote
25365 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
25367 # check for pod documentation
25368 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
25370 # must not be in multi-line quote
25371 # and must not be in an equation
25372 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
25374 $tokenizer_self->{_in_pod} = 1;
25379 $input_line = $untrimmed_input_line;
25383 # trim start of this line unless we are continuing a quoted line
25384 # do not trim end because we might end in a quote (test: deken4.pl)
25385 # Perl::Tidy::Formatter will delete needless trailing blanks
25386 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
25387 $input_line =~ s/^\s*//; # trim left end
25390 # Set a flag to indicate if we might be at an __END__ or __DATA__ line
25391 # This will be used below to avoid quoting a bare word followed by
25393 my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/;
25395 # update the copy of the line for use in error messages
25396 # This must be exactly what we give the pre_tokenizer
25397 $tokenizer_self->{_line_text} = $input_line;
25399 # re-initialize for the main loop
25400 $routput_token_list = []; # stack of output token indexes
25401 $routput_token_type = []; # token types
25402 $routput_block_type = []; # types of code block
25403 $routput_container_type = []; # paren types, such as if, elsif, ..
25404 $routput_type_sequence = []; # nesting sequential number
25406 $rhere_target_list = [];
25408 $tok = $last_nonblank_token;
25409 $type = $last_nonblank_type;
25410 $prototype = $last_nonblank_prototype;
25411 $last_nonblank_i = -1;
25412 $block_type = $last_nonblank_block_type;
25413 $container_type = $last_nonblank_container_type;
25414 $type_sequence = $last_nonblank_type_sequence;
25418 # tokenization is done in two stages..
25419 # stage 1 is a very simple pre-tokenization
25420 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
25422 # a little optimization for a full-line comment
25423 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
25424 $max_tokens_wanted = 1 # no use tokenizing a comment
25427 # start by breaking the line into pre-tokens
25428 ( $rtokens, $rtoken_map, $rtoken_type ) =
25429 pre_tokenize( $input_line, $max_tokens_wanted );
25431 $max_token_index = scalar(@$rtokens) - 1;
25432 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
25433 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
25434 push( @$rtoken_type, 'b', 'b', 'b' );
25436 # initialize for main loop
25437 for $i ( 0 .. $max_token_index + 3 ) {
25438 $routput_token_type->[$i] = "";
25439 $routput_block_type->[$i] = "";
25440 $routput_container_type->[$i] = "";
25441 $routput_type_sequence->[$i] = "";
25442 $routput_indent_flag->[$i] = 0;
25447 # ------------------------------------------------------------
25448 # begin main tokenization loop
25449 # ------------------------------------------------------------
25451 # we are looking at each pre-token of one line and combining them
25453 while ( ++$i <= $max_token_index ) {
25455 if ($in_quote) { # continue looking for end of a quote
25456 $type = $quote_type;
25458 unless ( @{$routput_token_list} )
25459 { # initialize if continuation line
25460 push( @{$routput_token_list}, $i );
25461 $routput_token_type->[$i] = $type;
25464 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
25466 # scan for the end of the quote or pattern
25468 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25469 $quoted_string_1, $quoted_string_2
25472 $i, $in_quote, $quote_character,
25473 $quote_pos, $quote_depth, $quoted_string_1,
25474 $quoted_string_2, $rtokens, $rtoken_map,
25478 # all done if we didn't find it
25479 last if ($in_quote);
25481 # save pattern and replacement text for rescanning
25482 my $qs1 = $quoted_string_1;
25483 my $qs2 = $quoted_string_2;
25485 # re-initialize for next search
25486 $quote_character = '';
25489 $quoted_string_1 = "";
25490 $quoted_string_2 = "";
25491 last if ( ++$i > $max_token_index );
25493 # look for any modifiers
25494 if ($allowed_quote_modifiers) {
25496 # check for exact quote modifiers
25497 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
25498 my $str = $$rtokens[$i];
25499 my $saw_modifier_e;
25500 while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
25501 my $pos = pos($str);
25502 my $char = substr( $str, $pos - 1, 1 );
25503 $saw_modifier_e ||= ( $char eq 'e' );
25506 # For an 'e' quote modifier we must scan the replacement
25507 # text for here-doc targets.
25508 if ($saw_modifier_e) {
25510 my $rht = scan_replacement_text($qs1);
25512 # Change type from 'Q' to 'h' for quotes with
25513 # here-doc targets so that the formatter (see sub
25514 # print_line_of_tokens) will not make any line
25515 # breaks after this point.
25517 push @{$rhere_target_list}, @{$rht};
25519 if ( $i_tok < 0 ) {
25520 my $ilast = $routput_token_list->[-1];
25521 $routput_token_type->[$ilast] = $type;
25526 if ( defined( pos($str) ) ) {
25529 if ( pos($str) == length($str) ) {
25530 last if ( ++$i > $max_token_index );
25533 # Looks like a joined quote modifier
25534 # and keyword, maybe something like
25535 # s/xxx/yyy/gefor @k=...
25536 # Example is "galgen.pl". Would have to split
25537 # the word and insert a new token in the
25538 # pre-token list. This is so rare that I haven't
25539 # done it. Will just issue a warning citation.
25541 # This error might also be triggered if my quote
25542 # modifier characters are incomplete
25546 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
25547 Please put a space between quote modifiers and trailing keywords.
25550 # print "token $$rtokens[$i]\n";
25551 # my $num = length($str) - pos($str);
25552 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
25553 # print "continuing with new token $$rtokens[$i]\n";
25555 # skipping past this token does least damage
25556 last if ( ++$i > $max_token_index );
25561 # example file: rokicki4.pl
25562 # This error might also be triggered if my quote
25563 # modifier characters are incomplete
25564 write_logfile_entry(
25565 "Note: found word $str at quote modifier location\n"
25571 $allowed_quote_modifiers = "";
25575 unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
25577 # try to catch some common errors
25578 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
25580 if ( $last_nonblank_token eq 'eq' ) {
25581 complain("Should 'eq' be '==' here ?\n");
25583 elsif ( $last_nonblank_token eq 'ne' ) {
25584 complain("Should 'ne' be '!=' here ?\n");
25588 $last_last_nonblank_token = $last_nonblank_token;
25589 $last_last_nonblank_type = $last_nonblank_type;
25590 $last_last_nonblank_block_type = $last_nonblank_block_type;
25591 $last_last_nonblank_container_type =
25592 $last_nonblank_container_type;
25593 $last_last_nonblank_type_sequence =
25594 $last_nonblank_type_sequence;
25595 $last_nonblank_token = $tok;
25596 $last_nonblank_type = $type;
25597 $last_nonblank_prototype = $prototype;
25598 $last_nonblank_block_type = $block_type;
25599 $last_nonblank_container_type = $container_type;
25600 $last_nonblank_type_sequence = $type_sequence;
25601 $last_nonblank_i = $i_tok;
25604 # store previous token type
25605 if ( $i_tok >= 0 ) {
25606 $routput_token_type->[$i_tok] = $type;
25607 $routput_block_type->[$i_tok] = $block_type;
25608 $routput_container_type->[$i_tok] = $container_type;
25609 $routput_type_sequence->[$i_tok] = $type_sequence;
25610 $routput_indent_flag->[$i_tok] = $indent_flag;
25612 my $pre_tok = $$rtokens[$i]; # get the next pre-token
25613 my $pre_type = $$rtoken_type[$i]; # and type
25615 $type = $pre_type; # to be modified as necessary
25616 $block_type = ""; # blank for all tokens except code block braces
25617 $container_type = ""; # blank for all tokens except some parens
25618 $type_sequence = ""; # blank for all tokens except ?/:
25620 $prototype = ""; # blank for all tokens except user defined subs
25623 # this pre-token will start an output token
25624 push( @{$routput_token_list}, $i_tok );
25626 # continue gathering identifier if necessary
25627 # but do not start on blanks and comments
25628 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
25630 if ( $id_scan_state =~ /^(sub|package)/ ) {
25637 last if ($id_scan_state);
25638 next if ( ( $i > 0 ) || $type );
25640 # didn't find any token; start over
25645 # handle whitespace tokens..
25646 next if ( $type eq 'b' );
25647 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
25648 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
25650 # Build larger tokens where possible, since we are not in a quote.
25652 # First try to assemble digraphs. The following tokens are
25653 # excluded and handled specially:
25654 # '/=' is excluded because the / might start a pattern.
25655 # 'x=' is excluded since it might be $x=, with $ on previous line
25656 # '**' and *= might be typeglobs of punctuation variables
25657 # I have allowed tokens starting with <, such as <=,
25658 # because I don't think these could be valid angle operators.
25659 # test file: storrs4.pl
25660 my $test_tok = $tok . $$rtokens[ $i + 1 ];
25661 my $combine_ok = $is_digraph{$test_tok};
25663 # check for special cases which cannot be combined
25666 # '//' must be defined_or operator if an operator is expected.
25667 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
25668 # could be migrated here for clarity
25670 # Patch for RT#102371, misparsing a // in the following snippet:
25671 # state $b //= ccc();
25672 # The solution is to always accept the digraph (or trigraph) after
25673 # token type 'Z' (possible file handle). The reason is that
25674 # sub operator_expected gives TERM expected here, which is
25675 # wrong in this case.
25676 if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
25677 my $next_type = $$rtokens[ $i + 1 ];
25679 operator_expected( $prev_type, $tok, $next_type );
25681 # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
25682 $combine_ok = 0 if ( $expecting == TERM );
25688 && ( $test_tok ne '/=' ) # might be pattern
25689 && ( $test_tok ne 'x=' ) # might be $x
25690 && ( $test_tok ne '**' ) # typeglob?
25691 && ( $test_tok ne '*=' ) # typeglob?
25697 # Now try to assemble trigraphs. Note that all possible
25698 # perl trigraphs can be constructed by appending a character
25700 $test_tok = $tok . $$rtokens[ $i + 1 ];
25702 if ( $is_trigraph{$test_tok} ) {
25707 # The only current tetragraph is the double diamond operator
25708 # and its first three characters are not a trigraph, so
25709 # we do can do a special test for it
25710 elsif ( $test_tok eq '<<>' ) {
25711 $test_tok .= $$rtokens[ $i + 2 ];
25712 if ( $is_tetragraph{$test_tok} ) {
25720 $next_tok = $$rtokens[ $i + 1 ];
25721 $next_type = $$rtoken_type[ $i + 1 ];
25723 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
25726 $last_nonblank_token, $tok,
25727 $next_tok, $brace_depth,
25728 $brace_type[$brace_depth], $paren_depth,
25729 $paren_type[$paren_depth]
25731 print STDOUT "TOKENIZE:(@debug_list)\n";
25734 # turn off attribute list on first non-blank, non-bareword
25735 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
25737 ###############################################################
25738 # We have the next token, $tok.
25739 # Now we have to examine this token and decide what it is
25740 # and define its $type
25742 # section 1: bare words
25743 ###############################################################
25745 if ( $pre_type eq 'w' ) {
25746 $expecting = operator_expected( $prev_type, $tok, $next_type );
25747 my ( $next_nonblank_token, $i_next ) =
25748 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25750 # ATTRS: handle sub and variable attributes
25751 if ($in_attribute_list) {
25753 # treat bare word followed by open paren like qw(
25754 if ( $next_nonblank_token eq '(' ) {
25755 $in_quote = $quote_items{'q'};
25756 $allowed_quote_modifiers = $quote_modifiers{'q'};
25762 # handle bareword not followed by open paren
25769 # quote a word followed by => operator
25770 # unless the word __END__ or __DATA__ and the only word on
25772 if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) {
25774 if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
25775 if ( $is_constant{$current_package}{$tok} ) {
25778 elsif ( $is_user_function{$current_package}{$tok} ) {
25781 $user_function_prototype{$current_package}{$tok};
25783 elsif ( $tok =~ /^v\d+$/ ) {
25785 report_v_string($tok);
25787 else { $type = 'w' }
25793 # quote a bare word within braces..like xxx->{s}; note that we
25794 # must be sure this is not a structural brace, to avoid
25795 # mistaking {s} in the following for a quoted bare word:
25796 # for(@[){s}bla}BLA}
25797 # Also treat q in something like var{-q} as a bare word, not qoute operator
25799 $next_nonblank_token eq '}'
25801 $last_nonblank_type eq 'L'
25802 || ( $last_nonblank_type eq 'm'
25803 && $last_last_nonblank_type eq 'L' )
25811 # a bare word immediately followed by :: is not a keyword;
25812 # use $tok_kw when testing for keywords to avoid a mistake
25814 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
25819 # handle operator x (now we know it isn't $x=)
25820 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
25821 if ( $tok eq 'x' ) {
25823 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
25833 # FIXME: Patch: mark something like x4 as an integer for now
25834 # It gets fixed downstream. This is easier than
25835 # splitting the pretoken.
25840 elsif ( $tok_kw eq 'CORE::' ) {
25841 $type = $tok = $tok_kw;
25844 elsif ( ( $tok eq 'strict' )
25845 and ( $last_nonblank_token eq 'use' ) )
25847 $tokenizer_self->{_saw_use_strict} = 1;
25848 scan_bare_identifier();
25851 elsif ( ( $tok eq 'warnings' )
25852 and ( $last_nonblank_token eq 'use' ) )
25854 $tokenizer_self->{_saw_perl_dash_w} = 1;
25856 # scan as identifier, so that we pick up something like:
25857 # use warnings::register
25858 scan_bare_identifier();
25862 $tok eq 'AutoLoader'
25863 && $tokenizer_self->{_look_for_autoloader}
25865 $last_nonblank_token eq 'use'
25867 # these regexes are from AutoSplit.pm, which we want
25869 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
25870 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
25874 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
25875 $tokenizer_self->{_saw_autoloader} = 1;
25876 $tokenizer_self->{_look_for_autoloader} = 0;
25877 scan_bare_identifier();
25881 $tok eq 'SelfLoader'
25882 && $tokenizer_self->{_look_for_selfloader}
25883 && ( $last_nonblank_token eq 'use'
25884 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
25885 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
25888 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
25889 $tokenizer_self->{_saw_selfloader} = 1;
25890 $tokenizer_self->{_look_for_selfloader} = 0;
25891 scan_bare_identifier();
25894 elsif ( ( $tok eq 'constant' )
25895 and ( $last_nonblank_token eq 'use' ) )
25897 scan_bare_identifier();
25898 my ( $next_nonblank_token, $i_next ) =
25899 find_next_nonblank_token( $i, $rtokens,
25900 $max_token_index );
25902 if ($next_nonblank_token) {
25904 if ( $is_keyword{$next_nonblank_token} ) {
25906 # Assume qw is used as a quote and okay, as in:
25907 # use constant qw{ DEBUG 0 };
25908 # Not worth trying to parse for just a warning
25910 # NOTE: This warning is deactivated because recent
25911 # versions of perl do not complain here, but
25912 # the coding is retained for reference.
25913 if ( 0 && $next_nonblank_token ne 'qw' ) {
25915 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
25920 # FIXME: could check for error in which next token is
25921 # not a word (number, punctuation, ..)
25923 $is_constant{$current_package}{$next_nonblank_token}
25929 # various quote operators
25930 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
25932 if ( $expecting == OPERATOR ) {
25934 # Be careful not to call an error for a qw quote
25935 # where a parenthesized list is allowed. For example,
25936 # it could also be a for/foreach construct such as
25938 # foreach my $key qw\Uno Due Tres Quadro\ {
25939 # print "Set $key\n";
25943 # Or it could be a function call.
25944 # NOTE: Braces in something like &{ xxx } are not
25945 # marked as a block, we might have a method call.
25946 # &method(...), $method->(..), &{method}(...),
25947 # $ref[2](list) is ok & short for $ref[2]->(list)
25949 # See notes in 'sub code_block_type' and
25950 # 'sub is_non_structural_brace'
25954 && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
25955 || $is_for_foreach{$want_paren} )
25958 error_if_expecting_OPERATOR();
25961 $in_quote = $quote_items{$tok};
25962 $allowed_quote_modifiers = $quote_modifiers{$tok};
25964 # All quote types are 'Q' except possibly qw quotes.
25965 # qw quotes are special in that they may generally be trimmed
25966 # of leading and trailing whitespace. So they are given a
25967 # separate type, 'q', unless requested otherwise.
25969 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
25972 $quote_type = $type;
25975 # check for a statement label
25977 ( $next_nonblank_token eq ':' )
25978 && ( $$rtokens[ $i_next + 1 ] ne ':' )
25979 && ( $i_next <= $max_token_index ) # colon on same line
25983 if ( $tok !~ /[A-Z]/ ) {
25984 push @{ $tokenizer_self->{_rlower_case_labels_at} },
25985 $input_line_number;
25993 # 'sub' || 'package'
25994 elsif ( $is_sub_package{$tok_kw} ) {
25995 error_if_expecting_OPERATOR()
25996 if ( $expecting == OPERATOR );
26000 # Note on token types for format, __DATA__, __END__:
26001 # It simplifies things to give these type ';', so that when we
26002 # start rescanning we will be expecting a token of type TERM.
26003 # We will switch to type 'k' before outputting the tokens.
26004 elsif ( $is_format_END_DATA{$tok_kw} ) {
26005 $type = ';'; # make tokenizer look for TERM next
26006 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
26010 elsif ( $is_keyword{$tok_kw} ) {
26013 # Since for and foreach may not be followed immediately
26014 # by an opening paren, we have to remember which keyword
26015 # is associated with the next '('
26016 if ( $is_for_foreach{$tok} ) {
26017 if ( new_statement_ok() ) {
26018 $want_paren = $tok;
26022 # recognize 'use' statements, which are special
26023 elsif ( $is_use_require{$tok} ) {
26024 $statement_type = $tok;
26025 error_if_expecting_OPERATOR()
26026 if ( $expecting == OPERATOR );
26029 # remember my and our to check for trailing ": shared"
26030 elsif ( $is_my_our{$tok} ) {
26031 $statement_type = $tok;
26034 # Check for misplaced 'elsif' and 'else', but allow isolated
26035 # else or elsif blocks to be formatted. This is indicated
26036 # by a last noblank token of ';'
26037 elsif ( $tok eq 'elsif' ) {
26038 if ( $last_nonblank_token ne ';'
26039 && $last_nonblank_block_type !~
26040 /^(if|elsif|unless)$/ )
26043 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
26047 elsif ( $tok eq 'else' ) {
26049 # patched for SWITCH/CASE
26051 $last_nonblank_token ne ';'
26052 && $last_nonblank_block_type !~
26053 /^(if|elsif|unless|case|when)$/
26055 # patch to avoid an unwanted error message for
26056 # the case of a parenless 'case' (RT 105484):
26057 # switch ( 1 ) { case x { 2 } else { } }
26058 && $statement_type !~
26059 /^(if|elsif|unless|case|when)$/
26063 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
26067 elsif ( $tok eq 'continue' ) {
26068 if ( $last_nonblank_token ne ';'
26069 && $last_nonblank_block_type !~
26070 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
26073 # note: ';' '{' and '}' in list above
26074 # because continues can follow bare blocks;
26075 # ':' is labeled block
26077 ############################################
26078 # NOTE: This check has been deactivated because
26079 # continue has an alternative usage for given/when
26080 # blocks in perl 5.10
26081 ## warning("'$tok' should follow a block\n");
26082 ############################################
26086 # patch for SWITCH/CASE if 'case' and 'when are
26087 # treated as keywords.
26088 elsif ( $tok eq 'when' || $tok eq 'case' ) {
26089 $statement_type = $tok; # next '{' is block
26093 # indent trailing if/unless/while/until
26094 # outdenting will be handled by later indentation loop
26095 ## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
26107 ## if ( $tok =~ /^(if|unless|while|until)$/
26108 ## && $next_nonblank_token ne '(' )
26110 ## $indent_flag = 1;
26114 # check for inline label following
26115 # /^(redo|last|next|goto)$/
26116 elsif (( $last_nonblank_type eq 'k' )
26117 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
26123 # something else --
26126 scan_bare_identifier();
26127 if ( $type eq 'w' ) {
26129 if ( $expecting == OPERATOR ) {
26131 # don't complain about possible indirect object
26135 # sub new($) { ... }
26136 # $b = new A::; # calls A::new
26137 # $c = new A; # same thing but suspicious
26138 # This will call A::new but we have a 'new' in
26139 # main:: which looks like a constant.
26141 if ( $last_nonblank_type eq 'C' ) {
26142 if ( $tok !~ /::$/ ) {
26144 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
26145 Maybe indirectet object notation?
26150 error_if_expecting_OPERATOR("bareword");
26154 # mark bare words immediately followed by a paren as
26156 $next_tok = $$rtokens[ $i + 1 ];
26157 if ( $next_tok eq '(' ) {
26161 # underscore after file test operator is file handle
26162 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
26166 # patch for SWITCH/CASE if 'case' and 'when are
26167 # not treated as keywords:
26171 && $brace_type[$brace_depth] eq 'switch'
26173 || ( $tok eq 'when'
26174 && $brace_type[$brace_depth] eq 'given' )
26177 $statement_type = $tok; # next '{' is block
26178 $type = 'k'; # for keyword syntax coloring
26181 # patch for SWITCH/CASE if switch and given not keywords
26182 # Switch is not a perl 5 keyword, but we will gamble
26183 # and mark switch followed by paren as a keyword. This
26184 # is only necessary to get html syntax coloring nice,
26185 # and does not commit this as being a switch/case.
26186 if ( $next_nonblank_token eq '('
26187 && ( $tok eq 'switch' || $tok eq 'given' ) )
26189 $type = 'k'; # for keyword syntax coloring
26195 ###############################################################
26196 # section 2: strings of digits
26197 ###############################################################
26198 elsif ( $pre_type eq 'd' ) {
26199 $expecting = operator_expected( $prev_type, $tok, $next_type );
26200 error_if_expecting_OPERATOR("Number")
26201 if ( $expecting == OPERATOR );
26202 my $number = scan_number();
26203 if ( !defined($number) ) {
26205 # shouldn't happen - we should always get a number
26206 warning("non-number beginning with digit--program bug\n");
26207 report_definite_bug();
26211 ###############################################################
26212 # section 3: all other tokens
26213 ###############################################################
26216 last if ( $tok eq '#' );
26217 my $code = $tokenization_code->{$tok};
26220 operator_expected( $prev_type, $tok, $next_type );
26227 # -----------------------------
26228 # end of main tokenization loop
26229 # -----------------------------
26231 if ( $i_tok >= 0 ) {
26232 $routput_token_type->[$i_tok] = $type;
26233 $routput_block_type->[$i_tok] = $block_type;
26234 $routput_container_type->[$i_tok] = $container_type;
26235 $routput_type_sequence->[$i_tok] = $type_sequence;
26236 $routput_indent_flag->[$i_tok] = $indent_flag;
26239 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
26240 $last_last_nonblank_token = $last_nonblank_token;
26241 $last_last_nonblank_type = $last_nonblank_type;
26242 $last_last_nonblank_block_type = $last_nonblank_block_type;
26243 $last_last_nonblank_container_type = $last_nonblank_container_type;
26244 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
26245 $last_nonblank_token = $tok;
26246 $last_nonblank_type = $type;
26247 $last_nonblank_block_type = $block_type;
26248 $last_nonblank_container_type = $container_type;
26249 $last_nonblank_type_sequence = $type_sequence;
26250 $last_nonblank_prototype = $prototype;
26253 # reset indentation level if necessary at a sub or package
26254 # in an attempt to recover from a nesting error
26255 if ( $level_in_tokenizer < 0 ) {
26256 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
26257 reset_indentation_level(0);
26258 brace_warning("resetting level to 0 at $1 $2\n");
26262 # all done tokenizing this line ...
26263 # now prepare the final list of tokens and types
26265 my @token_type = (); # stack of output token types
26266 my @block_type = (); # stack of output code block types
26267 my @container_type = (); # stack of output code container types
26268 my @type_sequence = (); # stack of output type sequence numbers
26269 my @tokens = (); # output tokens
26270 my @levels = (); # structural brace levels of output tokens
26271 my @slevels = (); # secondary nesting levels of output tokens
26272 my @nesting_tokens = (); # string of tokens leading to this depth
26273 my @nesting_types = (); # string of token types leading to this depth
26274 my @nesting_blocks = (); # string of block types leading to this depth
26275 my @nesting_lists = (); # string of list types leading to this depth
26276 my @ci_string = (); # string needed to compute continuation indentation
26277 my @container_environment = (); # BLOCK or LIST
26278 my $container_environment = '';
26279 my $im = -1; # previous $i value
26281 my $ci_string_sum = ones_count($ci_string_in_tokenizer);
26283 # Computing Token Indentation
26285 # The final section of the tokenizer forms tokens and also computes
26286 # parameters needed to find indentation. It is much easier to do it
26287 # in the tokenizer than elsewhere. Here is a brief description of how
26288 # indentation is computed. Perl::Tidy computes indentation as the sum
26291 # (1) structural indentation, such as if/else/elsif blocks
26292 # (2) continuation indentation, such as long parameter call lists.
26294 # These are occasionally called primary and secondary indentation.
26296 # Structural indentation is introduced by tokens of type '{', although
26297 # the actual tokens might be '{', '(', or '['. Structural indentation
26298 # is of two types: BLOCK and non-BLOCK. Default structural indentation
26299 # is 4 characters if the standard indentation scheme is used.
26301 # Continuation indentation is introduced whenever a line at BLOCK level
26302 # is broken before its termination. Default continuation indentation
26303 # is 2 characters in the standard indentation scheme.
26305 # Both types of indentation may be nested arbitrarily deep and
26306 # interlaced. The distinction between the two is somewhat arbitrary.
26308 # For each token, we will define two variables which would apply if
26309 # the current statement were broken just before that token, so that
26310 # that token started a new line:
26312 # $level = the structural indentation level,
26313 # $ci_level = the continuation indentation level
26315 # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
26316 # assuming defaults. However, in some special cases it is customary
26317 # to modify $ci_level from this strict value.
26319 # The total structural indentation is easy to compute by adding and
26320 # subtracting 1 from a saved value as types '{' and '}' are seen. The
26321 # running value of this variable is $level_in_tokenizer.
26323 # The total continuation is much more difficult to compute, and requires
26324 # several variables. These variables are:
26326 # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
26327 # each indentation level, if there are intervening open secondary
26328 # structures just prior to that level.
26329 # $continuation_string_in_tokenizer = a string of 1's and 0's indicating
26330 # if the last token at that level is "continued", meaning that it
26331 # is not the first token of an expression.
26332 # $nesting_block_string = a string of 1's and 0's indicating, for each
26333 # indentation level, if the level is of type BLOCK or not.
26334 # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
26335 # $nesting_list_string = a string of 1's and 0's indicating, for each
26336 # indentation level, if it is appropriate for list formatting.
26337 # If so, continuation indentation is used to indent long list items.
26338 # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
26339 # @{$rslevel_stack} = a stack of total nesting depths at each
26340 # structural indentation level, where "total nesting depth" means
26341 # the nesting depth that would occur if every nesting token -- '{', '[',
26342 # and '(' -- , regardless of context, is used to compute a nesting
26345 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
26346 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
26348 my ( $ci_string_i, $level_i, $nesting_block_string_i,
26349 $nesting_list_string_i, $nesting_token_string_i,
26350 $nesting_type_string_i, );
26352 foreach $i ( @{$routput_token_list} )
26353 { # scan the list of pre-tokens indexes
26355 # self-checking for valid token types
26356 my $type = $routput_token_type->[$i];
26357 my $forced_indentation_flag = $routput_indent_flag->[$i];
26359 # See if we should undo the $forced_indentation_flag.
26360 # Forced indentation after 'if', 'unless', 'while' and 'until'
26361 # expressions without trailing parens is optional and doesn't
26362 # always look good. It is usually okay for a trailing logical
26363 # expression, but if the expression is a function call, code block,
26364 # or some kind of list it puts in an unwanted extra indentation
26365 # level which is hard to remove.
26367 # Example where extra indentation looks ok:
26369 # if $det_a < 0 and $det_b > 0
26370 # or $det_a > 0 and $det_b < 0;
26372 # Example where extra indentation is not needed because
26373 # the eval brace also provides indentation:
26374 # print "not " if defined eval {
26375 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
26378 # The following rule works fairly well:
26379 # Undo the flag if the end of this line, or start of the next
26380 # line, is an opening container token or a comma.
26381 # This almost always works, but if not after another pass it will
26383 if ( $forced_indentation_flag && $type eq 'k' ) {
26385 my $ilast = $routput_token_list->[$ixlast];
26386 my $toklast = $routput_token_type->[$ilast];
26387 if ( $toklast eq '#' ) {
26389 $ilast = $routput_token_list->[$ixlast];
26390 $toklast = $routput_token_type->[$ilast];
26392 if ( $toklast eq 'b' ) {
26394 $ilast = $routput_token_list->[$ixlast];
26395 $toklast = $routput_token_type->[$ilast];
26397 if ( $toklast =~ /^[\{,]$/ ) {
26398 $forced_indentation_flag = 0;
26401 ( $toklast, my $i_next ) =
26402 find_next_nonblank_token( $max_token_index, $rtokens,
26403 $max_token_index );
26404 if ( $toklast =~ /^[\{,]$/ ) {
26405 $forced_indentation_flag = 0;
26410 # if we are already in an indented if, see if we should outdent
26411 if ($indented_if_level) {
26413 # don't try to nest trailing if's - shouldn't happen
26414 if ( $type eq 'k' ) {
26415 $forced_indentation_flag = 0;
26418 # check for the normal case - outdenting at next ';'
26419 elsif ( $type eq ';' ) {
26420 if ( $level_in_tokenizer == $indented_if_level ) {
26421 $forced_indentation_flag = -1;
26422 $indented_if_level = 0;
26426 # handle case of missing semicolon
26427 elsif ( $type eq '}' ) {
26428 if ( $level_in_tokenizer == $indented_if_level ) {
26429 $indented_if_level = 0;
26431 # TBD: This could be a subroutine call
26432 $level_in_tokenizer--;
26433 if ( @{$rslevel_stack} > 1 ) {
26434 pop( @{$rslevel_stack} );
26436 if ( length($nesting_block_string) > 1 )
26437 { # true for valid script
26438 chop $nesting_block_string;
26439 chop $nesting_list_string;
26446 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
26447 $level_i = $level_in_tokenizer;
26449 # This can happen by running perltidy on non-scripts
26450 # although it could also be bug introduced by programming change.
26451 # Perl silently accepts a 032 (^Z) and takes it as the end
26452 if ( !$is_valid_token_type{$type} ) {
26453 my $val = ord($type);
26455 "unexpected character decimal $val ($type) in script\n");
26456 $tokenizer_self->{_in_error} = 1;
26459 # ----------------------------------------------------------------
26460 # TOKEN TYPE PATCHES
26461 # output __END__, __DATA__, and format as type 'k' instead of ';'
26462 # to make html colors correct, etc.
26463 my $fix_type = $type;
26464 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
26466 # output anonymous 'sub' as keyword
26467 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
26469 # -----------------------------------------------------------------
26471 $nesting_token_string_i = $nesting_token_string;
26472 $nesting_type_string_i = $nesting_type_string;
26473 $nesting_block_string_i = $nesting_block_string;
26474 $nesting_list_string_i = $nesting_list_string;
26476 # set primary indentation levels based on structural braces
26477 # Note: these are set so that the leading braces have a HIGHER
26478 # level than their CONTENTS, which is convenient for indentation
26479 # Also, define continuation indentation for each token.
26480 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
26483 # use environment before updating
26484 $container_environment =
26485 $nesting_block_flag ? 'BLOCK'
26486 : $nesting_list_flag ? 'LIST'
26489 # if the difference between total nesting levels is not 1,
26490 # there are intervening non-structural nesting types between
26491 # this '{' and the previous unclosed '{'
26492 my $intervening_secondary_structure = 0;
26493 if ( @{$rslevel_stack} ) {
26494 $intervening_secondary_structure =
26495 $slevel_in_tokenizer - $rslevel_stack->[-1];
26498 # Continuation Indentation
26500 # Having tried setting continuation indentation both in the formatter and
26501 # in the tokenizer, I can say that setting it in the tokenizer is much,
26502 # much easier. The formatter already has too much to do, and can't
26503 # make decisions on line breaks without knowing what 'ci' will be at
26504 # arbitrary locations.
26506 # But a problem with setting the continuation indentation (ci) here
26507 # in the tokenizer is that we do not know where line breaks will actually
26508 # be. As a result, we don't know if we should propagate continuation
26509 # indentation to higher levels of structure.
26511 # For nesting of only structural indentation, we never need to do this.
26512 # For example, in a long if statement, like this
26514 # if ( !$output_block_type[$i]
26515 # && ($in_statement_continuation) )
26520 # the second line has ci but we do normally give the lines within the BLOCK
26521 # any ci. This would be true if we had blocks nested arbitrarily deeply.
26523 # But consider something like this, where we have created a break after
26524 # an opening paren on line 1, and the paren is not (currently) a
26525 # structural indentation token:
26527 # my $file = $menubar->Menubutton(
26528 # qw/-text File -underline 0 -menuitems/ => [
26530 # Cascade => '~View',
26534 # The second line has ci, so it would seem reasonable to propagate it
26535 # down, giving the third line 1 ci + 1 indentation. This suggests the
26536 # following rule, which is currently used to propagating ci down: if there
26537 # are any non-structural opening parens (or brackets, or braces), before
26538 # an opening structural brace, then ci is propagated down, and otherwise
26539 # not. The variable $intervening_secondary_structure contains this
26540 # information for the current token, and the string
26541 # "$ci_string_in_tokenizer" is a stack of previous values of this
26544 # save the current states
26545 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
26546 $level_in_tokenizer++;
26548 if ($forced_indentation_flag) {
26550 # break BEFORE '?' when there is forced indentation
26551 if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
26552 if ( $type eq 'k' ) {
26553 $indented_if_level = $level_in_tokenizer;
26556 # do not change container environment here if we are not
26557 # at a real list. Adding this check prevents "blinkers"
26558 # often near 'unless" clauses, such as in the following
26563 ## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
26566 $nesting_block_string .= "$nesting_block_flag";
26570 if ( $routput_block_type->[$i] ) {
26571 $nesting_block_flag = 1;
26572 $nesting_block_string .= '1';
26575 $nesting_block_flag = 0;
26576 $nesting_block_string .= '0';
26580 # we will use continuation indentation within containers
26581 # which are not blocks and not logical expressions
26583 if ( !$routput_block_type->[$i] ) {
26585 # propagate flag down at nested open parens
26586 if ( $routput_container_type->[$i] eq '(' ) {
26587 $bit = 1 if $nesting_list_flag;
26590 # use list continuation if not a logical grouping
26591 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
26595 $is_logical_container{ $routput_container_type->[$i]
26599 $nesting_list_string .= $bit;
26600 $nesting_list_flag = $bit;
26602 $ci_string_in_tokenizer .=
26603 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
26604 $ci_string_sum = ones_count($ci_string_in_tokenizer);
26605 $continuation_string_in_tokenizer .=
26606 ( $in_statement_continuation > 0 ) ? '1' : '0';
26608 # Sometimes we want to give an opening brace continuation indentation,
26609 # and sometimes not. For code blocks, we don't do it, so that the leading
26610 # '{' gets outdented, like this:
26612 # if ( !$output_block_type[$i]
26613 # && ($in_statement_continuation) )
26616 # For other types, we will give them continuation indentation. For example,
26617 # here is how a list looks with the opening paren indented:
26620 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
26621 # [ "homer", "marge", "bart" ], );
26623 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
26625 my $total_ci = $ci_string_sum;
26627 !$routput_block_type->[$i] # patch: skip for BLOCK
26628 && ($in_statement_continuation)
26629 && !( $forced_indentation_flag && $type eq ':' )
26632 $total_ci += $in_statement_continuation
26633 unless ( $ci_string_in_tokenizer =~ /1$/ );
26636 $ci_string_i = $total_ci;
26637 $in_statement_continuation = 0;
26640 elsif ($type eq '}'
26642 || $forced_indentation_flag < 0 )
26645 # only a nesting error in the script would prevent popping here
26646 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
26648 $level_i = --$level_in_tokenizer;
26650 # restore previous level values
26651 if ( length($nesting_block_string) > 1 )
26652 { # true for valid script
26653 chop $nesting_block_string;
26654 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
26655 chop $nesting_list_string;
26656 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
26658 chop $ci_string_in_tokenizer;
26659 $ci_string_sum = ones_count($ci_string_in_tokenizer);
26661 $in_statement_continuation =
26662 chop $continuation_string_in_tokenizer;
26664 # zero continuation flag at terminal BLOCK '}' which
26665 # ends a statement.
26666 if ( $routput_block_type->[$i] ) {
26668 # ...These include non-anonymous subs
26669 # note: could be sub ::abc { or sub 'abc
26670 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
26672 # note: older versions of perl require the /gc modifier
26673 # here or else the \G does not work.
26674 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
26676 $in_statement_continuation = 0;
26680 # ...and include all block types except user subs with
26681 # block prototypes and these: (sort|grep|map|do|eval)
26682 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
26684 $is_zero_continuation_block_type{
26685 $routput_block_type->[$i]
26688 $in_statement_continuation = 0;
26691 # ..but these are not terminal types:
26692 # /^(sort|grep|map|do|eval)$/ )
26694 $is_not_zero_continuation_block_type{
26695 $routput_block_type->[$i]
26700 # ..and a block introduced by a label
26701 # /^\w+\s*:$/gc ) {
26702 elsif ( $routput_block_type->[$i] =~ /:$/ ) {
26703 $in_statement_continuation = 0;
26706 # user function with block prototype
26708 $in_statement_continuation = 0;
26712 # If we are in a list, then
26713 # we must set continuation indentation at the closing
26714 # paren of something like this (paren after $check):
26717 # ( not defined $check )
26719 # or $check eq "new"
26720 # or $check eq "old",
26722 elsif ( $tok eq ')' ) {
26723 $in_statement_continuation = 1
26724 if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
26727 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
26730 # use environment after updating
26731 $container_environment =
26732 $nesting_block_flag ? 'BLOCK'
26733 : $nesting_list_flag ? 'LIST'
26735 $ci_string_i = $ci_string_sum + $in_statement_continuation;
26736 $nesting_block_string_i = $nesting_block_string;
26737 $nesting_list_string_i = $nesting_list_string;
26740 # not a structural indentation type..
26743 $container_environment =
26744 $nesting_block_flag ? 'BLOCK'
26745 : $nesting_list_flag ? 'LIST'
26748 # zero the continuation indentation at certain tokens so
26749 # that they will be at the same level as its container. For
26750 # commas, this simplifies the -lp indentation logic, which
26751 # counts commas. For ?: it makes them stand out.
26752 if ($nesting_list_flag) {
26753 if ( $type =~ /^[,\?\:]$/ ) {
26754 $in_statement_continuation = 0;
26758 # be sure binary operators get continuation indentation
26760 $container_environment
26761 && ( $type eq 'k' && $is_binary_keyword{$tok}
26762 || $is_binary_type{$type} )
26765 $in_statement_continuation = 1;
26768 # continuation indentation is sum of any open ci from previous
26769 # levels plus the current level
26770 $ci_string_i = $ci_string_sum + $in_statement_continuation;
26772 # update continuation flag ...
26773 # if this isn't a blank or comment..
26774 if ( $type ne 'b' && $type ne '#' ) {
26776 # and we are in a BLOCK
26777 if ($nesting_block_flag) {
26779 # the next token after a ';' and label starts a new stmt
26780 if ( $type eq ';' || $type eq 'J' ) {
26781 $in_statement_continuation = 0;
26784 # otherwise, we are continuing the current statement
26786 $in_statement_continuation = 1;
26790 # if we are not in a BLOCK..
26793 # do not use continuation indentation if not list
26794 # environment (could be within if/elsif clause)
26795 if ( !$nesting_list_flag ) {
26796 $in_statement_continuation = 0;
26799 # otherwise, the token after a ',' starts a new term
26801 # Patch FOR RT#99961; no continuation after a ';'
26802 # This is needed because perltidy currently marks
26803 # a block preceded by a type character like % or @
26804 # as a non block, to simplify formatting. But these
26805 # are actually blocks and can have semicolons.
26806 # See code_block_type() and is_non_structural_brace().
26807 elsif ( $type eq ',' || $type eq ';' ) {
26808 $in_statement_continuation = 0;
26811 # otherwise, we are continuing the current term
26813 $in_statement_continuation = 1;
26819 if ( $level_in_tokenizer < 0 ) {
26820 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
26821 $tokenizer_self->{_saw_negative_indentation} = 1;
26822 warning("Starting negative indentation\n");
26826 # set secondary nesting levels based on all containment token types
26827 # Note: these are set so that the nesting depth is the depth
26828 # of the PREVIOUS TOKEN, which is convenient for setting
26829 # the strength of token bonds
26830 my $slevel_i = $slevel_in_tokenizer;
26833 if ( $is_opening_type{$type} ) {
26834 $slevel_in_tokenizer++;
26835 $nesting_token_string .= $tok;
26836 $nesting_type_string .= $type;
26840 elsif ( $is_closing_type{$type} ) {
26841 $slevel_in_tokenizer--;
26842 my $char = chop $nesting_token_string;
26844 if ( $char ne $matching_start_token{$tok} ) {
26845 $nesting_token_string .= $char . $tok;
26846 $nesting_type_string .= $type;
26849 chop $nesting_type_string;
26853 push( @block_type, $routput_block_type->[$i] );
26854 push( @ci_string, $ci_string_i );
26855 push( @container_environment, $container_environment );
26856 push( @container_type, $routput_container_type->[$i] );
26857 push( @levels, $level_i );
26858 push( @nesting_tokens, $nesting_token_string_i );
26859 push( @nesting_types, $nesting_type_string_i );
26860 push( @slevels, $slevel_i );
26861 push( @token_type, $fix_type );
26862 push( @type_sequence, $routput_type_sequence->[$i] );
26863 push( @nesting_blocks, $nesting_block_string );
26864 push( @nesting_lists, $nesting_list_string );
26866 # now form the previous token
26869 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
26873 substr( $input_line, $$rtoken_map[$im], $num ) );
26879 $num = length($input_line) - $$rtoken_map[$im]; # make the last token
26881 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
26884 $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
26885 $tokenizer_self->{_in_quote} = $in_quote;
26886 $tokenizer_self->{_quote_target} =
26887 $in_quote ? matching_end_token($quote_character) : "";
26888 $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
26890 $line_of_tokens->{_rtoken_type} = \@token_type;
26891 $line_of_tokens->{_rtokens} = \@tokens;
26892 $line_of_tokens->{_rblock_type} = \@block_type;
26893 $line_of_tokens->{_rcontainer_type} = \@container_type;
26894 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
26895 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
26896 $line_of_tokens->{_rlevels} = \@levels;
26897 $line_of_tokens->{_rslevels} = \@slevels;
26898 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
26899 $line_of_tokens->{_rci_levels} = \@ci_string;
26900 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
26904 } # end tokenize_this_line
26906 #########i#############################################################
26907 # Tokenizer routines which assist in identifying token types
26908 #######################################################################
26910 sub operator_expected {
26912 # Many perl symbols have two or more meanings. For example, '<<'
26913 # can be a shift operator or a here-doc operator. The
26914 # interpretation of these symbols depends on the current state of
26915 # the tokenizer, which may either be expecting a term or an
26916 # operator. For this example, a << would be a shift if an operator
26917 # is expected, and a here-doc if a term is expected. This routine
26918 # is called to make this decision for any current token. It returns
26919 # one of three possible values:
26921 # OPERATOR - operator expected (or at least, not a term)
26922 # UNKNOWN - can't tell
26923 # TERM - a term is expected (or at least, not an operator)
26925 # The decision is based on what has been seen so far. This
26926 # information is stored in the "$last_nonblank_type" and
26927 # "$last_nonblank_token" variables. For example, if the
26928 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
26929 # if $last_nonblank_type is 'n' (numeric), we are expecting an
26932 # If a UNKNOWN is returned, the calling routine must guess. A major
26933 # goal of this tokenizer is to minimize the possibility of returning
26934 # UNKNOWN, because a wrong guess can spoil the formatting of a
26937 # adding NEW_TOKENS: it is critically important that this routine be
26938 # updated to allow it to determine if an operator or term is to be
26939 # expected after the new token. Doing this simply involves adding
26940 # the new token character to one of the regexes in this routine or
26941 # to one of the hash lists
26942 # that it uses, which are initialized in the BEGIN section.
26943 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
26946 my ( $prev_type, $tok, $next_type ) = @_;
26948 my $op_expected = UNKNOWN;
26950 ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
26952 # Note: function prototype is available for token type 'U' for future
26953 # program development. It contains the leading and trailing parens,
26954 # and no blanks. It might be used to eliminate token type 'C', for
26955 # example (prototype = '()'). Thus:
26956 # if ($last_nonblank_type eq 'U') {
26957 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
26960 # A possible filehandle (or object) requires some care...
26961 if ( $last_nonblank_type eq 'Z' ) {
26964 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
26965 $op_expected = UNKNOWN;
26968 # For possible file handle like "$a", Perl uses weird parsing rules.
26970 # print $a/2,"/hi"; - division
26971 # print $a / 2,"/hi"; - division
26972 # print $a/ 2,"/hi"; - division
26973 # print $a /2,"/hi"; - pattern (and error)!
26974 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
26975 $op_expected = TERM;
26978 # Note when an operation is being done where a
26979 # filehandle might be expected, since a change in whitespace
26980 # could change the interpretation of the statement.
26982 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
26983 complain("operator in print statement not recommended\n");
26984 $op_expected = OPERATOR;
26989 # Check for smartmatch operator before preceding brace or square bracket.
26990 # For example, at the ? after the ] in the following expressions we are
26991 # expecting an operator:
26993 # qr/3/ ~~ ['1234'] ? 1 : 0;
26994 # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
26995 elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
26996 $op_expected = OPERATOR;
26999 # handle something after 'do' and 'eval'
27000 elsif ( $is_block_operator{$last_nonblank_token} ) {
27002 # something like $a = eval "expression";
27004 if ( $last_nonblank_type eq 'k' ) {
27005 $op_expected = TERM; # expression or list mode following keyword
27008 # something like $a = do { BLOCK } / 2;
27009 # or this ? after a smartmatch anonynmous hash or array reference:
27010 # qr/3/ ~~ ['1234'] ? 1 : 0;
27013 $op_expected = OPERATOR; # block mode following }
27017 # handle bare word..
27018 elsif ( $last_nonblank_type eq 'w' ) {
27020 # unfortunately, we can't tell what type of token to expect next
27021 # after most bare words
27022 $op_expected = UNKNOWN;
27025 # operator, but not term possible after these types
27026 # Note: moved ')' from type to token because parens in list context
27027 # get marked as '{' '}' now. This is a minor glitch in the following:
27028 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
27030 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
27031 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
27033 $op_expected = OPERATOR;
27035 # in a 'use' statement, numbers and v-strings are not true
27036 # numbers, so to avoid incorrect error messages, we will
27037 # mark them as unknown for now (use.t)
27038 # TODO: it would be much nicer to create a new token V for VERSION
27039 # number in a use statement. Then this could be a check on type V
27040 # and related patches which change $statement_type for '=>'
27041 # and ',' could be removed. Further, it would clean things up to
27042 # scan the 'use' statement with a separate subroutine.
27043 if ( ( $statement_type eq 'use' )
27044 && ( $last_nonblank_type =~ /^[nv]$/ ) )
27046 $op_expected = UNKNOWN;
27049 # expecting VERSION or {} after package NAMESPACE
27050 elsif ($statement_type =~ /^package\b/
27051 && $last_nonblank_token =~ /^package\b/ )
27053 $op_expected = TERM;
27057 # no operator after many keywords, such as "die", "warn", etc
27058 elsif ( $expecting_term_token{$last_nonblank_token} ) {
27060 # patch for dor.t (defined or).
27061 # perl functions which may be unary operators
27062 # TODO: This list is incomplete, and these should be put
27065 && $next_type eq '/'
27066 && $last_nonblank_type eq 'k'
27067 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
27069 $op_expected = OPERATOR;
27072 $op_expected = TERM;
27076 # no operator after things like + - ** (i.e., other operators)
27077 elsif ( $expecting_term_types{$last_nonblank_type} ) {
27078 $op_expected = TERM;
27081 # a few operators, like "time", have an empty prototype () and so
27082 # take no parameters but produce a value to operate on
27083 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
27084 $op_expected = OPERATOR;
27087 # post-increment and decrement produce values to be operated on
27088 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
27089 $op_expected = OPERATOR;
27092 # no value to operate on after sub block
27093 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
27095 # a right brace here indicates the end of a simple block.
27096 # all non-structural right braces have type 'R'
27097 # all braces associated with block operator keywords have been given those
27098 # keywords as "last_nonblank_token" and caught above.
27099 # (This statement is order dependent, and must come after checking
27100 # $last_nonblank_token).
27101 elsif ( $last_nonblank_type eq '}' ) {
27103 # patch for dor.t (defined or).
27105 && $next_type eq '/'
27106 && $last_nonblank_token eq ']' )
27108 $op_expected = OPERATOR;
27111 # Patch for RT #116344: misparse a ternary operator after an anonymous
27113 # return ref {} ? 1 : 0;
27114 # The right brace should really be marked type 'R' in this case, and
27115 # it is safest to return an UNKNOWN here. Expecting a TERM will
27116 # cause the '?' to always be interpreted as a pattern delimiter
27117 # rather than introducing a ternary operator.
27118 elsif ( $tok eq '?' ) {
27119 $op_expected = UNKNOWN;
27122 $op_expected = TERM;
27126 # something else..what did I forget?
27129 # collecting diagnostics on unknown operator types..see what was missed
27130 $op_expected = UNKNOWN;
27132 "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
27136 TOKENIZER_DEBUG_FLAG_EXPECT && do {
27138 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
27140 return $op_expected;
27143 sub new_statement_ok {
27145 # return true if the current token can start a new statement
27146 # USES GLOBAL VARIABLES: $last_nonblank_type
27148 return label_ok() # a label would be ok here
27150 || $last_nonblank_type eq 'J'; # or we follow a label
27156 # Decide if a bare word followed by a colon here is a label
27157 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
27158 # $brace_depth, @brace_type
27160 # if it follows an opening or closing code block curly brace..
27161 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
27162 && $last_nonblank_type eq $last_nonblank_token )
27165 # it is a label if and only if the curly encloses a code block
27166 return $brace_type[$brace_depth];
27169 # otherwise, it is a label if and only if it follows a ';' (real or fake)
27172 return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
27176 sub code_block_type {
27178 # Decide if this is a block of code, and its type.
27179 # Must be called only when $type = $token = '{'
27180 # The problem is to distinguish between the start of a block of code
27181 # and the start of an anonymous hash reference
27182 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
27183 # to indicate the type of code block. (For example, 'last_nonblank_token'
27184 # might be 'if' for an if block, 'else' for an else block, etc).
27185 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
27186 # $last_nonblank_block_type, $brace_depth, @brace_type
27188 # handle case of multiple '{'s
27190 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
27192 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
27193 if ( $last_nonblank_token eq '{'
27194 && $last_nonblank_type eq $last_nonblank_token )
27197 # opening brace where a statement may appear is probably
27198 # a code block but might be and anonymous hash reference
27199 if ( $brace_type[$brace_depth] ) {
27200 return decide_if_code_block( $i, $rtokens, $rtoken_type,
27201 $max_token_index );
27204 # cannot start a code block within an anonymous hash
27210 elsif ( $last_nonblank_token eq ';' ) {
27212 # an opening brace where a statement may appear is probably
27213 # a code block but might be and anonymous hash reference
27214 return decide_if_code_block( $i, $rtokens, $rtoken_type,
27215 $max_token_index );
27218 # handle case of '}{'
27219 elsif ($last_nonblank_token eq '}'
27220 && $last_nonblank_type eq $last_nonblank_token )
27223 # a } { situation ...
27224 # could be hash reference after code block..(blktype1.t)
27225 if ($last_nonblank_block_type) {
27226 return decide_if_code_block( $i, $rtokens, $rtoken_type,
27227 $max_token_index );
27230 # must be a block if it follows a closing hash reference
27232 return $last_nonblank_token;
27236 ################################################################
27237 # NOTE: braces after type characters start code blocks, but for
27238 # simplicity these are not identified as such. See also
27239 # sub is_non_structural_brace.
27240 ################################################################
27242 ## elsif ( $last_nonblank_type eq 't' ) {
27243 ## return $last_nonblank_token;
27246 # brace after label:
27247 elsif ( $last_nonblank_type eq 'J' ) {
27248 return $last_nonblank_token;
27251 # otherwise, look at previous token. This must be a code block if
27252 # it follows any of these:
27253 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
27254 elsif ( $is_code_block_token{$last_nonblank_token} ) {
27256 # Bug Patch: Note that the opening brace after the 'if' in the following
27257 # snippet is an anonymous hash ref and not a code block!
27258 # print 'hi' if { x => 1, }->{x};
27259 # We can identify this situation because the last nonblank type
27260 # will be a keyword (instead of a closing peren)
27261 if ( $last_nonblank_token =~ /^(if|unless)$/
27262 && $last_nonblank_type eq 'k' )
27267 return $last_nonblank_token;
27271 # or a sub or package BLOCK
27272 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
27273 && $last_nonblank_token =~ /^(sub|package)\b/ )
27275 return $last_nonblank_token;
27278 elsif ( $statement_type =~ /^(sub|package)\b/ ) {
27279 return $statement_type;
27282 # user-defined subs with block parameters (like grep/map/eval)
27283 elsif ( $last_nonblank_type eq 'G' ) {
27284 return $last_nonblank_token;
27288 elsif ( $last_nonblank_type eq 'w' ) {
27289 return decide_if_code_block( $i, $rtokens, $rtoken_type,
27290 $max_token_index );
27293 # Patch for bug # RT #94338 reported by Daniel Trizen
27294 # for-loop in a parenthesized block-map triggering an error message:
27295 # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
27296 # Check for a code block within a parenthesized function call
27297 elsif ( $last_nonblank_token eq '(' ) {
27298 my $paren_type = $paren_type[$paren_depth];
27299 if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
27301 # We will mark this as a code block but use type 't' instead
27302 # of the name of the contining function. This will allow for
27303 # correct parsing but will usually produce better formatting.
27304 # Braces with block type 't' are not broken open automatically
27305 # in the formatter as are other code block types, and this usually
27307 return 't'; # (Not $paren_type)
27314 # handle unknown syntax ') {'
27315 # we previously appended a '()' to mark this case
27316 elsif ( $last_nonblank_token =~ /\(\)$/ ) {
27317 return $last_nonblank_token;
27320 # anything else must be anonymous hash reference
27326 sub decide_if_code_block {
27328 # USES GLOBAL VARIABLES: $last_nonblank_token
27329 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
27331 my ( $next_nonblank_token, $i_next ) =
27332 find_next_nonblank_token( $i, $rtokens, $max_token_index );
27334 # we are at a '{' where a statement may appear.
27335 # We must decide if this brace starts an anonymous hash or a code
27337 # return "" if anonymous hash, and $last_nonblank_token otherwise
27339 # initialize to be code BLOCK
27340 my $code_block_type = $last_nonblank_token;
27342 # Check for the common case of an empty anonymous hash reference:
27343 # Maybe something like sub { { } }
27344 if ( $next_nonblank_token eq '}' ) {
27345 $code_block_type = "";
27350 # To guess if this '{' is an anonymous hash reference, look ahead
27351 # and test as follows:
27353 # it is a hash reference if next come:
27354 # - a string or digit followed by a comma or =>
27355 # - bareword followed by =>
27356 # otherwise it is a code block
27358 # Examples of anonymous hash ref:
27362 # Examples of code blocks:
27363 # {1; print "hello\n", 1;}
27366 # We are only going to look ahead one more (nonblank/comment) line.
27367 # Strange formatting could cause a bad guess, but that's unlikely.
27371 # Ignore the rest of this line if it is a side comment
27372 if ( $next_nonblank_token ne '#' ) {
27373 @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
27374 @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
27376 my ( $rpre_tokens, $rpre_types ) =
27377 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
27378 # generous, and prevents
27380 # time in mangled files
27381 if ( defined($rpre_types) && @$rpre_types ) {
27382 push @pre_types, @$rpre_types;
27383 push @pre_tokens, @$rpre_tokens;
27386 # put a sentinel token to simplify stopping the search
27387 push @pre_types, '}';
27388 push @pre_types, '}';
27391 $jbeg = 1 if $pre_types[0] eq 'b';
27393 # first look for one of these
27395 # - bareword with leading -
27399 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
27401 # find the closing quote; don't worry about escapes
27402 my $quote_mark = $pre_types[$j];
27403 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
27404 if ( $pre_types[$k] eq $quote_mark ) {
27406 my $next = $pre_types[$j];
27411 elsif ( $pre_types[$j] eq 'd' ) {
27414 elsif ( $pre_types[$j] eq 'w' ) {
27417 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
27420 if ( $j > $jbeg ) {
27422 $j++ if $pre_types[$j] eq 'b';
27424 # Patched for RT #95708
27427 # it is a comma which is not a pattern delimeter except for qw
27429 $pre_types[$j] eq ','
27430 && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
27434 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
27437 $code_block_type = "";
27442 return $code_block_type;
27447 # report unexpected token type and show where it is
27448 # USES GLOBAL VARIABLES: $tokenizer_self
27449 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
27450 $rpretoken_type, $input_line )
27453 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
27454 my $msg = "found $found where $expecting expected";
27455 my $pos = $$rpretoken_map[$i_tok];
27456 interrupt_logfile();
27457 my $input_line_number = $tokenizer_self->{_last_line_number};
27458 my ( $offset, $numbered_line, $underline ) =
27459 make_numbered_line( $input_line_number, $input_line, $pos );
27460 $underline = write_on_underline( $underline, $pos - $offset, '^' );
27463 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
27464 my $pos_prev = $$rpretoken_map[$last_nonblank_i];
27466 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
27467 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
27470 $num = $pos - $pos_prev;
27472 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
27475 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
27476 $trailer = " (previous token underlined)";
27478 warning( $numbered_line . "\n" );
27479 warning( $underline . "\n" );
27480 warning( $msg . $trailer . "\n" );
27485 sub is_non_structural_brace {
27487 # Decide if a brace or bracket is structural or non-structural
27488 # by looking at the previous token and type
27489 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
27491 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
27492 # Tentatively deactivated because it caused the wrong operator expectation
27494 # $user = @vars[1] / 100;
27495 # Must update sub operator_expected before re-implementing.
27496 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
27500 ################################################################
27501 # NOTE: braces after type characters start code blocks, but for
27502 # simplicity these are not identified as such. See also
27503 # sub code_block_type
27504 ################################################################
27506 ##if ($last_nonblank_type eq 't') {return 0}
27508 # otherwise, it is non-structural if it is decorated
27509 # by type information.
27510 # For example, the '{' here is non-structural: ${xxx}
27512 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
27514 # or if we follow a hash or array closing curly brace or bracket
27515 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
27516 # because the first '}' would have been given type 'R'
27517 || $last_nonblank_type =~ /^([R\]])$/
27521 #########i#############################################################
27522 # Tokenizer routines for tracking container nesting depths
27523 #######################################################################
27525 # The following routines keep track of nesting depths of the nesting
27526 # types, ( [ { and ?. This is necessary for determining the indentation
27527 # level, and also for debugging programs. Not only do they keep track of
27528 # nesting depths of the individual brace types, but they check that each
27529 # of the other brace types is balanced within matching pairs. For
27530 # example, if the program sees this sequence:
27534 # then it can determine that there is an extra left paren somewhere
27535 # between the { and the }. And so on with every other possible
27536 # combination of outer and inner brace types. For another
27541 # which has an extra ] within the parens.
27543 # The brace types have indexes 0 .. 3 which are indexes into
27546 # The pair ? : are treated as just another nesting type, with ? acting
27547 # as the opening brace and : acting as the closing brace.
27551 # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
27553 # saves the nesting depth of brace type $b (where $b is either of the other
27554 # nesting types) when brace type $a enters a new depth. When this depth
27555 # decreases, a check is made that the current depth of brace types $b is
27556 # unchanged, or otherwise there must have been an error. This can
27557 # be very useful for localizing errors, particularly when perl runs to
27558 # the end of a large file (such as this one) and announces that there
27559 # is a problem somewhere.
27561 # A numerical sequence number is maintained for every nesting type,
27562 # so that each matching pair can be uniquely identified in a simple
27565 sub increase_nesting_depth {
27566 my ( $aa, $pos ) = @_;
27568 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27569 # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
27572 $current_depth[$aa]++;
27574 $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
27575 my $input_line_number = $tokenizer_self->{_last_line_number};
27576 my $input_line = $tokenizer_self->{_line_text};
27578 # Sequence numbers increment by number of items. This keeps
27579 # a unique set of numbers but still allows the relative location
27580 # of any type to be determined.
27581 $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
27582 my $seqno = $nesting_sequence_number[$aa];
27583 $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
27585 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
27586 [ $input_line_number, $input_line, $pos ];
27588 for $bb ( 0 .. $#closing_brace_names ) {
27589 next if ( $bb == $aa );
27590 $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
27593 # set a flag for indenting a nested ternary statement
27595 if ( $aa == QUESTION_COLON ) {
27596 $nested_ternary_flag[ $current_depth[$aa] ] = 0;
27597 if ( $current_depth[$aa] > 1 ) {
27598 if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
27599 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
27600 if ( $pdepth == $total_depth - 1 ) {
27602 $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
27607 $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
27608 $statement_type = "";
27609 return ( $seqno, $indent );
27612 sub decrease_nesting_depth {
27614 my ( $aa, $pos ) = @_;
27616 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27617 # @current_sequence_number, @depth_array, @starting_line_of_current_depth
27621 my $input_line_number = $tokenizer_self->{_last_line_number};
27622 my $input_line = $tokenizer_self->{_line_text};
27626 if ( $current_depth[$aa] > 0 ) {
27628 # set a flag for un-indenting after seeing a nested ternary statement
27629 $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
27630 if ( $aa == QUESTION_COLON ) {
27631 $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
27633 $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
27635 # check that any brace types $bb contained within are balanced
27636 for $bb ( 0 .. $#closing_brace_names ) {
27637 next if ( $bb == $aa );
27639 unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
27640 $current_depth[$bb] )
27643 $current_depth[$bb] -
27644 $depth_array[$aa][$bb][ $current_depth[$aa] ];
27646 # don't whine too many times
27647 my $saw_brace_error = get_saw_brace_error();
27649 $saw_brace_error <= MAX_NAG_MESSAGES
27651 # if too many closing types have occurred, we probably
27652 # already caught this error
27653 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
27656 interrupt_logfile();
27658 $starting_line_of_current_depth[$aa]
27659 [ $current_depth[$aa] ];
27661 my $rel = [ $input_line_number, $input_line, $pos ];
27665 if ( $diff == 1 || $diff == -1 ) {
27673 ? $opening_brace_names[$bb]
27674 : $closing_brace_names[$bb];
27675 write_error_indicator_pair( @$rsl, '^' );
27677 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
27682 $starting_line_of_current_depth[$bb]
27683 [ $current_depth[$bb] ];
27686 " The most recent un-matched $bname is on line $ml\n";
27687 write_error_indicator_pair( @$rml, '^' );
27689 write_error_indicator_pair( @$rel, '^' );
27693 increment_brace_error();
27696 $current_depth[$aa]--;
27700 my $saw_brace_error = get_saw_brace_error();
27701 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
27703 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
27705 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
27707 increment_brace_error();
27709 return ( $seqno, $outdent );
27712 sub check_final_nesting_depths {
27715 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
27717 for $aa ( 0 .. $#closing_brace_names ) {
27719 if ( $current_depth[$aa] ) {
27721 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
27724 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
27725 The most recent un-matched $opening_brace_names[$aa] is on line $sl
27727 indicate_error( $msg, @$rsl, '^' );
27728 increment_brace_error();
27733 #########i#############################################################
27734 # Tokenizer routines for looking ahead in input stream
27735 #######################################################################
27737 sub peek_ahead_for_n_nonblank_pre_tokens {
27739 # returns next n pretokens if they exist
27740 # returns undef's if hits eof without seeing any pretokens
27741 # USES GLOBAL VARIABLES: $tokenizer_self
27742 my $max_pretokens = shift;
27745 my ( $rpre_tokens, $rmap, $rpre_types );
27747 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27749 $line =~ s/^\s*//; # trim leading blanks
27750 next if ( length($line) <= 0 ); # skip blank
27751 next if ( $line =~ /^#/ ); # skip comment
27752 ( $rpre_tokens, $rmap, $rpre_types ) =
27753 pre_tokenize( $line, $max_pretokens );
27756 return ( $rpre_tokens, $rpre_types );
27759 # look ahead for next non-blank, non-comment line of code
27760 sub peek_ahead_for_nonblank_token {
27762 # USES GLOBAL VARIABLES: $tokenizer_self
27763 my ( $rtokens, $max_token_index ) = @_;
27767 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27769 $line =~ s/^\s*//; # trim leading blanks
27770 next if ( length($line) <= 0 ); # skip blank
27771 next if ( $line =~ /^#/ ); # skip comment
27772 my ( $rtok, $rmap, $rtype ) =
27773 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
27774 my $j = $max_token_index + 1;
27777 foreach $tok (@$rtok) {
27778 last if ( $tok =~ "\n" );
27779 $$rtokens[ ++$j ] = $tok;
27786 #########i#############################################################
27787 # Tokenizer guessing routines for ambiguous situations
27788 #######################################################################
27790 sub guess_if_pattern_or_conditional {
27792 # this routine is called when we have encountered a ? following an
27793 # unknown bareword, and we must decide if it starts a pattern or not
27794 # input parameters:
27795 # $i - token index of the ? starting possible pattern
27796 # output parameters:
27797 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
27798 # msg = a warning or diagnostic message
27799 # USES GLOBAL VARIABLES: $last_nonblank_token
27800 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27801 my $is_pattern = 0;
27802 my $msg = "guessing that ? after $last_nonblank_token starts a ";
27804 if ( $i >= $max_token_index ) {
27805 $msg .= "conditional (no end to pattern found on the line)\n";
27810 my $next_token = $$rtokens[$i]; # first token after ?
27812 # look for a possible ending ? on this line..
27814 my $quote_depth = 0;
27815 my $quote_character = '';
27819 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27822 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27823 $quote_pos, $quote_depth, $max_token_index );
27827 # we didn't find an ending ? on this line,
27828 # so we bias towards conditional
27830 $msg .= "conditional (no ending ? on this line)\n";
27832 # we found an ending ?, so we bias towards a pattern
27836 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27838 $msg .= "pattern (found ending ? and pattern expected)\n";
27841 $msg .= "pattern (uncertain, but found ending ?)\n";
27845 return ( $is_pattern, $msg );
27848 sub guess_if_pattern_or_division {
27850 # this routine is called when we have encountered a / following an
27851 # unknown bareword, and we must decide if it starts a pattern or is a
27853 # input parameters:
27854 # $i - token index of the / starting possible pattern
27855 # output parameters:
27856 # $is_pattern = 0 if probably division, =1 if probably a pattern
27857 # msg = a warning or diagnostic message
27858 # USES GLOBAL VARIABLES: $last_nonblank_token
27859 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27860 my $is_pattern = 0;
27861 my $msg = "guessing that / after $last_nonblank_token starts a ";
27863 if ( $i >= $max_token_index ) {
27864 $msg .= "division (no end to pattern found on the line)\n";
27868 my $divide_expected =
27869 numerator_expected( $i, $rtokens, $max_token_index );
27871 my $next_token = $$rtokens[$i]; # first token after slash
27873 # look for a possible ending / on this line..
27875 my $quote_depth = 0;
27876 my $quote_character = '';
27880 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27883 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27884 $quote_pos, $quote_depth, $max_token_index );
27888 # we didn't find an ending / on this line,
27889 # so we bias towards division
27890 if ( $divide_expected >= 0 ) {
27892 $msg .= "division (no ending / on this line)\n";
27895 $msg = "multi-line pattern (division not possible)\n";
27901 # we found an ending /, so we bias towards a pattern
27904 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27906 if ( $divide_expected >= 0 ) {
27908 if ( $i - $ibeg > 60 ) {
27909 $msg .= "division (matching / too distant)\n";
27913 $msg .= "pattern (but division possible too)\n";
27919 $msg .= "pattern (division not possible)\n";
27924 if ( $divide_expected >= 0 ) {
27926 $msg .= "division (pattern not possible)\n";
27931 "pattern (uncertain, but division would not work here)\n";
27936 return ( $is_pattern, $msg );
27939 # try to resolve here-doc vs. shift by looking ahead for
27940 # non-code or the end token (currently only looks for end token)
27941 # returns 1 if it is probably a here doc, 0 if not
27942 sub guess_if_here_doc {
27944 # This is how many lines we will search for a target as part of the
27945 # guessing strategy. It is a constant because there is probably
27946 # little reason to change it.
27947 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
27949 use constant HERE_DOC_WINDOW => 40;
27951 my $next_token = shift;
27952 my $here_doc_expected = 0;
27955 my $msg = "checking <<";
27957 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
27961 if ( $line =~ /^$next_token$/ ) {
27962 $msg .= " -- found target $next_token ahead $k lines\n";
27963 $here_doc_expected = 1; # got it
27966 last if ( $k >= HERE_DOC_WINDOW );
27969 unless ($here_doc_expected) {
27971 if ( !defined($line) ) {
27972 $here_doc_expected = -1; # hit eof without seeing target
27973 $msg .= " -- must be shift; target $next_token not in file\n";
27976 else { # still unsure..taking a wild guess
27978 if ( !$is_constant{$current_package}{$next_token} ) {
27979 $here_doc_expected = 1;
27981 " -- guessing it's a here-doc ($next_token not a constant)\n";
27985 " -- guessing it's a shift ($next_token is a constant)\n";
27989 write_logfile_entry($msg);
27990 return $here_doc_expected;
27993 #########i#############################################################
27994 # Tokenizer Routines for scanning identifiers and related items
27995 #######################################################################
27997 sub scan_bare_identifier_do {
27999 # this routine is called to scan a token starting with an alphanumeric
28000 # variable or package separator, :: or '.
28001 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
28002 # $last_nonblank_type,@paren_type, $paren_depth
28004 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
28008 my $package = undef;
28012 # we have to back up one pretoken at a :: since each : is one pretoken
28013 if ( $tok eq '::' ) { $i_beg-- }
28014 if ( $tok eq '->' ) { $i_beg-- }
28015 my $pos_beg = $$rtoken_map[$i_beg];
28016 pos($input_line) = $pos_beg;
28023 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
28025 my $pos = pos($input_line);
28026 my $numc = $pos - $pos_beg;
28027 $tok = substr( $input_line, $pos_beg, $numc );
28029 # type 'w' includes anything without leading type info
28030 # ($,%,@,*) including something like abc::def::ghi
28034 if ( defined($2) ) { $sub_name = $2; }
28035 if ( defined($1) ) {
28038 # patch: don't allow isolated package name which just ends
28039 # in the old style package separator (single quote). Example:
28041 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
28045 $package =~ s/\'/::/g;
28046 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
28047 $package =~ s/::$//;
28050 $package = $current_package;
28052 if ( $is_keyword{$tok} ) {
28057 # if it is a bareword..
28058 if ( $type eq 'w' ) {
28060 # check for v-string with leading 'v' type character
28061 # (This seems to have precedence over filehandle, type 'Y')
28062 if ( $tok =~ /^v\d[_\d]*$/ ) {
28064 # we only have the first part - something like 'v101' -
28066 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
28067 $pos = pos($input_line);
28068 $numc = $pos - $pos_beg;
28069 $tok = substr( $input_line, $pos_beg, $numc );
28073 # warn if this version can't handle v-strings
28074 report_v_string($tok);
28077 elsif ( $is_constant{$package}{$sub_name} ) {
28081 # bareword after sort has implied empty prototype; for example:
28082 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
28083 # This has priority over whatever the user has specified.
28084 elsif ($last_nonblank_token eq 'sort'
28085 && $last_nonblank_type eq 'k' )
28090 # Note: strangely, perl does not seem to really let you create
28091 # functions which act like eval and do, in the sense that eval
28092 # and do may have operators following the final }, but any operators
28093 # that you create with prototype (&) apparently do not allow
28094 # trailing operators, only terms. This seems strange.
28095 # If this ever changes, here is the update
28096 # to make perltidy behave accordingly:
28098 # elsif ( $is_block_function{$package}{$tok} ) {
28099 # $tok='eval'; # patch to do braces like eval - doesn't work
28102 # FIXME: This could become a separate type to allow for different
28104 elsif ( $is_block_function{$package}{$sub_name} ) {
28108 elsif ( $is_block_list_function{$package}{$sub_name} ) {
28111 elsif ( $is_user_function{$package}{$sub_name} ) {
28113 $prototype = $user_function_prototype{$package}{$sub_name};
28116 # check for indirect object
28119 # added 2001-03-27: must not be followed immediately by '('
28121 ( $input_line !~ m/\G\(/gc )
28126 # preceded by keyword like 'print', 'printf' and friends
28127 $is_indirect_object_taker{$last_nonblank_token}
28129 # or preceded by something like 'print(' or 'printf('
28131 ( $last_nonblank_token eq '(' )
28132 && $is_indirect_object_taker{ $paren_type[$paren_depth]
28140 # may not be indirect object unless followed by a space
28141 if ( $input_line =~ m/\G\s+/gc ) {
28145 # Perl's indirect object notation is a very bad
28146 # thing and can cause subtle bugs, especially for
28147 # beginning programmers. And I haven't even been
28148 # able to figure out a sane warning scheme which
28149 # doesn't get in the way of good scripts.
28151 # Complain if a filehandle has any lower case
28152 # letters. This is suggested good practice.
28153 # Use 'sub_name' because something like
28154 # main::MYHANDLE is ok for filehandle
28155 if ( $sub_name =~ /[a-z]/ ) {
28157 # could be bug caused by older perltidy if
28159 if ( $input_line =~ m/\G\s*\(/gc ) {
28161 "Caution: unknown word '$tok' in indirect object slot\n"
28167 # bareword not followed by a space -- may not be filehandle
28168 # (may be function call defined in a 'use' statement)
28175 # Now we must convert back from character position
28176 # to pre_token index.
28177 # I don't think an error flag can occur here ..but who knows
28180 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28182 warning("scan_bare_identifier: Possibly invalid tokenization\n");
28186 # no match but line not blank - could be syntax error
28187 # perl will take '::' alone without complaint
28191 # change this warning to log message if it becomes annoying
28192 warning("didn't find identifier after leading ::\n");
28194 return ( $i, $tok, $type, $prototype );
28199 # This is the new scanner and will eventually replace scan_identifier.
28200 # Only type 'sub' and 'package' are implemented.
28201 # Token types $ * % @ & -> are not yet implemented.
28203 # Scan identifier following a type token.
28204 # The type of call depends on $id_scan_state: $id_scan_state = ''
28205 # for starting call, in which case $tok must be the token defining
28208 # If the type token is the last nonblank token on the line, a value
28209 # of $id_scan_state = $tok is returned, indicating that further
28210 # calls must be made to get the identifier. If the type token is
28211 # not the last nonblank token on the line, the identifier is
28212 # scanned and handled and a value of '' is returned.
28213 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
28214 # $statement_type, $tokenizer_self
28216 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
28220 my ( $i_beg, $pos_beg );
28222 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
28223 #my ($a,$b,$c) = caller;
28224 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
28226 # on re-entry, start scanning at first token on the line
28227 if ($id_scan_state) {
28232 # on initial entry, start scanning just after type token
28235 $id_scan_state = $tok;
28239 # find $i_beg = index of next nonblank token,
28240 # and handle empty lines
28241 my $blank_line = 0;
28242 my $next_nonblank_token = $$rtokens[$i_beg];
28243 if ( $i_beg > $max_token_index ) {
28248 # only a '#' immediately after a '$' is not a comment
28249 if ( $next_nonblank_token eq '#' ) {
28250 unless ( $tok eq '$' ) {
28255 if ( $next_nonblank_token =~ /^\s/ ) {
28256 ( $next_nonblank_token, $i_beg ) =
28257 find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
28258 $max_token_index );
28259 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
28265 # handle non-blank line; identifier, if any, must follow
28266 unless ($blank_line) {
28268 if ( $id_scan_state eq 'sub' ) {
28269 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
28270 $input_line, $i, $i_beg,
28271 $tok, $type, $rtokens,
28272 $rtoken_map, $id_scan_state, $max_token_index
28276 elsif ( $id_scan_state eq 'package' ) {
28277 ( $i, $tok, $type ) =
28278 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
28279 $rtoken_map, $max_token_index );
28280 $id_scan_state = '';
28284 warning("invalid token in scan_id: $tok\n");
28285 $id_scan_state = '';
28289 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
28291 # shouldn't happen:
28293 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
28295 report_definite_bug();
28298 TOKENIZER_DEBUG_FLAG_NSCAN && do {
28300 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
28302 return ( $i, $tok, $type, $id_scan_state );
28305 sub check_prototype {
28306 my ( $proto, $package, $subname ) = @_;
28307 return unless ( defined($package) && defined($subname) );
28308 if ( defined($proto) ) {
28309 $proto =~ s/^\s*\(\s*//;
28310 $proto =~ s/\s*\)$//;
28312 $is_user_function{$package}{$subname} = 1;
28313 $user_function_prototype{$package}{$subname} = "($proto)";
28315 # prototypes containing '&' must be treated specially..
28316 if ( $proto =~ /\&/ ) {
28318 # right curly braces of prototypes ending in
28319 # '&' may be followed by an operator
28320 if ( $proto =~ /\&$/ ) {
28321 $is_block_function{$package}{$subname} = 1;
28324 # right curly braces of prototypes NOT ending in
28325 # '&' may NOT be followed by an operator
28326 elsif ( $proto !~ /\&$/ ) {
28327 $is_block_list_function{$package}{$subname} = 1;
28332 $is_constant{$package}{$subname} = 1;
28336 $is_user_function{$package}{$subname} = 1;
28340 sub do_scan_package {
28342 # do_scan_package parses a package name
28343 # it is called with $i_beg equal to the index of the first nonblank
28344 # token following a 'package' token.
28345 # USES GLOBAL VARIABLES: $current_package,
28347 # package NAMESPACE
28348 # package NAMESPACE VERSION
28349 # package NAMESPACE BLOCK
28350 # package NAMESPACE VERSION BLOCK
28352 # If VERSION is provided, package sets the $VERSION variable in the given
28353 # namespace to a version object with the VERSION provided. VERSION must be
28354 # a "strict" style version number as defined by the version module: a
28355 # positive decimal number (integer or decimal-fraction) without
28356 # exponentiation or else a dotted-decimal v-string with a leading 'v'
28357 # character and at least three components.
28358 # reference http://perldoc.perl.org/functions/package.html
28360 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
28363 my $package = undef;
28364 my $pos_beg = $$rtoken_map[$i_beg];
28365 pos($input_line) = $pos_beg;
28367 # handle non-blank line; package name, if any, must follow
28368 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
28370 $package = ( defined($1) && $1 ) ? $1 : 'main';
28371 $package =~ s/\'/::/g;
28372 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
28373 $package =~ s/::$//;
28374 my $pos = pos($input_line);
28375 my $numc = $pos - $pos_beg;
28376 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
28379 # Now we must convert back from character position
28380 # to pre_token index.
28381 # I don't think an error flag can occur here ..but ?
28384 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28385 if ($error) { warning("Possibly invalid package\n") }
28386 $current_package = $package;
28388 # we should now have package NAMESPACE
28389 # now expecting VERSION, BLOCK, or ; to follow ...
28390 # package NAMESPACE VERSION
28391 # package NAMESPACE BLOCK
28392 # package NAMESPACE VERSION BLOCK
28393 my ( $next_nonblank_token, $i_next ) =
28394 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28396 # check that something recognizable follows, but do not parse.
28397 # A VERSION number will be parsed later as a number or v-string in the
28398 # normal way. What is important is to set the statement type if
28399 # everything looks okay so that the operator_expected() routine
28400 # knows that the number is in a package statement.
28401 # Examples of valid primitive tokens that might follow are:
28402 # 1235 . ; { } v3 v
28403 if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
28404 $statement_type = $tok;
28408 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
28413 # no match but line not blank --
28414 # could be a label with name package, like package: , for example.
28419 return ( $i, $tok, $type );
28422 sub scan_identifier_do {
28424 # This routine assembles tokens into identifiers. It maintains a
28425 # scan state, id_scan_state. It updates id_scan_state based upon
28426 # current id_scan_state and token, and returns an updated
28427 # id_scan_state and the next index after the identifier.
28428 # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
28429 # $last_nonblank_type
28431 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
28432 $expecting, $container_type )
28436 my $tok_begin = $$rtokens[$i_begin];
28437 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
28438 my $id_scan_state_begin = $id_scan_state;
28439 my $identifier_begin = $identifier;
28440 my $tok = $tok_begin;
28443 my $in_prototype_or_signature = $container_type =~ /^sub/;
28445 # these flags will be used to help figure out the type:
28446 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
28449 # allow old package separator (') except in 'use' statement
28450 my $allow_tick = ( $last_nonblank_token ne 'use' );
28452 # get started by defining a type and a state if necessary
28453 unless ($id_scan_state) {
28454 $context = UNKNOWN_CONTEXT;
28456 # fixup for digraph
28457 if ( $tok eq '>' ) {
28461 $identifier = $tok;
28463 if ( $tok eq '$' || $tok eq '*' ) {
28464 $id_scan_state = '$';
28465 $context = SCALAR_CONTEXT;
28467 elsif ( $tok eq '%' || $tok eq '@' ) {
28468 $id_scan_state = '$';
28469 $context = LIST_CONTEXT;
28471 elsif ( $tok eq '&' ) {
28472 $id_scan_state = '&';
28474 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
28475 $saw_alpha = 0; # 'sub' is considered type info here
28476 $id_scan_state = '$';
28477 $identifier .= ' '; # need a space to separate sub from sub name
28479 elsif ( $tok eq '::' ) {
28480 $id_scan_state = 'A';
28482 elsif ( $tok =~ /^[A-Za-z_]/ ) {
28483 $id_scan_state = ':';
28485 elsif ( $tok eq '->' ) {
28486 $id_scan_state = '$';
28491 my ( $a, $b, $c ) = caller;
28492 warning("Program Bug: scan_identifier given bad token = $tok \n");
28493 warning(" called from sub $a line: $c\n");
28494 report_definite_bug();
28496 $saw_type = !$saw_alpha;
28500 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
28503 # now loop to gather the identifier
28506 while ( $i < $max_token_index ) {
28507 $i_save = $i unless ( $tok =~ /^\s*$/ );
28508 $tok = $$rtokens[ ++$i ];
28510 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
28515 if ( $id_scan_state eq '$' ) { # starting variable name
28517 if ( $tok eq '$' ) {
28519 $identifier .= $tok;
28521 # we've got a punctuation variable if end of line (punct.t)
28522 if ( $i == $max_token_index ) {
28524 $id_scan_state = '';
28529 # POSTDEFREF ->@ ->% ->& ->*
28530 elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
28531 $identifier .= $tok;
28533 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
28535 $id_scan_state = ':'; # now need ::
28536 $identifier .= $tok;
28538 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
28540 $id_scan_state = ':'; # now need ::
28541 $identifier .= $tok;
28543 # Perl will accept leading digits in identifiers,
28544 # although they may not always produce useful results.
28545 # Something like $main::0 is ok. But this also works:
28547 # sub howdy::123::bubba{ print "bubba $54321!\n" }
28548 # howdy::123::bubba();
28551 elsif ( $tok =~ /^[0-9]/ ) { # numeric
28553 $id_scan_state = ':'; # now need ::
28554 $identifier .= $tok;
28556 elsif ( $tok eq '::' ) {
28557 $id_scan_state = 'A';
28558 $identifier .= $tok;
28561 # $# and POSTDEFREF ->$#
28562 elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
28563 $identifier .= $tok; # keep same state, a $ could follow
28565 elsif ( $tok eq '{' ) {
28567 # check for something like ${#} or ${©}
28568 ##if ( $identifier eq '$'
28572 || $identifier eq '@'
28573 || $identifier eq '$#'
28575 && $i + 2 <= $max_token_index
28576 && $$rtokens[ $i + 2 ] eq '}'
28577 && $$rtokens[ $i + 1 ] !~ /[\s\w]/
28580 my $next2 = $$rtokens[ $i + 2 ];
28581 my $next1 = $$rtokens[ $i + 1 ];
28582 $identifier .= $tok . $next1 . $next2;
28584 $id_scan_state = '';
28588 # skip something like ${xxx} or ->{
28589 $id_scan_state = '';
28591 # if this is the first token of a line, any tokens for this
28592 # identifier have already been accumulated
28593 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
28598 # space ok after leading $ % * & @
28599 elsif ( $tok =~ /^\s*$/ ) {
28601 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
28603 if ( length($identifier) > 1 ) {
28604 $id_scan_state = '';
28606 $type = 'i'; # probably punctuation variable
28611 # spaces after $'s are common, and space after @
28612 # is harmless, so only complain about space
28613 # after other type characters. Space after $ and
28614 # @ will be removed in formatting. Report space
28615 # after % and * because they might indicate a
28616 # parsing error. In other words '% ' might be a
28617 # modulo operator. Delete this warning if it
28619 if ( $identifier !~ /^[\@\$]$/ ) {
28621 "Space in identifier, following $identifier\n";
28627 # space after '->' is ok
28629 elsif ( $tok eq '^' ) {
28631 # check for some special variables like $^W
28632 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28633 $identifier .= $tok;
28634 $id_scan_state = 'A';
28636 # Perl accepts '$^]' or '@^]', but
28637 # there must not be a space before the ']'.
28638 my $next1 = $$rtokens[ $i + 1 ];
28639 if ( $next1 eq ']' ) {
28641 $identifier .= $next1;
28642 $id_scan_state = "";
28647 $id_scan_state = '';
28650 else { # something else
28652 if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
28653 $id_scan_state = '';
28655 $type = 'i'; # probably punctuation variable
28659 # check for various punctuation variables
28660 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28661 $identifier .= $tok;
28664 # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
28665 elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
28666 $identifier .= $tok;
28669 elsif ( $identifier eq '$#' ) {
28671 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
28673 # perl seems to allow just these: $#: $#- $#+
28674 elsif ( $tok =~ /^[\:\-\+]$/ ) {
28676 $identifier .= $tok;
28680 write_logfile_entry( 'Use of $# is deprecated' . "\n" );
28683 elsif ( $identifier eq '$$' ) {
28685 # perl does not allow references to punctuation
28686 # variables without braces. For example, this
28690 # You would have to use
28694 if ( $tok eq '{' ) { $type = 't' }
28695 else { $type = 'i' }
28697 elsif ( $identifier eq '->' ) {
28702 if ( length($identifier) == 1 ) { $identifier = ''; }
28704 $id_scan_state = '';
28708 elsif ( $id_scan_state eq '&' ) { # starting sub call?
28710 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
28711 $id_scan_state = ':'; # now need ::
28713 $identifier .= $tok;
28715 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
28716 $id_scan_state = ':'; # now need ::
28718 $identifier .= $tok;
28720 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28721 $id_scan_state = ':'; # now need ::
28723 $identifier .= $tok;
28725 elsif ( $tok =~ /^\s*$/ ) { # allow space
28727 elsif ( $tok eq '::' ) { # leading ::
28728 $id_scan_state = 'A'; # accept alpha next
28729 $identifier .= $tok;
28731 elsif ( $tok eq '{' ) {
28732 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
28734 $id_scan_state = '';
28739 # punctuation variable?
28740 # testfile: cunningham4.pl
28742 # We have to be careful here. If we are in an unknown state,
28743 # we will reject the punctuation variable. In the following
28744 # example the '&' is a binary operator but we are in an unknown
28745 # state because there is no sigil on 'Prima', so we don't
28746 # know what it is. But it is a bad guess that
28747 # '&~' is a function variable.
28748 # $self->{text}->{colorMap}->[
28749 # Prima::PodView::COLOR_CODE_FOREGROUND
28750 # & ~tb::COLOR_INDEX ] =
28751 # $sec->{ColorCode}
28752 if ( $identifier eq '&' && $expecting ) {
28753 $identifier .= $tok;
28760 $id_scan_state = '';
28764 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
28766 if ( $tok =~ /^[A-Za-z_]/ ) { # found it
28767 $identifier .= $tok;
28768 $id_scan_state = ':'; # now need ::
28771 elsif ( $tok eq "'" && $allow_tick ) {
28772 $identifier .= $tok;
28773 $id_scan_state = ':'; # now need ::
28776 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28777 $identifier .= $tok;
28778 $id_scan_state = ':'; # now need ::
28781 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28782 $id_scan_state = '(';
28783 $identifier .= $tok;
28785 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28786 $id_scan_state = ')';
28787 $identifier .= $tok;
28790 $id_scan_state = '';
28795 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
28797 if ( $tok eq '::' ) { # got it
28798 $identifier .= $tok;
28799 $id_scan_state = 'A'; # now require alpha
28801 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
28802 $identifier .= $tok;
28803 $id_scan_state = ':'; # now need ::
28806 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28807 $identifier .= $tok;
28808 $id_scan_state = ':'; # now need ::
28811 elsif ( $tok eq "'" && $allow_tick ) { # tick
28813 if ( $is_keyword{$identifier} ) {
28814 $id_scan_state = ''; # that's all
28818 $identifier .= $tok;
28821 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28822 $id_scan_state = '(';
28823 $identifier .= $tok;
28825 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28826 $id_scan_state = ')';
28827 $identifier .= $tok;
28830 $id_scan_state = ''; # that's all
28835 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
28837 if ( $tok eq '(' ) { # got it
28838 $identifier .= $tok;
28839 $id_scan_state = ')'; # now find the end of it
28841 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
28842 $identifier .= $tok;
28845 $id_scan_state = ''; # that's all - no prototype
28850 elsif ( $id_scan_state eq ')' ) { # looking for ) to end
28852 if ( $tok eq ')' ) { # got it
28853 $identifier .= $tok;
28854 $id_scan_state = ''; # all done
28857 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
28858 $identifier .= $tok;
28860 else { # probable error in script, but keep going
28861 warning("Unexpected '$tok' while seeking end of prototype\n");
28862 $identifier .= $tok;
28865 else { # can get here due to error in initialization
28866 $id_scan_state = '';
28872 if ( $id_scan_state eq ')' ) {
28873 warning("Hit end of line while seeking ) to end prototype\n");
28876 # once we enter the actual identifier, it may not extend beyond
28877 # the end of the current line
28878 if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
28879 $id_scan_state = '';
28881 if ( $i < 0 ) { $i = 0 }
28888 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
28891 else { $type = 'i' }
28893 elsif ( $identifier eq '->' ) {
28897 ( length($identifier) > 1 )
28899 # In something like '@$=' we have an identifier '@$'
28900 # In something like '$${' we have type '$$' (and only
28901 # part of an identifier)
28902 && !( $identifier =~ /\$$/ && $tok eq '{' )
28903 && ( $identifier !~ /^(sub |package )$/ )
28908 else { $type = 't' }
28910 elsif ($saw_alpha) {
28912 # type 'w' includes anything without leading type info
28913 # ($,%,@,*) including something like abc::def::ghi
28918 } # this can happen on a restart
28922 $tok = $identifier;
28923 if ($message) { write_logfile_entry($message) }
28930 TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
28931 my ( $a, $b, $c ) = caller;
28933 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
28935 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
28937 return ( $i, $tok, $type, $id_scan_state, $identifier );
28942 # saved package and subnames in case prototype is on separate line
28943 my ( $package_saved, $subname_saved );
28947 # do_scan_sub parses a sub name and prototype
28948 # it is called with $i_beg equal to the index of the first nonblank
28949 # token following a 'sub' token.
28951 # TODO: add future error checks to be sure we have a valid
28952 # sub name. For example, 'sub &doit' is wrong. Also, be sure
28953 # a name is given if and only if a non-anonymous sub is
28955 # USES GLOBAL VARS: $current_package, $last_nonblank_token,
28956 # $in_attribute_list, %saw_function_definition,
28960 $input_line, $i, $i_beg,
28961 $tok, $type, $rtokens,
28962 $rtoken_map, $id_scan_state, $max_token_index
28964 $id_scan_state = ""; # normally we get everything in one call
28965 my $subname = undef;
28966 my $package = undef;
28971 my $pos_beg = $$rtoken_map[$i_beg];
28972 pos($input_line) = $pos_beg;
28974 # Look for the sub NAME
28976 $input_line =~ m/\G\s*
28977 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
28978 (\w+) # NAME - required
28985 $package = ( defined($1) && $1 ) ? $1 : $current_package;
28986 $package =~ s/\'/::/g;
28987 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
28988 $package =~ s/::$//;
28989 my $pos = pos($input_line);
28990 my $numc = $pos - $pos_beg;
28991 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
28995 # Now look for PROTO ATTRS
28996 # Look for prototype/attributes which are usually on the same
28997 # line as the sub name but which might be on a separate line.
28998 # For example, we might have an anonymous sub with attributes,
28999 # or a prototype on a separate line from its sub name
29001 # NOTE: We only want to parse PROTOTYPES here. If we see anything that
29002 # does not look like a prototype, we assume it is a SIGNATURE and we
29003 # will stop and let the the standard tokenizer handle it. In
29004 # particular, we stop if we see any nested parens, braces, or commas.
29005 my $saw_opening_paren = $input_line =~ /\G\s*\(/;
29007 $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
29008 (\s*:)? # ATTRS leading ':'
29016 # If we also found the sub name on this call then append PROTO.
29017 # This is not necessary but for compatability with previous
29018 # versions when the -csc flag is used:
29019 if ( $match && $proto ) {
29024 # Handle prototype on separate line from subname
29025 if ($subname_saved) {
29026 $package = $package_saved;
29027 $subname = $subname_saved;
29028 $tok = $last_nonblank_token;
29035 # ATTRS: if there are attributes, back up and let the ':' be
29036 # found later by the scanner.
29037 my $pos = pos($input_line);
29039 $pos -= length($attrs);
29042 my $next_nonblank_token = $tok;
29044 # catch case of line with leading ATTR ':' after anonymous sub
29045 if ( $pos == $pos_beg && $tok eq ':' ) {
29047 $in_attribute_list = 1;
29050 # Otherwise, if we found a match we must convert back from
29051 # string position to the pre_token index for continued parsing.
29054 # I don't think an error flag can occur here ..but ?
29056 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
29057 $max_token_index );
29058 if ($error) { warning("Possibly invalid sub\n") }
29060 # check for multiple definitions of a sub
29061 ( $next_nonblank_token, my $i_next ) =
29062 find_next_nonblank_token_on_this_line( $i, $rtokens,
29063 $max_token_index );
29066 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
29067 { # skip blank or side comment
29068 my ( $rpre_tokens, $rpre_types ) =
29069 peek_ahead_for_n_nonblank_pre_tokens(1);
29070 if ( defined($rpre_tokens) && @$rpre_tokens ) {
29071 $next_nonblank_token = $rpre_tokens->[0];
29074 $next_nonblank_token = '}';
29077 $package_saved = "";
29078 $subname_saved = "";
29080 # See what's next...
29081 if ( $next_nonblank_token eq '{' ) {
29084 # Check for multiple definitions of a sub, but
29085 # it is ok to have multiple sub BEGIN, etc,
29086 # so we do not complain if name is all caps
29087 if ( $saw_function_definition{$package}{$subname}
29088 && $subname !~ /^[A-Z]+$/ )
29090 my $lno = $saw_function_definition{$package}{$subname};
29092 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
29095 $saw_function_definition{$package}{$subname} =
29096 $tokenizer_self->{_last_line_number};
29099 elsif ( $next_nonblank_token eq ';' ) {
29101 elsif ( $next_nonblank_token eq '}' ) {
29104 # ATTRS - if an attribute list follows, remember the name
29105 # of the sub so the next opening brace can be labeled.
29106 # Setting 'statement_type' causes any ':'s to introduce
29108 elsif ( $next_nonblank_token eq ':' ) {
29109 $statement_type = $tok;
29112 # if we stopped before an open paren ...
29113 elsif ( $next_nonblank_token eq '(' ) {
29115 # If we DID NOT see this paren above then it must be on the
29116 # next line so we will set a flag to come back here and see if
29117 # it is a PROTOTYPE
29119 # Otherwise, we assume it is a SIGNATURE rather than a
29120 # PROTOTYPE and let the normal tokenizer handle it as a list
29121 if ( !$saw_opening_paren ) {
29122 $id_scan_state = 'sub'; # we must come back to get proto
29123 $package_saved = $package;
29124 $subname_saved = $subname;
29126 $statement_type = $tok;
29128 elsif ($next_nonblank_token) { # EOF technically ok
29130 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
29133 check_prototype( $proto, $package, $subname );
29136 # no match but line not blank
29139 return ( $i, $tok, $type, $id_scan_state );
29143 #########i###############################################################
29144 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
29145 #########################################################################
29147 sub find_next_nonblank_token {
29148 my ( $i, $rtokens, $max_token_index ) = @_;
29150 if ( $i >= $max_token_index ) {
29151 if ( !peeked_ahead() ) {
29154 peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
29157 my $next_nonblank_token = $$rtokens[ ++$i ];
29159 if ( $next_nonblank_token =~ /^\s*$/ ) {
29160 $next_nonblank_token = $$rtokens[ ++$i ];
29162 return ( $next_nonblank_token, $i );
29165 sub numerator_expected {
29167 # this is a filter for a possible numerator, in support of guessing
29168 # for the / pattern delimiter token.
29173 # Note: I am using the convention that variables ending in
29174 # _expected have these 3 possible values.
29175 my ( $i, $rtokens, $max_token_index ) = @_;
29176 my $next_token = $$rtokens[ $i + 1 ];
29177 if ( $next_token eq '=' ) { $i++; } # handle /=
29178 my ( $next_nonblank_token, $i_next ) =
29179 find_next_nonblank_token( $i, $rtokens, $max_token_index );
29181 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
29186 if ( $next_nonblank_token =~ /^\s*$/ ) {
29195 sub pattern_expected {
29197 # This is the start of a filter for a possible pattern.
29198 # It looks at the token after a possible pattern and tries to
29199 # determine if that token could end a pattern.
29204 my ( $i, $rtokens, $max_token_index ) = @_;
29205 my $next_token = $$rtokens[ $i + 1 ];
29206 if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
29207 my ( $next_nonblank_token, $i_next ) =
29208 find_next_nonblank_token( $i, $rtokens, $max_token_index );
29210 # list of tokens which may follow a pattern
29211 # (can probably be expanded)
29212 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
29218 if ( $next_nonblank_token =~ /^\s*$/ ) {
29227 sub find_next_nonblank_token_on_this_line {
29228 my ( $i, $rtokens, $max_token_index ) = @_;
29229 my $next_nonblank_token;
29231 if ( $i < $max_token_index ) {
29232 $next_nonblank_token = $$rtokens[ ++$i ];
29234 if ( $next_nonblank_token =~ /^\s*$/ ) {
29236 if ( $i < $max_token_index ) {
29237 $next_nonblank_token = $$rtokens[ ++$i ];
29242 $next_nonblank_token = "";
29244 return ( $next_nonblank_token, $i );
29247 sub find_angle_operator_termination {
29249 # We are looking at a '<' and want to know if it is an angle operator.
29250 # We are to return:
29251 # $i = pretoken index of ending '>' if found, current $i otherwise
29252 # $type = 'Q' if found, '>' otherwise
29253 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
29256 pos($input_line) = 1 + $$rtoken_map[$i];
29260 # we just have to find the next '>' if a term is expected
29261 if ( $expecting == TERM ) { $filter = '[\>]' }
29263 # we have to guess if we don't know what is expected
29264 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
29266 # shouldn't happen - we shouldn't be here if operator is expected
29267 else { warning("Program Bug in find_angle_operator_termination\n") }
29269 # To illustrate what we might be looking at, in case we are
29270 # guessing, here are some examples of valid angle operators
29277 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
29278 # <${PREFIX}*img*.$IMAGE_TYPE>
29279 # <img*.$IMAGE_TYPE>
29280 # <Timg*.$IMAGE_TYPE>
29281 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
29283 # Here are some examples of lines which do not have angle operators:
29284 # return undef unless $self->[2]++ < $#{$self->[1]};
29287 # the following line from dlister.pl caused trouble:
29288 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
29290 # If the '<' starts an angle operator, it must end on this line and
29291 # it must not have certain characters like ';' and '=' in it. I use
29292 # this to limit the testing. This filter should be improved if
29295 if ( $input_line =~ /($filter)/g ) {
29299 # We MAY have found an angle operator termination if we get
29300 # here, but we need to do more to be sure we haven't been
29302 my $pos = pos($input_line);
29304 my $pos_beg = $$rtoken_map[$i];
29305 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
29307 # Reject if the closing '>' follows a '-' as in:
29308 # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
29309 if ( $expecting eq UNKNOWN ) {
29310 my $check = substr( $input_line, $pos - 2, 1 );
29311 if ( $check eq '-' ) {
29312 return ( $i, $type );
29316 ######################################debug#####
29317 #write_diagnostics( "ANGLE? :$str\n");
29318 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
29319 ######################################debug#####
29323 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
29325 # It may be possible that a quote ends midway in a pretoken.
29326 # If this happens, it may be necessary to split the pretoken.
29329 "Possible tokinization error..please check this line\n");
29330 report_possible_bug();
29333 # Now let's see where we stand....
29334 # OK if math op not possible
29335 if ( $expecting == TERM ) {
29338 # OK if there are no more than 2 pre-tokens inside
29339 # (not possible to write 2 token math between < and >)
29340 # This catches most common cases
29341 elsif ( $i <= $i_beg + 3 ) {
29342 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
29348 # Let's try a Brace Test: any braces inside must balance
29350 while ( $str =~ /\{/g ) { $br++ }
29351 while ( $str =~ /\}/g ) { $br-- }
29353 while ( $str =~ /\[/g ) { $sb++ }
29354 while ( $str =~ /\]/g ) { $sb-- }
29356 while ( $str =~ /\(/g ) { $pr++ }
29357 while ( $str =~ /\)/g ) { $pr-- }
29359 # if braces do not balance - not angle operator
29360 if ( $br || $sb || $pr ) {
29364 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
29367 # we should keep doing more checks here...to be continued
29368 # Tentatively accepting this as a valid angle operator.
29369 # There are lots more things that can be checked.
29372 "ANGLE-Guessing yes: $str expecting=$expecting\n");
29373 write_logfile_entry("Guessing angle operator here: $str\n");
29378 # didn't find ending >
29380 if ( $expecting == TERM ) {
29381 warning("No ending > for angle operator\n");
29385 return ( $i, $type );
29388 sub scan_number_do {
29390 # scan a number in any of the formats that Perl accepts
29391 # Underbars (_) are allowed in decimal numbers.
29392 # input parameters -
29393 # $input_line - the string to scan
29394 # $i - pre_token index to start scanning
29395 # $rtoken_map - reference to the pre_token map giving starting
29396 # character position in $input_line of token $i
29397 # output parameters -
29398 # $i - last pre_token index of the number just scanned
29399 # number - the number (characters); or undef if not a number
29401 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
29402 my $pos_beg = $$rtoken_map[$i];
29405 my $number = undef;
29406 my $type = $input_type;
29408 my $first_char = substr( $input_line, $pos_beg, 1 );
29410 # Look for bad starting characters; Shouldn't happen..
29411 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
29412 warning("Program bug - scan_number given character $first_char\n");
29413 report_definite_bug();
29414 return ( $i, $type, $number );
29417 # handle v-string without leading 'v' character ('Two Dot' rule)
29419 # TODO: v-strings may contain underscores
29420 pos($input_line) = $pos_beg;
29421 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
29422 $pos = pos($input_line);
29423 my $numc = $pos - $pos_beg;
29424 $number = substr( $input_line, $pos_beg, $numc );
29426 report_v_string($number);
29429 # handle octal, hex, binary
29430 if ( !defined($number) ) {
29431 pos($input_line) = $pos_beg;
29432 if ( $input_line =~
29433 /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
29435 $pos = pos($input_line);
29436 my $numc = $pos - $pos_beg;
29437 $number = substr( $input_line, $pos_beg, $numc );
29443 if ( !defined($number) ) {
29444 pos($input_line) = $pos_beg;
29446 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
29447 $pos = pos($input_line);
29449 # watch out for things like 0..40 which would give 0. by this;
29450 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
29451 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
29455 my $numc = $pos - $pos_beg;
29456 $number = substr( $input_line, $pos_beg, $numc );
29461 # filter out non-numbers like e + - . e2 .e3 +e6
29462 # the rule: at least one digit, and any 'e' must be preceded by a digit
29464 $number !~ /\d/ # no digits
29465 || ( $number =~ /^(.*)[eE]/
29466 && $1 !~ /\d/ ) # or no digits before the 'e'
29470 $type = $input_type;
29471 return ( $i, $type, $number );
29474 # Found a number; now we must convert back from character position
29475 # to pre_token index. An error here implies user syntax error.
29476 # An example would be an invalid octal number like '009'.
29479 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
29480 if ($error) { warning("Possibly invalid number\n") }
29482 return ( $i, $type, $number );
29485 sub inverse_pretoken_map {
29487 # Starting with the current pre_token index $i, scan forward until
29488 # finding the index of the next pre_token whose position is $pos.
29489 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
29492 while ( ++$i <= $max_token_index ) {
29494 if ( $pos <= $$rtoken_map[$i] ) {
29496 # Let the calling routine handle errors in which we do not
29497 # land on a pre-token boundary. It can happen by running
29498 # perltidy on some non-perl scripts, for example.
29499 if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
29504 return ( $i, $error );
29507 sub find_here_doc {
29509 # find the target of a here document, if any
29510 # input parameters:
29511 # $i - token index of the second < of <<
29512 # ($i must be less than the last token index if this is called)
29513 # output parameters:
29514 # $found_target = 0 didn't find target; =1 found target
29515 # HERE_TARGET - the target string (may be empty string)
29516 # $i - unchanged if not here doc,
29517 # or index of the last token of the here target
29518 # $saw_error - flag noting unbalanced quote on here target
29519 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
29521 my $found_target = 0;
29522 my $here_doc_target = '';
29523 my $here_quote_character = '';
29525 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
29526 $next_token = $$rtokens[ $i + 1 ];
29528 # perl allows a backslash before the target string (heredoc.t)
29530 if ( $next_token eq '\\' ) {
29532 $next_token = $$rtokens[ $i + 2 ];
29535 ( $next_nonblank_token, $i_next_nonblank ) =
29536 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
29538 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
29541 my $quote_depth = 0;
29546 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
29549 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
29550 $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
29552 if ($in_quote) { # didn't find end of quote, so no target found
29554 if ( $expecting == TERM ) {
29556 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
29561 else { # found ending quote
29566 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
29567 $tokj = $$rtokens[$j];
29569 # we have to remove any backslash before the quote character
29570 # so that the here-doc-target exactly matches this string
29574 && $$rtokens[ $j + 1 ] eq $here_quote_character );
29575 $here_doc_target .= $tokj;
29580 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
29582 write_logfile_entry(
29583 "found blank here-target after <<; suggest using \"\"\n");
29586 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
29588 my $here_doc_expected;
29589 if ( $expecting == UNKNOWN ) {
29590 $here_doc_expected = guess_if_here_doc($next_token);
29593 $here_doc_expected = 1;
29596 if ($here_doc_expected) {
29598 $here_doc_target = $next_token;
29605 if ( $expecting == TERM ) {
29607 write_logfile_entry("Note: bare here-doc operator <<\n");
29614 # patch to neglect any prepended backslash
29615 if ( $found_target && $backslash ) { $i++ }
29617 return ( $found_target, $here_doc_target, $here_quote_character, $i,
29623 # follow (or continue following) quoted string(s)
29624 # $in_quote return code:
29625 # 0 - ok, found end
29626 # 1 - still must find end of quote whose target is $quote_character
29627 # 2 - still looking for end of first of two quotes
29629 # Returns updated strings:
29630 # $quoted_string_1 = quoted string seen while in_quote=1
29631 # $quoted_string_2 = quoted string seen while in_quote=2
29633 $i, $in_quote, $quote_character,
29634 $quote_pos, $quote_depth, $quoted_string_1,
29635 $quoted_string_2, $rtokens, $rtoken_map,
29639 my $in_quote_starting = $in_quote;
29642 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
29645 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29648 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
29649 $quote_pos, $quote_depth, $max_token_index );
29650 $quoted_string_2 .= $quoted_string;
29651 if ( $in_quote == 1 ) {
29652 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
29653 $quote_character = '';
29656 $quoted_string_2 .= "\n";
29660 if ( $in_quote == 1 ) { # one (more) quote to follow
29663 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29666 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
29667 $quote_pos, $quote_depth, $max_token_index );
29668 $quoted_string_1 .= $quoted_string;
29669 if ( $in_quote == 1 ) {
29670 $quoted_string_1 .= "\n";
29673 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29674 $quoted_string_1, $quoted_string_2 );
29677 sub follow_quoted_string {
29679 # scan for a specific token, skipping escaped characters
29680 # if the quote character is blank, use the first non-blank character
29681 # input parameters:
29682 # $rtokens = reference to the array of tokens
29683 # $i = the token index of the first character to search
29684 # $in_quote = number of quoted strings being followed
29685 # $beginning_tok = the starting quote character
29686 # $quote_pos = index to check next for alphanumeric delimiter
29687 # output parameters:
29688 # $i = the token index of the ending quote character
29689 # $in_quote = decremented if found end, unchanged if not
29690 # $beginning_tok = the starting quote character
29691 # $quote_pos = index to check next for alphanumeric delimiter
29692 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
29693 # $quoted_string = the text of the quote (without quotation tokens)
29694 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
29697 my ( $tok, $end_tok );
29698 my $i = $i_beg - 1;
29699 my $quoted_string = "";
29701 TOKENIZER_DEBUG_FLAG_QUOTE && do {
29703 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
29706 # get the corresponding end token
29707 if ( $beginning_tok !~ /^\s*$/ ) {
29708 $end_tok = matching_end_token($beginning_tok);
29711 # a blank token means we must find and use the first non-blank one
29713 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
29715 while ( $i < $max_token_index ) {
29716 $tok = $$rtokens[ ++$i ];
29718 if ( $tok !~ /^\s*$/ ) {
29720 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
29721 $i = $max_token_index;
29725 if ( length($tok) > 1 ) {
29726 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
29727 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
29730 $beginning_tok = $tok;
29733 $end_tok = matching_end_token($beginning_tok);
29739 $allow_quote_comments = 1;
29744 # There are two different loops which search for the ending quote
29745 # character. In the rare case of an alphanumeric quote delimiter, we
29746 # have to look through alphanumeric tokens character-by-character, since
29747 # the pre-tokenization process combines multiple alphanumeric
29748 # characters, whereas for a non-alphanumeric delimiter, only tokens of
29749 # length 1 can match.
29751 ###################################################################
29752 # Case 1 (rare): loop for case of alphanumeric quote delimiter..
29753 # "quote_pos" is the position the current word to begin searching
29754 ###################################################################
29755 if ( $beginning_tok =~ /\w/ ) {
29757 # Note this because it is not recommended practice except
29758 # for obfuscated perl contests
29759 if ( $in_quote == 1 ) {
29760 write_logfile_entry(
29761 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
29764 while ( $i < $max_token_index ) {
29766 if ( $quote_pos == 0 || ( $i < 0 ) ) {
29767 $tok = $$rtokens[ ++$i ];
29769 if ( $tok eq '\\' ) {
29771 # retain backslash unless it hides the end token
29772 $quoted_string .= $tok
29773 unless $$rtokens[ $i + 1 ] eq $end_tok;
29775 last if ( $i >= $max_token_index );
29776 $tok = $$rtokens[ ++$i ];
29779 my $old_pos = $quote_pos;
29781 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
29785 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
29787 if ( $quote_pos > 0 ) {
29790 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
29794 if ( $quote_depth == 0 ) {
29800 $quoted_string .= substr( $tok, $old_pos );
29805 ########################################################################
29806 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
29807 ########################################################################
29810 while ( $i < $max_token_index ) {
29811 $tok = $$rtokens[ ++$i ];
29813 if ( $tok eq $end_tok ) {
29816 if ( $quote_depth == 0 ) {
29821 elsif ( $tok eq $beginning_tok ) {
29824 elsif ( $tok eq '\\' ) {
29826 # retain backslash unless it hides the beginning or end token
29827 $tok = $$rtokens[ ++$i ];
29828 $quoted_string .= '\\'
29829 unless ( $tok eq $end_tok || $tok eq $beginning_tok );
29831 $quoted_string .= $tok;
29834 if ( $i > $max_token_index ) { $i = $max_token_index }
29835 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
29839 sub indicate_error {
29840 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
29841 interrupt_logfile();
29843 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
29847 sub write_error_indicator_pair {
29848 my ( $line_number, $input_line, $pos, $carrat ) = @_;
29849 my ( $offset, $numbered_line, $underline ) =
29850 make_numbered_line( $line_number, $input_line, $pos );
29851 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
29852 warning( $numbered_line . "\n" );
29853 $underline =~ s/\s*$//;
29854 warning( $underline . "\n" );
29857 sub make_numbered_line {
29859 # Given an input line, its line number, and a character position of
29860 # interest, create a string not longer than 80 characters of the form
29861 # $lineno: sub_string
29862 # such that the sub_string of $str contains the position of interest
29864 # Here is an example of what we want, in this case we add trailing
29865 # '...' because the line is long.
29867 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29869 # Here is another example, this time in which we used leading '...'
29870 # because of excessive length:
29872 # 2: ... er of the World Wide Web Consortium's
29874 # input parameters are:
29875 # $lineno = line number
29876 # $str = the text of the line
29877 # $pos = position of interest (the error) : 0 = first character
29880 # - $offset = an offset which corrects the position in case we only
29881 # display part of a line, such that $pos-$offset is the effective
29882 # position from the start of the displayed line.
29883 # - $numbered_line = the numbered line as above,
29884 # - $underline = a blank 'underline' which is all spaces with the same
29885 # number of characters as the numbered line.
29887 my ( $lineno, $str, $pos ) = @_;
29888 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
29889 my $excess = length($str) - $offset - 68;
29890 my $numc = ( $excess > 0 ) ? 68 : undef;
29892 if ( defined($numc) ) {
29893 if ( $offset == 0 ) {
29894 $str = substr( $str, $offset, $numc - 4 ) . " ...";
29897 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
29902 if ( $offset == 0 ) {
29905 $str = "... " . substr( $str, $offset + 4 );
29909 my $numbered_line = sprintf( "%d: ", $lineno );
29910 $offset -= length($numbered_line);
29911 $numbered_line .= $str;
29912 my $underline = " " x length($numbered_line);
29913 return ( $offset, $numbered_line, $underline );
29916 sub write_on_underline {
29918 # The "underline" is a string that shows where an error is; it starts
29919 # out as a string of blanks with the same length as the numbered line of
29920 # code above it, and we have to add marking to show where an error is.
29921 # In the example below, we want to write the string '--^' just below
29922 # the line of bad code:
29924 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29926 # We are given the current underline string, plus a position and a
29927 # string to write on it.
29929 # In the above example, there will be 2 calls to do this:
29930 # First call: $pos=19, pos_chr=^
29931 # Second call: $pos=16, pos_chr=---
29933 # This is a trivial thing to do with substr, but there is some
29936 my ( $underline, $pos, $pos_chr ) = @_;
29938 # check for error..shouldn't happen
29939 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
29942 my $excess = length($pos_chr) + $pos - length($underline);
29943 if ( $excess > 0 ) {
29944 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
29946 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
29947 return ($underline);
29952 # Break a string, $str, into a sequence of preliminary tokens. We
29953 # are interested in these types of tokens:
29954 # words (type='w'), example: 'max_tokens_wanted'
29955 # digits (type = 'd'), example: '0755'
29956 # whitespace (type = 'b'), example: ' '
29957 # any other single character (i.e. punct; type = the character itself).
29958 # We cannot do better than this yet because we might be in a quoted
29959 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
29961 my ( $str, $max_tokens_wanted ) = @_;
29963 # we return references to these 3 arrays:
29964 my @tokens = (); # array of the tokens themselves
29965 my @token_map = (0); # string position of start of each token
29966 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
29971 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
29974 # note that this must come before words!
29975 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
29978 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
29980 # single-character punctuation
29981 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
29985 return ( \@tokens, \@token_map, \@type );
29989 push @token_map, pos($str);
29991 } while ( --$max_tokens_wanted != 0 );
29993 return ( \@tokens, \@token_map, \@type );
29998 # this is an old debug routine
29999 my ( $rtokens, $rtoken_map ) = @_;
30000 my $num = scalar(@$rtokens);
30003 for ( $i = 0 ; $i < $num ; $i++ ) {
30004 my $len = length( $$rtokens[$i] );
30005 print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
30009 sub matching_end_token {
30011 # find closing character for a pattern
30012 my $beginning_token = shift;
30014 if ( $beginning_token eq '{' ) {
30017 elsif ( $beginning_token eq '[' ) {
30020 elsif ( $beginning_token eq '<' ) {
30023 elsif ( $beginning_token eq '(' ) {
30031 sub dump_token_types {
30035 # This should be the latest list of token types in use
30036 # adding NEW_TOKENS: add a comment here
30037 print $fh <<'END_OF_LIST';
30039 Here is a list of the token types currently used for lines of type 'CODE'.
30040 For the following tokens, the "type" of a token is just the token itself.
30042 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
30043 ( ) <= >= == =~ !~ != ++ -- /= x=
30044 ... **= <<= >>= &&= ||= //= <=>
30045 , + - / * | % ! x ~ = \ ? : . < > ^ &
30047 The following additional token types are defined:
30050 b blank (white space)
30051 { indent: opening structural curly brace or square bracket or paren
30052 (code block, anonymous hash reference, or anonymous array reference)
30053 } outdent: right structural curly brace or square bracket or paren
30054 [ left non-structural square bracket (enclosing an array index)
30055 ] right non-structural square bracket
30056 ( left non-structural paren (all but a list right of an =)
30057 ) right non-structural paren
30058 L left non-structural curly brace (enclosing a key)
30059 R right non-structural curly brace
30060 ; terminal semicolon
30061 f indicates a semicolon in a "for" statement
30062 h here_doc operator <<
30064 Q indicates a quote or pattern
30065 q indicates a qw quote block
30067 C user-defined constant or constant function (with void prototype = ())
30068 U user-defined function taking parameters
30069 G user-defined function taking block parameter (like grep/map/eval)
30070 M (unused, but reserved for subroutine definition name)
30071 P (unused, but -html uses it to label pod text)
30072 t type indicater such as %,$,@,*,&,sub
30073 w bare word (perhaps a subroutine call)
30074 i identifier of some type (with leading %, $, @, *, &, sub, -> )
30077 F a file test operator (like -e)
30079 Z identifier in indirect object slot: may be file handle, object
30080 J LABEL: code block label
30081 j LABEL after next, last, redo, goto
30084 pp pre-increment operator ++
30085 mm pre-decrement operator --
30086 A : used as attribute separator
30088 Here are the '_line_type' codes used internally:
30089 SYSTEM - system-specific code before hash-bang line
30090 CODE - line of perl code (including comments)
30091 POD_START - line starting pod, such as '=head'
30092 POD - pod documentation text
30093 POD_END - last line of pod section, '=cut'
30094 HERE - text of here-document
30095 HERE_END - last line of here-doc (target word)
30096 FORMAT - format section
30097 FORMAT_END - last line of format section, '.'
30098 DATA_START - __DATA__ line
30099 DATA - unidentified text following __DATA__
30100 END_START - __END__ line
30101 END - unidentified text following __END__
30102 ERROR - we are in big trouble, probably not a perl script
30108 # These names are used in error messages
30109 @opening_brace_names = qw# '{' '[' '(' '?' #;
30110 @closing_brace_names = qw# '}' ']' ')' ':' #;
30113 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
30114 <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
30116 @is_digraph{@digraphs} = (1) x scalar(@digraphs);
30118 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.=);
30119 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
30121 my @tetragraphs = qw( <<>> );
30122 @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
30124 # make a hash of all valid token types for self-checking the tokenizer
30125 # (adding NEW_TOKENS : select a new character and add to this list)
30126 my @valid_token_types = qw#
30127 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
30128 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
30130 push( @valid_token_types, @digraphs );
30131 push( @valid_token_types, @trigraphs );
30132 push( @valid_token_types, @tetragraphs );
30133 push( @valid_token_types, ( '#', ',', 'CORE::' ) );
30134 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
30136 # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
30137 my @file_test_operators =
30138 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
30139 @is_file_test_operator{@file_test_operators} =
30140 (1) x scalar(@file_test_operators);
30142 # these functions have prototypes of the form (&), so when they are
30143 # followed by a block, that block MAY BE followed by an operator.
30144 # Smartmatch operator ~~ may be followed by anonymous hash or array ref
30145 @_ = qw( do eval );
30146 @is_block_operator{@_} = (1) x scalar(@_);
30148 # these functions allow an identifier in the indirect object slot
30149 @_ = qw( print printf sort exec system say);
30150 @is_indirect_object_taker{@_} = (1) x scalar(@_);
30152 # These tokens may precede a code block
30153 # patched for SWITCH/CASE/CATCH. Actually these could be removed
30154 # now and we could let the extended-syntax coding handle them
30156 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
30157 unless do while until eval for foreach map grep sort
30158 switch case given when catch try finally);
30159 @is_code_block_token{@_} = (1) x scalar(@_);
30161 # I'll build the list of keywords incrementally
30164 # keywords and tokens after which a value or pattern is expected,
30165 # but not an operator. In other words, these should consume terms
30166 # to their right, or at least they are not expected to be followed
30167 # immediately by operators.
30168 my @value_requestor = qw(
30391 # patched above for SWITCH/CASE given/when err say
30392 # 'err' is a fairly safe addition.
30393 # TODO: 'default' still needed if appropriate
30394 # 'use feature' seen, but perltidy works ok without it.
30395 # Concerned that 'default' could break code.
30396 push( @Keywords, @value_requestor );
30398 # These are treated the same but are not keywords:
30403 push( @value_requestor, @extra_vr );
30405 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
30407 # this list contains keywords which do not look for arguments,
30408 # so that they might be followed by an operator, or at least
30410 my @operator_requestor = qw(
30434 push( @Keywords, @operator_requestor );
30436 # These are treated the same but are not considered keywords:
30443 push( @operator_requestor, @extra_or );
30445 @expecting_operator_token{@operator_requestor} =
30446 (1) x scalar(@operator_requestor);
30448 # these token TYPES expect trailing operator but not a term
30449 # note: ++ and -- are post-increment and decrement, 'C' = constant
30450 my @operator_requestor_types = qw( ++ -- C <> q );
30451 @expecting_operator_types{@operator_requestor_types} =
30452 (1) x scalar(@operator_requestor_types);
30454 # these token TYPES consume values (terms)
30455 # note: pp and mm are pre-increment and decrement
30456 # f=semicolon in for, F=file test operator
30457 my @value_requestor_type = qw#
30458 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
30459 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
30460 <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
30461 f F pp mm Y p m U J G j >> << ^ t
30462 ~. ^. |. &. ^.= |.= &.=
30464 push( @value_requestor_type, ',' )
30465 ; # (perl doesn't like a ',' in a qw block)
30466 @expecting_term_types{@value_requestor_type} =
30467 (1) x scalar(@value_requestor_type);
30469 # Note: the following valid token types are not assigned here to
30470 # hashes requesting to be followed by values or terms, but are
30471 # instead currently hard-coded into sub operator_expected:
30472 # ) -> :: Q R Z ] b h i k n v w } #
30474 # For simple syntax checking, it is nice to have a list of operators which
30475 # will really be unhappy if not followed by a term. This includes most
30477 %really_want_term = %expecting_term_types;
30479 # with these exceptions...
30480 delete $really_want_term{'U'}; # user sub, depends on prototype
30481 delete $really_want_term{'F'}; # file test works on $_ if no following term
30482 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
30485 @_ = qw(q qq qw qx qr s y tr m);
30486 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
30488 # These keywords are handled specially in the tokenizer code:
30489 my @special_keywords = qw(
30505 push( @Keywords, @special_keywords );
30507 # Keywords after which list formatting may be used
30508 # WARNING: do not include |map|grep|eval or perl may die on
30509 # syntax errors (map1.t).
30510 my @keyword_taking_list = qw(
30584 @is_keyword_taking_list{@keyword_taking_list} =
30585 (1) x scalar(@keyword_taking_list);
30587 # These are not used in any way yet
30588 # my @unused_keywords = qw(
30594 # The list of keywords was originally extracted from function 'keyword' in
30595 # perl file toke.c version 5.005.03, using this utility, plus a
30596 # little editing: (file getkwd.pl):
30597 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
30598 # Add 'get' prefix where necessary, then split into the above lists.
30599 # This list should be updated as necessary.
30600 # The list should not contain these special variables:
30601 # ARGV DATA ENV SIG STDERR STDIN STDOUT
30604 @is_keyword{@Keywords} = (1) x scalar(@Keywords);